test_morestats.py 153 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474
  1. # Author: Travis Oliphant, 2002
  2. #
  3. # Further enhancements and tests added by numerous SciPy developers.
  4. #
  5. import math
  6. import re
  7. import sys
  8. import warnings
  9. from functools import partial
  10. import numpy as np
  11. from numpy.random import RandomState
  12. from numpy.testing import (assert_array_equal, assert_almost_equal,
  13. assert_array_less, assert_array_almost_equal,
  14. assert_, assert_allclose, assert_equal)
  15. import pytest
  16. from pytest import raises as assert_raises
  17. from scipy import optimize, stats, special
  18. from scipy.stats._morestats import _abw_state, _get_As_weibull, _Avals_weibull
  19. from .common_tests import check_named_results
  20. from .._hypotests import _get_wilcoxon_distr, _get_wilcoxon_distr2
  21. from scipy.stats._binomtest import _binary_search_for_binom_tst
  22. from scipy.stats._distr_params import distcont
  23. from scipy.stats._axis_nan_policy import (SmallSampleWarning, too_small_nd_omit,
  24. too_small_1d_omit, too_small_1d_not_omit)
  25. import scipy._lib.array_api_extra as xpx
  26. from scipy._lib._array_api import (is_torch, make_xp_test_case, eager_warns, xp_ravel,
  27. is_numpy, xp_default_dtype)
  28. from scipy._lib._array_api_no_0d import (
  29. xp_assert_close,
  30. xp_assert_equal,
  31. xp_assert_less,
  32. )
  33. skip_xp_backends = pytest.mark.skip_xp_backends
  34. distcont = dict(distcont) # type: ignore
  35. # Matplotlib is not a scipy dependency but is optionally used in probplot, so
  36. # check if it's available
  37. try:
  38. import matplotlib
  39. matplotlib.rcParams['backend'] = 'Agg'
  40. import matplotlib.pyplot as plt
  41. have_matplotlib = True
  42. except Exception:
  43. have_matplotlib = False
  44. # test data gear.dat from NIST for Levene and Bartlett test
  45. # https://www.itl.nist.gov/div898/handbook/eda/section3/eda3581.htm
  46. g1 = [1.006, 0.996, 0.998, 1.000, 0.992, 0.993, 1.002, 0.999, 0.994, 1.000]
  47. g2 = [0.998, 1.006, 1.000, 1.002, 0.997, 0.998, 0.996, 1.000, 1.006, 0.988]
  48. g3 = [0.991, 0.987, 0.997, 0.999, 0.995, 0.994, 1.000, 0.999, 0.996, 0.996]
  49. g4 = [1.005, 1.002, 0.994, 1.000, 0.995, 0.994, 0.998, 0.996, 1.002, 0.996]
  50. g5 = [0.998, 0.998, 0.982, 0.990, 1.002, 0.984, 0.996, 0.993, 0.980, 0.996]
  51. g6 = [1.009, 1.013, 1.009, 0.997, 0.988, 1.002, 0.995, 0.998, 0.981, 0.996]
  52. g7 = [0.990, 1.004, 0.996, 1.001, 0.998, 1.000, 1.018, 1.010, 0.996, 1.002]
  53. g8 = [0.998, 1.000, 1.006, 1.000, 1.002, 0.996, 0.998, 0.996, 1.002, 1.006]
  54. g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991]
  55. g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]
  56. # The loggamma RVS stream is changing due to gh-13349; this version
  57. # preserves the old stream so that tests don't change.
  58. def _old_loggamma_rvs(*args, **kwargs):
  59. return np.log(stats.gamma.rvs(*args, **kwargs))
  60. class TestBayes_mvs:
  61. def test_basic(self):
  62. # Expected values in this test simply taken from the function. For
  63. # some checks regarding correctness of implementation, see review in
  64. # gh-674
  65. data = [6, 9, 12, 7, 8, 8, 13]
  66. mean, var, std = stats.bayes_mvs(data)
  67. assert_almost_equal(mean.statistic, 9.0)
  68. assert_allclose(mean.minmax, (7.103650222492964, 10.896349777507034),
  69. rtol=1e-6)
  70. assert_almost_equal(var.statistic, 10.0)
  71. assert_allclose(var.minmax, (3.1767242068607087, 24.45910381334018),
  72. rtol=1e-09)
  73. assert_almost_equal(std.statistic, 2.9724954732045084, decimal=14)
  74. assert_allclose(std.minmax, (1.7823367265645145, 4.9456146050146312),
  75. rtol=1e-14)
  76. def test_empty_input(self):
  77. assert_raises(ValueError, stats.bayes_mvs, [])
  78. def test_result_attributes(self):
  79. x = np.arange(15)
  80. attributes = ('statistic', 'minmax')
  81. res = stats.bayes_mvs(x)
  82. for i in res:
  83. check_named_results(i, attributes)
  84. class TestMvsdist:
  85. def test_basic(self):
  86. data = [6, 9, 12, 7, 8, 8, 13]
  87. mean, var, std = stats.mvsdist(data)
  88. assert_almost_equal(mean.mean(), 9.0)
  89. assert_allclose(mean.interval(0.9), (7.103650222492964,
  90. 10.896349777507034), rtol=1e-14)
  91. assert_almost_equal(var.mean(), 10.0)
  92. assert_allclose(var.interval(0.9), (3.1767242068607087,
  93. 24.45910381334018), rtol=1e-09)
  94. assert_almost_equal(std.mean(), 2.9724954732045084, decimal=14)
  95. assert_allclose(std.interval(0.9), (1.7823367265645145,
  96. 4.9456146050146312), rtol=1e-14)
  97. def test_empty_input(self):
  98. assert_raises(ValueError, stats.mvsdist, [])
  99. def test_bad_arg(self):
  100. # Raise ValueError if fewer than two data points are given.
  101. data = [1]
  102. assert_raises(ValueError, stats.mvsdist, data)
  103. def test_warns(self):
  104. # regression test for gh-5270
  105. # make sure there are no spurious divide-by-zero warnings
  106. with warnings.catch_warnings():
  107. warnings.simplefilter('error', RuntimeWarning)
  108. [x.mean() for x in stats.mvsdist([1, 2, 3])]
  109. [x.mean() for x in stats.mvsdist([1, 2, 3, 4, 5])]
  110. class TestShapiro:
  111. def test_basic(self):
  112. x1 = [0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
  113. 4.43, 0.21, 4.75, 0.71, 1.52, 3.24,
  114. 0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]
  115. w, pw = stats.shapiro(x1)
  116. shapiro_test = stats.shapiro(x1)
  117. assert_almost_equal(w, 0.90047299861907959, decimal=6)
  118. assert_almost_equal(shapiro_test.statistic, 0.90047299861907959, decimal=6)
  119. assert_almost_equal(pw, 0.042089745402336121, decimal=6)
  120. assert_almost_equal(shapiro_test.pvalue, 0.042089745402336121, decimal=6)
  121. x2 = [1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
  122. 3.48, 1.10, 0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
  123. 0.08, 3.67, 2.81, 3.49]
  124. w, pw = stats.shapiro(x2)
  125. shapiro_test = stats.shapiro(x2)
  126. assert_almost_equal(w, 0.9590270, decimal=6)
  127. assert_almost_equal(shapiro_test.statistic, 0.9590270, decimal=6)
  128. assert_almost_equal(pw, 0.52460, decimal=3)
  129. assert_almost_equal(shapiro_test.pvalue, 0.52460, decimal=3)
  130. # Verified against R
  131. x3 = stats.norm.rvs(loc=5, scale=3, size=100, random_state=12345678)
  132. w, pw = stats.shapiro(x3)
  133. shapiro_test = stats.shapiro(x3)
  134. assert_almost_equal(w, 0.9772805571556091, decimal=6)
  135. assert_almost_equal(shapiro_test.statistic, 0.9772805571556091, decimal=6)
  136. assert_almost_equal(pw, 0.08144091814756393, decimal=3)
  137. assert_almost_equal(shapiro_test.pvalue, 0.08144091814756393, decimal=3)
  138. # Extracted from original paper
  139. x4 = [0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614,
  140. 0.655, 0.954, 1.392, 1.557, 1.648, 1.690, 1.994, 2.174, 2.206,
  141. 3.245, 3.510, 3.571, 4.354, 4.980, 6.084, 8.351]
  142. W_expected = 0.83467
  143. p_expected = 0.000914
  144. w, pw = stats.shapiro(x4)
  145. shapiro_test = stats.shapiro(x4)
  146. assert_almost_equal(w, W_expected, decimal=4)
  147. assert_almost_equal(shapiro_test.statistic, W_expected, decimal=4)
  148. assert_almost_equal(pw, p_expected, decimal=5)
  149. assert_almost_equal(shapiro_test.pvalue, p_expected, decimal=5)
  150. def test_2d(self):
  151. x1 = [[0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
  152. 4.43, 0.21, 4.75], [0.71, 1.52, 3.24,
  153. 0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]]
  154. w, pw = stats.shapiro(x1)
  155. shapiro_test = stats.shapiro(x1)
  156. assert_almost_equal(w, 0.90047299861907959, decimal=6)
  157. assert_almost_equal(shapiro_test.statistic, 0.90047299861907959, decimal=6)
  158. assert_almost_equal(pw, 0.042089745402336121, decimal=6)
  159. assert_almost_equal(shapiro_test.pvalue, 0.042089745402336121, decimal=6)
  160. x2 = [[1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
  161. 3.48, 1.10], [0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
  162. 0.08, 3.67, 2.81, 3.49]]
  163. w, pw = stats.shapiro(x2)
  164. shapiro_test = stats.shapiro(x2)
  165. assert_almost_equal(w, 0.9590270, decimal=6)
  166. assert_almost_equal(shapiro_test.statistic, 0.9590270, decimal=6)
  167. assert_almost_equal(pw, 0.52460, decimal=3)
  168. assert_almost_equal(shapiro_test.pvalue, 0.52460, decimal=3)
  169. @pytest.mark.parametrize('x', ([], [1], [1, 2]))
  170. def test_not_enough_values(self, x):
  171. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  172. res = stats.shapiro(x)
  173. assert_equal(res.statistic, np.nan)
  174. assert_equal(res.pvalue, np.nan)
  175. def test_nan_input(self):
  176. x = np.arange(10.)
  177. x[9] = np.nan
  178. w, pw = stats.shapiro(x)
  179. shapiro_test = stats.shapiro(x)
  180. assert_equal(w, np.nan)
  181. assert_equal(shapiro_test.statistic, np.nan)
  182. # Originally, shapiro returned a p-value of 1 in this case,
  183. # but there is no way to produce a numerical p-value if the
  184. # statistic is not a number. NaN is more appropriate.
  185. assert_almost_equal(pw, np.nan)
  186. assert_almost_equal(shapiro_test.pvalue, np.nan)
  187. def test_gh14462(self):
  188. # shapiro is theoretically location-invariant, but when the magnitude
  189. # of the values is much greater than the variance, there can be
  190. # numerical issues. Fixed by subtracting median from the data.
  191. # See gh-14462.
  192. trans_val, maxlog = stats.boxcox([122500, 474400, 110400])
  193. res = stats.shapiro(trans_val)
  194. # Reference from R:
  195. # options(digits=16)
  196. # x = c(0.00000000e+00, 3.39996924e-08, -6.35166875e-09)
  197. # shapiro.test(x)
  198. ref = (0.86468431705371, 0.2805581751566)
  199. assert_allclose(res, ref, rtol=1e-5)
  200. def test_length_3_gh18322(self):
  201. # gh-18322 reported that the p-value could be negative for input of
  202. # length 3. Check that this is resolved.
  203. res = stats.shapiro([0.6931471805599453, 0.0, 0.0])
  204. assert res.pvalue >= 0
  205. # R `shapiro.test` doesn't produce an accurate p-value in the case
  206. # above. Check that the formula used in `stats.shapiro` is not wrong.
  207. # options(digits=16)
  208. # x = c(-0.7746653110021126, -0.4344432067942129, 1.8157053280290931)
  209. # shapiro.test(x)
  210. x = [-0.7746653110021126, -0.4344432067942129, 1.8157053280290931]
  211. res = stats.shapiro(x)
  212. assert_allclose(res.statistic, 0.84658770645509)
  213. assert_allclose(res.pvalue, 0.2313666489882, rtol=1e-6)
  214. @pytest.mark.filterwarnings("ignore: As of SciPy 1.17: FutureWarning")
  215. class TestAnderson:
  216. def test_normal(self):
  217. rs = RandomState(1234567890)
  218. x1 = rs.standard_exponential(size=50)
  219. x2 = rs.standard_normal(size=50)
  220. A, crit, sig = stats.anderson(x1)
  221. assert_array_less(crit[:-1], A)
  222. A, crit, sig = stats.anderson(x2)
  223. assert_array_less(A, crit[-2:])
  224. v = np.ones(10)
  225. v[0] = 0
  226. A, crit, sig = stats.anderson(v)
  227. # The expected statistic 3.208057 was computed independently of scipy.
  228. # For example, in R:
  229. # > library(nortest)
  230. # > v <- rep(1, 10)
  231. # > v[1] <- 0
  232. # > result <- ad.test(v)
  233. # > result$statistic
  234. # A
  235. # 3.208057
  236. assert_allclose(A, 3.208057)
  237. def test_expon(self):
  238. rs = RandomState(1234567890)
  239. x1 = rs.standard_exponential(size=50)
  240. x2 = rs.standard_normal(size=50)
  241. A, crit, sig = stats.anderson(x1, 'expon')
  242. assert_array_less(A, crit[-2:])
  243. with np.errstate(all='ignore'):
  244. A, crit, sig = stats.anderson(x2, 'expon')
  245. assert_(A > crit[-1])
  246. def test_gumbel(self):
  247. # Regression test for gh-6306. Before that issue was fixed,
  248. # this case would return a2=inf.
  249. v = np.ones(100)
  250. v[0] = 0.0
  251. a2, crit, sig = stats.anderson(v, 'gumbel')
  252. # A brief reimplementation of the calculation of the statistic.
  253. n = len(v)
  254. xbar, s = stats.gumbel_l.fit(v)
  255. logcdf = stats.gumbel_l.logcdf(v, xbar, s)
  256. logsf = stats.gumbel_l.logsf(v, xbar, s)
  257. i = np.arange(1, n+1)
  258. expected_a2 = -n - np.mean((2*i - 1) * (logcdf + logsf[::-1]))
  259. assert_allclose(a2, expected_a2)
  260. def test_bad_arg(self):
  261. assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
  262. def test_result_attributes(self):
  263. rs = RandomState(1234567890)
  264. x = rs.standard_exponential(size=50)
  265. res = stats.anderson(x)
  266. attributes = ('statistic', 'critical_values', 'significance_level')
  267. check_named_results(res, attributes)
  268. def test_gumbel_l(self):
  269. # gh-2592, gh-6337
  270. # Adds support to 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
  271. rs = RandomState(1234567890)
  272. x = rs.gumbel(size=100)
  273. A1, crit1, sig1 = stats.anderson(x, 'gumbel')
  274. A2, crit2, sig2 = stats.anderson(x, 'gumbel_l')
  275. assert_allclose(A2, A1)
  276. def test_gumbel_r(self):
  277. # gh-2592, gh-6337
  278. # Adds support to 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
  279. rs = RandomState(1234567890)
  280. x1 = rs.gumbel(size=100)
  281. x2 = np.ones(100)
  282. # A constant array is a degenerate case and breaks gumbel_r.fit, so
  283. # change one value in x2.
  284. x2[0] = 0.996
  285. A1, crit1, sig1 = stats.anderson(x1, 'gumbel_r')
  286. A2, crit2, sig2 = stats.anderson(x2, 'gumbel_r')
  287. assert_array_less(A1, crit1[-2:])
  288. assert_(A2 > crit2[-1])
  289. def test_weibull_min_case_A(self):
  290. # data and reference values from `anderson` reference [7]
  291. x = np.array([225, 171, 198, 189, 189, 135, 162, 135, 117, 162])
  292. res = stats.anderson(x, 'weibull_min')
  293. m, loc, scale = res.fit_result.params
  294. assert_allclose((m, loc, scale), (2.38, 99.02, 78.23), rtol=2e-3)
  295. assert_allclose(res.statistic, 0.260, rtol=1e-3)
  296. assert res.statistic < res.critical_values[0]
  297. c = 1 / m # ~0.42
  298. assert_allclose(c, 1/2.38, rtol=2e-3)
  299. # interpolate between rows for c=0.4 and c=0.45, indices -3 and -2
  300. As40 = _Avals_weibull[-3]
  301. As45 = _Avals_weibull[-2]
  302. As_ref = As40 + (c - 0.4)/(0.45 - 0.4) * (As45 - As40)
  303. # atol=1e-3 because results are rounded up to the next third decimal
  304. assert np.all(res.critical_values > As_ref)
  305. assert_allclose(res.critical_values, As_ref, atol=1e-3)
  306. def test_weibull_min_case_B(self):
  307. # From `anderson` reference [7]
  308. x = np.array([74, 57, 48, 29, 502, 12, 70, 21,
  309. 29, 386, 59, 27, 153, 26, 326])
  310. message = "Maximum likelihood estimation has converged to "
  311. with pytest.raises(ValueError, match=message):
  312. stats.anderson(x, 'weibull_min')
  313. def test_weibull_warning_error(self):
  314. # Check for warning message when there are too few observations
  315. # This is also an example in which an error occurs during fitting
  316. x = -np.array([225, 75, 57, 168, 107, 12, 61, 43, 29])
  317. wmessage = "Critical values of the test statistic are given for the..."
  318. emessage = "An error occurred while fitting the Weibull distribution..."
  319. wcontext = pytest.warns(UserWarning, match=wmessage)
  320. econtext = pytest.raises(ValueError, match=emessage)
  321. with wcontext, econtext:
  322. stats.anderson(x, 'weibull_min')
  323. @pytest.mark.parametrize('distname',
  324. ['norm', 'expon', 'gumbel_l', 'extreme1',
  325. 'gumbel', 'gumbel_r', 'logistic', 'weibull_min'])
  326. def test_anderson_fit_params(self, distname):
  327. # check that anderson now returns a FitResult
  328. rng = np.random.default_rng(330691555377792039)
  329. real_distname = ('gumbel_l' if distname in {'extreme1', 'gumbel'}
  330. else distname)
  331. dist = getattr(stats, real_distname)
  332. params = distcont[real_distname]
  333. x = dist.rvs(*params, size=1000, random_state=rng)
  334. res = stats.anderson(x, distname)
  335. assert res.fit_result.success
  336. def test_anderson_weibull_As(self):
  337. m = 1 # "when mi < 2, so that c > 0.5, the last line...should be used"
  338. assert_equal(_get_As_weibull(1/m), _Avals_weibull[-1])
  339. m = np.inf
  340. assert_equal(_get_As_weibull(1/m), _Avals_weibull[0])
  341. class TestAndersonMethod:
  342. def test_warning(self):
  343. message = "As of SciPy 1.17, users..."
  344. with pytest.warns(FutureWarning, match=message):
  345. stats.anderson([1, 2, 3], 'norm')
  346. def test_method_input_validation(self):
  347. message = "`method` must be either..."
  348. with pytest.raises(ValueError, match=message):
  349. stats.anderson([1, 2, 3], 'norm', method='ekki-ekki')
  350. def test_monte_carlo_method(self):
  351. rng = np.random.default_rng(94982389149239)
  352. message = "The `rvs` attribute..."
  353. with pytest.warns(UserWarning, match=message):
  354. method = stats.MonteCarloMethod(rvs=rng.random)
  355. stats.anderson([1, 2, 3], 'norm', method=method)
  356. message = "The `batch` attribute..."
  357. with pytest.warns(UserWarning, match=message):
  358. method = stats.MonteCarloMethod(batch=10)
  359. stats.anderson([1, 2, 3], 'norm', method=method)
  360. method = stats.MonteCarloMethod(n_resamples=9, rng=rng)
  361. res = stats.anderson([1, 2, 3], 'norm', method=method)
  362. ten_p = res.pvalue * 10
  363. # p-value will always be divisible by n_resamples + 1
  364. assert np.round(ten_p) == ten_p
  365. method = stats.MonteCarloMethod(rng=np.random.default_rng(23495984827))
  366. ref = stats.anderson([1, 2, 3, 4, 5], 'norm', method=method)
  367. method = stats.MonteCarloMethod(rng=np.random.default_rng(23495984827))
  368. res = stats.anderson([1, 2, 3, 4, 5], 'norm', method=method)
  369. assert res.pvalue == ref.pvalue # same random state -> same p-value
  370. method = stats.MonteCarloMethod(rng=np.random.default_rng(23495984828))
  371. res = stats.anderson([1, 2, 3, 4, 5], 'norm', method=method)
  372. assert res.pvalue != ref.pvalue # different random state -> different p-value
  373. @pytest.mark.parametrize('dist_name, seed',
  374. [('norm', 4202165767275),
  375. ('expon', 9094400417269),
  376. pytest.param('logistic', 3776634590070, marks=pytest.mark.xslow),
  377. pytest.param('gumbel_l', 7966588969335, marks=pytest.mark.xslow),
  378. pytest.param('gumbel_r', 1886450383828, marks=pytest.mark.xslow)])
  379. def test_method_consistency(self, dist_name, seed):
  380. dist = getattr(stats, dist_name)
  381. rng = np.random.default_rng(seed)
  382. x = dist.rvs(size=50, random_state=rng)
  383. ref = stats.anderson(x, dist_name, method='interpolate')
  384. res = stats.anderson(x, dist_name, method=stats.MonteCarloMethod(rng=rng))
  385. np.testing.assert_allclose(res.statistic, ref.statistic)
  386. np.testing.assert_allclose(res.pvalue, ref.pvalue, atol=0.005)
  387. @pytest.mark.parametrize('dist_name',
  388. ['norm', 'expon', 'logistic', 'gumbel_l', 'gumbel_r', 'weibull_min'])
  389. def test_interpolate_saturation(self, dist_name):
  390. dist = getattr(stats, dist_name)
  391. rng = np.random.default_rng(4202165767276)
  392. args = (3.5,) if dist_name == 'weibull_min' else tuple()
  393. x = dist.rvs(*args, size=50, random_state=rng)
  394. with pytest.warns(FutureWarning):
  395. res = stats.anderson(x, dist_name)
  396. pvalues = (1 - np.asarray(res.significance_level) if dist_name == 'weibull_min'
  397. else np.asarray(res.significance_level) / 100)
  398. pvalue_min = np.min(pvalues)
  399. pvalue_max = np.max(pvalues)
  400. statistic_min = np.min(res.critical_values)
  401. statistic_max = np.max(res.critical_values)
  402. # data drawn from distribution -> low statistic / high p-value
  403. res = stats.anderson(x, dist_name, method='interpolate')
  404. assert res.statistic < statistic_min
  405. assert res.pvalue == pvalue_max
  406. # data not from distribution -> high statistic / low p-value
  407. res = stats.anderson(rng.random(size=50), dist_name, method='interpolate')
  408. assert res.statistic > statistic_max
  409. assert res.pvalue == pvalue_min
  410. @pytest.mark.filterwarnings("ignore:Parameter `variant`...:UserWarning")
  411. class TestAndersonKSamp:
  412. def test_example1a(self):
  413. # Example data from Scholz & Stephens (1987), originally
  414. # published in Lehmann (1995, Nonparametrics, Statistical
  415. # Methods Based on Ranks, p. 309)
  416. # Pass a mixture of lists and arrays
  417. t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
  418. t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
  419. t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
  420. t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
  421. Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)
  422. assert_almost_equal(Tk, 4.449, 3)
  423. assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
  424. tm[0:5], 4)
  425. assert_allclose(p, 0.0021, atol=0.00025)
  426. def test_example1b(self):
  427. # Example data from Scholz & Stephens (1987), originally
  428. # published in Lehmann (1995, Nonparametrics, Statistical
  429. # Methods Based on Ranks, p. 309)
  430. # Pass arrays
  431. t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
  432. t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
  433. t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
  434. t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
  435. Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)
  436. assert_almost_equal(Tk, 4.480, 3)
  437. assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
  438. tm[0:5], 4)
  439. assert_allclose(p, 0.0020, atol=0.00025)
  440. @pytest.mark.xslow
  441. def test_example2a(self):
  442. # Example data taken from an earlier technical report of
  443. # Scholz and Stephens
  444. # Pass lists instead of arrays
  445. t1 = [194, 15, 41, 29, 33, 181]
  446. t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
  447. t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
  448. t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
  449. 118, 25, 156, 310, 76, 26, 44, 23, 62]
  450. t5 = [130, 208, 70, 101, 208]
  451. t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
  452. t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
  453. t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
  454. 12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
  455. t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
  456. 54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
  457. t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
  458. 22, 139, 210, 97, 30, 23, 13, 14]
  459. t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
  460. t12 = [50, 254, 5, 283, 35, 12]
  461. t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
  462. t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
  463. 61, 34]
  464. samples = (t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14)
  465. Tk, tm, p = stats.anderson_ksamp(samples, midrank=False)
  466. assert_almost_equal(Tk, 3.288, 3)
  467. assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
  468. tm[0:5], 4)
  469. assert_allclose(p, 0.0041, atol=0.00025)
  470. rng = np.random.default_rng(6989860141921615054)
  471. method = stats.PermutationMethod(n_resamples=9999, rng=rng)
  472. res = stats.anderson_ksamp(samples, midrank=False, method=method)
  473. assert_array_equal(res.statistic, Tk)
  474. assert_array_equal(res.critical_values, tm)
  475. assert_allclose(res.pvalue, p, atol=6e-4)
  476. def test_example2b(self):
  477. # Example data taken from an earlier technical report of
  478. # Scholz and Stephens
  479. t1 = [194, 15, 41, 29, 33, 181]
  480. t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
  481. t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
  482. t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
  483. 118, 25, 156, 310, 76, 26, 44, 23, 62]
  484. t5 = [130, 208, 70, 101, 208]
  485. t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
  486. t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
  487. t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
  488. 12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
  489. t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
  490. 54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
  491. t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
  492. 22, 139, 210, 97, 30, 23, 13, 14]
  493. t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
  494. t12 = [50, 254, 5, 283, 35, 12]
  495. t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
  496. t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
  497. 61, 34]
  498. Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
  499. t9, t10, t11, t12, t13, t14),
  500. midrank=True)
  501. assert_almost_equal(Tk, 3.294, 3)
  502. assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
  503. tm[0:5], 4)
  504. assert_allclose(p, 0.0041, atol=0.00025)
  505. def test_R_kSamples(self):
  506. # test values generates with R package kSamples
  507. # package version 1.2-6 (2017-06-14)
  508. # r1 = 1:100
  509. # continuous case (no ties) --> version 1
  510. # res <- kSamples::ad.test(r1, r1 + 40.5)
  511. # res$ad[1, "T.AD"] # 41.105
  512. # res$ad[1, " asympt. P-value"] # 5.8399e-18
  513. #
  514. # discrete case (ties allowed) --> version 2 (here: midrank=True)
  515. # res$ad[2, "T.AD"] # 41.235
  516. #
  517. # res <- kSamples::ad.test(r1, r1 + .5)
  518. # res$ad[1, "T.AD"] # -1.2824
  519. # res$ad[1, " asympt. P-value"] # 1
  520. # res$ad[2, "T.AD"] # -1.2944
  521. #
  522. # res <- kSamples::ad.test(r1, r1 + 7.5)
  523. # res$ad[1, "T.AD"] # 1.4923
  524. # res$ad[1, " asympt. P-value"] # 0.077501
  525. #
  526. # res <- kSamples::ad.test(r1, r1 + 6)
  527. # res$ad[2, "T.AD"] # 0.63892
  528. # res$ad[2, " asympt. P-value"] # 0.17981
  529. #
  530. # res <- kSamples::ad.test(r1, r1 + 11.5)
  531. # res$ad[1, "T.AD"] # 4.5042
  532. # res$ad[1, " asympt. P-value"] # 0.00545
  533. #
  534. # res <- kSamples::ad.test(r1, r1 + 13.5)
  535. # res$ad[1, "T.AD"] # 6.2982
  536. # res$ad[1, " asympt. P-value"] # 0.00118
  537. x1 = np.linspace(1, 100, 100)
  538. # test case: different distributions;p-value floored at 0.001
  539. # test case for issue #5493 / #8536
  540. with warnings.catch_warnings():
  541. warnings.filterwarnings("ignore", 'p-value floored', UserWarning)
  542. s, _, p = stats.anderson_ksamp([x1, x1 + 40.5], midrank=False)
  543. assert_almost_equal(s, 41.105, 3)
  544. assert_equal(p, 0.001)
  545. with warnings.catch_warnings():
  546. warnings.filterwarnings("ignore", 'p-value floored', UserWarning)
  547. s, _, p = stats.anderson_ksamp([x1, x1 + 40.5])
  548. assert_almost_equal(s, 41.235, 3)
  549. assert_equal(p, 0.001)
  550. # test case: similar distributions --> p-value capped at 0.25
  551. with warnings.catch_warnings():
  552. warnings.filterwarnings("ignore", 'p-value capped', UserWarning)
  553. s, _, p = stats.anderson_ksamp([x1, x1 + .5], midrank=False)
  554. assert_almost_equal(s, -1.2824, 4)
  555. assert_equal(p, 0.25)
  556. with warnings.catch_warnings():
  557. warnings.filterwarnings("ignore", 'p-value capped', UserWarning)
  558. s, _, p = stats.anderson_ksamp([x1, x1 + .5])
  559. assert_almost_equal(s, -1.2944, 4)
  560. assert_equal(p, 0.25)
  561. # test case: check interpolated p-value in [0.01, 0.25] (no ties)
  562. s, _, p = stats.anderson_ksamp([x1, x1 + 7.5], midrank=False)
  563. assert_almost_equal(s, 1.4923, 4)
  564. assert_allclose(p, 0.0775, atol=0.005, rtol=0)
  565. # test case: check interpolated p-value in [0.01, 0.25] (w/ ties)
  566. s, _, p = stats.anderson_ksamp([x1, x1 + 6])
  567. assert_almost_equal(s, 0.6389, 4)
  568. assert_allclose(p, 0.1798, atol=0.005, rtol=0)
  569. # test extended critical values for p=0.001 and p=0.005
  570. s, _, p = stats.anderson_ksamp([x1, x1 + 11.5], midrank=False)
  571. assert_almost_equal(s, 4.5042, 4)
  572. assert_allclose(p, 0.00545, atol=0.0005, rtol=0)
  573. s, _, p = stats.anderson_ksamp([x1, x1 + 13.5], midrank=False)
  574. assert_almost_equal(s, 6.2982, 4)
  575. assert_allclose(p, 0.00118, atol=0.0001, rtol=0)
  576. def test_not_enough_samples(self):
  577. assert_raises(ValueError, stats.anderson_ksamp, np.ones(5))
  578. def test_no_distinct_observations(self):
  579. assert_raises(ValueError, stats.anderson_ksamp,
  580. (np.ones(5), np.ones(5)))
  581. def test_empty_sample(self):
  582. assert_raises(ValueError, stats.anderson_ksamp, (np.ones(5), []))
  583. def test_result_attributes(self):
  584. # Pass a mixture of lists and arrays
  585. t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
  586. t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
  587. res = stats.anderson_ksamp((t1, t2), midrank=False)
  588. attributes = ('statistic', 'critical_values', 'significance_level')
  589. check_named_results(res, attributes)
  590. assert_equal(res.significance_level, res.pvalue)
  591. class TestAndersonKSampVariant:
  592. def test_variant_values(self):
  593. x = [1, 2, 2, 3, 4, 5]
  594. y = [1, 2, 3, 4, 4, 5, 6, 6, 6, 7]
  595. message = "Parameter `variant` has been introduced..."
  596. with pytest.warns(UserWarning, match=message):
  597. ref = stats.anderson_ksamp((x, y))
  598. assert len(ref) == 3 and hasattr(ref, 'critical_values')
  599. with pytest.warns(UserWarning, match=message):
  600. res = stats.anderson_ksamp((x, y), midrank=True)
  601. assert_equal(res.statistic, ref.statistic)
  602. assert_equal(res.pvalue, ref.pvalue)
  603. assert len(res) == 3 and hasattr(res, 'critical_values')
  604. with pytest.warns(UserWarning, match=message):
  605. res = stats.anderson_ksamp((x, y), midrank=False, variant='midrank')
  606. assert_equal(res.statistic, ref.statistic)
  607. assert_equal(res.pvalue, ref.pvalue)
  608. assert not hasattr(res, 'critical_values')
  609. res = stats.anderson_ksamp((x, y), variant='midrank')
  610. assert_equal(res.statistic, ref.statistic)
  611. assert_equal(res.pvalue, ref.pvalue)
  612. assert not hasattr(res, 'critical_values')
  613. with pytest.warns(UserWarning, match=message):
  614. ref = stats.anderson_ksamp((x, y), midrank=False)
  615. assert len(ref) == 3 and hasattr(ref, 'critical_values')
  616. with pytest.warns(UserWarning, match=message):
  617. res = stats.anderson_ksamp((x, y), midrank=True, variant='right')
  618. assert_equal(res.statistic, ref.statistic)
  619. assert_equal(res.pvalue, ref.pvalue)
  620. assert not hasattr(res, 'critical_values')
  621. res = stats.anderson_ksamp((x, y), variant='right')
  622. assert_equal(res.statistic, ref.statistic)
  623. assert_equal(res.pvalue, ref.pvalue)
  624. assert not hasattr(res, 'critical_values')
  625. def test_variant_input_validation(self):
  626. x = np.arange(10)
  627. message = "`variant` must be one of 'midrank', 'right', or 'continuous'."
  628. with pytest.raises(ValueError, match=message):
  629. stats.anderson_ksamp((x, x), variant='Camelot')
  630. @pytest.mark.parametrize('n_samples', [2, 3])
  631. def test_variant_continuous(self, n_samples):
  632. rng = np.random.default_rng(20182053007)
  633. samples = rng.random((n_samples, 15)) + 0.1*np.arange(n_samples)[:, np.newaxis]
  634. ref = stats.anderson_ksamp(samples, variant='right')
  635. res = stats.anderson_ksamp(samples, variant='continuous')
  636. assert_allclose(res.statistic, ref.statistic)
  637. assert_allclose(res.pvalue, ref.pvalue)
  638. @make_xp_test_case(stats.ansari)
  639. class TestAnsari:
  640. def test_small(self, xp):
  641. x = xp.asarray([1, 2, 3, 3, 4])
  642. y = xp.asarray([3, 2, 6, 1, 6, 1, 4, 1])
  643. W, pval = stats.ansari(x, y)
  644. xp_assert_close(W, xp.asarray(23.5))
  645. xp_assert_close(pval, xp.asarray(0.13499256881897437))
  646. def test_approx(self, xp):
  647. ramsay = xp.asarray([111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
  648. 101, 96, 97, 102, 107, 113, 116, 113, 110, 98])
  649. parekh = xp.asarray([107, 108, 106, 98, 105, 103, 110, 105, 104,
  650. 100, 96, 108, 103, 104, 114, 114, 113, 108,
  651. 106, 99])
  652. W, pval = stats.ansari(ramsay, parekh)
  653. xp_assert_close(W, xp.asarray(185.5))
  654. xp_assert_close(pval, xp.asarray(0.18145819972867083))
  655. def test_exact(self, xp):
  656. x, y = xp.asarray([1, 2, 3, 4]), xp.asarray([15, 5, 20, 8, 10, 12])
  657. W, pval = stats.ansari(x, y)
  658. xp_assert_close(W, xp.asarray(10.0))
  659. xp_assert_close(pval, xp.asarray(0.533333333333333333))
  660. @pytest.mark.skip_xp_backends('jax.numpy', reason='no _axis_nan_policy decorator')
  661. @pytest.mark.parametrize('args', [([], [1.]), ([1.], [])])
  662. def test_bad_arg(self, args, xp):
  663. args = [xp.asarray(arg) for arg in args]
  664. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  665. res = stats.ansari(*args)
  666. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  667. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  668. def test_bad_alternative(self, xp):
  669. # invalid value for alternative must raise a ValueError
  670. x1 = xp.asarray([1, 2, 3, 4])
  671. x2 = xp.asarray([5, 6, 7, 8])
  672. match = "'alternative' must be 'two-sided'"
  673. with assert_raises(ValueError, match=match):
  674. stats.ansari(x1, x2, alternative='foo')
  675. def test_alternative_exact(self, xp):
  676. x1 = xp.asarray([-5, 1, 5, 10, 15, 20, 25.]) # high scale, loc=10
  677. x2 = xp.asarray([7.5, 8.5, 9.5, 10.5, 11.5, 12.5]) # low scale, loc=10
  678. # ratio of scales is greater than 1. So, the
  679. # p-value must be high when `alternative='less'`
  680. # and low when `alternative='greater'`.
  681. statistic, pval = stats.ansari(x1, x2)
  682. pval_l = stats.ansari(x1, x2, alternative='less').pvalue
  683. pval_g = stats.ansari(x1, x2, alternative='greater').pvalue
  684. assert pval_l > 0.95
  685. assert pval_g < 0.05 # level of significance.
  686. # also check if the p-values sum up to 1 plus the probability
  687. # mass under the calculated statistic.
  688. prob = _abw_state.a.pmf(float(statistic), x1.shape[0], x2.shape[0])
  689. prob = xp.asarray(float(prob))
  690. xp_assert_close(pval_g + pval_l, 1 + prob, atol=1e-12)
  691. # also check if one of the one-sided p-value equals half the
  692. # two-sided p-value and the other one-sided p-value is its
  693. # compliment.
  694. xp_assert_close(pval_g, pval/2, atol=1e-12)
  695. xp_assert_close(pval_l, 1+prob-pval/2, atol=1e-12)
  696. # sanity check. The result should flip if
  697. # we exchange x and y.
  698. pval_l_reverse = stats.ansari(x2, x1, alternative='less').pvalue
  699. pval_g_reverse = stats.ansari(x2, x1, alternative='greater').pvalue
  700. assert pval_l_reverse < 0.05
  701. assert pval_g_reverse > 0.95
  702. @pytest.mark.parametrize(
  703. 'x, y, alternative, expected',
  704. # the tests are designed in such a way that the
  705. # if else statement in ansari test for exact
  706. # mode is covered.
  707. [([1, 2, 3, 4], [5, 6, 7, 8], 'less', 0.6285714285714),
  708. ([1, 2, 3, 4], [5, 6, 7, 8], 'greater', 0.6285714285714),
  709. ([1, 2, 3], [4, 5, 6, 7, 8], 'less', 0.8928571428571),
  710. ([1, 2, 3], [4, 5, 6, 7, 8], 'greater', 0.2857142857143),
  711. ([1, 2, 3, 4, 5], [6, 7, 8], 'less', 0.2857142857143),
  712. ([1, 2, 3, 4, 5], [6, 7, 8], 'greater', 0.8928571428571)]
  713. )
  714. def test_alternative_exact_with_R(self, x, y, alternative, expected, xp):
  715. # testing with R on arbitrary data
  716. # Sample R code used for the third test case above:
  717. # ```R
  718. # > options(digits=16)
  719. # > x <- c(1,2,3)
  720. # > y <- c(4,5,6,7,8)
  721. # > ansari.test(x, y, alternative='less', exact=TRUE)
  722. #
  723. # Ansari-Bradley test
  724. #
  725. # data: x and y
  726. # AB = 6, p-value = 0.8928571428571
  727. # alternative hypothesis: true ratio of scales is less than 1
  728. #
  729. # ```
  730. x, y = xp.asarray(x), xp.asarray(y)
  731. pval = stats.ansari(x, y, alternative=alternative).pvalue
  732. xp_assert_close(pval, xp.asarray(expected), atol=1e-12)
  733. def test_alternative_approx(self, xp):
  734. # intuitive tests for approximation
  735. x1 = xp.asarray(stats.norm.rvs(0, 5, size=100, random_state=123))
  736. x2 = xp.asarray(stats.norm.rvs(0, 2, size=100, random_state=123))
  737. # for m > 55 or n > 55, the test should automatically
  738. # switch to approximation.
  739. pval_l = stats.ansari(x1, x2, alternative='less').pvalue
  740. pval_g = stats.ansari(x1, x2, alternative='greater').pvalue
  741. xp_assert_close(pval_l, xp.asarray(1.0, dtype=xp.float64), atol=1e-12)
  742. xp_assert_close(pval_g, xp.asarray(0.0, dtype=xp.float64), atol=1e-12)
  743. # also check if one of the one-sided p-value equals half the
  744. # two-sided p-value and the other one-sided p-value is its
  745. # compliment.
  746. x1 = xp.asarray(stats.norm.rvs(0, 2, size=60, random_state=123))
  747. x2 = xp.asarray(stats.norm.rvs(0, 1.5, size=60, random_state=123))
  748. pval = stats.ansari(x1, x2).pvalue
  749. pval_l = stats.ansari(x1, x2, alternative='less').pvalue
  750. pval_g = stats.ansari(x1, x2, alternative='greater').pvalue
  751. xp_assert_close(pval_g, pval/2, atol=1e-12)
  752. xp_assert_close(pval_l, 1-pval/2, atol=1e-12)
  753. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  754. @pytest.mark.parametrize('n', [10, 100]) # affects code path
  755. @pytest.mark.parametrize('ties', [False, True]) # affects code path
  756. def test_dtypes(self, dtype, n, ties, xp):
  757. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  758. pytest.skip("Scalar dtypes only respected after NEP 50.")
  759. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  760. rng = np.random.default_rng(78587342806484)
  761. x, y = rng.integers(6, size=(2, n)) if ties else rng.random(size=(2, n))
  762. ref = stats.ansari(x, y)
  763. res = stats.ansari(xp.asarray(x, dtype=dtype), xp.asarray(y, dtype=dtype))
  764. xp_assert_close(res.statistic, xp.asarray(ref.statistic, dtype=dtype))
  765. xp_assert_close(res.pvalue, xp.asarray(ref.pvalue, dtype=dtype))
  766. @make_xp_test_case(stats.bartlett)
  767. class TestBartlett:
  768. def test_data(self, xp):
  769. # https://www.itl.nist.gov/div898/handbook/eda/section3/eda357.htm
  770. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  771. args = [xp.asarray(arg) for arg in args]
  772. T, pval = stats.bartlett(*args)
  773. xp_assert_close(T, xp.asarray(20.78587342806484))
  774. xp_assert_close(pval, xp.asarray(0.0136358632781))
  775. def test_too_few_args(self, xp):
  776. message = "Must enter at least two input sample vectors."
  777. with pytest.raises(ValueError, match=message):
  778. stats.bartlett(xp.asarray([1.]))
  779. def test_result_attributes(self, xp):
  780. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  781. args = [xp.asarray(arg) for arg in args]
  782. res = stats.bartlett(*args)
  783. attributes = ('statistic', 'pvalue')
  784. check_named_results(res, attributes, xp=xp)
  785. @pytest.mark.filterwarnings("ignore:invalid value encountered") # Dask
  786. def test_empty_arg(self, xp):
  787. args = (g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, [])
  788. args = [xp.asarray(arg) for arg in args]
  789. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  790. res = stats.bartlett(*args)
  791. NaN = xp.asarray(xp.nan)
  792. xp_assert_equal(res.statistic, NaN)
  793. xp_assert_equal(res.pvalue, NaN)
  794. def test_negative_pvalue_gh21152(self, xp):
  795. a = xp.asarray([10.1, 10.2, 10.3, 10.4], dtype=xp.float32)
  796. b = xp.asarray([10.15, 10.25, 10.35, 10.45], dtype=xp.float32)
  797. c = xp.asarray([10.05, 10.15, 10.25, 10.35], dtype=xp.float32)
  798. res = stats.bartlett(a, b, c)
  799. assert xp.all(res.statistic >= 0)
  800. @make_xp_test_case(stats.levene)
  801. class TestLevene:
  802. def test_data(self, xp):
  803. # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
  804. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  805. args = [xp.asarray(arg) for arg in args]
  806. W, pval = stats.levene(*args)
  807. xp_assert_close(W, xp.asarray(1.7059176930008939))
  808. xp_assert_close(pval, xp.asarray(0.0990829755522))
  809. def test_mean(self, xp):
  810. # numbers from R: leveneTest in package car
  811. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  812. args = [xp.asarray(arg) for arg in args]
  813. W, pval = stats.levene(*args, center="mean")
  814. xp_assert_close(W, xp.asarray(2.15945985647285))
  815. xp_assert_close(pval, xp.asarray(0.032236826559783))
  816. def test_trimmed1(self, xp):
  817. # Test that center='trimmed' gives the same result as center='mean'
  818. # when proportiontocut=0.
  819. args = (xp.asarray(g1), xp.asarray(g2), xp.asarray(g3))
  820. W1, pval1 = stats.levene(*args, center='mean')
  821. W2, pval2 = stats.levene(*args, center='trimmed', proportiontocut=0.0)
  822. xp_assert_close(W1, W2)
  823. xp_assert_close(pval1, pval2)
  824. def test_trimmed2(self, xp):
  825. # numbers from R: leveneTest in package car
  826. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  827. args = [xp.asarray(arg) for arg in args]
  828. W, pval = stats.levene(*args, center="trimmed", proportiontocut=0.25)
  829. xp_assert_close(W, xp.asarray(2.07712845686874))
  830. xp_assert_close(pval, xp.asarray(0.0397269688035377))
  831. def test_equal_mean_median(self, xp):
  832. x = np.linspace(-1, 1, 21)
  833. rng = np.random.default_rng(4058827756)
  834. x2 = rng.permutation(x)
  835. y = x**3
  836. x, x2, y = xp.asarray(x), xp.asarray(x2), xp.asarray(y)
  837. W1, pval1 = stats.levene(x, y, center='mean')
  838. W2, pval2 = stats.levene(x2, y, center='median')
  839. xp_assert_close(W1, W2)
  840. xp_assert_close(pval1, pval2)
  841. def test_bad_center_value(self, xp):
  842. x = xp.linspace(-1, 1, 21)
  843. message = "center must be 'mean', 'median' or 'trimmed'."
  844. with pytest.raises(ValueError, match=message):
  845. stats.levene(x, x, center='trim')
  846. def test_too_few_args(self, xp):
  847. message = "Must provide at least two samples."
  848. with pytest.raises(ValueError, match=message):
  849. stats.levene(xp.asarray([1]))
  850. def test_result_attributes(self, xp):
  851. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  852. args = [xp.asarray(arg) for arg in args]
  853. res = stats.levene(*args)
  854. attributes = ('statistic', 'pvalue')
  855. check_named_results(res, attributes, xp=xp)
  856. class TestBinomTest:
  857. """Tests for stats.binomtest."""
  858. # Expected results here are from R binom.test, e.g.
  859. # options(digits=16)
  860. # binom.test(484, 967, p=0.48)
  861. #
  862. def test_two_sided_pvalues1(self):
  863. # `tol` could be stricter on most architectures, but the value
  864. # here is limited by accuracy of `binom.cdf` for large inputs on
  865. # Linux_Python_37_32bit_full and aarch64
  866. rtol = 1e-10 # aarch64 observed rtol: 1.5e-11
  867. res = stats.binomtest(10079999, 21000000, 0.48)
  868. assert_allclose(res.pvalue, 1.0, rtol=rtol)
  869. res = stats.binomtest(10079990, 21000000, 0.48)
  870. assert_allclose(res.pvalue, 0.9966892187965, rtol=rtol)
  871. res = stats.binomtest(10080009, 21000000, 0.48)
  872. assert_allclose(res.pvalue, 0.9970377203856, rtol=rtol)
  873. res = stats.binomtest(10080017, 21000000, 0.48)
  874. assert_allclose(res.pvalue, 0.9940754817328, rtol=1e-9)
  875. def test_two_sided_pvalues2(self):
  876. rtol = 1e-10 # no aarch64 failure with 1e-15, preemptive bump
  877. res = stats.binomtest(9, n=21, p=0.48)
  878. assert_allclose(res.pvalue, 0.6689672431939, rtol=rtol)
  879. res = stats.binomtest(4, 21, 0.48)
  880. assert_allclose(res.pvalue, 0.008139563452106, rtol=rtol)
  881. res = stats.binomtest(11, 21, 0.48)
  882. assert_allclose(res.pvalue, 0.8278629664608, rtol=rtol)
  883. res = stats.binomtest(7, 21, 0.48)
  884. assert_allclose(res.pvalue, 0.1966772901718, rtol=rtol)
  885. res = stats.binomtest(3, 10, .5)
  886. assert_allclose(res.pvalue, 0.34375, rtol=rtol)
  887. res = stats.binomtest(2, 2, .4)
  888. assert_allclose(res.pvalue, 0.16, rtol=rtol)
  889. res = stats.binomtest(2, 4, .3)
  890. assert_allclose(res.pvalue, 0.5884, rtol=rtol)
  891. def test_edge_cases(self):
  892. rtol = 1e-10 # aarch64 observed rtol: 1.33e-15
  893. res = stats.binomtest(484, 967, 0.5)
  894. assert_allclose(res.pvalue, 1, rtol=rtol)
  895. res = stats.binomtest(3, 47, 3/47)
  896. assert_allclose(res.pvalue, 1, rtol=rtol)
  897. res = stats.binomtest(13, 46, 13/46)
  898. assert_allclose(res.pvalue, 1, rtol=rtol)
  899. res = stats.binomtest(15, 44, 15/44)
  900. assert_allclose(res.pvalue, 1, rtol=rtol)
  901. res = stats.binomtest(7, 13, 0.5)
  902. assert_allclose(res.pvalue, 1, rtol=rtol)
  903. res = stats.binomtest(6, 11, 0.5)
  904. assert_allclose(res.pvalue, 1, rtol=rtol)
  905. def test_binary_srch_for_binom_tst(self):
  906. # Test that old behavior of binomtest is maintained
  907. # by the new binary search method in cases where d
  908. # exactly equals the input on one side.
  909. n = 10
  910. p = 0.5
  911. k = 3
  912. # First test for the case where k > mode of PMF
  913. i = np.arange(np.ceil(p * n), n+1)
  914. d = stats.binom.pmf(k, n, p)
  915. # Old way of calculating y, probably consistent with R.
  916. y1 = np.sum(stats.binom.pmf(i, n, p) <= d, axis=0)
  917. # New way with binary search.
  918. ix = _binary_search_for_binom_tst(lambda x1:
  919. -stats.binom.pmf(x1, n, p),
  920. -d, np.ceil(p * n), n)
  921. y2 = n - ix + int(d == stats.binom.pmf(ix, n, p))
  922. assert_allclose(y1, y2, rtol=1e-9)
  923. # Now test for the other side.
  924. k = 7
  925. i = np.arange(np.floor(p * n) + 1)
  926. d = stats.binom.pmf(k, n, p)
  927. # Old way of calculating y.
  928. y1 = np.sum(stats.binom.pmf(i, n, p) <= d, axis=0)
  929. # New way with binary search.
  930. ix = _binary_search_for_binom_tst(lambda x1:
  931. stats.binom.pmf(x1, n, p),
  932. d, 0, np.floor(p * n))
  933. y2 = ix + 1
  934. assert_allclose(y1, y2, rtol=1e-9)
  935. # Expected results here are from R 3.6.2 binom.test
  936. @pytest.mark.parametrize('alternative, pval, ci_low, ci_high',
  937. [('less', 0.148831050443,
  938. 0.0, 0.2772002496709138),
  939. ('greater', 0.9004695898947,
  940. 0.1366613252458672, 1.0),
  941. ('two-sided', 0.2983720970096,
  942. 0.1266555521019559, 0.2918426890886281)])
  943. def test_confidence_intervals1(self, alternative, pval, ci_low, ci_high):
  944. res = stats.binomtest(20, n=100, p=0.25, alternative=alternative)
  945. assert_allclose(res.pvalue, pval, rtol=1e-12)
  946. assert_equal(res.statistic, 0.2)
  947. ci = res.proportion_ci(confidence_level=0.95)
  948. assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-12)
  949. # Expected results here are from R 3.6.2 binom.test.
  950. @pytest.mark.parametrize('alternative, pval, ci_low, ci_high',
  951. [('less',
  952. 0.005656361, 0.0, 0.1872093),
  953. ('greater',
  954. 0.9987146, 0.008860761, 1.0),
  955. ('two-sided',
  956. 0.01191714, 0.006872485, 0.202706269)])
  957. def test_confidence_intervals2(self, alternative, pval, ci_low, ci_high):
  958. res = stats.binomtest(3, n=50, p=0.2, alternative=alternative)
  959. assert_allclose(res.pvalue, pval, rtol=1e-6)
  960. assert_equal(res.statistic, 0.06)
  961. ci = res.proportion_ci(confidence_level=0.99)
  962. assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-6)
  963. # Expected results here are from R 3.6.2 binom.test.
  964. @pytest.mark.parametrize('alternative, pval, ci_high',
  965. [('less', 0.05631351, 0.2588656),
  966. ('greater', 1.0, 1.0),
  967. ('two-sided', 0.07604122, 0.3084971)])
  968. def test_confidence_interval_exact_k0(self, alternative, pval, ci_high):
  969. # Test with k=0, n = 10.
  970. res = stats.binomtest(0, 10, p=0.25, alternative=alternative)
  971. assert_allclose(res.pvalue, pval, rtol=1e-6)
  972. ci = res.proportion_ci(confidence_level=0.95)
  973. assert_equal(ci.low, 0.0)
  974. assert_allclose(ci.high, ci_high, rtol=1e-6)
  975. # Expected results here are from R 3.6.2 binom.test.
  976. @pytest.mark.parametrize('alternative, pval, ci_low',
  977. [('less', 1.0, 0.0),
  978. ('greater', 9.536743e-07, 0.7411344),
  979. ('two-sided', 9.536743e-07, 0.6915029)])
  980. def test_confidence_interval_exact_k_is_n(self, alternative, pval, ci_low):
  981. # Test with k = n = 10.
  982. res = stats.binomtest(10, 10, p=0.25, alternative=alternative)
  983. assert_allclose(res.pvalue, pval, rtol=1e-6)
  984. ci = res.proportion_ci(confidence_level=0.95)
  985. assert_equal(ci.high, 1.0)
  986. assert_allclose(ci.low, ci_low, rtol=1e-6)
  987. # Expected results are from the prop.test function in R 3.6.2.
  988. @pytest.mark.parametrize(
  989. 'k, alternative, corr, conf, ci_low, ci_high',
  990. [[3, 'two-sided', True, 0.95, 0.08094782, 0.64632928],
  991. [3, 'two-sided', True, 0.99, 0.0586329, 0.7169416],
  992. [3, 'two-sided', False, 0.95, 0.1077913, 0.6032219],
  993. [3, 'two-sided', False, 0.99, 0.07956632, 0.6799753],
  994. [3, 'less', True, 0.95, 0.0, 0.6043476],
  995. [3, 'less', True, 0.99, 0.0, 0.6901811],
  996. [3, 'less', False, 0.95, 0.0, 0.5583002],
  997. [3, 'less', False, 0.99, 0.0, 0.6507187],
  998. [3, 'greater', True, 0.95, 0.09644904, 1.0],
  999. [3, 'greater', True, 0.99, 0.06659141, 1.0],
  1000. [3, 'greater', False, 0.95, 0.1268766, 1.0],
  1001. [3, 'greater', False, 0.99, 0.08974147, 1.0],
  1002. [0, 'two-sided', True, 0.95, 0.0, 0.3445372],
  1003. [0, 'two-sided', False, 0.95, 0.0, 0.2775328],
  1004. [0, 'less', True, 0.95, 0.0, 0.2847374],
  1005. [0, 'less', False, 0.95, 0.0, 0.212942],
  1006. [0, 'greater', True, 0.95, 0.0, 1.0],
  1007. [0, 'greater', False, 0.95, 0.0, 1.0],
  1008. [10, 'two-sided', True, 0.95, 0.6554628, 1.0],
  1009. [10, 'two-sided', False, 0.95, 0.7224672, 1.0],
  1010. [10, 'less', True, 0.95, 0.0, 1.0],
  1011. [10, 'less', False, 0.95, 0.0, 1.0],
  1012. [10, 'greater', True, 0.95, 0.7152626, 1.0],
  1013. [10, 'greater', False, 0.95, 0.787058, 1.0]]
  1014. )
  1015. def test_ci_wilson_method(self, k, alternative, corr, conf,
  1016. ci_low, ci_high):
  1017. res = stats.binomtest(k, n=10, p=0.1, alternative=alternative)
  1018. if corr:
  1019. method = 'wilsoncc'
  1020. else:
  1021. method = 'wilson'
  1022. ci = res.proportion_ci(confidence_level=conf, method=method)
  1023. assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-6)
  1024. def test_estimate_equals_hypothesized_prop(self):
  1025. # Test the special case where the estimated proportion equals
  1026. # the hypothesized proportion. When alternative is 'two-sided',
  1027. # the p-value is 1.
  1028. res = stats.binomtest(4, 16, 0.25)
  1029. assert_equal(res.statistic, 0.25)
  1030. assert_equal(res.pvalue, 1.0)
  1031. @pytest.mark.parametrize('k, n', [(0, 0), (-1, 2)])
  1032. def test_invalid_k_n(self, k, n):
  1033. with pytest.raises(ValueError,
  1034. match="must be an integer not less than"):
  1035. stats.binomtest(k, n)
  1036. def test_invalid_k_too_big(self):
  1037. with pytest.raises(ValueError,
  1038. match=r"k \(11\) must not be greater than n \(10\)."):
  1039. stats.binomtest(11, 10, 0.25)
  1040. def test_invalid_k_wrong_type(self):
  1041. with pytest.raises(TypeError,
  1042. match="k must be an integer."):
  1043. stats.binomtest([10, 11], 21, 0.25)
  1044. def test_invalid_p_range(self):
  1045. message = r'p \(-0.5\) must be in range...'
  1046. with pytest.raises(ValueError, match=message):
  1047. stats.binomtest(50, 150, p=-0.5)
  1048. message = r'p \(1.5\) must be in range...'
  1049. with pytest.raises(ValueError, match=message):
  1050. stats.binomtest(50, 150, p=1.5)
  1051. def test_invalid_confidence_level(self):
  1052. res = stats.binomtest(3, n=10, p=0.1)
  1053. message = r"confidence_level \(-1\) must be in the interval"
  1054. with pytest.raises(ValueError, match=message):
  1055. res.proportion_ci(confidence_level=-1)
  1056. def test_invalid_ci_method(self):
  1057. res = stats.binomtest(3, n=10, p=0.1)
  1058. with pytest.raises(ValueError, match=r"method \('plate of shrimp'\) must be"):
  1059. res.proportion_ci(method="plate of shrimp")
  1060. def test_invalid_alternative(self):
  1061. with pytest.raises(ValueError, match=r"alternative \('ekki'\) not..."):
  1062. stats.binomtest(3, n=10, p=0.1, alternative='ekki')
  1063. def test_alias(self):
  1064. res = stats.binomtest(3, n=10, p=0.1)
  1065. assert_equal(res.proportion_estimate, res.statistic)
  1066. @pytest.mark.skipif(sys.maxsize <= 2**32, reason="32-bit does not overflow")
  1067. def test_boost_overflow_raises(self):
  1068. # Boost.Math error policy should raise exceptions in Python
  1069. with pytest.raises(OverflowError, match='Error in function...'):
  1070. stats.binomtest(5, 6, p=sys.float_info.min)
  1071. @make_xp_test_case(stats.fligner)
  1072. class TestFligner:
  1073. def _perturb(self, g, rng=124987234782812):
  1074. # g arrays have ties to which statistic is very sensitive; break them
  1075. rng = np.random.default_rng(rng)
  1076. return (np.asarray(g) + 1e-10 * rng.standard_normal(len(g))).tolist()
  1077. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  1078. def test_data(self, dtype, xp):
  1079. if is_numpy(xp) and dtype == 'float32' and xp.__version__ < "2":
  1080. pytest.skip("Scalar dtypes only respected after NEP 50.")
  1081. # numbers from R: fligner.test in package stats
  1082. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  1083. x1 = xp.arange(5, dtype=dtype)
  1084. res = stats.fligner(x1, x1**2)
  1085. ref = (xp.asarray(3.2282229927203536, dtype=dtype),
  1086. xp.asarray(0.072379187848207877, dtype=dtype))
  1087. xp_assert_close(res[0], ref[0])
  1088. xp_assert_close(res[1], ref[1])
  1089. def test_trimmed1(self, xp):
  1090. # Perturb input to break ties in the transformed data
  1091. # See https://github.com/scipy/scipy/pull/8042 for more details
  1092. rng = np.random.default_rng(9952379681)
  1093. g1_ = xp.asarray(self._perturb(g1, rng=rng))
  1094. g2_ = xp.asarray(self._perturb(g2, rng=rng))
  1095. g3_ = xp.asarray(self._perturb(g3, rng=rng))
  1096. # Test that center='trimmed' gives the same result as center='mean'
  1097. # when proportiontocut=0.
  1098. Xsq1, pval1 = stats.fligner(g1_, g2_, g3_, center='mean')
  1099. Xsq2, pval2 = stats.fligner(g1_, g2_, g3_, center='trimmed',
  1100. proportiontocut=0.0)
  1101. xp_assert_close(Xsq1, Xsq2)
  1102. xp_assert_close(pval1, pval2)
  1103. @pytest.mark.skip_xp_backends(np_only=True,
  1104. reason="inconsistent tie-breaking across backends")
  1105. def test_trimmed_nonregression(self, xp):
  1106. # This is a non-regression test
  1107. # Expected results are *not* from an external gold standard,
  1108. # we're just making sure the results remain consistent
  1109. # in the future in case of changes
  1110. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  1111. args = [xp.asarray(arg, dtype=xp.float64) for arg in args]
  1112. W, pval = stats.fligner(*args, center="trimmed", proportiontocut=0.25)
  1113. xp_assert_close(W, xp.asarray(15.953569890010614), rtol=5e-14)
  1114. xp_assert_close(pval, xp.asarray(0.06785752327432863), rtol=5e-14)
  1115. def test_trimmed_consistency(self, xp):
  1116. # Tests for consistency across multiple backends when ties are broken
  1117. rng = np.random.default_rng(4839206199)
  1118. args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
  1119. args = [self._perturb(arg, rng=rng) for arg in args]
  1120. ref = stats.fligner(*args, center="trimmed", proportiontocut=0.25)
  1121. args = [xp.asarray(arg, dtype=xp.float64) for arg in args]
  1122. res = stats.fligner(*args, center="trimmed", proportiontocut=0.25)
  1123. xp_assert_close(res.statistic, xp.asarray(ref.statistic), rtol=5e-14)
  1124. xp_assert_close(res.pvalue, xp.asarray(ref.pvalue), rtol=5e-14)
  1125. def test_bad_center_value(self, xp):
  1126. x = xp.linspace(-1, 1, 21)
  1127. message = "center must be 'mean', 'median' or 'trimmed'."
  1128. with pytest.raises(ValueError, match=message):
  1129. stats.fligner(x, x, center='trim')
  1130. def test_bad_num_args(self, xp):
  1131. # Too few args raises ValueError.
  1132. message = "Must provide at least two samples."
  1133. with pytest.raises(ValueError, match=message):
  1134. stats.fligner(xp.asarray([1, 2]))
  1135. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  1136. def test_empty_arg(self, xp):
  1137. x = xp.arange(5.)
  1138. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  1139. res = stats.fligner(x, x**2, xp.asarray([]))
  1140. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  1141. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  1142. def mood_cases_with_ties():
  1143. # Generate random `x` and `y` arrays with ties both between and within the
  1144. # samples. Expected results are (statistic, pvalue) from SAS.
  1145. expected_results = [(-1.76658511464992, .0386488678399305),
  1146. (-.694031428192304, .2438312498647250),
  1147. (-1.15093525352151, .1248794365836150)]
  1148. seeds = [23453254, 1298352315, 987234597]
  1149. for si, seed in enumerate(seeds):
  1150. rng = np.random.default_rng(seed)
  1151. xy = rng.random(100)
  1152. # Generate random indices to make ties
  1153. tie_ind = rng.integers(low=0, high=99, size=5)
  1154. # Generate a random number of ties for each index.
  1155. num_ties_per_ind = rng.integers(low=1, high=5, size=5)
  1156. # At each `tie_ind`, mark the next `n` indices equal to that value.
  1157. for i, n in zip(tie_ind, num_ties_per_ind):
  1158. for j in range(i + 1, i + n):
  1159. xy[j] = xy[i]
  1160. # scramble order of xy before splitting into `x, y`
  1161. rng.shuffle(xy)
  1162. x, y = np.split(xy, 2)
  1163. yield x, y, 'less', *expected_results[si]
  1164. @make_xp_test_case(stats.mood)
  1165. class TestMood:
  1166. @pytest.mark.parametrize("x,y,alternative,stat_expect,p_expect",
  1167. mood_cases_with_ties())
  1168. def test_against_SAS(self, x, y, alternative, stat_expect, p_expect, xp):
  1169. """
  1170. Example code used to generate SAS output:
  1171. DATA myData;
  1172. INPUT X Y;
  1173. CARDS;
  1174. 1 0
  1175. 1 1
  1176. 1 2
  1177. 1 3
  1178. 1 4
  1179. 2 0
  1180. 2 1
  1181. 2 4
  1182. 2 9
  1183. 2 16
  1184. ods graphics on;
  1185. proc npar1way mood data=myData ;
  1186. class X;
  1187. ods output MoodTest=mt;
  1188. proc contents data=mt;
  1189. proc print data=mt;
  1190. format Prob1 17.16 Prob2 17.16 Statistic 17.16 Z 17.16 ;
  1191. title "Mood Two-Sample Test";
  1192. proc print data=myData;
  1193. title "Data for above results";
  1194. run;
  1195. """
  1196. x, y = xp.asarray(x.tolist()), xp.asarray(y.tolist())
  1197. statistic, pvalue = stats.mood(x, y, alternative=alternative)
  1198. xp_assert_close(statistic, xp.asarray(stat_expect), atol=1e-16)
  1199. xp_assert_close(pvalue, xp.asarray(p_expect), atol=1e-16)
  1200. @pytest.mark.parametrize("dtype", [None, 'float32', 'float64'])
  1201. @pytest.mark.parametrize("alternative, expected",
  1202. [('two-sided', (1.019938533549930,
  1203. .3077576129778760)),
  1204. ('less', (1.019938533549930,
  1205. 1 - .1538788064889380)),
  1206. ('greater', (1.019938533549930,
  1207. .1538788064889380))])
  1208. def test_against_SAS_2(self, dtype, alternative, expected, xp):
  1209. # Code to run in SAS in above function
  1210. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  1211. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  1212. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  1213. x = [111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
  1214. 101, 96, 97, 102, 107, 113, 116, 113, 110, 98]
  1215. y = [107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
  1216. 96, 108, 103, 104, 114, 114, 113, 108, 106, 99]
  1217. x, y = xp.asarray(x, dtype=dtype), xp.asarray(y, dtype=dtype)
  1218. res = stats.mood(x, y, alternative=alternative)
  1219. xp_assert_close(res.statistic, xp.asarray(expected[0], dtype=dtype))
  1220. xp_assert_close(res.pvalue, xp.asarray(expected[1], dtype=dtype))
  1221. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  1222. def test_mood_order_of_args(self, xp):
  1223. # z should change sign when the order of arguments changes, pvalue
  1224. # should not change
  1225. rng = np.random.default_rng(4058827756)
  1226. x1 = xp.asarray(rng.standard_normal((10, 1)))
  1227. x2 = xp.asarray(rng.standard_normal((15, 1)))
  1228. z1, p1 = stats.mood(x1, x2)
  1229. z2, p2 = stats.mood(x2, x1)
  1230. xp_assert_close(z2, -z1)
  1231. xp_assert_close(p2, p1)
  1232. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  1233. def test_mood_with_axis_none(self, xp):
  1234. # Test with axis = None, compare with results from R
  1235. x1 = [-0.626453810742332, 0.183643324222082, -0.835628612410047,
  1236. 1.59528080213779, 0.329507771815361, -0.820468384118015,
  1237. 0.487429052428485, 0.738324705129217, 0.575781351653492,
  1238. -0.305388387156356, 1.51178116845085, 0.389843236411431,
  1239. -0.621240580541804, -2.2146998871775, 1.12493091814311,
  1240. -0.0449336090152309, -0.0161902630989461, 0.943836210685299,
  1241. 0.821221195098089, 0.593901321217509]
  1242. x2 = [-0.896914546624981, 0.184849184646742, 1.58784533120882,
  1243. -1.13037567424629, -0.0802517565509893, 0.132420284381094,
  1244. 0.707954729271733, -0.23969802417184, 1.98447393665293,
  1245. -0.138787012119665, 0.417650750792556, 0.981752777463662,
  1246. -0.392695355503813, -1.03966897694891, 1.78222896030858,
  1247. -2.31106908460517, 0.878604580921265, 0.035806718015226,
  1248. 1.01282869212708, 0.432265154539617, 2.09081920524915,
  1249. -1.19992581964387, 1.58963820029007, 1.95465164222325,
  1250. 0.00493777682814261, -2.45170638784613, 0.477237302613617,
  1251. -0.596558168631403, 0.792203270299649, 0.289636710177348]
  1252. x1 = xp.reshape(xp.asarray(x1), (10, 2))
  1253. x2 = xp.reshape(xp.asarray(x2), (15, 2))
  1254. res = stats.mood(x1, x2, axis=None)
  1255. xp_assert_close(res.statistic, xp.asarray(-1.31716607555))
  1256. xp_assert_close(res.pvalue, xp.asarray(0.18778296257))
  1257. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  1258. @pytest.mark.parametrize('rng_method, args', [('standard_normal', tuple()),
  1259. ('integers', (8,))])
  1260. def test_mood_2d(self, rng_method, args, xp):
  1261. # Test if the results of mood test in 2-D vectorized call are consistent
  1262. # result when looping over the slices.
  1263. ny = 5
  1264. rng = np.random.default_rng()
  1265. rng_method = getattr(rng, rng_method)
  1266. x1 = rng_method(*args, size=(10, ny))
  1267. x2 = rng_method(*args, size=(15, ny))
  1268. dtype = xp.float64
  1269. res = stats.mood(xp.asarray(x1, dtype=dtype), xp.asarray(x2, dtype=dtype))
  1270. for j in range(ny):
  1271. ref = stats.mood(x1[:, j], x2[:, j])
  1272. xp_assert_close(res.statistic[j], xp.asarray(ref.statistic))
  1273. xp_assert_close(res.pvalue[j], xp.asarray(ref.pvalue))
  1274. # inverse order of dimensions
  1275. x1 = x1.transpose()
  1276. x2 = x2.transpose()
  1277. res = stats.mood(xp.asarray(x1, dtype=dtype), xp.asarray(x2, dtype=dtype),
  1278. axis=1)
  1279. for i in range(ny):
  1280. # check axis handling is self consistent
  1281. ref = stats.mood(x1[i, :], x2[i, :])
  1282. xp_assert_close(res.statistic[i], xp.asarray(ref.statistic))
  1283. xp_assert_close(res.pvalue[i], xp.asarray(ref.pvalue))
  1284. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  1285. @pytest.mark.parametrize('rng_method, args', [('standard_normal', tuple()),
  1286. ('integers', (8,))])
  1287. def test_mood_3d(self, rng_method, args, xp):
  1288. shape = (10, 5, 6)
  1289. rng = np.random.default_rng(3602349075)
  1290. rng_method = getattr(rng, rng_method)
  1291. x1 = xp.asarray(rng_method(*args, size=shape))
  1292. x2 = xp.asarray(rng_method(*args, size=shape))
  1293. for axis in range(3):
  1294. res = stats.mood(x1, x2, axis=axis)
  1295. # Tests that result for 3-D arrays is equal to that for the
  1296. # same calculation on a set of 1-D arrays taken from the
  1297. # 3-D array
  1298. axes_idx = ([1, 2], [0, 2], [0, 1]) # the two axes != axis
  1299. for i in range(shape[axes_idx[axis][0]]):
  1300. for j in range(shape[axes_idx[axis][1]]):
  1301. if axis == 0:
  1302. slice1 = x1[:, i, j]
  1303. slice2 = x2[:, i, j]
  1304. elif axis == 1:
  1305. slice1 = x1[i, :, j]
  1306. slice2 = x2[i, :, j]
  1307. else:
  1308. slice1 = x1[i, j, :]
  1309. slice2 = x2[i, j, :]
  1310. ref = stats.mood(slice1, slice2)
  1311. xp_assert_close(res.statistic[i, j], ref.statistic)
  1312. xp_assert_close(res.pvalue[i, j], ref.pvalue)
  1313. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  1314. def test_mood_bad_arg(self, xp):
  1315. # Warns when the sum of the lengths of the args is less than 3
  1316. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  1317. res = stats.mood(xp.asarray([1.]), xp.asarray([]))
  1318. xp_assert_equal(res.statistic, xp.asarray(np.nan))
  1319. xp_assert_equal(res.pvalue, xp.asarray(np.nan))
  1320. @pytest.mark.parametrize("dtype", [None, 'float32', 'float64'])
  1321. def test_mood_alternative(self, dtype, xp):
  1322. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  1323. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  1324. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  1325. rng = np.random.RandomState(0)
  1326. x = stats.norm.rvs(scale=0.75, size=100, random_state=rng)
  1327. y = stats.norm.rvs(scale=1.25, size=100, random_state=rng)
  1328. x, y = xp.asarray(x, dtype=dtype), xp.asarray(y, dtype=dtype)
  1329. stat1, p1 = stats.mood(x, y, alternative='two-sided')
  1330. stat2, p2 = stats.mood(x, y, alternative='less')
  1331. stat3, p3 = stats.mood(x, y, alternative='greater')
  1332. assert stat1 == stat2 == stat3
  1333. xp_assert_close(p1, xp.asarray(0., dtype=dtype), atol=1e-7)
  1334. xp_assert_close(p2, xp.asarray(p1/2, dtype=dtype))
  1335. xp_assert_close(p3, xp.asarray(1 - p1/2, dtype=dtype))
  1336. with pytest.raises(ValueError, match="`alternative` must be..."):
  1337. stats.mood(x, y, alternative='ekki-ekki')
  1338. class TestProbplot:
  1339. def test_basic(self):
  1340. x = stats.norm.rvs(size=20, random_state=12345)
  1341. osm, osr = stats.probplot(x, fit=False)
  1342. osm_expected = [-1.8241636, -1.38768012, -1.11829229, -0.91222575,
  1343. -0.73908135, -0.5857176, -0.44506467, -0.31273668,
  1344. -0.18568928, -0.06158146, 0.06158146, 0.18568928,
  1345. 0.31273668, 0.44506467, 0.5857176, 0.73908135,
  1346. 0.91222575, 1.11829229, 1.38768012, 1.8241636]
  1347. assert_allclose(osr, np.sort(x))
  1348. assert_allclose(osm, osm_expected)
  1349. res, res_fit = stats.probplot(x, fit=True)
  1350. res_fit_expected = [1.05361841, 0.31297795, 0.98741609]
  1351. assert_allclose(res_fit, res_fit_expected)
  1352. def test_sparams_keyword(self):
  1353. x = stats.norm.rvs(size=100, random_state=123456)
  1354. # Check that None, () and 0 (loc=0, for normal distribution) all work
  1355. # and give the same results
  1356. osm1, osr1 = stats.probplot(x, sparams=None, fit=False)
  1357. osm2, osr2 = stats.probplot(x, sparams=0, fit=False)
  1358. osm3, osr3 = stats.probplot(x, sparams=(), fit=False)
  1359. assert_allclose(osm1, osm2)
  1360. assert_allclose(osm1, osm3)
  1361. assert_allclose(osr1, osr2)
  1362. assert_allclose(osr1, osr3)
  1363. # Check giving (loc, scale) params for normal distribution
  1364. osm, osr = stats.probplot(x, sparams=(), fit=False)
  1365. def test_dist_keyword(self):
  1366. x = stats.norm.rvs(size=20, random_state=12345)
  1367. osm1, osr1 = stats.probplot(x, fit=False, dist='t', sparams=(3,))
  1368. osm2, osr2 = stats.probplot(x, fit=False, dist=stats.t, sparams=(3,))
  1369. assert_allclose(osm1, osm2)
  1370. assert_allclose(osr1, osr2)
  1371. assert_raises(ValueError, stats.probplot, x, dist='wrong-dist-name')
  1372. assert_raises(AttributeError, stats.probplot, x, dist=[])
  1373. class custom_dist:
  1374. """Some class that looks just enough like a distribution."""
  1375. def ppf(self, q):
  1376. return stats.norm.ppf(q, loc=2)
  1377. osm1, osr1 = stats.probplot(x, sparams=(2,), fit=False)
  1378. osm2, osr2 = stats.probplot(x, dist=custom_dist(), fit=False)
  1379. assert_allclose(osm1, osm2)
  1380. assert_allclose(osr1, osr2)
  1381. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  1382. def test_plot_kwarg(self):
  1383. fig = plt.figure()
  1384. fig.add_subplot(111)
  1385. x = stats.t.rvs(3, size=100, random_state=7654321)
  1386. res1, fitres1 = stats.probplot(x, plot=plt)
  1387. plt.close()
  1388. res2, fitres2 = stats.probplot(x, plot=None)
  1389. res3 = stats.probplot(x, fit=False, plot=plt)
  1390. plt.close()
  1391. res4 = stats.probplot(x, fit=False, plot=None)
  1392. # Check that results are consistent between combinations of `fit` and
  1393. # `plot` keywords.
  1394. assert_(len(res1) == len(res2) == len(res3) == len(res4) == 2)
  1395. assert_allclose(res1, res2)
  1396. assert_allclose(res1, res3)
  1397. assert_allclose(res1, res4)
  1398. assert_allclose(fitres1, fitres2)
  1399. # Check that a Matplotlib Axes object is accepted
  1400. fig = plt.figure()
  1401. ax = fig.add_subplot(111)
  1402. stats.probplot(x, fit=False, plot=ax)
  1403. plt.close()
  1404. def test_probplot_bad_args(self):
  1405. # Raise ValueError when given an invalid distribution.
  1406. assert_raises(ValueError, stats.probplot, [1], dist="plate_of_shrimp")
  1407. def test_empty(self):
  1408. assert_equal(stats.probplot([], fit=False),
  1409. (np.array([]), np.array([])))
  1410. assert_equal(stats.probplot([], fit=True),
  1411. ((np.array([]), np.array([])),
  1412. (np.nan, np.nan, 0.0)))
  1413. def test_array_of_size_one(self):
  1414. message = "One or more sample arguments is too small..."
  1415. with (np.errstate(invalid='ignore'),
  1416. pytest.warns(SmallSampleWarning, match=message)):
  1417. assert_equal(stats.probplot([1], fit=True),
  1418. ((np.array([0.]), np.array([1])),
  1419. (np.nan, np.nan, np.nan)))
  1420. @make_xp_test_case(stats.wilcoxon)
  1421. class TestWilcoxon:
  1422. def test_wilcoxon_bad_arg(self, xp):
  1423. # Raise ValueError when two args of different lengths are given or
  1424. # zero_method is unknown.
  1425. x = xp.asarray([1, 2])
  1426. d = xp.asarray([1]*10)
  1427. message = "`zero_method` must be one of..."
  1428. with pytest.raises(ValueError, match=message):
  1429. stats.wilcoxon(x, x, "dummy...")
  1430. message = "`alternative` must be one of"
  1431. with pytest.raises(ValueError, match=message):
  1432. stats.wilcoxon(x, x, alternative="dummy")
  1433. message = "`method` must be one of..."
  1434. with pytest.raises(ValueError, match=message):
  1435. stats.wilcoxon(d, method="xyz")
  1436. def test_zero_diff(self, xp):
  1437. x = xp.arange(20)
  1438. # pratt and wilcox do not work if x - y == 0 and method == "asymptotic"
  1439. # => warning may be emitted and p-value is nan
  1440. with np.errstate(invalid="ignore"):
  1441. w, p = stats.wilcoxon(x, x, "wilcox", method="asymptotic")
  1442. xp_assert_equal(w, xp.asarray(0.0))
  1443. xp_assert_equal(p, xp.asarray(xp.nan))
  1444. w, p = stats.wilcoxon(x, x, "pratt", method="asymptotic")
  1445. xp_assert_equal(w, xp.asarray(0.0))
  1446. xp_assert_equal(p, xp.asarray(xp.nan))
  1447. # ranksum is n*(n+1)/2, split in half if zero_method == "zsplit"
  1448. w, p = stats.wilcoxon(x, x, "zsplit", method="asymptotic")
  1449. xp_assert_equal(w, xp.asarray(20*21/4))
  1450. xp_assert_equal(p, xp.asarray(1.0))
  1451. def test_pratt(self, xp):
  1452. # regression test for gh-6805: p-value matches value from R package
  1453. # coin (wilcoxsign_test) reported in the issue
  1454. x = xp.asarray([1, 2, 3, 4])
  1455. y = xp.asarray([1, 2, 3, 5])
  1456. res = stats.wilcoxon(x, y, zero_method="pratt", method="asymptotic",
  1457. correction=False)
  1458. xp_assert_close(res.statistic, xp.asarray(0.0))
  1459. xp_assert_close(res.pvalue, xp.asarray(0.31731050786291415))
  1460. def test_wilcoxon_arg_type(self):
  1461. # Should be able to accept list as arguments.
  1462. # Address issue 6070.
  1463. arr = [1, 2, 3, 0, -1, 3, 1, 2, 1, 1, 2]
  1464. _ = stats.wilcoxon(arr, zero_method="pratt", method="asymptotic")
  1465. _ = stats.wilcoxon(arr, zero_method="zsplit", method="asymptotic")
  1466. _ = stats.wilcoxon(arr, zero_method="wilcox", method="asymptotic")
  1467. def test_accuracy_wilcoxon(self, xp):
  1468. freq = [1, 4, 16, 15, 8, 4, 5, 1, 2]
  1469. nums = range(-4, 5)
  1470. x = xp.asarray(np.concatenate([[u] * v for u, v in zip(nums, freq)]))
  1471. y = xp.zeros_like(x)
  1472. T, p = stats.wilcoxon(x, y, "pratt", method="asymptotic",
  1473. correction=False)
  1474. xp_assert_close(T, xp.asarray(423.))
  1475. xp_assert_close(p, xp.asarray(0.0031724568006762576))
  1476. T, p = stats.wilcoxon(x, y, "zsplit", method="asymptotic",
  1477. correction=False)
  1478. xp_assert_equal(T, xp.asarray(441.))
  1479. xp_assert_close(p, xp.asarray(0.0032145343172473055))
  1480. T, p = stats.wilcoxon(x, y, "wilcox", method="asymptotic",
  1481. correction=False)
  1482. xp_assert_equal(T, xp.asarray(327.))
  1483. xp_assert_close(p, xp.asarray(0.00641346115861))
  1484. # Test the 'correction' option, using values computed in R with:
  1485. # > options(digits=16)
  1486. # > wilcox.test(x, y, paired=TRUE, exact=FALSE, correct={FALSE,TRUE})
  1487. x = xp.asarray([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
  1488. y = xp.asarray([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
  1489. T, p = stats.wilcoxon(x, y, correction=False, method="asymptotic")
  1490. xp_assert_equal(T, xp.asarray(34.))
  1491. xp_assert_close(p, xp.asarray(0.6948866023724735))
  1492. T, p = stats.wilcoxon(x, y, correction=True, method="asymptotic")
  1493. xp_assert_equal(T, xp.asarray(34.))
  1494. xp_assert_close(p, xp.asarray(0.7240816609153895))
  1495. @pytest.mark.parametrize("kwarg",
  1496. [{"method": "approx"}, {"mode": "approx"}, {"mode": "asymptotic"}])
  1497. def test_approx_mode(self, kwarg, xp):
  1498. # Check that `mode` is still an alias of keyword `method`,
  1499. # and `"approx"` is still an alias of argument `"asymptotic"`
  1500. x = xp.asarray([3, 5, 23, 7, 243, 58, 98, 2, 8, -3, 9, 11])
  1501. y = xp.asarray([2, -2, 1, 23, 0, 5, 12, 18, 99, 12, 17, 27])
  1502. res = stats.wilcoxon(x, y, "wilcox", **kwarg)
  1503. ref = stats.wilcoxon(x, y, "wilcox", method="asymptotic")
  1504. xp_assert_equal(res.statistic, ref.statistic)
  1505. xp_assert_equal(res.pvalue, ref.pvalue)
  1506. def test_wilcoxon_result_attributes(self, xp):
  1507. x = xp.asarray([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
  1508. y = xp.asarray([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
  1509. res = stats.wilcoxon(x, y, correction=False, method="asymptotic")
  1510. attributes = ('statistic', 'pvalue')
  1511. check_named_results(res, attributes, xp=xp)
  1512. def test_wilcoxon_has_zstatistic(self, xp):
  1513. rng = np.random.default_rng(89426135444)
  1514. x, y = rng.random(15), rng.random(15)
  1515. x, y = xp.asarray(x), xp.asarray(y)
  1516. res = stats.wilcoxon(x, y, method="asymptotic")
  1517. ref = special.ndtri(res.pvalue/2)
  1518. xp_assert_close(res.zstatistic, ref)
  1519. res = stats.wilcoxon(x, y, method="exact")
  1520. assert not hasattr(res, 'zstatistic')
  1521. res = stats.wilcoxon(x, y)
  1522. assert not hasattr(res, 'zstatistic')
  1523. def test_wilcoxon_tie(self, xp):
  1524. # Regression test for gh-2391.
  1525. # Corresponding R code is:
  1526. # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=FALSE)
  1527. # > result$p.value
  1528. # [1] 0.001565402258002551
  1529. # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=TRUE)
  1530. # > result$p.value
  1531. # [1] 0.00190419504300439
  1532. d = xp.asarray([0.1] * 10)
  1533. expected_stat = xp.asarray(0.0)
  1534. stat, p = stats.wilcoxon(d, method="asymptotic", correction=False)
  1535. expected_p = xp.asarray(0.001565402258002551)
  1536. xp_assert_equal(stat, expected_stat)
  1537. xp_assert_close(p, expected_p)
  1538. stat, p = stats.wilcoxon(d, method="asymptotic", correction=True)
  1539. expected_p = xp.asarray(0.00190419504300439)
  1540. xp_assert_equal(stat, expected_stat)
  1541. xp_assert_close(p, expected_p)
  1542. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  1543. def test_onesided(self, dtype, xp):
  1544. # tested against "R version 4.0.3 (2020-10-10)"
  1545. #
  1546. # x <- c(125, 115, 130, 140, 140, 115, 140, 125, 140, 135)
  1547. # y <- c(110, 122, 125, 120, 140, 124, 123, 137, 135, 145)
  1548. # cfg <- list(x = x, y = y, paired = TRUE, exact = FALSE)
  1549. # do.call(wilcox.test, c(cfg, list(alternative = "less", correct = FALSE)))
  1550. # do.call(wilcox.test, c(cfg, list(alternative = "less", correct = TRUE)))
  1551. # do.call(wilcox.test, c(cfg, list(alternative = "greater", correct = FALSE)))
  1552. # do.call(wilcox.test, c(cfg, list(alternative = "greater", correct = TRUE)))
  1553. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  1554. pytest.skip("dtypes not preserved with pre-NEP 50 rules")
  1555. dtype = dtype if dtype is None else getattr(xp, dtype)
  1556. x = xp.asarray([125, 115, 130, 140, 140, 115, 140, 125, 140, 135], dtype=dtype)
  1557. y = xp.asarray([110, 122, 125, 120, 140, 124, 123, 137, 135, 145], dtype=dtype)
  1558. w_ref = xp.asarray(27.0, dtype=dtype)
  1559. w, p = stats.wilcoxon(x, y, alternative="less", method="asymptotic",
  1560. correction=False)
  1561. xp_assert_equal(w, w_ref)
  1562. xp_assert_close(p, xp.asarray(0.7031847042787, dtype=dtype))
  1563. w, p = stats.wilcoxon(x, y, alternative="less", correction=True,
  1564. method="asymptotic")
  1565. xp_assert_equal(w, w_ref)
  1566. xp_assert_close(p, xp.asarray(0.72336564289, dtype=dtype))
  1567. w, p = stats.wilcoxon(x, y, alternative="greater",
  1568. method="asymptotic", correction=False)
  1569. xp_assert_equal(w, w_ref)
  1570. xp_assert_close(p, xp.asarray(0.2968152957213, dtype=dtype))
  1571. w, p = stats.wilcoxon(x, y, alternative="greater",
  1572. correction=True, method="asymptotic")
  1573. xp_assert_equal(w, w_ref)
  1574. xp_assert_close(p, xp.asarray(0.3176446594176, dtype=dtype))
  1575. def test_exact_basic(self): # exact distribution is NumPy-only
  1576. for n in range(1, 51):
  1577. pmf1 = _get_wilcoxon_distr(n)
  1578. pmf2 = _get_wilcoxon_distr2(n)
  1579. assert_equal(n*(n+1)/2 + 1, len(pmf1))
  1580. assert_equal(sum(pmf1), 1)
  1581. assert_array_almost_equal(pmf1, pmf2)
  1582. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  1583. def test_exact_pval(self, dtype, xp):
  1584. # expected values computed with "R version 4.0.3 (2020-10-10)"
  1585. # options(digits=16)
  1586. # x < - c(1.81, 0.82, 1.56, -0.48, 0.81, 1.28, -1.04, 0.23, -0.75, 0.14)
  1587. # y < - c(0.71, 0.65, -0.2, 0.85, -1.1, -0.45, -0.84, -0.24, -0.68, -0.76)
  1588. # result = wilcox.test(x - y, exact=TRUE, alternative='l', correct=TRUE)
  1589. # result$p.value
  1590. dtype = dtype if dtype is None else getattr(xp, dtype)
  1591. x = xp.asarray([1.81, 0.82, 1.56, -0.48, 0.81, 1.28, -1.04, 0.23,
  1592. -0.75, 0.14], dtype=dtype)
  1593. y = xp.asarray([0.71, 0.65, -0.2, 0.85, -1.1, -0.45, -0.84, -0.24,
  1594. -0.68, -0.76], dtype=dtype)
  1595. _, p = stats.wilcoxon(x, y, alternative="two-sided", method="exact")
  1596. xp_assert_close(p, xp.asarray(0.10546875, dtype=dtype))
  1597. _, p = stats.wilcoxon(x, y, alternative="less", method="exact")
  1598. xp_assert_close(p, xp.asarray(0.95800781256, dtype=dtype))
  1599. _, p = stats.wilcoxon(x, y, alternative="greater", method="exact")
  1600. xp_assert_close(p, xp.asarray(0.052734375, dtype=dtype))
  1601. x = xp.arange(0., 20., dtype=dtype) + 0.5
  1602. y = xp.arange(20., 0., -1., dtype=dtype)
  1603. _, p = stats.wilcoxon(x, y, alternative="two-sided", method="exact")
  1604. xp_assert_close(p, xp.asarray(0.8694877624511719, dtype=dtype))
  1605. _, p = stats.wilcoxon(x, y, alternative="less", method="exact")
  1606. xp_assert_close(p, xp.asarray(0.4347438812255859, dtype=dtype))
  1607. _, p = stats.wilcoxon(x, y, alternative="greater", method="exact")
  1608. xp_assert_close(p, xp.asarray(0.5795888900756836, dtype=dtype))
  1609. # These inputs were chosen to give a W statistic that is either the
  1610. # center of the distribution (when the length of the support is odd), or
  1611. # the value to the left of the center (when the length of the support is
  1612. # even). Also, the numbers are chosen so that the W statistic is the
  1613. # sum of the positive values.
  1614. @pytest.mark.parametrize('x', [[-1, -2, 3],
  1615. [-1, 2, -3, -4, 5],
  1616. [-1, -2, 3, -4, -5, -6, 7, 8]])
  1617. def test_exact_p_1(self, x, xp):
  1618. w, p = stats.wilcoxon(xp.asarray(x))
  1619. x = np.array(x)
  1620. wtrue = x[x > 0].sum()
  1621. xp_assert_equal(w, xp.asarray(float(wtrue)))
  1622. xp_assert_equal(p, xp.asarray(1.0))
  1623. def test_auto(self, xp):
  1624. # auto default to exact if there are no ties and n <= 50
  1625. x = xp.arange(0., 50.) + 0.5
  1626. y = xp.arange(50., 0., -1.)
  1627. xp_assert_equal(stats.wilcoxon(x, y).pvalue,
  1628. stats.wilcoxon(x, y, method="exact").pvalue)
  1629. if is_numpy(xp): # PermutationMethod is NumPy-only until gh-23772 merges
  1630. # n <= 50: if there are zeros in d = x-y, use PermutationMethod
  1631. pm = stats.PermutationMethod()
  1632. d = np.arange(-2, 5)
  1633. w, p = stats.wilcoxon(d)
  1634. # rerunning the test gives the same results since n_resamples
  1635. # is large enough to get deterministic results if n <= 13
  1636. # so we do not need to use a seed. to avoid longer runtimes of the
  1637. # test, use n=7 only. For n=13, see test_auto_permutation_edge_case
  1638. assert_equal((w, p), stats.wilcoxon(d, method=pm))
  1639. # for larger vectors (n > 13) with ties/zeros, use asymptotic test
  1640. d = xp.arange(-5, 9) # zero
  1641. _, p = stats.wilcoxon(d)
  1642. xp_assert_equal(stats.wilcoxon(d, method="asymptotic").pvalue, xp.asarray(p))
  1643. d = xpx.at(d)[d == 0].set(1) # tie
  1644. _, p = stats.wilcoxon(d)
  1645. xp_assert_equal(stats.wilcoxon(d, method="asymptotic").pvalue, xp.asarray(p))
  1646. # use approximation for samples > 50
  1647. d = xp.arange(1, 52)
  1648. _, p = stats.wilcoxon(d)
  1649. xp_assert_equal(stats.wilcoxon(d, method="asymptotic").pvalue, xp.asarray(p))
  1650. @pytest.mark.xslow
  1651. @pytest.mark.skip_xp_backends(np_only=True)
  1652. def test_auto_permutation_edge_case(self, xp):
  1653. # Check that `PermutationMethod()` is used and results are deterministic when
  1654. # `method='auto'`, there are zeros or ties in `d = x-y`, and `len(d) <= 13`.
  1655. d = np.arange(-5, 8) # zero
  1656. res = stats.wilcoxon(xp.asarray(d))
  1657. # generated with stats.wilcoxon(d, method=PermutationMethod())
  1658. w, p = xp.asarray([27.5, 0.3955078125])
  1659. xp_assert_equal(res.statistic, w)
  1660. xp_assert_equal(res.pvalue, p)
  1661. d[d == 0] = 1 # tie
  1662. res = stats.wilcoxon(xp.asarray(d))
  1663. # generated with stats.wilcoxon(d, method=PermutationMethod())
  1664. w, p = xp.asarray([32, 0.3779296875])
  1665. xp_assert_equal(res.statistic, w)
  1666. xp_assert_equal(res.pvalue, p)
  1667. @pytest.mark.parametrize('size', [3, 5, 10])
  1668. @pytest.mark.skip_xp_backends(np_only=True)
  1669. def test_permutation_method(self, size, xp):
  1670. rng = np.random.default_rng(92348034828501345)
  1671. x = xp.asarray(rng.random(size=size))
  1672. res = stats.wilcoxon(x, method=stats.PermutationMethod())
  1673. ref = stats.wilcoxon(x, method='exact')
  1674. xp_assert_equal(res.statistic, ref.statistic)
  1675. xp_assert_equal(res.pvalue, ref.pvalue)
  1676. x = xp.asarray(rng.random(size=size*10))
  1677. rng = np.random.default_rng(59234803482850134)
  1678. pm = stats.PermutationMethod(n_resamples=99, rng=rng)
  1679. ref = stats.wilcoxon(x, method=pm)
  1680. # preserve use of old random_state during SPEC 7 transition
  1681. rng = np.random.default_rng(59234803482850134)
  1682. pm = stats.PermutationMethod(n_resamples=99, random_state=rng)
  1683. res = stats.wilcoxon(x, method=pm)
  1684. xp_assert_equal(xp.round(res.pvalue, 2), res.pvalue) # n_resamples used
  1685. xp_assert_equal(res.pvalue, ref.pvalue) # rng/random_state used
  1686. def test_method_auto_nan_propagate_ND_length_gt_50_gh20591(self, xp):
  1687. # When method!='asymptotic', nan_policy='propagate', and a slice of
  1688. # a >1 dimensional array input contained NaN, the result object of
  1689. # `wilcoxon` could (under yet other conditions) return `zstatistic`
  1690. # for some slices but not others. This resulted in an error because
  1691. # `apply_along_axis` would have to create a ragged array.
  1692. # Check that this is resolved.
  1693. rng = np.random.default_rng(235889269872456)
  1694. A = rng.normal(size=(51, 2)) # length along slice > exact threshold
  1695. A[5, 1] = np.nan
  1696. A = xp.asarray(A)
  1697. res = stats.wilcoxon(A)
  1698. ref = stats.wilcoxon(A, method='asymptotic')
  1699. xp_assert_close(res.statistic, ref.statistic)
  1700. xp_assert_close(res.pvalue, ref.pvalue)
  1701. assert hasattr(ref, 'zstatistic')
  1702. assert not hasattr(res, 'zstatistic')
  1703. @pytest.mark.parametrize('method', ['exact', 'asymptotic'])
  1704. def test_symmetry_gh19872_gh20752(self, method, xp):
  1705. # Check that one-sided exact tests obey required symmetry. Bug reported
  1706. # in gh-19872 and again in gh-20752; example from gh-19872 is more concise:
  1707. var1 = xp.asarray([62, 66, 61, 68, 74, 62, 68, 62, 55, 59])
  1708. var2 = xp.asarray([71, 71, 69, 61, 75, 71, 77, 72, 62, 65])
  1709. ref = stats.wilcoxon(var1, var2, alternative='less', method=method)
  1710. res = stats.wilcoxon(var2, var1, alternative='greater', method=method)
  1711. max_statistic = var1.shape[0] * (var1.shape[0] + 1) / 2
  1712. assert int(res.statistic) != res.statistic
  1713. xp_assert_close(max_statistic - res.statistic, ref.statistic)
  1714. xp_assert_close(res.pvalue, ref.pvalue)
  1715. @pytest.mark.parametrize("method", ('exact', stats.PermutationMethod()))
  1716. def test_all_zeros_exact(self, method, xp):
  1717. # previously, this raised a RuntimeWarning when calculating Z, even
  1718. # when the Z value was not needed. Confirm that this no longer
  1719. # occurs when `method` is 'exact' or a `PermutationMethod`.
  1720. if method != "exact":
  1721. pytest.skip("PermutationMethod is NumPy-only until gh-23772 merges")
  1722. res = stats.wilcoxon(xp.zeros(5), method=method)
  1723. xp_assert_close(res.statistic, xp.asarray(0.))
  1724. xp_assert_close(res.pvalue, xp.asarray(1.))
  1725. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy->limited input validation')
  1726. def test_wilcoxon_axis_broadcasting_errors_gh22051(self, xp):
  1727. # In previous versions of SciPy, `wilcoxon` gave an incorrect error
  1728. # message when `AxisError` was not found in the base NumPy namespace.
  1729. # Check that this is resolved with and without the ANP decorator.
  1730. x = xp.asarray([1, 2, 3])
  1731. y = xp.asarray([4, 5, 6])
  1732. message = "Array shapes are incompatible for broadcasting."
  1733. with pytest.raises(ValueError, match=message):
  1734. stats.wilcoxon(x, y[:-1])
  1735. if not is_numpy(xp):
  1736. return # hard to generalize the rest
  1737. message = "operands could not be broadcast together with..."
  1738. with pytest.raises(ValueError, match=message):
  1739. stats.wilcoxon(x, y[:-1], _no_deco=True)
  1740. AxisError = getattr(np, 'AxisError', None) or np.exceptions.AxisError
  1741. message = "source: axis 3 is out of bounds for array of dimension 1"
  1742. with pytest.raises(AxisError, match=message):
  1743. stats.wilcoxon(x, y, axis=3)
  1744. message = "`axis` must be compatible with the shape..."
  1745. with pytest.raises(AxisError, match=message):
  1746. stats.wilcoxon(x, y, axis=3, _no_deco=True)
  1747. # data for k-statistics tests from
  1748. # https://cran.r-project.org/web/packages/kStatistics/kStatistics.pdf
  1749. # see nKS "Examples"
  1750. x_kstat = [16.34, 10.76, 11.84, 13.55, 15.85, 18.20, 7.51, 10.22, 12.52, 14.68,
  1751. 16.08, 19.43, 8.12, 11.20, 12.95, 14.77, 16.83, 19.80, 8.55, 11.58,
  1752. 12.10, 15.02, 16.83, 16.98, 19.92, 9.47, 11.68, 13.41, 15.35, 19.11]
  1753. @make_xp_test_case(stats.kstat)
  1754. class TestKstat:
  1755. def test_moments_normal_distribution(self, xp):
  1756. rng = np.random.RandomState(32149)
  1757. data = xp.asarray(rng.randn(12345), dtype=xp.float64)
  1758. moments = xp.stack([stats.kstat(data, n) for n in [1, 2, 3, 4]])
  1759. expected = xp.asarray([0.011315, 1.017931, 0.05811052, 0.0754134],
  1760. dtype=data.dtype)
  1761. xp_assert_close(moments, expected, rtol=1e-4)
  1762. # test equivalence with `stats.moment`
  1763. m1 = stats.moment(data, order=1)
  1764. m2 = stats.moment(data, order=2)
  1765. m3 = stats.moment(data, order=3)
  1766. xp_assert_close(xp.stack((m1, m2, m3)), expected[:-1], atol=0.02, rtol=1e-2)
  1767. @pytest.mark.filterwarnings("ignore:invalid value encountered") # Dask
  1768. def test_empty_input(self, xp):
  1769. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  1770. res = stats.kstat(xp.asarray([]))
  1771. xp_assert_equal(res, xp.asarray(xp.nan))
  1772. def test_nan_input(self, xp):
  1773. data = xp.arange(10.)
  1774. data = xp.where(data == 6, xp.nan, data)
  1775. xp_assert_equal(stats.kstat(data), xp.asarray(xp.nan))
  1776. @pytest.mark.parametrize('n', [0, 4.001])
  1777. def test_kstat_bad_arg(self, n, xp):
  1778. # Raise ValueError if n > 4 or n < 1.
  1779. data = xp.arange(10)
  1780. message = 'k-statistics only supported for 1<=n<=4'
  1781. with pytest.raises(ValueError, match=message):
  1782. stats.kstat(data, n=n)
  1783. @pytest.mark.parametrize('case', [(1, 14.02166666666667),
  1784. (2, 12.65006954022974),
  1785. (3, -1.447059503280798),
  1786. (4, -141.6682291883626)])
  1787. def test_against_R(self, case, xp):
  1788. # Test against reference values computed with R kStatistics, e.g.
  1789. # options(digits=16)
  1790. # library(kStatistics)
  1791. # data <-c (16.34, 10.76, 11.84, 13.55, 15.85, 18.20, 7.51, 10.22,
  1792. # 12.52, 14.68, 16.08, 19.43, 8.12, 11.20, 12.95, 14.77,
  1793. # 16.83, 19.80, 8.55, 11.58, 12.10, 15.02, 16.83, 16.98,
  1794. # 19.92, 9.47, 11.68, 13.41, 15.35, 19.11)
  1795. # nKS(4, data)
  1796. n, ref = case
  1797. res = stats.kstat(xp.asarray(x_kstat), n)
  1798. xp_assert_close(res, xp.asarray(ref))
  1799. @make_xp_test_case(stats.kstatvar)
  1800. class TestKstatVar:
  1801. @pytest.mark.filterwarnings("ignore:invalid value encountered") # Dask
  1802. def test_empty_input(self, xp):
  1803. x = xp.asarray([])
  1804. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  1805. res = stats.kstatvar(x)
  1806. xp_assert_equal(res, xp.asarray(xp.nan))
  1807. def test_nan_input(self, xp):
  1808. data = xp.arange(10.)
  1809. data = xp.where(data == 6, xp.nan, data)
  1810. xp_assert_equal(stats.kstat(data), xp.asarray(xp.nan))
  1811. @skip_xp_backends(np_only=True,
  1812. reason='input validation of `n` does not depend on backend')
  1813. def test_bad_arg(self, xp):
  1814. # Raise ValueError is n is not 1 or 2.
  1815. data = [1]
  1816. n = 10
  1817. message = 'Only n=1 or n=2 supported.'
  1818. with pytest.raises(ValueError, match=message):
  1819. stats.kstatvar(data, n=n)
  1820. def test_against_R_mathworld(self, xp):
  1821. # Test against reference values computed using formulas exactly as
  1822. # they appear at https://mathworld.wolfram.com/k-Statistic.html
  1823. # This is *really* similar to how they appear in the implementation,
  1824. # but that could change, and this should not.
  1825. n = len(x_kstat)
  1826. k2 = 12.65006954022974 # see source code in TestKstat
  1827. k4 = -141.6682291883626
  1828. res = stats.kstatvar(xp.asarray(x_kstat), 1)
  1829. ref = k2 / n
  1830. xp_assert_close(res, xp.asarray(ref))
  1831. res = stats.kstatvar(xp.asarray(x_kstat), 2)
  1832. # *unbiased estimator* for var(k2)
  1833. ref = (2*k2**2*n + (n-1)*k4) / (n * (n+1))
  1834. xp_assert_close(res, xp.asarray(ref))
  1835. class TestPpccPlot:
  1836. def setup_method(self):
  1837. self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5
  1838. def test_basic(self):
  1839. N = 5
  1840. svals, ppcc = stats.ppcc_plot(self.x, -10, 10, N=N)
  1841. ppcc_expected = [0.21139644, 0.21384059, 0.98766719, 0.97980182,
  1842. 0.93519298]
  1843. assert_allclose(svals, np.linspace(-10, 10, num=N))
  1844. assert_allclose(ppcc, ppcc_expected)
  1845. def test_dist(self):
  1846. # Test that we can specify distributions both by name and as objects.
  1847. svals1, ppcc1 = stats.ppcc_plot(self.x, -10, 10, dist='tukeylambda')
  1848. svals2, ppcc2 = stats.ppcc_plot(self.x, -10, 10,
  1849. dist=stats.tukeylambda)
  1850. assert_allclose(svals1, svals2, rtol=1e-20)
  1851. assert_allclose(ppcc1, ppcc2, rtol=1e-20)
  1852. # Test that 'tukeylambda' is the default dist
  1853. svals3, ppcc3 = stats.ppcc_plot(self.x, -10, 10)
  1854. assert_allclose(svals1, svals3, rtol=1e-20)
  1855. assert_allclose(ppcc1, ppcc3, rtol=1e-20)
  1856. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  1857. def test_plot_kwarg(self):
  1858. # Check with the matplotlib.pyplot module
  1859. fig = plt.figure()
  1860. ax = fig.add_subplot(111)
  1861. stats.ppcc_plot(self.x, -20, 20, plot=plt)
  1862. fig.delaxes(ax)
  1863. # Check that a Matplotlib Axes object is accepted
  1864. ax = fig.add_subplot(111)
  1865. stats.ppcc_plot(self.x, -20, 20, plot=ax)
  1866. plt.close()
  1867. def test_invalid_inputs(self):
  1868. # `b` has to be larger than `a`
  1869. assert_raises(ValueError, stats.ppcc_plot, self.x, 1, 0)
  1870. # Raise ValueError when given an invalid distribution.
  1871. assert_raises(ValueError, stats.ppcc_plot, [1, 2, 3], 0, 1,
  1872. dist="plate_of_shrimp")
  1873. def test_empty(self):
  1874. # For consistency with probplot return for one empty array,
  1875. # ppcc contains all zeros and svals is the same as for normal array
  1876. # input.
  1877. svals, ppcc = stats.ppcc_plot([], 0, 1)
  1878. assert_allclose(svals, np.linspace(0, 1, num=80))
  1879. assert_allclose(ppcc, np.zeros(80, dtype=float))
  1880. class TestPpccMax:
  1881. def test_ppcc_max_bad_arg(self):
  1882. # Raise ValueError when given an invalid distribution.
  1883. data = [1]
  1884. assert_raises(ValueError, stats.ppcc_max, data, dist="plate_of_shrimp")
  1885. def test_ppcc_max_basic(self):
  1886. x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000,
  1887. random_state=1234567) + 1e4
  1888. assert_almost_equal(stats.ppcc_max(x), -0.71215366521264145, decimal=7)
  1889. def test_dist(self):
  1890. x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000,
  1891. random_state=1234567) + 1e4
  1892. # Test that we can specify distributions both by name and as objects.
  1893. max1 = stats.ppcc_max(x, dist='tukeylambda')
  1894. max2 = stats.ppcc_max(x, dist=stats.tukeylambda)
  1895. assert_almost_equal(max1, -0.71215366521264145, decimal=5)
  1896. assert_almost_equal(max2, -0.71215366521264145, decimal=5)
  1897. # Test that 'tukeylambda' is the default dist
  1898. max3 = stats.ppcc_max(x)
  1899. assert_almost_equal(max3, -0.71215366521264145, decimal=5)
  1900. def test_brack(self):
  1901. x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000,
  1902. random_state=1234567) + 1e4
  1903. assert_raises(ValueError, stats.ppcc_max, x, brack=(0.0, 1.0, 0.5))
  1904. assert_almost_equal(stats.ppcc_max(x, brack=(0, 1)),
  1905. -0.71215366521264145, decimal=7)
  1906. assert_almost_equal(stats.ppcc_max(x, brack=(-2, 2)),
  1907. -0.71215366521264145, decimal=7)
  1908. @make_xp_test_case(stats.boxcox_llf)
  1909. class TestBoxcox_llf:
  1910. @pytest.mark.parametrize("dtype", ["float32", "float64"])
  1911. def test_basic(self, dtype, xp):
  1912. dt = getattr(xp, dtype)
  1913. x = stats.norm.rvs(size=10000, loc=10, random_state=54321)
  1914. lmbda = 1
  1915. llf = stats.boxcox_llf(lmbda, xp.asarray(x, dtype=dt))
  1916. llf_expected = -x.size / 2. * np.log(np.sum(x.std()**2))
  1917. xp_assert_close(llf, xp.asarray(llf_expected, dtype=dt))
  1918. @skip_xp_backends(np_only=True,
  1919. reason='array-likes only accepted for NumPy backend.')
  1920. def test_array_like(self, xp):
  1921. x = stats.norm.rvs(size=100, loc=10, random_state=54321)
  1922. lmbda = 1
  1923. llf = stats.boxcox_llf(lmbda, x)
  1924. llf2 = stats.boxcox_llf(lmbda, list(x))
  1925. xp_assert_close(llf, llf2, rtol=1e-12)
  1926. def test_2d_input(self, xp):
  1927. # Note: boxcox_llf() was already working with 2-D input (sort of), so
  1928. # keep it like that. boxcox() doesn't work with 2-D input though, due
  1929. # to brent() returning a scalar.
  1930. x = stats.norm.rvs(size=100, loc=10, random_state=54321)
  1931. lmbda = 1
  1932. llf = stats.boxcox_llf(lmbda, x)
  1933. llf2 = stats.boxcox_llf(lmbda, np.vstack([x, x]).T)
  1934. xp_assert_close(xp.asarray([llf, llf]), xp.asarray(llf2), rtol=1e-12)
  1935. def test_empty(self, xp):
  1936. message = "One or more sample arguments is too small..."
  1937. with eager_warns(SmallSampleWarning, match=message, xp=xp):
  1938. assert xp.isnan(xp.asarray(stats.boxcox_llf(1, xp.asarray([]))))
  1939. def test_gh_6873(self, xp):
  1940. # Regression test for gh-6873.
  1941. # This example was taken from gh-7534, a duplicate of gh-6873.
  1942. data = xp.asarray([198.0, 233.0, 233.0, 392.0])
  1943. llf = stats.boxcox_llf(-8, data)
  1944. # The expected value was computed with mpmath.
  1945. xp_assert_close(llf, xp.asarray(-17.93934208579061))
  1946. def test_instability_gh20021(self, xp):
  1947. data = xp.asarray([2003, 1950, 1997, 2000, 2009], dtype=xp.float64)
  1948. llf = stats.boxcox_llf(1e-8, data)
  1949. # The expected value was computed with mpsci, set mpmath.mp.dps=100
  1950. # expect float64 output for integer input
  1951. xp_assert_close(llf, xp.asarray(-15.32401272869016598, dtype=xp.float64),
  1952. rtol=5e-7) # bumped tolerance from 1e-7 for Accelerate
  1953. def test_axis(self, xp):
  1954. data = xp.asarray([[100, 200], [300, 400]])
  1955. llf_axis_0 = stats.boxcox_llf(1, data, axis=0)
  1956. llf_0 = xp.stack([
  1957. stats.boxcox_llf(1, data[:, 0]),
  1958. stats.boxcox_llf(1, data[:, 1]),
  1959. ])
  1960. xp_assert_close(llf_axis_0, llf_0)
  1961. llf_axis_1 = stats.boxcox_llf(1, data, axis=1)
  1962. llf_1 = xp.stack([
  1963. stats.boxcox_llf(1, data[0, :]),
  1964. stats.boxcox_llf(1, data[1, :]),
  1965. ])
  1966. xp_assert_close(llf_axis_1, llf_1)
  1967. # This is the data from GitHub user Qukaiyi, given as an example
  1968. # of a data set that caused boxcox to fail.
  1969. _boxcox_data = [
  1970. 15957, 112079, 1039553, 711775, 173111, 307382, 183155, 53366, 760875,
  1971. 207500, 160045, 473714, 40194, 440319, 133261, 265444, 155590, 36660,
  1972. 904939, 55108, 138391, 339146, 458053, 63324, 1377727, 1342632, 41575,
  1973. 68685, 172755, 63323, 368161, 199695, 538214, 167760, 388610, 398855,
  1974. 1001873, 364591, 1320518, 194060, 194324, 2318551, 196114, 64225, 272000,
  1975. 198668, 123585, 86420, 1925556, 695798, 88664, 46199, 759135, 28051,
  1976. 345094, 1977752, 51778, 82746, 638126, 2560910, 45830, 140576, 1603787,
  1977. 57371, 548730, 5343629, 2298913, 998813, 2156812, 423966, 68350, 145237,
  1978. 131935, 1600305, 342359, 111398, 1409144, 281007, 60314, 242004, 113418,
  1979. 246211, 61940, 95858, 957805, 40909, 307955, 174159, 124278, 241193,
  1980. 872614, 304180, 146719, 64361, 87478, 509360, 167169, 933479, 620561,
  1981. 483333, 97416, 143518, 286905, 597837, 2556043, 89065, 69944, 196858,
  1982. 88883, 49379, 916265, 1527392, 626954, 54415, 89013, 2883386, 106096,
  1983. 402697, 45578, 349852, 140379, 34648, 757343, 1305442, 2054757, 121232,
  1984. 606048, 101492, 51426, 1820833, 83412, 136349, 1379924, 505977, 1303486,
  1985. 95853, 146451, 285422, 2205423, 259020, 45864, 684547, 182014, 784334,
  1986. 174793, 563068, 170745, 1195531, 63337, 71833, 199978, 2330904, 227335,
  1987. 898280, 75294, 2011361, 116771, 157489, 807147, 1321443, 1148635, 2456524,
  1988. 81839, 1228251, 97488, 1051892, 75397, 3009923, 2732230, 90923, 39735,
  1989. 132433, 225033, 337555, 1204092, 686588, 1062402, 40362, 1361829, 1497217,
  1990. 150074, 551459, 2019128, 39581, 45349, 1117187, 87845, 1877288, 164448,
  1991. 10338362, 24942, 64737, 769946, 2469124, 2366997, 259124, 2667585, 29175,
  1992. 56250, 74450, 96697, 5920978, 838375, 225914, 119494, 206004, 430907,
  1993. 244083, 219495, 322239, 407426, 618748, 2087536, 2242124, 4736149, 124624,
  1994. 406305, 240921, 2675273, 4425340, 821457, 578467, 28040, 348943, 48795,
  1995. 145531, 52110, 1645730, 1768364, 348363, 85042, 2673847, 81935, 169075,
  1996. 367733, 135474, 383327, 1207018, 93481, 5934183, 352190, 636533, 145870,
  1997. 55659, 146215, 73191, 248681, 376907, 1606620, 169381, 81164, 246390,
  1998. 236093, 885778, 335969, 49266, 381430, 307437, 350077, 34346, 49340,
  1999. 84715, 527120, 40163, 46898, 4609439, 617038, 2239574, 159905, 118337,
  2000. 120357, 430778, 3799158, 3516745, 54198, 2970796, 729239, 97848, 6317375,
  2001. 887345, 58198, 88111, 867595, 210136, 1572103, 1420760, 574046, 845988,
  2002. 509743, 397927, 1119016, 189955, 3883644, 291051, 126467, 1239907, 2556229,
  2003. 411058, 657444, 2025234, 1211368, 93151, 577594, 4842264, 1531713, 305084,
  2004. 479251, 20591, 1466166, 137417, 897756, 594767, 3606337, 32844, 82426,
  2005. 1294831, 57174, 290167, 322066, 813146, 5671804, 4425684, 895607, 450598,
  2006. 1048958, 232844, 56871, 46113, 70366, 701618, 97739, 157113, 865047,
  2007. 194810, 1501615, 1765727, 38125, 2733376, 40642, 437590, 127337, 106310,
  2008. 4167579, 665303, 809250, 1210317, 45750, 1853687, 348954, 156786, 90793,
  2009. 1885504, 281501, 3902273, 359546, 797540, 623508, 3672775, 55330, 648221,
  2010. 266831, 90030, 7118372, 735521, 1009925, 283901, 806005, 2434897, 94321,
  2011. 309571, 4213597, 2213280, 120339, 64403, 8155209, 1686948, 4327743,
  2012. 1868312, 135670, 3189615, 1569446, 706058, 58056, 2438625, 520619, 105201,
  2013. 141961, 179990, 1351440, 3148662, 2804457, 2760144, 70775, 33807, 1926518,
  2014. 2362142, 186761, 240941, 97860, 1040429, 1431035, 78892, 484039, 57845,
  2015. 724126, 3166209, 175913, 159211, 1182095, 86734, 1921472, 513546, 326016,
  2016. 1891609
  2017. ]
  2018. class TestBoxcox:
  2019. def test_fixed_lmbda(self):
  2020. x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
  2021. xt = stats.boxcox(x, lmbda=1)
  2022. assert_allclose(xt, x - 1)
  2023. xt = stats.boxcox(x, lmbda=-1)
  2024. assert_allclose(xt, 1 - 1/x)
  2025. xt = stats.boxcox(x, lmbda=0)
  2026. assert_allclose(xt, np.log(x))
  2027. # Also test that array_like input works
  2028. xt = stats.boxcox(list(x), lmbda=0)
  2029. assert_allclose(xt, np.log(x))
  2030. # test that constant input is accepted; see gh-12225
  2031. xt = stats.boxcox(np.ones(10), 2)
  2032. assert_equal(xt, np.zeros(10))
  2033. def test_lmbda_None(self):
  2034. # Start from normal rv's, do inverse transform to check that
  2035. # optimization function gets close to the right answer.
  2036. lmbda = 2.5
  2037. x = stats.norm.rvs(loc=10, size=50000, random_state=1245)
  2038. x_inv = (x * lmbda + 1)**(-lmbda)
  2039. xt, maxlog = stats.boxcox(x_inv)
  2040. assert_almost_equal(maxlog, -1 / lmbda, decimal=2)
  2041. def test_alpha(self):
  2042. rng = np.random.RandomState(1234)
  2043. x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5
  2044. # Some regular values for alpha, on a small sample size
  2045. _, _, interval = stats.boxcox(x, alpha=0.75)
  2046. assert_allclose(interval, [4.004485780226041, 5.138756355035744])
  2047. _, _, interval = stats.boxcox(x, alpha=0.05)
  2048. assert_allclose(interval, [1.2138178554857557, 8.209033272375663])
  2049. # Try some extreme values, see we don't hit the N=500 limit
  2050. x = _old_loggamma_rvs(7, size=500, random_state=rng) + 15
  2051. _, _, interval = stats.boxcox(x, alpha=0.001)
  2052. assert_allclose(interval, [0.3988867, 11.40553131])
  2053. _, _, interval = stats.boxcox(x, alpha=0.999)
  2054. assert_allclose(interval, [5.83316246, 5.83735292])
  2055. def test_boxcox_bad_arg(self):
  2056. # Raise ValueError if any data value is negative.
  2057. x = np.array([-1, 2])
  2058. assert_raises(ValueError, stats.boxcox, x)
  2059. # Raise ValueError if data is constant.
  2060. assert_raises(ValueError, stats.boxcox, np.array([1]))
  2061. # Raise ValueError if data is not 1-dimensional.
  2062. assert_raises(ValueError, stats.boxcox, np.array([[1], [2]]))
  2063. def test_empty(self):
  2064. assert_(stats.boxcox([]).shape == (0,))
  2065. def test_gh_6873(self):
  2066. # Regression test for gh-6873.
  2067. y, lam = stats.boxcox(_boxcox_data)
  2068. # The expected value of lam was computed with the function
  2069. # powerTransform in the R library 'car'. I trust that value
  2070. # to only about five significant digits.
  2071. assert_allclose(lam, -0.051654, rtol=1e-5)
  2072. @pytest.mark.parametrize("bounds", [(-1, 1), (1.1, 2), (-2, -1.1)])
  2073. def test_bounded_optimizer_within_bounds(self, bounds):
  2074. # Define custom optimizer with bounds.
  2075. def optimizer(fun):
  2076. return optimize.minimize_scalar(fun, bounds=bounds,
  2077. method="bounded")
  2078. _, lmbda = stats.boxcox(_boxcox_data, lmbda=None, optimizer=optimizer)
  2079. assert bounds[0] < lmbda < bounds[1]
  2080. def test_bounded_optimizer_against_unbounded_optimizer(self):
  2081. # Test whether setting bounds on optimizer excludes solution from
  2082. # unbounded optimizer.
  2083. # Get unbounded solution.
  2084. _, lmbda = stats.boxcox(_boxcox_data, lmbda=None)
  2085. # Set tolerance and bounds around solution.
  2086. bounds = (lmbda + 0.1, lmbda + 1)
  2087. options = {'xatol': 1e-12}
  2088. def optimizer(fun):
  2089. return optimize.minimize_scalar(fun, bounds=bounds,
  2090. method="bounded", options=options)
  2091. # Check bounded solution. Lower bound should be active.
  2092. _, lmbda_bounded = stats.boxcox(_boxcox_data, lmbda=None,
  2093. optimizer=optimizer)
  2094. assert lmbda_bounded != lmbda
  2095. assert_allclose(lmbda_bounded, bounds[0])
  2096. @pytest.mark.parametrize("optimizer", ["str", (1, 2), 0.1])
  2097. def test_bad_optimizer_type_raises_error(self, optimizer):
  2098. # Check if error is raised if string, tuple or float is passed
  2099. with pytest.raises(ValueError, match="`optimizer` must be a callable"):
  2100. stats.boxcox(_boxcox_data, lmbda=None, optimizer=optimizer)
  2101. def test_bad_optimizer_value_raises_error(self):
  2102. # Check if error is raised if `optimizer` function does not return
  2103. # `OptimizeResult` object
  2104. # Define test function that always returns 1
  2105. def optimizer(fun):
  2106. return 1
  2107. message = "return an object containing the optimal `lmbda`"
  2108. with pytest.raises(ValueError, match=message):
  2109. stats.boxcox(_boxcox_data, lmbda=None, optimizer=optimizer)
  2110. @pytest.mark.parametrize(
  2111. "bad_x", [np.array([1, -42, 12345.6]), np.array([np.nan, 42, 1])]
  2112. )
  2113. def test_negative_x_value_raises_error(self, bad_x):
  2114. """Test boxcox_normmax raises ValueError if x contains non-positive values."""
  2115. message = "only positive, finite, real numbers"
  2116. with pytest.raises(ValueError, match=message):
  2117. stats.boxcox_normmax(bad_x)
  2118. @pytest.mark.parametrize('x', [
  2119. # Attempt to trigger overflow in power expressions.
  2120. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0,
  2121. 2009.0, 1980.0, 1999.0, 2007.0, 1991.0]),
  2122. # Attempt to trigger overflow with a large optimal lambda.
  2123. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0]),
  2124. # Attempt to trigger overflow with large data.
  2125. np.array([2003.0e200, 1950.0e200, 1997.0e200, 2000.0e200, 2009.0e200])
  2126. ])
  2127. def test_overflow(self, x):
  2128. with pytest.warns(UserWarning, match="The optimal lambda is"):
  2129. xt_bc, lam_bc = stats.boxcox(x)
  2130. assert np.all(np.isfinite(xt_bc))
  2131. class TestBoxcoxNormmax:
  2132. def setup_method(self):
  2133. self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
  2134. def test_pearsonr(self):
  2135. maxlog = stats.boxcox_normmax(self.x)
  2136. assert_allclose(maxlog, 1.804465, rtol=1e-6)
  2137. def test_mle(self):
  2138. maxlog = stats.boxcox_normmax(self.x, method='mle')
  2139. assert_allclose(maxlog, 1.758101, rtol=1e-6)
  2140. # Check that boxcox() uses 'mle'
  2141. _, maxlog_boxcox = stats.boxcox(self.x)
  2142. assert_allclose(maxlog_boxcox, maxlog)
  2143. def test_all(self):
  2144. maxlog_all = stats.boxcox_normmax(self.x, method='all')
  2145. assert_allclose(maxlog_all, [1.804465, 1.758101], rtol=1e-6)
  2146. @pytest.mark.parametrize("method", ["mle", "pearsonr", "all"])
  2147. @pytest.mark.parametrize("bounds", [(-1, 1), (1.1, 2), (-2, -1.1)])
  2148. def test_bounded_optimizer_within_bounds(self, method, bounds):
  2149. def optimizer(fun):
  2150. return optimize.minimize_scalar(fun, bounds=bounds,
  2151. method="bounded")
  2152. maxlog = stats.boxcox_normmax(self.x, method=method,
  2153. optimizer=optimizer)
  2154. assert np.all(bounds[0] < maxlog)
  2155. assert np.all(maxlog < bounds[1])
  2156. @pytest.mark.slow
  2157. def test_user_defined_optimizer(self):
  2158. # tests an optimizer that is not based on scipy.optimize.minimize
  2159. lmbda = stats.boxcox_normmax(self.x)
  2160. lmbda_rounded = np.round(lmbda, 5)
  2161. lmbda_range = np.linspace(lmbda_rounded-0.01, lmbda_rounded+0.01, 1001)
  2162. class MyResult:
  2163. pass
  2164. def optimizer(fun):
  2165. # brute force minimum over the range
  2166. objs = []
  2167. for lmbda in lmbda_range:
  2168. objs.append(fun(lmbda))
  2169. res = MyResult()
  2170. res.x = lmbda_range[np.argmin(objs)]
  2171. return res
  2172. lmbda2 = stats.boxcox_normmax(self.x, optimizer=optimizer)
  2173. assert lmbda2 != lmbda # not identical
  2174. assert_allclose(lmbda2, lmbda, 1e-5) # but as close as it should be
  2175. def test_user_defined_optimizer_and_brack_raises_error(self):
  2176. optimizer = optimize.minimize_scalar
  2177. # Using default `brack=None` with user-defined `optimizer` works as
  2178. # expected.
  2179. stats.boxcox_normmax(self.x, brack=None, optimizer=optimizer)
  2180. # Using user-defined `brack` with user-defined `optimizer` is expected
  2181. # to throw an error. Instead, users should specify
  2182. # optimizer-specific parameters in the optimizer function itself.
  2183. with pytest.raises(ValueError, match="`brack` must be None if "
  2184. "`optimizer` is given"):
  2185. stats.boxcox_normmax(self.x, brack=(-2.0, 2.0),
  2186. optimizer=optimizer)
  2187. @pytest.mark.parametrize(
  2188. 'x', ([2003.0, 1950.0, 1997.0, 2000.0, 2009.0],
  2189. [0.50000471, 0.50004979, 0.50005902, 0.50009312, 0.50001632]))
  2190. def test_overflow(self, x):
  2191. message = "The optimal lambda is..."
  2192. with pytest.warns(UserWarning, match=message):
  2193. lmbda = stats.boxcox_normmax(x, method='mle')
  2194. assert np.isfinite(special.boxcox(x, lmbda)).all()
  2195. # 10000 is safety factor used in boxcox_normmax
  2196. ymax = np.finfo(np.float64).max / 10000
  2197. x_treme = np.max(x) if lmbda > 0 else np.min(x)
  2198. y_extreme = special.boxcox(x_treme, lmbda)
  2199. assert_allclose(y_extreme, ymax * np.sign(lmbda))
  2200. def test_negative_ymax(self):
  2201. with pytest.raises(ValueError, match="`ymax` must be strictly positive"):
  2202. stats.boxcox_normmax(self.x, ymax=-1)
  2203. @pytest.mark.parametrize("x", [
  2204. # positive overflow in float64
  2205. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0],
  2206. dtype=np.float64),
  2207. # negative overflow in float64
  2208. np.array([0.50000471, 0.50004979, 0.50005902, 0.50009312, 0.50001632],
  2209. dtype=np.float64),
  2210. # positive overflow in float32
  2211. np.array([200.3, 195.0, 199.7, 200.0, 200.9],
  2212. dtype=np.float32),
  2213. # negative overflow in float32
  2214. np.array([2e-30, 1e-30, 1e-30, 1e-30, 1e-30, 1e-30],
  2215. dtype=np.float32),
  2216. ])
  2217. @pytest.mark.parametrize("ymax", [1e10, 1e30, None])
  2218. # TODO: add method "pearsonr" after fix overflow issue
  2219. @pytest.mark.parametrize("method", ["mle"])
  2220. def test_user_defined_ymax_input_float64_32(self, x, ymax, method):
  2221. # Test the maximum of the transformed data close to ymax
  2222. with pytest.warns(UserWarning, match="The optimal lambda is"):
  2223. kwarg = {'ymax': ymax} if ymax is not None else {}
  2224. lmb = stats.boxcox_normmax(x, method=method, **kwarg)
  2225. x_treme = [np.min(x), np.max(x)]
  2226. ymax_res = max(abs(stats.boxcox(x_treme, lmb)))
  2227. if ymax is None:
  2228. # 10000 is safety factor used in boxcox_normmax
  2229. ymax = np.finfo(x.dtype).max / 10000
  2230. assert_allclose(ymax, ymax_res, rtol=1e-5)
  2231. @pytest.mark.parametrize("x", [
  2232. # positive overflow in float32 but not float64
  2233. [200.3, 195.0, 199.7, 200.0, 200.9],
  2234. # negative overflow in float32 but not float64
  2235. [2e-30, 1e-30, 1e-30, 1e-30, 1e-30, 1e-30],
  2236. ])
  2237. # TODO: add method "pearsonr" after fix overflow issue
  2238. @pytest.mark.parametrize("method", ["mle"])
  2239. def test_user_defined_ymax_inf(self, x, method):
  2240. x_32 = np.asarray(x, dtype=np.float32)
  2241. x_64 = np.asarray(x, dtype=np.float64)
  2242. # assert overflow with float32 but not float64
  2243. with pytest.warns(UserWarning, match="The optimal lambda is"):
  2244. stats.boxcox_normmax(x_32, method=method)
  2245. stats.boxcox_normmax(x_64, method=method)
  2246. # compute the true optimal lambda then compare them
  2247. lmb_32 = stats.boxcox_normmax(x_32, ymax=np.inf, method=method)
  2248. lmb_64 = stats.boxcox_normmax(x_64, ymax=np.inf, method=method)
  2249. assert_allclose(lmb_32, lmb_64, rtol=1e-2)
  2250. class TestBoxcoxNormplot:
  2251. def setup_method(self):
  2252. self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5
  2253. def test_basic(self):
  2254. N = 5
  2255. lmbdas, ppcc = stats.boxcox_normplot(self.x, -10, 10, N=N)
  2256. ppcc_expected = [0.57783375, 0.83610988, 0.97524311, 0.99756057,
  2257. 0.95843297]
  2258. assert_allclose(lmbdas, np.linspace(-10, 10, num=N))
  2259. assert_allclose(ppcc, ppcc_expected)
  2260. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  2261. def test_plot_kwarg(self):
  2262. # Check with the matplotlib.pyplot module
  2263. fig = plt.figure()
  2264. ax = fig.add_subplot(111)
  2265. stats.boxcox_normplot(self.x, -20, 20, plot=plt)
  2266. fig.delaxes(ax)
  2267. # Check that a Matplotlib Axes object is accepted
  2268. ax = fig.add_subplot(111)
  2269. stats.boxcox_normplot(self.x, -20, 20, plot=ax)
  2270. plt.close()
  2271. def test_invalid_inputs(self):
  2272. # `lb` has to be larger than `la`
  2273. assert_raises(ValueError, stats.boxcox_normplot, self.x, 1, 0)
  2274. # `x` can not contain negative values
  2275. assert_raises(ValueError, stats.boxcox_normplot, [-1, 1], 0, 1)
  2276. def test_empty(self):
  2277. assert_(stats.boxcox_normplot([], 0, 1).size == 0)
  2278. @make_xp_test_case(stats.yeojohnson_llf)
  2279. class TestYeojohnson_llf:
  2280. def test_array_like(self):
  2281. # array_like not applicable with SCIPY_ARRAY_API=1
  2282. x = stats.norm.rvs(size=100, loc=0, random_state=54321)
  2283. lmbda = 1
  2284. llf = stats.yeojohnson_llf(lmbda, x)
  2285. llf2 = stats.yeojohnson_llf(lmbda, list(x))
  2286. assert_allclose(llf, llf2, rtol=1e-12)
  2287. def test_2d_input(self, xp):
  2288. x = stats.norm.rvs(size=100, loc=10, random_state=54321)
  2289. x = xp.asarray(x)
  2290. lmbda = 1
  2291. ref = stats.yeojohnson_llf(lmbda, x)
  2292. res = stats.yeojohnson_llf(lmbda, xp.stack([x, x]).T)
  2293. xp_assert_close(res, xp.stack((ref, ref)), rtol=1e-12)
  2294. def test_empty(self, xp):
  2295. message = "One or more sample arguments is too small..."
  2296. with eager_warns(SmallSampleWarning, match=message, xp=xp):
  2297. assert xp.isnan(stats.yeojohnson_llf(1, xp.asarray([])))
  2298. class TestYeojohnson:
  2299. def test_fixed_lmbda(self):
  2300. rng = np.random.RandomState(12345)
  2301. # Test positive input
  2302. x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5
  2303. assert np.all(x > 0)
  2304. xt = stats.yeojohnson(x, lmbda=1)
  2305. assert_allclose(xt, x)
  2306. xt = stats.yeojohnson(x, lmbda=-1)
  2307. assert_allclose(xt, 1 - 1 / (x + 1))
  2308. xt = stats.yeojohnson(x, lmbda=0)
  2309. assert_allclose(xt, np.log(x + 1))
  2310. xt = stats.yeojohnson(x, lmbda=1)
  2311. assert_allclose(xt, x)
  2312. # Test negative input
  2313. x = _old_loggamma_rvs(5, size=50, random_state=rng) - 5
  2314. assert np.all(x < 0)
  2315. xt = stats.yeojohnson(x, lmbda=2)
  2316. assert_allclose(xt, -np.log(-x + 1))
  2317. xt = stats.yeojohnson(x, lmbda=1)
  2318. assert_allclose(xt, x)
  2319. xt = stats.yeojohnson(x, lmbda=3)
  2320. assert_allclose(xt, 1 / (-x + 1) - 1)
  2321. # test both positive and negative input
  2322. x = _old_loggamma_rvs(5, size=50, random_state=rng) - 2
  2323. assert not np.all(x < 0)
  2324. assert not np.all(x >= 0)
  2325. pos = x >= 0
  2326. xt = stats.yeojohnson(x, lmbda=1)
  2327. assert_allclose(xt[pos], x[pos])
  2328. xt = stats.yeojohnson(x, lmbda=-1)
  2329. assert_allclose(xt[pos], 1 - 1 / (x[pos] + 1))
  2330. xt = stats.yeojohnson(x, lmbda=0)
  2331. assert_allclose(xt[pos], np.log(x[pos] + 1))
  2332. xt = stats.yeojohnson(x, lmbda=1)
  2333. assert_allclose(xt[pos], x[pos])
  2334. neg = ~pos
  2335. xt = stats.yeojohnson(x, lmbda=2)
  2336. assert_allclose(xt[neg], -np.log(-x[neg] + 1))
  2337. xt = stats.yeojohnson(x, lmbda=1)
  2338. assert_allclose(xt[neg], x[neg])
  2339. xt = stats.yeojohnson(x, lmbda=3)
  2340. assert_allclose(xt[neg], 1 / (-x[neg] + 1) - 1)
  2341. @pytest.mark.parametrize('lmbda', [0, .1, .5, 2])
  2342. def test_lmbda_None(self, lmbda):
  2343. # Start from normal rv's, do inverse transform to check that
  2344. # optimization function gets close to the right answer.
  2345. def _inverse_transform(x, lmbda):
  2346. x_inv = np.zeros(x.shape, dtype=x.dtype)
  2347. pos = x >= 0
  2348. # when x >= 0
  2349. if abs(lmbda) < np.spacing(1.):
  2350. x_inv[pos] = np.exp(x[pos]) - 1
  2351. else: # lmbda != 0
  2352. x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1
  2353. # when x < 0
  2354. if abs(lmbda - 2) > np.spacing(1.):
  2355. x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1,
  2356. 1 / (2 - lmbda))
  2357. else: # lmbda == 2
  2358. x_inv[~pos] = 1 - np.exp(-x[~pos])
  2359. return x_inv
  2360. n_samples = 20000
  2361. rng = np.random.RandomState(1234567)
  2362. x = rng.normal(loc=0, scale=1, size=(n_samples))
  2363. x_inv = _inverse_transform(x, lmbda)
  2364. xt, maxlog = stats.yeojohnson(x_inv)
  2365. assert_allclose(maxlog, lmbda, atol=1e-2)
  2366. assert_almost_equal(0, np.linalg.norm(x - xt) / n_samples, decimal=2)
  2367. assert_almost_equal(0, xt.mean(), decimal=1)
  2368. assert_almost_equal(1, xt.std(), decimal=1)
  2369. def test_empty(self):
  2370. assert_(stats.yeojohnson([]).shape == (0,))
  2371. def test_array_like(self):
  2372. x = stats.norm.rvs(size=100, loc=0, random_state=54321)
  2373. xt1, _ = stats.yeojohnson(x)
  2374. xt2, _ = stats.yeojohnson(list(x))
  2375. assert_allclose(xt1, xt2, rtol=1e-12)
  2376. @pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
  2377. def test_input_dtype_complex(self, dtype):
  2378. x = np.arange(6, dtype=dtype)
  2379. err_msg = ('Yeo-Johnson transformation is not defined for complex '
  2380. 'numbers.')
  2381. with pytest.raises(ValueError, match=err_msg):
  2382. stats.yeojohnson(x)
  2383. @pytest.mark.parametrize('dtype', [np.int8, np.uint8, np.int16, np.int32])
  2384. def test_input_dtype_integer(self, dtype):
  2385. x_int = np.arange(8, dtype=dtype)
  2386. x_float = np.arange(8, dtype=np.float64)
  2387. xt_int, lmbda_int = stats.yeojohnson(x_int)
  2388. xt_float, lmbda_float = stats.yeojohnson(x_float)
  2389. assert_allclose(xt_int, xt_float, rtol=1e-7)
  2390. assert_allclose(lmbda_int, lmbda_float, rtol=1e-7)
  2391. def test_input_high_variance(self):
  2392. # non-regression test for gh-10821
  2393. x = np.array([3251637.22, 620695.44, 11642969.00, 2223468.22,
  2394. 85307500.00, 16494389.89, 917215.88, 11642969.00,
  2395. 2145773.87, 4962000.00, 620695.44, 651234.50,
  2396. 1907876.71, 4053297.88, 3251637.22, 3259103.08,
  2397. 9547969.00, 20631286.23, 12807072.08, 2383819.84,
  2398. 90114500.00, 17209575.46, 12852969.00, 2414609.99,
  2399. 2170368.23])
  2400. xt_yeo, lam_yeo = stats.yeojohnson(x)
  2401. xt_box, lam_box = stats.boxcox(x + 1)
  2402. assert_allclose(xt_yeo, xt_box, rtol=1e-6)
  2403. assert_allclose(lam_yeo, lam_box, rtol=1e-6)
  2404. @pytest.mark.parametrize('x', [
  2405. np.array([1.0, float("nan"), 2.0]),
  2406. np.array([1.0, float("inf"), 2.0]),
  2407. np.array([1.0, -float("inf"), 2.0]),
  2408. np.array([-1.0, float("nan"), float("inf"), -float("inf"), 1.0])
  2409. ])
  2410. def test_nonfinite_input(self, x):
  2411. with pytest.raises(ValueError, match='Yeo-Johnson input must be finite'):
  2412. xt_yeo, lam_yeo = stats.yeojohnson(x)
  2413. @pytest.mark.parametrize('x', [
  2414. # Attempt to trigger overflow in power expressions.
  2415. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0,
  2416. 2009.0, 1980.0, 1999.0, 2007.0, 1991.0]),
  2417. # Attempt to trigger overflow with a large optimal lambda.
  2418. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0]),
  2419. # Attempt to trigger overflow with large data.
  2420. np.array([2003.0e200, 1950.0e200, 1997.0e200, 2000.0e200, 2009.0e200])
  2421. ])
  2422. def test_overflow(self, x):
  2423. # non-regression test for gh-18389
  2424. def optimizer(fun, lam_yeo):
  2425. out = optimize.fminbound(fun, -lam_yeo, lam_yeo, xtol=1.48e-08)
  2426. result = optimize.OptimizeResult()
  2427. result.x = out
  2428. return result
  2429. with np.errstate(all="raise"):
  2430. xt_yeo, lam_yeo = stats.yeojohnson(x)
  2431. xt_box, lam_box = stats.boxcox(
  2432. x + 1, optimizer=partial(optimizer, lam_yeo=lam_yeo))
  2433. assert np.isfinite(np.var(xt_yeo))
  2434. assert np.isfinite(np.var(xt_box))
  2435. assert_allclose(lam_yeo, lam_box, rtol=1e-6)
  2436. assert_allclose(xt_yeo, xt_box, rtol=1e-4)
  2437. @pytest.mark.parametrize('x', [
  2438. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0,
  2439. 2009.0, 1980.0, 1999.0, 2007.0, 1991.0]),
  2440. np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0])
  2441. ])
  2442. @pytest.mark.parametrize('scale', [1, 1e-12, 1e-32, 1e-150, 1e32, 1e200])
  2443. @pytest.mark.parametrize('sign', [1, -1])
  2444. def test_overflow_underflow_signed_data(self, x, scale, sign):
  2445. # non-regression test for gh-18389
  2446. with np.errstate(all="raise"):
  2447. xt_yeo, lam_yeo = stats.yeojohnson(sign * x * scale)
  2448. assert np.all(np.sign(sign * x) == np.sign(xt_yeo))
  2449. assert np.isfinite(lam_yeo)
  2450. assert np.isfinite(np.var(xt_yeo))
  2451. @pytest.mark.parametrize('x', [
  2452. np.array([0, 1, 2, 3]),
  2453. np.array([0, -1, 2, -3]),
  2454. np.array([0, 0, 0])
  2455. ])
  2456. @pytest.mark.parametrize('sign', [1, -1])
  2457. @pytest.mark.parametrize('brack', [None, (-2, 2)])
  2458. def test_integer_signed_data(self, x, sign, brack):
  2459. with np.errstate(all="raise"):
  2460. x_int = sign * x
  2461. x_float = x_int.astype(np.float64)
  2462. lam_yeo_int = stats.yeojohnson_normmax(x_int, brack=brack)
  2463. xt_yeo_int = stats.yeojohnson(x_int, lmbda=lam_yeo_int)
  2464. lam_yeo_float = stats.yeojohnson_normmax(x_float, brack=brack)
  2465. xt_yeo_float = stats.yeojohnson(x_float, lmbda=lam_yeo_float)
  2466. assert np.all(np.sign(x_int) == np.sign(xt_yeo_int))
  2467. assert np.isfinite(lam_yeo_int)
  2468. assert np.isfinite(np.var(xt_yeo_int))
  2469. assert lam_yeo_int == lam_yeo_float
  2470. assert np.all(xt_yeo_int == xt_yeo_float)
  2471. class TestYeojohnsonNormmax:
  2472. def setup_method(self):
  2473. self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
  2474. def test_mle(self):
  2475. maxlog = stats.yeojohnson_normmax(self.x)
  2476. assert_allclose(maxlog, 1.876393, rtol=1e-6)
  2477. def test_darwin_example(self):
  2478. # test from original paper "A new family of power transformations to
  2479. # improve normality or symmetry" by Yeo and Johnson.
  2480. x = [6.1, -8.4, 1.0, 2.0, 0.7, 2.9, 3.5, 5.1, 1.8, 3.6, 7.0, 3.0, 9.3,
  2481. 7.5, -6.0]
  2482. lmbda = stats.yeojohnson_normmax(x)
  2483. assert np.allclose(lmbda, 1.305, atol=1e-3)
  2484. @make_xp_test_case(stats.circmean, stats.circvar, stats.circstd)
  2485. class TestCircFuncs:
  2486. # In gh-5747, the R package `circular` was used to calculate reference
  2487. # values for the circular variance, e.g.:
  2488. # library(circular)
  2489. # options(digits=16)
  2490. # x = c(0, 2*pi/3, 5*pi/3)
  2491. # var.circular(x)
  2492. @pytest.mark.parametrize("test_func,expected",
  2493. [(stats.circmean, 0.167690146),
  2494. (stats.circvar, 0.006455174000787767),
  2495. (stats.circstd, 6.520702116)])
  2496. def test_circfuncs(self, test_func, expected, xp):
  2497. x = xp.asarray([355., 5., 2., 359., 10., 350.])
  2498. xp_assert_close(test_func(x, high=360), xp.asarray(expected))
  2499. def test_circfuncs_small(self, xp):
  2500. # Default tolerances won't work here because the reference values
  2501. # are approximations. Ensure all array types work in float64 to
  2502. # avoid needing separate float32 and float64 tolerances.
  2503. x = xp.asarray([20, 21, 22, 18, 19, 20.5, 19.2], dtype=xp.float64)
  2504. M1 = xp.mean(x)
  2505. M2 = stats.circmean(x, high=360)
  2506. xp_assert_close(M2, M1, rtol=1e-5)
  2507. V1 = xp.var(x*xp.pi/180, correction=0)
  2508. # for small variations, circvar is approximately half the
  2509. # linear variance
  2510. V1 = V1 / 2.
  2511. V2 = stats.circvar(x, high=360)
  2512. xp_assert_close(V2, V1, rtol=1e-4)
  2513. S1 = xp.std(x, correction=0)
  2514. S2 = stats.circstd(x, high=360)
  2515. xp_assert_close(S2, S1, rtol=1e-4)
  2516. @pytest.mark.parametrize("test_func, numpy_func",
  2517. [(stats.circmean, np.mean),
  2518. (stats.circvar, np.var),
  2519. (stats.circstd, np.std)])
  2520. def test_circfuncs_close(self, test_func, numpy_func, xp):
  2521. # circfuncs should handle very similar inputs (gh-12740)
  2522. x = np.asarray([0.12675364631578953] * 10 + [0.12675365920187928] * 100)
  2523. circstat = test_func(xp.asarray(x))
  2524. normal = xp.asarray(numpy_func(x))
  2525. xp_assert_close(circstat, normal, atol=2e-8)
  2526. @pytest.mark.parametrize('circfunc', [stats.circmean,
  2527. stats.circvar,
  2528. stats.circstd])
  2529. def test_circmean_axis(self, xp, circfunc):
  2530. x = xp.asarray([[355, 5, 2, 359, 10, 350],
  2531. [351, 7, 4, 352, 9, 349],
  2532. [357, 9, 8, 358, 4, 356.]])
  2533. res = circfunc(x, high=360)
  2534. ref = circfunc(xp.reshape(x, (-1,)), high=360)
  2535. xp_assert_close(res, xp.asarray(ref))
  2536. res = circfunc(x, high=360, axis=1)
  2537. ref = [circfunc(x[i, :], high=360) for i in range(x.shape[0])]
  2538. xp_assert_close(res, xp.stack(ref))
  2539. res = circfunc(x, high=360, axis=0)
  2540. ref = [circfunc(x[:, i], high=360) for i in range(x.shape[1])]
  2541. xp_assert_close(res, xp.stack(ref))
  2542. @pytest.mark.parametrize("test_func,expected",
  2543. [(stats.circmean, 0.167690146),
  2544. (stats.circvar, 0.006455174270186603),
  2545. (stats.circstd, 6.520702116)])
  2546. def test_circfuncs_array_like(self, test_func, expected, xp):
  2547. x = xp.asarray([355, 5, 2, 359, 10, 350.])
  2548. xp_assert_close(test_func(x, high=360), xp.asarray(expected))
  2549. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2550. stats.circstd])
  2551. def test_empty(self, test_func, xp):
  2552. dtype = xp.float64
  2553. x = xp.asarray([], dtype=dtype)
  2554. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  2555. res = test_func(x)
  2556. xp_assert_equal(res, xp.asarray(xp.nan, dtype=dtype))
  2557. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2558. stats.circstd])
  2559. def test_nan_propagate(self, test_func, xp):
  2560. x = xp.asarray([355, 5, 2, 359, 10, 350, np.nan])
  2561. xp_assert_equal(test_func(x, high=360), xp.asarray(xp.nan))
  2562. @pytest.mark.parametrize("test_func,expected",
  2563. [(stats.circmean,
  2564. {None: np.nan, 0: 355.66582264, 1: 0.28725053}),
  2565. (stats.circvar,
  2566. {None: np.nan,
  2567. 0: 0.002570671054089924,
  2568. 1: 0.005545914017677123}),
  2569. (stats.circstd,
  2570. {None: np.nan, 0: 4.11093193, 1: 6.04265394})])
  2571. def test_nan_propagate_array(self, test_func, expected, xp):
  2572. x = xp.asarray([[355, 5, 2, 359, 10, 350, 1],
  2573. [351, 7, 4, 352, 9, 349, np.nan],
  2574. [1, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]])
  2575. for axis in expected.keys():
  2576. out = test_func(x, high=360, axis=axis)
  2577. if axis is None:
  2578. xp_assert_equal(out, xp.asarray(xp.nan))
  2579. else:
  2580. xp_assert_close(out[0], xp.asarray(expected[axis]))
  2581. xp_assert_equal(out[1:], xp.full_like(out[1:], xp.nan))
  2582. def test_circmean_scalar(self, xp):
  2583. x = xp.asarray(1.)[()]
  2584. M1 = x
  2585. M2 = stats.circmean(x)
  2586. xp_assert_close(M2, M1, rtol=1e-5)
  2587. def test_circmean_range(self, xp):
  2588. # regression test for gh-6420: circmean(..., high, low) must be
  2589. # between `high` and `low`
  2590. m = stats.circmean(xp.arange(0, 2, 0.1), xp.pi, -xp.pi)
  2591. xp_assert_less(m, xp.asarray(xp.pi))
  2592. xp_assert_less(-m, xp.asarray(xp.pi))
  2593. def test_circfuncs_uint8(self, xp):
  2594. # regression test for gh-7255: overflow when working with
  2595. # numpy uint8 data type
  2596. x = xp.asarray([150, 10], dtype=xp.uint8)
  2597. xp_assert_close(stats.circmean(x, high=180), xp.asarray(170.0))
  2598. xp_assert_close(stats.circvar(x, high=180), xp.asarray(0.2339555554617))
  2599. xp_assert_close(stats.circstd(x, high=180), xp.asarray(20.91551378))
  2600. def test_circstd_zero(self, xp):
  2601. # circstd() of a single number should return positive zero.
  2602. y = stats.circstd(xp.asarray([0]))
  2603. assert math.copysign(1.0, y) == 1.0
  2604. def test_circmean_accuracy_tiny_input(self, xp):
  2605. # For tiny x such that sin(x) == x and cos(x) == 1.0 numerically,
  2606. # circmean(x) should return x because atan2(sin(x), cos(x)) == x.
  2607. # This test verifies this.
  2608. #
  2609. # The purpose of this test is not to show that circmean() is
  2610. # accurate in the last digit for certain input, because this is
  2611. # neither guaranteed not particularly useful. Rather, it is a
  2612. # "white-box" sanity check that no undue loss of precision is
  2613. # introduced by conversion between (high - low) and (2 * pi).
  2614. x = xp.linspace(1e-9, 6e-9, 50)
  2615. assert xp.all(xp.sin(x) == x) and xp.all(xp.cos(x) == 1.0)
  2616. m = (x * (2 * xp.pi) / (2 * xp.pi)) != x
  2617. assert xp.any(m)
  2618. x = x[m]
  2619. y = stats.circmean(x[:, None], axis=1)
  2620. assert xp.all(y == x)
  2621. def test_circmean_accuracy_huge_input(self, xp):
  2622. # White-box test that circmean() does not introduce undue loss of
  2623. # numerical accuracy by eagerly rotating the input. This is detected
  2624. # by supplying a huge input x such that (x - low) == x numerically.
  2625. x = xp.asarray(1e17, dtype=xp.float64)
  2626. y = math.atan2(xp.sin(x), xp.cos(x)) # -2.6584887370946806
  2627. expected = xp.asarray(y, dtype=xp.float64)
  2628. actual = stats.circmean(x, high=xp.pi, low=-xp.pi)
  2629. xp_assert_close(actual, expected, rtol=1e-15, atol=0.0)
  2630. class TestCircFuncsNanPolicy:
  2631. # `nan_policy` is implemented by the `_axis_nan_policy` decorator, which is
  2632. # not yet array-API compatible. When it is array-API compatible, the generic
  2633. # tests run on every function will be much stronger than these, so these
  2634. # will not be necessary. So I don't see a need to make these array-API compatible;
  2635. # when the time comes, they can just be removed.
  2636. @pytest.mark.parametrize("test_func,expected",
  2637. [(stats.circmean,
  2638. {None: 359.4178026893944,
  2639. 0: np.array([353.0, 6.0, 3.0, 355.5, 9.5,
  2640. 349.5]),
  2641. 1: np.array([0.16769015, 358.66510252])}),
  2642. (stats.circvar,
  2643. {None: 0.008396678483192477,
  2644. 0: np.array([1.9997969, 0.4999873, 0.4999873,
  2645. 6.1230956, 0.1249992, 0.1249992]
  2646. )*(np.pi/180)**2,
  2647. 1: np.array([0.006455174270186603,
  2648. 0.01016767581393285])}),
  2649. (stats.circstd,
  2650. {None: 7.440570778057074,
  2651. 0: np.array([2.00020313, 1.00002539, 1.00002539,
  2652. 3.50108929, 0.50000317,
  2653. 0.50000317]),
  2654. 1: np.array([6.52070212, 8.19138093])})])
  2655. def test_nan_omit_array(self, test_func, expected):
  2656. x = np.array([[355, 5, 2, 359, 10, 350, np.nan],
  2657. [351, 7, 4, 352, 9, 349, np.nan],
  2658. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]])
  2659. for axis in expected.keys():
  2660. if axis is None:
  2661. out = test_func(x, high=360, nan_policy='omit', axis=axis)
  2662. assert_allclose(out, expected[axis], rtol=1e-7)
  2663. else:
  2664. with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
  2665. out = test_func(x, high=360, nan_policy='omit', axis=axis)
  2666. assert_allclose(out[:-1], expected[axis], rtol=1e-7)
  2667. assert_(np.isnan(out[-1]))
  2668. @pytest.mark.parametrize("test_func,expected",
  2669. [(stats.circmean, 0.167690146),
  2670. (stats.circvar, 0.006455174270186603),
  2671. (stats.circstd, 6.520702116)])
  2672. def test_nan_omit(self, test_func, expected):
  2673. x = [355, 5, 2, 359, 10, 350, np.nan]
  2674. assert_allclose(test_func(x, high=360, nan_policy='omit'),
  2675. expected, rtol=1e-7)
  2676. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2677. stats.circstd])
  2678. def test_nan_omit_all(self, test_func):
  2679. x = [np.nan, np.nan, np.nan, np.nan, np.nan]
  2680. with pytest.warns(SmallSampleWarning, match=too_small_1d_omit):
  2681. assert_(np.isnan(test_func(x, nan_policy='omit')))
  2682. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2683. stats.circstd])
  2684. def test_nan_omit_all_axis(self, test_func):
  2685. with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
  2686. x = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan],
  2687. [np.nan, np.nan, np.nan, np.nan, np.nan]])
  2688. out = test_func(x, nan_policy='omit', axis=1)
  2689. assert_(np.isnan(out).all())
  2690. assert_(len(out) == 2)
  2691. @pytest.mark.parametrize("x",
  2692. [[355, 5, 2, 359, 10, 350, np.nan],
  2693. np.array([[355, 5, 2, 359, 10, 350, np.nan],
  2694. [351, 7, 4, 352, np.nan, 9, 349]])])
  2695. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2696. stats.circstd])
  2697. def test_nan_raise(self, test_func, x):
  2698. assert_raises(ValueError, test_func, x, high=360, nan_policy='raise')
  2699. @pytest.mark.parametrize("x",
  2700. [[355, 5, 2, 359, 10, 350, np.nan],
  2701. np.array([[355, 5, 2, 359, 10, 350, np.nan],
  2702. [351, 7, 4, 352, np.nan, 9, 349]])])
  2703. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2704. stats.circstd])
  2705. def test_bad_nan_policy(self, test_func, x):
  2706. assert_raises(ValueError, test_func, x, high=360, nan_policy='foobar')
  2707. class TestMedianTest:
  2708. def test_bad_n_samples(self):
  2709. # median_test requires at least two samples.
  2710. assert_raises(ValueError, stats.median_test, [1, 2, 3])
  2711. def test_empty_sample(self):
  2712. # Each sample must contain at least one value.
  2713. assert_raises(ValueError, stats.median_test, [], [1, 2, 3])
  2714. def test_empty_when_ties_ignored(self):
  2715. # The grand median is 1, and all values in the first argument are
  2716. # equal to the grand median. With ties="ignore", those values are
  2717. # ignored, which results in the first sample being (in effect) empty.
  2718. # This should raise a ValueError.
  2719. assert_raises(ValueError, stats.median_test,
  2720. [1, 1, 1, 1], [2, 0, 1], [2, 0], ties="ignore")
  2721. def test_empty_contingency_row(self):
  2722. # The grand median is 1, and with the default ties="below", all the
  2723. # values in the samples are counted as being below the grand median.
  2724. # This would result a row of zeros in the contingency table, which is
  2725. # an error.
  2726. assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1])
  2727. # With ties="above", all the values are counted as above the
  2728. # grand median.
  2729. assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1],
  2730. ties="above")
  2731. def test_bad_ties(self):
  2732. assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5],
  2733. ties="foo")
  2734. def test_bad_nan_policy(self):
  2735. assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5],
  2736. nan_policy='foobar')
  2737. def test_bad_keyword(self):
  2738. assert_raises(TypeError, stats.median_test, [1, 2, 3], [4, 5],
  2739. foo="foo")
  2740. def test_simple(self):
  2741. x = [1, 2, 3]
  2742. y = [1, 2, 3]
  2743. stat, p, med, tbl = stats.median_test(x, y)
  2744. # The median is floating point, but this equality test should be safe.
  2745. assert_equal(med, 2.0)
  2746. assert_array_equal(tbl, [[1, 1], [2, 2]])
  2747. # The expected values of the contingency table equal the contingency
  2748. # table, so the statistic should be 0 and the p-value should be 1.
  2749. assert_equal(stat, 0)
  2750. assert_equal(p, 1)
  2751. def test_ties_options(self):
  2752. # Test the contingency table calculation.
  2753. x = [1, 2, 3, 4]
  2754. y = [5, 6]
  2755. z = [7, 8, 9]
  2756. # grand median is 5.
  2757. # Default 'ties' option is "below".
  2758. stat, p, m, tbl = stats.median_test(x, y, z)
  2759. assert_equal(m, 5)
  2760. assert_equal(tbl, [[0, 1, 3], [4, 1, 0]])
  2761. stat, p, m, tbl = stats.median_test(x, y, z, ties="ignore")
  2762. assert_equal(m, 5)
  2763. assert_equal(tbl, [[0, 1, 3], [4, 0, 0]])
  2764. stat, p, m, tbl = stats.median_test(x, y, z, ties="above")
  2765. assert_equal(m, 5)
  2766. assert_equal(tbl, [[0, 2, 3], [4, 0, 0]])
  2767. def test_nan_policy_options(self):
  2768. x = [1, 2, np.nan]
  2769. y = [4, 5, 6]
  2770. mt1 = stats.median_test(x, y, nan_policy='propagate')
  2771. s, p, m, t = stats.median_test(x, y, nan_policy='omit')
  2772. assert_equal(mt1, (np.nan, np.nan, np.nan, None))
  2773. assert_allclose(s, 0.31250000000000006)
  2774. assert_allclose(p, 0.57615012203057869)
  2775. assert_equal(m, 4.0)
  2776. assert_equal(t, np.array([[0, 2], [2, 1]]))
  2777. assert_raises(ValueError, stats.median_test, x, y, nan_policy='raise')
  2778. def test_basic(self):
  2779. # median_test calls chi2_contingency to compute the test statistic
  2780. # and p-value. Make sure it hasn't screwed up the call...
  2781. x = [1, 2, 3, 4, 5]
  2782. y = [2, 4, 6, 8]
  2783. stat, p, m, tbl = stats.median_test(x, y)
  2784. assert_equal(m, 4)
  2785. assert_equal(tbl, [[1, 2], [4, 2]])
  2786. exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl)
  2787. assert_allclose(stat, exp_stat)
  2788. assert_allclose(p, exp_p)
  2789. stat, p, m, tbl = stats.median_test(x, y, lambda_=0)
  2790. assert_equal(m, 4)
  2791. assert_equal(tbl, [[1, 2], [4, 2]])
  2792. exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, lambda_=0)
  2793. assert_allclose(stat, exp_stat)
  2794. assert_allclose(p, exp_p)
  2795. stat, p, m, tbl = stats.median_test(x, y, correction=False)
  2796. assert_equal(m, 4)
  2797. assert_equal(tbl, [[1, 2], [4, 2]])
  2798. exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, correction=False)
  2799. assert_allclose(stat, exp_stat)
  2800. assert_allclose(p, exp_p)
  2801. @pytest.mark.parametrize("correction", [False, True])
  2802. def test_result(self, correction):
  2803. x = [1, 2, 3]
  2804. y = [1, 2, 3]
  2805. res = stats.median_test(x, y, correction=correction)
  2806. assert_equal((res.statistic, res.pvalue, res.median, res.table), res)
  2807. @make_xp_test_case(stats.directional_stats)
  2808. class TestDirectionalStats:
  2809. # Reference implementations are not available
  2810. def test_directional_stats_correctness(self, xp):
  2811. # Data from Fisher: Dispersion on a sphere, 1953 and
  2812. # Mardia and Jupp, Directional Statistics.
  2813. decl = -np.deg2rad(np.array([343.2, 62., 36.9, 27., 359.,
  2814. 5.7, 50.4, 357.6, 44.]))
  2815. incl = -np.deg2rad(np.array([66.1, 68.7, 70.1, 82.1, 79.5,
  2816. 73., 69.3, 58.8, 51.4]))
  2817. data = np.stack((np.cos(incl) * np.cos(decl),
  2818. np.cos(incl) * np.sin(decl),
  2819. np.sin(incl)),
  2820. axis=1)
  2821. decl = xp.asarray(decl.tolist())
  2822. incl = xp.asarray(incl.tolist())
  2823. data = xp.asarray(data.tolist())
  2824. dirstats = stats.directional_stats(data)
  2825. directional_mean = dirstats.mean_direction
  2826. reference_mean = xp.asarray([0.2984, -0.1346, -0.9449])
  2827. xp_assert_close(directional_mean, reference_mean, atol=1e-4)
  2828. @pytest.mark.parametrize('angles, ref', [
  2829. ([-np.pi/2, np.pi/2], 1.),
  2830. ([0, 2 * np.pi], 0.)
  2831. ])
  2832. def test_directional_stats_2d_special_cases(self, angles, ref, xp):
  2833. angles = xp.asarray(angles)
  2834. ref = xp.asarray(ref)
  2835. data = xp.stack([xp.cos(angles), xp.sin(angles)], axis=1)
  2836. res = 1 - stats.directional_stats(data).mean_resultant_length
  2837. xp_assert_close(res, ref)
  2838. def test_directional_stats_2d(self, xp):
  2839. # Test that for circular data directional_stats
  2840. # yields the same result as circmean/circvar
  2841. rng = np.random.default_rng(0xec9a6899d5a2830e0d1af479dbe1fd0c)
  2842. testdata = xp.asarray(2 * xp.pi * rng.random((1000, )))
  2843. testdata_vector = xp.stack((xp.cos(testdata),
  2844. xp.sin(testdata)),
  2845. axis=1)
  2846. dirstats = stats.directional_stats(testdata_vector)
  2847. directional_mean = dirstats.mean_direction
  2848. directional_mean_angle = xp.atan2(directional_mean[1], directional_mean[0])
  2849. directional_mean_angle = directional_mean_angle % (2 * xp.pi)
  2850. circmean = stats.circmean(testdata)
  2851. xp_assert_close(directional_mean_angle, circmean)
  2852. directional_var = 1. - dirstats.mean_resultant_length
  2853. circular_var = stats.circvar(testdata)
  2854. xp_assert_close(directional_var, circular_var)
  2855. def test_directional_mean_higher_dim(self, xp):
  2856. # test that directional_stats works for higher dimensions
  2857. # here a 4D array is reduced over axis = 2
  2858. data = xp.asarray([[0.8660254, 0.5, 0.],
  2859. [0.8660254, -0.5, 0.]])
  2860. full_array = xp.asarray(xp.tile(data, (2, 2, 2, 1)))
  2861. expected = xp.asarray([[[1., 0., 0.],
  2862. [1., 0., 0.]],
  2863. [[1., 0., 0.],
  2864. [1., 0., 0.]]])
  2865. dirstats = stats.directional_stats(full_array, axis=2)
  2866. xp_assert_close(dirstats.mean_direction, expected)
  2867. @skip_xp_backends(np_only=True, reason='checking array-like input')
  2868. def test_directional_stats_list_ndarray_input(self, xp):
  2869. # test that list and numpy array inputs yield same results
  2870. data = [[0.8660254, 0.5, 0.], [0.8660254, -0.5, 0]]
  2871. data_array = xp.asarray(data, dtype=xp.float64)
  2872. ref = stats.directional_stats(data)
  2873. res = stats.directional_stats(data_array)
  2874. xp_assert_close(res.mean_direction,
  2875. xp.asarray(ref.mean_direction))
  2876. xp_assert_close(res.mean_resultant_length,
  2877. xp.asarray(res.mean_resultant_length))
  2878. def test_directional_stats_1d_error(self, xp):
  2879. # test that one-dimensional data raises ValueError
  2880. data = xp.ones((5, ))
  2881. message = (r"samples must at least be two-dimensional. "
  2882. r"Instead samples has shape: (5,)")
  2883. with pytest.raises(ValueError, match=re.escape(message)):
  2884. stats.directional_stats(data)
  2885. @pytest.mark.parametrize("dtype", ["float32", "float64"])
  2886. def test_directional_stats_normalize(self, dtype, xp):
  2887. # test that directional stats calculations yield same results
  2888. # for unnormalized input with normalize=True and normalized
  2889. # input with normalize=False
  2890. data = np.array([[0.8660254, 0.5, 0.],
  2891. [1.7320508, -1., 0.]], dtype=dtype)
  2892. res = stats.directional_stats(xp.asarray(data), normalize=True)
  2893. normalized_data = data / np.linalg.norm(data, axis=-1,
  2894. keepdims=True)
  2895. ref = stats.directional_stats(normalized_data, normalize=False)
  2896. xp_assert_close(res.mean_direction,
  2897. xp.asarray(ref.mean_direction))
  2898. xp_assert_close(res.mean_resultant_length,
  2899. xp.asarray(ref.mean_resultant_length))
  2900. @make_xp_test_case(stats.false_discovery_control)
  2901. class TestFDRControl:
  2902. def test_input_validation(self, xp):
  2903. message = "`ps` must include only numbers between 0 and 1"
  2904. with pytest.raises(ValueError, match=message):
  2905. stats.false_discovery_control(xp.asarray([-1, 0.5, 0.7]))
  2906. with pytest.raises(ValueError, match=message):
  2907. stats.false_discovery_control(xp.asarray([0.5, 0.7, 2]))
  2908. with pytest.raises(ValueError, match=message):
  2909. stats.false_discovery_control(xp.asarray([0.5, 0.7, xp.nan]))
  2910. message = "Unrecognized `method` 'YAK'"
  2911. with pytest.raises(ValueError, match=message):
  2912. stats.false_discovery_control(xp.asarray([0.5, 0.7, 0.9]), method='YAK')
  2913. message = "`axis` must be an integer or `None`"
  2914. with pytest.raises(ValueError, match=message):
  2915. stats.false_discovery_control(xp.asarray([0.5, 0.7, 0.9]), axis=1.5)
  2916. with pytest.raises(ValueError, match=message):
  2917. stats.false_discovery_control(xp.asarray([0.5, 0.7, 0.9]), axis=(1, 2))
  2918. def test_against_TileStats(self, xp):
  2919. # See reference [3] of false_discovery_control
  2920. ps = xp.asarray([0.005, 0.009, 0.019, 0.022, 0.051, 0.101, 0.361, 0.387])
  2921. res = stats.false_discovery_control(ps)
  2922. ref = xp.asarray([0.036, 0.036, 0.044, 0.044, 0.082, 0.135, 0.387, 0.387])
  2923. xp_assert_close(res, ref, atol=1e-3)
  2924. @pytest.mark.parametrize("case",
  2925. [([0.24617028, 0.01140030, 0.05652047, 0.06841983,
  2926. 0.07989886, 0.01841490, 0.17540784, 0.06841983,
  2927. 0.06841983, 0.25464082], 'bh'),
  2928. ([0.72102493, 0.03339112, 0.16554665, 0.20039952,
  2929. 0.23402122, 0.05393666, 0.51376399, 0.20039952,
  2930. 0.20039952, 0.74583488], 'by')])
  2931. def test_against_R(self, case, xp):
  2932. # Test against p.adjust, e.g.
  2933. # p = c(0.22155325, 0.00114003,..., 0.0364813 , 0.25464082)
  2934. # p.adjust(p, "BY")
  2935. ref, method = case
  2936. rng = np.random.default_rng(6134137338861652935)
  2937. ps = stats.loguniform.rvs(1e-3, 0.5, size=10, random_state=rng).tolist()
  2938. ps[3] = ps[7] # force a tie
  2939. res = stats.false_discovery_control(xp.asarray(ps), method=method)
  2940. xp_assert_close(res, xp.asarray(ref), atol=1e-6)
  2941. def test_axis_None(self, xp):
  2942. rng = np.random.default_rng(6134137338861652935)
  2943. ps = stats.loguniform.rvs(1e-3, 0.5, size=(3, 4, 5), random_state=rng)
  2944. ps = xp.asarray(ps)
  2945. res = stats.false_discovery_control(ps, axis=None)
  2946. ref = stats.false_discovery_control(xp_ravel(ps))
  2947. xp_assert_equal(res, ref)
  2948. @pytest.mark.parametrize("axis", [0, 1, -1])
  2949. def test_axis(self, axis, xp):
  2950. rng = np.random.default_rng(6134137338861652935)
  2951. ps = stats.loguniform.rvs(1e-3, 0.5, size=(3, 4, 5), random_state=rng)
  2952. res = stats.false_discovery_control(xp.asarray(ps), axis=axis)
  2953. ref = np.apply_along_axis(stats.false_discovery_control, axis, ps)
  2954. xp_assert_close(res, xp.asarray(ref)) # torch isn't *equal*
  2955. def test_edge_cases(self, xp):
  2956. ps = xp.asarray([0.25])
  2957. xp_assert_equal(stats.false_discovery_control(ps), ps)
  2958. ps = xp.asarray([])
  2959. xp_assert_equal(stats.false_discovery_control(ps), ps)
  2960. if is_numpy(xp):
  2961. xp_assert_equal(stats.false_discovery_control(0.25), 0.25)
  2962. class TestCommonAxis:
  2963. # More thorough testing of `axis` in `test_axis_nan_policy`,
  2964. # but those tests aren't run with array API yet. This class
  2965. # is in `test_morestats` instead of `test_axis_nan_policy`
  2966. # because there is no reason to run `test_axis_nan_policy`
  2967. # with the array API CI job right now.
  2968. @pytest.mark.parametrize('case', [(stats.sem, {}),
  2969. (stats.kstat, {'n': 4}),
  2970. (stats.kstat, {'n': 2}),
  2971. (stats.variation, {})])
  2972. def test_axis(self, case, xp):
  2973. if is_torch(xp) and case[0] == stats.variation:
  2974. pytest.xfail(reason="copysign doesn't accept scalar array-api-compat#271")
  2975. fun, kwargs = case
  2976. rng = np.random.default_rng(24598245982345)
  2977. x = xp.asarray(rng.random((6, 7)))
  2978. res = fun(x, **kwargs, axis=0)
  2979. ref = xp.stack([fun(x[:, i], **kwargs) for i in range(x.shape[1])])
  2980. xp_assert_close(res, ref)
  2981. res = fun(x, **kwargs, axis=1)
  2982. ref = xp.stack([fun(x[i, :], **kwargs) for i in range(x.shape[0])])
  2983. xp_assert_close(res, ref)
  2984. res = fun(x, **kwargs, axis=None)
  2985. ref = fun(xp.reshape(x, (-1,)), **kwargs)
  2986. xp_assert_close(res, ref)