test_mstats_basic.py 88 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121
  1. """
  2. Tests for the stats.mstats module (support for masked arrays)
  3. """
  4. import warnings
  5. import platform
  6. import numpy as np
  7. from numpy import nan
  8. import numpy.ma as ma
  9. from numpy.ma import masked, nomask
  10. import scipy.stats.mstats as mstats
  11. from scipy import stats
  12. from .common_tests import check_named_results
  13. import pytest
  14. from pytest import raises as assert_raises
  15. from numpy.ma.testutils import (assert_equal, assert_almost_equal,
  16. assert_array_almost_equal,
  17. assert_array_almost_equal_nulp, assert_,
  18. assert_allclose, assert_array_equal)
  19. from scipy.stats import _mstats_basic, _stats_py
  20. from scipy.conftest import skip_xp_invalid_arg
  21. from scipy.stats._axis_nan_policy import SmallSampleWarning, too_small_1d_not_omit
  22. class TestMquantiles:
  23. def test_mquantiles_limit_keyword(self):
  24. # Regression test for Trac ticket #867
  25. data = np.array([[6., 7., 1.],
  26. [47., 15., 2.],
  27. [49., 36., 3.],
  28. [15., 39., 4.],
  29. [42., 40., -999.],
  30. [41., 41., -999.],
  31. [7., -999., -999.],
  32. [39., -999., -999.],
  33. [43., -999., -999.],
  34. [40., -999., -999.],
  35. [36., -999., -999.]])
  36. desired = [[19.2, 14.6, 1.45],
  37. [40.0, 37.5, 2.5],
  38. [42.8, 40.05, 3.55]]
  39. quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
  40. assert_almost_equal(quants, desired)
  41. def check_equal_gmean(array_like, desired, axis=None, dtype=None, rtol=1e-7):
  42. # Note this doesn't test when axis is not specified
  43. x = mstats.gmean(array_like, axis=axis, dtype=dtype)
  44. assert_allclose(x, desired, rtol=rtol)
  45. assert_equal(x.dtype, dtype)
  46. def check_equal_hmean(array_like, desired, axis=None, dtype=None, rtol=1e-7):
  47. x = stats.hmean(array_like, axis=axis, dtype=dtype)
  48. assert_allclose(x, desired, rtol=rtol)
  49. assert_equal(x.dtype, dtype)
  50. @skip_xp_invalid_arg
  51. class TestGeoMean:
  52. def test_1d(self):
  53. a = [1, 2, 3, 4]
  54. desired = np.power(1*2*3*4, 1./4.)
  55. check_equal_gmean(a, desired, rtol=1e-14)
  56. def test_1d_ma(self):
  57. # Test a 1d masked array
  58. a = ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
  59. desired = 45.2872868812
  60. check_equal_gmean(a, desired)
  61. a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1])
  62. desired = np.power(1*2*3, 1./3.)
  63. check_equal_gmean(a, desired, rtol=1e-14)
  64. def test_1d_ma_value(self):
  65. # Test a 1d masked array with a masked value
  66. a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
  67. mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
  68. desired = 41.4716627439
  69. check_equal_gmean(a, desired)
  70. def test_1d_ma0(self):
  71. # Test a 1d masked array with zero element
  72. a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 0])
  73. desired = 0
  74. check_equal_gmean(a, desired)
  75. def test_1d_ma_inf(self):
  76. # Test a 1d masked array with negative element
  77. a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, -1])
  78. desired = np.nan
  79. with np.errstate(invalid='ignore'):
  80. check_equal_gmean(a, desired)
  81. @pytest.mark.skipif(not hasattr(np, 'float96'),
  82. reason='cannot find float96 so skipping')
  83. def test_1d_float96(self):
  84. a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1])
  85. desired_dt = np.power(1*2*3, 1./3.).astype(np.float96)
  86. check_equal_gmean(a, desired_dt, dtype=np.float96, rtol=1e-14)
  87. def test_2d_ma(self):
  88. a = ma.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
  89. mask=[[0, 0, 0, 0], [1, 0, 0, 1], [0, 1, 1, 0]])
  90. desired = np.array([1, 2, 3, 4])
  91. check_equal_gmean(a, desired, axis=0, rtol=1e-14)
  92. desired = ma.array([np.power(1*2*3*4, 1./4.),
  93. np.power(2*3, 1./2.),
  94. np.power(1*4, 1./2.)])
  95. check_equal_gmean(a, desired, axis=-1, rtol=1e-14)
  96. # Test a 2d masked array
  97. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  98. desired = 52.8885199
  99. check_equal_gmean(np.ma.array(a), desired)
  100. @skip_xp_invalid_arg
  101. class TestHarMean:
  102. def test_1d(self):
  103. a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1])
  104. desired = 3. / (1./1 + 1./2 + 1./3)
  105. check_equal_hmean(a, desired, rtol=1e-14)
  106. a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
  107. desired = 34.1417152147
  108. check_equal_hmean(a, desired)
  109. a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
  110. mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
  111. desired = 31.8137186141
  112. check_equal_hmean(a, desired)
  113. @pytest.mark.skipif(not hasattr(np, 'float96'),
  114. reason='cannot find float96 so skipping')
  115. def test_1d_float96(self):
  116. a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1])
  117. desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3), dtype=np.float96)
  118. check_equal_hmean(a, desired_dt, dtype=np.float96)
  119. def test_2d(self):
  120. a = ma.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
  121. mask=[[0, 0, 0, 0], [1, 0, 0, 1], [0, 1, 1, 0]])
  122. desired = ma.array([1, 2, 3, 4])
  123. check_equal_hmean(a, desired, axis=0, rtol=1e-14)
  124. desired = [4./(1/1.+1/2.+1/3.+1/4.), 2./(1/2.+1/3.), 2./(1/1.+1/4.)]
  125. check_equal_hmean(a, desired, axis=-1, rtol=1e-14)
  126. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  127. desired = 38.6696271841
  128. check_equal_hmean(np.ma.array(a), desired)
  129. class TestRanking:
  130. def test_ranking(self):
  131. x = ma.array([0,1,1,1,2,3,4,5,5,6,])
  132. assert_almost_equal(mstats.rankdata(x),
  133. [1,3,3,3,5,6,7,8.5,8.5,10])
  134. x[[3,4]] = masked
  135. assert_almost_equal(mstats.rankdata(x),
  136. [1,2.5,2.5,0,0,4,5,6.5,6.5,8])
  137. assert_almost_equal(mstats.rankdata(x, use_missing=True),
  138. [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
  139. x = ma.array([0,1,5,1,2,4,3,5,1,6,])
  140. assert_almost_equal(mstats.rankdata(x),
  141. [1,3,8.5,3,5,7,6,8.5,3,10])
  142. x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
  143. assert_almost_equal(mstats.rankdata(x),
  144. [[1,3,3,3,5], [6,7,8.5,8.5,10]])
  145. assert_almost_equal(mstats.rankdata(x, axis=1),
  146. [[1,3,3,3,5], [1,2,3.5,3.5,5]])
  147. assert_almost_equal(mstats.rankdata(x,axis=0),
  148. [[1,1,1,1,1], [2,2,2,2,2,]])
  149. class TestCorr:
  150. def test_pearsonr(self):
  151. # Tests some computations of Pearson's r
  152. x = ma.arange(10)
  153. with warnings.catch_warnings():
  154. # The tests in this context are edge cases, with perfect
  155. # correlation or anticorrelation, or totally masked data.
  156. # None of these should trigger a RuntimeWarning.
  157. warnings.simplefilter("error", RuntimeWarning)
  158. assert_almost_equal(mstats.pearsonr(x, x)[0], 1.0)
  159. assert_almost_equal(mstats.pearsonr(x, x[::-1])[0], -1.0)
  160. x = ma.array(x, mask=True)
  161. pr = mstats.pearsonr(x, x)
  162. assert_(pr[0] is masked)
  163. assert_(pr[1] is masked)
  164. x1 = ma.array([-1.0, 0.0, 1.0])
  165. y1 = ma.array([0, 0, 3])
  166. r, p = mstats.pearsonr(x1, y1)
  167. assert_almost_equal(r, np.sqrt(3)/2)
  168. assert_almost_equal(p, 1.0/3)
  169. # (x2, y2) have the same unmasked data as (x1, y1).
  170. mask = [False, False, False, True]
  171. x2 = ma.array([-1.0, 0.0, 1.0, 99.0], mask=mask)
  172. y2 = ma.array([0, 0, 3, -1], mask=mask)
  173. r, p = mstats.pearsonr(x2, y2)
  174. assert_almost_equal(r, np.sqrt(3)/2)
  175. assert_almost_equal(p, 1.0/3)
  176. def test_pearsonr_misaligned_mask(self):
  177. mx = np.ma.masked_array([1, 2, 3, 4, 5, 6], mask=[0, 1, 0, 0, 0, 0])
  178. my = np.ma.masked_array([9, 8, 7, 6, 5, 9], mask=[0, 0, 1, 0, 0, 0])
  179. x = np.array([1, 4, 5, 6])
  180. y = np.array([9, 6, 5, 9])
  181. mr, mp = mstats.pearsonr(mx, my)
  182. r, p = stats.pearsonr(x, y)
  183. assert_equal(mr, r)
  184. assert_equal(mp, p)
  185. def test_spearmanr(self):
  186. # Tests some computations of Spearman's rho
  187. (x, y) = ([5.05,6.75,3.21,2.66], [1.65,2.64,2.64,6.95])
  188. assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
  189. (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
  190. (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
  191. assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
  192. x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
  193. 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
  194. y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
  195. 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
  196. assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
  197. x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
  198. 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
  199. y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
  200. 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
  201. (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
  202. assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
  203. # Next test is to make sure calculation uses sufficient precision.
  204. # The denominator's value is ~n^3 and used to be represented as an
  205. # int. 2000**3 > 2**32 so these arrays would cause overflow on
  206. # some machines.
  207. x = list(range(2000))
  208. y = list(range(2000))
  209. y[0], y[9] = y[9], y[0]
  210. y[10], y[434] = y[434], y[10]
  211. y[435], y[1509] = y[1509], y[435]
  212. # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1))
  213. # = 1 - (1 / 500)
  214. # = 0.998
  215. assert_almost_equal(mstats.spearmanr(x,y)[0], 0.998)
  216. # test for namedtuple attributes
  217. res = mstats.spearmanr(x, y)
  218. attributes = ('correlation', 'pvalue')
  219. check_named_results(res, attributes, ma=True)
  220. def test_spearmanr_alternative(self):
  221. # check against R
  222. # options(digits=16)
  223. # cor.test(c(2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
  224. # 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7),
  225. # c(22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
  226. # 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4),
  227. # alternative='two.sided', method='spearman')
  228. x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
  229. 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
  230. y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
  231. 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
  232. r_exp = 0.6887298747763864 # from cor.test
  233. r, p = mstats.spearmanr(x, y)
  234. assert_allclose(r, r_exp)
  235. assert_allclose(p, 0.004519192910756)
  236. r, p = mstats.spearmanr(x, y, alternative='greater')
  237. assert_allclose(r, r_exp)
  238. assert_allclose(p, 0.002259596455378)
  239. r, p = mstats.spearmanr(x, y, alternative='less')
  240. assert_allclose(r, r_exp)
  241. assert_allclose(p, 0.9977404035446)
  242. # intuitive test (with obvious positive correlation)
  243. rng = np.random.default_rng(9607138567)
  244. n = 100
  245. x = np.linspace(0, 5, n)
  246. y = 0.1*x + rng.random(n) # y is positively correlated w/ x
  247. stat1, p1 = mstats.spearmanr(x, y)
  248. stat2, p2 = mstats.spearmanr(x, y, alternative="greater")
  249. assert_allclose(p2, p1 / 2) # positive correlation -> small p
  250. stat3, p3 = mstats.spearmanr(x, y, alternative="less")
  251. assert_allclose(p3, 1 - p1 / 2) # positive correlation -> large p
  252. assert stat1 == stat2 == stat3
  253. with pytest.raises(ValueError, match="alternative must be 'less'..."):
  254. mstats.spearmanr(x, y, alternative="ekki-ekki")
  255. @pytest.mark.skipif(platform.machine() == 'ppc64le',
  256. reason="fails/crashes on ppc64le")
  257. def test_kendalltau(self):
  258. # check case with maximum disorder and p=1
  259. x = ma.array(np.array([9, 2, 5, 6]))
  260. y = ma.array(np.array([4, 7, 9, 11]))
  261. # Cross-check with exact result from R:
  262. # cor.test(x,y,method="kendall",exact=1)
  263. expected = [0.0, 1.0]
  264. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  265. # simple case without ties
  266. x = ma.array(np.arange(10))
  267. y = ma.array(np.arange(10))
  268. # Cross-check with exact result from R:
  269. # cor.test(x,y,method="kendall",exact=1)
  270. expected = [1.0, 5.511463844797e-07]
  271. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  272. # check exception in case of invalid method keyword
  273. assert_raises(ValueError, mstats.kendalltau, x, y, method='banana')
  274. # swap a couple of values
  275. b = y[1]
  276. y[1] = y[2]
  277. y[2] = b
  278. # Cross-check with exact result from R:
  279. # cor.test(x,y,method="kendall",exact=1)
  280. expected = [0.9555555555555556, 5.511463844797e-06]
  281. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  282. # swap a couple more
  283. b = y[5]
  284. y[5] = y[6]
  285. y[6] = b
  286. # Cross-check with exact result from R:
  287. # cor.test(x,y,method="kendall",exact=1)
  288. expected = [0.9111111111111111, 2.976190476190e-05]
  289. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  290. # same in opposite direction
  291. x = ma.array(np.arange(10))
  292. y = ma.array(np.arange(10)[::-1])
  293. # Cross-check with exact result from R:
  294. # cor.test(x,y,method="kendall",exact=1)
  295. expected = [-1.0, 5.511463844797e-07]
  296. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  297. # swap a couple of values
  298. b = y[1]
  299. y[1] = y[2]
  300. y[2] = b
  301. # Cross-check with exact result from R:
  302. # cor.test(x,y,method="kendall",exact=1)
  303. expected = [-0.9555555555555556, 5.511463844797e-06]
  304. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  305. # swap a couple more
  306. b = y[5]
  307. y[5] = y[6]
  308. y[6] = b
  309. # Cross-check with exact result from R:
  310. # cor.test(x,y,method="kendall",exact=1)
  311. expected = [-0.9111111111111111, 2.976190476190e-05]
  312. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
  313. # Tests some computations of Kendall's tau
  314. x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
  315. y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
  316. z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
  317. assert_almost_equal(np.asarray(mstats.kendalltau(x, y)),
  318. [+0.3333333, 0.75])
  319. assert_almost_equal(np.asarray(mstats.kendalltau(x, y, method='asymptotic')),
  320. [+0.3333333, 0.4969059])
  321. assert_almost_equal(np.asarray(mstats.kendalltau(x, z)),
  322. [-0.5477226, 0.2785987])
  323. #
  324. x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
  325. 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
  326. y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
  327. 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
  328. result = mstats.kendalltau(x, y)
  329. assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
  330. # test for namedtuple attributes
  331. attributes = ('correlation', 'pvalue')
  332. check_named_results(result, attributes, ma=True)
  333. @pytest.mark.skipif(platform.machine() == 'ppc64le',
  334. reason="fails/crashes on ppc64le")
  335. @pytest.mark.slow
  336. def test_kendalltau_large(self):
  337. # make sure internal variable use correct precision with
  338. # larger arrays
  339. x = np.arange(2000, dtype=float)
  340. x = ma.masked_greater(x, 1995)
  341. y = np.arange(2000, dtype=float)
  342. y = np.concatenate((y[1000:], y[:1000]))
  343. assert_(np.isfinite(mstats.kendalltau(x, y)[1]))
  344. def test_kendalltau_seasonal(self):
  345. # Tests the seasonal Kendall tau.
  346. x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
  347. [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
  348. [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
  349. [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
  350. x = ma.fix_invalid(x).T
  351. output = mstats.kendalltau_seasonal(x)
  352. assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
  353. assert_almost_equal(output['seasonal p-value'].round(2),
  354. [0.18,0.53,0.20,0.04])
  355. @pytest.mark.parametrize("method", ("exact", "asymptotic"))
  356. @pytest.mark.parametrize("alternative", ("two-sided", "greater", "less"))
  357. def test_kendalltau_mstats_vs_stats(self, method, alternative):
  358. # Test that mstats.kendalltau and stats.kendalltau with
  359. # nan_policy='omit' matches behavior of stats.kendalltau
  360. # Accuracy of the alternatives is tested in stats/tests/test_stats.py
  361. rng = np.random.default_rng(8755235945)
  362. n = 50
  363. x = rng.random(n)
  364. y = rng.random(n)
  365. mask = rng.random(n) > 0.5
  366. x_masked = ma.array(x, mask=mask)
  367. y_masked = ma.array(y, mask=mask)
  368. res_masked = mstats.kendalltau(
  369. x_masked, y_masked, method=method, alternative=alternative)
  370. x_compressed = x_masked.compressed()
  371. y_compressed = y_masked.compressed()
  372. res_compressed = stats.kendalltau(
  373. x_compressed, y_compressed, method=method, alternative=alternative)
  374. x[mask] = np.nan
  375. y[mask] = np.nan
  376. res_nan = stats.kendalltau(
  377. x, y, method=method, nan_policy='omit', alternative=alternative)
  378. assert_allclose(res_masked, res_compressed)
  379. assert_allclose(res_nan, res_compressed)
  380. def test_kendall_p_exact_medium(self):
  381. # Test for the exact method with medium samples (some n >= 171)
  382. # expected values generated using SymPy
  383. expectations = {(100, 2393): 0.62822615287956040664,
  384. (101, 2436): 0.60439525773513602669,
  385. (170, 0): 2.755801935583541e-307,
  386. (171, 0): 0.0,
  387. (171, 1): 2.755801935583541e-307,
  388. (172, 1): 0.0,
  389. (200, 9797): 0.74753983745929675209,
  390. (201, 9656): 0.40959218958120363618}
  391. for nc, expected in expectations.items():
  392. res = _mstats_basic._kendall_p_exact(nc[0], nc[1])
  393. assert_almost_equal(res, expected)
  394. @pytest.mark.xslow
  395. def test_kendall_p_exact_large(self):
  396. # Test for the exact method with large samples (n >= 171)
  397. # expected values generated using SymPy
  398. expectations = {(400, 38965): 0.48444283672113314099,
  399. (401, 39516): 0.66363159823474837662,
  400. (800, 156772): 0.42265448483120932055,
  401. (801, 157849): 0.53437553412194416236,
  402. (1600, 637472): 0.84200727400323538419,
  403. (1601, 630304): 0.34465255088058593946}
  404. for nc, expected in expectations.items():
  405. res = _mstats_basic._kendall_p_exact(nc[0], nc[1])
  406. assert_almost_equal(res, expected)
  407. @skip_xp_invalid_arg
  408. # mstats.pointbiserialr returns a NumPy float for the statistic, but converts
  409. # it to a masked array with no masked elements before calling `special.betainc`,
  410. # which won't accept masked arrays when `SCIPY_ARRAY_API=1`.
  411. def test_pointbiserial(self):
  412. x = [1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
  413. 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, -1]
  414. y = [14.8, 13.8, 12.4, 10.1, 7.1, 6.1, 5.8, 4.6, 4.3, 3.5, 3.3, 3.2,
  415. 3.0, 2.8, 2.8, 2.5, 2.4, 2.3, 2.1, 1.7, 1.7, 1.5, 1.3, 1.3, 1.2,
  416. 1.2, 1.1, 0.8, 0.7, 0.6, 0.5, 0.2, 0.2, 0.1, np.nan]
  417. assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)
  418. # test for namedtuple attributes
  419. res = mstats.pointbiserialr(x, y)
  420. attributes = ('correlation', 'pvalue')
  421. check_named_results(res, attributes, ma=True)
  422. @skip_xp_invalid_arg
  423. class TestTrimming:
  424. def test_trim(self):
  425. a = ma.arange(10)
  426. assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
  427. a = ma.arange(10)
  428. assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None])
  429. a = ma.arange(10)
  430. assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)),
  431. [None,None,None,3,4,5,6,7,None,None])
  432. a = ma.arange(10)
  433. assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True),
  434. [None,1,2,3,4,5,6,7,None,None])
  435. a = ma.arange(12)
  436. a[[0,-1]] = a[5] = masked
  437. assert_equal(mstats.trim(a, (2,8)),
  438. [None, None, 2, 3, 4, None, 6, 7, 8, None, None, None])
  439. x = ma.arange(100).reshape(10, 10)
  440. expected = [1]*10 + [0]*70 + [1]*20
  441. trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None)
  442. assert_equal(trimx._mask.ravel(), expected)
  443. trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0)
  444. assert_equal(trimx._mask.ravel(), expected)
  445. trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=-1)
  446. assert_equal(trimx._mask.T.ravel(), expected)
  447. # same as above, but with an extra masked row inserted
  448. x = ma.arange(110).reshape(11, 10)
  449. x[1] = masked
  450. expected = [1]*20 + [0]*70 + [1]*20
  451. trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None)
  452. assert_equal(trimx._mask.ravel(), expected)
  453. trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0)
  454. assert_equal(trimx._mask.ravel(), expected)
  455. trimx = mstats.trim(x.T, (0.1,0.2), relative=True, axis=-1)
  456. assert_equal(trimx.T._mask.ravel(), expected)
  457. def test_trim_old(self):
  458. x = ma.arange(100)
  459. assert_equal(mstats.trimboth(x).count(), 60)
  460. assert_equal(mstats.trimtail(x,tail='r').count(), 80)
  461. x[50:70] = masked
  462. trimx = mstats.trimboth(x)
  463. assert_equal(trimx.count(), 48)
  464. assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
  465. x._mask = nomask
  466. x = x.reshape((10,10))
  467. assert_equal(mstats.trimboth(x).count(), 60)
  468. assert_equal(mstats.trimtail(x).count(), 80)
  469. def test_trimr(self):
  470. x = ma.arange(10)
  471. result = mstats.trimr(x, limits=(0.15, 0.14), inclusive=(False, False))
  472. expected = ma.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
  473. mask=[1, 1, 0, 0, 0, 0, 0, 0, 0, 1])
  474. assert_equal(result, expected)
  475. assert_equal(result.mask, expected.mask)
  476. def test_trimmedmean(self):
  477. data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
  478. 296,299,306,376,428,515,666,1310,2611])
  479. assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0)
  480. assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0)
  481. assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0)
  482. def test_trimmedvar(self):
  483. # Basic test. Additional tests of all arguments, edge cases,
  484. # input validation, and proper treatment of masked arrays are needed.
  485. rng = np.random.default_rng(3262323289434724460)
  486. data_orig = rng.random(size=20)
  487. data = np.sort(data_orig)
  488. data = ma.array(data, mask=[1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
  489. 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])
  490. assert_allclose(mstats.trimmed_var(data_orig, 0.1), data.var())
  491. def test_trimmedstd(self):
  492. # Basic test. Additional tests of all arguments, edge cases,
  493. # input validation, and proper treatment of masked arrays are needed.
  494. rng = np.random.default_rng(7121029245207162780)
  495. data_orig = rng.random(size=20)
  496. data = np.sort(data_orig)
  497. data = ma.array(data, mask=[1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
  498. 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])
  499. assert_allclose(mstats.trimmed_std(data_orig, 0.1), data.std())
  500. def test_trimmed_stde(self):
  501. data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
  502. 296,299,306,376,428,515,666,1310,2611])
  503. assert_almost_equal(mstats.trimmed_stde(data,(0.2,0.2)), 56.13193, 5)
  504. assert_almost_equal(mstats.trimmed_stde(data,0.2), 56.13193, 5)
  505. def test_winsorization(self):
  506. data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
  507. 296,299,306,376,428,515,666,1310,2611])
  508. assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1),
  509. 21551.4, 1)
  510. assert_almost_equal(
  511. mstats.winsorize(data, (0.2,0.2),(False,False)).var(ddof=1),
  512. 11887.3, 1)
  513. data[5] = masked
  514. winsorized = mstats.winsorize(data)
  515. assert_equal(winsorized.mask, data.mask)
  516. def test_winsorization_nan(self):
  517. data = ma.array([np.nan, np.nan, 0, 1, 2])
  518. assert_raises(ValueError, mstats.winsorize, data, (0.05, 0.05),
  519. nan_policy='raise')
  520. # Testing propagate (default behavior)
  521. assert_equal(mstats.winsorize(data, (0.4, 0.4)),
  522. ma.array([2, 2, 2, 2, 2]))
  523. assert_equal(mstats.winsorize(data, (0.8, 0.8)),
  524. ma.array([np.nan, np.nan, np.nan, np.nan, np.nan]))
  525. assert_equal(mstats.winsorize(data, (0.4, 0.4), nan_policy='omit'),
  526. ma.array([np.nan, np.nan, 2, 2, 2]))
  527. assert_equal(mstats.winsorize(data, (0.8, 0.8), nan_policy='omit'),
  528. ma.array([np.nan, np.nan, 2, 2, 2]))
  529. @skip_xp_invalid_arg
  530. class TestMoments:
  531. # Comparison numbers are found using R v.1.5.1
  532. # note that length(testcase) = 4
  533. # testmathworks comes from documentation for the
  534. # Statistics Toolbox for Matlab and can be found at both
  535. # https://www.mathworks.com/help/stats/kurtosis.html
  536. # https://www.mathworks.com/help/stats/skewness.html
  537. # Note that both test cases came from here.
  538. testcase = [1,2,3,4]
  539. testmathworks = ma.fix_invalid([1.165, 0.6268, 0.0751, 0.3516, -0.6965,
  540. np.nan])
  541. testcase_2d = ma.array(
  542. np.array([[0.05245846, 0.50344235, 0.86589117, 0.36936353, 0.46961149],
  543. [0.11574073, 0.31299969, 0.45925772, 0.72618805, 0.75194407],
  544. [0.67696689, 0.91878127, 0.09769044, 0.04645137, 0.37615733],
  545. [0.05903624, 0.29908861, 0.34088298, 0.66216337, 0.83160998],
  546. [0.64619526, 0.94894632, 0.27855892, 0.0706151, 0.39962917]]),
  547. mask=np.array([[True, False, False, True, False],
  548. [True, True, True, False, True],
  549. [False, False, False, False, False],
  550. [True, True, True, True, True],
  551. [False, False, True, False, False]], dtype=bool))
  552. def _assert_equal(self, actual, expect, *, shape=None, dtype=None):
  553. expect = np.asarray(expect)
  554. if shape is not None:
  555. expect = np.broadcast_to(expect, shape)
  556. assert_array_equal(actual, expect)
  557. if dtype is None:
  558. dtype = expect.dtype
  559. assert actual.dtype == dtype
  560. def test_moment(self):
  561. y = mstats.moment(self.testcase,1)
  562. assert_almost_equal(y,0.0,10)
  563. y = mstats.moment(self.testcase,2)
  564. assert_almost_equal(y,1.25)
  565. y = mstats.moment(self.testcase,3)
  566. assert_almost_equal(y,0.0)
  567. y = mstats.moment(self.testcase,4)
  568. assert_almost_equal(y,2.5625)
  569. # check array_like input for moment
  570. y = mstats.moment(self.testcase, [1, 2, 3, 4])
  571. assert_allclose(y, [0, 1.25, 0, 2.5625])
  572. # check moment input consists only of integers
  573. y = mstats.moment(self.testcase, 0.0)
  574. assert_allclose(y, 1.0)
  575. assert_raises(ValueError, mstats.moment, self.testcase, 1.2)
  576. y = mstats.moment(self.testcase, [1.0, 2, 3, 4.0])
  577. assert_allclose(y, [0, 1.25, 0, 2.5625])
  578. # test empty input
  579. y = mstats.moment([])
  580. self._assert_equal(y, np.nan, dtype=np.float64)
  581. y = mstats.moment(np.array([], dtype=np.float32))
  582. self._assert_equal(y, np.nan, dtype=np.float32)
  583. y = mstats.moment(np.zeros((1, 0)), axis=0)
  584. self._assert_equal(y, [], shape=(0,), dtype=np.float64)
  585. y = mstats.moment([[]], axis=1)
  586. self._assert_equal(y, np.nan, shape=(1,), dtype=np.float64)
  587. y = mstats.moment([[]], moment=[0, 1], axis=0)
  588. self._assert_equal(y, [], shape=(2, 0))
  589. x = np.arange(10.)
  590. x[9] = np.nan
  591. assert_equal(mstats.moment(x, 2), ma.masked) # NaN value is ignored
  592. def test_variation(self):
  593. y = mstats.variation(self.testcase)
  594. assert_almost_equal(y,0.44721359549996, 10)
  595. def test_variation_ddof(self):
  596. # test variation with delta degrees of freedom
  597. # regression test for gh-13341
  598. a = np.array([1, 2, 3, 4, 5])
  599. y = mstats.variation(a, ddof=1)
  600. assert_almost_equal(y, 0.5270462766947299)
  601. def test_skewness(self):
  602. y = mstats.skew(self.testmathworks)
  603. assert_almost_equal(y,-0.29322304336607,10)
  604. y = mstats.skew(self.testmathworks,bias=0)
  605. assert_almost_equal(y,-0.437111105023940,10)
  606. y = mstats.skew(self.testcase)
  607. assert_almost_equal(y,0.0,10)
  608. # test that skew works on multidimensional masked arrays
  609. correct_2d = ma.array(
  610. np.array([0.6882870394455785, 0, 0.2665647526856708,
  611. 0, -0.05211472114254485]),
  612. mask=np.array([False, False, False, True, False], dtype=bool)
  613. )
  614. assert_allclose(mstats.skew(self.testcase_2d, 1), correct_2d)
  615. for i, row in enumerate(self.testcase_2d):
  616. assert_almost_equal(mstats.skew(row), correct_2d[i])
  617. correct_2d_bias_corrected = ma.array(
  618. np.array([1.685952043212545, 0.0, 0.3973712716070531, 0,
  619. -0.09026534484117164]),
  620. mask=np.array([False, False, False, True, False], dtype=bool)
  621. )
  622. assert_allclose(mstats.skew(self.testcase_2d, 1, bias=False),
  623. correct_2d_bias_corrected)
  624. for i, row in enumerate(self.testcase_2d):
  625. assert_almost_equal(mstats.skew(row, bias=False),
  626. correct_2d_bias_corrected[i])
  627. # Check consistency between stats and mstats implementations
  628. assert_allclose(mstats.skew(self.testcase_2d[2, :]),
  629. stats.skew(self.testcase_2d[2, :]))
  630. def test_kurtosis(self):
  631. # Set flags for axis = 0 and fisher=0 (Pearson's definition of kurtosis
  632. # for compatibility with Matlab)
  633. y = mstats.kurtosis(self.testmathworks, 0, fisher=0, bias=1)
  634. assert_almost_equal(y, 2.1658856802973, 10)
  635. # Note that MATLAB has confusing docs for the following case
  636. # kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
  637. # kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3)
  638. # The MATLAB docs imply that both should give Fisher's
  639. y = mstats.kurtosis(self.testmathworks, fisher=0, bias=0)
  640. assert_almost_equal(y, 3.663542721189047, 10)
  641. y = mstats.kurtosis(self.testcase, 0, 0)
  642. assert_almost_equal(y, 1.64)
  643. # test that kurtosis works on multidimensional masked arrays
  644. correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385, 0.,
  645. -1.26979517952]),
  646. mask=np.array([False, False, False, True,
  647. False], dtype=bool))
  648. assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1),
  649. correct_2d)
  650. for i, row in enumerate(self.testcase_2d):
  651. assert_almost_equal(mstats.kurtosis(row), correct_2d[i])
  652. correct_2d_bias_corrected = ma.array(
  653. np.array([-1.5, -3., -1.88988209538, 0., -0.5234638463918877]),
  654. mask=np.array([False, False, False, True, False], dtype=bool))
  655. assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1,
  656. bias=False),
  657. correct_2d_bias_corrected)
  658. for i, row in enumerate(self.testcase_2d):
  659. assert_almost_equal(mstats.kurtosis(row, bias=False),
  660. correct_2d_bias_corrected[i])
  661. # Check consistency between stats and mstats implementations
  662. assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]),
  663. stats.kurtosis(self.testcase_2d[2, :]),
  664. nulp=4)
  665. class TestMode:
  666. def test_mode(self):
  667. a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7]
  668. a2 = np.reshape(a1, (3,5))
  669. a3 = np.array([1,2,3,4,5,6])
  670. a4 = np.reshape(a3, (3,2))
  671. ma1 = ma.masked_where(ma.array(a1) > 2, a1)
  672. ma2 = ma.masked_where(a2 > 2, a2)
  673. ma3 = ma.masked_where(a3 < 2, a3)
  674. ma4 = ma.masked_where(ma.array(a4) < 2, a4)
  675. assert_equal(mstats.mode(a1, axis=None), (3,4))
  676. assert_equal(mstats.mode(a1, axis=0), (3,4))
  677. assert_equal(mstats.mode(ma1, axis=None), (0,3))
  678. assert_equal(mstats.mode(a2, axis=None), (3,4))
  679. assert_equal(mstats.mode(ma2, axis=None), (0,3))
  680. assert_equal(mstats.mode(a3, axis=None), (1,1))
  681. assert_equal(mstats.mode(ma3, axis=None), (2,1))
  682. assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]]))
  683. assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]]))
  684. assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]]))
  685. assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]]))
  686. assert_equal(mstats.mode(ma4, axis=0), ([[3,2]], [[1,1]]))
  687. assert_equal(mstats.mode(ma4, axis=-1), ([[2],[3],[5]], [[1],[1],[1]]))
  688. a1_res = mstats.mode(a1, axis=None)
  689. # test for namedtuple attributes
  690. attributes = ('mode', 'count')
  691. check_named_results(a1_res, attributes, ma=True)
  692. def test_mode_modifies_input(self):
  693. # regression test for gh-6428: mode(..., axis=None) may not modify
  694. # the input array
  695. im = np.zeros((100, 100))
  696. im[:50, :] += 1
  697. im[:, :50] += 1
  698. cp = im.copy()
  699. mstats.mode(im, None)
  700. assert_equal(im, cp)
  701. class TestPercentile:
  702. def setup_method(self):
  703. self.a1 = [3, 4, 5, 10, -3, -5, 6]
  704. self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
  705. self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]
  706. def test_percentile(self):
  707. x = np.arange(8) * 0.5
  708. assert_equal(mstats.scoreatpercentile(x, 0), 0.)
  709. assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
  710. assert_equal(mstats.scoreatpercentile(x, 50), 1.75)
  711. def test_2D(self):
  712. x = ma.array([[1, 1, 1],
  713. [1, 1, 1],
  714. [4, 4, 3],
  715. [1, 1, 1],
  716. [1, 1, 1]])
  717. assert_equal(mstats.scoreatpercentile(x, 50), [1, 1, 1])
  718. @skip_xp_invalid_arg
  719. class TestVariability:
  720. """ Comparison numbers are found using R v.1.5.1
  721. note that length(testcase) = 4
  722. """
  723. testcase = ma.fix_invalid([1,2,3,4,np.nan])
  724. def test_sem(self):
  725. # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
  726. y = mstats.sem(self.testcase)
  727. assert_almost_equal(y, 0.6454972244)
  728. n = self.testcase.count()
  729. assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
  730. mstats.sem(self.testcase, ddof=2))
  731. def test_zmap(self):
  732. # This is not in R, so tested by using:
  733. # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
  734. y = mstats.zmap(self.testcase, self.testcase)
  735. desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996,
  736. 0.44721359549996, 1.3416407864999])
  737. assert_array_almost_equal(desired_unmaskedvals,
  738. y.data[y.mask == False], decimal=12) # noqa: E712
  739. def test_zscore(self):
  740. # This is not in R, so tested by using:
  741. # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
  742. y = mstats.zscore(self.testcase)
  743. desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
  744. 0.44721359549996, 1.3416407864999, np.nan])
  745. assert_almost_equal(desired, y, decimal=12)
  746. @skip_xp_invalid_arg
  747. class TestMisc:
  748. def test_obrientransform(self):
  749. args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
  750. [6]+[7]*2+[8]*4+[9]*9+[10]*16]
  751. result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
  752. [10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
  753. assert_almost_equal(np.round(mstats.obrientransform(*args).T, 4),
  754. result, 4)
  755. def test_ks_2samp(self):
  756. x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
  757. [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
  758. [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
  759. [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
  760. x = ma.fix_invalid(x).T
  761. (winter, spring, summer, fall) = x.T
  762. assert_almost_equal(np.round(mstats.ks_2samp(winter, spring), 4),
  763. (0.1818, 0.9628))
  764. assert_almost_equal(np.round(mstats.ks_2samp(winter, spring, 'g'), 4),
  765. (0.1469, 0.6886))
  766. assert_almost_equal(np.round(mstats.ks_2samp(winter, spring, 'l'), 4),
  767. (0.1818, 0.6011))
  768. def test_friedmanchisq(self):
  769. # No missing values
  770. args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
  771. [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
  772. [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
  773. result = mstats.friedmanchisquare(*args)
  774. assert_almost_equal(result[0], 10.4737, 4)
  775. assert_almost_equal(result[1], 0.005317, 6)
  776. # Missing values
  777. x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
  778. [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
  779. [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
  780. [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
  781. x = ma.fix_invalid(x)
  782. result = mstats.friedmanchisquare(*x)
  783. assert_almost_equal(result[0], 2.0156, 4)
  784. assert_almost_equal(result[1], 0.5692, 4)
  785. # test for namedtuple attributes
  786. attributes = ('statistic', 'pvalue')
  787. check_named_results(result, attributes, ma=True)
  788. # copied from `test_stats.py`
  789. array = np.ma.asarray
  790. x1 = [array([0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661,
  791. 0.583, 0.775, 1.0, 0.94, 0.619, 0.972, 0.957]),
  792. array([0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668,
  793. 0.583, 0.838, 1.0, 0.962, 0.666, 0.981, 0.978]),
  794. array([0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609,
  795. 0.563, 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946]),
  796. array([0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685,
  797. 0.625, 0.875, 1.0, 0.962, 0.669, 0.975, 0.970])]
  798. # From "Bioestadistica para las ciencias de la salud" Xf=18.95 p<0.001:
  799. # x2 = [array([4, 3, 5, 3, 5, 3, 2, 5, 4, 4, 4, 3]),
  800. # array([2, 2, 1, 2, 3, 1, 2, 3, 2, 1, 1, 3]),
  801. # array([2, 4, 3, 3, 4, 3, 3, 4, 4, 1, 2, 1]),
  802. # array([3, 5, 4, 3, 4, 4, 3, 3, 3, 4, 4, 4])]
  803. # From Jerrorl H. Zar, "Biostatistical Analysis"(example 12.6),
  804. # Xf=10.68, 0.005 < p < 0.01:
  805. # Probability from this example is inexact
  806. # using Chisquare approximation of Friedman Chisquare.
  807. x3 = [array([7.0, 9.9, 8.5, 5.1, 10.3]),
  808. array([5.3, 5.7, 4.7, 3.5, 7.7]),
  809. array([4.9, 7.6, 5.5, 2.8, 8.4]),
  810. array([8.8, 8.9, 8.1, 3.3, 9.1])]
  811. # test using mstats
  812. assert_array_almost_equal(mstats.friedmanchisquare(*x1),
  813. (10.2283464566929, 0.0167215803284414))
  814. # the following fails
  815. # assert_array_almost_equal(mstats.friedmanchisquare(*x2),
  816. # (18.9428571428571, 0.000280938375189499))
  817. assert_array_almost_equal(mstats.friedmanchisquare(*x3),
  818. (10.68, 0.0135882729582176))
  819. assert_raises(ValueError, mstats.friedmanchisquare, x3[0], x3[1])
  820. def test_regress_simple():
  821. # Regress a line with sinusoidal noise. Test for #1273.
  822. x = np.linspace(0, 100, 100)
  823. y = 0.2 * np.linspace(0, 100, 100) + 10
  824. y += np.sin(np.linspace(0, 20, 100))
  825. result = mstats.linregress(x, y)
  826. # Result is of a correct class and with correct fields
  827. lr = _stats_py.LinregressResult
  828. assert_(isinstance(result, lr))
  829. attributes = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr')
  830. check_named_results(result, attributes, ma=True)
  831. assert 'intercept_stderr' in dir(result)
  832. # Slope and intercept are estimated correctly
  833. assert_almost_equal(result.slope, 0.19644990055858422)
  834. assert_almost_equal(result.intercept, 10.211269918932341)
  835. assert_almost_equal(result.stderr, 0.002395781449783862)
  836. assert_almost_equal(result.intercept_stderr, 0.13866936078570702)
  837. def test_linregress_identical_x():
  838. rng = np.random.default_rng(74578245698)
  839. x = np.zeros(10)
  840. y = rng.random(10)
  841. msg = "Cannot calculate a linear regression if all x values are identical"
  842. with assert_raises(ValueError, match=msg):
  843. mstats.linregress(x, y)
  844. class TestTheilslopes:
  845. def test_theilslopes(self):
  846. # Test for basic slope and intercept.
  847. slope, intercept, lower, upper = mstats.theilslopes([0, 1, 1])
  848. assert_almost_equal(slope, 0.5)
  849. assert_almost_equal(intercept, 0.5)
  850. slope, intercept, lower, upper = mstats.theilslopes([0, 1, 1],
  851. method='joint')
  852. assert_almost_equal(slope, 0.5)
  853. assert_almost_equal(intercept, 0.0)
  854. # Test for correct masking.
  855. y = np.ma.array([0, 1, 100, 1], mask=[False, False, True, False])
  856. slope, intercept, lower, upper = mstats.theilslopes(y)
  857. assert_almost_equal(slope, 1./3)
  858. assert_almost_equal(intercept, 2./3)
  859. slope, intercept, lower, upper = mstats.theilslopes(y,
  860. method='joint')
  861. assert_almost_equal(slope, 1./3)
  862. assert_almost_equal(intercept, 0.0)
  863. # Test of confidence intervals from example in Sen (1968).
  864. x = [1, 2, 3, 4, 10, 12, 18]
  865. y = [9, 15, 19, 20, 45, 55, 78]
  866. slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07)
  867. assert_almost_equal(slope, 4)
  868. assert_almost_equal(intercept, 4.0)
  869. assert_almost_equal(upper, 4.38, decimal=2)
  870. assert_almost_equal(lower, 3.71, decimal=2)
  871. slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07,
  872. method='joint')
  873. assert_almost_equal(slope, 4)
  874. assert_almost_equal(intercept, 6.0)
  875. assert_almost_equal(upper, 4.38, decimal=2)
  876. assert_almost_equal(lower, 3.71, decimal=2)
  877. def test_theilslopes_warnings(self):
  878. # Test `theilslopes` with degenerate input; see gh-15943
  879. msg = "All `x` coordinates.*|Mean of empty slice|invalid value encountered.*"
  880. with pytest.warns(RuntimeWarning, match=msg):
  881. res = mstats.theilslopes([0, 1], [0, 0])
  882. assert np.all(np.isnan(res))
  883. with warnings.catch_warnings():
  884. warnings.filterwarnings(
  885. "ignore", "invalid value encountered...", RuntimeWarning)
  886. res = mstats.theilslopes([0, 0, 0], [0, 1, 0])
  887. assert_allclose(res, (0, 0, np.nan, np.nan))
  888. def test_theilslopes_namedtuple_consistency(self):
  889. """
  890. Simple test to ensure tuple backwards-compatibility of the returned
  891. TheilslopesResult object
  892. """
  893. y = [1, 2, 4]
  894. x = [4, 6, 8]
  895. slope, intercept, low_slope, high_slope = mstats.theilslopes(y, x)
  896. result = mstats.theilslopes(y, x)
  897. # note all four returned values are distinct here
  898. assert_equal(slope, result.slope)
  899. assert_equal(intercept, result.intercept)
  900. assert_equal(low_slope, result.low_slope)
  901. assert_equal(high_slope, result.high_slope)
  902. def test_gh19678_uint8(self):
  903. # `theilslopes` returned unexpected results when `y` was an unsigned type.
  904. # Check that this is resolved.
  905. rng = np.random.default_rng(2549824598234528)
  906. y = rng.integers(0, 255, size=10, dtype=np.uint8)
  907. res = stats.theilslopes(y, y)
  908. np.testing.assert_allclose(res.slope, 1)
  909. def test_siegelslopes():
  910. # method should be exact for straight line
  911. y = 2 * np.arange(10) + 0.5
  912. assert_equal(mstats.siegelslopes(y), (2.0, 0.5))
  913. assert_equal(mstats.siegelslopes(y, method='separate'), (2.0, 0.5))
  914. x = 2 * np.arange(10)
  915. y = 5 * x - 3.0
  916. assert_equal(mstats.siegelslopes(y, x), (5.0, -3.0))
  917. assert_equal(mstats.siegelslopes(y, x, method='separate'), (5.0, -3.0))
  918. # method is robust to outliers: brekdown point of 50%
  919. y[:4] = 1000
  920. assert_equal(mstats.siegelslopes(y, x), (5.0, -3.0))
  921. # if there are no outliers, results should be comparable to linregress
  922. x = np.arange(10)
  923. y = -2.3 + 0.3*x + stats.norm.rvs(size=10, random_state=231)
  924. slope_ols, intercept_ols, _, _, _ = stats.linregress(x, y)
  925. slope, intercept = mstats.siegelslopes(y, x)
  926. assert_allclose(slope, slope_ols, rtol=0.1)
  927. assert_allclose(intercept, intercept_ols, rtol=0.1)
  928. slope, intercept = mstats.siegelslopes(y, x, method='separate')
  929. assert_allclose(slope, slope_ols, rtol=0.1)
  930. assert_allclose(intercept, intercept_ols, rtol=0.1)
  931. def test_siegelslopes_namedtuple_consistency():
  932. """
  933. Simple test to ensure tuple backwards-compatibility of the returned
  934. SiegelslopesResult object.
  935. """
  936. y = [1, 2, 4]
  937. x = [4, 6, 8]
  938. slope, intercept = mstats.siegelslopes(y, x)
  939. result = mstats.siegelslopes(y, x)
  940. # note both returned values are distinct here
  941. assert_equal(slope, result.slope)
  942. assert_equal(intercept, result.intercept)
  943. def test_sen_seasonal_slopes():
  944. rng = np.random.default_rng(5765986256978575148)
  945. x = rng.random(size=(100, 4))
  946. intra_slope, inter_slope = mstats.sen_seasonal_slopes(x)
  947. # reference implementation from the `sen_seasonal_slopes` documentation
  948. def dijk(yi):
  949. n = len(yi)
  950. x = np.arange(n)
  951. dy = yi - yi[:, np.newaxis]
  952. dx = x - x[:, np.newaxis]
  953. mask = np.triu(np.ones((n, n), dtype=bool), k=1)
  954. return dy[mask]/dx[mask]
  955. for i in range(4):
  956. assert_allclose(np.median(dijk(x[:, i])), intra_slope[i])
  957. all_slopes = np.concatenate([dijk(x[:, i]) for i in range(x.shape[1])])
  958. assert_allclose(np.median(all_slopes), inter_slope)
  959. def test_plotting_positions():
  960. # Regression test for #1256
  961. pos = mstats.plotting_positions(np.arange(3), 0, 0)
  962. assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75]))
  963. @skip_xp_invalid_arg
  964. class TestNormalitytests:
  965. def test_vs_nonmasked(self):
  966. x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
  967. assert_array_almost_equal(mstats.normaltest(x),
  968. stats.normaltest(x))
  969. assert_array_almost_equal(mstats.skewtest(x),
  970. stats.skewtest(x))
  971. assert_array_almost_equal(mstats.kurtosistest(x),
  972. stats.kurtosistest(x))
  973. funcs = [stats.normaltest, stats.skewtest, stats.kurtosistest]
  974. mfuncs = [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]
  975. x = [1, 2, 3, 4]
  976. for func, mfunc in zip(funcs, mfuncs):
  977. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  978. res = func(x)
  979. assert np.isnan(res.statistic)
  980. assert np.isnan(res.pvalue)
  981. assert_raises(ValueError, mfunc, x)
  982. def test_axis_None(self):
  983. # Test axis=None (equal to axis=0 for 1-D input)
  984. x = np.array((-2,-1,0,1,2,3)*4)**2
  985. assert_allclose(mstats.normaltest(x, axis=None), mstats.normaltest(x))
  986. assert_allclose(mstats.skewtest(x, axis=None), mstats.skewtest(x))
  987. assert_allclose(mstats.kurtosistest(x, axis=None),
  988. mstats.kurtosistest(x))
  989. def test_maskedarray_input(self):
  990. # Add some masked values, test result doesn't change
  991. x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
  992. xm = np.ma.array(np.r_[np.inf, x, 10],
  993. mask=np.r_[True, [False] * x.size, True])
  994. assert_allclose(mstats.normaltest(xm), stats.normaltest(x))
  995. assert_allclose(mstats.skewtest(xm), stats.skewtest(x))
  996. assert_allclose(mstats.kurtosistest(xm), stats.kurtosistest(x))
  997. def test_nd_input(self):
  998. x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
  999. x_2d = np.vstack([x] * 2).T
  1000. for func in [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]:
  1001. res_1d = func(x)
  1002. res_2d = func(x_2d)
  1003. assert_allclose(res_2d[0], [res_1d[0]] * 2)
  1004. assert_allclose(res_2d[1], [res_1d[1]] * 2)
  1005. def test_normaltest_result_attributes(self):
  1006. x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
  1007. res = mstats.normaltest(x)
  1008. attributes = ('statistic', 'pvalue')
  1009. check_named_results(res, attributes, ma=True)
  1010. def test_kurtosistest_result_attributes(self):
  1011. x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
  1012. res = mstats.kurtosistest(x)
  1013. attributes = ('statistic', 'pvalue')
  1014. check_named_results(res, attributes, ma=True)
  1015. def test_regression_9033(self):
  1016. # x clearly non-normal but power of negative denom needs
  1017. # to be handled correctly to reject normality
  1018. counts = [128, 0, 58, 7, 0, 41, 16, 0, 0, 167]
  1019. x = np.hstack([np.full(c, i) for i, c in enumerate(counts)])
  1020. assert_equal(mstats.kurtosistest(x)[1] < 0.01, True)
  1021. @pytest.mark.parametrize("test", ["skewtest", "kurtosistest"])
  1022. @pytest.mark.parametrize("alternative", ["less", "greater"])
  1023. def test_alternative(self, test, alternative):
  1024. x = stats.norm.rvs(loc=10, scale=2.5, size=30, random_state=123)
  1025. stats_test = getattr(stats, test)
  1026. mstats_test = getattr(mstats, test)
  1027. z_ex, p_ex = stats_test(x, alternative=alternative)
  1028. z, p = mstats_test(x, alternative=alternative)
  1029. assert_allclose(z, z_ex, atol=1e-12)
  1030. assert_allclose(p, p_ex, atol=1e-12)
  1031. # test with masked arrays
  1032. x[1:5] = np.nan
  1033. x = np.ma.masked_array(x, mask=np.isnan(x))
  1034. z_ex, p_ex = stats_test(x.compressed(), alternative=alternative)
  1035. z, p = mstats_test(x, alternative=alternative)
  1036. assert_allclose(z, z_ex, atol=1e-12)
  1037. assert_allclose(p, p_ex, atol=1e-12)
  1038. def test_bad_alternative(self):
  1039. x = stats.norm.rvs(size=20, random_state=123)
  1040. msg = r"`alternative` must be..."
  1041. with pytest.raises(ValueError, match=msg):
  1042. mstats.skewtest(x, alternative='error')
  1043. with pytest.raises(ValueError, match=msg):
  1044. mstats.kurtosistest(x, alternative='error')
  1045. class TestFOneway:
  1046. def test_result_attributes(self):
  1047. a = np.array([655, 788], dtype=np.uint16)
  1048. b = np.array([789, 772], dtype=np.uint16)
  1049. res = mstats.f_oneway(a, b)
  1050. attributes = ('statistic', 'pvalue')
  1051. check_named_results(res, attributes, ma=True)
  1052. class TestMannwhitneyu:
  1053. # data from gh-1428
  1054. x = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1055. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
  1056. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1057. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1058. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
  1059. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1060. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
  1061. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1062. 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2.,
  1063. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1064. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1.,
  1065. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1066. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1067. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
  1068. 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1069. 1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1.,
  1070. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1071. 1., 1., 1., 1., 1., 1.])
  1072. y = np.array([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1.,
  1073. 2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3.,
  1074. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1.,
  1075. 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
  1076. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2.,
  1077. 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2.,
  1078. 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1079. 1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1.,
  1080. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  1081. 2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1.,
  1082. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1.,
  1083. 1., 1., 1., 1.])
  1084. def test_result_attributes(self):
  1085. res = mstats.mannwhitneyu(self.x, self.y)
  1086. attributes = ('statistic', 'pvalue')
  1087. check_named_results(res, attributes, ma=True)
  1088. def test_against_stats(self):
  1089. # gh-4641 reported that stats.mannwhitneyu returned half the p-value
  1090. # of mstats.mannwhitneyu. Default alternative of stats.mannwhitneyu
  1091. # is now two-sided, so they match.
  1092. res1 = mstats.mannwhitneyu(self.x, self.y)
  1093. res2 = stats.mannwhitneyu(self.x, self.y)
  1094. assert res1.statistic == res2.statistic
  1095. assert_allclose(res1.pvalue, res2.pvalue)
  1096. class TestKruskal:
  1097. def test_result_attributes(self):
  1098. x = [1, 3, 5, 7, 9]
  1099. y = [2, 4, 6, 8, 10]
  1100. res = mstats.kruskal(x, y)
  1101. attributes = ('statistic', 'pvalue')
  1102. check_named_results(res, attributes, ma=True)
  1103. # TODO: for all ttest functions, add tests with masked array inputs
  1104. class TestTtest_rel:
  1105. def setup_method(self):
  1106. self.rng = np.random.default_rng(9373599542)
  1107. def test_vs_nonmasked(self):
  1108. outcome = self.rng.standard_normal((20, 4)) + [0, 0, 1, 2]
  1109. # 1-D inputs
  1110. res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1])
  1111. res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1])
  1112. assert_allclose(res1, res2)
  1113. # 2-D inputs
  1114. res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
  1115. res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
  1116. assert_allclose(res1, res2)
  1117. res1 = stats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
  1118. res2 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
  1119. assert_allclose(res1, res2)
  1120. # Check default is axis=0
  1121. res3 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:])
  1122. assert_allclose(res2, res3)
  1123. def test_fully_masked(self):
  1124. outcome = ma.masked_array(self.rng.standard_normal((3, 2)),
  1125. mask=[[1, 1, 1], [0, 0, 0]])
  1126. with warnings.catch_warnings():
  1127. warnings.filterwarnings(
  1128. "ignore", "invalid value encountered in absolute", RuntimeWarning)
  1129. for pair in [(outcome[:, 0], outcome[:, 1]),
  1130. ([np.nan, np.nan], [1.0, 2.0])]:
  1131. t, p = mstats.ttest_rel(*pair)
  1132. assert_array_equal(t, (np.nan, np.nan))
  1133. assert_array_equal(p, (np.nan, np.nan))
  1134. def test_result_attributes(self):
  1135. outcome = self.rng.standard_normal((20, 4)) + [0, 0, 1, 2]
  1136. res = mstats.ttest_rel(outcome[:, 0], outcome[:, 1])
  1137. attributes = ('statistic', 'pvalue')
  1138. check_named_results(res, attributes, ma=True)
  1139. def test_invalid_input_size(self):
  1140. assert_raises(ValueError, mstats.ttest_rel,
  1141. np.arange(10), np.arange(11))
  1142. x = np.arange(24)
  1143. assert_raises(ValueError, mstats.ttest_rel,
  1144. x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=1)
  1145. assert_raises(ValueError, mstats.ttest_rel,
  1146. x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=2)
  1147. def test_empty(self):
  1148. res1 = mstats.ttest_rel([], [])
  1149. assert_(np.all(np.isnan(res1)))
  1150. def test_zero_division(self):
  1151. t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
  1152. assert_equal((np.abs(t), p), (np.inf, 0))
  1153. with warnings.catch_warnings():
  1154. warnings.filterwarnings(
  1155. "ignore", "invalid value encountered in absolute", RuntimeWarning)
  1156. t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
  1157. assert_array_equal(t, np.array([np.nan, np.nan]))
  1158. assert_array_equal(p, np.array([np.nan, np.nan]))
  1159. def test_bad_alternative(self):
  1160. msg = r"alternative must be 'less', 'greater' or 'two-sided'"
  1161. with pytest.raises(ValueError, match=msg):
  1162. mstats.ttest_ind([1, 2, 3], [4, 5, 6], alternative='foo')
  1163. @pytest.mark.parametrize("alternative", ["less", "greater"])
  1164. def test_alternative(self, alternative):
  1165. x = stats.norm.rvs(loc=10, scale=5, size=25, random_state=42)
  1166. y = stats.norm.rvs(loc=8, scale=2, size=25, random_state=42)
  1167. t_ex, p_ex = stats.ttest_rel(x, y, alternative=alternative)
  1168. t, p = mstats.ttest_rel(x, y, alternative=alternative)
  1169. assert_allclose(t, t_ex, rtol=1e-14)
  1170. assert_allclose(p, p_ex, rtol=1e-14)
  1171. # test with masked arrays
  1172. x[1:10] = np.nan
  1173. y[1:10] = np.nan
  1174. x = np.ma.masked_array(x, mask=np.isnan(x))
  1175. y = np.ma.masked_array(y, mask=np.isnan(y))
  1176. t, p = mstats.ttest_rel(x, y, alternative=alternative)
  1177. t_ex, p_ex = stats.ttest_rel(x.compressed(), y.compressed(),
  1178. alternative=alternative)
  1179. assert_allclose(t, t_ex, rtol=1e-14)
  1180. assert_allclose(p, p_ex, rtol=1e-14)
  1181. class TestTtest_ind:
  1182. def setup_method(self):
  1183. self.rng = np.random.default_rng(4776115069)
  1184. def test_vs_nonmasked(self):
  1185. outcome = self.rng.standard_normal((20, 4)) + [0, 0, 1, 2]
  1186. # 1-D inputs
  1187. res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
  1188. res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
  1189. assert_allclose(res1, res2)
  1190. # 2-D inputs
  1191. res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
  1192. res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
  1193. assert_allclose(res1, res2)
  1194. res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
  1195. res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
  1196. assert_allclose(res1, res2)
  1197. # Check default is axis=0
  1198. res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
  1199. assert_allclose(res2, res3)
  1200. # Check equal_var
  1201. res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
  1202. res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
  1203. assert_allclose(res4, res5)
  1204. res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
  1205. res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
  1206. assert_allclose(res4, res5)
  1207. def test_fully_masked(self):
  1208. outcome = ma.masked_array(self.rng.standard_normal((3, 2)),
  1209. mask=[[1, 1, 1], [0, 0, 0]])
  1210. with warnings.catch_warnings():
  1211. warnings.filterwarnings(
  1212. "ignore", "invalid value encountered in absolute", RuntimeWarning)
  1213. for pair in [(outcome[:, 0], outcome[:, 1]),
  1214. ([np.nan, np.nan], [1.0, 2.0])]:
  1215. t, p = mstats.ttest_ind(*pair)
  1216. assert_array_equal(t, (np.nan, np.nan))
  1217. assert_array_equal(p, (np.nan, np.nan))
  1218. def test_result_attributes(self):
  1219. outcome = self.rng.standard_normal((20, 4)) + [0, 0, 1, 2]
  1220. res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
  1221. attributes = ('statistic', 'pvalue')
  1222. check_named_results(res, attributes, ma=True)
  1223. def test_empty(self):
  1224. res1 = mstats.ttest_ind([], [])
  1225. assert_(np.all(np.isnan(res1)))
  1226. def test_zero_division(self):
  1227. t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
  1228. assert_equal((np.abs(t), p), (np.inf, 0))
  1229. with warnings.catch_warnings():
  1230. warnings.filterwarnings(
  1231. "ignore", "invalid value encountered in absolute", RuntimeWarning)
  1232. t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
  1233. assert_array_equal(t, (np.nan, np.nan))
  1234. assert_array_equal(p, (np.nan, np.nan))
  1235. t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1], equal_var=False)
  1236. assert_equal((np.abs(t), p), (np.inf, 0))
  1237. assert_array_equal(mstats.ttest_ind([0, 0, 0], [0, 0, 0],
  1238. equal_var=False), (np.nan, np.nan))
  1239. def test_bad_alternative(self):
  1240. msg = r"alternative must be 'less', 'greater' or 'two-sided'"
  1241. with pytest.raises(ValueError, match=msg):
  1242. mstats.ttest_ind([1, 2, 3], [4, 5, 6], alternative='foo')
  1243. @pytest.mark.parametrize("alternative", ["less", "greater"])
  1244. def test_alternative(self, alternative):
  1245. x = stats.norm.rvs(loc=10, scale=2, size=100, random_state=123)
  1246. y = stats.norm.rvs(loc=8, scale=2, size=100, random_state=123)
  1247. t_ex, p_ex = stats.ttest_ind(x, y, alternative=alternative)
  1248. t, p = mstats.ttest_ind(x, y, alternative=alternative)
  1249. assert_allclose(t, t_ex, rtol=1e-14)
  1250. assert_allclose(p, p_ex, rtol=1e-14)
  1251. # test with masked arrays
  1252. x[1:10] = np.nan
  1253. y[80:90] = np.nan
  1254. x = np.ma.masked_array(x, mask=np.isnan(x))
  1255. y = np.ma.masked_array(y, mask=np.isnan(y))
  1256. t_ex, p_ex = stats.ttest_ind(x.compressed(), y.compressed(),
  1257. alternative=alternative)
  1258. t, p = mstats.ttest_ind(x, y, alternative=alternative)
  1259. assert_allclose(t, t_ex, rtol=1e-14)
  1260. assert_allclose(p, p_ex, rtol=1e-14)
  1261. class TestTtest_1samp:
  1262. def setup_method(self):
  1263. self.rng = np.random.default_rng(6043813830)
  1264. def test_vs_nonmasked(self):
  1265. outcome = self.rng.standard_normal((20, 4)) + [0, 0, 1, 2]
  1266. # 1-D inputs
  1267. res1 = stats.ttest_1samp(outcome[:, 0], 1)
  1268. res2 = mstats.ttest_1samp(outcome[:, 0], 1)
  1269. assert_allclose(res1, res2)
  1270. def test_fully_masked(self):
  1271. outcome = ma.masked_array(self.rng.standard_normal(3), mask=[1, 1, 1])
  1272. expected = (np.nan, np.nan)
  1273. with warnings.catch_warnings():
  1274. warnings.filterwarnings(
  1275. "ignore", "invalid value encountered in absolute", RuntimeWarning)
  1276. for pair in [((np.nan, np.nan), 0.0), (outcome, 0.0)]:
  1277. t, p = mstats.ttest_1samp(*pair)
  1278. assert_array_equal(p, expected)
  1279. assert_array_equal(t, expected)
  1280. def test_result_attributes(self):
  1281. outcome = self.rng.standard_normal((20, 4)) + [0, 0, 1, 2]
  1282. res = mstats.ttest_1samp(outcome[:, 0], 1)
  1283. attributes = ('statistic', 'pvalue')
  1284. check_named_results(res, attributes, ma=True)
  1285. def test_empty(self):
  1286. res1 = mstats.ttest_1samp([], 1)
  1287. assert_(np.all(np.isnan(res1)))
  1288. def test_zero_division(self):
  1289. t, p = mstats.ttest_1samp([0, 0, 0], 1)
  1290. assert_equal((np.abs(t), p), (np.inf, 0))
  1291. with warnings.catch_warnings():
  1292. warnings.filterwarnings(
  1293. "ignore", "invalid value encountered in absolute", RuntimeWarning)
  1294. t, p = mstats.ttest_1samp([0, 0, 0], 0)
  1295. assert_(np.isnan(t))
  1296. assert_array_equal(p, (np.nan, np.nan))
  1297. def test_bad_alternative(self):
  1298. msg = r"alternative must be 'less', 'greater' or 'two-sided'"
  1299. with pytest.raises(ValueError, match=msg):
  1300. mstats.ttest_1samp([1, 2, 3], 4, alternative='foo')
  1301. @pytest.mark.parametrize("alternative", ["less", "greater"])
  1302. def test_alternative(self, alternative):
  1303. x = stats.norm.rvs(loc=10, scale=2, size=100, random_state=123)
  1304. t_ex, p_ex = stats.ttest_1samp(x, 9, alternative=alternative)
  1305. t, p = mstats.ttest_1samp(x, 9, alternative=alternative)
  1306. assert_allclose(t, t_ex, rtol=1e-14)
  1307. assert_allclose(p, p_ex, rtol=1e-14)
  1308. # test with masked arrays
  1309. x[1:10] = np.nan
  1310. x = np.ma.masked_array(x, mask=np.isnan(x))
  1311. t_ex, p_ex = stats.ttest_1samp(x.compressed(), 9,
  1312. alternative=alternative)
  1313. t, p = mstats.ttest_1samp(x, 9, alternative=alternative)
  1314. assert_allclose(t, t_ex, rtol=1e-14)
  1315. assert_allclose(p, p_ex, rtol=1e-14)
  1316. class TestDescribe:
  1317. """
  1318. Tests for mstats.describe.
  1319. Note that there are also tests for `mstats.describe` in the
  1320. class TestCompareWithStats.
  1321. """
  1322. def test_basic_with_axis(self):
  1323. # This is a basic test that is also a regression test for gh-7303.
  1324. a = np.ma.masked_array([[0, 1, 2, 3, 4, 9],
  1325. [5, 5, 0, 9, 3, 3]],
  1326. mask=[[0, 0, 0, 0, 0, 1],
  1327. [0, 0, 1, 1, 0, 0]])
  1328. result = mstats.describe(a, axis=1)
  1329. assert_equal(result.nobs, [5, 4])
  1330. amin, amax = result.minmax
  1331. assert_equal(amin, [0, 3])
  1332. assert_equal(amax, [4, 5])
  1333. assert_equal(result.mean, [2.0, 4.0])
  1334. assert_equal(result.variance, [2.0, 1.0])
  1335. assert_equal(result.skewness, [0.0, 0.0])
  1336. assert_allclose(result.kurtosis, [-1.3, -2.0])
  1337. @skip_xp_invalid_arg
  1338. class TestCompareWithStats:
  1339. """
  1340. Class to compare mstats results with stats results.
  1341. It is in general assumed that scipy.stats is at a more mature stage than
  1342. stats.mstats. If a routine in mstats results in similar results like in
  1343. scipy.stats, this is considered also as a proper validation of scipy.mstats
  1344. routine.
  1345. Different sample sizes are used for testing, as some problems between stats
  1346. and mstats are dependent on sample size.
  1347. Author: Alexander Loew
  1348. NOTE that some tests fail. This might be caused by
  1349. a) actual differences or bugs between stats and mstats
  1350. b) numerical inaccuracies
  1351. c) different definitions of routine interfaces
  1352. These failures need to be checked. Current workaround is to have disabled these
  1353. tests, but issuing reports on scipy-dev
  1354. """
  1355. def get_n(self):
  1356. """ Returns list of sample sizes to be used for comparison. """
  1357. return [1000, 100, 10, 5]
  1358. def generate_xy_sample(self, n):
  1359. # This routine generates numpy arrays and corresponding masked arrays
  1360. # with the same data, but additional masked values
  1361. rng = np.random.RandomState(1234567)
  1362. x = rng.randn(n)
  1363. y = x + rng.randn(n)
  1364. xm = np.full(len(x) + 5, 1e16)
  1365. ym = np.full(len(y) + 5, 1e16)
  1366. xm[0:len(x)] = x
  1367. ym[0:len(y)] = y
  1368. mask = xm > 9e15
  1369. xm = np.ma.array(xm, mask=mask)
  1370. ym = np.ma.array(ym, mask=mask)
  1371. return x, y, xm, ym
  1372. def generate_xy_sample2D(self, n, nx):
  1373. x = np.full((n, nx), np.nan)
  1374. y = np.full((n, nx), np.nan)
  1375. xm = np.full((n+5, nx), np.nan)
  1376. ym = np.full((n+5, nx), np.nan)
  1377. for i in range(nx):
  1378. x[:, i], y[:, i], dx, dy = self.generate_xy_sample(n)
  1379. xm[0:n, :] = x[0:n]
  1380. ym[0:n, :] = y[0:n]
  1381. xm = np.ma.array(xm, mask=np.isnan(xm))
  1382. ym = np.ma.array(ym, mask=np.isnan(ym))
  1383. return x, y, xm, ym
  1384. def test_linregress(self):
  1385. for n in self.get_n():
  1386. x, y, xm, ym = self.generate_xy_sample(n)
  1387. result1 = stats.linregress(x, y)
  1388. result2 = stats.mstats.linregress(xm, ym)
  1389. assert_allclose(np.asarray(result1), np.asarray(result2))
  1390. def test_pearsonr(self):
  1391. for n in self.get_n():
  1392. x, y, xm, ym = self.generate_xy_sample(n)
  1393. r, p = stats.pearsonr(x, y)
  1394. rm, pm = stats.mstats.pearsonr(xm, ym)
  1395. assert_almost_equal(r, rm, decimal=14)
  1396. assert_almost_equal(p, pm, decimal=14)
  1397. def test_spearmanr(self):
  1398. for n in self.get_n():
  1399. x, y, xm, ym = self.generate_xy_sample(n)
  1400. r, p = stats.spearmanr(x, y)
  1401. rm, pm = stats.mstats.spearmanr(xm, ym)
  1402. assert_almost_equal(r, rm, 14)
  1403. assert_almost_equal(p, pm, 14)
  1404. def test_spearmanr_backcompat_useties(self):
  1405. # A regression test to ensure we don't break backwards compat
  1406. # more than we have to (see gh-9204).
  1407. x = np.arange(6)
  1408. assert_raises(ValueError, mstats.spearmanr, x, x, False)
  1409. def test_gmean(self):
  1410. for n in self.get_n():
  1411. x, y, xm, ym = self.generate_xy_sample(n)
  1412. r = stats.gmean(abs(x))
  1413. rm = stats.mstats.gmean(abs(xm))
  1414. assert_allclose(r, rm, rtol=1e-13)
  1415. r = stats.gmean(abs(y))
  1416. rm = stats.mstats.gmean(abs(ym))
  1417. assert_allclose(r, rm, rtol=1e-13)
  1418. def test_hmean(self):
  1419. for n in self.get_n():
  1420. x, y, xm, ym = self.generate_xy_sample(n)
  1421. r = stats.hmean(abs(x))
  1422. rm = stats.mstats.hmean(abs(xm))
  1423. assert_almost_equal(r, rm, 10)
  1424. r = stats.hmean(abs(y))
  1425. rm = stats.mstats.hmean(abs(ym))
  1426. assert_almost_equal(r, rm, 10)
  1427. def test_skew(self):
  1428. for n in self.get_n():
  1429. x, y, xm, ym = self.generate_xy_sample(n)
  1430. r = stats.skew(x)
  1431. rm = stats.mstats.skew(xm)
  1432. assert_almost_equal(r, rm, 10)
  1433. r = stats.skew(y)
  1434. rm = stats.mstats.skew(ym)
  1435. assert_almost_equal(r, rm, 10)
  1436. def test_moment(self):
  1437. for n in self.get_n():
  1438. x, y, xm, ym = self.generate_xy_sample(n)
  1439. r = stats.moment(x)
  1440. rm = stats.mstats.moment(xm)
  1441. assert_almost_equal(r, rm, 10)
  1442. r = stats.moment(y)
  1443. rm = stats.mstats.moment(ym)
  1444. assert_almost_equal(r, rm, 10)
  1445. def test_zscore(self):
  1446. for n in self.get_n():
  1447. x, y, xm, ym = self.generate_xy_sample(n)
  1448. # reference solution
  1449. zx = (x - x.mean()) / x.std()
  1450. zy = (y - y.mean()) / y.std()
  1451. # validate stats
  1452. assert_allclose(stats.zscore(x), zx, rtol=1e-10)
  1453. assert_allclose(stats.zscore(y), zy, rtol=1e-10)
  1454. # compare stats and mstats
  1455. assert_allclose(stats.zscore(x), stats.mstats.zscore(xm[0:len(x)]),
  1456. rtol=1e-10)
  1457. assert_allclose(stats.zscore(y), stats.mstats.zscore(ym[0:len(y)]),
  1458. rtol=1e-10)
  1459. def test_kurtosis(self):
  1460. for n in self.get_n():
  1461. x, y, xm, ym = self.generate_xy_sample(n)
  1462. r = stats.kurtosis(x)
  1463. rm = stats.mstats.kurtosis(xm)
  1464. assert_almost_equal(r, rm, 10)
  1465. r = stats.kurtosis(y)
  1466. rm = stats.mstats.kurtosis(ym)
  1467. assert_almost_equal(r, rm, 10)
  1468. def test_sem(self):
  1469. # example from stats.sem doc
  1470. a = np.arange(20).reshape(5, 4)
  1471. am = np.ma.array(a)
  1472. r = stats.sem(a, ddof=1)
  1473. rm = stats.mstats.sem(am, ddof=1)
  1474. assert_allclose(r, 2.82842712, atol=1e-5)
  1475. assert_allclose(rm, 2.82842712, atol=1e-5)
  1476. for n in self.get_n():
  1477. x, y, xm, ym = self.generate_xy_sample(n)
  1478. assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
  1479. stats.sem(x, axis=None, ddof=0), decimal=13)
  1480. assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
  1481. stats.sem(y, axis=None, ddof=0), decimal=13)
  1482. assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
  1483. stats.sem(x, axis=None, ddof=1), decimal=13)
  1484. assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
  1485. stats.sem(y, axis=None, ddof=1), decimal=13)
  1486. def test_describe(self):
  1487. for n in self.get_n():
  1488. x, y, xm, ym = self.generate_xy_sample(n)
  1489. r = stats.describe(x, ddof=1)
  1490. rm = stats.mstats.describe(xm, ddof=1)
  1491. for ii in range(6):
  1492. assert_almost_equal(np.asarray(r[ii]),
  1493. np.asarray(rm[ii]),
  1494. decimal=12)
  1495. def test_describe_result_attributes(self):
  1496. actual = mstats.describe(np.arange(5))
  1497. attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
  1498. 'kurtosis')
  1499. check_named_results(actual, attributes, ma=True)
  1500. def test_rankdata(self):
  1501. for n in self.get_n():
  1502. x, y, xm, ym = self.generate_xy_sample(n)
  1503. r = stats.rankdata(x)
  1504. rm = stats.mstats.rankdata(x)
  1505. assert_allclose(r, rm)
  1506. def test_tmean(self):
  1507. for n in self.get_n():
  1508. x, y, xm, ym = self.generate_xy_sample(n)
  1509. assert_almost_equal(stats.tmean(x),stats.mstats.tmean(xm), 14)
  1510. assert_almost_equal(stats.tmean(y),stats.mstats.tmean(ym), 14)
  1511. def test_tmax(self):
  1512. for n in self.get_n():
  1513. x, y, xm, ym = self.generate_xy_sample(n)
  1514. assert_almost_equal(stats.tmax(x,2.),
  1515. stats.mstats.tmax(xm,2.), 10)
  1516. assert_almost_equal(stats.tmax(y,2.),
  1517. stats.mstats.tmax(ym,2.), 10)
  1518. assert_almost_equal(stats.tmax(x, upperlimit=3.),
  1519. stats.mstats.tmax(xm, upperlimit=3.), 10)
  1520. assert_almost_equal(stats.tmax(y, upperlimit=3.),
  1521. stats.mstats.tmax(ym, upperlimit=3.), 10)
  1522. def test_tmin(self):
  1523. for n in self.get_n():
  1524. x, y, xm, ym = self.generate_xy_sample(n)
  1525. assert_equal(stats.tmin(x), stats.mstats.tmin(xm))
  1526. assert_equal(stats.tmin(y), stats.mstats.tmin(ym))
  1527. assert_almost_equal(stats.tmin(x, lowerlimit=-1.),
  1528. stats.mstats.tmin(xm, lowerlimit=-1.), 10)
  1529. assert_almost_equal(stats.tmin(y, lowerlimit=-1.),
  1530. stats.mstats.tmin(ym, lowerlimit=-1.), 10)
  1531. def test_zmap(self):
  1532. for n in self.get_n():
  1533. x, y, xm, ym = self.generate_xy_sample(n)
  1534. z = stats.zmap(x, y)
  1535. zm = stats.mstats.zmap(xm, ym)
  1536. assert_allclose(z, zm[0:len(z)], atol=1e-10)
  1537. def test_variation(self):
  1538. for n in self.get_n():
  1539. x, y, xm, ym = self.generate_xy_sample(n)
  1540. assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
  1541. decimal=12)
  1542. assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
  1543. decimal=12)
  1544. def test_tvar(self):
  1545. for n in self.get_n():
  1546. x, y, xm, ym = self.generate_xy_sample(n)
  1547. assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm),
  1548. decimal=12)
  1549. assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym),
  1550. decimal=12)
  1551. def test_trimboth(self):
  1552. a = np.arange(20)
  1553. b = stats.trimboth(a, 0.1)
  1554. bm = stats.mstats.trimboth(a, 0.1)
  1555. assert_allclose(np.sort(b), bm.data[~bm.mask])
  1556. def test_tsem(self):
  1557. for n in self.get_n():
  1558. x, y, xm, ym = self.generate_xy_sample(n)
  1559. assert_almost_equal(stats.tsem(x), stats.mstats.tsem(xm),
  1560. decimal=14)
  1561. assert_almost_equal(stats.tsem(y), stats.mstats.tsem(ym),
  1562. decimal=14)
  1563. assert_almost_equal(stats.tsem(x, limits=(-2., 2.)),
  1564. stats.mstats.tsem(xm, limits=(-2., 2.)),
  1565. decimal=14)
  1566. def test_skewtest(self):
  1567. # this test is for 1D data
  1568. for n in self.get_n():
  1569. if n > 8:
  1570. x, y, xm, ym = self.generate_xy_sample(n)
  1571. r = stats.skewtest(x)
  1572. rm = stats.mstats.skewtest(xm)
  1573. assert_allclose(r, rm)
  1574. def test_skewtest_result_attributes(self):
  1575. x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
  1576. res = mstats.skewtest(x)
  1577. attributes = ('statistic', 'pvalue')
  1578. check_named_results(res, attributes, ma=True)
  1579. def test_skewtest_2D_notmasked(self):
  1580. # a normal ndarray is passed to the masked function
  1581. rng = np.random.default_rng(2790153686)
  1582. x = rng.random((20, 2)) * 20.
  1583. r = stats.skewtest(x)
  1584. rm = stats.mstats.skewtest(x)
  1585. assert_allclose(np.asarray(r), np.asarray(rm))
  1586. def test_skewtest_2D_WithMask(self):
  1587. nx = 2
  1588. for n in self.get_n():
  1589. if n > 8:
  1590. x, y, xm, ym = self.generate_xy_sample2D(n, nx)
  1591. r = stats.skewtest(x)
  1592. rm = stats.mstats.skewtest(xm)
  1593. assert_allclose(r[0][0], rm[0][0], rtol=1e-14)
  1594. assert_allclose(r[0][1], rm[0][1], rtol=1e-14)
  1595. def test_normaltest(self):
  1596. with np.errstate(over='raise'), warnings.catch_warnings():
  1597. warnings.filterwarnings(
  1598. "ignore", "`kurtosistest` p-value may be inaccurate", UserWarning)
  1599. warnings.filterwarnings(
  1600. "ignore", "kurtosistest only valid for n>=20", UserWarning)
  1601. for n in self.get_n():
  1602. if n > 8:
  1603. x, y, xm, ym = self.generate_xy_sample(n)
  1604. r = stats.normaltest(x)
  1605. rm = stats.mstats.normaltest(xm)
  1606. assert_allclose(np.asarray(r), np.asarray(rm))
  1607. def test_find_repeats(self):
  1608. x = np.asarray([1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4]).astype('float')
  1609. tmp = np.asarray([1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5]).astype('float')
  1610. mask = (tmp == 5.)
  1611. xm = np.ma.array(tmp, mask=mask)
  1612. x_orig, xm_orig = x.copy(), xm.copy()
  1613. unique, unique_counts = np.unique(x, return_counts=True)
  1614. r = unique[unique_counts > 1], unique_counts[unique_counts > 1]
  1615. rm = stats.mstats.find_repeats(xm)
  1616. assert_equal(r, rm)
  1617. assert_equal(x, x_orig)
  1618. assert_equal(xm, xm_orig)
  1619. # This crazy behavior is expected by count_tied_groups, but is not
  1620. # in the docstring...
  1621. _, counts = stats.mstats.find_repeats([])
  1622. assert_equal(counts, np.array(0, dtype=np.intp))
  1623. def test_kendalltau(self):
  1624. for n in self.get_n():
  1625. x, y, xm, ym = self.generate_xy_sample(n)
  1626. r = stats.kendalltau(x, y)
  1627. rm = stats.mstats.kendalltau(xm, ym)
  1628. assert_almost_equal(r[0], rm[0], decimal=10)
  1629. assert_almost_equal(r[1], rm[1], decimal=7)
  1630. def test_obrientransform(self):
  1631. for n in self.get_n():
  1632. x, y, xm, ym = self.generate_xy_sample(n)
  1633. r = stats.obrientransform(x)
  1634. rm = stats.mstats.obrientransform(xm)
  1635. assert_almost_equal(r.T, rm[0:len(x)])
  1636. def test_ks_1samp(self):
  1637. """Checks that mstats.ks_1samp and stats.ks_1samp agree on masked arrays."""
  1638. for mode in ['auto', 'exact', 'asymp']:
  1639. with warnings.catch_warnings():
  1640. for alternative in ['less', 'greater', 'two-sided']:
  1641. for n in self.get_n():
  1642. x, y, xm, ym = self.generate_xy_sample(n)
  1643. res1 = stats.ks_1samp(x, stats.norm.cdf,
  1644. alternative=alternative, mode=mode)
  1645. res2 = stats.mstats.ks_1samp(xm, stats.norm.cdf,
  1646. alternative=alternative, mode=mode)
  1647. assert_equal(np.asarray(res1), np.asarray(res2))
  1648. res3 = stats.ks_1samp(xm, stats.norm.cdf,
  1649. alternative=alternative, mode=mode)
  1650. assert_equal(np.asarray(res1), np.asarray(res3))
  1651. def test_kstest_1samp(self):
  1652. """
  1653. Checks that 1-sample mstats.kstest and stats.kstest agree on masked arrays.
  1654. """
  1655. for mode in ['auto', 'exact', 'asymp']:
  1656. with warnings.catch_warnings():
  1657. for alternative in ['less', 'greater', 'two-sided']:
  1658. for n in self.get_n():
  1659. x, y, xm, ym = self.generate_xy_sample(n)
  1660. res1 = stats.kstest(x, 'norm',
  1661. alternative=alternative, mode=mode)
  1662. res2 = stats.mstats.kstest(xm, 'norm',
  1663. alternative=alternative, mode=mode)
  1664. assert_equal(np.asarray(res1), np.asarray(res2))
  1665. res3 = stats.kstest(xm, 'norm',
  1666. alternative=alternative, mode=mode)
  1667. assert_equal(np.asarray(res1), np.asarray(res3))
  1668. def test_ks_2samp(self):
  1669. """Checks that mstats.ks_2samp and stats.ks_2samp agree on masked arrays.
  1670. gh-8431"""
  1671. for mode in ['auto', 'exact', 'asymp']:
  1672. with warnings.catch_warnings():
  1673. if mode in ['auto', 'exact']:
  1674. message = "ks_2samp: Exact calculation unsuccessful."
  1675. warnings.filterwarnings("ignore", message, RuntimeWarning)
  1676. for alternative in ['less', 'greater', 'two-sided']:
  1677. for n in self.get_n():
  1678. x, y, xm, ym = self.generate_xy_sample(n)
  1679. res1 = stats.ks_2samp(x, y,
  1680. alternative=alternative, mode=mode)
  1681. res2 = stats.mstats.ks_2samp(xm, ym,
  1682. alternative=alternative, mode=mode)
  1683. assert_equal(np.asarray(res1), np.asarray(res2))
  1684. res3 = stats.ks_2samp(xm, y,
  1685. alternative=alternative, mode=mode)
  1686. assert_equal(np.asarray(res1), np.asarray(res3))
  1687. def test_kstest_2samp(self):
  1688. """
  1689. Checks that 2-sample mstats.kstest and stats.kstest agree on masked arrays.
  1690. """
  1691. for mode in ['auto', 'exact', 'asymp']:
  1692. with warnings.catch_warnings():
  1693. if mode in ['auto', 'exact']:
  1694. message = "ks_2samp: Exact calculation unsuccessful."
  1695. warnings.filterwarnings("ignore", message, RuntimeWarning)
  1696. for alternative in ['less', 'greater', 'two-sided']:
  1697. for n in self.get_n():
  1698. x, y, xm, ym = self.generate_xy_sample(n)
  1699. res1 = stats.kstest(x, y,
  1700. alternative=alternative, mode=mode)
  1701. res2 = stats.mstats.kstest(xm, ym,
  1702. alternative=alternative, mode=mode)
  1703. assert_equal(np.asarray(res1), np.asarray(res2))
  1704. res3 = stats.kstest(xm, y,
  1705. alternative=alternative, mode=mode)
  1706. assert_equal(np.asarray(res1), np.asarray(res3))
  1707. class TestBrunnerMunzel:
  1708. # Data from (Lumley, 1996)
  1709. X = np.ma.masked_invalid([1, 2, 1, 1, 1, np.nan, 1, 1,
  1710. 1, 1, 1, 2, 4, 1, 1, np.nan])
  1711. Y = np.ma.masked_invalid([3, 3, 4, 3, np.nan, 1, 2, 3, 1, 1, 5, 4])
  1712. significant = 14
  1713. def test_brunnermunzel_one_sided(self):
  1714. # Results are compared with R's lawstat package.
  1715. u1, p1 = mstats.brunnermunzel(self.X, self.Y, alternative='less')
  1716. u2, p2 = mstats.brunnermunzel(self.Y, self.X, alternative='greater')
  1717. u3, p3 = mstats.brunnermunzel(self.X, self.Y, alternative='greater')
  1718. u4, p4 = mstats.brunnermunzel(self.Y, self.X, alternative='less')
  1719. assert_almost_equal(p1, p2, decimal=self.significant)
  1720. assert_almost_equal(p3, p4, decimal=self.significant)
  1721. assert_(p1 != p3)
  1722. assert_almost_equal(u1, 3.1374674823029505,
  1723. decimal=self.significant)
  1724. assert_almost_equal(u2, -3.1374674823029505,
  1725. decimal=self.significant)
  1726. assert_almost_equal(u3, 3.1374674823029505,
  1727. decimal=self.significant)
  1728. assert_almost_equal(u4, -3.1374674823029505,
  1729. decimal=self.significant)
  1730. assert_almost_equal(p1, 0.0028931043330757342,
  1731. decimal=self.significant)
  1732. assert_almost_equal(p3, 0.99710689566692423,
  1733. decimal=self.significant)
  1734. def test_brunnermunzel_two_sided(self):
  1735. # Results are compared with R's lawstat package.
  1736. u1, p1 = mstats.brunnermunzel(self.X, self.Y, alternative='two-sided')
  1737. u2, p2 = mstats.brunnermunzel(self.Y, self.X, alternative='two-sided')
  1738. assert_almost_equal(p1, p2, decimal=self.significant)
  1739. assert_almost_equal(u1, 3.1374674823029505,
  1740. decimal=self.significant)
  1741. assert_almost_equal(u2, -3.1374674823029505,
  1742. decimal=self.significant)
  1743. assert_almost_equal(p1, 0.0057862086661515377,
  1744. decimal=self.significant)
  1745. def test_brunnermunzel_default(self):
  1746. # The default value for alternative is two-sided
  1747. u1, p1 = mstats.brunnermunzel(self.X, self.Y)
  1748. u2, p2 = mstats.brunnermunzel(self.Y, self.X)
  1749. assert_almost_equal(p1, p2, decimal=self.significant)
  1750. assert_almost_equal(u1, 3.1374674823029505,
  1751. decimal=self.significant)
  1752. assert_almost_equal(u2, -3.1374674823029505,
  1753. decimal=self.significant)
  1754. assert_almost_equal(p1, 0.0057862086661515377,
  1755. decimal=self.significant)
  1756. def test_brunnermunzel_alternative_error(self):
  1757. alternative = "error"
  1758. distribution = "t"
  1759. assert_(alternative not in ["two-sided", "greater", "less"])
  1760. assert_raises(ValueError,
  1761. mstats.brunnermunzel,
  1762. self.X,
  1763. self.Y,
  1764. alternative,
  1765. distribution)
  1766. def test_brunnermunzel_distribution_norm(self):
  1767. u1, p1 = mstats.brunnermunzel(self.X, self.Y, distribution="normal")
  1768. u2, p2 = mstats.brunnermunzel(self.Y, self.X, distribution="normal")
  1769. assert_almost_equal(p1, p2, decimal=self.significant)
  1770. assert_almost_equal(u1, 3.1374674823029505,
  1771. decimal=self.significant)
  1772. assert_almost_equal(u2, -3.1374674823029505,
  1773. decimal=self.significant)
  1774. assert_almost_equal(p1, 0.0017041417600383024,
  1775. decimal=self.significant)
  1776. def test_brunnermunzel_distribution_error(self):
  1777. alternative = "two-sided"
  1778. distribution = "error"
  1779. assert_(alternative not in ["t", "normal"])
  1780. assert_raises(ValueError,
  1781. mstats.brunnermunzel,
  1782. self.X,
  1783. self.Y,
  1784. alternative,
  1785. distribution)
  1786. def test_brunnermunzel_empty_imput(self):
  1787. u1, p1 = mstats.brunnermunzel(self.X, [])
  1788. u2, p2 = mstats.brunnermunzel([], self.Y)
  1789. u3, p3 = mstats.brunnermunzel([], [])
  1790. assert_(np.isnan(u1))
  1791. assert_(np.isnan(p1))
  1792. assert_(np.isnan(u2))
  1793. assert_(np.isnan(p2))
  1794. assert_(np.isnan(u3))
  1795. assert_(np.isnan(p3))