test_hypotests.py 95 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164
  1. from itertools import product
  2. import numpy as np
  3. import functools
  4. import pytest
  5. from numpy.testing import assert_, assert_equal, assert_allclose
  6. from pytest import raises as assert_raises
  7. from scipy import stats, special
  8. from scipy.stats import distributions
  9. from scipy.stats._hypotests import (epps_singleton_2samp, cramervonmises,
  10. _cdf_cvm, cramervonmises_2samp,
  11. _pval_cvm_2samp_exact, barnard_exact,
  12. boschloo_exact)
  13. from scipy.stats._mannwhitneyu import mannwhitneyu, _mwu_state, _MWU
  14. from scipy._lib._testutils import _TestPythranFunc
  15. from scipy._lib import array_api_extra as xpx
  16. from scipy._lib._array_api import (make_xp_test_case, xp_default_dtype, is_numpy,
  17. eager_warns)
  18. from scipy._lib._array_api_no_0d import xp_assert_equal, xp_assert_close
  19. from scipy.stats._axis_nan_policy import SmallSampleWarning, too_small_1d_not_omit
  20. @make_xp_test_case(epps_singleton_2samp)
  21. class TestEppsSingleton:
  22. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  23. def test_statistic_1(self, dtype, xp):
  24. # first example in Goerg & Kaiser, also in original paper of
  25. # Epps & Singleton. Note: values do not match exactly, the
  26. # value of the interquartile range varies depending on how
  27. # quantiles are computed
  28. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  29. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  30. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  31. x = xp.asarray([-0.35, 2.55, 1.73, 0.73, 0.35,
  32. 2.69, 0.46, -0.94, -0.37, 12.07], dtype=dtype)
  33. y = xp.asarray([-1.15, -0.15, 2.48, 3.25, 3.71,
  34. 4.29, 5.00, 7.74, 8.38, 8.60], dtype=dtype)
  35. w, p = epps_singleton_2samp(x, y)
  36. xp_assert_close(w, xp.asarray(15.14, dtype=dtype), atol=0.03)
  37. xp_assert_close(p, xp.asarray(0.00442, dtype=dtype), atol=0.0001)
  38. def test_statistic_2(self, xp):
  39. # second example in Goerg & Kaiser, again not a perfect match
  40. x = xp.asarray([0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
  41. 5, 5, 5, 5, 6, 10, 10, 10, 10])
  42. y = xp.asarray([10, 4, 0, 5, 10, 10, 0, 5, 6, 7,
  43. 10, 3, 1, 7, 0, 8, 1, 5, 8, 10])
  44. w, p = epps_singleton_2samp(x, y)
  45. xp_assert_close(w, xp.asarray(8.900), atol=1e-3)
  46. xp_assert_close(p, xp.asarray(0.06364), atol=5e-5)
  47. def test_epps_singleton_array_like(self): # only relevant for NumPy
  48. x, y = np.arange(30), np.arange(28)
  49. w1, p1 = epps_singleton_2samp(list(x), list(y))
  50. w2, p2 = epps_singleton_2samp(tuple(x), tuple(y))
  51. w3, p3 = epps_singleton_2samp(x, y)
  52. assert_(w1 == w2 == w3)
  53. assert_(p1 == p2 == p3)
  54. def test_epps_singleton_size(self, xp):
  55. # warns if sample contains fewer than 5 elements
  56. x, y = xp.asarray([1, 2, 3, 4]), xp.arange(10)
  57. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  58. res = epps_singleton_2samp(x, y)
  59. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  60. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  61. def test_epps_singleton_nonfinite(self, xp):
  62. rng = np.random.default_rng(83249872384543)
  63. x = rng.random(size=(10, 11))
  64. y = rng.random(size=(10, 12))
  65. i = np.asarray([1, 4, 9]) # arbitrary rows
  66. w_ref, p_ref = epps_singleton_2samp(x, y, axis=-1)
  67. w_ref[i] = np.nan
  68. p_ref[i] = np.nan
  69. x[i[0], 0] = np.nan
  70. x[i[1], 1] = np.inf
  71. y[i[2], 2] = -np.inf
  72. x, y = xp.asarray(x), xp.asarray(y)
  73. w_res, p_res = epps_singleton_2samp(x, y, axis=-1)
  74. xp_assert_close(w_res, xp.asarray(w_ref))
  75. xp_assert_close(p_res, xp.asarray(p_ref))
  76. @make_xp_test_case(stats.cramervonmises)
  77. class TestCvm:
  78. # the expected values of the cdfs are taken from Table 1 in
  79. # Csorgo / Faraway: The Exact and Asymptotic Distribution of
  80. # Cramér-von Mises Statistics, 1996.
  81. @pytest.mark.parametrize('n, x, ref', [
  82. (4, [0.02983, 0.04111, 0.12331, 0.94251], [0.01, 0.05, 0.5, 0.999]),
  83. (10, [0.02657, 0.03830, 0.12068, 0.56643], [0.01, 0.05, 0.5, 0.975]),
  84. (1000, [0.02481, 0.03658, 0.11889, 1.16120], [0.01, 0.05, 0.5, 0.999]),
  85. (None, [0.02480, 0.03656, 0.11888, 1.16204], [0.01, 0.05, 0.5, 0.999]),
  86. ])
  87. def test_cdf_ref(self, n, x, ref, xp):
  88. xp_assert_close(_cdf_cvm(xp.asarray(x), n), xp.asarray(ref), atol=1e-4)
  89. def test_cdf_support(self, xp):
  90. # cdf has support on [1/(12*n), n/3]
  91. xp_assert_equal(_cdf_cvm(xp.asarray([1/(12*533), 533/3]), 533),
  92. xp.asarray([0., 1.]))
  93. xp_assert_equal(_cdf_cvm(xp.asarray([1/(12*(27 + 1)), (27 + 1)/3]), 27),
  94. xp.asarray([0., 1.]))
  95. def test_cdf_large_n(self, xp):
  96. # test that asymptotic cdf and cdf for large samples are close
  97. x = xp.asarray([0.02480, 0.03656, 0.11888, 1.16204, 100])
  98. xp_assert_close(_cdf_cvm(x, 10000), _cdf_cvm(x), atol=1e-4)
  99. def test_large_x(self, xp):
  100. # for large values of x and n, the series used to compute the cdf
  101. # converges slowly.
  102. # this leads to bug in R package goftest and MAPLE code that is
  103. # the basis of the implementation in scipy
  104. # note: cdf = 1 for x >= 1000/3 and n = 1000
  105. x = xp.asarray(333.3, dtype=xp.float64)
  106. assert (0.99999 < _cdf_cvm(x, 1000) < 1.0)
  107. assert (0.99999 < _cdf_cvm(x) < 1.0)
  108. def test_low_p(self, xp):
  109. # _cdf_cvm can return values larger than 1. In that case, we just
  110. # return a p-value of zero.
  111. n = 12
  112. res = cramervonmises(xp.ones(n)*0.8, special.ndtr)
  113. assert _cdf_cvm(res.statistic, n, xp=xp) > 1.0
  114. xp_assert_equal(res.pvalue, xp.asarray(0.))
  115. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  116. @pytest.mark.parametrize('x', [(), [1.5]])
  117. def test_invalid_input(self, x, xp):
  118. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  119. res = cramervonmises(xp.asarray(x), special.ndtr)
  120. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  121. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  122. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  123. def test_values_R(self, dtype, xp):
  124. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  125. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  126. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  127. # compared against R package goftest, version 1.1.1
  128. # library(goftest)
  129. # options(digits=16)
  130. # cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm")
  131. res = cramervonmises(xp.asarray([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], dtype=dtype),
  132. special.ndtr)
  133. xp_assert_close(res.statistic, xp.asarray(0.28815604599198, dtype=dtype))
  134. xp_assert_close(res.pvalue, xp.asarray( 0.1453465252039, dtype=dtype))
  135. # cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm", mean = 3, sd = 1.5)
  136. res = cramervonmises(xp.asarray([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], dtype=dtype),
  137. lambda x: special.ndtr((x - 3)/1.5))
  138. xp_assert_close(res.statistic, xp.asarray(0.94266847977072, dtype=dtype))
  139. xp_assert_close(res.pvalue, xp.asarray(0.002026416728467, dtype=dtype))
  140. # cvm.test(c(1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5), "pexp")
  141. res = cramervonmises(
  142. xp.asarray([1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5], dtype=dtype),
  143. lambda x: -xp.expm1(-x))
  144. xp_assert_close(res.statistic, xp.asarray(0.84218540922393, dtype=dtype))
  145. xp_assert_close(res.pvalue, xp.asarray(0.004433406063014, dtype=dtype))
  146. def test_str_cdf(self, xp):
  147. if not is_numpy(xp):
  148. message = "`cdf` must be a callable if `rvs` is a non-NumPy array."
  149. with pytest.raises(ValueError, match=message):
  150. cramervonmises(xp.asarray([1, 2, 3]), "beta")
  151. return
  152. x, args = np.arange(5), (1.4, 0.7)
  153. ref = cramervonmises(x, distributions.expon.cdf)
  154. res = cramervonmises(x, "expon")
  155. assert_equal((res.statistic, res.pvalue), (ref.statistic, ref.pvalue))
  156. ref = cramervonmises(x, distributions.beta.cdf, args)
  157. res = cramervonmises(x, "beta", args)
  158. assert_equal((res.statistic, res.pvalue), (ref.statistic, ref.pvalue))
  159. @make_xp_test_case(stats.mannwhitneyu)
  160. class TestMannWhitneyU:
  161. # All magic numbers are from R wilcox.test unless otherwise specified
  162. # https://rdrr.io/r/stats/wilcox.test.html
  163. # --- Test Input Validation ---
  164. @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
  165. def test_empty(self, xp):
  166. x = xp.asarray([1, 2]) # generic, valid inputs
  167. y = xp.asarray([3, 4])
  168. empty = xp.asarray([], dtype=x.dtype)
  169. nan = xp.asarray(xp.nan)
  170. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  171. res = mannwhitneyu(x, empty)
  172. xp_assert_close(res.statistic, nan)
  173. xp_assert_close(res.pvalue, nan)
  174. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  175. res = mannwhitneyu(empty, y)
  176. xp_assert_close(res.statistic, nan)
  177. xp_assert_close(res.pvalue, nan)
  178. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  179. res = mannwhitneyu(empty, empty)
  180. xp_assert_close(res.statistic, nan)
  181. xp_assert_close(res.pvalue, nan)
  182. def test_input_validation(self, xp):
  183. x = xp.asarray([1, 2]) # generic, valid inputs
  184. y = xp.asarray([3, 4])
  185. with assert_raises(ValueError, match="`use_continuity` must be one"):
  186. mannwhitneyu(x, y, use_continuity='ekki')
  187. with assert_raises(ValueError, match="`alternative` must be one of"):
  188. mannwhitneyu(x, y, alternative='ekki')
  189. with assert_raises(ValueError, match="`axis` must be an integer"):
  190. mannwhitneyu(x, y, axis=1.5)
  191. with assert_raises(ValueError, match="`method` must be one of"):
  192. mannwhitneyu(x, y, method='ekki')
  193. def test_auto(self, xp):
  194. # Test that default method ('auto') chooses intended method
  195. rng = np.random.default_rng(923823782530925934)
  196. n = 8 # threshold to switch from exact to asymptotic
  197. # both inputs are smaller than threshold; should use exact
  198. x = xp.asarray(rng.random(n-1))
  199. y = xp.asarray(rng.random(n-1))
  200. auto = mannwhitneyu(x, y)
  201. asymptotic = mannwhitneyu(x, y, method='asymptotic')
  202. exact = mannwhitneyu(x, y, method='exact')
  203. assert auto.pvalue == exact.pvalue
  204. assert auto.pvalue != asymptotic.pvalue
  205. # one input is smaller than threshold; should use exact
  206. x = xp.asarray(rng.random(n-1))
  207. y = xp.asarray(rng.random(n+1))
  208. auto = mannwhitneyu(x, y)
  209. asymptotic = mannwhitneyu(x, y, method='asymptotic')
  210. exact = mannwhitneyu(x, y, method='exact')
  211. assert auto.pvalue == exact.pvalue
  212. assert auto.pvalue != asymptotic.pvalue
  213. # other input is smaller than threshold; should use exact
  214. auto = mannwhitneyu(y, x)
  215. asymptotic = mannwhitneyu(x, y, method='asymptotic')
  216. exact = mannwhitneyu(x, y, method='exact')
  217. assert auto.pvalue == exact.pvalue
  218. assert auto.pvalue != asymptotic.pvalue
  219. # both inputs are larger than threshold; should use asymptotic
  220. x = xp.asarray(rng.random(n+1))
  221. y = xp.asarray(rng.random(n+1))
  222. auto = mannwhitneyu(x, y)
  223. asymptotic = mannwhitneyu(x, y, method='asymptotic')
  224. exact = mannwhitneyu(x, y, method='exact')
  225. assert auto.pvalue != exact.pvalue
  226. assert auto.pvalue == asymptotic.pvalue
  227. # both inputs are smaller than threshold, but there is a tie
  228. # should use asymptotic
  229. x = xp.asarray(rng.random(n-1))
  230. y = xp.asarray(rng.random(n-1))
  231. y = xpx.at(y)[3].set(x[3])
  232. auto = mannwhitneyu(x, y)
  233. asymptotic = mannwhitneyu(x, y, method='asymptotic')
  234. exact = mannwhitneyu(x, y, method='exact')
  235. assert auto.pvalue != exact.pvalue
  236. assert auto.pvalue == asymptotic.pvalue
  237. # --- Test Basic Functionality ---
  238. x = [210.052110, 110.190630, 307.918612]
  239. y = [436.08811482466416, 416.37397329768191, 179.96975939463582,
  240. 197.8118754228619, 34.038757281225756, 138.54220550921517,
  241. 128.7769351470246, 265.92721427951852, 275.6617533155341,
  242. 592.34083395416258, 448.73177590617018, 300.61495185038905,
  243. 187.97508449019588]
  244. # This test was written for mann_whitney_u in gh-4933.
  245. # Originally, the p-values for alternatives were swapped;
  246. # this has been corrected and the tests have been refactored for
  247. # compactness, but otherwise the tests are unchanged.
  248. # R code for comparison, e.g.:
  249. # options(digits = 16)
  250. # x = c(210.052110, 110.190630, 307.918612)
  251. # y = c(436.08811482466416, 416.37397329768191, 179.96975939463582,
  252. # 197.8118754228619, 34.038757281225756, 138.54220550921517,
  253. # 128.7769351470246, 265.92721427951852, 275.6617533155341,
  254. # 592.34083395416258, 448.73177590617018, 300.61495185038905,
  255. # 187.97508449019588)
  256. # wilcox.test(x, y, alternative="g", exact=TRUE)
  257. cases_basic = [[{"alternative": 'two-sided', "method": "asymptotic"},
  258. (16., 0.6865041817876)],
  259. [{"alternative": 'less', "method": "asymptotic"},
  260. (16., 0.3432520908938)],
  261. [{"alternative": 'greater', "method": "asymptotic"},
  262. (16., 0.7047591913255)],
  263. [{"alternative": 'two-sided', "method": "exact"},
  264. (16., 0.7035714285714)],
  265. [{"alternative": 'less', "method": "exact"},
  266. (16., 0.3517857142857)],
  267. [{"alternative": 'greater', "method": "exact"},
  268. (16., 0.6946428571429)]]
  269. @pytest.mark.parametrize(("kwds", "expected"), cases_basic)
  270. @pytest.mark.parametrize("dtype", [None, 'float32', 'float64'])
  271. def test_basic(self, kwds, expected, dtype, xp):
  272. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  273. pytest.skip("Scalar dtypes only respected after NEP 50.")
  274. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  275. x, y = xp.asarray(self.x, dtype=dtype), xp.asarray(self.y, dtype=dtype)
  276. res = mannwhitneyu(x, y, **kwds)
  277. xp_assert_close(res.statistic, xp.asarray(expected[0], dtype=dtype))
  278. xp_assert_close(res.pvalue, xp.asarray(expected[1], dtype=dtype))
  279. cases_continuity = [[{"alternative": 'two-sided', "use_continuity": True},
  280. (23., 0.6865041817876)],
  281. [{"alternative": 'less', "use_continuity": True},
  282. (23., 0.7047591913255)],
  283. [{"alternative": 'greater', "use_continuity": True},
  284. (23., 0.3432520908938)],
  285. [{"alternative": 'two-sided', "use_continuity": False},
  286. (23., 0.6377328900502)],
  287. [{"alternative": 'less', "use_continuity": False},
  288. (23., 0.6811335549749)],
  289. [{"alternative": 'greater', "use_continuity": False},
  290. (23., 0.3188664450251)]]
  291. @pytest.mark.parametrize(("kwds", "expected"), cases_continuity)
  292. def test_continuity(self, kwds, expected, xp):
  293. # When x and y are interchanged, less and greater p-values should
  294. # swap (compare to above). This wouldn't happen if the continuity
  295. # correction were applied in the wrong direction. Note that less and
  296. # greater p-values do not sum to 1 when continuity correction is on,
  297. # which is what we'd expect. Also check that results match R when
  298. # continuity correction is turned off.
  299. # Note that method='asymptotic' -> exact=FALSE
  300. # and use_continuity=False -> correct=FALSE, e.g.:
  301. # wilcox.test(x, y, alternative="t", exact=FALSE, correct=FALSE)
  302. x, y = xp.asarray(self.x), xp.asarray(self.y)
  303. res = mannwhitneyu(y, x, method='asymptotic', **kwds)
  304. xp_assert_close(res.statistic, xp.asarray(expected[0]))
  305. xp_assert_close(res.pvalue, xp.asarray(expected[1]))
  306. def test_tie_correct(self, xp):
  307. # Test tie correction against R's wilcox.test
  308. # options(digits = 16)
  309. # x = c(1, 2, 3, 4)
  310. # y = c(1, 2, 3, 4, 5)
  311. # wilcox.test(x, y, exact=FALSE)
  312. x = xp.asarray([1., 2., 3., 4.])
  313. y0 = xp.asarray([1., 2., 3., 4., 5.])
  314. dy = xp.asarray([0., 1., 0., 1., 0.])*0.01
  315. dy2 = xp.asarray([0., 0., 1., 0., 0.])*0.01
  316. y = xp.stack([y0-0.01, y0-dy, y0-dy2, y0, y0+dy2, y0+dy, y0+0.01])
  317. res = mannwhitneyu(x, y, axis=-1, method="asymptotic")
  318. U_expected = [10, 9, 8.5, 8, 7.5, 7, 6]
  319. p_expected = [1, 0.9017048037317, 0.804080657472, 0.7086240584439,
  320. 0.6197963884941, 0.5368784563079, 0.3912672792826]
  321. xp_assert_equal(res.statistic, xp.asarray(U_expected))
  322. xp_assert_close(res.pvalue, xp.asarray(p_expected))
  323. # --- Test Exact Distribution of U ---
  324. # These are tabulated values of the CDF of the exact distribution of
  325. # the test statistic from pg 52 of reference [1] (Mann-Whitney Original)
  326. pn3 = {1: [0.25, 0.5, 0.75], 2: [0.1, 0.2, 0.4, 0.6],
  327. 3: [0.05, .1, 0.2, 0.35, 0.5, 0.65]}
  328. pn4 = {1: [0.2, 0.4, 0.6], 2: [0.067, 0.133, 0.267, 0.4, 0.6],
  329. 3: [0.028, 0.057, 0.114, 0.2, .314, 0.429, 0.571],
  330. 4: [0.014, 0.029, 0.057, 0.1, 0.171, 0.243, 0.343, 0.443, 0.557]}
  331. pm5 = {1: [0.167, 0.333, 0.5, 0.667],
  332. 2: [0.047, 0.095, 0.19, 0.286, 0.429, 0.571],
  333. 3: [0.018, 0.036, 0.071, 0.125, 0.196, 0.286, 0.393, 0.5, 0.607],
  334. 4: [0.008, 0.016, 0.032, 0.056, 0.095, 0.143,
  335. 0.206, 0.278, 0.365, 0.452, 0.548],
  336. 5: [0.004, 0.008, 0.016, 0.028, 0.048, 0.075, 0.111,
  337. 0.155, 0.21, 0.274, 0.345, .421, 0.5, 0.579]}
  338. pm6 = {1: [0.143, 0.286, 0.428, 0.571],
  339. 2: [0.036, 0.071, 0.143, 0.214, 0.321, 0.429, 0.571],
  340. 3: [0.012, 0.024, 0.048, 0.083, 0.131,
  341. 0.19, 0.274, 0.357, 0.452, 0.548],
  342. 4: [0.005, 0.01, 0.019, 0.033, 0.057, 0.086, 0.129,
  343. 0.176, 0.238, 0.305, 0.381, 0.457, 0.543], # the last element
  344. # of the previous list, 0.543, has been modified from 0.545;
  345. # I assume it was a typo
  346. 5: [0.002, 0.004, 0.009, 0.015, 0.026, 0.041, 0.063, 0.089,
  347. 0.123, 0.165, 0.214, 0.268, 0.331, 0.396, 0.465, 0.535],
  348. 6: [0.001, 0.002, 0.004, 0.008, 0.013, 0.021, 0.032, 0.047,
  349. 0.066, 0.09, 0.12, 0.155, 0.197, 0.242, 0.294, 0.350,
  350. 0.409, 0.469, 0.531]}
  351. def test_exact_distribution(self):
  352. # I considered parametrize. I decided against it.
  353. setattr(_mwu_state, 's', _MWU(0, 0))
  354. p_tables = {3: self.pn3, 4: self.pn4, 5: self.pm5, 6: self.pm6}
  355. for n, table in p_tables.items():
  356. for m, p in table.items():
  357. # check p-value against table
  358. u = np.arange(0, len(p))
  359. _mwu_state.s.set_shapes(m, n)
  360. assert_allclose(_mwu_state.s.cdf(k=u), p, atol=1e-3)
  361. # check identity CDF + SF - PMF = 1
  362. # ( In this implementation, SF(U) includes PMF(U) )
  363. u2 = np.arange(0, m*n+1)
  364. assert_allclose(_mwu_state.s.cdf(k=u2)
  365. + _mwu_state.s.sf(k=u2)
  366. - _mwu_state.s.pmf(k=u2), 1)
  367. # check symmetry about mean of U, i.e. pmf(U) = pmf(m*n-U)
  368. pmf = _mwu_state.s.pmf(k=u2)
  369. assert_allclose(pmf, pmf[::-1])
  370. # check symmetry w.r.t. interchange of m, n
  371. _mwu_state.s.set_shapes(n, m)
  372. pmf2 = _mwu_state.s.pmf(k=u2)
  373. assert_allclose(pmf, pmf2)
  374. def test_asymptotic_behavior(self, xp):
  375. rng = np.random.default_rng(12543)
  376. # for small samples, the asymptotic test is not very accurate
  377. x = xp.asarray(rng.random(5))
  378. y = xp.asarray(rng.random(5))
  379. res1 = mannwhitneyu(x, y, method="exact")
  380. res2 = mannwhitneyu(x, y, method="asymptotic")
  381. assert res1.statistic == res2.statistic
  382. assert xp.abs(res1.pvalue - res2.pvalue) > 1e-2
  383. # for large samples, they agree reasonably well
  384. x = xp.asarray(rng.random(40))
  385. y = xp.asarray(rng.random(40))
  386. res1 = mannwhitneyu(x, y, method="exact")
  387. res2 = mannwhitneyu(x, y, method="asymptotic")
  388. assert res1.statistic == res2.statistic
  389. assert xp.abs(res1.pvalue - res2.pvalue) < 1e-3
  390. # --- Test Corner Cases ---
  391. def test_exact_U_equals_mean(self, xp):
  392. # Test U == m*n/2 with exact method
  393. # Without special treatment, two-sided p-value > 1 because both
  394. # one-sided p-values are > 0.5
  395. x, y = xp.asarray([1., 2., 3.]), xp.asarray([1.5, 2.5])
  396. res_l = mannwhitneyu(x, y, alternative="less", method="exact")
  397. res_g = mannwhitneyu(x, y, alternative="greater", method="exact")
  398. xp_assert_equal(res_l.pvalue, res_g.pvalue)
  399. assert res_l.pvalue > 0.5
  400. res = mannwhitneyu(x, y, alternative="two-sided", method="exact")
  401. xp_assert_equal(res.statistic, xp.asarray(3.))
  402. xp_assert_equal(res.pvalue, xp.asarray(1.))
  403. # U == m*n/2 for asymptotic case tested in test_gh_2118
  404. # The reason it's tricky for the asymptotic test has to do with
  405. # continuity correction.
  406. cases_scalar = [[{"alternative": 'two-sided', "method": "asymptotic"},
  407. (0., 1.)],
  408. [{"alternative": 'less', "method": "asymptotic"},
  409. (0., 0.5)],
  410. [{"alternative": 'greater', "method": "asymptotic"},
  411. (0., 0.977249868052)],
  412. [{"alternative": 'two-sided', "method": "exact"}, (0., 1)],
  413. [{"alternative": 'less', "method": "exact"}, (0., 0.5)],
  414. [{"alternative": 'greater', "method": "exact"}, (0., 1)]]
  415. @pytest.mark.parametrize(("kwds", "result"), cases_scalar)
  416. def test_scalar_data(self, kwds, result): # not important to preserve w/ array API
  417. # just making sure scalars work
  418. assert_allclose(mannwhitneyu(1, 2, **kwds), result)
  419. def test_equal_scalar_data(self): # not important to preserve w/ array API
  420. # when two scalars are equal, there is an -0.5/0 in the asymptotic
  421. # approximation. R gives pvalue=1.0 for alternatives 'less' and
  422. # 'greater' but NA for 'two-sided'. I don't see why, so I don't
  423. # see a need for a special case to match that behavior.
  424. assert_equal(mannwhitneyu(1, 1, method="exact"), (0.5, 1))
  425. assert_equal(mannwhitneyu(1, 1, method="asymptotic"), (0.5, 1))
  426. # without continuity correction, this becomes 0/0, which really
  427. # is undefined
  428. assert_equal(mannwhitneyu(1, 1, method="asymptotic",
  429. use_continuity=False), (0.5, np.nan))
  430. # --- Test Enhancements / Bug Reports ---
  431. @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
  432. @pytest.mark.parametrize("method", ["asymptotic", "exact"])
  433. def test_gh_12837_11113(self, method, xp):
  434. # Test that behavior for broadcastable nd arrays is appropriate:
  435. # output shape is correct and all values are equal to when the test
  436. # is performed on one pair of samples at a time.
  437. # Tests that gh-12837 and gh-11113 (requests for n-d input)
  438. # are resolved
  439. rng = np.random.default_rng(6083743794)
  440. # arrays are broadcastable except for axis = -3
  441. axis = -3
  442. m, n = 7, 10 # sample sizes
  443. x = rng.random((m, 3, 8))
  444. y = rng.random((6, n, 1, 8)) + 0.1
  445. res = mannwhitneyu(xp.asarray(x), xp.asarray(y), method=method, axis=axis)
  446. shape = (6, 3, 8) # appropriate shape of outputs, given inputs
  447. assert res.pvalue.shape == shape
  448. assert res.statistic.shape == shape
  449. # move axis of test to end for simplicity
  450. x, y = np.moveaxis(x, axis, -1), np.moveaxis(y, axis, -1)
  451. x = x[None, ...] # give x a zeroth dimension
  452. assert x.ndim == y.ndim
  453. x = np.broadcast_to(x, shape + (m,))
  454. y = np.broadcast_to(y, shape + (n,))
  455. assert x.shape[:-1] == shape
  456. assert y.shape[:-1] == shape
  457. # loop over pairs of samples
  458. statistics = np.zeros(shape)
  459. pvalues = np.zeros(shape)
  460. for indices in product(*[range(i) for i in shape]):
  461. xi = x[indices]
  462. yi = y[indices]
  463. temp = mannwhitneyu(xi, yi, method=method)
  464. statistics[indices] = temp.statistic
  465. pvalues[indices] = temp.pvalue
  466. xp_assert_close(res.pvalue, xp.asarray(pvalues), atol=1e-16)
  467. xp_assert_close(res.statistic, xp.asarray(statistics), atol=1e-16)
  468. def test_gh_11355(self, xp):
  469. # Test for correct behavior with NaN/Inf in input
  470. x = [1, 2, 3, 4]
  471. y = [3, 6, 7, 8, 9, 3, 2, 1, 4, 4, 5]
  472. res1 = mannwhitneyu(xp.asarray(x), xp.asarray(y))
  473. # Inf is not a problem. This is a rank test, and it's the largest value
  474. x[0] = 1. # ensure floating point
  475. y[4] = np.inf
  476. res2 = mannwhitneyu(xp.asarray(x), xp.asarray(y))
  477. xp_assert_equal(res1.statistic, res2.statistic)
  478. xp_assert_equal(res1.pvalue, res2.pvalue)
  479. @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
  480. def test_gh11355_nan(self, xp):
  481. # NaNs should propagate by default.
  482. x = [1., 2., 3., 4.]
  483. y = [3, 6, 7, np.nan, 9, 3, 2, 1, 4, 4, 5]
  484. res3 = mannwhitneyu(xp.asarray(x), xp.asarray(y))
  485. xp_assert_equal(res3.statistic, xp.asarray(xp.nan))
  486. xp_assert_equal(res3.pvalue, xp.asarray(xp.nan))
  487. cases_11355 = [([1., 2, 3, 4],
  488. [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5],
  489. 10., 0.1297704873477),
  490. ([1., 2, 3, 4],
  491. [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
  492. 8.5, 0.08735617507695),
  493. ([1, 2, np.inf, 4],
  494. [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5],
  495. 17.5, 0.5988856695752),
  496. ([1, 2, np.inf, 4],
  497. [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
  498. 16., 0.4687165824462),
  499. ([1, np.inf, np.inf, 4],
  500. [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
  501. 24.5, 0.7912517950119)]
  502. @pytest.mark.parametrize(("x", "y", "statistic", "pvalue"), cases_11355)
  503. def test_gh_11355b(self, x, y, statistic, pvalue, xp):
  504. # Test for correct behavior with NaN/Inf in input
  505. res = mannwhitneyu(xp.asarray(x), xp.asarray(y), method='asymptotic')
  506. xp_assert_close(res.statistic, xp.asarray(statistic), atol=1e-12)
  507. xp_assert_close(res.pvalue, xp.asarray(pvalue), atol=1e-12)
  508. cases_9184 = [[True, "less", "asymptotic", 0.900775348204],
  509. [True, "greater", "asymptotic", 0.1223118025635],
  510. [True, "two-sided", "asymptotic", 0.244623605127],
  511. [False, "less", "asymptotic", 0.8896643190401],
  512. [False, "greater", "asymptotic", 0.1103356809599],
  513. [False, "two-sided", "asymptotic", 0.2206713619198],
  514. [True, "less", "exact", 0.8967698967699],
  515. [True, "greater", "exact", 0.1272061272061],
  516. [True, "two-sided", "exact", 0.2544122544123]]
  517. @pytest.mark.parametrize(("use_continuity", "alternative",
  518. "method", "pvalue_exp"), cases_9184)
  519. def test_gh_9184(self, use_continuity, alternative, method, pvalue_exp, xp):
  520. # gh-9184 might be considered a doc-only bug. Please see the
  521. # documentation to confirm that mannwhitneyu correctly notes
  522. # that the output statistic is that of the first sample (x). In any
  523. # case, check the case provided there against output from R.
  524. # R code:
  525. # options(digits=16)
  526. # x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
  527. # y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
  528. # wilcox.test(x, y, alternative = "less", exact = FALSE)
  529. # wilcox.test(x, y, alternative = "greater", exact = FALSE)
  530. # wilcox.test(x, y, alternative = "two.sided", exact = FALSE)
  531. # wilcox.test(x, y, alternative = "less", exact = FALSE,
  532. # correct=FALSE)
  533. # wilcox.test(x, y, alternative = "greater", exact = FALSE,
  534. # correct=FALSE)
  535. # wilcox.test(x, y, alternative = "two.sided", exact = FALSE,
  536. # correct=FALSE)
  537. # wilcox.test(x, y, alternative = "less", exact = TRUE)
  538. # wilcox.test(x, y, alternative = "greater", exact = TRUE)
  539. # wilcox.test(x, y, alternative = "two.sided", exact = TRUE)
  540. statistic_exp = 35.
  541. x = xp.asarray([0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46])
  542. y = xp.asarray([1.15, 0.88, 0.90, 0.74, 1.21])
  543. res = mannwhitneyu(x, y, use_continuity=use_continuity,
  544. alternative=alternative, method=method)
  545. xp_assert_equal(res.statistic, xp.asarray(statistic_exp))
  546. xp_assert_close(res.pvalue, xp.asarray(pvalue_exp))
  547. @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
  548. def test_gh_4067(self, xp):
  549. # Test for correct behavior with all NaN input - default is propagate
  550. nan = xp.asarray(xp.nan)
  551. a = xp.stack([nan, nan, nan, nan, nan])
  552. b = xp.stack([nan, nan, nan, nan, nan])
  553. res = mannwhitneyu(a, b)
  554. xp_assert_equal(res.statistic, xp.asarray(nan))
  555. xp_assert_equal(res.pvalue, nan)
  556. # All cases checked against R wilcox.test, e.g.
  557. # options(digits=16)
  558. # x = c(1, 2, 3)
  559. # y = c(1.5, 2.5)
  560. # wilcox.test(x, y, exact=FALSE, alternative='less')
  561. cases_2118 = [[[1., 2., 3.], [1.5, 2.5], "greater", (3., 0.6135850036578)],
  562. [[1., 2., 3.], [1.5, 2.5], "less", (3., 0.6135850036578)],
  563. [[1., 2., 3.], [1.5, 2.5], "two-sided", (3., 1.0)],
  564. [[1, 2, 3], [2], "greater", (1.5, 0.681324055883)],
  565. [[1, 2, 3], [2], "less", (1.5, 0.681324055883)],
  566. [[1, 2, 3], [2], "two-sided", (1.5, 1.)],
  567. [[1, 2], [1, 2], "greater", (2., 0.667497228949)],
  568. [[1, 2], [1, 2], "less", (2., 0.667497228949)],
  569. [[1, 2], [1, 2], "two-sided", (2., 1.)]]
  570. @pytest.mark.parametrize(["x", "y", "alternative", "expected"], cases_2118)
  571. def test_gh_2118(self, x, y, alternative, expected, xp):
  572. # test cases in which U == m*n/2 when method is asymptotic
  573. # applying continuity correction could result in p-value > 1
  574. res = mannwhitneyu(xp.asarray(x), xp.asarray(y), use_continuity=True,
  575. alternative=alternative, method="asymptotic")
  576. rtol = 1e-6 if xp_default_dtype(xp) == xp.float32 else 1e-12
  577. xp_assert_close(res.statistic, xp.asarray(expected[0]), rtol=rtol)
  578. xp_assert_close(res.pvalue, xp.asarray(expected[1]), rtol=rtol)
  579. def test_gh19692_smaller_table(self):
  580. # In gh-19692, we noted that the shape of the cache used in calculating
  581. # p-values was dependent on the order of the inputs because the sample
  582. # sizes n1 and n2 changed. This was indicative of unnecessary cache
  583. # growth and redundant calculation. Check that this is resolved.
  584. rng = np.random.default_rng(7600451795963068007)
  585. m, n = 5, 11
  586. x = rng.random(size=m)
  587. y = rng.random(size=n)
  588. setattr(_mwu_state, 's', _MWU(0, 0))
  589. _mwu_state.s.reset() # reset cache
  590. res = stats.mannwhitneyu(x, y, method='exact')
  591. shape = _mwu_state.s.configurations.shape
  592. assert shape[-1] == min(res.statistic, m*n - res.statistic) + 1
  593. stats.mannwhitneyu(y, x, method='exact')
  594. assert shape == _mwu_state.s.configurations.shape # same with reversed sizes
  595. # Also, we weren't exploiting the symmetry of the null distribution
  596. # to its full potential. Ensure that the null distribution is not
  597. # evaluated explicitly for `k > m*n/2`.
  598. _mwu_state.s.reset() # reset cache
  599. stats.mannwhitneyu(x, 0*y, method='exact', alternative='greater')
  600. shape = _mwu_state.s.configurations.shape
  601. assert shape[-1] == 1 # k is smallest possible
  602. stats.mannwhitneyu(0*x, y, method='exact', alternative='greater')
  603. assert shape == _mwu_state.s.configurations.shape
  604. @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
  605. def test_permutation_method(self, alternative):
  606. rng = np.random.default_rng(7600451795963068007)
  607. x = rng.random(size=(2, 5))
  608. y = rng.random(size=(2, 6))
  609. res = stats.mannwhitneyu(x, y, method=stats.PermutationMethod(),
  610. alternative=alternative, axis=1)
  611. res2 = stats.mannwhitneyu(x, y, method='exact',
  612. alternative=alternative, axis=1)
  613. assert_allclose(res.statistic, res2.statistic, rtol=1e-15)
  614. assert_allclose(res.pvalue, res2.pvalue, rtol=1e-15)
  615. # Old tests moved from test_stats. Source of magic numbers unknown.
  616. X = [19.8958398126694, 19.5452691647182, 19.0577309166425, 21.716543054589,
  617. 20.3269502208702, 20.0009273294025, 19.3440043632957, 20.4216806548105,
  618. 19.0649894736528, 18.7808043120398, 19.3680942943298, 19.4848044069953,
  619. 20.7514611265663, 19.0894948874598, 19.4975522356628, 18.9971170734274,
  620. 20.3239606288208, 20.6921298083835, 19.0724259532507, 18.9825187935021,
  621. 19.5144462609601, 19.8256857844223, 20.5174677102032, 21.1122407995892,
  622. 17.9490854922535, 18.2847521114727, 20.1072217648826, 18.6439891962179,
  623. 20.4970638083542, 19.5567594734914]
  624. Y = [19.2790668029091, 16.993808441865, 18.5416338448258, 17.2634018833575,
  625. 19.1577183624616, 18.5119655377495, 18.6068455037221, 18.8358343362655,
  626. 19.0366413269742, 18.1135025515417, 19.2201873866958, 17.8344909022841,
  627. 18.2894380745856, 18.6661374133922, 19.9688601693252, 16.0672254617636,
  628. 19.00596360572, 19.201561539032, 19.0487501090183, 19.0847908674356]
  629. rtol = 1e-14
  630. def test_mannwhitneyu_one_sided(self, xp):
  631. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  632. u1, p1 = stats.mannwhitneyu(X, Y, alternative='less')
  633. u2, p2 = stats.mannwhitneyu(Y, X, alternative='greater')
  634. u3, p3 = stats.mannwhitneyu(X, Y, alternative='greater')
  635. u4, p4 = stats.mannwhitneyu(Y, X, alternative='less')
  636. xp_assert_equal(p1, p2)
  637. xp_assert_equal(p3, p4)
  638. assert p1 != p3
  639. xp_assert_equal(u1, xp.asarray(498.))
  640. xp_assert_equal(u2, xp.asarray(102.))
  641. xp_assert_equal(u3, xp.asarray(498.))
  642. xp_assert_equal(u4, xp.asarray(102.))
  643. assert_allclose(p1, xp.asarray(0.999957683256589), rtol=self.rtol)
  644. rtol = self.rtol if X.dtype == xp.float64 else 5e-4
  645. assert_allclose(p3, xp.asarray(4.5941632666275e-05), rtol=rtol, atol=1e-16)
  646. def test_mannwhitneyu_two_sided(self, xp):
  647. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  648. u1, p1 = stats.mannwhitneyu(X, Y, alternative='two-sided')
  649. u2, p2 = stats.mannwhitneyu(Y, X, alternative='two-sided')
  650. xp_assert_equal(p1, p2)
  651. xp_assert_equal(u1, xp.asarray(498.))
  652. xp_assert_equal(u2, xp.asarray(102.))
  653. rtol = self.rtol if X.dtype == xp.float64 else 5e-4
  654. xp_assert_close(p1, xp.asarray(9.188326533255e-05), rtol=rtol, atol=1e-16)
  655. def test_mannwhitneyu_no_correct_one_sided(self, xp):
  656. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  657. u1, p1 = stats.mannwhitneyu(X, Y, False, alternative='less')
  658. u2, p2 = stats.mannwhitneyu(Y, X, False, alternative='greater')
  659. u3, p3 = stats.mannwhitneyu(X, Y, False, alternative='greater')
  660. u4, p4 = stats.mannwhitneyu(Y, X, False, alternative='less')
  661. xp_assert_equal(p1, p2)
  662. xp_assert_equal(p3, p4)
  663. assert p1 != p3
  664. xp_assert_equal(u1, xp.asarray(498.))
  665. xp_assert_equal(u2, xp.asarray(102.))
  666. xp_assert_equal(u3, xp.asarray(498.))
  667. xp_assert_equal(u4, xp.asarray(102.))
  668. rtol = self.rtol if X.dtype == xp.float64 else 5e-4
  669. xp_assert_close(p1, xp.asarray(0.999955905990004), rtol=rtol, atol=1e-16)
  670. xp_assert_close(p3, xp.asarray(4.40940099958089e-05), rtol=rtol, atol=1e-16)
  671. def test_mannwhitneyu_no_correct_two_sided(self, xp):
  672. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  673. u1, p1 = stats.mannwhitneyu(X, Y, False, alternative='two-sided')
  674. u2, p2 = stats.mannwhitneyu(Y, X, False, alternative='two-sided')
  675. xp_assert_equal(p1, p2)
  676. xp_assert_equal(u1, xp.asarray(498.))
  677. xp_assert_equal(u2, xp.asarray(102.))
  678. rtol = self.rtol if X.dtype == xp.float64 else 5e-4
  679. xp_assert_close(p1, xp.asarray(8.81880199916178e-05), rtol=rtol, atol=1e-16)
  680. def test_mannwhitneyu_ones(self, xp):
  681. # test for gh-1428
  682. x = xp.asarray([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  683. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
  684. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  685. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  686. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
  687. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  688. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
  689. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  690. 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2.,
  691. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  692. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1.,
  693. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  694. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  695. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
  696. 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  697. 1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1.,
  698. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  699. 1., 1., 1., 1., 1., 1.])
  700. y = xp.asarray([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1.,
  701. 2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3.,
  702. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1.,
  703. 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
  704. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2.,
  705. 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2.,
  706. 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  707. 1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1.,
  708. 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
  709. 2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1.,
  710. 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1.,
  711. 1., 1., 1., 1.])
  712. # p-value from R, e.g. wilcox.test(x, y, alternative="g")
  713. res = stats.mannwhitneyu(x, y, alternative='less')
  714. xp_assert_close(res.statistic, xp.asarray(16980.5))
  715. xp_assert_close(res.pvalue, xp.asarray(2.8214327656317373e-5))
  716. res = stats.mannwhitneyu(x, y, alternative='greater')
  717. xp_assert_close(res.statistic, xp.asarray(16980.5))
  718. xp_assert_close(res.pvalue, xp.asarray(0.9999719954296))
  719. res = stats.mannwhitneyu(x, y, alternative='two-sided')
  720. xp_assert_close(res.statistic, xp.asarray(16980.5))
  721. xp_assert_close(res.pvalue, xp.asarray(5.642865531266e-5))
  722. class TestSomersD(_TestPythranFunc):
  723. def setup_method(self):
  724. self.dtypes = self.ALL_INTEGER + self.ALL_FLOAT
  725. self.arguments = {0: (np.arange(10),
  726. self.ALL_INTEGER + self.ALL_FLOAT),
  727. 1: (np.arange(10),
  728. self.ALL_INTEGER + self.ALL_FLOAT)}
  729. input_array = [self.arguments[idx][0] for idx in self.arguments]
  730. # In this case, self.partialfunc can simply be stats.somersd,
  731. # since `alternative` is an optional argument. If it is required,
  732. # we can use functools.partial to freeze the value, because
  733. # we only mainly test various array inputs, not str, etc.
  734. self.partialfunc = functools.partial(stats.somersd,
  735. alternative='two-sided')
  736. self.expected = self.partialfunc(*input_array)
  737. def pythranfunc(self, *args):
  738. res = self.partialfunc(*args)
  739. assert_allclose(res.statistic, self.expected.statistic, atol=1e-15)
  740. assert_allclose(res.pvalue, self.expected.pvalue, atol=1e-15)
  741. def test_pythranfunc_keywords(self):
  742. # Not specifying the optional keyword args
  743. table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]
  744. res1 = stats.somersd(table)
  745. # Specifying the optional keyword args with default value
  746. optional_args = self.get_optional_args(stats.somersd)
  747. res2 = stats.somersd(table, **optional_args)
  748. # Check if the results are the same in two cases
  749. assert_allclose(res1.statistic, res2.statistic, atol=1e-15)
  750. assert_allclose(res1.pvalue, res2.pvalue, atol=1e-15)
  751. def test_like_kendalltau(self):
  752. # All tests correspond with one in test_stats.py `test_kendalltau`
  753. # case without ties, con-dis equal zero
  754. x = [5, 2, 1, 3, 6, 4, 7, 8]
  755. y = [5, 2, 6, 3, 1, 8, 7, 4]
  756. # Cross-check with result from SAS FREQ:
  757. expected = (0.000000000000000, 1.000000000000000)
  758. res = stats.somersd(x, y)
  759. assert_allclose(res.statistic, expected[0], atol=1e-15)
  760. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  761. # case without ties, con-dis equal zero
  762. x = [0, 5, 2, 1, 3, 6, 4, 7, 8]
  763. y = [5, 2, 0, 6, 3, 1, 8, 7, 4]
  764. # Cross-check with result from SAS FREQ:
  765. expected = (0.000000000000000, 1.000000000000000)
  766. res = stats.somersd(x, y)
  767. assert_allclose(res.statistic, expected[0], atol=1e-15)
  768. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  769. # case without ties, con-dis close to zero
  770. x = [5, 2, 1, 3, 6, 4, 7]
  771. y = [5, 2, 6, 3, 1, 7, 4]
  772. # Cross-check with result from SAS FREQ:
  773. expected = (-0.142857142857140, 0.630326953157670)
  774. res = stats.somersd(x, y)
  775. assert_allclose(res.statistic, expected[0], atol=1e-15)
  776. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  777. # simple case without ties
  778. x = np.arange(10)
  779. y = np.arange(10)
  780. # Cross-check with result from SAS FREQ:
  781. # SAS p value is not provided.
  782. expected = (1.000000000000000, 0)
  783. res = stats.somersd(x, y)
  784. assert_allclose(res.statistic, expected[0], atol=1e-15)
  785. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  786. # swap a couple values and a couple more
  787. x = np.arange(10)
  788. y = np.array([0, 2, 1, 3, 4, 6, 5, 7, 8, 9])
  789. # Cross-check with result from SAS FREQ:
  790. expected = (0.911111111111110, 0.000000000000000)
  791. res = stats.somersd(x, y)
  792. assert_allclose(res.statistic, expected[0], atol=1e-15)
  793. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  794. # same in opposite direction
  795. x = np.arange(10)
  796. y = np.arange(10)[::-1]
  797. # Cross-check with result from SAS FREQ:
  798. # SAS p value is not provided.
  799. expected = (-1.000000000000000, 0)
  800. res = stats.somersd(x, y)
  801. assert_allclose(res.statistic, expected[0], atol=1e-15)
  802. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  803. # swap a couple values and a couple more
  804. x = np.arange(10)
  805. y = np.array([9, 7, 8, 6, 5, 3, 4, 2, 1, 0])
  806. # Cross-check with result from SAS FREQ:
  807. expected = (-0.9111111111111111, 0.000000000000000)
  808. res = stats.somersd(x, y)
  809. assert_allclose(res.statistic, expected[0], atol=1e-15)
  810. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  811. # with some ties
  812. x1 = [12, 2, 1, 12, 2]
  813. x2 = [1, 4, 7, 1, 0]
  814. # Cross-check with result from SAS FREQ:
  815. expected = (-0.500000000000000, 0.304901788178780)
  816. res = stats.somersd(x1, x2)
  817. assert_allclose(res.statistic, expected[0], atol=1e-15)
  818. assert_allclose(res.pvalue, expected[1], atol=1e-15)
  819. # with only ties in one or both inputs
  820. # SAS will not produce an output for these:
  821. # NOTE: No statistics are computed for x * y because x has fewer
  822. # than 2 nonmissing levels.
  823. # WARNING: No OUTPUT data set is produced for this table because a
  824. # row or column variable has fewer than 2 nonmissing levels and no
  825. # statistics are computed.
  826. res = stats.somersd([2, 2, 2], [2, 2, 2])
  827. assert_allclose(res.statistic, np.nan)
  828. assert_allclose(res.pvalue, np.nan)
  829. res = stats.somersd([2, 0, 2], [2, 2, 2])
  830. assert_allclose(res.statistic, np.nan)
  831. assert_allclose(res.pvalue, np.nan)
  832. res = stats.somersd([2, 2, 2], [2, 0, 2])
  833. assert_allclose(res.statistic, np.nan)
  834. assert_allclose(res.pvalue, np.nan)
  835. res = stats.somersd([0], [0])
  836. assert_allclose(res.statistic, np.nan)
  837. assert_allclose(res.pvalue, np.nan)
  838. # empty arrays provided as input
  839. res = stats.somersd([], [])
  840. assert_allclose(res.statistic, np.nan)
  841. assert_allclose(res.pvalue, np.nan)
  842. # test unequal length inputs
  843. x = np.arange(10.)
  844. y = np.arange(20.)
  845. assert_raises(ValueError, stats.somersd, x, y)
  846. def test_asymmetry(self):
  847. # test that somersd is asymmetric w.r.t. input order and that
  848. # convention is as described: first input is row variable & independent
  849. # data is from Wikipedia:
  850. # https://en.wikipedia.org/wiki/Somers%27_D
  851. # but currently that example contradicts itself - it says X is
  852. # independent yet take D_XY
  853. x = [1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 1, 2,
  854. 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]
  855. y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
  856. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  857. # Cross-check with result from SAS FREQ:
  858. d_cr = 0.272727272727270
  859. d_rc = 0.342857142857140
  860. p = 0.092891940883700 # same p-value for either direction
  861. res = stats.somersd(x, y)
  862. assert_allclose(res.statistic, d_cr, atol=1e-15)
  863. assert_allclose(res.pvalue, p, atol=1e-4)
  864. assert_equal(res.table.shape, (3, 2))
  865. res = stats.somersd(y, x)
  866. assert_allclose(res.statistic, d_rc, atol=1e-15)
  867. assert_allclose(res.pvalue, p, atol=1e-15)
  868. assert_equal(res.table.shape, (2, 3))
  869. def test_somers_original(self):
  870. # test against Somers' original paper [1]
  871. # Table 5A
  872. # Somers' convention was column IV
  873. table = np.array([[8, 2], [6, 5], [3, 4], [1, 3], [2, 3]])
  874. # Our convention (and that of SAS FREQ) is row IV
  875. table = table.T
  876. dyx = 129/340
  877. assert_allclose(stats.somersd(table).statistic, dyx)
  878. # table 7A - d_yx = 1
  879. table = np.array([[25, 0], [85, 0], [0, 30]])
  880. dxy, dyx = 3300/5425, 3300/3300
  881. assert_allclose(stats.somersd(table).statistic, dxy)
  882. assert_allclose(stats.somersd(table.T).statistic, dyx)
  883. # table 7B - d_yx < 0
  884. table = np.array([[25, 0], [0, 30], [85, 0]])
  885. dyx = -1800/3300
  886. assert_allclose(stats.somersd(table.T).statistic, dyx)
  887. def test_contingency_table_with_zero_rows_cols(self):
  888. # test that zero rows/cols in contingency table don't affect result
  889. N = 100
  890. shape = 4, 6
  891. size = np.prod(shape)
  892. rng = np.random.RandomState(0)
  893. s = stats.multinomial.rvs(N, p=np.ones(size)/size,
  894. random_state=rng).reshape(shape)
  895. res = stats.somersd(s)
  896. s2 = np.insert(s, 2, np.zeros(shape[1]), axis=0)
  897. res2 = stats.somersd(s2)
  898. s3 = np.insert(s, 2, np.zeros(shape[0]), axis=1)
  899. res3 = stats.somersd(s3)
  900. s4 = np.insert(s2, 2, np.zeros(shape[0]+1), axis=1)
  901. res4 = stats.somersd(s4)
  902. # Cross-check with result from SAS FREQ:
  903. assert_allclose(res.statistic, -0.116981132075470, atol=1e-15)
  904. assert_allclose(res.statistic, res2.statistic)
  905. assert_allclose(res.statistic, res3.statistic)
  906. assert_allclose(res.statistic, res4.statistic)
  907. assert_allclose(res.pvalue, 0.156376448188150, atol=1e-15)
  908. assert_allclose(res.pvalue, res2.pvalue)
  909. assert_allclose(res.pvalue, res3.pvalue)
  910. assert_allclose(res.pvalue, res4.pvalue)
  911. def test_invalid_contingency_tables(self):
  912. N = 100
  913. shape = 4, 6
  914. size = np.prod(shape)
  915. rng = np.random.default_rng(0)
  916. # start with a valid contingency table
  917. s = stats.multinomial.rvs(N, p=np.ones(size)/size,
  918. random_state=rng).reshape(shape)
  919. s5 = s - 2
  920. message = "All elements of the contingency table must be non-negative"
  921. with assert_raises(ValueError, match=message):
  922. stats.somersd(s5)
  923. s6 = s + 0.01
  924. message = "All elements of the contingency table must be integer"
  925. with assert_raises(ValueError, match=message):
  926. stats.somersd(s6)
  927. message = ("At least two elements of the contingency "
  928. "table must be nonzero.")
  929. with assert_raises(ValueError, match=message):
  930. stats.somersd([[]])
  931. with assert_raises(ValueError, match=message):
  932. stats.somersd([[1]])
  933. s7 = np.zeros((3, 3))
  934. with assert_raises(ValueError, match=message):
  935. stats.somersd(s7)
  936. s7[0, 1] = 1
  937. with assert_raises(ValueError, match=message):
  938. stats.somersd(s7)
  939. def test_only_ranks_matter(self):
  940. # only ranks of input data should matter
  941. x = [1, 2, 3]
  942. x2 = [-1, 2.1, np.inf]
  943. y = [3, 2, 1]
  944. y2 = [0, -0.5, -np.inf]
  945. res = stats.somersd(x, y)
  946. res2 = stats.somersd(x2, y2)
  947. assert_equal(res.statistic, res2.statistic)
  948. assert_equal(res.pvalue, res2.pvalue)
  949. def test_contingency_table_return(self):
  950. # check that contingency table is returned
  951. x = np.arange(10)
  952. y = np.arange(10)
  953. res = stats.somersd(x, y)
  954. assert_equal(res.table, np.eye(10))
  955. def test_somersd_alternative(self):
  956. # Test alternative parameter, asymptotic method (due to tie)
  957. # Based on scipy.stats.test_stats.TestCorrSpearman2::test_alternative
  958. x1 = [1, 2, 3, 4, 5]
  959. x2 = [5, 6, 7, 8, 7]
  960. # strong positive correlation
  961. expected = stats.somersd(x1, x2, alternative="two-sided")
  962. assert expected.statistic > 0
  963. # rank correlation > 0 -> large "less" p-value
  964. res = stats.somersd(x1, x2, alternative="less")
  965. assert_equal(res.statistic, expected.statistic)
  966. assert_allclose(res.pvalue, 1 - (expected.pvalue / 2))
  967. # rank correlation > 0 -> small "greater" p-value
  968. res = stats.somersd(x1, x2, alternative="greater")
  969. assert_equal(res.statistic, expected.statistic)
  970. assert_allclose(res.pvalue, expected.pvalue / 2)
  971. # reverse the direction of rank correlation
  972. x2.reverse()
  973. # strong negative correlation
  974. expected = stats.somersd(x1, x2, alternative="two-sided")
  975. assert expected.statistic < 0
  976. # rank correlation < 0 -> large "greater" p-value
  977. res = stats.somersd(x1, x2, alternative="greater")
  978. assert_equal(res.statistic, expected.statistic)
  979. assert_allclose(res.pvalue, 1 - (expected.pvalue / 2))
  980. # rank correlation < 0 -> small "less" p-value
  981. res = stats.somersd(x1, x2, alternative="less")
  982. assert_equal(res.statistic, expected.statistic)
  983. assert_allclose(res.pvalue, expected.pvalue / 2)
  984. with pytest.raises(ValueError, match="`alternative` must be..."):
  985. stats.somersd(x1, x2, alternative="ekki-ekki")
  986. @pytest.mark.parametrize("positive_correlation", (False, True))
  987. def test_somersd_perfect_correlation(self, positive_correlation):
  988. # Before the addition of `alternative`, perfect correlation was
  989. # treated as a special case. Now it is treated like any other case, but
  990. # make sure there are no divide by zero warnings or associated errors
  991. x1 = np.arange(10)
  992. x2 = x1 if positive_correlation else np.flip(x1)
  993. expected_statistic = 1 if positive_correlation else -1
  994. # perfect correlation -> small "two-sided" p-value (0)
  995. res = stats.somersd(x1, x2, alternative="two-sided")
  996. assert res.statistic == expected_statistic
  997. assert res.pvalue == 0
  998. # rank correlation > 0 -> large "less" p-value (1)
  999. res = stats.somersd(x1, x2, alternative="less")
  1000. assert res.statistic == expected_statistic
  1001. assert res.pvalue == (1 if positive_correlation else 0)
  1002. # rank correlation > 0 -> small "greater" p-value (0)
  1003. res = stats.somersd(x1, x2, alternative="greater")
  1004. assert res.statistic == expected_statistic
  1005. assert res.pvalue == (0 if positive_correlation else 1)
  1006. def test_somersd_large_inputs_gh18132(self):
  1007. # Test that large inputs where potential overflows could occur give
  1008. # the expected output. This is tested in the case of binary inputs.
  1009. # See gh-18126.
  1010. # generate lists of random classes 1-2 (binary)
  1011. classes = [1, 2]
  1012. n_samples = 10 ** 6
  1013. rng = np.random.default_rng(6889320191)
  1014. x = rng.choice(classes, n_samples)
  1015. y = rng.choice(classes, n_samples)
  1016. # get value to compare with: sklearn output
  1017. # from sklearn import metrics
  1018. # val_auc_sklearn = metrics.roc_auc_score(x, y)
  1019. # # convert to the Gini coefficient (Gini = (AUC*2)-1)
  1020. # val_sklearn = 2 * val_auc_sklearn - 1
  1021. val_sklearn = 0.000624401938730923
  1022. # calculate the Somers' D statistic, which should be equal to the
  1023. # result of val_sklearn until approximately machine precision
  1024. val_scipy = stats.somersd(x, y).statistic
  1025. assert_allclose(val_sklearn, val_scipy, atol=1e-15)
  1026. class TestBarnardExact:
  1027. """Some tests to show that barnard_exact() works correctly."""
  1028. @pytest.mark.parametrize(
  1029. "input_sample,expected",
  1030. [
  1031. ([[43, 40], [10, 39]], (3.555406779643, 0.000362832367)),
  1032. ([[100, 2], [1000, 5]], (-1.776382925679, 0.135126970878)),
  1033. ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)),
  1034. ([[5, 1], [10, 10]], (1.449486150679, 0.156277546306)),
  1035. ([[5, 15], [20, 20]], (-1.851640199545, 0.066363501421)),
  1036. ([[5, 16], [20, 25]], (-1.609639949352, 0.116984852192)),
  1037. ([[10, 5], [10, 1]], (-1.449486150679, 0.177536588915)),
  1038. ([[5, 0], [1, 4]], (2.581988897472, 0.013671875000)),
  1039. ([[0, 1], [3, 2]], (-1.095445115010, 0.509667991877)),
  1040. ([[0, 2], [6, 4]], (-1.549193338483, 0.197019618792)),
  1041. ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)),
  1042. ],
  1043. )
  1044. def test_precise(self, input_sample, expected):
  1045. """The expected values have been generated by R, using a resolution
  1046. for the nuisance parameter of 1e-6 :
  1047. ```R
  1048. library(Barnard)
  1049. options(digits=10)
  1050. barnard.test(43, 40, 10, 39, dp=1e-6, pooled=TRUE)
  1051. ```
  1052. """
  1053. res = barnard_exact(input_sample)
  1054. statistic, pvalue = res.statistic, res.pvalue
  1055. assert_allclose([statistic, pvalue], expected)
  1056. @pytest.mark.parametrize(
  1057. "input_sample,expected",
  1058. [
  1059. ([[43, 40], [10, 39]], (3.920362887717, 0.000289470662)),
  1060. ([[100, 2], [1000, 5]], (-1.139432816087, 0.950272080594)),
  1061. ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)),
  1062. ([[5, 1], [10, 10]], (1.622375939458, 0.150599922226)),
  1063. ([[5, 15], [20, 20]], (-1.974771239528, 0.063038448651)),
  1064. ([[5, 16], [20, 25]], (-1.722122973346, 0.133329494287)),
  1065. ([[10, 5], [10, 1]], (-1.765469659009, 0.250566655215)),
  1066. ([[5, 0], [1, 4]], (5.477225575052, 0.007812500000)),
  1067. ([[0, 1], [3, 2]], (-1.224744871392, 0.509667991877)),
  1068. ([[0, 2], [6, 4]], (-1.732050807569, 0.197019618792)),
  1069. ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)),
  1070. ],
  1071. )
  1072. def test_pooled_param(self, input_sample, expected):
  1073. """The expected values have been generated by R, using a resolution
  1074. for the nuisance parameter of 1e-6 :
  1075. ```R
  1076. library(Barnard)
  1077. options(digits=10)
  1078. barnard.test(43, 40, 10, 39, dp=1e-6, pooled=FALSE)
  1079. ```
  1080. """
  1081. res = barnard_exact(input_sample, pooled=False)
  1082. statistic, pvalue = res.statistic, res.pvalue
  1083. assert_allclose([statistic, pvalue], expected)
  1084. def test_raises(self):
  1085. # test we raise an error for wrong input number of nuisances.
  1086. error_msg = (
  1087. "Number of points `n` must be strictly positive, found 0"
  1088. )
  1089. with assert_raises(ValueError, match=error_msg):
  1090. barnard_exact([[1, 2], [3, 4]], n=0)
  1091. # test we raise an error for wrong shape of input.
  1092. error_msg = "The input `table` must be of shape \\(2, 2\\)."
  1093. with assert_raises(ValueError, match=error_msg):
  1094. barnard_exact(np.arange(6).reshape(2, 3))
  1095. # Test all values must be positives
  1096. error_msg = "All values in `table` must be nonnegative."
  1097. with assert_raises(ValueError, match=error_msg):
  1098. barnard_exact([[-1, 2], [3, 4]])
  1099. # Test value error on wrong alternative param
  1100. error_msg = (
  1101. "`alternative` should be one of {'two-sided', 'less', 'greater'},"
  1102. " found .*"
  1103. )
  1104. with assert_raises(ValueError, match=error_msg):
  1105. barnard_exact([[1, 2], [3, 4]], "not-correct")
  1106. @pytest.mark.parametrize(
  1107. "input_sample,expected",
  1108. [
  1109. ([[0, 0], [4, 3]], (1.0, 0)),
  1110. ],
  1111. )
  1112. def test_edge_cases(self, input_sample, expected):
  1113. res = barnard_exact(input_sample)
  1114. statistic, pvalue = res.statistic, res.pvalue
  1115. assert_equal(pvalue, expected[0])
  1116. assert_equal(statistic, expected[1])
  1117. @pytest.mark.parametrize(
  1118. "input_sample,expected",
  1119. [
  1120. ([[0, 5], [0, 10]], (1.0, np.nan)),
  1121. ([[5, 0], [10, 0]], (1.0, np.nan)),
  1122. ],
  1123. )
  1124. def test_row_or_col_zero(self, input_sample, expected):
  1125. res = barnard_exact(input_sample)
  1126. statistic, pvalue = res.statistic, res.pvalue
  1127. assert_equal(pvalue, expected[0])
  1128. assert_equal(statistic, expected[1])
  1129. @pytest.mark.parametrize(
  1130. "input_sample,expected",
  1131. [
  1132. ([[2, 7], [8, 2]], (-2.518474945157, 0.009886140845)),
  1133. ([[7, 200], [300, 8]], (-21.320036698460, 0.0)),
  1134. ([[21, 28], [1957, 6]], (-30.489638143953, 0.0)),
  1135. ],
  1136. )
  1137. @pytest.mark.parametrize("alternative", ["greater", "less"])
  1138. def test_less_greater(self, input_sample, expected, alternative):
  1139. """
  1140. "The expected values have been generated by R, using a resolution
  1141. for the nuisance parameter of 1e-6 :
  1142. ```R
  1143. library(Barnard)
  1144. options(digits=10)
  1145. a = barnard.test(2, 7, 8, 2, dp=1e-6, pooled=TRUE)
  1146. a$p.value[1]
  1147. ```
  1148. In this test, we are using the "one-sided" return value `a$p.value[1]`
  1149. to test our pvalue.
  1150. """
  1151. expected_stat, less_pvalue_expect = expected
  1152. if alternative == "greater":
  1153. input_sample = np.array(input_sample)[:, ::-1]
  1154. expected_stat = -expected_stat
  1155. res = barnard_exact(input_sample, alternative=alternative)
  1156. statistic, pvalue = res.statistic, res.pvalue
  1157. assert_allclose(
  1158. [statistic, pvalue], [expected_stat, less_pvalue_expect], atol=1e-7
  1159. )
  1160. class TestBoschlooExact:
  1161. """Some tests to show that boschloo_exact() works correctly."""
  1162. ATOL = 1e-7
  1163. @pytest.mark.parametrize(
  1164. "input_sample,expected",
  1165. [
  1166. ([[2, 7], [8, 2]], (0.01852173, 0.009886142)),
  1167. ([[5, 1], [10, 10]], (0.9782609, 0.9450994)),
  1168. ([[5, 16], [20, 25]], (0.08913823, 0.05827348)),
  1169. ([[10, 5], [10, 1]], (0.1652174, 0.08565611)),
  1170. ([[5, 0], [1, 4]], (1, 1)),
  1171. ([[0, 1], [3, 2]], (0.5, 0.34375)),
  1172. ([[2, 7], [8, 2]], (0.01852173, 0.009886142)),
  1173. ([[7, 12], [8, 3]], (0.06406797, 0.03410916)),
  1174. ([[10, 24], [25, 37]], (0.2009359, 0.1512882)),
  1175. ],
  1176. )
  1177. def test_less(self, input_sample, expected):
  1178. """The expected values have been generated by R, using a resolution
  1179. for the nuisance parameter of 1e-8 :
  1180. ```R
  1181. library(Exact)
  1182. options(digits=10)
  1183. data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
  1184. a = exact.test(data, method="Boschloo", alternative="less",
  1185. tsmethod="central", np.interval=TRUE, beta=1e-8)
  1186. ```
  1187. """
  1188. res = boschloo_exact(input_sample, alternative="less")
  1189. statistic, pvalue = res.statistic, res.pvalue
  1190. assert_allclose([statistic, pvalue], expected, atol=self.ATOL)
  1191. @pytest.mark.parametrize(
  1192. "input_sample,expected",
  1193. [
  1194. ([[43, 40], [10, 39]], (0.0002875544, 0.0001615562)),
  1195. ([[2, 7], [8, 2]], (0.9990149, 0.9918327)),
  1196. ([[5, 1], [10, 10]], (0.1652174, 0.09008534)),
  1197. ([[5, 15], [20, 20]], (0.9849087, 0.9706997)),
  1198. ([[5, 16], [20, 25]], (0.972349, 0.9524124)),
  1199. ([[5, 0], [1, 4]], (0.02380952, 0.006865367)),
  1200. ([[0, 1], [3, 2]], (1, 1)),
  1201. ([[0, 2], [6, 4]], (1, 1)),
  1202. ([[2, 7], [8, 2]], (0.9990149, 0.9918327)),
  1203. ([[7, 12], [8, 3]], (0.9895302, 0.9771215)),
  1204. ([[10, 24], [25, 37]], (0.9012936, 0.8633275)),
  1205. ],
  1206. )
  1207. def test_greater(self, input_sample, expected):
  1208. """The expected values have been generated by R, using a resolution
  1209. for the nuisance parameter of 1e-8 :
  1210. ```R
  1211. library(Exact)
  1212. options(digits=10)
  1213. data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
  1214. a = exact.test(data, method="Boschloo", alternative="greater",
  1215. tsmethod="central", np.interval=TRUE, beta=1e-8)
  1216. ```
  1217. """
  1218. res = boschloo_exact(input_sample, alternative="greater")
  1219. statistic, pvalue = res.statistic, res.pvalue
  1220. assert_allclose([statistic, pvalue], expected, atol=self.ATOL)
  1221. @pytest.mark.parametrize(
  1222. "input_sample,expected",
  1223. [
  1224. ([[43, 40], [10, 39]], (0.0002875544, 0.0003231115)),
  1225. ([[2, 7], [8, 2]], (0.01852173, 0.01977228)),
  1226. ([[5, 1], [10, 10]], (0.1652174, 0.1801707)),
  1227. ([[5, 16], [20, 25]], (0.08913823, 0.116547)),
  1228. ([[5, 0], [1, 4]], (0.02380952, 0.01373073)),
  1229. ([[0, 1], [3, 2]], (0.5, 0.6875)),
  1230. ([[2, 7], [8, 2]], (0.01852173, 0.01977228)),
  1231. ([[7, 12], [8, 3]], (0.06406797, 0.06821831)),
  1232. ],
  1233. )
  1234. def test_two_sided(self, input_sample, expected):
  1235. """The expected values have been generated by R, using a resolution
  1236. for the nuisance parameter of 1e-8 :
  1237. ```R
  1238. library(Exact)
  1239. options(digits=10)
  1240. data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
  1241. a = exact.test(data, method="Boschloo", alternative="two.sided",
  1242. tsmethod="central", np.interval=TRUE, beta=1e-8)
  1243. ```
  1244. """
  1245. res = boschloo_exact(input_sample, alternative="two-sided", n=64)
  1246. # Need n = 64 for python 32-bit
  1247. statistic, pvalue = res.statistic, res.pvalue
  1248. assert_allclose([statistic, pvalue], expected, atol=self.ATOL)
  1249. def test_raises(self):
  1250. # test we raise an error for wrong input number of nuisances.
  1251. error_msg = (
  1252. "Number of points `n` must be strictly positive, found 0"
  1253. )
  1254. with assert_raises(ValueError, match=error_msg):
  1255. boschloo_exact([[1, 2], [3, 4]], n=0)
  1256. # test we raise an error for wrong shape of input.
  1257. error_msg = "The input `table` must be of shape \\(2, 2\\)."
  1258. with assert_raises(ValueError, match=error_msg):
  1259. boschloo_exact(np.arange(6).reshape(2, 3))
  1260. # Test all values must be positives
  1261. error_msg = "All values in `table` must be nonnegative."
  1262. with assert_raises(ValueError, match=error_msg):
  1263. boschloo_exact([[-1, 2], [3, 4]])
  1264. # Test value error on wrong alternative param
  1265. error_msg = (
  1266. r"`alternative` should be one of \('two-sided', 'less', "
  1267. r"'greater'\), found .*"
  1268. )
  1269. with assert_raises(ValueError, match=error_msg):
  1270. boschloo_exact([[1, 2], [3, 4]], "not-correct")
  1271. @pytest.mark.parametrize(
  1272. "input_sample,expected",
  1273. [
  1274. ([[0, 5], [0, 10]], (np.nan, np.nan)),
  1275. ([[5, 0], [10, 0]], (np.nan, np.nan)),
  1276. ],
  1277. )
  1278. def test_row_or_col_zero(self, input_sample, expected):
  1279. res = boschloo_exact(input_sample)
  1280. statistic, pvalue = res.statistic, res.pvalue
  1281. assert_equal(pvalue, expected[0])
  1282. assert_equal(statistic, expected[1])
  1283. def test_two_sided_gt_1(self):
  1284. # Check that returned p-value does not exceed 1 even when twice
  1285. # the minimum of the one-sided p-values does. See gh-15345.
  1286. tbl = [[1, 1], [13, 12]]
  1287. pl = boschloo_exact(tbl, alternative='less').pvalue
  1288. pg = boschloo_exact(tbl, alternative='greater').pvalue
  1289. assert 2*min(pl, pg) > 1
  1290. pt = boschloo_exact(tbl, alternative='two-sided').pvalue
  1291. assert pt == 1.0
  1292. @pytest.mark.parametrize("alternative", ("less", "greater"))
  1293. def test_against_fisher_exact(self, alternative):
  1294. # Check that the statistic of `boschloo_exact` is the same as the
  1295. # p-value of `fisher_exact` (for one-sided tests). See gh-15345.
  1296. tbl = [[2, 7], [8, 2]]
  1297. boschloo_stat = boschloo_exact(tbl, alternative=alternative).statistic
  1298. fisher_p = stats.fisher_exact(tbl, alternative=alternative)[1]
  1299. assert_allclose(boschloo_stat, fisher_p)
  1300. @make_xp_test_case(cramervonmises_2samp)
  1301. class TestCvm_2samp:
  1302. @pytest.mark.parametrize('args', [([], np.arange(5)),
  1303. (np.arange(5), [1])])
  1304. @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no axis_nan_policy")
  1305. def test_too_small_input(self, args, xp):
  1306. args = (xp.asarray(arg, dtype=xp_default_dtype(xp)) for arg in args)
  1307. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  1308. res = cramervonmises_2samp(*args)
  1309. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  1310. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  1311. def test_invalid_input(self, xp):
  1312. y = xp.arange(5)
  1313. msg = 'method must be either auto, exact or asymptotic'
  1314. with pytest.raises(ValueError, match=msg):
  1315. cramervonmises_2samp(y, y, 'xyz')
  1316. def test_list_input(self): # list input only relevant for NumPy
  1317. x = [2, 3, 4, 7, 6]
  1318. y = [0.2, 0.7, 12, 18]
  1319. r1 = cramervonmises_2samp(x, y)
  1320. r2 = cramervonmises_2samp(np.array(x), np.array(y))
  1321. assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue))
  1322. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  1323. def test_example_conover(self, dtype, xp):
  1324. # Example 2 in Section 6.2 of W.J. Conover: Practical Nonparametric
  1325. # Statistics, 1971.
  1326. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  1327. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  1328. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  1329. x = xp.asarray([7.6, 8.4, 8.6, 8.7, 9.3, 9.9, 10.1, 10.6, 11.2], dtype=dtype)
  1330. y = xp.asarray([5.2, 5.7, 5.9, 6.5, 6.8, 8.2, 9.1, 9.8,
  1331. 10.8, 11.3, 11.5, 12.3, 12.5, 13.4, 14.6], dtype=dtype)
  1332. r = cramervonmises_2samp(x, y)
  1333. xp_assert_close(r.statistic, xp.asarray(0.262, dtype=dtype), atol=1e-3)
  1334. xp_assert_close(r.pvalue, xp.asarray(.18, dtype=dtype), atol=1e-2)
  1335. @pytest.mark.parametrize('statistic, m, n, pval',
  1336. [(710, 5, 6, 48./462),
  1337. (1897, 7, 7, 117./1716),
  1338. (576, 4, 6, 2./210),
  1339. (1764, 6, 7, 2./1716)])
  1340. def test_exact_pvalue(self, statistic, m, n, pval): # only implemented w/ NumPy
  1341. # the exact values are taken from Anderson: On the distribution of the
  1342. # two-sample Cramer-von-Mises criterion, 1962.
  1343. # The values are taken from Table 2, 3, 4 and 5
  1344. assert_equal(_pval_cvm_2samp_exact(statistic, m, n), pval)
  1345. @pytest.mark.xslow
  1346. def test_large_sample(self, xp):
  1347. # for large samples, the statistic U gets very large
  1348. # do a sanity check that p-value is not 0, 1 or nan
  1349. rng = np.random.default_rng(4367)
  1350. x = distributions.norm.rvs(size=1000000, random_state=rng)
  1351. y = distributions.norm.rvs(size=900000, random_state=rng)
  1352. x, y = xp.asarray(x), xp.asarray(y)
  1353. r = cramervonmises_2samp(x, y)
  1354. assert 0 < r.pvalue < 1
  1355. r = cramervonmises_2samp(x, y+0.1)
  1356. assert 0 < r.pvalue < 1
  1357. def test_exact_vs_asymptotic(self, xp):
  1358. rng = np.random.RandomState(0)
  1359. x = xp.asarray(rng.random(7))
  1360. y = xp.asarray(rng.random(8))
  1361. r1 = cramervonmises_2samp(x, y, method='exact')
  1362. r2 = cramervonmises_2samp(x, y, method='asymptotic')
  1363. xp_assert_equal(r1.statistic, r2.statistic)
  1364. xp_assert_close(r1.pvalue, r2.pvalue, atol=1e-2)
  1365. def test_method_auto(self, xp):
  1366. x = xp.arange(20.)
  1367. y = xp.asarray([0.5, 4.7, 13.1])
  1368. r1 = cramervonmises_2samp(x, y, method='exact')
  1369. r2 = cramervonmises_2samp(x, y, method='auto')
  1370. xp_assert_equal(r1.pvalue, r2.pvalue)
  1371. # switch to asymptotic if one sample has more than 20 observations
  1372. x = xp.arange(21.)
  1373. r1 = cramervonmises_2samp(x, y, method='asymptotic')
  1374. r2 = cramervonmises_2samp(x, y, method='auto')
  1375. xp_assert_equal(r1.pvalue, r2.pvalue)
  1376. def test_same_input(self, xp):
  1377. # make sure trivial edge case can be handled
  1378. # note that _cdf_cvm_inf(0) = nan. implementation avoids nan by
  1379. # returning pvalue=1 for very small values of the statistic
  1380. x = xp.arange(15)
  1381. res = cramervonmises_2samp(x, x)
  1382. xp_assert_equal(res.statistic, xp.asarray(0.))
  1383. xp_assert_equal(res.pvalue, xp.asarray(1.))
  1384. # check exact p-value
  1385. res = cramervonmises_2samp(x[:4], x[:4])
  1386. xp_assert_equal(res.statistic, xp.asarray(0.))
  1387. xp_assert_equal(res.pvalue, xp.asarray(1.))
  1388. class TestTukeyHSD:
  1389. data_same_size = ([24.5, 23.5, 26.4, 27.1, 29.9],
  1390. [28.4, 34.2, 29.5, 32.2, 30.1],
  1391. [26.1, 28.3, 24.3, 26.2, 27.8])
  1392. data_diff_size = ([24.5, 23.5, 26.28, 26.4, 27.1, 29.9, 30.1, 30.1],
  1393. [28.4, 34.2, 29.5, 32.2, 30.1],
  1394. [26.1, 28.3, 24.3, 26.2, 27.8])
  1395. extreme_size = ([24.5, 23.5, 26.4],
  1396. [28.4, 34.2, 29.5, 32.2, 30.1, 28.4, 34.2, 29.5, 32.2,
  1397. 30.1],
  1398. [26.1, 28.3, 24.3, 26.2, 27.8])
  1399. sas_same_size = """
  1400. Comparison LowerCL Difference UpperCL Significance
  1401. 2 - 3 0.6908830568 4.34 7.989116943 1
  1402. 2 - 1 0.9508830568 4.6 8.249116943 1
  1403. 3 - 2 -7.989116943 -4.34 -0.6908830568 1
  1404. 3 - 1 -3.389116943 0.26 3.909116943 0
  1405. 1 - 2 -8.249116943 -4.6 -0.9508830568 1
  1406. 1 - 3 -3.909116943 -0.26 3.389116943 0
  1407. """
  1408. sas_diff_size = """
  1409. Comparison LowerCL Difference UpperCL Significance
  1410. 2 - 1 0.2679292645 3.645 7.022070736 1
  1411. 2 - 3 0.5934764007 4.34 8.086523599 1
  1412. 1 - 2 -7.022070736 -3.645 -0.2679292645 1
  1413. 1 - 3 -2.682070736 0.695 4.072070736 0
  1414. 3 - 2 -8.086523599 -4.34 -0.5934764007 1
  1415. 3 - 1 -4.072070736 -0.695 2.682070736 0
  1416. """
  1417. sas_extreme = """
  1418. Comparison LowerCL Difference UpperCL Significance
  1419. 2 - 3 1.561605075 4.34 7.118394925 1
  1420. 2 - 1 2.740784879 6.08 9.419215121 1
  1421. 3 - 2 -7.118394925 -4.34 -1.561605075 1
  1422. 3 - 1 -1.964526566 1.74 5.444526566 0
  1423. 1 - 2 -9.419215121 -6.08 -2.740784879 1
  1424. 1 - 3 -5.444526566 -1.74 1.964526566 0
  1425. """
  1426. @pytest.mark.parametrize("data,res_expect_str,atol",
  1427. ((data_same_size, sas_same_size, 1e-4),
  1428. (data_diff_size, sas_diff_size, 1e-4),
  1429. (extreme_size, sas_extreme, 1e-10),
  1430. ),
  1431. ids=["equal size sample",
  1432. "unequal sample size",
  1433. "extreme sample size differences"])
  1434. def test_compare_sas(self, data, res_expect_str, atol):
  1435. '''
  1436. SAS code used to generate results for each sample:
  1437. DATA ACHE;
  1438. INPUT BRAND RELIEF;
  1439. CARDS;
  1440. 1 24.5
  1441. ...
  1442. 3 27.8
  1443. ;
  1444. ods graphics on; ODS RTF;ODS LISTING CLOSE;
  1445. PROC ANOVA DATA=ACHE;
  1446. CLASS BRAND;
  1447. MODEL RELIEF=BRAND;
  1448. MEANS BRAND/TUKEY CLDIFF;
  1449. TITLE 'COMPARE RELIEF ACROSS MEDICINES - ANOVA EXAMPLE';
  1450. ods output CLDiffs =tc;
  1451. proc print data=tc;
  1452. format LowerCL 17.16 UpperCL 17.16 Difference 17.16;
  1453. title "Output with many digits";
  1454. RUN;
  1455. QUIT;
  1456. ODS RTF close;
  1457. ODS LISTING;
  1458. '''
  1459. res_expect = np.asarray(res_expect_str.replace(" - ", " ").split()[5:],
  1460. dtype=float).reshape((6, 6))
  1461. res_tukey = stats.tukey_hsd(*data)
  1462. conf = res_tukey.confidence_interval()
  1463. # loop over the comparisons
  1464. for i, j, l, s, h, sig in res_expect:
  1465. i, j = int(i) - 1, int(j) - 1
  1466. assert_allclose(conf.low[i, j], l, atol=atol)
  1467. assert_allclose(res_tukey.statistic[i, j], s, atol=atol)
  1468. assert_allclose(conf.high[i, j], h, atol=atol)
  1469. assert_allclose((res_tukey.pvalue[i, j] <= .05), sig == 1)
  1470. matlab_sm_siz = """
  1471. 1 2 -8.2491590248597 -4.6 -0.9508409751403 0.0144483269098
  1472. 1 3 -3.9091590248597 -0.26 3.3891590248597 0.9803107240900
  1473. 2 3 0.6908409751403 4.34 7.9891590248597 0.0203311368795
  1474. """
  1475. matlab_diff_sz = """
  1476. 1 2 -7.02207069748501 -3.645 -0.26792930251500 0.03371498443080
  1477. 1 3 -2.68207069748500 0.695 4.07207069748500 0.85572267328807
  1478. 2 3 0.59347644287720 4.34 8.08652355712281 0.02259047020620
  1479. """
  1480. @pytest.mark.parametrize("data,res_expect_str,atol",
  1481. ((data_same_size, matlab_sm_siz, 1e-12),
  1482. (data_diff_size, matlab_diff_sz, 1e-7)),
  1483. ids=["equal size sample",
  1484. "unequal size sample"])
  1485. def test_compare_matlab(self, data, res_expect_str, atol):
  1486. """
  1487. vals = [24.5, 23.5, 26.4, 27.1, 29.9, 28.4, 34.2, 29.5, 32.2, 30.1,
  1488. 26.1, 28.3, 24.3, 26.2, 27.8]
  1489. names = {'zero', 'zero', 'zero', 'zero', 'zero', 'one', 'one', 'one',
  1490. 'one', 'one', 'two', 'two', 'two', 'two', 'two'}
  1491. [p,t,stats] = anova1(vals,names,"off");
  1492. [c,m,h,nms] = multcompare(stats, "CType","hsd");
  1493. """
  1494. res_expect = np.asarray(res_expect_str.split(),
  1495. dtype=float).reshape((3, 6))
  1496. res_tukey = stats.tukey_hsd(*data)
  1497. conf = res_tukey.confidence_interval()
  1498. # loop over the comparisons
  1499. for i, j, l, s, h, p in res_expect:
  1500. i, j = int(i) - 1, int(j) - 1
  1501. assert_allclose(conf.low[i, j], l, atol=atol)
  1502. assert_allclose(res_tukey.statistic[i, j], s, atol=atol)
  1503. assert_allclose(conf.high[i, j], h, atol=atol)
  1504. assert_allclose(res_tukey.pvalue[i, j], p, atol=atol)
  1505. def test_compare_r(self):
  1506. """
  1507. Testing against results and p-values from R:
  1508. from: https://www.rdocumentation.org/packages/stats/versions/3.6.2/
  1509. topics/TukeyHSD
  1510. > require(graphics)
  1511. > summary(fm1 <- aov(breaks ~ tension, data = warpbreaks))
  1512. > TukeyHSD(fm1, "tension", ordered = TRUE)
  1513. > plot(TukeyHSD(fm1, "tension"))
  1514. Tukey multiple comparisons of means
  1515. 95% family-wise confidence level
  1516. factor levels have been ordered
  1517. Fit: aov(formula = breaks ~ tension, data = warpbreaks)
  1518. $tension
  1519. """
  1520. str_res = """
  1521. diff lwr upr p adj
  1522. 2 - 3 4.722222 -4.8376022 14.28205 0.4630831
  1523. 1 - 3 14.722222 5.1623978 24.28205 0.0014315
  1524. 1 - 2 10.000000 0.4401756 19.55982 0.0384598
  1525. """
  1526. res_expect = np.asarray(str_res.replace(" - ", " ").split()[5:],
  1527. dtype=float).reshape((3, 6))
  1528. data = ([26, 30, 54, 25, 70, 52, 51, 26, 67,
  1529. 27, 14, 29, 19, 29, 31, 41, 20, 44],
  1530. [18, 21, 29, 17, 12, 18, 35, 30, 36,
  1531. 42, 26, 19, 16, 39, 28, 21, 39, 29],
  1532. [36, 21, 24, 18, 10, 43, 28, 15, 26,
  1533. 20, 21, 24, 17, 13, 15, 15, 16, 28])
  1534. res_tukey = stats.tukey_hsd(*data)
  1535. conf = res_tukey.confidence_interval()
  1536. # loop over the comparisons
  1537. for i, j, s, l, h, p in res_expect:
  1538. i, j = int(i) - 1, int(j) - 1
  1539. # atols are set to the number of digits present in the r result.
  1540. assert_allclose(conf.low[i, j], l, atol=1e-7)
  1541. assert_allclose(res_tukey.statistic[i, j], s, atol=1e-6)
  1542. assert_allclose(conf.high[i, j], h, atol=1e-5)
  1543. assert_allclose(res_tukey.pvalue[i, j], p, atol=1e-7)
  1544. def test_engineering_stat_handbook(self):
  1545. '''
  1546. Example sourced from:
  1547. https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm
  1548. '''
  1549. group1 = [6.9, 5.4, 5.8, 4.6, 4.0]
  1550. group2 = [8.3, 6.8, 7.8, 9.2, 6.5]
  1551. group3 = [8.0, 10.5, 8.1, 6.9, 9.3]
  1552. group4 = [5.8, 3.8, 6.1, 5.6, 6.2]
  1553. res = stats.tukey_hsd(group1, group2, group3, group4)
  1554. conf = res.confidence_interval()
  1555. lower = np.asarray([
  1556. [0, 0, 0, -2.25],
  1557. [.29, 0, -2.93, .13],
  1558. [1.13, 0, 0, .97],
  1559. [0, 0, 0, 0]])
  1560. upper = np.asarray([
  1561. [0, 0, 0, 1.93],
  1562. [4.47, 0, 1.25, 4.31],
  1563. [5.31, 0, 0, 5.15],
  1564. [0, 0, 0, 0]])
  1565. for (i, j) in [(1, 0), (2, 0), (0, 3), (1, 2), (2, 3)]:
  1566. assert_allclose(conf.low[i, j], lower[i, j], atol=1e-2)
  1567. assert_allclose(conf.high[i, j], upper[i, j], atol=1e-2)
  1568. def test_rand_symm(self):
  1569. # test some expected identities of the results
  1570. rng = np.random.default_rng(2699550179)
  1571. data = rng.random((3, 100))
  1572. res = stats.tukey_hsd(*data)
  1573. conf = res.confidence_interval()
  1574. # the confidence intervals should be negated symmetric of each other
  1575. assert_equal(conf.low, -conf.high.T)
  1576. # the `high` and `low` center diagonals should be the same since the
  1577. # mean difference in a self comparison is 0.
  1578. assert_equal(np.diagonal(conf.high), conf.high[0, 0])
  1579. assert_equal(np.diagonal(conf.low), conf.low[0, 0])
  1580. # statistic array should be antisymmetric with zeros on the diagonal
  1581. assert_equal(res.statistic, -res.statistic.T)
  1582. assert_equal(np.diagonal(res.statistic), 0)
  1583. # p-values should be symmetric and 1 when compared to itself
  1584. assert_equal(res.pvalue, res.pvalue.T)
  1585. assert_equal(np.diagonal(res.pvalue), 1)
  1586. def test_no_inf(self):
  1587. with assert_raises(ValueError, match="...must be finite."):
  1588. stats.tukey_hsd([1, 2, 3], [2, np.inf], [6, 7, 3])
  1589. def test_is_1d(self):
  1590. with assert_raises(ValueError, match="...must be one-dimensional"):
  1591. stats.tukey_hsd([[1, 2], [2, 3]], [2, 5], [5, 23, 6])
  1592. def test_no_empty(self):
  1593. with assert_raises(ValueError, match="...must be greater than one"):
  1594. stats.tukey_hsd([], [2, 5], [4, 5, 6])
  1595. def test_equal_var_input_validation(self):
  1596. msg = "Expected a boolean value for 'equal_var'"
  1597. with assert_raises(TypeError, match=msg):
  1598. stats.tukey_hsd([1, 2, 3], [2, 5], [6, 7], equal_var="False")
  1599. @pytest.mark.parametrize("nargs", (0, 1))
  1600. def test_not_enough_treatments(self, nargs):
  1601. with assert_raises(ValueError, match="...more than 1 treatment."):
  1602. stats.tukey_hsd(*([[23, 7, 3]] * nargs))
  1603. @pytest.mark.parametrize("cl", [-.5, 0, 1, 2])
  1604. def test_conf_level_invalid(self, cl):
  1605. with assert_raises(ValueError, match="must be between 0 and 1"):
  1606. r = stats.tukey_hsd([23, 7, 3], [3, 4], [9, 4])
  1607. r.confidence_interval(cl)
  1608. def test_2_args_ttest(self):
  1609. # that with 2 treatments the `pvalue` is equal to that of `ttest_ind`
  1610. res_tukey = stats.tukey_hsd(*self.data_diff_size[:2])
  1611. res_ttest = stats.ttest_ind(*self.data_diff_size[:2])
  1612. assert_allclose(res_ttest.pvalue, res_tukey.pvalue[0, 1])
  1613. assert_allclose(res_ttest.pvalue, res_tukey.pvalue[1, 0])
  1614. class TestGamesHowell:
  1615. # data with unequal variances
  1616. data_same_size = ([24., 23., 31., 51.],
  1617. [34., 18., 18., 26.],
  1618. [17., 68., 59., 7.])
  1619. data_diff_size = ([30., 23., 51.],
  1620. [-81., 71., -27., 63.],
  1621. [42., 11., 29., 19., 50.],
  1622. [23., 22., 20., 18., 9.])
  1623. spss_same_size = """
  1624. Mean Diff Lower Bound Upper Bound Sig
  1625. 0 - 1 8.25000000 -16.5492749527311 33.0492749527311 0.558733632413559
  1626. 0 - 2 -5.50000000 -63.6702454316458 52.6702454316458 0.941147750599221
  1627. 1 - 2 -13.7500000 -74.3174374251372 46.8174374251372 0.682983914946841
  1628. """
  1629. spss_diff_size = """
  1630. Mean Diff Lower Bound Upper Bound Sig
  1631. 0 - 1 28.16666667 -141.985416377670 198.318749711003 0.8727542747886180
  1632. 0 - 2 4.466666667 -37.2830676783904 46.2164010117237 0.9752628408671710
  1633. 0 - 3 16.26666667 -35.0933112382470 67.6266445715803 0.4262506151302880
  1634. 1 - 2 -23.70000000 -195.315617201249 147.915617201249 0.9148950609000590
  1635. 1 - 3 -11.90000000 -188.105478728519 164.305478728519 0.9861432250093960
  1636. 2 - 3 11.80000000 -16.2894857524254 39.8894857524254 0.4755344436335670
  1637. """
  1638. @pytest.mark.xslow
  1639. @pytest.mark.parametrize("data, res_expect_str",
  1640. ((data_same_size, spss_same_size),
  1641. (data_diff_size, spss_diff_size)),
  1642. ids=["equal size sample",
  1643. "unequal sample size"])
  1644. def test_compare_spss(self, data, res_expect_str):
  1645. """
  1646. DATA LIST LIST /Group (F1.0) Value (F8.2).
  1647. BEGIN DATA
  1648. 0 24
  1649. 0 23
  1650. 0 31
  1651. 0 51
  1652. 1 34
  1653. 1 18
  1654. 1 18
  1655. 1 26
  1656. 2 17
  1657. 2 68
  1658. 2 59
  1659. 2 7
  1660. END DATA.
  1661. ONEWAY Value BY Group
  1662. /MISSING ANALYSIS
  1663. /POSTHOC=GH ALPHA(0.05).
  1664. """
  1665. res_expect = np.asarray(
  1666. res_expect_str.replace(" - ", " ").split()[7:],
  1667. dtype=float).reshape(-1, 6)
  1668. res_games = stats.tukey_hsd(*data, equal_var=False)
  1669. conf = res_games.confidence_interval()
  1670. # loop over the comparisons
  1671. for i, j, s, l, h, p in res_expect:
  1672. i, j = int(i), int(j)
  1673. assert_allclose(res_games.statistic[i, j], s, atol=1e-8)
  1674. assert_allclose(res_games.pvalue[i, j], p, atol=1e-8)
  1675. assert_allclose(conf.low[i, j], l, atol=1e-6)
  1676. assert_allclose(conf.high[i, j], h, atol=1e-5)
  1677. r_same_size = """
  1678. q value Pr(>|q|)
  1679. 1 - 0 == 0 -1.5467805948856344 0.55873362851759
  1680. 2 - 0 == 0 0.4726721776628535 0.94114775035993
  1681. 2 - 1 == 0 1.246837541297872 0.68298393799782
  1682. """
  1683. r_diff_size = """
  1684. q value Pr(>|q|)
  1685. 1 - 0 == 0 -1.0589317485313876 0.87275427357438
  1686. 2 - 0 == 0 -0.5716222106144833 0.97526284087419
  1687. 3 - 0 == 0 -2.6209678382077000 0.42625067714691
  1688. 2 - 1 == 0 0.8971899898179028 0.91489506061850
  1689. 3 - 1 == 0 0.4579447210555352 0.98614322544695
  1690. 3 - 2 == 0 -2.198800177874794 0.47553444364614
  1691. """
  1692. @pytest.mark.parametrize("data, res_expect_str",
  1693. ((data_same_size, r_same_size),
  1694. (data_diff_size, r_diff_size)),
  1695. ids=["equal size sample",
  1696. "unequal sample size"])
  1697. def test_compare_r(self, data, res_expect_str):
  1698. """
  1699. games-howell is provided by PMCMRplus package
  1700. https://search.r-project.org/CRAN/refmans/PMCMRplus/html/gamesHowellTest.html
  1701. > library("PMCMRplus")
  1702. > options(digits=16)
  1703. > table = data.frame(
  1704. values = c(24., 23., 31., 51., 34., 18., 18., 26., 17., 68., 59., 7.),
  1705. groups = c("0", "0", "0", "0", "1", "1", "1", "1", "2", "2", "2", "2")
  1706. )
  1707. > table$groups = as.factor(table$groups)
  1708. > fit <-aov(values ~ groups, table)
  1709. > res = gamesHowellTest(fit)
  1710. > summary(res)
  1711. """
  1712. res_expect = np.asarray(
  1713. res_expect_str.replace(" - ", " ")
  1714. .replace(" == ", " ").split()[3:],
  1715. dtype=float).reshape(-1, 5)
  1716. res_games = stats.tukey_hsd(*data, equal_var=False)
  1717. # loop over the comparisons
  1718. # note confidence intervals are not provided by PMCMRplus
  1719. for j, i, _, _, p in res_expect:
  1720. i, j = int(i), int(j)
  1721. assert_allclose(res_games.pvalue[i, j], p, atol=1e-7)
  1722. # Data validation test has been covered by TestTukeyHSD
  1723. # like empty, 1d, inf, and lack of tretments
  1724. # because games_howell leverage _tukey_hsd_iv()
  1725. class TestPoissonMeansTest:
  1726. @pytest.mark.parametrize("c1, n1, c2, n2, p_expect", (
  1727. # example from [1], 6. Illustrative examples: Example 1
  1728. [0, 100, 3, 100, 0.0884],
  1729. [2, 100, 6, 100, 0.1749]
  1730. ))
  1731. def test_paper_examples(self, c1, n1, c2, n2, p_expect):
  1732. res = stats.poisson_means_test(c1, n1, c2, n2)
  1733. assert_allclose(res.pvalue, p_expect, atol=1e-4)
  1734. @pytest.mark.parametrize("c1, n1, c2, n2, p_expect, alt, d", (
  1735. # These test cases are produced by the wrapped fortran code from the
  1736. # original authors. Using a slightly modified version of this fortran,
  1737. # found here, https://github.com/nolanbconaway/poisson-etest,
  1738. # additional tests were created.
  1739. [20, 10, 20, 10, 0.9999997568929630, 'two-sided', 0],
  1740. [10, 10, 10, 10, 0.9999998403241203, 'two-sided', 0],
  1741. [50, 15, 1, 1, 0.09920321053409643, 'two-sided', .05],
  1742. [3, 100, 20, 300, 0.12202725450896404, 'two-sided', 0],
  1743. [3, 12, 4, 20, 0.40416087318539173, 'greater', 0],
  1744. [4, 20, 3, 100, 0.008053640402974236, 'greater', 0],
  1745. # publishing paper does not include a `less` alternative,
  1746. # so it was calculated with switched argument order and
  1747. # alternative="greater"
  1748. [4, 20, 3, 10, 0.3083216325432898, 'less', 0],
  1749. [1, 1, 50, 15, 0.09322998607245102, 'less', 0]
  1750. ))
  1751. def test_fortran_authors(self, c1, n1, c2, n2, p_expect, alt, d):
  1752. res = stats.poisson_means_test(c1, n1, c2, n2, alternative=alt, diff=d)
  1753. assert_allclose(res.pvalue, p_expect, atol=2e-6, rtol=1e-16)
  1754. def test_different_results(self):
  1755. # The implementation in Fortran is known to break down at higher
  1756. # counts and observations, so we expect different results. By
  1757. # inspection we can infer the p-value to be near one.
  1758. count1, count2 = 10000, 10000
  1759. nobs1, nobs2 = 10000, 10000
  1760. res = stats.poisson_means_test(count1, nobs1, count2, nobs2)
  1761. assert_allclose(res.pvalue, 1)
  1762. def test_less_than_zero_lambda_hat2(self):
  1763. # demonstrates behavior that fixes a known fault from original Fortran.
  1764. # p-value should clearly be near one.
  1765. count1, count2 = 0, 0
  1766. nobs1, nobs2 = 1, 1
  1767. res = stats.poisson_means_test(count1, nobs1, count2, nobs2)
  1768. assert_allclose(res.pvalue, 1)
  1769. def test_input_validation(self):
  1770. count1, count2 = 0, 0
  1771. nobs1, nobs2 = 1, 1
  1772. # test non-integral events
  1773. message = '`k1` and `k2` must be integers.'
  1774. with assert_raises(TypeError, match=message):
  1775. stats.poisson_means_test(.7, nobs1, count2, nobs2)
  1776. with assert_raises(TypeError, match=message):
  1777. stats.poisson_means_test(count1, nobs1, .7, nobs2)
  1778. # test negative events
  1779. message = '`k1` and `k2` must be greater than or equal to 0.'
  1780. with assert_raises(ValueError, match=message):
  1781. stats.poisson_means_test(-1, nobs1, count2, nobs2)
  1782. with assert_raises(ValueError, match=message):
  1783. stats.poisson_means_test(count1, nobs1, -1, nobs2)
  1784. # test negative sample size
  1785. message = '`n1` and `n2` must be greater than 0.'
  1786. with assert_raises(ValueError, match=message):
  1787. stats.poisson_means_test(count1, -1, count2, nobs2)
  1788. with assert_raises(ValueError, match=message):
  1789. stats.poisson_means_test(count1, nobs1, count2, -1)
  1790. # test negative difference
  1791. message = 'diff must be greater than or equal to 0.'
  1792. with assert_raises(ValueError, match=message):
  1793. stats.poisson_means_test(count1, nobs1, count2, nobs2, diff=-1)
  1794. # test invalid alternative
  1795. message = 'Alternative must be one of ...'
  1796. with assert_raises(ValueError, match=message):
  1797. stats.poisson_means_test(1, 2, 1, 2, alternative='error')
  1798. class TestBWSTest:
  1799. def test_bws_input_validation(self):
  1800. rng = np.random.default_rng(4571775098104213308)
  1801. x, y = rng.random(size=(2, 7))
  1802. message = '`x` and `y` must be exactly one-dimensional.'
  1803. with pytest.raises(ValueError, match=message):
  1804. stats.bws_test([x, x], [y, y])
  1805. message = '`x` and `y` must not contain NaNs.'
  1806. with pytest.raises(ValueError, match=message):
  1807. stats.bws_test([np.nan], y)
  1808. message = '`x` and `y` must be of nonzero size.'
  1809. with pytest.raises(ValueError, match=message):
  1810. stats.bws_test(x, [])
  1811. message = 'alternative` must be one of...'
  1812. with pytest.raises(ValueError, match=message):
  1813. stats.bws_test(x, y, alternative='ekki-ekki')
  1814. message = 'method` must be an instance of...'
  1815. with pytest.raises(ValueError, match=message):
  1816. stats.bws_test(x, y, method=42)
  1817. def test_against_published_reference(self):
  1818. # Test against Example 2 in bws_test Reference [1], pg 9
  1819. # https://link.springer.com/content/pdf/10.1007/BF02762032.pdf
  1820. x = [1, 2, 3, 4, 6, 7, 8]
  1821. y = [5, 9, 10, 11, 12, 13, 14]
  1822. res = stats.bws_test(x, y, alternative='two-sided')
  1823. assert_allclose(res.statistic, 5.132, atol=1e-3)
  1824. assert_equal(res.pvalue, 10/3432)
  1825. @pytest.mark.parametrize(('alternative', 'statistic', 'pvalue'),
  1826. [('two-sided', 1.7510204081633, 0.1264422777777),
  1827. ('less', -1.7510204081633, 0.05754662004662),
  1828. ('greater', -1.7510204081633, 0.9424533799534)])
  1829. def test_against_R(self, alternative, statistic, pvalue):
  1830. # Test against R library BWStest function bws_test
  1831. # library(BWStest)
  1832. # options(digits=16)
  1833. # x = c(...)
  1834. # y = c(...)
  1835. # bws_test(x, y, alternative='two.sided')
  1836. rng = np.random.default_rng(4571775098104213308)
  1837. x, y = rng.random(size=(2, 7))
  1838. res = stats.bws_test(x, y, alternative=alternative)
  1839. assert_allclose(res.statistic, statistic, rtol=1e-13)
  1840. assert_allclose(res.pvalue, pvalue, atol=1e-2, rtol=1e-1)
  1841. @pytest.mark.parametrize(('alternative', 'statistic', 'pvalue'),
  1842. [('two-sided', 1.142629265891, 0.2903950180801),
  1843. ('less', 0.99629665877411, 0.8545660222131),
  1844. ('greater', 0.99629665877411, 0.1454339777869)])
  1845. def test_against_R_imbalanced(self, alternative, statistic, pvalue):
  1846. # Test against R library BWStest function bws_test
  1847. # library(BWStest)
  1848. # options(digits=16)
  1849. # x = c(...)
  1850. # y = c(...)
  1851. # bws_test(x, y, alternative='two.sided')
  1852. rng = np.random.default_rng(5429015622386364034)
  1853. x = rng.random(size=9)
  1854. y = rng.random(size=8)
  1855. res = stats.bws_test(x, y, alternative=alternative)
  1856. assert_allclose(res.statistic, statistic, rtol=1e-13)
  1857. assert_allclose(res.pvalue, pvalue, atol=1e-2, rtol=1e-1)
  1858. def test_method(self):
  1859. # Test that `method` parameter has the desired effect
  1860. rng = np.random.default_rng(1520514347193347862)
  1861. x, y = rng.random(size=(2, 10))
  1862. rng = np.random.default_rng(1520514347193347862)
  1863. method = stats.PermutationMethod(n_resamples=10, rng=rng)
  1864. res1 = stats.bws_test(x, y, method=method)
  1865. assert len(res1.null_distribution) == 10
  1866. rng = np.random.default_rng(1520514347193347862)
  1867. method = stats.PermutationMethod(n_resamples=10, rng=rng)
  1868. res2 = stats.bws_test(x, y, method=method)
  1869. assert_allclose(res1.null_distribution, res2.null_distribution)
  1870. rng = np.random.default_rng(5205143471933478621)
  1871. method = stats.PermutationMethod(n_resamples=10, rng=rng)
  1872. res3 = stats.bws_test(x, y, method=method)
  1873. assert not np.allclose(res3.null_distribution, res1.null_distribution)
  1874. def test_directions(self):
  1875. # Sanity check of the sign of the one-sided statistic
  1876. rng = np.random.default_rng(1520514347193347862)
  1877. x = rng.random(size=5)
  1878. y = x - 1
  1879. res = stats.bws_test(x, y, alternative='greater')
  1880. assert res.statistic > 0
  1881. assert_equal(res.pvalue, 1 / len(res.null_distribution))
  1882. res = stats.bws_test(x, y, alternative='less')
  1883. assert res.statistic > 0
  1884. assert_equal(res.pvalue, 1)
  1885. res = stats.bws_test(y, x, alternative='less')
  1886. assert res.statistic < 0
  1887. assert_equal(res.pvalue, 1 / len(res.null_distribution))
  1888. res = stats.bws_test(y, x, alternative='greater')
  1889. assert res.statistic < 0
  1890. assert_equal(res.pvalue, 1)