| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164 |
- from itertools import product
- import numpy as np
- import functools
- import pytest
- from numpy.testing import assert_, assert_equal, assert_allclose
- from pytest import raises as assert_raises
- from scipy import stats, special
- from scipy.stats import distributions
- from scipy.stats._hypotests import (epps_singleton_2samp, cramervonmises,
- _cdf_cvm, cramervonmises_2samp,
- _pval_cvm_2samp_exact, barnard_exact,
- boschloo_exact)
- from scipy.stats._mannwhitneyu import mannwhitneyu, _mwu_state, _MWU
- from scipy._lib._testutils import _TestPythranFunc
- from scipy._lib import array_api_extra as xpx
- from scipy._lib._array_api import (make_xp_test_case, xp_default_dtype, is_numpy,
- eager_warns)
- from scipy._lib._array_api_no_0d import xp_assert_equal, xp_assert_close
- from scipy.stats._axis_nan_policy import SmallSampleWarning, too_small_1d_not_omit
- @make_xp_test_case(epps_singleton_2samp)
- class TestEppsSingleton:
- @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
- def test_statistic_1(self, dtype, xp):
- # first example in Goerg & Kaiser, also in original paper of
- # Epps & Singleton. Note: values do not match exactly, the
- # value of the interquartile range varies depending on how
- # quantiles are computed
- if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
- pytest.skip("Pre-NEP 50 doesn't respect dtypes")
- dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
- x = xp.asarray([-0.35, 2.55, 1.73, 0.73, 0.35,
- 2.69, 0.46, -0.94, -0.37, 12.07], dtype=dtype)
- y = xp.asarray([-1.15, -0.15, 2.48, 3.25, 3.71,
- 4.29, 5.00, 7.74, 8.38, 8.60], dtype=dtype)
- w, p = epps_singleton_2samp(x, y)
- xp_assert_close(w, xp.asarray(15.14, dtype=dtype), atol=0.03)
- xp_assert_close(p, xp.asarray(0.00442, dtype=dtype), atol=0.0001)
- def test_statistic_2(self, xp):
- # second example in Goerg & Kaiser, again not a perfect match
- x = xp.asarray([0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
- 5, 5, 5, 5, 6, 10, 10, 10, 10])
- y = xp.asarray([10, 4, 0, 5, 10, 10, 0, 5, 6, 7,
- 10, 3, 1, 7, 0, 8, 1, 5, 8, 10])
- w, p = epps_singleton_2samp(x, y)
- xp_assert_close(w, xp.asarray(8.900), atol=1e-3)
- xp_assert_close(p, xp.asarray(0.06364), atol=5e-5)
- def test_epps_singleton_array_like(self): # only relevant for NumPy
- x, y = np.arange(30), np.arange(28)
- w1, p1 = epps_singleton_2samp(list(x), list(y))
- w2, p2 = epps_singleton_2samp(tuple(x), tuple(y))
- w3, p3 = epps_singleton_2samp(x, y)
- assert_(w1 == w2 == w3)
- assert_(p1 == p2 == p3)
- def test_epps_singleton_size(self, xp):
- # warns if sample contains fewer than 5 elements
- x, y = xp.asarray([1, 2, 3, 4]), xp.arange(10)
- with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
- res = epps_singleton_2samp(x, y)
- xp_assert_equal(res.statistic, xp.asarray(xp.nan))
- xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
- def test_epps_singleton_nonfinite(self, xp):
- rng = np.random.default_rng(83249872384543)
- x = rng.random(size=(10, 11))
- y = rng.random(size=(10, 12))
- i = np.asarray([1, 4, 9]) # arbitrary rows
- w_ref, p_ref = epps_singleton_2samp(x, y, axis=-1)
- w_ref[i] = np.nan
- p_ref[i] = np.nan
- x[i[0], 0] = np.nan
- x[i[1], 1] = np.inf
- y[i[2], 2] = -np.inf
- x, y = xp.asarray(x), xp.asarray(y)
- w_res, p_res = epps_singleton_2samp(x, y, axis=-1)
- xp_assert_close(w_res, xp.asarray(w_ref))
- xp_assert_close(p_res, xp.asarray(p_ref))
- @make_xp_test_case(stats.cramervonmises)
- class TestCvm:
- # the expected values of the cdfs are taken from Table 1 in
- # Csorgo / Faraway: The Exact and Asymptotic Distribution of
- # Cramér-von Mises Statistics, 1996.
- @pytest.mark.parametrize('n, x, ref', [
- (4, [0.02983, 0.04111, 0.12331, 0.94251], [0.01, 0.05, 0.5, 0.999]),
- (10, [0.02657, 0.03830, 0.12068, 0.56643], [0.01, 0.05, 0.5, 0.975]),
- (1000, [0.02481, 0.03658, 0.11889, 1.16120], [0.01, 0.05, 0.5, 0.999]),
- (None, [0.02480, 0.03656, 0.11888, 1.16204], [0.01, 0.05, 0.5, 0.999]),
- ])
- def test_cdf_ref(self, n, x, ref, xp):
- xp_assert_close(_cdf_cvm(xp.asarray(x), n), xp.asarray(ref), atol=1e-4)
- def test_cdf_support(self, xp):
- # cdf has support on [1/(12*n), n/3]
- xp_assert_equal(_cdf_cvm(xp.asarray([1/(12*533), 533/3]), 533),
- xp.asarray([0., 1.]))
- xp_assert_equal(_cdf_cvm(xp.asarray([1/(12*(27 + 1)), (27 + 1)/3]), 27),
- xp.asarray([0., 1.]))
- def test_cdf_large_n(self, xp):
- # test that asymptotic cdf and cdf for large samples are close
- x = xp.asarray([0.02480, 0.03656, 0.11888, 1.16204, 100])
- xp_assert_close(_cdf_cvm(x, 10000), _cdf_cvm(x), atol=1e-4)
- def test_large_x(self, xp):
- # for large values of x and n, the series used to compute the cdf
- # converges slowly.
- # this leads to bug in R package goftest and MAPLE code that is
- # the basis of the implementation in scipy
- # note: cdf = 1 for x >= 1000/3 and n = 1000
- x = xp.asarray(333.3, dtype=xp.float64)
- assert (0.99999 < _cdf_cvm(x, 1000) < 1.0)
- assert (0.99999 < _cdf_cvm(x) < 1.0)
- def test_low_p(self, xp):
- # _cdf_cvm can return values larger than 1. In that case, we just
- # return a p-value of zero.
- n = 12
- res = cramervonmises(xp.ones(n)*0.8, special.ndtr)
- assert _cdf_cvm(res.statistic, n, xp=xp) > 1.0
- xp_assert_equal(res.pvalue, xp.asarray(0.))
- @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
- @pytest.mark.parametrize('x', [(), [1.5]])
- def test_invalid_input(self, x, xp):
- with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
- res = cramervonmises(xp.asarray(x), special.ndtr)
- xp_assert_equal(res.statistic, xp.asarray(xp.nan))
- xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
- @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
- def test_values_R(self, dtype, xp):
- if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
- pytest.skip("Pre-NEP 50 doesn't respect dtypes")
- dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
- # compared against R package goftest, version 1.1.1
- # library(goftest)
- # options(digits=16)
- # cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm")
- res = cramervonmises(xp.asarray([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], dtype=dtype),
- special.ndtr)
- xp_assert_close(res.statistic, xp.asarray(0.28815604599198, dtype=dtype))
- xp_assert_close(res.pvalue, xp.asarray( 0.1453465252039, dtype=dtype))
- # cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm", mean = 3, sd = 1.5)
- res = cramervonmises(xp.asarray([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], dtype=dtype),
- lambda x: special.ndtr((x - 3)/1.5))
- xp_assert_close(res.statistic, xp.asarray(0.94266847977072, dtype=dtype))
- xp_assert_close(res.pvalue, xp.asarray(0.002026416728467, dtype=dtype))
- # cvm.test(c(1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5), "pexp")
- res = cramervonmises(
- xp.asarray([1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5], dtype=dtype),
- lambda x: -xp.expm1(-x))
- xp_assert_close(res.statistic, xp.asarray(0.84218540922393, dtype=dtype))
- xp_assert_close(res.pvalue, xp.asarray(0.004433406063014, dtype=dtype))
- def test_str_cdf(self, xp):
- if not is_numpy(xp):
- message = "`cdf` must be a callable if `rvs` is a non-NumPy array."
- with pytest.raises(ValueError, match=message):
- cramervonmises(xp.asarray([1, 2, 3]), "beta")
- return
- x, args = np.arange(5), (1.4, 0.7)
- ref = cramervonmises(x, distributions.expon.cdf)
- res = cramervonmises(x, "expon")
- assert_equal((res.statistic, res.pvalue), (ref.statistic, ref.pvalue))
- ref = cramervonmises(x, distributions.beta.cdf, args)
- res = cramervonmises(x, "beta", args)
- assert_equal((res.statistic, res.pvalue), (ref.statistic, ref.pvalue))
- @make_xp_test_case(stats.mannwhitneyu)
- class TestMannWhitneyU:
- # All magic numbers are from R wilcox.test unless otherwise specified
- # https://rdrr.io/r/stats/wilcox.test.html
- # --- Test Input Validation ---
- @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
- def test_empty(self, xp):
- x = xp.asarray([1, 2]) # generic, valid inputs
- y = xp.asarray([3, 4])
- empty = xp.asarray([], dtype=x.dtype)
- nan = xp.asarray(xp.nan)
- with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
- res = mannwhitneyu(x, empty)
- xp_assert_close(res.statistic, nan)
- xp_assert_close(res.pvalue, nan)
- with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
- res = mannwhitneyu(empty, y)
- xp_assert_close(res.statistic, nan)
- xp_assert_close(res.pvalue, nan)
- with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
- res = mannwhitneyu(empty, empty)
- xp_assert_close(res.statistic, nan)
- xp_assert_close(res.pvalue, nan)
- def test_input_validation(self, xp):
- x = xp.asarray([1, 2]) # generic, valid inputs
- y = xp.asarray([3, 4])
- with assert_raises(ValueError, match="`use_continuity` must be one"):
- mannwhitneyu(x, y, use_continuity='ekki')
- with assert_raises(ValueError, match="`alternative` must be one of"):
- mannwhitneyu(x, y, alternative='ekki')
- with assert_raises(ValueError, match="`axis` must be an integer"):
- mannwhitneyu(x, y, axis=1.5)
- with assert_raises(ValueError, match="`method` must be one of"):
- mannwhitneyu(x, y, method='ekki')
- def test_auto(self, xp):
- # Test that default method ('auto') chooses intended method
- rng = np.random.default_rng(923823782530925934)
- n = 8 # threshold to switch from exact to asymptotic
- # both inputs are smaller than threshold; should use exact
- x = xp.asarray(rng.random(n-1))
- y = xp.asarray(rng.random(n-1))
- auto = mannwhitneyu(x, y)
- asymptotic = mannwhitneyu(x, y, method='asymptotic')
- exact = mannwhitneyu(x, y, method='exact')
- assert auto.pvalue == exact.pvalue
- assert auto.pvalue != asymptotic.pvalue
- # one input is smaller than threshold; should use exact
- x = xp.asarray(rng.random(n-1))
- y = xp.asarray(rng.random(n+1))
- auto = mannwhitneyu(x, y)
- asymptotic = mannwhitneyu(x, y, method='asymptotic')
- exact = mannwhitneyu(x, y, method='exact')
- assert auto.pvalue == exact.pvalue
- assert auto.pvalue != asymptotic.pvalue
- # other input is smaller than threshold; should use exact
- auto = mannwhitneyu(y, x)
- asymptotic = mannwhitneyu(x, y, method='asymptotic')
- exact = mannwhitneyu(x, y, method='exact')
- assert auto.pvalue == exact.pvalue
- assert auto.pvalue != asymptotic.pvalue
- # both inputs are larger than threshold; should use asymptotic
- x = xp.asarray(rng.random(n+1))
- y = xp.asarray(rng.random(n+1))
- auto = mannwhitneyu(x, y)
- asymptotic = mannwhitneyu(x, y, method='asymptotic')
- exact = mannwhitneyu(x, y, method='exact')
- assert auto.pvalue != exact.pvalue
- assert auto.pvalue == asymptotic.pvalue
- # both inputs are smaller than threshold, but there is a tie
- # should use asymptotic
- x = xp.asarray(rng.random(n-1))
- y = xp.asarray(rng.random(n-1))
- y = xpx.at(y)[3].set(x[3])
- auto = mannwhitneyu(x, y)
- asymptotic = mannwhitneyu(x, y, method='asymptotic')
- exact = mannwhitneyu(x, y, method='exact')
- assert auto.pvalue != exact.pvalue
- assert auto.pvalue == asymptotic.pvalue
- # --- Test Basic Functionality ---
- x = [210.052110, 110.190630, 307.918612]
- y = [436.08811482466416, 416.37397329768191, 179.96975939463582,
- 197.8118754228619, 34.038757281225756, 138.54220550921517,
- 128.7769351470246, 265.92721427951852, 275.6617533155341,
- 592.34083395416258, 448.73177590617018, 300.61495185038905,
- 187.97508449019588]
- # This test was written for mann_whitney_u in gh-4933.
- # Originally, the p-values for alternatives were swapped;
- # this has been corrected and the tests have been refactored for
- # compactness, but otherwise the tests are unchanged.
- # R code for comparison, e.g.:
- # options(digits = 16)
- # x = c(210.052110, 110.190630, 307.918612)
- # y = c(436.08811482466416, 416.37397329768191, 179.96975939463582,
- # 197.8118754228619, 34.038757281225756, 138.54220550921517,
- # 128.7769351470246, 265.92721427951852, 275.6617533155341,
- # 592.34083395416258, 448.73177590617018, 300.61495185038905,
- # 187.97508449019588)
- # wilcox.test(x, y, alternative="g", exact=TRUE)
- cases_basic = [[{"alternative": 'two-sided', "method": "asymptotic"},
- (16., 0.6865041817876)],
- [{"alternative": 'less', "method": "asymptotic"},
- (16., 0.3432520908938)],
- [{"alternative": 'greater', "method": "asymptotic"},
- (16., 0.7047591913255)],
- [{"alternative": 'two-sided', "method": "exact"},
- (16., 0.7035714285714)],
- [{"alternative": 'less', "method": "exact"},
- (16., 0.3517857142857)],
- [{"alternative": 'greater', "method": "exact"},
- (16., 0.6946428571429)]]
- @pytest.mark.parametrize(("kwds", "expected"), cases_basic)
- @pytest.mark.parametrize("dtype", [None, 'float32', 'float64'])
- def test_basic(self, kwds, expected, dtype, xp):
- if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
- pytest.skip("Scalar dtypes only respected after NEP 50.")
- dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
- x, y = xp.asarray(self.x, dtype=dtype), xp.asarray(self.y, dtype=dtype)
- res = mannwhitneyu(x, y, **kwds)
- xp_assert_close(res.statistic, xp.asarray(expected[0], dtype=dtype))
- xp_assert_close(res.pvalue, xp.asarray(expected[1], dtype=dtype))
- cases_continuity = [[{"alternative": 'two-sided', "use_continuity": True},
- (23., 0.6865041817876)],
- [{"alternative": 'less', "use_continuity": True},
- (23., 0.7047591913255)],
- [{"alternative": 'greater', "use_continuity": True},
- (23., 0.3432520908938)],
- [{"alternative": 'two-sided', "use_continuity": False},
- (23., 0.6377328900502)],
- [{"alternative": 'less', "use_continuity": False},
- (23., 0.6811335549749)],
- [{"alternative": 'greater', "use_continuity": False},
- (23., 0.3188664450251)]]
- @pytest.mark.parametrize(("kwds", "expected"), cases_continuity)
- def test_continuity(self, kwds, expected, xp):
- # When x and y are interchanged, less and greater p-values should
- # swap (compare to above). This wouldn't happen if the continuity
- # correction were applied in the wrong direction. Note that less and
- # greater p-values do not sum to 1 when continuity correction is on,
- # which is what we'd expect. Also check that results match R when
- # continuity correction is turned off.
- # Note that method='asymptotic' -> exact=FALSE
- # and use_continuity=False -> correct=FALSE, e.g.:
- # wilcox.test(x, y, alternative="t", exact=FALSE, correct=FALSE)
- x, y = xp.asarray(self.x), xp.asarray(self.y)
- res = mannwhitneyu(y, x, method='asymptotic', **kwds)
- xp_assert_close(res.statistic, xp.asarray(expected[0]))
- xp_assert_close(res.pvalue, xp.asarray(expected[1]))
- def test_tie_correct(self, xp):
- # Test tie correction against R's wilcox.test
- # options(digits = 16)
- # x = c(1, 2, 3, 4)
- # y = c(1, 2, 3, 4, 5)
- # wilcox.test(x, y, exact=FALSE)
- x = xp.asarray([1., 2., 3., 4.])
- y0 = xp.asarray([1., 2., 3., 4., 5.])
- dy = xp.asarray([0., 1., 0., 1., 0.])*0.01
- dy2 = xp.asarray([0., 0., 1., 0., 0.])*0.01
- y = xp.stack([y0-0.01, y0-dy, y0-dy2, y0, y0+dy2, y0+dy, y0+0.01])
- res = mannwhitneyu(x, y, axis=-1, method="asymptotic")
- U_expected = [10, 9, 8.5, 8, 7.5, 7, 6]
- p_expected = [1, 0.9017048037317, 0.804080657472, 0.7086240584439,
- 0.6197963884941, 0.5368784563079, 0.3912672792826]
- xp_assert_equal(res.statistic, xp.asarray(U_expected))
- xp_assert_close(res.pvalue, xp.asarray(p_expected))
- # --- Test Exact Distribution of U ---
- # These are tabulated values of the CDF of the exact distribution of
- # the test statistic from pg 52 of reference [1] (Mann-Whitney Original)
- pn3 = {1: [0.25, 0.5, 0.75], 2: [0.1, 0.2, 0.4, 0.6],
- 3: [0.05, .1, 0.2, 0.35, 0.5, 0.65]}
- pn4 = {1: [0.2, 0.4, 0.6], 2: [0.067, 0.133, 0.267, 0.4, 0.6],
- 3: [0.028, 0.057, 0.114, 0.2, .314, 0.429, 0.571],
- 4: [0.014, 0.029, 0.057, 0.1, 0.171, 0.243, 0.343, 0.443, 0.557]}
- pm5 = {1: [0.167, 0.333, 0.5, 0.667],
- 2: [0.047, 0.095, 0.19, 0.286, 0.429, 0.571],
- 3: [0.018, 0.036, 0.071, 0.125, 0.196, 0.286, 0.393, 0.5, 0.607],
- 4: [0.008, 0.016, 0.032, 0.056, 0.095, 0.143,
- 0.206, 0.278, 0.365, 0.452, 0.548],
- 5: [0.004, 0.008, 0.016, 0.028, 0.048, 0.075, 0.111,
- 0.155, 0.21, 0.274, 0.345, .421, 0.5, 0.579]}
- pm6 = {1: [0.143, 0.286, 0.428, 0.571],
- 2: [0.036, 0.071, 0.143, 0.214, 0.321, 0.429, 0.571],
- 3: [0.012, 0.024, 0.048, 0.083, 0.131,
- 0.19, 0.274, 0.357, 0.452, 0.548],
- 4: [0.005, 0.01, 0.019, 0.033, 0.057, 0.086, 0.129,
- 0.176, 0.238, 0.305, 0.381, 0.457, 0.543], # the last element
- # of the previous list, 0.543, has been modified from 0.545;
- # I assume it was a typo
- 5: [0.002, 0.004, 0.009, 0.015, 0.026, 0.041, 0.063, 0.089,
- 0.123, 0.165, 0.214, 0.268, 0.331, 0.396, 0.465, 0.535],
- 6: [0.001, 0.002, 0.004, 0.008, 0.013, 0.021, 0.032, 0.047,
- 0.066, 0.09, 0.12, 0.155, 0.197, 0.242, 0.294, 0.350,
- 0.409, 0.469, 0.531]}
- def test_exact_distribution(self):
- # I considered parametrize. I decided against it.
- setattr(_mwu_state, 's', _MWU(0, 0))
- p_tables = {3: self.pn3, 4: self.pn4, 5: self.pm5, 6: self.pm6}
- for n, table in p_tables.items():
- for m, p in table.items():
- # check p-value against table
- u = np.arange(0, len(p))
- _mwu_state.s.set_shapes(m, n)
- assert_allclose(_mwu_state.s.cdf(k=u), p, atol=1e-3)
- # check identity CDF + SF - PMF = 1
- # ( In this implementation, SF(U) includes PMF(U) )
- u2 = np.arange(0, m*n+1)
- assert_allclose(_mwu_state.s.cdf(k=u2)
- + _mwu_state.s.sf(k=u2)
- - _mwu_state.s.pmf(k=u2), 1)
- # check symmetry about mean of U, i.e. pmf(U) = pmf(m*n-U)
- pmf = _mwu_state.s.pmf(k=u2)
- assert_allclose(pmf, pmf[::-1])
- # check symmetry w.r.t. interchange of m, n
- _mwu_state.s.set_shapes(n, m)
- pmf2 = _mwu_state.s.pmf(k=u2)
- assert_allclose(pmf, pmf2)
- def test_asymptotic_behavior(self, xp):
- rng = np.random.default_rng(12543)
- # for small samples, the asymptotic test is not very accurate
- x = xp.asarray(rng.random(5))
- y = xp.asarray(rng.random(5))
- res1 = mannwhitneyu(x, y, method="exact")
- res2 = mannwhitneyu(x, y, method="asymptotic")
- assert res1.statistic == res2.statistic
- assert xp.abs(res1.pvalue - res2.pvalue) > 1e-2
- # for large samples, they agree reasonably well
- x = xp.asarray(rng.random(40))
- y = xp.asarray(rng.random(40))
- res1 = mannwhitneyu(x, y, method="exact")
- res2 = mannwhitneyu(x, y, method="asymptotic")
- assert res1.statistic == res2.statistic
- assert xp.abs(res1.pvalue - res2.pvalue) < 1e-3
- # --- Test Corner Cases ---
- def test_exact_U_equals_mean(self, xp):
- # Test U == m*n/2 with exact method
- # Without special treatment, two-sided p-value > 1 because both
- # one-sided p-values are > 0.5
- x, y = xp.asarray([1., 2., 3.]), xp.asarray([1.5, 2.5])
- res_l = mannwhitneyu(x, y, alternative="less", method="exact")
- res_g = mannwhitneyu(x, y, alternative="greater", method="exact")
- xp_assert_equal(res_l.pvalue, res_g.pvalue)
- assert res_l.pvalue > 0.5
- res = mannwhitneyu(x, y, alternative="two-sided", method="exact")
- xp_assert_equal(res.statistic, xp.asarray(3.))
- xp_assert_equal(res.pvalue, xp.asarray(1.))
- # U == m*n/2 for asymptotic case tested in test_gh_2118
- # The reason it's tricky for the asymptotic test has to do with
- # continuity correction.
- cases_scalar = [[{"alternative": 'two-sided', "method": "asymptotic"},
- (0., 1.)],
- [{"alternative": 'less', "method": "asymptotic"},
- (0., 0.5)],
- [{"alternative": 'greater', "method": "asymptotic"},
- (0., 0.977249868052)],
- [{"alternative": 'two-sided', "method": "exact"}, (0., 1)],
- [{"alternative": 'less', "method": "exact"}, (0., 0.5)],
- [{"alternative": 'greater', "method": "exact"}, (0., 1)]]
- @pytest.mark.parametrize(("kwds", "result"), cases_scalar)
- def test_scalar_data(self, kwds, result): # not important to preserve w/ array API
- # just making sure scalars work
- assert_allclose(mannwhitneyu(1, 2, **kwds), result)
- def test_equal_scalar_data(self): # not important to preserve w/ array API
- # when two scalars are equal, there is an -0.5/0 in the asymptotic
- # approximation. R gives pvalue=1.0 for alternatives 'less' and
- # 'greater' but NA for 'two-sided'. I don't see why, so I don't
- # see a need for a special case to match that behavior.
- assert_equal(mannwhitneyu(1, 1, method="exact"), (0.5, 1))
- assert_equal(mannwhitneyu(1, 1, method="asymptotic"), (0.5, 1))
- # without continuity correction, this becomes 0/0, which really
- # is undefined
- assert_equal(mannwhitneyu(1, 1, method="asymptotic",
- use_continuity=False), (0.5, np.nan))
- # --- Test Enhancements / Bug Reports ---
- @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
- @pytest.mark.parametrize("method", ["asymptotic", "exact"])
- def test_gh_12837_11113(self, method, xp):
- # Test that behavior for broadcastable nd arrays is appropriate:
- # output shape is correct and all values are equal to when the test
- # is performed on one pair of samples at a time.
- # Tests that gh-12837 and gh-11113 (requests for n-d input)
- # are resolved
- rng = np.random.default_rng(6083743794)
- # arrays are broadcastable except for axis = -3
- axis = -3
- m, n = 7, 10 # sample sizes
- x = rng.random((m, 3, 8))
- y = rng.random((6, n, 1, 8)) + 0.1
- res = mannwhitneyu(xp.asarray(x), xp.asarray(y), method=method, axis=axis)
- shape = (6, 3, 8) # appropriate shape of outputs, given inputs
- assert res.pvalue.shape == shape
- assert res.statistic.shape == shape
- # move axis of test to end for simplicity
- x, y = np.moveaxis(x, axis, -1), np.moveaxis(y, axis, -1)
- x = x[None, ...] # give x a zeroth dimension
- assert x.ndim == y.ndim
- x = np.broadcast_to(x, shape + (m,))
- y = np.broadcast_to(y, shape + (n,))
- assert x.shape[:-1] == shape
- assert y.shape[:-1] == shape
- # loop over pairs of samples
- statistics = np.zeros(shape)
- pvalues = np.zeros(shape)
- for indices in product(*[range(i) for i in shape]):
- xi = x[indices]
- yi = y[indices]
- temp = mannwhitneyu(xi, yi, method=method)
- statistics[indices] = temp.statistic
- pvalues[indices] = temp.pvalue
- xp_assert_close(res.pvalue, xp.asarray(pvalues), atol=1e-16)
- xp_assert_close(res.statistic, xp.asarray(statistics), atol=1e-16)
- def test_gh_11355(self, xp):
- # Test for correct behavior with NaN/Inf in input
- x = [1, 2, 3, 4]
- y = [3, 6, 7, 8, 9, 3, 2, 1, 4, 4, 5]
- res1 = mannwhitneyu(xp.asarray(x), xp.asarray(y))
- # Inf is not a problem. This is a rank test, and it's the largest value
- x[0] = 1. # ensure floating point
- y[4] = np.inf
- res2 = mannwhitneyu(xp.asarray(x), xp.asarray(y))
- xp_assert_equal(res1.statistic, res2.statistic)
- xp_assert_equal(res1.pvalue, res2.pvalue)
- @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
- def test_gh11355_nan(self, xp):
- # NaNs should propagate by default.
- x = [1., 2., 3., 4.]
- y = [3, 6, 7, np.nan, 9, 3, 2, 1, 4, 4, 5]
- res3 = mannwhitneyu(xp.asarray(x), xp.asarray(y))
- xp_assert_equal(res3.statistic, xp.asarray(xp.nan))
- xp_assert_equal(res3.pvalue, xp.asarray(xp.nan))
- cases_11355 = [([1., 2, 3, 4],
- [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5],
- 10., 0.1297704873477),
- ([1., 2, 3, 4],
- [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
- 8.5, 0.08735617507695),
- ([1, 2, np.inf, 4],
- [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5],
- 17.5, 0.5988856695752),
- ([1, 2, np.inf, 4],
- [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
- 16., 0.4687165824462),
- ([1, np.inf, np.inf, 4],
- [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
- 24.5, 0.7912517950119)]
- @pytest.mark.parametrize(("x", "y", "statistic", "pvalue"), cases_11355)
- def test_gh_11355b(self, x, y, statistic, pvalue, xp):
- # Test for correct behavior with NaN/Inf in input
- res = mannwhitneyu(xp.asarray(x), xp.asarray(y), method='asymptotic')
- xp_assert_close(res.statistic, xp.asarray(statistic), atol=1e-12)
- xp_assert_close(res.pvalue, xp.asarray(pvalue), atol=1e-12)
- cases_9184 = [[True, "less", "asymptotic", 0.900775348204],
- [True, "greater", "asymptotic", 0.1223118025635],
- [True, "two-sided", "asymptotic", 0.244623605127],
- [False, "less", "asymptotic", 0.8896643190401],
- [False, "greater", "asymptotic", 0.1103356809599],
- [False, "two-sided", "asymptotic", 0.2206713619198],
- [True, "less", "exact", 0.8967698967699],
- [True, "greater", "exact", 0.1272061272061],
- [True, "two-sided", "exact", 0.2544122544123]]
- @pytest.mark.parametrize(("use_continuity", "alternative",
- "method", "pvalue_exp"), cases_9184)
- def test_gh_9184(self, use_continuity, alternative, method, pvalue_exp, xp):
- # gh-9184 might be considered a doc-only bug. Please see the
- # documentation to confirm that mannwhitneyu correctly notes
- # that the output statistic is that of the first sample (x). In any
- # case, check the case provided there against output from R.
- # R code:
- # options(digits=16)
- # x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
- # y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
- # wilcox.test(x, y, alternative = "less", exact = FALSE)
- # wilcox.test(x, y, alternative = "greater", exact = FALSE)
- # wilcox.test(x, y, alternative = "two.sided", exact = FALSE)
- # wilcox.test(x, y, alternative = "less", exact = FALSE,
- # correct=FALSE)
- # wilcox.test(x, y, alternative = "greater", exact = FALSE,
- # correct=FALSE)
- # wilcox.test(x, y, alternative = "two.sided", exact = FALSE,
- # correct=FALSE)
- # wilcox.test(x, y, alternative = "less", exact = TRUE)
- # wilcox.test(x, y, alternative = "greater", exact = TRUE)
- # wilcox.test(x, y, alternative = "two.sided", exact = TRUE)
- statistic_exp = 35.
- x = xp.asarray([0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46])
- y = xp.asarray([1.15, 0.88, 0.90, 0.74, 1.21])
- res = mannwhitneyu(x, y, use_continuity=use_continuity,
- alternative=alternative, method=method)
- xp_assert_equal(res.statistic, xp.asarray(statistic_exp))
- xp_assert_close(res.pvalue, xp.asarray(pvalue_exp))
- @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no _axis_nan_policy")
- def test_gh_4067(self, xp):
- # Test for correct behavior with all NaN input - default is propagate
- nan = xp.asarray(xp.nan)
- a = xp.stack([nan, nan, nan, nan, nan])
- b = xp.stack([nan, nan, nan, nan, nan])
- res = mannwhitneyu(a, b)
- xp_assert_equal(res.statistic, xp.asarray(nan))
- xp_assert_equal(res.pvalue, nan)
- # All cases checked against R wilcox.test, e.g.
- # options(digits=16)
- # x = c(1, 2, 3)
- # y = c(1.5, 2.5)
- # wilcox.test(x, y, exact=FALSE, alternative='less')
- cases_2118 = [[[1., 2., 3.], [1.5, 2.5], "greater", (3., 0.6135850036578)],
- [[1., 2., 3.], [1.5, 2.5], "less", (3., 0.6135850036578)],
- [[1., 2., 3.], [1.5, 2.5], "two-sided", (3., 1.0)],
- [[1, 2, 3], [2], "greater", (1.5, 0.681324055883)],
- [[1, 2, 3], [2], "less", (1.5, 0.681324055883)],
- [[1, 2, 3], [2], "two-sided", (1.5, 1.)],
- [[1, 2], [1, 2], "greater", (2., 0.667497228949)],
- [[1, 2], [1, 2], "less", (2., 0.667497228949)],
- [[1, 2], [1, 2], "two-sided", (2., 1.)]]
- @pytest.mark.parametrize(["x", "y", "alternative", "expected"], cases_2118)
- def test_gh_2118(self, x, y, alternative, expected, xp):
- # test cases in which U == m*n/2 when method is asymptotic
- # applying continuity correction could result in p-value > 1
- res = mannwhitneyu(xp.asarray(x), xp.asarray(y), use_continuity=True,
- alternative=alternative, method="asymptotic")
- rtol = 1e-6 if xp_default_dtype(xp) == xp.float32 else 1e-12
- xp_assert_close(res.statistic, xp.asarray(expected[0]), rtol=rtol)
- xp_assert_close(res.pvalue, xp.asarray(expected[1]), rtol=rtol)
- def test_gh19692_smaller_table(self):
- # In gh-19692, we noted that the shape of the cache used in calculating
- # p-values was dependent on the order of the inputs because the sample
- # sizes n1 and n2 changed. This was indicative of unnecessary cache
- # growth and redundant calculation. Check that this is resolved.
- rng = np.random.default_rng(7600451795963068007)
- m, n = 5, 11
- x = rng.random(size=m)
- y = rng.random(size=n)
- setattr(_mwu_state, 's', _MWU(0, 0))
- _mwu_state.s.reset() # reset cache
- res = stats.mannwhitneyu(x, y, method='exact')
- shape = _mwu_state.s.configurations.shape
- assert shape[-1] == min(res.statistic, m*n - res.statistic) + 1
- stats.mannwhitneyu(y, x, method='exact')
- assert shape == _mwu_state.s.configurations.shape # same with reversed sizes
- # Also, we weren't exploiting the symmetry of the null distribution
- # to its full potential. Ensure that the null distribution is not
- # evaluated explicitly for `k > m*n/2`.
- _mwu_state.s.reset() # reset cache
- stats.mannwhitneyu(x, 0*y, method='exact', alternative='greater')
- shape = _mwu_state.s.configurations.shape
- assert shape[-1] == 1 # k is smallest possible
- stats.mannwhitneyu(0*x, y, method='exact', alternative='greater')
- assert shape == _mwu_state.s.configurations.shape
- @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
- def test_permutation_method(self, alternative):
- rng = np.random.default_rng(7600451795963068007)
- x = rng.random(size=(2, 5))
- y = rng.random(size=(2, 6))
- res = stats.mannwhitneyu(x, y, method=stats.PermutationMethod(),
- alternative=alternative, axis=1)
- res2 = stats.mannwhitneyu(x, y, method='exact',
- alternative=alternative, axis=1)
- assert_allclose(res.statistic, res2.statistic, rtol=1e-15)
- assert_allclose(res.pvalue, res2.pvalue, rtol=1e-15)
- # Old tests moved from test_stats. Source of magic numbers unknown.
- X = [19.8958398126694, 19.5452691647182, 19.0577309166425, 21.716543054589,
- 20.3269502208702, 20.0009273294025, 19.3440043632957, 20.4216806548105,
- 19.0649894736528, 18.7808043120398, 19.3680942943298, 19.4848044069953,
- 20.7514611265663, 19.0894948874598, 19.4975522356628, 18.9971170734274,
- 20.3239606288208, 20.6921298083835, 19.0724259532507, 18.9825187935021,
- 19.5144462609601, 19.8256857844223, 20.5174677102032, 21.1122407995892,
- 17.9490854922535, 18.2847521114727, 20.1072217648826, 18.6439891962179,
- 20.4970638083542, 19.5567594734914]
- Y = [19.2790668029091, 16.993808441865, 18.5416338448258, 17.2634018833575,
- 19.1577183624616, 18.5119655377495, 18.6068455037221, 18.8358343362655,
- 19.0366413269742, 18.1135025515417, 19.2201873866958, 17.8344909022841,
- 18.2894380745856, 18.6661374133922, 19.9688601693252, 16.0672254617636,
- 19.00596360572, 19.201561539032, 19.0487501090183, 19.0847908674356]
- rtol = 1e-14
- def test_mannwhitneyu_one_sided(self, xp):
- X, Y = xp.asarray(self.X), xp.asarray(self.Y)
- u1, p1 = stats.mannwhitneyu(X, Y, alternative='less')
- u2, p2 = stats.mannwhitneyu(Y, X, alternative='greater')
- u3, p3 = stats.mannwhitneyu(X, Y, alternative='greater')
- u4, p4 = stats.mannwhitneyu(Y, X, alternative='less')
- xp_assert_equal(p1, p2)
- xp_assert_equal(p3, p4)
- assert p1 != p3
- xp_assert_equal(u1, xp.asarray(498.))
- xp_assert_equal(u2, xp.asarray(102.))
- xp_assert_equal(u3, xp.asarray(498.))
- xp_assert_equal(u4, xp.asarray(102.))
- assert_allclose(p1, xp.asarray(0.999957683256589), rtol=self.rtol)
- rtol = self.rtol if X.dtype == xp.float64 else 5e-4
- assert_allclose(p3, xp.asarray(4.5941632666275e-05), rtol=rtol, atol=1e-16)
- def test_mannwhitneyu_two_sided(self, xp):
- X, Y = xp.asarray(self.X), xp.asarray(self.Y)
- u1, p1 = stats.mannwhitneyu(X, Y, alternative='two-sided')
- u2, p2 = stats.mannwhitneyu(Y, X, alternative='two-sided')
- xp_assert_equal(p1, p2)
- xp_assert_equal(u1, xp.asarray(498.))
- xp_assert_equal(u2, xp.asarray(102.))
- rtol = self.rtol if X.dtype == xp.float64 else 5e-4
- xp_assert_close(p1, xp.asarray(9.188326533255e-05), rtol=rtol, atol=1e-16)
- def test_mannwhitneyu_no_correct_one_sided(self, xp):
- X, Y = xp.asarray(self.X), xp.asarray(self.Y)
- u1, p1 = stats.mannwhitneyu(X, Y, False, alternative='less')
- u2, p2 = stats.mannwhitneyu(Y, X, False, alternative='greater')
- u3, p3 = stats.mannwhitneyu(X, Y, False, alternative='greater')
- u4, p4 = stats.mannwhitneyu(Y, X, False, alternative='less')
- xp_assert_equal(p1, p2)
- xp_assert_equal(p3, p4)
- assert p1 != p3
- xp_assert_equal(u1, xp.asarray(498.))
- xp_assert_equal(u2, xp.asarray(102.))
- xp_assert_equal(u3, xp.asarray(498.))
- xp_assert_equal(u4, xp.asarray(102.))
- rtol = self.rtol if X.dtype == xp.float64 else 5e-4
- xp_assert_close(p1, xp.asarray(0.999955905990004), rtol=rtol, atol=1e-16)
- xp_assert_close(p3, xp.asarray(4.40940099958089e-05), rtol=rtol, atol=1e-16)
- def test_mannwhitneyu_no_correct_two_sided(self, xp):
- X, Y = xp.asarray(self.X), xp.asarray(self.Y)
- u1, p1 = stats.mannwhitneyu(X, Y, False, alternative='two-sided')
- u2, p2 = stats.mannwhitneyu(Y, X, False, alternative='two-sided')
- xp_assert_equal(p1, p2)
- xp_assert_equal(u1, xp.asarray(498.))
- xp_assert_equal(u2, xp.asarray(102.))
- rtol = self.rtol if X.dtype == xp.float64 else 5e-4
- xp_assert_close(p1, xp.asarray(8.81880199916178e-05), rtol=rtol, atol=1e-16)
- def test_mannwhitneyu_ones(self, xp):
- # test for gh-1428
- x = xp.asarray([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1.])
- y = xp.asarray([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1.,
- 2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1.,
- 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2.,
- 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2.,
- 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
- 2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1.,
- 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1.,
- 1., 1., 1., 1.])
- # p-value from R, e.g. wilcox.test(x, y, alternative="g")
- res = stats.mannwhitneyu(x, y, alternative='less')
- xp_assert_close(res.statistic, xp.asarray(16980.5))
- xp_assert_close(res.pvalue, xp.asarray(2.8214327656317373e-5))
- res = stats.mannwhitneyu(x, y, alternative='greater')
- xp_assert_close(res.statistic, xp.asarray(16980.5))
- xp_assert_close(res.pvalue, xp.asarray(0.9999719954296))
- res = stats.mannwhitneyu(x, y, alternative='two-sided')
- xp_assert_close(res.statistic, xp.asarray(16980.5))
- xp_assert_close(res.pvalue, xp.asarray(5.642865531266e-5))
- class TestSomersD(_TestPythranFunc):
- def setup_method(self):
- self.dtypes = self.ALL_INTEGER + self.ALL_FLOAT
- self.arguments = {0: (np.arange(10),
- self.ALL_INTEGER + self.ALL_FLOAT),
- 1: (np.arange(10),
- self.ALL_INTEGER + self.ALL_FLOAT)}
- input_array = [self.arguments[idx][0] for idx in self.arguments]
- # In this case, self.partialfunc can simply be stats.somersd,
- # since `alternative` is an optional argument. If it is required,
- # we can use functools.partial to freeze the value, because
- # we only mainly test various array inputs, not str, etc.
- self.partialfunc = functools.partial(stats.somersd,
- alternative='two-sided')
- self.expected = self.partialfunc(*input_array)
- def pythranfunc(self, *args):
- res = self.partialfunc(*args)
- assert_allclose(res.statistic, self.expected.statistic, atol=1e-15)
- assert_allclose(res.pvalue, self.expected.pvalue, atol=1e-15)
- def test_pythranfunc_keywords(self):
- # Not specifying the optional keyword args
- table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]
- res1 = stats.somersd(table)
- # Specifying the optional keyword args with default value
- optional_args = self.get_optional_args(stats.somersd)
- res2 = stats.somersd(table, **optional_args)
- # Check if the results are the same in two cases
- assert_allclose(res1.statistic, res2.statistic, atol=1e-15)
- assert_allclose(res1.pvalue, res2.pvalue, atol=1e-15)
- def test_like_kendalltau(self):
- # All tests correspond with one in test_stats.py `test_kendalltau`
- # case without ties, con-dis equal zero
- x = [5, 2, 1, 3, 6, 4, 7, 8]
- y = [5, 2, 6, 3, 1, 8, 7, 4]
- # Cross-check with result from SAS FREQ:
- expected = (0.000000000000000, 1.000000000000000)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # case without ties, con-dis equal zero
- x = [0, 5, 2, 1, 3, 6, 4, 7, 8]
- y = [5, 2, 0, 6, 3, 1, 8, 7, 4]
- # Cross-check with result from SAS FREQ:
- expected = (0.000000000000000, 1.000000000000000)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # case without ties, con-dis close to zero
- x = [5, 2, 1, 3, 6, 4, 7]
- y = [5, 2, 6, 3, 1, 7, 4]
- # Cross-check with result from SAS FREQ:
- expected = (-0.142857142857140, 0.630326953157670)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # simple case without ties
- x = np.arange(10)
- y = np.arange(10)
- # Cross-check with result from SAS FREQ:
- # SAS p value is not provided.
- expected = (1.000000000000000, 0)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # swap a couple values and a couple more
- x = np.arange(10)
- y = np.array([0, 2, 1, 3, 4, 6, 5, 7, 8, 9])
- # Cross-check with result from SAS FREQ:
- expected = (0.911111111111110, 0.000000000000000)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # same in opposite direction
- x = np.arange(10)
- y = np.arange(10)[::-1]
- # Cross-check with result from SAS FREQ:
- # SAS p value is not provided.
- expected = (-1.000000000000000, 0)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # swap a couple values and a couple more
- x = np.arange(10)
- y = np.array([9, 7, 8, 6, 5, 3, 4, 2, 1, 0])
- # Cross-check with result from SAS FREQ:
- expected = (-0.9111111111111111, 0.000000000000000)
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # with some ties
- x1 = [12, 2, 1, 12, 2]
- x2 = [1, 4, 7, 1, 0]
- # Cross-check with result from SAS FREQ:
- expected = (-0.500000000000000, 0.304901788178780)
- res = stats.somersd(x1, x2)
- assert_allclose(res.statistic, expected[0], atol=1e-15)
- assert_allclose(res.pvalue, expected[1], atol=1e-15)
- # with only ties in one or both inputs
- # SAS will not produce an output for these:
- # NOTE: No statistics are computed for x * y because x has fewer
- # than 2 nonmissing levels.
- # WARNING: No OUTPUT data set is produced for this table because a
- # row or column variable has fewer than 2 nonmissing levels and no
- # statistics are computed.
- res = stats.somersd([2, 2, 2], [2, 2, 2])
- assert_allclose(res.statistic, np.nan)
- assert_allclose(res.pvalue, np.nan)
- res = stats.somersd([2, 0, 2], [2, 2, 2])
- assert_allclose(res.statistic, np.nan)
- assert_allclose(res.pvalue, np.nan)
- res = stats.somersd([2, 2, 2], [2, 0, 2])
- assert_allclose(res.statistic, np.nan)
- assert_allclose(res.pvalue, np.nan)
- res = stats.somersd([0], [0])
- assert_allclose(res.statistic, np.nan)
- assert_allclose(res.pvalue, np.nan)
- # empty arrays provided as input
- res = stats.somersd([], [])
- assert_allclose(res.statistic, np.nan)
- assert_allclose(res.pvalue, np.nan)
- # test unequal length inputs
- x = np.arange(10.)
- y = np.arange(20.)
- assert_raises(ValueError, stats.somersd, x, y)
- def test_asymmetry(self):
- # test that somersd is asymmetric w.r.t. input order and that
- # convention is as described: first input is row variable & independent
- # data is from Wikipedia:
- # https://en.wikipedia.org/wiki/Somers%27_D
- # but currently that example contradicts itself - it says X is
- # independent yet take D_XY
- x = [1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 1, 2,
- 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]
- y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
- # Cross-check with result from SAS FREQ:
- d_cr = 0.272727272727270
- d_rc = 0.342857142857140
- p = 0.092891940883700 # same p-value for either direction
- res = stats.somersd(x, y)
- assert_allclose(res.statistic, d_cr, atol=1e-15)
- assert_allclose(res.pvalue, p, atol=1e-4)
- assert_equal(res.table.shape, (3, 2))
- res = stats.somersd(y, x)
- assert_allclose(res.statistic, d_rc, atol=1e-15)
- assert_allclose(res.pvalue, p, atol=1e-15)
- assert_equal(res.table.shape, (2, 3))
- def test_somers_original(self):
- # test against Somers' original paper [1]
- # Table 5A
- # Somers' convention was column IV
- table = np.array([[8, 2], [6, 5], [3, 4], [1, 3], [2, 3]])
- # Our convention (and that of SAS FREQ) is row IV
- table = table.T
- dyx = 129/340
- assert_allclose(stats.somersd(table).statistic, dyx)
- # table 7A - d_yx = 1
- table = np.array([[25, 0], [85, 0], [0, 30]])
- dxy, dyx = 3300/5425, 3300/3300
- assert_allclose(stats.somersd(table).statistic, dxy)
- assert_allclose(stats.somersd(table.T).statistic, dyx)
- # table 7B - d_yx < 0
- table = np.array([[25, 0], [0, 30], [85, 0]])
- dyx = -1800/3300
- assert_allclose(stats.somersd(table.T).statistic, dyx)
- def test_contingency_table_with_zero_rows_cols(self):
- # test that zero rows/cols in contingency table don't affect result
- N = 100
- shape = 4, 6
- size = np.prod(shape)
- rng = np.random.RandomState(0)
- s = stats.multinomial.rvs(N, p=np.ones(size)/size,
- random_state=rng).reshape(shape)
- res = stats.somersd(s)
- s2 = np.insert(s, 2, np.zeros(shape[1]), axis=0)
- res2 = stats.somersd(s2)
- s3 = np.insert(s, 2, np.zeros(shape[0]), axis=1)
- res3 = stats.somersd(s3)
- s4 = np.insert(s2, 2, np.zeros(shape[0]+1), axis=1)
- res4 = stats.somersd(s4)
- # Cross-check with result from SAS FREQ:
- assert_allclose(res.statistic, -0.116981132075470, atol=1e-15)
- assert_allclose(res.statistic, res2.statistic)
- assert_allclose(res.statistic, res3.statistic)
- assert_allclose(res.statistic, res4.statistic)
- assert_allclose(res.pvalue, 0.156376448188150, atol=1e-15)
- assert_allclose(res.pvalue, res2.pvalue)
- assert_allclose(res.pvalue, res3.pvalue)
- assert_allclose(res.pvalue, res4.pvalue)
- def test_invalid_contingency_tables(self):
- N = 100
- shape = 4, 6
- size = np.prod(shape)
- rng = np.random.default_rng(0)
- # start with a valid contingency table
- s = stats.multinomial.rvs(N, p=np.ones(size)/size,
- random_state=rng).reshape(shape)
- s5 = s - 2
- message = "All elements of the contingency table must be non-negative"
- with assert_raises(ValueError, match=message):
- stats.somersd(s5)
- s6 = s + 0.01
- message = "All elements of the contingency table must be integer"
- with assert_raises(ValueError, match=message):
- stats.somersd(s6)
- message = ("At least two elements of the contingency "
- "table must be nonzero.")
- with assert_raises(ValueError, match=message):
- stats.somersd([[]])
- with assert_raises(ValueError, match=message):
- stats.somersd([[1]])
- s7 = np.zeros((3, 3))
- with assert_raises(ValueError, match=message):
- stats.somersd(s7)
- s7[0, 1] = 1
- with assert_raises(ValueError, match=message):
- stats.somersd(s7)
- def test_only_ranks_matter(self):
- # only ranks of input data should matter
- x = [1, 2, 3]
- x2 = [-1, 2.1, np.inf]
- y = [3, 2, 1]
- y2 = [0, -0.5, -np.inf]
- res = stats.somersd(x, y)
- res2 = stats.somersd(x2, y2)
- assert_equal(res.statistic, res2.statistic)
- assert_equal(res.pvalue, res2.pvalue)
- def test_contingency_table_return(self):
- # check that contingency table is returned
- x = np.arange(10)
- y = np.arange(10)
- res = stats.somersd(x, y)
- assert_equal(res.table, np.eye(10))
- def test_somersd_alternative(self):
- # Test alternative parameter, asymptotic method (due to tie)
- # Based on scipy.stats.test_stats.TestCorrSpearman2::test_alternative
- x1 = [1, 2, 3, 4, 5]
- x2 = [5, 6, 7, 8, 7]
- # strong positive correlation
- expected = stats.somersd(x1, x2, alternative="two-sided")
- assert expected.statistic > 0
- # rank correlation > 0 -> large "less" p-value
- res = stats.somersd(x1, x2, alternative="less")
- assert_equal(res.statistic, expected.statistic)
- assert_allclose(res.pvalue, 1 - (expected.pvalue / 2))
- # rank correlation > 0 -> small "greater" p-value
- res = stats.somersd(x1, x2, alternative="greater")
- assert_equal(res.statistic, expected.statistic)
- assert_allclose(res.pvalue, expected.pvalue / 2)
- # reverse the direction of rank correlation
- x2.reverse()
- # strong negative correlation
- expected = stats.somersd(x1, x2, alternative="two-sided")
- assert expected.statistic < 0
- # rank correlation < 0 -> large "greater" p-value
- res = stats.somersd(x1, x2, alternative="greater")
- assert_equal(res.statistic, expected.statistic)
- assert_allclose(res.pvalue, 1 - (expected.pvalue / 2))
- # rank correlation < 0 -> small "less" p-value
- res = stats.somersd(x1, x2, alternative="less")
- assert_equal(res.statistic, expected.statistic)
- assert_allclose(res.pvalue, expected.pvalue / 2)
- with pytest.raises(ValueError, match="`alternative` must be..."):
- stats.somersd(x1, x2, alternative="ekki-ekki")
- @pytest.mark.parametrize("positive_correlation", (False, True))
- def test_somersd_perfect_correlation(self, positive_correlation):
- # Before the addition of `alternative`, perfect correlation was
- # treated as a special case. Now it is treated like any other case, but
- # make sure there are no divide by zero warnings or associated errors
- x1 = np.arange(10)
- x2 = x1 if positive_correlation else np.flip(x1)
- expected_statistic = 1 if positive_correlation else -1
- # perfect correlation -> small "two-sided" p-value (0)
- res = stats.somersd(x1, x2, alternative="two-sided")
- assert res.statistic == expected_statistic
- assert res.pvalue == 0
- # rank correlation > 0 -> large "less" p-value (1)
- res = stats.somersd(x1, x2, alternative="less")
- assert res.statistic == expected_statistic
- assert res.pvalue == (1 if positive_correlation else 0)
- # rank correlation > 0 -> small "greater" p-value (0)
- res = stats.somersd(x1, x2, alternative="greater")
- assert res.statistic == expected_statistic
- assert res.pvalue == (0 if positive_correlation else 1)
- def test_somersd_large_inputs_gh18132(self):
- # Test that large inputs where potential overflows could occur give
- # the expected output. This is tested in the case of binary inputs.
- # See gh-18126.
- # generate lists of random classes 1-2 (binary)
- classes = [1, 2]
- n_samples = 10 ** 6
- rng = np.random.default_rng(6889320191)
- x = rng.choice(classes, n_samples)
- y = rng.choice(classes, n_samples)
- # get value to compare with: sklearn output
- # from sklearn import metrics
- # val_auc_sklearn = metrics.roc_auc_score(x, y)
- # # convert to the Gini coefficient (Gini = (AUC*2)-1)
- # val_sklearn = 2 * val_auc_sklearn - 1
- val_sklearn = 0.000624401938730923
- # calculate the Somers' D statistic, which should be equal to the
- # result of val_sklearn until approximately machine precision
- val_scipy = stats.somersd(x, y).statistic
- assert_allclose(val_sklearn, val_scipy, atol=1e-15)
- class TestBarnardExact:
- """Some tests to show that barnard_exact() works correctly."""
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[43, 40], [10, 39]], (3.555406779643, 0.000362832367)),
- ([[100, 2], [1000, 5]], (-1.776382925679, 0.135126970878)),
- ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)),
- ([[5, 1], [10, 10]], (1.449486150679, 0.156277546306)),
- ([[5, 15], [20, 20]], (-1.851640199545, 0.066363501421)),
- ([[5, 16], [20, 25]], (-1.609639949352, 0.116984852192)),
- ([[10, 5], [10, 1]], (-1.449486150679, 0.177536588915)),
- ([[5, 0], [1, 4]], (2.581988897472, 0.013671875000)),
- ([[0, 1], [3, 2]], (-1.095445115010, 0.509667991877)),
- ([[0, 2], [6, 4]], (-1.549193338483, 0.197019618792)),
- ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)),
- ],
- )
- def test_precise(self, input_sample, expected):
- """The expected values have been generated by R, using a resolution
- for the nuisance parameter of 1e-6 :
- ```R
- library(Barnard)
- options(digits=10)
- barnard.test(43, 40, 10, 39, dp=1e-6, pooled=TRUE)
- ```
- """
- res = barnard_exact(input_sample)
- statistic, pvalue = res.statistic, res.pvalue
- assert_allclose([statistic, pvalue], expected)
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[43, 40], [10, 39]], (3.920362887717, 0.000289470662)),
- ([[100, 2], [1000, 5]], (-1.139432816087, 0.950272080594)),
- ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)),
- ([[5, 1], [10, 10]], (1.622375939458, 0.150599922226)),
- ([[5, 15], [20, 20]], (-1.974771239528, 0.063038448651)),
- ([[5, 16], [20, 25]], (-1.722122973346, 0.133329494287)),
- ([[10, 5], [10, 1]], (-1.765469659009, 0.250566655215)),
- ([[5, 0], [1, 4]], (5.477225575052, 0.007812500000)),
- ([[0, 1], [3, 2]], (-1.224744871392, 0.509667991877)),
- ([[0, 2], [6, 4]], (-1.732050807569, 0.197019618792)),
- ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)),
- ],
- )
- def test_pooled_param(self, input_sample, expected):
- """The expected values have been generated by R, using a resolution
- for the nuisance parameter of 1e-6 :
- ```R
- library(Barnard)
- options(digits=10)
- barnard.test(43, 40, 10, 39, dp=1e-6, pooled=FALSE)
- ```
- """
- res = barnard_exact(input_sample, pooled=False)
- statistic, pvalue = res.statistic, res.pvalue
- assert_allclose([statistic, pvalue], expected)
- def test_raises(self):
- # test we raise an error for wrong input number of nuisances.
- error_msg = (
- "Number of points `n` must be strictly positive, found 0"
- )
- with assert_raises(ValueError, match=error_msg):
- barnard_exact([[1, 2], [3, 4]], n=0)
- # test we raise an error for wrong shape of input.
- error_msg = "The input `table` must be of shape \\(2, 2\\)."
- with assert_raises(ValueError, match=error_msg):
- barnard_exact(np.arange(6).reshape(2, 3))
- # Test all values must be positives
- error_msg = "All values in `table` must be nonnegative."
- with assert_raises(ValueError, match=error_msg):
- barnard_exact([[-1, 2], [3, 4]])
- # Test value error on wrong alternative param
- error_msg = (
- "`alternative` should be one of {'two-sided', 'less', 'greater'},"
- " found .*"
- )
- with assert_raises(ValueError, match=error_msg):
- barnard_exact([[1, 2], [3, 4]], "not-correct")
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[0, 0], [4, 3]], (1.0, 0)),
- ],
- )
- def test_edge_cases(self, input_sample, expected):
- res = barnard_exact(input_sample)
- statistic, pvalue = res.statistic, res.pvalue
- assert_equal(pvalue, expected[0])
- assert_equal(statistic, expected[1])
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[0, 5], [0, 10]], (1.0, np.nan)),
- ([[5, 0], [10, 0]], (1.0, np.nan)),
- ],
- )
- def test_row_or_col_zero(self, input_sample, expected):
- res = barnard_exact(input_sample)
- statistic, pvalue = res.statistic, res.pvalue
- assert_equal(pvalue, expected[0])
- assert_equal(statistic, expected[1])
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[2, 7], [8, 2]], (-2.518474945157, 0.009886140845)),
- ([[7, 200], [300, 8]], (-21.320036698460, 0.0)),
- ([[21, 28], [1957, 6]], (-30.489638143953, 0.0)),
- ],
- )
- @pytest.mark.parametrize("alternative", ["greater", "less"])
- def test_less_greater(self, input_sample, expected, alternative):
- """
- "The expected values have been generated by R, using a resolution
- for the nuisance parameter of 1e-6 :
- ```R
- library(Barnard)
- options(digits=10)
- a = barnard.test(2, 7, 8, 2, dp=1e-6, pooled=TRUE)
- a$p.value[1]
- ```
- In this test, we are using the "one-sided" return value `a$p.value[1]`
- to test our pvalue.
- """
- expected_stat, less_pvalue_expect = expected
- if alternative == "greater":
- input_sample = np.array(input_sample)[:, ::-1]
- expected_stat = -expected_stat
- res = barnard_exact(input_sample, alternative=alternative)
- statistic, pvalue = res.statistic, res.pvalue
- assert_allclose(
- [statistic, pvalue], [expected_stat, less_pvalue_expect], atol=1e-7
- )
- class TestBoschlooExact:
- """Some tests to show that boschloo_exact() works correctly."""
- ATOL = 1e-7
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[2, 7], [8, 2]], (0.01852173, 0.009886142)),
- ([[5, 1], [10, 10]], (0.9782609, 0.9450994)),
- ([[5, 16], [20, 25]], (0.08913823, 0.05827348)),
- ([[10, 5], [10, 1]], (0.1652174, 0.08565611)),
- ([[5, 0], [1, 4]], (1, 1)),
- ([[0, 1], [3, 2]], (0.5, 0.34375)),
- ([[2, 7], [8, 2]], (0.01852173, 0.009886142)),
- ([[7, 12], [8, 3]], (0.06406797, 0.03410916)),
- ([[10, 24], [25, 37]], (0.2009359, 0.1512882)),
- ],
- )
- def test_less(self, input_sample, expected):
- """The expected values have been generated by R, using a resolution
- for the nuisance parameter of 1e-8 :
- ```R
- library(Exact)
- options(digits=10)
- data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
- a = exact.test(data, method="Boschloo", alternative="less",
- tsmethod="central", np.interval=TRUE, beta=1e-8)
- ```
- """
- res = boschloo_exact(input_sample, alternative="less")
- statistic, pvalue = res.statistic, res.pvalue
- assert_allclose([statistic, pvalue], expected, atol=self.ATOL)
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[43, 40], [10, 39]], (0.0002875544, 0.0001615562)),
- ([[2, 7], [8, 2]], (0.9990149, 0.9918327)),
- ([[5, 1], [10, 10]], (0.1652174, 0.09008534)),
- ([[5, 15], [20, 20]], (0.9849087, 0.9706997)),
- ([[5, 16], [20, 25]], (0.972349, 0.9524124)),
- ([[5, 0], [1, 4]], (0.02380952, 0.006865367)),
- ([[0, 1], [3, 2]], (1, 1)),
- ([[0, 2], [6, 4]], (1, 1)),
- ([[2, 7], [8, 2]], (0.9990149, 0.9918327)),
- ([[7, 12], [8, 3]], (0.9895302, 0.9771215)),
- ([[10, 24], [25, 37]], (0.9012936, 0.8633275)),
- ],
- )
- def test_greater(self, input_sample, expected):
- """The expected values have been generated by R, using a resolution
- for the nuisance parameter of 1e-8 :
- ```R
- library(Exact)
- options(digits=10)
- data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
- a = exact.test(data, method="Boschloo", alternative="greater",
- tsmethod="central", np.interval=TRUE, beta=1e-8)
- ```
- """
- res = boschloo_exact(input_sample, alternative="greater")
- statistic, pvalue = res.statistic, res.pvalue
- assert_allclose([statistic, pvalue], expected, atol=self.ATOL)
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[43, 40], [10, 39]], (0.0002875544, 0.0003231115)),
- ([[2, 7], [8, 2]], (0.01852173, 0.01977228)),
- ([[5, 1], [10, 10]], (0.1652174, 0.1801707)),
- ([[5, 16], [20, 25]], (0.08913823, 0.116547)),
- ([[5, 0], [1, 4]], (0.02380952, 0.01373073)),
- ([[0, 1], [3, 2]], (0.5, 0.6875)),
- ([[2, 7], [8, 2]], (0.01852173, 0.01977228)),
- ([[7, 12], [8, 3]], (0.06406797, 0.06821831)),
- ],
- )
- def test_two_sided(self, input_sample, expected):
- """The expected values have been generated by R, using a resolution
- for the nuisance parameter of 1e-8 :
- ```R
- library(Exact)
- options(digits=10)
- data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
- a = exact.test(data, method="Boschloo", alternative="two.sided",
- tsmethod="central", np.interval=TRUE, beta=1e-8)
- ```
- """
- res = boschloo_exact(input_sample, alternative="two-sided", n=64)
- # Need n = 64 for python 32-bit
- statistic, pvalue = res.statistic, res.pvalue
- assert_allclose([statistic, pvalue], expected, atol=self.ATOL)
- def test_raises(self):
- # test we raise an error for wrong input number of nuisances.
- error_msg = (
- "Number of points `n` must be strictly positive, found 0"
- )
- with assert_raises(ValueError, match=error_msg):
- boschloo_exact([[1, 2], [3, 4]], n=0)
- # test we raise an error for wrong shape of input.
- error_msg = "The input `table` must be of shape \\(2, 2\\)."
- with assert_raises(ValueError, match=error_msg):
- boschloo_exact(np.arange(6).reshape(2, 3))
- # Test all values must be positives
- error_msg = "All values in `table` must be nonnegative."
- with assert_raises(ValueError, match=error_msg):
- boschloo_exact([[-1, 2], [3, 4]])
- # Test value error on wrong alternative param
- error_msg = (
- r"`alternative` should be one of \('two-sided', 'less', "
- r"'greater'\), found .*"
- )
- with assert_raises(ValueError, match=error_msg):
- boschloo_exact([[1, 2], [3, 4]], "not-correct")
- @pytest.mark.parametrize(
- "input_sample,expected",
- [
- ([[0, 5], [0, 10]], (np.nan, np.nan)),
- ([[5, 0], [10, 0]], (np.nan, np.nan)),
- ],
- )
- def test_row_or_col_zero(self, input_sample, expected):
- res = boschloo_exact(input_sample)
- statistic, pvalue = res.statistic, res.pvalue
- assert_equal(pvalue, expected[0])
- assert_equal(statistic, expected[1])
- def test_two_sided_gt_1(self):
- # Check that returned p-value does not exceed 1 even when twice
- # the minimum of the one-sided p-values does. See gh-15345.
- tbl = [[1, 1], [13, 12]]
- pl = boschloo_exact(tbl, alternative='less').pvalue
- pg = boschloo_exact(tbl, alternative='greater').pvalue
- assert 2*min(pl, pg) > 1
- pt = boschloo_exact(tbl, alternative='two-sided').pvalue
- assert pt == 1.0
- @pytest.mark.parametrize("alternative", ("less", "greater"))
- def test_against_fisher_exact(self, alternative):
- # Check that the statistic of `boschloo_exact` is the same as the
- # p-value of `fisher_exact` (for one-sided tests). See gh-15345.
- tbl = [[2, 7], [8, 2]]
- boschloo_stat = boschloo_exact(tbl, alternative=alternative).statistic
- fisher_p = stats.fisher_exact(tbl, alternative=alternative)[1]
- assert_allclose(boschloo_stat, fisher_p)
- @make_xp_test_case(cramervonmises_2samp)
- class TestCvm_2samp:
- @pytest.mark.parametrize('args', [([], np.arange(5)),
- (np.arange(5), [1])])
- @pytest.mark.skip_xp_backends("jax.numpy", reason="lazy -> no axis_nan_policy")
- def test_too_small_input(self, args, xp):
- args = (xp.asarray(arg, dtype=xp_default_dtype(xp)) for arg in args)
- with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
- res = cramervonmises_2samp(*args)
- xp_assert_equal(res.statistic, xp.asarray(xp.nan))
- xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
- def test_invalid_input(self, xp):
- y = xp.arange(5)
- msg = 'method must be either auto, exact or asymptotic'
- with pytest.raises(ValueError, match=msg):
- cramervonmises_2samp(y, y, 'xyz')
- def test_list_input(self): # list input only relevant for NumPy
- x = [2, 3, 4, 7, 6]
- y = [0.2, 0.7, 12, 18]
- r1 = cramervonmises_2samp(x, y)
- r2 = cramervonmises_2samp(np.array(x), np.array(y))
- assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue))
- @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
- def test_example_conover(self, dtype, xp):
- # Example 2 in Section 6.2 of W.J. Conover: Practical Nonparametric
- # Statistics, 1971.
- if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
- pytest.skip("Pre-NEP 50 doesn't respect dtypes")
- dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
- x = xp.asarray([7.6, 8.4, 8.6, 8.7, 9.3, 9.9, 10.1, 10.6, 11.2], dtype=dtype)
- y = xp.asarray([5.2, 5.7, 5.9, 6.5, 6.8, 8.2, 9.1, 9.8,
- 10.8, 11.3, 11.5, 12.3, 12.5, 13.4, 14.6], dtype=dtype)
- r = cramervonmises_2samp(x, y)
- xp_assert_close(r.statistic, xp.asarray(0.262, dtype=dtype), atol=1e-3)
- xp_assert_close(r.pvalue, xp.asarray(.18, dtype=dtype), atol=1e-2)
- @pytest.mark.parametrize('statistic, m, n, pval',
- [(710, 5, 6, 48./462),
- (1897, 7, 7, 117./1716),
- (576, 4, 6, 2./210),
- (1764, 6, 7, 2./1716)])
- def test_exact_pvalue(self, statistic, m, n, pval): # only implemented w/ NumPy
- # the exact values are taken from Anderson: On the distribution of the
- # two-sample Cramer-von-Mises criterion, 1962.
- # The values are taken from Table 2, 3, 4 and 5
- assert_equal(_pval_cvm_2samp_exact(statistic, m, n), pval)
- @pytest.mark.xslow
- def test_large_sample(self, xp):
- # for large samples, the statistic U gets very large
- # do a sanity check that p-value is not 0, 1 or nan
- rng = np.random.default_rng(4367)
- x = distributions.norm.rvs(size=1000000, random_state=rng)
- y = distributions.norm.rvs(size=900000, random_state=rng)
- x, y = xp.asarray(x), xp.asarray(y)
- r = cramervonmises_2samp(x, y)
- assert 0 < r.pvalue < 1
- r = cramervonmises_2samp(x, y+0.1)
- assert 0 < r.pvalue < 1
- def test_exact_vs_asymptotic(self, xp):
- rng = np.random.RandomState(0)
- x = xp.asarray(rng.random(7))
- y = xp.asarray(rng.random(8))
- r1 = cramervonmises_2samp(x, y, method='exact')
- r2 = cramervonmises_2samp(x, y, method='asymptotic')
- xp_assert_equal(r1.statistic, r2.statistic)
- xp_assert_close(r1.pvalue, r2.pvalue, atol=1e-2)
- def test_method_auto(self, xp):
- x = xp.arange(20.)
- y = xp.asarray([0.5, 4.7, 13.1])
- r1 = cramervonmises_2samp(x, y, method='exact')
- r2 = cramervonmises_2samp(x, y, method='auto')
- xp_assert_equal(r1.pvalue, r2.pvalue)
- # switch to asymptotic if one sample has more than 20 observations
- x = xp.arange(21.)
- r1 = cramervonmises_2samp(x, y, method='asymptotic')
- r2 = cramervonmises_2samp(x, y, method='auto')
- xp_assert_equal(r1.pvalue, r2.pvalue)
- def test_same_input(self, xp):
- # make sure trivial edge case can be handled
- # note that _cdf_cvm_inf(0) = nan. implementation avoids nan by
- # returning pvalue=1 for very small values of the statistic
- x = xp.arange(15)
- res = cramervonmises_2samp(x, x)
- xp_assert_equal(res.statistic, xp.asarray(0.))
- xp_assert_equal(res.pvalue, xp.asarray(1.))
- # check exact p-value
- res = cramervonmises_2samp(x[:4], x[:4])
- xp_assert_equal(res.statistic, xp.asarray(0.))
- xp_assert_equal(res.pvalue, xp.asarray(1.))
- class TestTukeyHSD:
- data_same_size = ([24.5, 23.5, 26.4, 27.1, 29.9],
- [28.4, 34.2, 29.5, 32.2, 30.1],
- [26.1, 28.3, 24.3, 26.2, 27.8])
- data_diff_size = ([24.5, 23.5, 26.28, 26.4, 27.1, 29.9, 30.1, 30.1],
- [28.4, 34.2, 29.5, 32.2, 30.1],
- [26.1, 28.3, 24.3, 26.2, 27.8])
- extreme_size = ([24.5, 23.5, 26.4],
- [28.4, 34.2, 29.5, 32.2, 30.1, 28.4, 34.2, 29.5, 32.2,
- 30.1],
- [26.1, 28.3, 24.3, 26.2, 27.8])
- sas_same_size = """
- Comparison LowerCL Difference UpperCL Significance
- 2 - 3 0.6908830568 4.34 7.989116943 1
- 2 - 1 0.9508830568 4.6 8.249116943 1
- 3 - 2 -7.989116943 -4.34 -0.6908830568 1
- 3 - 1 -3.389116943 0.26 3.909116943 0
- 1 - 2 -8.249116943 -4.6 -0.9508830568 1
- 1 - 3 -3.909116943 -0.26 3.389116943 0
- """
- sas_diff_size = """
- Comparison LowerCL Difference UpperCL Significance
- 2 - 1 0.2679292645 3.645 7.022070736 1
- 2 - 3 0.5934764007 4.34 8.086523599 1
- 1 - 2 -7.022070736 -3.645 -0.2679292645 1
- 1 - 3 -2.682070736 0.695 4.072070736 0
- 3 - 2 -8.086523599 -4.34 -0.5934764007 1
- 3 - 1 -4.072070736 -0.695 2.682070736 0
- """
- sas_extreme = """
- Comparison LowerCL Difference UpperCL Significance
- 2 - 3 1.561605075 4.34 7.118394925 1
- 2 - 1 2.740784879 6.08 9.419215121 1
- 3 - 2 -7.118394925 -4.34 -1.561605075 1
- 3 - 1 -1.964526566 1.74 5.444526566 0
- 1 - 2 -9.419215121 -6.08 -2.740784879 1
- 1 - 3 -5.444526566 -1.74 1.964526566 0
- """
- @pytest.mark.parametrize("data,res_expect_str,atol",
- ((data_same_size, sas_same_size, 1e-4),
- (data_diff_size, sas_diff_size, 1e-4),
- (extreme_size, sas_extreme, 1e-10),
- ),
- ids=["equal size sample",
- "unequal sample size",
- "extreme sample size differences"])
- def test_compare_sas(self, data, res_expect_str, atol):
- '''
- SAS code used to generate results for each sample:
- DATA ACHE;
- INPUT BRAND RELIEF;
- CARDS;
- 1 24.5
- ...
- 3 27.8
- ;
- ods graphics on; ODS RTF;ODS LISTING CLOSE;
- PROC ANOVA DATA=ACHE;
- CLASS BRAND;
- MODEL RELIEF=BRAND;
- MEANS BRAND/TUKEY CLDIFF;
- TITLE 'COMPARE RELIEF ACROSS MEDICINES - ANOVA EXAMPLE';
- ods output CLDiffs =tc;
- proc print data=tc;
- format LowerCL 17.16 UpperCL 17.16 Difference 17.16;
- title "Output with many digits";
- RUN;
- QUIT;
- ODS RTF close;
- ODS LISTING;
- '''
- res_expect = np.asarray(res_expect_str.replace(" - ", " ").split()[5:],
- dtype=float).reshape((6, 6))
- res_tukey = stats.tukey_hsd(*data)
- conf = res_tukey.confidence_interval()
- # loop over the comparisons
- for i, j, l, s, h, sig in res_expect:
- i, j = int(i) - 1, int(j) - 1
- assert_allclose(conf.low[i, j], l, atol=atol)
- assert_allclose(res_tukey.statistic[i, j], s, atol=atol)
- assert_allclose(conf.high[i, j], h, atol=atol)
- assert_allclose((res_tukey.pvalue[i, j] <= .05), sig == 1)
- matlab_sm_siz = """
- 1 2 -8.2491590248597 -4.6 -0.9508409751403 0.0144483269098
- 1 3 -3.9091590248597 -0.26 3.3891590248597 0.9803107240900
- 2 3 0.6908409751403 4.34 7.9891590248597 0.0203311368795
- """
- matlab_diff_sz = """
- 1 2 -7.02207069748501 -3.645 -0.26792930251500 0.03371498443080
- 1 3 -2.68207069748500 0.695 4.07207069748500 0.85572267328807
- 2 3 0.59347644287720 4.34 8.08652355712281 0.02259047020620
- """
- @pytest.mark.parametrize("data,res_expect_str,atol",
- ((data_same_size, matlab_sm_siz, 1e-12),
- (data_diff_size, matlab_diff_sz, 1e-7)),
- ids=["equal size sample",
- "unequal size sample"])
- def test_compare_matlab(self, data, res_expect_str, atol):
- """
- vals = [24.5, 23.5, 26.4, 27.1, 29.9, 28.4, 34.2, 29.5, 32.2, 30.1,
- 26.1, 28.3, 24.3, 26.2, 27.8]
- names = {'zero', 'zero', 'zero', 'zero', 'zero', 'one', 'one', 'one',
- 'one', 'one', 'two', 'two', 'two', 'two', 'two'}
- [p,t,stats] = anova1(vals,names,"off");
- [c,m,h,nms] = multcompare(stats, "CType","hsd");
- """
- res_expect = np.asarray(res_expect_str.split(),
- dtype=float).reshape((3, 6))
- res_tukey = stats.tukey_hsd(*data)
- conf = res_tukey.confidence_interval()
- # loop over the comparisons
- for i, j, l, s, h, p in res_expect:
- i, j = int(i) - 1, int(j) - 1
- assert_allclose(conf.low[i, j], l, atol=atol)
- assert_allclose(res_tukey.statistic[i, j], s, atol=atol)
- assert_allclose(conf.high[i, j], h, atol=atol)
- assert_allclose(res_tukey.pvalue[i, j], p, atol=atol)
- def test_compare_r(self):
- """
- Testing against results and p-values from R:
- from: https://www.rdocumentation.org/packages/stats/versions/3.6.2/
- topics/TukeyHSD
- > require(graphics)
- > summary(fm1 <- aov(breaks ~ tension, data = warpbreaks))
- > TukeyHSD(fm1, "tension", ordered = TRUE)
- > plot(TukeyHSD(fm1, "tension"))
- Tukey multiple comparisons of means
- 95% family-wise confidence level
- factor levels have been ordered
- Fit: aov(formula = breaks ~ tension, data = warpbreaks)
- $tension
- """
- str_res = """
- diff lwr upr p adj
- 2 - 3 4.722222 -4.8376022 14.28205 0.4630831
- 1 - 3 14.722222 5.1623978 24.28205 0.0014315
- 1 - 2 10.000000 0.4401756 19.55982 0.0384598
- """
- res_expect = np.asarray(str_res.replace(" - ", " ").split()[5:],
- dtype=float).reshape((3, 6))
- data = ([26, 30, 54, 25, 70, 52, 51, 26, 67,
- 27, 14, 29, 19, 29, 31, 41, 20, 44],
- [18, 21, 29, 17, 12, 18, 35, 30, 36,
- 42, 26, 19, 16, 39, 28, 21, 39, 29],
- [36, 21, 24, 18, 10, 43, 28, 15, 26,
- 20, 21, 24, 17, 13, 15, 15, 16, 28])
- res_tukey = stats.tukey_hsd(*data)
- conf = res_tukey.confidence_interval()
- # loop over the comparisons
- for i, j, s, l, h, p in res_expect:
- i, j = int(i) - 1, int(j) - 1
- # atols are set to the number of digits present in the r result.
- assert_allclose(conf.low[i, j], l, atol=1e-7)
- assert_allclose(res_tukey.statistic[i, j], s, atol=1e-6)
- assert_allclose(conf.high[i, j], h, atol=1e-5)
- assert_allclose(res_tukey.pvalue[i, j], p, atol=1e-7)
- def test_engineering_stat_handbook(self):
- '''
- Example sourced from:
- https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm
- '''
- group1 = [6.9, 5.4, 5.8, 4.6, 4.0]
- group2 = [8.3, 6.8, 7.8, 9.2, 6.5]
- group3 = [8.0, 10.5, 8.1, 6.9, 9.3]
- group4 = [5.8, 3.8, 6.1, 5.6, 6.2]
- res = stats.tukey_hsd(group1, group2, group3, group4)
- conf = res.confidence_interval()
- lower = np.asarray([
- [0, 0, 0, -2.25],
- [.29, 0, -2.93, .13],
- [1.13, 0, 0, .97],
- [0, 0, 0, 0]])
- upper = np.asarray([
- [0, 0, 0, 1.93],
- [4.47, 0, 1.25, 4.31],
- [5.31, 0, 0, 5.15],
- [0, 0, 0, 0]])
- for (i, j) in [(1, 0), (2, 0), (0, 3), (1, 2), (2, 3)]:
- assert_allclose(conf.low[i, j], lower[i, j], atol=1e-2)
- assert_allclose(conf.high[i, j], upper[i, j], atol=1e-2)
- def test_rand_symm(self):
- # test some expected identities of the results
- rng = np.random.default_rng(2699550179)
- data = rng.random((3, 100))
- res = stats.tukey_hsd(*data)
- conf = res.confidence_interval()
- # the confidence intervals should be negated symmetric of each other
- assert_equal(conf.low, -conf.high.T)
- # the `high` and `low` center diagonals should be the same since the
- # mean difference in a self comparison is 0.
- assert_equal(np.diagonal(conf.high), conf.high[0, 0])
- assert_equal(np.diagonal(conf.low), conf.low[0, 0])
- # statistic array should be antisymmetric with zeros on the diagonal
- assert_equal(res.statistic, -res.statistic.T)
- assert_equal(np.diagonal(res.statistic), 0)
- # p-values should be symmetric and 1 when compared to itself
- assert_equal(res.pvalue, res.pvalue.T)
- assert_equal(np.diagonal(res.pvalue), 1)
- def test_no_inf(self):
- with assert_raises(ValueError, match="...must be finite."):
- stats.tukey_hsd([1, 2, 3], [2, np.inf], [6, 7, 3])
- def test_is_1d(self):
- with assert_raises(ValueError, match="...must be one-dimensional"):
- stats.tukey_hsd([[1, 2], [2, 3]], [2, 5], [5, 23, 6])
- def test_no_empty(self):
- with assert_raises(ValueError, match="...must be greater than one"):
- stats.tukey_hsd([], [2, 5], [4, 5, 6])
- def test_equal_var_input_validation(self):
- msg = "Expected a boolean value for 'equal_var'"
- with assert_raises(TypeError, match=msg):
- stats.tukey_hsd([1, 2, 3], [2, 5], [6, 7], equal_var="False")
- @pytest.mark.parametrize("nargs", (0, 1))
- def test_not_enough_treatments(self, nargs):
- with assert_raises(ValueError, match="...more than 1 treatment."):
- stats.tukey_hsd(*([[23, 7, 3]] * nargs))
- @pytest.mark.parametrize("cl", [-.5, 0, 1, 2])
- def test_conf_level_invalid(self, cl):
- with assert_raises(ValueError, match="must be between 0 and 1"):
- r = stats.tukey_hsd([23, 7, 3], [3, 4], [9, 4])
- r.confidence_interval(cl)
- def test_2_args_ttest(self):
- # that with 2 treatments the `pvalue` is equal to that of `ttest_ind`
- res_tukey = stats.tukey_hsd(*self.data_diff_size[:2])
- res_ttest = stats.ttest_ind(*self.data_diff_size[:2])
- assert_allclose(res_ttest.pvalue, res_tukey.pvalue[0, 1])
- assert_allclose(res_ttest.pvalue, res_tukey.pvalue[1, 0])
- class TestGamesHowell:
- # data with unequal variances
- data_same_size = ([24., 23., 31., 51.],
- [34., 18., 18., 26.],
- [17., 68., 59., 7.])
- data_diff_size = ([30., 23., 51.],
- [-81., 71., -27., 63.],
- [42., 11., 29., 19., 50.],
- [23., 22., 20., 18., 9.])
- spss_same_size = """
- Mean Diff Lower Bound Upper Bound Sig
- 0 - 1 8.25000000 -16.5492749527311 33.0492749527311 0.558733632413559
- 0 - 2 -5.50000000 -63.6702454316458 52.6702454316458 0.941147750599221
- 1 - 2 -13.7500000 -74.3174374251372 46.8174374251372 0.682983914946841
- """
- spss_diff_size = """
- Mean Diff Lower Bound Upper Bound Sig
- 0 - 1 28.16666667 -141.985416377670 198.318749711003 0.8727542747886180
- 0 - 2 4.466666667 -37.2830676783904 46.2164010117237 0.9752628408671710
- 0 - 3 16.26666667 -35.0933112382470 67.6266445715803 0.4262506151302880
- 1 - 2 -23.70000000 -195.315617201249 147.915617201249 0.9148950609000590
- 1 - 3 -11.90000000 -188.105478728519 164.305478728519 0.9861432250093960
- 2 - 3 11.80000000 -16.2894857524254 39.8894857524254 0.4755344436335670
- """
- @pytest.mark.xslow
- @pytest.mark.parametrize("data, res_expect_str",
- ((data_same_size, spss_same_size),
- (data_diff_size, spss_diff_size)),
- ids=["equal size sample",
- "unequal sample size"])
- def test_compare_spss(self, data, res_expect_str):
- """
- DATA LIST LIST /Group (F1.0) Value (F8.2).
- BEGIN DATA
- 0 24
- 0 23
- 0 31
- 0 51
- 1 34
- 1 18
- 1 18
- 1 26
- 2 17
- 2 68
- 2 59
- 2 7
- END DATA.
- ONEWAY Value BY Group
- /MISSING ANALYSIS
- /POSTHOC=GH ALPHA(0.05).
- """
- res_expect = np.asarray(
- res_expect_str.replace(" - ", " ").split()[7:],
- dtype=float).reshape(-1, 6)
- res_games = stats.tukey_hsd(*data, equal_var=False)
- conf = res_games.confidence_interval()
- # loop over the comparisons
- for i, j, s, l, h, p in res_expect:
- i, j = int(i), int(j)
- assert_allclose(res_games.statistic[i, j], s, atol=1e-8)
- assert_allclose(res_games.pvalue[i, j], p, atol=1e-8)
- assert_allclose(conf.low[i, j], l, atol=1e-6)
- assert_allclose(conf.high[i, j], h, atol=1e-5)
- r_same_size = """
- q value Pr(>|q|)
- 1 - 0 == 0 -1.5467805948856344 0.55873362851759
- 2 - 0 == 0 0.4726721776628535 0.94114775035993
- 2 - 1 == 0 1.246837541297872 0.68298393799782
- """
- r_diff_size = """
- q value Pr(>|q|)
- 1 - 0 == 0 -1.0589317485313876 0.87275427357438
- 2 - 0 == 0 -0.5716222106144833 0.97526284087419
- 3 - 0 == 0 -2.6209678382077000 0.42625067714691
- 2 - 1 == 0 0.8971899898179028 0.91489506061850
- 3 - 1 == 0 0.4579447210555352 0.98614322544695
- 3 - 2 == 0 -2.198800177874794 0.47553444364614
- """
- @pytest.mark.parametrize("data, res_expect_str",
- ((data_same_size, r_same_size),
- (data_diff_size, r_diff_size)),
- ids=["equal size sample",
- "unequal sample size"])
- def test_compare_r(self, data, res_expect_str):
- """
- games-howell is provided by PMCMRplus package
- https://search.r-project.org/CRAN/refmans/PMCMRplus/html/gamesHowellTest.html
- > library("PMCMRplus")
- > options(digits=16)
- > table = data.frame(
- values = c(24., 23., 31., 51., 34., 18., 18., 26., 17., 68., 59., 7.),
- groups = c("0", "0", "0", "0", "1", "1", "1", "1", "2", "2", "2", "2")
- )
- > table$groups = as.factor(table$groups)
- > fit <-aov(values ~ groups, table)
- > res = gamesHowellTest(fit)
- > summary(res)
- """
- res_expect = np.asarray(
- res_expect_str.replace(" - ", " ")
- .replace(" == ", " ").split()[3:],
- dtype=float).reshape(-1, 5)
- res_games = stats.tukey_hsd(*data, equal_var=False)
- # loop over the comparisons
- # note confidence intervals are not provided by PMCMRplus
- for j, i, _, _, p in res_expect:
- i, j = int(i), int(j)
- assert_allclose(res_games.pvalue[i, j], p, atol=1e-7)
- # Data validation test has been covered by TestTukeyHSD
- # like empty, 1d, inf, and lack of tretments
- # because games_howell leverage _tukey_hsd_iv()
- class TestPoissonMeansTest:
- @pytest.mark.parametrize("c1, n1, c2, n2, p_expect", (
- # example from [1], 6. Illustrative examples: Example 1
- [0, 100, 3, 100, 0.0884],
- [2, 100, 6, 100, 0.1749]
- ))
- def test_paper_examples(self, c1, n1, c2, n2, p_expect):
- res = stats.poisson_means_test(c1, n1, c2, n2)
- assert_allclose(res.pvalue, p_expect, atol=1e-4)
- @pytest.mark.parametrize("c1, n1, c2, n2, p_expect, alt, d", (
- # These test cases are produced by the wrapped fortran code from the
- # original authors. Using a slightly modified version of this fortran,
- # found here, https://github.com/nolanbconaway/poisson-etest,
- # additional tests were created.
- [20, 10, 20, 10, 0.9999997568929630, 'two-sided', 0],
- [10, 10, 10, 10, 0.9999998403241203, 'two-sided', 0],
- [50, 15, 1, 1, 0.09920321053409643, 'two-sided', .05],
- [3, 100, 20, 300, 0.12202725450896404, 'two-sided', 0],
- [3, 12, 4, 20, 0.40416087318539173, 'greater', 0],
- [4, 20, 3, 100, 0.008053640402974236, 'greater', 0],
- # publishing paper does not include a `less` alternative,
- # so it was calculated with switched argument order and
- # alternative="greater"
- [4, 20, 3, 10, 0.3083216325432898, 'less', 0],
- [1, 1, 50, 15, 0.09322998607245102, 'less', 0]
- ))
- def test_fortran_authors(self, c1, n1, c2, n2, p_expect, alt, d):
- res = stats.poisson_means_test(c1, n1, c2, n2, alternative=alt, diff=d)
- assert_allclose(res.pvalue, p_expect, atol=2e-6, rtol=1e-16)
- def test_different_results(self):
- # The implementation in Fortran is known to break down at higher
- # counts and observations, so we expect different results. By
- # inspection we can infer the p-value to be near one.
- count1, count2 = 10000, 10000
- nobs1, nobs2 = 10000, 10000
- res = stats.poisson_means_test(count1, nobs1, count2, nobs2)
- assert_allclose(res.pvalue, 1)
- def test_less_than_zero_lambda_hat2(self):
- # demonstrates behavior that fixes a known fault from original Fortran.
- # p-value should clearly be near one.
- count1, count2 = 0, 0
- nobs1, nobs2 = 1, 1
- res = stats.poisson_means_test(count1, nobs1, count2, nobs2)
- assert_allclose(res.pvalue, 1)
- def test_input_validation(self):
- count1, count2 = 0, 0
- nobs1, nobs2 = 1, 1
- # test non-integral events
- message = '`k1` and `k2` must be integers.'
- with assert_raises(TypeError, match=message):
- stats.poisson_means_test(.7, nobs1, count2, nobs2)
- with assert_raises(TypeError, match=message):
- stats.poisson_means_test(count1, nobs1, .7, nobs2)
- # test negative events
- message = '`k1` and `k2` must be greater than or equal to 0.'
- with assert_raises(ValueError, match=message):
- stats.poisson_means_test(-1, nobs1, count2, nobs2)
- with assert_raises(ValueError, match=message):
- stats.poisson_means_test(count1, nobs1, -1, nobs2)
- # test negative sample size
- message = '`n1` and `n2` must be greater than 0.'
- with assert_raises(ValueError, match=message):
- stats.poisson_means_test(count1, -1, count2, nobs2)
- with assert_raises(ValueError, match=message):
- stats.poisson_means_test(count1, nobs1, count2, -1)
- # test negative difference
- message = 'diff must be greater than or equal to 0.'
- with assert_raises(ValueError, match=message):
- stats.poisson_means_test(count1, nobs1, count2, nobs2, diff=-1)
- # test invalid alternative
- message = 'Alternative must be one of ...'
- with assert_raises(ValueError, match=message):
- stats.poisson_means_test(1, 2, 1, 2, alternative='error')
- class TestBWSTest:
- def test_bws_input_validation(self):
- rng = np.random.default_rng(4571775098104213308)
- x, y = rng.random(size=(2, 7))
- message = '`x` and `y` must be exactly one-dimensional.'
- with pytest.raises(ValueError, match=message):
- stats.bws_test([x, x], [y, y])
- message = '`x` and `y` must not contain NaNs.'
- with pytest.raises(ValueError, match=message):
- stats.bws_test([np.nan], y)
- message = '`x` and `y` must be of nonzero size.'
- with pytest.raises(ValueError, match=message):
- stats.bws_test(x, [])
- message = 'alternative` must be one of...'
- with pytest.raises(ValueError, match=message):
- stats.bws_test(x, y, alternative='ekki-ekki')
- message = 'method` must be an instance of...'
- with pytest.raises(ValueError, match=message):
- stats.bws_test(x, y, method=42)
- def test_against_published_reference(self):
- # Test against Example 2 in bws_test Reference [1], pg 9
- # https://link.springer.com/content/pdf/10.1007/BF02762032.pdf
- x = [1, 2, 3, 4, 6, 7, 8]
- y = [5, 9, 10, 11, 12, 13, 14]
- res = stats.bws_test(x, y, alternative='two-sided')
- assert_allclose(res.statistic, 5.132, atol=1e-3)
- assert_equal(res.pvalue, 10/3432)
- @pytest.mark.parametrize(('alternative', 'statistic', 'pvalue'),
- [('two-sided', 1.7510204081633, 0.1264422777777),
- ('less', -1.7510204081633, 0.05754662004662),
- ('greater', -1.7510204081633, 0.9424533799534)])
- def test_against_R(self, alternative, statistic, pvalue):
- # Test against R library BWStest function bws_test
- # library(BWStest)
- # options(digits=16)
- # x = c(...)
- # y = c(...)
- # bws_test(x, y, alternative='two.sided')
- rng = np.random.default_rng(4571775098104213308)
- x, y = rng.random(size=(2, 7))
- res = stats.bws_test(x, y, alternative=alternative)
- assert_allclose(res.statistic, statistic, rtol=1e-13)
- assert_allclose(res.pvalue, pvalue, atol=1e-2, rtol=1e-1)
- @pytest.mark.parametrize(('alternative', 'statistic', 'pvalue'),
- [('two-sided', 1.142629265891, 0.2903950180801),
- ('less', 0.99629665877411, 0.8545660222131),
- ('greater', 0.99629665877411, 0.1454339777869)])
- def test_against_R_imbalanced(self, alternative, statistic, pvalue):
- # Test against R library BWStest function bws_test
- # library(BWStest)
- # options(digits=16)
- # x = c(...)
- # y = c(...)
- # bws_test(x, y, alternative='two.sided')
- rng = np.random.default_rng(5429015622386364034)
- x = rng.random(size=9)
- y = rng.random(size=8)
- res = stats.bws_test(x, y, alternative=alternative)
- assert_allclose(res.statistic, statistic, rtol=1e-13)
- assert_allclose(res.pvalue, pvalue, atol=1e-2, rtol=1e-1)
- def test_method(self):
- # Test that `method` parameter has the desired effect
- rng = np.random.default_rng(1520514347193347862)
- x, y = rng.random(size=(2, 10))
- rng = np.random.default_rng(1520514347193347862)
- method = stats.PermutationMethod(n_resamples=10, rng=rng)
- res1 = stats.bws_test(x, y, method=method)
- assert len(res1.null_distribution) == 10
- rng = np.random.default_rng(1520514347193347862)
- method = stats.PermutationMethod(n_resamples=10, rng=rng)
- res2 = stats.bws_test(x, y, method=method)
- assert_allclose(res1.null_distribution, res2.null_distribution)
- rng = np.random.default_rng(5205143471933478621)
- method = stats.PermutationMethod(n_resamples=10, rng=rng)
- res3 = stats.bws_test(x, y, method=method)
- assert not np.allclose(res3.null_distribution, res1.null_distribution)
- def test_directions(self):
- # Sanity check of the sign of the one-sided statistic
- rng = np.random.default_rng(1520514347193347862)
- x = rng.random(size=5)
- y = x - 1
- res = stats.bws_test(x, y, alternative='greater')
- assert res.statistic > 0
- assert_equal(res.pvalue, 1 / len(res.null_distribution))
- res = stats.bws_test(x, y, alternative='less')
- assert res.statistic > 0
- assert_equal(res.pvalue, 1)
- res = stats.bws_test(y, x, alternative='less')
- assert res.statistic < 0
- assert_equal(res.pvalue, 1 / len(res.null_distribution))
- res = stats.bws_test(y, x, alternative='greater')
- assert res.statistic < 0
- assert_equal(res.pvalue, 1)
|