test_resampling.py 90 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046
  1. import warnings
  2. import pytest
  3. import numpy as np
  4. from numpy.testing import assert_allclose, assert_equal
  5. from scipy._lib._util import rng_integers
  6. from scipy._lib._array_api import (is_numpy, make_xp_test_case, xp_default_dtype,
  7. xp_size, array_namespace, _xp_copy_to_numpy)
  8. from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal
  9. from scipy._lib import array_api_extra as xpx
  10. from scipy import stats, special
  11. from scipy.fft.tests.test_fftlog import skip_xp_backends
  12. from scipy.optimize import root
  13. from scipy.stats import bootstrap, monte_carlo_test, permutation_test, power
  14. import scipy.stats._resampling as _resampling
  15. @make_xp_test_case(bootstrap)
  16. class TestBootstrap:
  17. @skip_xp_backends('numpy', reason='NumPy does not raise')
  18. def test_bootstrap_iv_other(self, xp):
  19. message = f"When using array library {xp.__name__}"
  20. with pytest.raises(TypeError, match=message):
  21. bootstrap((xp.asarray([1, 2, 3]),), lambda x: xp.mean(x))
  22. def test_bootstrap_iv(self, xp):
  23. sample = xp.asarray([1, 2, 3])
  24. message = "`data` must contain at least one sample."
  25. with pytest.raises(ValueError, match=message):
  26. bootstrap(tuple(), xp.mean)
  27. message = "each sample in `data` must contain two or more observations..."
  28. with pytest.raises(ValueError, match=message):
  29. bootstrap((sample, xp.asarray([1])), xp.mean)
  30. message = ("When `paired is True`, all samples must have the same length ")
  31. with pytest.raises(ValueError, match=message):
  32. bootstrap((sample, xp.asarray([1, 2, 3, 4])), xp.mean, paired=True)
  33. message = "`vectorized` must be `True`, `False`, or `None`."
  34. with pytest.raises(ValueError, match=message):
  35. bootstrap(sample, xp.mean, vectorized='ekki')
  36. message = "`axis` must be an integer."
  37. with pytest.raises(ValueError, match=message):
  38. bootstrap((sample,), xp.mean, axis=1.5)
  39. message = "could not convert string to float"
  40. with pytest.raises(ValueError, match=message):
  41. bootstrap((sample,), xp.mean, confidence_level='ni')
  42. message = "`n_resamples` must be a non-negative integer."
  43. with pytest.raises(ValueError, match=message):
  44. bootstrap((sample,), xp.mean, n_resamples=-1000)
  45. message = "`n_resamples` must be a non-negative integer."
  46. with pytest.raises(ValueError, match=message):
  47. bootstrap((sample,), xp.mean, n_resamples=1000.5)
  48. message = "`batch` must be a positive integer or None."
  49. with pytest.raises(ValueError, match=message):
  50. bootstrap((sample,), xp.mean, batch=-1000)
  51. message = "`batch` must be a positive integer or None."
  52. with pytest.raises(ValueError, match=message):
  53. bootstrap((sample,), xp.mean, batch=1000.5)
  54. message = "`method` must be in"
  55. with pytest.raises(ValueError, match=message):
  56. bootstrap((sample,), xp.mean, method='ekki')
  57. message = "`bootstrap_result` must have attribute `bootstrap_distribution'"
  58. with pytest.raises(ValueError, match=message):
  59. bootstrap((sample,), xp.mean, bootstrap_result=10)
  60. message = "Either `bootstrap_result.bootstrap_distribution.size`"
  61. with pytest.raises(ValueError, match=message):
  62. bootstrap((sample,), xp.mean, n_resamples=0)
  63. message = "SeedSequence expects int or sequence of ints"
  64. with pytest.raises(TypeError, match=message):
  65. bootstrap((sample,), xp.mean, rng='herring')
  66. @pytest.mark.parametrize("method", ['basic', 'percentile', 'BCa'])
  67. @pytest.mark.parametrize("axis", [0, 1, 2])
  68. def test_bootstrap_batch(self, method, axis, xp):
  69. # for one-sample statistics, batch size shouldn't affect the result
  70. rng = np.random.RandomState(0)
  71. x = rng.rand(10, 11, 12)
  72. # SPEC-007 leave one call with random_state to ensure it still works
  73. res1 = bootstrap((xp.asarray(x),), xp.mean, batch=None, method=method,
  74. random_state=0, axis=axis, n_resamples=100)
  75. rng = np.random.RandomState(0)
  76. res2 = bootstrap((xp.asarray(x),), xp.mean, batch=10, method=method,
  77. axis=axis, n_resamples=100, random_state=rng)
  78. xp_assert_equal(res2.confidence_interval.low, res1.confidence_interval.low)
  79. xp_assert_equal(res2.confidence_interval.high, res1.confidence_interval.high)
  80. xp_assert_equal(res2.standard_error, res1.standard_error)
  81. @pytest.mark.parametrize("method", ['basic', 'percentile', 'BCa'])
  82. def test_bootstrap_paired(self, method, xp):
  83. # test that `paired` works as expected
  84. rng = np.random.RandomState(0)
  85. n = 100
  86. x = xp.asarray(rng.rand(n))
  87. y = xp.asarray(rng.rand(n))
  88. def my_statistic(x, y, axis=-1):
  89. return xp.mean((x-y)**2, axis=axis)
  90. def my_paired_statistic(i, axis=-1):
  91. a = x[i]
  92. b = y[i]
  93. res = my_statistic(a, b)
  94. return res
  95. i = xp.arange(x.shape[0])
  96. res1 = bootstrap((i,), my_paired_statistic, rng=0)
  97. res2 = bootstrap((x, y), my_statistic, paired=True, rng=0)
  98. xp_assert_close(res1.confidence_interval.low, res2.confidence_interval.low)
  99. xp_assert_close(res1.confidence_interval.high, res2.confidence_interval.high)
  100. xp_assert_close(res1.standard_error, res2.standard_error)
  101. @pytest.mark.parametrize("method", ['basic', 'percentile', 'BCa'])
  102. @pytest.mark.parametrize("axis", [0, 1, 2])
  103. @pytest.mark.parametrize("paired", [True, False])
  104. def test_bootstrap_vectorized(self, method, axis, paired, xp):
  105. # test that paired is vectorized as expected: when samples are tiled,
  106. # CI and standard_error of each axis-slice is the same as those of the
  107. # original 1d sample
  108. rng = np.random.RandomState(0)
  109. def my_statistic(x, y, z, axis=-1):
  110. return xp.mean(x, axis=axis) + xp.mean(y, axis=axis) + xp.mean(z, axis=axis)
  111. shape = 10, 11, 12
  112. n_samples = shape[axis]
  113. x = rng.rand(n_samples)
  114. y = rng.rand(n_samples)
  115. z = rng.rand(n_samples)
  116. x, y, z = xp.asarray(x), xp.asarray(y), xp.asarray(z)
  117. res1 = bootstrap((x, y, z), my_statistic, paired=paired, method=method,
  118. rng=0, axis=0, n_resamples=100)
  119. assert (res1.bootstrap_distribution.shape
  120. == res1.standard_error.shape + (100,))
  121. reshape = [1, 1, 1]
  122. reshape[axis] = n_samples
  123. reshape = tuple(reshape)
  124. x = xp.broadcast_to(xp.reshape(x, reshape), shape)
  125. y = xp.broadcast_to(xp.reshape(y, reshape), shape)
  126. z = xp.broadcast_to(xp.reshape(z, reshape), shape)
  127. res2 = bootstrap((x, y, z), my_statistic, paired=paired, method=method,
  128. rng=0, axis=axis, n_resamples=100)
  129. result_shape = list(shape)
  130. result_shape.pop(axis)
  131. ref_ci_low = xp.broadcast_to(res2.confidence_interval.low, result_shape)
  132. ref_ci_high = xp.broadcast_to(res2.confidence_interval.high, result_shape)
  133. ref_ci_standard_error = xp.broadcast_to(res2.standard_error, result_shape)
  134. xp_assert_close(res2.confidence_interval.low, ref_ci_low)
  135. xp_assert_close(res2.confidence_interval.high, ref_ci_high)
  136. xp_assert_close(res2.standard_error, ref_ci_standard_error)
  137. @pytest.mark.slow
  138. @pytest.mark.xfail_on_32bit("MemoryError with BCa observed in CI")
  139. @pytest.mark.parametrize("method", ['basic', 'percentile', 'BCa'])
  140. def test_bootstrap_against_theory(self, method, xp):
  141. # based on https://www.statology.org/confidence-intervals-python/
  142. rng = np.random.default_rng(2442101192988600726)
  143. data = stats.norm.rvs(loc=5, scale=2, size=5000, random_state=rng)
  144. alpha = 0.95
  145. dist = stats.t(df=len(data)-1, loc=np.mean(data), scale=stats.sem(data))
  146. ref_low, ref_high = dist.interval(confidence=alpha)
  147. ref_se = dist.std()
  148. config = dict(data=(xp.asarray(data),), statistic=xp.mean, n_resamples=5000,
  149. method=method, rng=rng)
  150. res = bootstrap(**config, confidence_level=alpha)
  151. xp_assert_close(res.confidence_interval.low, xp.asarray(ref_low), rtol=5e-4)
  152. xp_assert_close(res.confidence_interval.high, xp.asarray(ref_high), rtol=5e-4)
  153. xp_assert_close(res.standard_error, xp.asarray(ref_se), atol=3e-4)
  154. config.update(dict(n_resamples=0, bootstrap_result=res))
  155. res = bootstrap(**config, confidence_level=alpha, alternative='less')
  156. xp_assert_close(res.confidence_interval.high,
  157. xp.asarray(dist.ppf(alpha)), rtol=5e-4)
  158. config.update(dict(n_resamples=0, bootstrap_result=res))
  159. res = bootstrap(**config, confidence_level=alpha, alternative='greater')
  160. xp_assert_close(res.confidence_interval.low,
  161. xp.asarray(dist.ppf(1-alpha)), rtol=5e-4)
  162. tests_R = [("basic", 23.77, 79.12),
  163. ("percentile", 28.86, 84.21),
  164. ("BCa", 32.31, 91.43)]
  165. @pytest.mark.parametrize("method, ref_low, ref_high", tests_R)
  166. def test_bootstrap_against_R(self, method, ref_low, ref_high, xp):
  167. # Compare against R's "boot" library
  168. # library(boot)
  169. # stat <- function (x, a) {
  170. # mean(x[a])
  171. # }
  172. # x <- c(10, 12, 12.5, 12.5, 13.9, 15, 21, 22,
  173. # 23, 34, 50, 81, 89, 121, 134, 213)
  174. # # Use a large value so we get a few significant digits for the CI.
  175. # n = 1000000
  176. # bootresult = boot(x, stat, n)
  177. # result <- boot.ci(bootresult)
  178. # print(result)
  179. x = xp.asarray([10, 12, 12.5, 12.5, 13.9, 15, 21, 22,
  180. 23, 34, 50, 81, 89, 121, 134, 213])
  181. res = bootstrap((x,), xp.mean, n_resamples=1000000, method=method, rng=0)
  182. xp_assert_close(res.confidence_interval.low, xp.asarray(ref_low), rtol=0.005)
  183. xp_assert_close(res.confidence_interval.high, xp.asarray(ref_high), rtol=0.005)
  184. def test_multisample_BCa_against_R(self, xp):
  185. # Because bootstrap is stochastic, it's tricky to test against reference
  186. # behavior. Here, we show that SciPy's BCa CI matches R wboot's BCa CI
  187. # much more closely than the other SciPy CIs do.
  188. # arbitrary skewed data
  189. x = xp.asarray([0.75859206, 0.5910282, -0.4419409, -0.36654601,
  190. 0.34955357, -1.38835871, 0.76735821])
  191. y = xp.asarray([1.41186073, 0.49775975, 0.08275588, 0.24086388,
  192. 0.03567057, 0.52024419, 0.31966611, 1.32067634])
  193. # a multi-sample statistic for which the BCa CI tends to be different
  194. # from the other CIs
  195. def statistic(x, y, axis):
  196. s1 = stats.skew(x, axis=axis)
  197. s2 = stats.skew(y, axis=axis)
  198. return s1 - s2
  199. # compute confidence intervals using each method
  200. rng = np.random.default_rng(468865032284792692)
  201. res_basic = stats.bootstrap((x, y), statistic, method='basic',
  202. batch=100, rng=rng)
  203. res_percent = stats.bootstrap((x, y), statistic, method='percentile',
  204. batch=100, rng=rng)
  205. res_bca = stats.bootstrap((x, y), statistic, method='bca',
  206. batch=100, rng=rng)
  207. # compute midpoints so we can compare just one number for each
  208. mid_basic = xp.mean(xp.stack(res_basic.confidence_interval))
  209. mid_percent = xp.mean(xp.stack(res_percent.confidence_interval))
  210. mid_bca = xp.mean(xp.stack(res_bca.confidence_interval))
  211. # reference for BCA CI computed using R wboot package:
  212. # library(wBoot)
  213. # library(moments)
  214. # x = c(0.75859206, 0.5910282, -0.4419409, -0.36654601,
  215. # 0.34955357, -1.38835871, 0.76735821)
  216. # y = c(1.41186073, 0.49775975, 0.08275588, 0.24086388,
  217. # 0.03567057, 0.52024419, 0.31966611, 1.32067634)
  218. # twoskew <- function(x1, y1) {skewness(x1) - skewness(y1)}
  219. # boot.two.bca(x, y, skewness, conf.level = 0.95,
  220. # R = 9999, stacked = FALSE)
  221. mid_wboot = -1.5519
  222. # compute percent difference relative to wboot BCA method
  223. diff_basic = (mid_basic - mid_wboot)/abs(mid_wboot)
  224. diff_percent = (mid_percent - mid_wboot)/abs(mid_wboot)
  225. diff_bca = (mid_bca - mid_wboot)/abs(mid_wboot)
  226. # SciPy's BCa CI midpoint is much closer than that of the other methods
  227. assert diff_basic < -0.15
  228. assert diff_percent > 0.15
  229. assert abs(diff_bca) < 0.03
  230. def test_BCa_acceleration_against_reference(self, xp):
  231. # Compare the (deterministic) acceleration parameter for a multi-sample
  232. # problem against a reference value. The example is from [1], but Efron's
  233. # value seems inaccurate. Straightforward code for computing the
  234. # reference acceleration (0.011008228344026734) is available at:
  235. # https://github.com/scipy/scipy/pull/16455#issuecomment-1193400981
  236. y = xp.asarray([10., 27., 31., 40., 46., 50., 52., 104., 146.])
  237. z = xp.asarray([16., 23., 38., 94., 99., 141., 197.])
  238. def statistic(z, y, axis=0):
  239. return xp.mean(z, axis=axis) - xp.mean(y, axis=axis)
  240. data = [z, y]
  241. res = stats.bootstrap(data, statistic)
  242. axis = -1
  243. alpha = 0.95
  244. theta_hat_b = res.bootstrap_distribution
  245. batch = 100
  246. _, _, a_hat = _resampling._bca_interval(data, statistic, axis, alpha,
  247. theta_hat_b, batch, xp)
  248. xp_assert_close(a_hat, xp.asarray(0.011008228344026734))
  249. tests_against_itself_1samp = {"basic": 1789,
  250. "percentile": 1790,
  251. "BCa": 1789}
  252. @pytest.mark.slow
  253. @pytest.mark.parametrize("method, expected",
  254. tests_against_itself_1samp.items())
  255. def test_bootstrap_against_itself_1samp(self, method, expected, xp):
  256. # The expected values in this test were generated using bootstrap
  257. # to check for unintended changes in behavior. The test also makes sure
  258. # that bootstrap works with multi-sample statistics and that the
  259. # `axis` argument works as expected / function is vectorized.
  260. rng = np.random.default_rng(9123847)
  261. n = 100 # size of sample
  262. n_resamples = 999 # number of bootstrap resamples used to form each CI
  263. confidence_level = 0.9
  264. # The true mean is 5
  265. dist = stats.norm(loc=5, scale=1)
  266. stat_true = float(dist.mean())
  267. # Do the same thing 2000 times. (The code is fully vectorized.)
  268. n_replications = 2000
  269. data = dist.rvs(size=(n_replications, n), random_state=rng)
  270. res = bootstrap((xp.asarray(data),),
  271. statistic=xp.mean,
  272. confidence_level=confidence_level,
  273. n_resamples=n_resamples,
  274. batch=50,
  275. method=method,
  276. axis=-1,
  277. rng=rng)
  278. ci = res.confidence_interval
  279. # ci contains vectors of lower and upper confidence interval bounds
  280. ci_contains_true = xp.count_nonzero((ci[0] < stat_true) & (stat_true < ci[1]))
  281. assert ci_contains_true == expected
  282. # ci_contains_true is not inconsistent with confidence_level
  283. pvalue = stats.binomtest(int(ci_contains_true), n_replications,
  284. confidence_level).pvalue
  285. assert pvalue > 0.1
  286. tests_against_itself_2samp = {"basic": 892,
  287. "percentile": 890}
  288. @pytest.mark.slow
  289. @pytest.mark.parametrize("method, expected",
  290. tests_against_itself_2samp.items())
  291. def test_bootstrap_against_itself_2samp(self, method, expected, xp):
  292. # The expected values in this test were generated using bootstrap
  293. # to check for unintended changes in behavior. The test also makes sure
  294. # that bootstrap works with multi-sample statistics and that the
  295. # `axis` argument works as expected / function is vectorized.
  296. rng = np.random.RandomState(0)
  297. n1 = 100 # size of sample 1
  298. n2 = 120 # size of sample 2
  299. n_resamples = 999 # number of bootstrap resamples used to form each CI
  300. confidence_level = 0.9
  301. # The statistic we're interested in is the difference in means
  302. def my_stat(data1, data2, axis=-1):
  303. mean1 = xp.mean(data1, axis=axis)
  304. mean2 = xp.mean(data2, axis=axis)
  305. return mean1 - mean2
  306. # The true difference in the means is -0.1
  307. dist1 = stats.norm(loc=0, scale=1)
  308. dist2 = stats.norm(loc=0.1, scale=1)
  309. stat_true = float(dist1.mean() - dist2.mean())
  310. # Do the same thing 1000 times. (The code is fully vectorized.)
  311. n_replications = 1000
  312. data1 = dist1.rvs(size=(n_replications, n1), random_state=rng)
  313. data2 = dist2.rvs(size=(n_replications, n2), random_state=rng)
  314. res = bootstrap((xp.asarray(data1), xp.asarray(data2)),
  315. statistic=my_stat,
  316. confidence_level=confidence_level,
  317. n_resamples=n_resamples,
  318. batch=50,
  319. method=method,
  320. axis=-1,
  321. random_state=rng)
  322. ci = res.confidence_interval
  323. # ci contains vectors of lower and upper confidence interval bounds
  324. ci_contains_true = xp.count_nonzero((ci[0] < stat_true) & (stat_true < ci[1]))
  325. assert ci_contains_true == expected
  326. # ci_contains_true is not inconsistent with confidence_level
  327. pvalue = stats.binomtest(int(ci_contains_true), n_replications,
  328. confidence_level).pvalue
  329. assert pvalue > 0.1
  330. @pytest.mark.parametrize("method", ["basic", "percentile", "BCa"])
  331. @pytest.mark.parametrize("axis", [0, 1])
  332. @pytest.mark.parametrize("dtype", ['float32', 'float64'])
  333. def test_bootstrap_vectorized_3samp(self, method, axis, dtype, xp):
  334. def statistic(*data, axis=0):
  335. # an arbitrary, vectorized statistic
  336. return sum(xp.mean(sample, axis=axis) for sample in data)
  337. def statistic_1d(*data):
  338. # the same statistic, not vectorized
  339. for sample in data:
  340. assert sample.ndim == 1
  341. return np.asarray(sum(sample.mean() for sample in data), dtype=dtype)
  342. rng = np.random.RandomState(0)
  343. x = rng.rand(4, 5).astype(dtype)
  344. y = rng.rand(4, 5).astype(dtype)
  345. z = rng.rand(4, 5).astype(dtype)
  346. res1 = bootstrap((xp.asarray(x), xp.asarray(y), xp.asarray(z)),
  347. statistic, vectorized=True, axis=axis, n_resamples=100,
  348. method=method, rng=0)
  349. res2 = bootstrap((x, y, z), statistic_1d, vectorized=False,
  350. axis=axis, n_resamples=100, method=method, rng=0)
  351. rtol = 1e-6 if dtype == 'float32' else 1e-14
  352. xp_assert_close(res1.confidence_interval.low,
  353. xp.asarray(res2.confidence_interval.low), rtol=rtol)
  354. xp_assert_close(res1.confidence_interval.high,
  355. xp.asarray(res2.confidence_interval.high), rtol=rtol)
  356. xp_assert_close(res1.standard_error, xp.asarray(res2.standard_error), rtol=rtol)
  357. @pytest.mark.xfail_on_32bit("Failure is not concerning; see gh-14107")
  358. @pytest.mark.parametrize("method", ["basic", "percentile", "BCa"])
  359. @pytest.mark.parametrize("axis", [0, 1])
  360. def test_bootstrap_vectorized_1samp(self, method, axis, xp):
  361. def statistic(x, axis=0):
  362. # an arbitrary, vectorized statistic
  363. return xp.mean(x, axis=axis)
  364. def statistic_1d(x):
  365. # the same statistic, not vectorized
  366. assert x.ndim == 1
  367. return x.mean(axis=0)
  368. rng = np.random.default_rng(7939952824)
  369. x = rng.random((2, 3))
  370. res1 = bootstrap((xp.asarray(x),), statistic, vectorized=True, axis=axis,
  371. n_resamples=100, batch=None, method=method, rng=0)
  372. res2 = bootstrap((x,), statistic_1d, vectorized=False, axis=axis,
  373. n_resamples=100, batch=10, method=method, rng=0)
  374. xp_assert_close(res1.confidence_interval.low,
  375. xp.asarray(res2.confidence_interval.low))
  376. xp_assert_close(res1.confidence_interval.high,
  377. xp.asarray(res2.confidence_interval.high))
  378. xp_assert_close(res1.standard_error, xp.asarray(res2.standard_error))
  379. @pytest.mark.parametrize("method", ["basic", "percentile", "BCa"])
  380. def test_bootstrap_degenerate(self, method, xp):
  381. data = xp.full((35,), 10000.)
  382. if method == "BCa":
  383. with np.errstate(invalid='ignore'):
  384. msg = "The BCa confidence interval cannot be calculated"
  385. with pytest.warns(stats.DegenerateDataWarning, match=msg):
  386. res = bootstrap([data, ], xp.mean, method=method)
  387. xp_assert_equal(res.confidence_interval.low, xp.asarray(xp.nan))
  388. xp_assert_equal(res.confidence_interval.high, xp.asarray(xp.nan))
  389. else:
  390. res = bootstrap([data, ], xp.mean, method=method)
  391. xp_assert_equal(res.confidence_interval.low, xp.asarray(10000.))
  392. xp_assert_equal(res.confidence_interval.high, xp.asarray(10000.))
  393. xp_assert_equal(res.standard_error, xp.asarray(0.))
  394. @pytest.mark.parametrize("method", ["BCa", "basic", "percentile"])
  395. def test_bootstrap_gh15678(self, method, xp):
  396. # Check that gh-15678 is fixed: when statistic function returned a Python
  397. # float, method="BCa" failed when trying to add a dimension to the float
  398. rng = np.random.default_rng(354645618886684)
  399. dist = stats.norm(loc=2, scale=4)
  400. data = dist.rvs(size=100, random_state=rng)
  401. res = bootstrap((xp.asarray(data),), stats.skew, method=method, n_resamples=100,
  402. rng=np.random.default_rng(9563))
  403. # this always worked because np.apply_along_axis returns NumPy data type
  404. ref = bootstrap((data,), stats.skew, method=method, n_resamples=100,
  405. rng=np.random.default_rng(9563), vectorized=False)
  406. xp_assert_close(res.confidence_interval.low,
  407. xp.asarray(ref.confidence_interval.low))
  408. xp_assert_close(res.confidence_interval.high,
  409. xp.asarray(ref.confidence_interval.high))
  410. xp_assert_close(res.standard_error, xp.asarray(ref.standard_error))
  411. def test_bootstrap_min(self, xp):
  412. # Check that gh-15883 is fixed: percentileofscore should
  413. # behave according to the 'mean' behavior and not trigger nan for BCa
  414. rng = np.random.default_rng(1891289180021102)
  415. dist = stats.norm(loc=2, scale=4)
  416. data = dist.rvs(size=100, random_state=rng)
  417. true_min = np.min(data)
  418. data = xp.asarray(data)
  419. res = bootstrap((data,), xp.min, method="BCa", n_resamples=100,
  420. rng=np.random.default_rng(3942))
  421. xp_assert_equal(res.confidence_interval.low, xp.asarray(true_min))
  422. res2 = bootstrap((-data,), xp.max, method="BCa", n_resamples=100,
  423. rng=np.random.default_rng(3942))
  424. xp_assert_close(-res.confidence_interval.low, res2.confidence_interval.high)
  425. xp_assert_close(-res.confidence_interval.high, res2.confidence_interval.low)
  426. @pytest.mark.parametrize("additional_resamples", [0, 1000])
  427. def test_re_bootstrap(self, additional_resamples, xp):
  428. # Test behavior of parameter `bootstrap_result`
  429. rng = np.random.default_rng(8958153316228384)
  430. x = rng.random(size=100)
  431. n1 = 1000
  432. n2 = additional_resamples
  433. n3 = n1 + additional_resamples
  434. rng = np.random.default_rng(296689032789913033)
  435. res = stats.bootstrap((xp.asarray(x),), xp.mean, n_resamples=n1, rng=rng,
  436. confidence_level=0.95, method='percentile')
  437. res = stats.bootstrap((xp.asarray(x),), xp.mean, n_resamples=n2, rng=rng,
  438. confidence_level=0.90, method='BCa',
  439. bootstrap_result=res)
  440. rng = np.random.default_rng(296689032789913033)
  441. ref = stats.bootstrap((xp.asarray(x),), xp.mean, n_resamples=n3, rng=rng,
  442. confidence_level=0.90, method='BCa')
  443. xp_assert_close(res.confidence_interval.low,
  444. xp.asarray(ref.confidence_interval.low),
  445. rtol=1e-14)
  446. xp_assert_close(res.confidence_interval.high,
  447. xp.asarray(ref.confidence_interval.high),
  448. rtol=1e-14)
  449. xp_assert_close(res.standard_error, xp.asarray(ref.standard_error), rtol=1e-14)
  450. @pytest.mark.xfail_on_32bit("Sensitive to machine precision")
  451. @pytest.mark.parametrize("method", ['basic', 'percentile', 'BCa'])
  452. def test_bootstrap_alternative(self, method, xp):
  453. rng = np.random.default_rng(5894822712842015040)
  454. dist = stats.norm(loc=2, scale=4)
  455. data = (xp.asarray(dist.rvs(size=(100), random_state=rng)),)
  456. config = dict(data=data, statistic=xp.std, rng=rng, axis=-1)
  457. t = stats.bootstrap(**config, confidence_level=0.9)
  458. config.update(dict(n_resamples=0, bootstrap_result=t))
  459. l = stats.bootstrap(**config, confidence_level=0.95, alternative='less')
  460. g = stats.bootstrap(**config, confidence_level=0.95, alternative='greater')
  461. xp_assert_close(l.confidence_interval.high, t.confidence_interval.high,
  462. rtol=1e-14)
  463. xp_assert_close(g.confidence_interval.low, t.confidence_interval.low,
  464. rtol=1e-14)
  465. assert xp.isinf(l.confidence_interval.low) and l.confidence_interval.low < 0
  466. assert xp.isinf(g.confidence_interval.high) and g.confidence_interval.high > 0
  467. with pytest.raises(ValueError, match='`alternative` must be one of'):
  468. stats.bootstrap(**config, alternative='ekki-ekki')
  469. def test_jackknife_resample(self, xp):
  470. shape = 3, 4, 5, 6
  471. rng = np.random.default_rng(5274950392)
  472. x = rng.random(size=shape)
  473. y = next(_resampling._jackknife_resample(xp.asarray(x), xp=xp))
  474. for i in range(shape[-1]):
  475. # each resample is indexed along second to last axis
  476. # (last axis is the one the statistic will be taken over / consumed)
  477. slc = y[..., i, :]
  478. expected = np.delete(x, i, axis=-1)
  479. xp_assert_equal(slc, xp.asarray(expected))
  480. y2 = list(_resampling._jackknife_resample(xp.asarray(x), batch=2, xp=xp))
  481. xp_assert_equal(xp.concat(y2, axis=-2), y)
  482. @pytest.mark.skip_xp_backends("array_api_strict",
  483. reason="Test uses ... + fancy indexing")
  484. @pytest.mark.parametrize("rng_name", ["RandomState", "default_rng"])
  485. def test_bootstrap_resample(self, rng_name, xp):
  486. rng_gen = getattr(np.random, rng_name)
  487. rng1 = rng_gen(3949441460)
  488. rng2 = rng_gen(3949441460)
  489. n_resamples = 10
  490. shape = 3, 4, 5, 6
  491. rng = np.random.default_rng(5274950392)
  492. x = xp.asarray(rng.random(shape))
  493. y = _resampling._bootstrap_resample(x, n_resamples, rng=rng1, xp=xp)
  494. for i in range(n_resamples):
  495. # each resample is indexed along second to last axis
  496. # (last axis is the one the statistic will be taken over / consumed)
  497. slc = y[..., i, :]
  498. js = xp.asarray(rng_integers(rng2, 0, shape[-1], shape[-1]))
  499. expected = x[..., js]
  500. xp_assert_equal(slc, expected)
  501. @pytest.mark.parametrize("score", [0, 0.5, 1])
  502. @pytest.mark.parametrize("axis", [0, 1, 2])
  503. def test_percentile_of_score(self, score, axis, xp):
  504. shape = 10, 20, 30
  505. rng = np.random.default_rng(5903363153)
  506. x = rng.random(shape)
  507. dtype = xp_default_dtype(xp)
  508. p = _resampling._percentile_of_score(xp.asarray(x, dtype=dtype),
  509. xp.asarray(score, dtype=dtype),
  510. axis=-1, xp=xp)
  511. def vectorized_pos(a, score, axis):
  512. return np.apply_along_axis(stats.percentileofscore, axis, a, score)
  513. p2 = vectorized_pos(x, score, axis=-1)/100
  514. xp_assert_close(p, xp.asarray(p2, dtype=dtype), rtol=1e-15)
  515. @pytest.mark.parametrize("axis", [0, 1, 2])
  516. def test_vectorize_statistic(self, axis):
  517. # test that _vectorize_statistic vectorizes a statistic along `axis`
  518. # only used with NumPy arrays
  519. def statistic(*data, axis):
  520. # an arbitrary, vectorized statistic
  521. return sum(sample.mean(axis) for sample in data)
  522. def statistic_1d(*data):
  523. # the same statistic, not vectorized
  524. for sample in data:
  525. assert sample.ndim == 1
  526. return statistic(*data, axis=0)
  527. # vectorize the non-vectorized statistic
  528. statistic2 = _resampling._vectorize_statistic(statistic_1d)
  529. rng = np.random.RandomState(0)
  530. x = rng.rand(4, 5, 6)
  531. y = rng.rand(4, 1, 6)
  532. z = rng.rand(1, 5, 6)
  533. res1 = statistic(x, y, z, axis=axis)
  534. res2 = statistic2(x, y, z, axis=axis)
  535. assert_allclose(res1, res2)
  536. @pytest.mark.slow
  537. @pytest.mark.parametrize("method", ["basic", "percentile", "BCa"])
  538. def test_vector_valued_statistic(self, method, xp):
  539. # Generate 95% confidence interval around MLE of normal distribution
  540. # parameters. Repeat 100 times, each time on sample of size 100.
  541. # Check that confidence interval contains true parameters ~95 times.
  542. # Confidence intervals are estimated and stochastic; a test failure
  543. # does not necessarily indicate that something is wrong. More important
  544. # than values of `counts` below is that the shapes of the outputs are
  545. # correct.
  546. rng = np.random.default_rng(2196847219)
  547. params = 1, 0.5
  548. sample = xp.asarray(rng.normal(*params, size=(100, 100)))
  549. def statistic(data, axis):
  550. return xp.stack([xp.mean(data, axis=axis),
  551. xp.std(data, axis=axis, correction=1)])
  552. res = bootstrap((sample,), statistic, method=method, axis=-1,
  553. n_resamples=9999, batch=200, random_state=rng)
  554. params = xp.asarray([1, 0.5])
  555. counts = xp.count_nonzero((res.confidence_interval.low.T < params)
  556. & (res.confidence_interval.high.T > params),
  557. axis=0)
  558. assert xp.all(counts >= 90)
  559. assert xp.all(counts <= 100)
  560. assert res.confidence_interval.low.shape == (2, 100)
  561. assert res.confidence_interval.high.shape == (2, 100)
  562. assert res.standard_error.shape == (2, 100)
  563. assert res.bootstrap_distribution.shape == (2, 100, 9999)
  564. @pytest.mark.slow
  565. @pytest.mark.filterwarnings('ignore::RuntimeWarning')
  566. def test_vector_valued_statistic_gh17715(self):
  567. # gh-17715 reported a mistake introduced in the extension of BCa to
  568. # multi-sample statistics; a `len` should have been `.shape[-1]`. Check
  569. # that this is resolved.
  570. # If this is resolved for NumPy, it's resolved for the rest,
  571. # let's only run this for NumPy.
  572. rng = np.random.default_rng(141921000979291141)
  573. def concordance(x, y, axis):
  574. xm = x.mean(axis)
  575. ym = y.mean(axis)
  576. cov = ((x - xm[..., None]) * (y - ym[..., None])).mean(axis)
  577. return (2 * cov) / (x.var(axis) + y.var(axis) + (xm - ym) ** 2)
  578. def statistic(tp, tn, fp, fn, axis):
  579. actual = tp + fp
  580. expected = tp + fn
  581. return np.nan_to_num(concordance(actual, expected, axis))
  582. def statistic_extradim(*args, axis):
  583. return statistic(*args, axis)[np.newaxis, ...]
  584. data = [[4, 0, 0, 2], # (tp, tn, fp, fn)
  585. [2, 1, 2, 1],
  586. [0, 6, 0, 0],
  587. [0, 6, 3, 0],
  588. [0, 8, 1, 0]]
  589. data = np.array(data).T
  590. res = bootstrap(data, statistic_extradim, rng=rng, paired=True)
  591. ref = bootstrap(data, statistic, rng=rng, paired=True)
  592. assert_allclose(res.confidence_interval.low[0],
  593. ref.confidence_interval.low, atol=1e-15)
  594. assert_allclose(res.confidence_interval.high[0],
  595. ref.confidence_interval.high, atol=1e-15)
  596. # torch doesn't have T distribution CDF and can't fall back to NumPy on GPU
  597. @pytest.mark.skip_xp_backends(np_only=True, exceptions=['cupy'])
  598. def test_gh_20850(self, xp):
  599. rng = np.random.default_rng(2085020850)
  600. x = xp.asarray(rng.random((10, 2)))
  601. y = xp.asarray(rng.random((11, 2)))
  602. def statistic(x, y, axis):
  603. return stats.ttest_ind(x, y, axis=axis).statistic
  604. # The shapes do *not* need to be the same along axis
  605. stats.bootstrap((x, y), statistic)
  606. stats.bootstrap((x.T, y.T), statistic, axis=1)
  607. # But even when the shapes *are* the same along axis, the lengths
  608. # along other dimensions have to be the same (or `bootstrap` warns).
  609. message = "Array shapes are incompatible for broadcasting."
  610. with pytest.raises(ValueError, match=message):
  611. stats.bootstrap((x, y[:10, 0]), statistic) # this won't work after 1.16
  612. stats.bootstrap((x, y[:10, 0:1]), statistic) # this will
  613. stats.bootstrap((x.T, y.T[0:1, :10]), statistic, axis=1) # this will
  614. # --- Test Monte Carlo Hypothesis Test --- #
  615. @make_xp_test_case(monte_carlo_test)
  616. class TestMonteCarloHypothesisTest:
  617. atol = 2.5e-2 # for comparing p-value
  618. def get_rvs(self, rvs_in, rs, dtype=None, xp=np):
  619. return lambda *args, **kwds: xp.asarray(rvs_in(*args, random_state=rs, **kwds),
  620. dtype=dtype)
  621. def get_statistic(self, xp):
  622. def statistic(x, axis):
  623. m = xp.mean(x, axis=axis)
  624. v = xp.var(x, axis=axis, correction=1)
  625. n = x.shape[axis]
  626. return m / (v/n)**0.5
  627. # return stats.ttest_1samp(x, popmean=0., axis=axis).statistic)
  628. return statistic
  629. def test_input_validation(self, xp):
  630. # test that the appropriate error messages are raised for invalid input
  631. data = xp.asarray([1., 2., 3.])
  632. def stat(x, axis=None):
  633. return xp.mean(x, axis=axis)
  634. message = "Array shapes are incompatible for broadcasting."
  635. temp = (xp.zeros((2, 5)), xp.zeros((3, 5)))
  636. rvs = (stats.norm.rvs, stats.norm.rvs)
  637. with pytest.raises(ValueError, match=message):
  638. monte_carlo_test(temp, rvs, lambda x, y, axis: 1, axis=-1)
  639. message = "`axis` must be an integer."
  640. with pytest.raises(ValueError, match=message):
  641. monte_carlo_test(data, stats.norm.rvs, stat, axis=1.5)
  642. message = "`vectorized` must be `True`, `False`, or `None`."
  643. with pytest.raises(ValueError, match=message):
  644. monte_carlo_test(data, stats.norm.rvs, stat, vectorized=1.5)
  645. message = "`rvs` must be callable or sequence of callables."
  646. with pytest.raises(TypeError, match=message):
  647. monte_carlo_test(data, None, stat)
  648. with pytest.raises(TypeError, match=message):
  649. temp = xp.asarray([[1., 2.], [3., 4.]])
  650. monte_carlo_test(temp, [lambda x: x, None], stat)
  651. message = "If `rvs` is a sequence..."
  652. with pytest.raises(ValueError, match=message):
  653. temp = xp.asarray([[1., 2., 3.]])
  654. monte_carlo_test(temp, [lambda x: x, lambda x: x], stat)
  655. message = "`statistic` must be callable."
  656. with pytest.raises(TypeError, match=message):
  657. monte_carlo_test(data, stats.norm.rvs, None)
  658. message = "`n_resamples` must be a positive integer."
  659. with pytest.raises(ValueError, match=message):
  660. monte_carlo_test(data, stats.norm.rvs, stat, n_resamples=-1000)
  661. message = "`n_resamples` must be a positive integer."
  662. with pytest.raises(ValueError, match=message):
  663. monte_carlo_test(data, stats.norm.rvs, stat, n_resamples=1000.5)
  664. message = "`batch` must be a positive integer or None."
  665. with pytest.raises(ValueError, match=message):
  666. monte_carlo_test(data, stats.norm.rvs, stat, batch=-1000)
  667. message = "`batch` must be a positive integer or None."
  668. with pytest.raises(ValueError, match=message):
  669. monte_carlo_test(data, stats.norm.rvs, stat, batch=1000.5)
  670. message = "`alternative` must be in..."
  671. with pytest.raises(ValueError, match=message):
  672. monte_carlo_test(data, stats.norm.rvs, stat, alternative='ekki')
  673. # *If* this raises a value error, make sure it has the intended message
  674. message = "Signature inspection of statistic"
  675. def rvs(size):
  676. return xp.asarray(stats.norm.rvs(size=size))
  677. try:
  678. monte_carlo_test(data, rvs, xp.mean)
  679. except ValueError as e:
  680. assert str(e).startswith(message)
  681. def test_input_validation_xp(self, xp):
  682. def non_vectorized_statistic(x):
  683. return xp.mean(x)
  684. message = "`statistic` must be vectorized..."
  685. sample = xp.asarray([1., 2., 3.])
  686. if is_numpy(xp):
  687. monte_carlo_test(sample, stats.norm.rvs, non_vectorized_statistic)
  688. return
  689. with pytest.raises(ValueError, match=message):
  690. monte_carlo_test(sample, stats.norm.rvs, non_vectorized_statistic)
  691. with pytest.raises(ValueError, match=message):
  692. monte_carlo_test(sample, stats.norm.rvs, xp.mean, vectorized=False)
  693. @pytest.mark.xslow
  694. def test_batch(self, xp):
  695. # make sure that the `batch` parameter is respected by checking the
  696. # maximum batch size provided in calls to `statistic`
  697. rng = np.random.default_rng(23492340193)
  698. x = xp.asarray(rng.standard_normal(size=10))
  699. def statistic(x, axis):
  700. batch_size = 1 if x.ndim == 1 else x.shape[0]
  701. statistic.batch_size = max(batch_size, statistic.batch_size)
  702. statistic.counter += 1
  703. return self.get_statistic(xp)(x, axis=axis)
  704. statistic.counter = 0
  705. statistic.batch_size = 0
  706. kwds = {'sample': x, 'statistic': statistic,
  707. 'n_resamples': 1000, 'vectorized': True}
  708. kwds['rvs'] = self.get_rvs(stats.norm.rvs, np.random.default_rng(328423), xp=xp)
  709. res1 = monte_carlo_test(batch=1, **kwds)
  710. assert_equal(statistic.counter, 1001)
  711. assert_equal(statistic.batch_size, 1)
  712. kwds['rvs'] = self.get_rvs(stats.norm.rvs, np.random.default_rng(328423), xp=xp)
  713. statistic.counter = 0
  714. res2 = monte_carlo_test(batch=50, **kwds)
  715. assert_equal(statistic.counter, 21)
  716. assert_equal(statistic.batch_size, 50)
  717. kwds['rvs'] = self.get_rvs(stats.norm.rvs, np.random.default_rng(328423), xp=xp)
  718. statistic.counter = 0
  719. res3 = monte_carlo_test(**kwds)
  720. assert_equal(statistic.counter, 2)
  721. assert_equal(statistic.batch_size, 1000)
  722. xp_assert_equal(res1.pvalue, res3.pvalue)
  723. xp_assert_equal(res2.pvalue, res3.pvalue)
  724. @pytest.mark.parametrize('axis', range(-3, 3))
  725. def test_axis_dtype(self, axis, xp):
  726. # test that Nd-array samples are handled correctly for valid values
  727. # of the `axis` parameter; also make sure non-default dtype is maintained
  728. rng = np.random.default_rng(2389234)
  729. size = [2, 3, 4]
  730. size[axis] = 100
  731. # Determine non-default dtype
  732. dtype_default = xp.asarray(1.).dtype
  733. dtype_str = 'float32'if ("64" in str(dtype_default)) else 'float64'
  734. dtype_np = getattr(np, dtype_str)
  735. dtype = getattr(xp, dtype_str)
  736. # ttest_1samp is CPU array-API compatible, but it would be good to
  737. # include CuPy in this test. We'll perform ttest_1samp with a
  738. # NumPy array, but all the rest with be done with fully array-API
  739. # compatible code.
  740. x = rng.standard_normal(size=size, dtype=dtype_np)
  741. expected = stats.ttest_1samp(x, popmean=0., axis=axis)
  742. x = xp.asarray(x, dtype=dtype)
  743. statistic = self.get_statistic(xp)
  744. rvs = self.get_rvs(stats.norm.rvs, rng, dtype=dtype, xp=xp)
  745. res = monte_carlo_test(x, rvs, statistic, vectorized=True,
  746. n_resamples=20000, axis=axis)
  747. ref_statistic = xp.asarray(expected.statistic, dtype=dtype)
  748. ref_pvalue = xp.asarray(expected.pvalue, dtype=dtype)
  749. xp_assert_close(res.statistic, ref_statistic)
  750. xp_assert_close(res.pvalue, ref_pvalue, atol=self.atol)
  751. @pytest.mark.parametrize('alternative', ("two-sided", "less", "greater"))
  752. def test_alternative(self, alternative, xp):
  753. # test that `alternative` is working as expected
  754. rng = np.random.default_rng(65723433)
  755. x = rng.standard_normal(size=30)
  756. ref = stats.ttest_1samp(x, 0., alternative=alternative)
  757. x = xp.asarray(x)
  758. statistic = self.get_statistic(xp)
  759. rvs = self.get_rvs(stats.norm.rvs, rng, xp=xp)
  760. res = monte_carlo_test(x, rvs, statistic, alternative=alternative)
  761. xp_assert_close(res.statistic, xp.asarray(ref.statistic))
  762. xp_assert_close(res.pvalue, xp.asarray(ref.pvalue), atol=self.atol)
  763. # Tests below involve statistics that are not yet array-API compatible.
  764. # They can be converted when the statistics are converted.
  765. @pytest.mark.slow
  766. @pytest.mark.parametrize('alternative', ("less", "greater"))
  767. @pytest.mark.parametrize('a', np.linspace(-0.5, 0.5, 5)) # skewness
  768. def test_against_ks_1samp(self, alternative, a):
  769. # test that monte_carlo_test can reproduce pvalue of ks_1samp
  770. rng = np.random.default_rng(65723433)
  771. x = stats.skewnorm.rvs(a=a, size=30, random_state=rng)
  772. expected = stats.ks_1samp(x, stats.norm.cdf, alternative=alternative)
  773. def statistic1d(x):
  774. return stats.ks_1samp(x, stats.norm.cdf, mode='asymp',
  775. alternative=alternative).statistic
  776. norm_rvs = self.get_rvs(stats.norm.rvs, rng)
  777. res = monte_carlo_test(x, norm_rvs, statistic1d,
  778. n_resamples=1000, vectorized=False,
  779. alternative=alternative)
  780. assert_allclose(res.statistic, expected.statistic)
  781. if alternative == 'greater':
  782. assert_allclose(res.pvalue, expected.pvalue, atol=self.atol)
  783. elif alternative == 'less':
  784. assert_allclose(1-res.pvalue, expected.pvalue, atol=self.atol)
  785. @pytest.mark.parametrize('hypotest', (stats.skewtest, stats.kurtosistest))
  786. @pytest.mark.parametrize('alternative', ("less", "greater", "two-sided"))
  787. @pytest.mark.parametrize('a', np.linspace(-2, 2, 5)) # skewness
  788. def test_against_normality_tests(self, hypotest, alternative, a):
  789. # test that monte_carlo_test can reproduce pvalue of normality tests
  790. rng = np.random.default_rng(85723405)
  791. x = stats.skewnorm.rvs(a=a, size=150, random_state=rng)
  792. expected = hypotest(x, alternative=alternative)
  793. def statistic(x, axis):
  794. return hypotest(x, axis=axis).statistic
  795. norm_rvs = self.get_rvs(stats.norm.rvs, rng)
  796. res = monte_carlo_test(x, norm_rvs, statistic, vectorized=True,
  797. alternative=alternative)
  798. assert_allclose(res.statistic, expected.statistic)
  799. assert_allclose(res.pvalue, expected.pvalue, atol=self.atol)
  800. @pytest.mark.parametrize('a', np.arange(-2, 3)) # skewness parameter
  801. def test_against_normaltest(self, a):
  802. # test that monte_carlo_test can reproduce pvalue of normaltest
  803. rng = np.random.default_rng(12340513)
  804. x = stats.skewnorm.rvs(a=a, size=150, random_state=rng)
  805. expected = stats.normaltest(x)
  806. def statistic(x, axis):
  807. return stats.normaltest(x, axis=axis).statistic
  808. norm_rvs = self.get_rvs(stats.norm.rvs, rng)
  809. res = monte_carlo_test(x, norm_rvs, statistic, vectorized=True,
  810. alternative='greater')
  811. assert_allclose(res.statistic, expected.statistic)
  812. assert_allclose(res.pvalue, expected.pvalue, atol=self.atol)
  813. @pytest.mark.xslow
  814. @pytest.mark.parametrize('a', np.linspace(-0.5, 0.5, 5)) # skewness
  815. def test_against_cramervonmises(self, a):
  816. # test that monte_carlo_test can reproduce pvalue of cramervonmises
  817. rng = np.random.default_rng(234874135)
  818. x = stats.skewnorm.rvs(a=a, size=30, random_state=rng)
  819. expected = stats.cramervonmises(x, stats.norm.cdf)
  820. def statistic1d(x):
  821. return stats.cramervonmises(x, stats.norm.cdf).statistic
  822. norm_rvs = self.get_rvs(stats.norm.rvs, rng)
  823. res = monte_carlo_test(x, norm_rvs, statistic1d,
  824. n_resamples=1000, vectorized=False,
  825. alternative='greater')
  826. assert_allclose(res.statistic, expected.statistic)
  827. assert_allclose(res.pvalue, expected.pvalue, atol=self.atol)
  828. @pytest.mark.slow
  829. @pytest.mark.parametrize('dist_name', ['norm', 'logistic'])
  830. @pytest.mark.parametrize('target_statistic', [0.6, 0.7, 0.8])
  831. def test_against_anderson(self, dist_name, target_statistic):
  832. # test that monte_carlo_test can reproduce results of `anderson`.
  833. # find the skewness for which the sample statistic is within the range of
  834. # values tubulated by `anderson`
  835. def fun(a):
  836. rng = np.random.default_rng(394295467)
  837. x = stats.tukeylambda.rvs(a, size=100, random_state=rng)
  838. expected = stats.anderson(x, dist_name, method='interpolate')
  839. return expected.statistic - target_statistic
  840. with warnings.catch_warnings():
  841. warnings.simplefilter("ignore", RuntimeWarning)
  842. sol = root(fun, x0=0)
  843. assert sol.success
  844. # get the significance level (p-value) associated with that critical
  845. # value
  846. a = sol.x[0]
  847. rng = np.random.default_rng(394295467)
  848. x = stats.tukeylambda.rvs(a, size=100, random_state=rng)
  849. expected = stats.anderson(x, dist_name, method='interpolate')
  850. expected_stat = expected.statistic
  851. expected_p = expected.pvalue
  852. # perform equivalent Monte Carlo test and compare results
  853. def statistic1d(x):
  854. return stats.anderson(x, dist_name, method='interpolate').statistic
  855. dist_rvs = self.get_rvs(getattr(stats, dist_name).rvs, rng)
  856. with warnings.catch_warnings():
  857. warnings.simplefilter("ignore", RuntimeWarning)
  858. res = monte_carlo_test(x, dist_rvs,
  859. statistic1d, n_resamples=1000,
  860. vectorized=False, alternative='greater')
  861. assert_allclose(res.statistic, expected_stat)
  862. assert_allclose(res.pvalue, expected_p, atol=2*self.atol)
  863. def test_p_never_zero(self):
  864. # Use biased estimate of p-value to ensure that p-value is never zero
  865. # per monte_carlo_test reference [1]
  866. rng = np.random.default_rng(2190176673029737545)
  867. x = np.zeros(100)
  868. res = monte_carlo_test(x, rng.random, np.mean,
  869. vectorized=True, alternative='less')
  870. assert res.pvalue == 0.0001
  871. def test_against_ttest_ind(self):
  872. # test that `monte_carlo_test` can reproduce results of `ttest_ind`.
  873. rng = np.random.default_rng(219017667302737545)
  874. data = rng.random(size=(2, 5)), rng.random(size=7) # broadcastable
  875. rvs = rng.normal, rng.normal
  876. def statistic(x, y, axis):
  877. return stats.ttest_ind(x, y, axis=axis).statistic
  878. res = stats.monte_carlo_test(data, rvs, statistic, axis=-1)
  879. ref = stats.ttest_ind(data[0], [data[1]], axis=-1)
  880. assert_allclose(res.statistic, ref.statistic)
  881. assert_allclose(res.pvalue, ref.pvalue, rtol=2e-2)
  882. def test_against_f_oneway(self):
  883. # test that `monte_carlo_test` can reproduce results of `f_oneway`.
  884. rng = np.random.default_rng(219017667302737545)
  885. data = (rng.random(size=(2, 100)), rng.random(size=(2, 101)),
  886. rng.random(size=(2, 102)), rng.random(size=(2, 103)))
  887. rvs = rng.normal, rng.normal, rng.normal, rng.normal
  888. def statistic(*args, axis):
  889. return stats.f_oneway(*args, axis=axis).statistic
  890. res = stats.monte_carlo_test(data, rvs, statistic, axis=-1,
  891. alternative='greater')
  892. ref = stats.f_oneway(*data, axis=-1)
  893. assert_allclose(res.statistic, ref.statistic)
  894. assert_allclose(res.pvalue, ref.pvalue, atol=1e-2)
  895. @pytest.mark.fail_slow(2)
  896. @pytest.mark.xfail_on_32bit("Statistic may not depend on sample order on 32-bit")
  897. def test_finite_precision_statistic(self):
  898. # Some statistics return numerically distinct values when the values
  899. # should be equal in theory. Test that `monte_carlo_test` accounts
  900. # for this in some way.
  901. rng = np.random.default_rng(2549824598234528)
  902. n_resamples = 9999
  903. def rvs(size):
  904. return 1. * stats.bernoulli(p=0.333).rvs(size=size, random_state=rng)
  905. x = rvs(100)
  906. res = stats.monte_carlo_test(x, rvs, np.var, alternative='less',
  907. n_resamples=n_resamples)
  908. # show that having a tolerance matters
  909. c0 = np.sum(res.null_distribution <= res.statistic)
  910. c1 = np.sum(res.null_distribution <= res.statistic*(1+1e-15))
  911. assert c0 != c1
  912. assert res.pvalue == (c1 + 1)/(n_resamples + 1)
  913. @make_xp_test_case(power)
  914. class TestPower:
  915. def xp_normal(self, rng, *, xp, dtype=None):
  916. dtype = xp_default_dtype(xp) if dtype is None else dtype
  917. return lambda size: xp.asarray(rng.normal(size=size), dtype=dtype)
  918. def test_input_validation(self, xp):
  919. # test that the appropriate error messages are raised for invalid input
  920. rng = np.random.default_rng(8519895914314711673)
  921. test = stats.ttest_ind
  922. rvs = (self.xp_normal(rng, xp=xp), self.xp_normal(rng, xp=xp))
  923. n_observations = (xp.asarray(10), xp.asarray(12))
  924. message = "`vectorized` must be `True`, `False`, or `None`."
  925. with pytest.raises(ValueError, match=message):
  926. power(test, rvs, n_observations, vectorized=1.5)
  927. message = "`rvs` must be callable or sequence of callables."
  928. with pytest.raises(TypeError, match=message):
  929. power(test, None, n_observations)
  930. with pytest.raises(TypeError, match=message):
  931. power(test, (self.xp_normal(rng, xp=xp), 'ekki'), n_observations)
  932. message = "If `rvs` is a sequence..."
  933. with pytest.raises(ValueError, match=message):
  934. power(test, (self.xp_normal(rng, xp=xp),), n_observations)
  935. with pytest.raises(ValueError, match=message):
  936. power(test, rvs, (10,))
  937. message = "`significance` must contain floats between 0 and 1."
  938. with pytest.raises(ValueError, match=message):
  939. power(test, rvs, n_observations, significance=2)
  940. with pytest.raises(ValueError, match=message):
  941. power(test, rvs, n_observations, significance=xp.linspace(-1, 1, 50))
  942. message = "`kwargs` must be a dictionary"
  943. with pytest.raises(TypeError, match=message):
  944. power(test, rvs, n_observations, kwargs=(1, 2, 3))
  945. message = "not be broadcast|Chunks do not add|Incompatible shapes"
  946. with pytest.raises((ValueError, RuntimeError), match=message):
  947. power(test, rvs, (xp.asarray([10, 11]), xp.asarray([12, 13, 14])))
  948. with pytest.raises((ValueError, RuntimeError), match=message):
  949. power(test, rvs, (xp.asarray([10, 11]), xp.asarray([12, 13])),
  950. kwargs={'x': xp.asarray([1, 2, 3])})
  951. message = "`test` must be callable"
  952. with pytest.raises(TypeError, match=message):
  953. power(None, rvs, n_observations)
  954. message = "`n_resamples` must be a positive integer"
  955. with pytest.raises(ValueError, match=message):
  956. power(test, rvs, n_observations, n_resamples=-10)
  957. with pytest.raises(ValueError, match=message):
  958. power(test, rvs, n_observations, n_resamples=10.5)
  959. message = "`batch` must be a positive integer"
  960. with pytest.raises(ValueError, match=message):
  961. power(test, rvs, n_observations, batch=-10)
  962. with pytest.raises(ValueError, match=message):
  963. power(test, rvs, n_observations, batch=10.5)
  964. @pytest.mark.slow
  965. def test_batch(self, xp):
  966. # make sure that the `batch` parameter is respected by checking the
  967. # maximum batch size provided in calls to `test`
  968. rng = np.random.default_rng(23492340193)
  969. def test(x, axis):
  970. batch_size = 1 if x.ndim == 1 else x.shape[0]
  971. test.batch_size = max(batch_size, test.batch_size)
  972. test.counter += 1
  973. return stats.ttest_1samp(x, xp.asarray(0.), axis=axis).pvalue
  974. test.counter = 0
  975. test.batch_size = 0
  976. kwds = dict(test=test,
  977. n_observations=xp.asarray(10),
  978. n_resamples=1000)
  979. rng = np.random.default_rng(23492340193)
  980. res1 = power(**kwds, rvs=self.xp_normal(rng, xp=xp), batch=1)
  981. assert test.counter == 1000
  982. assert test.batch_size == 1
  983. rng = np.random.default_rng(23492340193)
  984. test.counter = 0
  985. res2 = power(**kwds, rvs=self.xp_normal(rng, xp=xp), batch=50)
  986. assert test.counter == 20
  987. assert test.batch_size == 50
  988. rng = np.random.default_rng(23492340193)
  989. test.counter = 0
  990. res3 = power(**kwds, rvs=self.xp_normal(rng, xp=xp), batch=1000)
  991. assert test.counter == 1
  992. assert test.batch_size == 1000
  993. xp_assert_equal(res1.power, res3.power)
  994. xp_assert_equal(res2.power, res3.power)
  995. @pytest.mark.slow
  996. def test_vectorization(self, xp):
  997. # Test that `power` is vectorized as expected
  998. rng = np.random.default_rng(25495254834552)
  999. alternatives = {-1: 'less', 0:'two-sided', 1: 'greater'}
  1000. # Single vectorized call
  1001. popmeans = xp.asarray([0, 0.2])
  1002. def test(x, alternative, axis=-1):
  1003. # ensure that popmeans axis is zeroth and orthogonal to the rest
  1004. popmeans_expanded = xpx.expand_dims(popmeans,
  1005. axis=tuple(range(1, x.ndim + 1)))
  1006. alternative = alternatives[int(alternative)]
  1007. return stats.ttest_1samp(x, popmeans_expanded, alternative=alternative,
  1008. axis=axis)
  1009. # nx and kwargs broadcast against one another
  1010. nx = xp.asarray([10, 15, 20, 50, 100])[:, xp.newaxis]
  1011. kwargs = {'alternative': xp.asarray([-1, 0, 1])}
  1012. # This dimension is added to the beginning
  1013. significance = xp.asarray([0.01, 0.025, 0.05, 0.1])
  1014. res = stats.power(test, self.xp_normal(rng, xp=xp), nx,
  1015. significance=significance, kwargs=kwargs)
  1016. # Looping over all combinations
  1017. ref = []
  1018. for significance_i in significance:
  1019. for i in range(nx.shape[0]):
  1020. nx_i = nx[i, ...]
  1021. for alternative_i in kwargs['alternative']:
  1022. for j in range(popmeans.shape[0]):
  1023. popmean_j = popmeans[j]
  1024. def test2(x, axis=-1):
  1025. return stats.ttest_1samp(x, popmean_j, axis=axis,
  1026. alternative=alternatives[int(alternative_i)])
  1027. tmp = stats.power(test2, self.xp_normal(rng, xp=xp), nx_i,
  1028. significance=significance_i)
  1029. ref.append(tmp.power)
  1030. ref = xp.reshape(xp.stack(ref), res.power.shape)
  1031. # Show that results are similar
  1032. xp_assert_close(res.power, ref, rtol=2e-2, atol=1e-2)
  1033. @pytest.mark.skip_xp_backends(cpu_only=True,
  1034. exceptions=['cupy', 'jax.numpy', 'dask.array'])
  1035. def test_ttest_ind_null(self, xp):
  1036. # Check that the p-values of `ttest_ind` are uniformly distributed under
  1037. # the null hypothesis
  1038. rng = np.random.default_rng(254952548345528)
  1039. test = stats.ttest_ind
  1040. n_observations = (xp.asarray(rng.integers(10, 100, size=(10))),
  1041. xp.asarray(rng.integers(10, 100, size=(10))))
  1042. rvs = self.xp_normal(rng, xp=xp), self.xp_normal(rng, xp=xp)
  1043. significance = xp.asarray([0.01, 0.05, 0.1])
  1044. res = stats.power(test, rvs, n_observations, significance=significance)
  1045. significance = xp.broadcast_to(significance[:, xp.newaxis], res.power.shape)
  1046. xp_assert_close(res.power, significance, atol=1e-2)
  1047. @pytest.mark.skip_xp_backends('array_api_strict',
  1048. reason='currently combines integer and float arrays')
  1049. @pytest.mark.skip_xp_backends(cpu_only=True,
  1050. exceptions=['cupy', 'jax.numpy', 'dask.array'])
  1051. def test_ttest_1samp_power(self, xp):
  1052. # Check simulated ttest_1samp power against reference
  1053. rng = np.random.default_rng(254952548345528)
  1054. # Reference values computed with statmodels
  1055. # import numpy as np
  1056. # from statsmodels.stats.power import tt_solve_power
  1057. # tt_solve_power = np.vectorize(tt_solve_power)
  1058. # tt_solve_power([0.1, 0.5, 0.9], [[10], [20]], [[[0.01]], [[0.05]]])
  1059. ref = [[[0.0126515 , 0.10269751, 0.40415802],
  1060. [0.01657775, 0.29734608, 0.86228288]],
  1061. [[0.0592903 , 0.29317561, 0.71718121],
  1062. [0.07094116, 0.56450441, 0.96815163]]]
  1063. kwargs = {'popmean': xp.asarray([0.1, 0.5, 0.9])}
  1064. n_observations = xp.asarray([[10], [20]])
  1065. significance = xp.asarray([0.01, 0.05])
  1066. res = stats.power(stats.ttest_1samp, self.xp_normal(rng, xp=xp), n_observations,
  1067. significance=significance, kwargs=kwargs)
  1068. xp_assert_close(res.power, xp.asarray(ref), atol=1e-2)
  1069. @make_xp_test_case(permutation_test)
  1070. class TestPermutationTest:
  1071. rtol = 1e-14
  1072. def setup_method(self):
  1073. self.rng = np.random.default_rng(7170559330470561044)
  1074. # -- Input validation -- #
  1075. def test_permutation_test_iv(self, xp):
  1076. def stat(x, y, axis):
  1077. return stats.ttest_ind((x, y), axis).statistic
  1078. data = (xp.asarray([1, 2, 3]), xp.asarray([1, 2, 3]))
  1079. message = "each sample in `data` must contain two or more ..."
  1080. with pytest.raises(ValueError, match=message):
  1081. permutation_test((data[0], xp.asarray([0])), stat)
  1082. message = "`data` must be a tuple containing at least two samples"
  1083. with pytest.raises(ValueError, match=message):
  1084. permutation_test((1,), stat)
  1085. with pytest.raises(TypeError, match=message):
  1086. permutation_test(1, stat)
  1087. message = "`axis` must be an integer."
  1088. with pytest.raises(ValueError, match=message):
  1089. permutation_test(data, stat, axis=1.5)
  1090. message = "`permutation_type` must be in..."
  1091. with pytest.raises(ValueError, match=message):
  1092. permutation_test(data, stat,
  1093. permutation_type="ekki")
  1094. message = "`vectorized` must be `True`, `False`, or `None`."
  1095. with pytest.raises(ValueError, match=message):
  1096. permutation_test(data, stat, vectorized=1.5)
  1097. message = "`n_resamples` must be a positive integer."
  1098. with pytest.raises(ValueError, match=message):
  1099. permutation_test(data, stat, n_resamples=-1000)
  1100. message = "`n_resamples` must be a positive integer."
  1101. with pytest.raises(ValueError, match=message):
  1102. permutation_test(data, stat, n_resamples=1000.5)
  1103. message = "`batch` must be a positive integer or None."
  1104. with pytest.raises(ValueError, match=message):
  1105. permutation_test(data, stat, batch=-1000)
  1106. message = "`batch` must be a positive integer or None."
  1107. with pytest.raises(ValueError, match=message):
  1108. permutation_test(data, stat, batch=1000.5)
  1109. message = "`alternative` must be in..."
  1110. with pytest.raises(ValueError, match=message):
  1111. permutation_test(data, stat, alternative='ekki')
  1112. message = "SeedSequence expects int or sequence of ints"
  1113. with pytest.raises(TypeError, match=message):
  1114. permutation_test(data, stat, rng='herring')
  1115. # -- Test Parameters -- #
  1116. # SPEC-007 leave one call with seed to check it still works
  1117. @pytest.mark.parametrize('random_state', [np.random.RandomState,
  1118. np.random.default_rng])
  1119. @pytest.mark.parametrize('permutation_type',
  1120. ['pairings', 'samples', 'independent'])
  1121. def test_batch(self, permutation_type, random_state, xp):
  1122. # make sure that the `batch` parameter is respected by checking the
  1123. # maximum batch size provided in calls to `statistic`
  1124. x = xp.asarray(self.rng.random(10))
  1125. y = xp.asarray(self.rng.random(10))
  1126. def statistic(x, y, axis):
  1127. batch_size = 1 if x.ndim == 1 else x.shape[0]
  1128. statistic.batch_size = max(batch_size, statistic.batch_size)
  1129. statistic.counter += 1
  1130. return xp.mean(x, axis=axis) - xp.mean(y, axis=axis)
  1131. statistic.counter = 0
  1132. statistic.batch_size = 0
  1133. kwds = {'n_resamples': 1000, 'permutation_type': permutation_type,
  1134. 'vectorized': True}
  1135. res1 = stats.permutation_test((x, y), statistic, batch=1,
  1136. random_state=random_state(0), **kwds)
  1137. assert statistic.counter == 1001
  1138. assert statistic.batch_size == 1
  1139. statistic.counter = 0
  1140. res2 = stats.permutation_test((x, y), statistic, batch=50,
  1141. random_state=random_state(0), **kwds)
  1142. assert statistic.counter == 21
  1143. assert statistic.batch_size == 50
  1144. statistic.counter = 0
  1145. res3 = stats.permutation_test((x, y), statistic, batch=1000,
  1146. random_state=random_state(0), **kwds)
  1147. assert statistic.counter == 2
  1148. assert statistic.batch_size == 1000
  1149. xp_assert_equal(res1.pvalue, res3.pvalue)
  1150. xp_assert_equal(res2.pvalue, res3.pvalue)
  1151. # SPEC-007 leave at least one call with seed to check it still works
  1152. @pytest.mark.parametrize('random_state', [np.random.RandomState,
  1153. np.random.default_rng])
  1154. @pytest.mark.parametrize('permutation_type, exact_size',
  1155. [('pairings', special.factorial(3)**2),
  1156. ('samples', 2**3),
  1157. ('independent', special.binom(6, 3))])
  1158. def test_permutations(self, permutation_type, exact_size, random_state, xp):
  1159. # make sure that the `permutations` parameter is respected by checking
  1160. # the size of the null distribution
  1161. x = xp.asarray(self.rng.random(3))
  1162. y = xp.asarray(self.rng.random(3))
  1163. def statistic(x, y, axis):
  1164. return xp.mean(x, axis=axis) - xp.mean(y, axis=axis)
  1165. kwds = {'permutation_type': permutation_type,
  1166. 'vectorized': True}
  1167. res = stats.permutation_test((x, y), statistic, n_resamples=3,
  1168. random_state=random_state(0), **kwds)
  1169. assert xp_size(res.null_distribution) == 3
  1170. res = stats.permutation_test((x, y), statistic, **kwds)
  1171. assert xp_size(res.null_distribution) == exact_size
  1172. # -- Randomized Permutation Tests -- #
  1173. # To get reasonable accuracy, these next three tests are somewhat slow.
  1174. # Originally, I had them passing for all combinations of permutation type,
  1175. # alternative, and RNG, but that takes too long for CI. Instead, split
  1176. # into three tests, each testing a particular combination of the three
  1177. # parameters.
  1178. def test_randomized_test_against_exact_both(self, xp):
  1179. # check that the randomized and exact tests agree to reasonable
  1180. # precision for permutation_type='both
  1181. alternative, rng = 'less', 0
  1182. nx, ny, permutations = 8, 9, 24000
  1183. assert special.binom(nx + ny, nx) > permutations
  1184. rng = np.random.default_rng(8235259808)
  1185. x = xp.asarray(rng.standard_normal(size=nx))
  1186. y = xp.asarray(rng.standard_normal(size=ny))
  1187. data = x, y
  1188. def statistic(x, y, axis):
  1189. return xp.mean(x, axis=axis) - xp.mean(y, axis=axis)
  1190. kwds = {'vectorized': True, 'permutation_type': 'independent',
  1191. 'batch': 100, 'alternative': alternative, 'rng': rng}
  1192. res = permutation_test(data, statistic, n_resamples=permutations, **kwds)
  1193. res2 = permutation_test(data, statistic, n_resamples=xp.inf, **kwds)
  1194. assert res.statistic == res2.statistic
  1195. xp_assert_close(res.pvalue, res2.pvalue, atol=1e-2)
  1196. @pytest.mark.slow()
  1197. def test_randomized_test_against_exact_samples(self, xp):
  1198. # check that the randomized and exact tests agree to reasonable
  1199. # precision for permutation_type='samples'
  1200. alternative, rng = 'greater', None
  1201. nx, ny, permutations = 15, 15, 32000
  1202. assert 2**nx > permutations
  1203. rng = np.random.default_rng(8235259808)
  1204. x = xp.asarray(rng.standard_normal(size=nx))
  1205. y = xp.asarray(rng.standard_normal(size=ny))
  1206. data = x, y
  1207. def statistic(x, y, axis):
  1208. return xp.mean(x - y, axis=axis)
  1209. kwds = {'vectorized': True, 'permutation_type': 'samples',
  1210. 'batch': 100, 'alternative': alternative, 'rng': rng}
  1211. res = permutation_test(data, statistic, n_resamples=permutations, **kwds)
  1212. res2 = permutation_test(data, statistic, n_resamples=xp.inf, **kwds)
  1213. assert res.statistic == res2.statistic
  1214. xp_assert_close(res.pvalue, res2.pvalue, atol=1e-2)
  1215. # I only need to skip torch on GPU because it doesn't have betaincc for pearsonr
  1216. @pytest.mark.skip_xp_backends(cpu_only=True, exceptions=['cupy', 'jax.numpy'])
  1217. def test_randomized_test_against_exact_pairings(self, xp):
  1218. # check that the randomized and exact tests agree to reasonable
  1219. # precision for permutation_type='pairings'
  1220. alternative, rng = 'two-sided', self.rng
  1221. nx, ny, permutations = 8, 8, 40000
  1222. assert special.factorial(nx) > permutations
  1223. rng = np.random.default_rng(8235259808)
  1224. x = xp.asarray(rng.standard_normal(size=nx))
  1225. y = xp.asarray(rng.standard_normal(size=ny))
  1226. data = [x]
  1227. def statistic(x, axis):
  1228. return stats.pearsonr(x, y, axis=axis).statistic
  1229. kwds = {'vectorized': True, 'permutation_type': 'samples',
  1230. 'batch': 100, 'alternative': alternative, 'rng': rng}
  1231. res = permutation_test(data, statistic, n_resamples=permutations, **kwds)
  1232. res2 = permutation_test(data, statistic, n_resamples=xp.inf, **kwds)
  1233. assert res.statistic == res2.statistic
  1234. xp_assert_close(res.pvalue, res2.pvalue, atol=1e-2)
  1235. # -- Independent (Unpaired) Sample Tests -- #
  1236. @pytest.mark.skip_xp_backends(eager_only=True) # TODO: change to jax_jit=False
  1237. @pytest.mark.parametrize('alternative', ("less", "greater", "two-sided"))
  1238. def test_against_ks_2samp(self, alternative, xp):
  1239. x = self.rng.normal(size=4, scale=1)
  1240. y = self.rng.normal(size=5, loc=3, scale=3)
  1241. expected = stats.ks_2samp(x, y, alternative=alternative, mode='exact')
  1242. def statistic(x, y, axis):
  1243. # todo: use `xp` as backend when `ks_2samp` is translated to array API
  1244. x, y = _xp_copy_to_numpy(x), _xp_copy_to_numpy(y)
  1245. res = stats.ks_2samp(x, y, axis=axis, mode='asymp', alternative=alternative)
  1246. res = xp.asarray(res.statistic)
  1247. return res[()] if res.ndim == 0 else res
  1248. # ks_2samp is always a one-tailed 'greater' test
  1249. # it's the statistic that changes (D+ vs D- vs max(D+, D-))
  1250. x, y = xp.asarray(x), xp.asarray(y)
  1251. res = permutation_test((x, y), statistic, n_resamples=np.inf,
  1252. alternative='greater', rng=self.rng)
  1253. xp_assert_close(res.statistic, xp.asarray(expected.statistic), rtol=self.rtol)
  1254. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1255. @pytest.mark.skip_xp_backends(eager_only=True) # TODO: change to jax_jit=False
  1256. @pytest.mark.parametrize('alternative', ("less", "greater", "two-sided"))
  1257. def test_against_ansari(self, alternative, xp):
  1258. x = self.rng.normal(size=4, scale=1)
  1259. y = self.rng.normal(size=5, scale=3)
  1260. # ansari has a different convention for 'alternative'
  1261. alternative_correspondence = {"less": "greater",
  1262. "greater": "less",
  1263. "two-sided": "two-sided"}
  1264. alternative_scipy = alternative_correspondence[alternative]
  1265. expected = stats.ansari(x, y, alternative=alternative_scipy)
  1266. def statistic(x, y, axis):
  1267. # todo: use `xp` as backend when `ansari` is translated to array API
  1268. x, y = _xp_copy_to_numpy(x), _xp_copy_to_numpy(y)
  1269. res = stats.ansari(x, y, axis=axis)
  1270. res = xp.asarray(res.statistic)
  1271. return res[()] if res.ndim == 0 else res
  1272. x, y = xp.asarray(x), xp.asarray(y)
  1273. res = permutation_test((x, y), statistic, n_resamples=np.inf,
  1274. alternative=alternative, rng=self.rng)
  1275. xp_assert_close(res.statistic, xp.asarray(expected.statistic), rtol=self.rtol)
  1276. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1277. @skip_xp_backends('cupy', reason='needs mannwhitneyu')
  1278. @skip_xp_backends(eager_only=True) # mannwhitneyu does input validation
  1279. @pytest.mark.parametrize('alternative', ("less", "greater", "two-sided"))
  1280. def test_against_mannwhitneyu(self, alternative, xp):
  1281. x = stats.uniform.rvs(size=(3, 5, 2), loc=0, random_state=self.rng)
  1282. y = stats.uniform.rvs(size=(3, 5, 2), loc=0.05, random_state=self.rng)
  1283. expected = stats.mannwhitneyu(x, y, axis=1, alternative=alternative)
  1284. def statistic(x, y, axis):
  1285. return stats.mannwhitneyu(x, y, axis=axis, method='asymptotic').statistic
  1286. x, y = xp.asarray(x), xp.asarray(y)
  1287. res = permutation_test((x, y), statistic, vectorized=True,
  1288. n_resamples=xp.inf, alternative=alternative,
  1289. axis=1, rng=self.rng)
  1290. xp_assert_close(res.statistic, xp.asarray(expected.statistic), rtol=self.rtol)
  1291. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1292. @skip_xp_backends('cupy', reason='needs cramervonmises_2samp')
  1293. @skip_xp_backends(eager_only=True) # cramervonmises_2samp does input validation
  1294. @skip_xp_backends(cpu_only=True) # torch doesn't have `kv`
  1295. def test_against_cvm(self, xp):
  1296. x = stats.norm.rvs(size=4, scale=1, random_state=self.rng)
  1297. y = stats.norm.rvs(size=5, loc=3, scale=3, random_state=self.rng)
  1298. expected = stats.cramervonmises_2samp(x, y, method='exact')
  1299. def statistic(x, y, axis):
  1300. res = stats.cramervonmises_2samp(x, y, axis=axis, method='asymptotic')
  1301. return res.statistic
  1302. # cramervonmises_2samp has only one alternative, greater
  1303. x, y = xp.asarray(x), xp.asarray(y)
  1304. res = permutation_test((x, y), statistic, n_resamples=np.inf,
  1305. alternative='greater', rng=self.rng)
  1306. xp_assert_close(res.statistic, xp.asarray(expected.statistic), rtol=self.rtol)
  1307. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1308. @skip_xp_backends('cupy', reason='needs kruskal')
  1309. @skip_xp_backends(eager_only=True) # kruskal does input validation
  1310. @pytest.mark.parametrize('axis', (-1, 2))
  1311. def test_vectorized_nsamp_ptype_both(self, axis, xp):
  1312. # statistic only available for NumPy
  1313. # Test that permutation_test with permutation_type='independent' works
  1314. # properly for a 3-sample statistic with nd array samples of different
  1315. # (but compatible) shapes and ndims. Show that exact permutation test
  1316. # and random permutation tests approximate SciPy's asymptotic pvalues
  1317. # and that exact and random permutation test results are even closer
  1318. # to one another (than they are to the asymptotic results).
  1319. # Three samples, different (but compatible) shapes with different ndims
  1320. rng = np.random.default_rng(6709265303529651545)
  1321. x = rng.random(size=(3))
  1322. y = rng.random(size=(1, 3, 2))
  1323. z = rng.random(size=(2, 1, 4))
  1324. data = (x, y, z)
  1325. expected = stats.kruskal(*data, axis=axis)
  1326. # Define the statistic (and pvalue for comparison)
  1327. def statistic(*data, axis):
  1328. return stats.kruskal(*data, axis=axis).statistic
  1329. # Calculate exact and randomized permutation results
  1330. kwds = {'axis': axis, 'alternative': 'greater',
  1331. 'permutation_type': 'independent', 'rng': rng}
  1332. data = [xp.asarray(data_) for data_ in data]
  1333. res = permutation_test(data, statistic, n_resamples=xp.inf, **kwds)
  1334. res2 = permutation_test(data, statistic, n_resamples=1000, **kwds)
  1335. # Check results
  1336. xp_assert_close(res.statistic, xp.asarray(expected.statistic), rtol=self.rtol*5)
  1337. xp_assert_close(res.statistic, res2.statistic, rtol=self.rtol*5)
  1338. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), atol=6e-2)
  1339. xp_assert_close(res.pvalue, res2.pvalue, atol=3e-2)
  1340. # -- Paired-Sample Tests -- #
  1341. @pytest.mark.skip_xp_backends(eager_only=True) # TODO: change to jax_jit=False
  1342. @pytest.mark.parametrize('alternative', ("less", "greater", "two-sided"))
  1343. def test_against_wilcoxon(self, alternative, xp):
  1344. x = stats.uniform.rvs(size=(3, 6, 2), loc=0, random_state=self.rng)
  1345. y = stats.uniform.rvs(size=(3, 6, 2), loc=0.05, random_state=self.rng)
  1346. expected = stats.wilcoxon(x, y, alternative=alternative, axis=1)
  1347. # We'll check both 1- and 2-sample versions of the same test;
  1348. # we expect identical results to wilcoxon in all cases.
  1349. def statistic_1samp_1d(z, axis):
  1350. # todo: use `xp` as backend when `wilcoxon` is translated to array API
  1351. # 'less' ensures we get the same of two statistics every time
  1352. z = _xp_copy_to_numpy(z)
  1353. res = stats.wilcoxon(z, alternative='less', axis=axis)
  1354. res = xp.asarray(res.statistic)
  1355. return res[()] if res.ndim == 0 else res
  1356. def statistic_2samp_1d(x, y, axis):
  1357. # todo: use `xp` as backend when `wilcoxon` is translated to array API
  1358. x, y = _xp_copy_to_numpy(x), _xp_copy_to_numpy(y)
  1359. res = stats.wilcoxon(x, y, alternative='less', axis=axis)
  1360. res = xp.asarray(res.statistic)
  1361. return res[()] if res.ndim == 0 else res
  1362. x, y = xp.asarray(x), xp.asarray(y)
  1363. kwds = {'axis': 1, 'alternative': alternative, 'permutation_type': 'samples',
  1364. 'rng': self.rng, 'n_resamples': np.inf}
  1365. res1 = permutation_test((x-y,), statistic_1samp_1d, **kwds)
  1366. res2 = permutation_test((x, y), statistic_2samp_1d, **kwds)
  1367. # `wilcoxon` returns a different statistic with 'two-sided'
  1368. xp_assert_close(res1.statistic, res2.statistic, rtol=self.rtol)
  1369. if alternative != 'two-sided':
  1370. xp_assert_close(res2.statistic, xp.asarray(expected.statistic),
  1371. rtol=self.rtol)
  1372. xp_assert_close(res2.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1373. xp_assert_close(res1.pvalue, res2.pvalue, rtol=self.rtol)
  1374. @pytest.mark.parametrize('alternative', ("less", "greater", "two-sided"))
  1375. def test_against_binomtest(self, alternative, xp):
  1376. x = self.rng.integers(0, 2, size=10)
  1377. x[x == 0] = -1
  1378. # More naturally, the test would flip elements between 0 and one.
  1379. # However, permutation_test will flip the _signs_ of the elements.
  1380. # So we have to work with +1/-1 instead of 1/0.
  1381. def statistic(x, axis=0):
  1382. xp_ = array_namespace(x)
  1383. return xp_.count_nonzero(x > 0, axis=axis)
  1384. k, n, p = statistic(x), 10, 0.5
  1385. expected = stats.binomtest(k, n, p, alternative=alternative)
  1386. res = stats.permutation_test((xp.asarray(x, dtype=xp.float64),), statistic,
  1387. vectorized=True,
  1388. permutation_type='samples', n_resamples=xp.inf,
  1389. rng=self.rng, alternative=alternative)
  1390. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1391. # -- Exact Association Tests -- #
  1392. @pytest.mark.skip_xp_backends(eager_only=True) # TODO: change to jax_jit=False
  1393. def test_against_kendalltau(self, xp):
  1394. x = self.rng.normal(size=6)
  1395. y = x + self.rng.normal(size=6)
  1396. expected = stats.kendalltau(x, y, method='exact')
  1397. def statistic(x, axis):
  1398. # todo: use `xp` as backend when `kendalltau` is translated to array API
  1399. x = _xp_copy_to_numpy(x)
  1400. res = stats.kendalltau(x, y, method='asymptotic', axis=axis)
  1401. res = xp.asarray(res.statistic)
  1402. return res[()] if res.ndim == 0 else res
  1403. # kendalltau currently has only one alternative, two-sided
  1404. x = xp.asarray(x)
  1405. res = permutation_test((x,), statistic, permutation_type='pairings',
  1406. n_resamples=np.inf, rng=self.rng)
  1407. xp_assert_close(res.statistic, xp.asarray(expected.statistic), rtol=self.rtol)
  1408. xp_assert_close(res.pvalue, xp.asarray(expected.pvalue), rtol=self.rtol)
  1409. @pytest.mark.parametrize('alternative', ('less', 'greater', 'two-sided'))
  1410. def test_against_fisher_exact(self, alternative, xp):
  1411. # x and y are binary random variables with some dependence
  1412. rng = np.random.default_rng(6235696159000529929)
  1413. x = (rng.random(7) > 0.6).astype(float)
  1414. y = (rng.random(7) + 0.25*x > 0.6).astype(float)
  1415. tab = stats.contingency.crosstab(x, y)[1]
  1416. x, y = xp.asarray(x), xp.asarray(y)
  1417. def statistic(x, axis):
  1418. return xp.count_nonzero((x == 1) & (y == 1), axis=axis)
  1419. res = permutation_test((x,), statistic, permutation_type='pairings',
  1420. n_resamples=xp.inf, alternative=alternative,
  1421. rng=rng)
  1422. res2 = stats.fisher_exact(tab, alternative=alternative)
  1423. xp_assert_close(res.pvalue, xp.asarray(res2.pvalue, dtype=x.dtype))
  1424. @pytest.mark.xslow()
  1425. @pytest.mark.parametrize('axis', (-2, 1))
  1426. def test_vectorized_nsamp_ptype_samples(self, axis):
  1427. # statistic only available for NumPy, and it's a pain to vectorize
  1428. # Test that permutation_test with permutation_type='samples' works
  1429. # properly for a 3-sample statistic with nd array samples of different
  1430. # (but compatible) shapes and ndims. Show that exact permutation test
  1431. # reproduces SciPy's exact pvalue and that random permutation test
  1432. # approximates it.
  1433. x = self.rng.random(size=(2, 4, 3))
  1434. y = self.rng.random(size=(1, 4, 3))
  1435. z = self.rng.random(size=(2, 4, 1))
  1436. x = stats.rankdata(x, axis=axis)
  1437. y = stats.rankdata(y, axis=axis)
  1438. z = stats.rankdata(z, axis=axis)
  1439. y = y[0] # to check broadcast with different ndim
  1440. data = (x, y, z)
  1441. def statistic1d(*data):
  1442. return stats.page_trend_test(data, ranked=True,
  1443. method='asymptotic').statistic
  1444. def pvalue1d(*data):
  1445. return stats.page_trend_test(data, ranked=True,
  1446. method='exact').pvalue
  1447. statistic = _resampling._vectorize_statistic(statistic1d)
  1448. pvalue = _resampling._vectorize_statistic(pvalue1d)
  1449. expected_statistic = statistic(*np.broadcast_arrays(*data), axis=axis)
  1450. expected_pvalue = pvalue(*np.broadcast_arrays(*data), axis=axis)
  1451. # Let's forgive this use of an integer seed, please.
  1452. kwds = {'vectorized': False, 'axis': axis, 'alternative': 'greater',
  1453. 'permutation_type': 'pairings', 'rng': 0}
  1454. res = permutation_test(data, statistic1d, n_resamples=np.inf, **kwds)
  1455. res2 = permutation_test(data, statistic1d, n_resamples=5000, **kwds)
  1456. assert_allclose(res.statistic, expected_statistic, rtol=self.rtol)
  1457. assert_allclose(res.statistic, res2.statistic, rtol=self.rtol)
  1458. assert_allclose(res.pvalue, expected_pvalue, rtol=self.rtol)
  1459. assert_allclose(res.pvalue, res2.pvalue, atol=3e-2)
  1460. # -- Test Against External References -- #
  1461. tie_case_1 = {'x': [1, 2, 3, 4], 'y': [1.5, 2, 2.5],
  1462. 'expected_less': 0.2000000000,
  1463. 'expected_2sided': 0.4, # 2*expected_less
  1464. 'expected_Pr_gte_S_mean': 0.3428571429, # see note below
  1465. 'expected_statistic': 7.5,
  1466. 'expected_avg': 9.142857, 'expected_std': 1.40698}
  1467. tie_case_2 = {'x': [111, 107, 100, 99, 102, 106, 109, 108],
  1468. 'y': [107, 108, 106, 98, 105, 103, 110, 105, 104],
  1469. 'expected_less': 0.1555738379,
  1470. 'expected_2sided': 0.3111476758,
  1471. 'expected_Pr_gte_S_mean': 0.2969971205, # see note below
  1472. 'expected_statistic': 32.5,
  1473. 'expected_avg': 38.117647, 'expected_std': 5.172124}
  1474. @pytest.mark.skip_xp_backends(eager_only=True) # TODO: change to jax_jit=False
  1475. @pytest.mark.xslow() # only the second case is slow, really
  1476. @pytest.mark.parametrize('case', (tie_case_1, tie_case_2))
  1477. def test_with_ties(self, case, xp):
  1478. """
  1479. Results above from SAS PROC NPAR1WAY, e.g.
  1480. DATA myData;
  1481. INPUT X Y;
  1482. CARDS;
  1483. 1 1
  1484. 1 2
  1485. 1 3
  1486. 1 4
  1487. 2 1.5
  1488. 2 2
  1489. 2 2.5
  1490. ods graphics on;
  1491. proc npar1way AB data=myData;
  1492. class X;
  1493. EXACT;
  1494. run;
  1495. ods graphics off;
  1496. Note: SAS provides Pr >= |S-Mean|, which is different from our
  1497. definition of a two-sided p-value.
  1498. """
  1499. x = case['x']
  1500. y = case['y']
  1501. expected_statistic = xp.asarray(case['expected_statistic'])
  1502. expected_less = xp.asarray(case['expected_less'])
  1503. expected_2sided = xp.asarray(case['expected_2sided'])
  1504. expected_Pr_gte_S_mean = xp.asarray(case['expected_Pr_gte_S_mean'])
  1505. expected_avg = xp.asarray(case['expected_avg'])
  1506. expected_std = xp.asarray(case['expected_std'])
  1507. def statistic(x, y, axis):
  1508. # todo: use `xp` as backend when `ansari` is translated to array API
  1509. x, y = _xp_copy_to_numpy(x), _xp_copy_to_numpy(y)
  1510. res = stats.ansari(x, y, axis=axis)
  1511. res = xp.asarray(res.statistic)
  1512. return res[()] if res.ndim == 0 else res
  1513. dtype = xp_default_dtype(xp)
  1514. x, y = xp.asarray(x, dtype=dtype), xp.asarray(y, dtype=dtype)
  1515. with warnings.catch_warnings():
  1516. warnings.filterwarnings(
  1517. "ignore", "Ties preclude use of exact statistic", UserWarning)
  1518. res = permutation_test((x, y), statistic, n_resamples=np.inf,
  1519. alternative='less')
  1520. res2 = permutation_test((x, y), statistic, n_resamples=np.inf,
  1521. alternative='two-sided')
  1522. xp_assert_close(res.statistic, expected_statistic, rtol=self.rtol)
  1523. xp_assert_close(res.pvalue, expected_less, atol=1e-10)
  1524. xp_assert_close(res2.pvalue, expected_2sided, atol=1e-10)
  1525. xp_assert_close(xp.mean(res2.null_distribution), expected_avg, rtol=1e-6)
  1526. xp_assert_close(xp.std(res2.null_distribution), expected_std, rtol=1e-6)
  1527. # SAS provides Pr >= |S-Mean|; might as well check against that, too
  1528. S = res.statistic
  1529. mean = xp.mean(res.null_distribution)
  1530. n = res.null_distribution.shape[0]
  1531. Pr_gte_S_mean = xp.astype(xp.count_nonzero(
  1532. xp.abs(res.null_distribution-mean) >= xp.abs(S-mean)), S.dtype) / n
  1533. xp_assert_close(Pr_gte_S_mean, expected_Pr_gte_S_mean)
  1534. @pytest.mark.slow
  1535. @pytest.mark.parametrize('alternative, expected_pvalue',
  1536. (('less', 0.9708333333333),
  1537. ('greater', 0.05138888888889),
  1538. ('two-sided', 0.1027777777778)))
  1539. # I only need to skip torch on GPU because it doesn't have betaincc for pearsonr
  1540. @pytest.mark.skip_xp_backends(cpu_only=True, exceptions=['cupy', 'jax.numpy'])
  1541. @pytest.mark.skip_xp_backends(eager_only=True) # TODO: change to jax_jit=False
  1542. def test_against_spearmanr_in_R(self, alternative, expected_pvalue, xp):
  1543. """
  1544. Results above from R cor.test, e.g.
  1545. options(digits=16)
  1546. x <- c(1.76405235, 0.40015721, 0.97873798,
  1547. 2.2408932, 1.86755799, -0.97727788)
  1548. y <- c(2.71414076, 0.2488, 0.87551913,
  1549. 2.6514917, 2.01160156, 0.47699563)
  1550. cor.test(x, y, method = "spearm", alternative = "t")
  1551. """
  1552. # data comes from
  1553. # np.random.seed(0)
  1554. # x = stats.norm.rvs(size=6)
  1555. # y = x + stats.norm.rvs(size=6)
  1556. x = xp.asarray([1.76405235, 0.40015721, 0.97873798,
  1557. 2.2408932, 1.86755799, -0.97727788])
  1558. y = xp.asarray([2.71414076, 0.2488, 0.87551913,
  1559. 2.6514917, 2.01160156, 0.47699563])
  1560. expected_statistic = 0.7714285714285715
  1561. y = xp.asarray(stats.rankdata(_xp_copy_to_numpy(y)))
  1562. def statistic(x, axis):
  1563. # `spearmanr` is not array api compatible, but `pearsonr` is. So for now
  1564. # use _xp_copy_to_numpy just for ranking so we can run this test w/ CuPy.
  1565. # TODO: use `xp` as backend when cupy works with `rankdata`
  1566. x = xp.asarray(stats.rankdata(_xp_copy_to_numpy(x), axis=axis))
  1567. return stats.pearsonr(x, y, axis=axis).statistic
  1568. res = permutation_test((x,), statistic, permutation_type='pairings',
  1569. n_resamples=xp.inf, alternative=alternative)
  1570. xp_assert_close(res.statistic, xp.asarray(expected_statistic), rtol=self.rtol)
  1571. xp_assert_close(res.pvalue, xp.asarray(expected_pvalue), atol=1e-13)
  1572. @pytest.mark.parametrize("batch", (-1, 0))
  1573. def test_batch_generator_iv(self, batch):
  1574. with pytest.raises(ValueError, match="`batch` must be positive."):
  1575. list(_resampling._batch_generator([1, 2, 3], batch))
  1576. batch_generator_cases = [(range(0), 3, []),
  1577. (range(6), 3, [[0, 1, 2], [3, 4, 5]]),
  1578. (range(8), 3, [[0, 1, 2], [3, 4, 5], [6, 7]])]
  1579. @pytest.mark.parametrize("iterable, batch, expected",
  1580. batch_generator_cases)
  1581. def test_batch_generator(self, iterable, batch, expected):
  1582. got = list(_resampling._batch_generator(iterable, batch))
  1583. assert got == expected
  1584. @pytest.mark.fail_slow(2)
  1585. # I only need to skip torch on GPU because it doesn't have betaincc for pearsonr
  1586. @pytest.mark.skip_xp_backends(cpu_only=True, exceptions=['cupy', 'jax.numpy'])
  1587. def test_finite_precision_statistic(self, xp):
  1588. # Some statistics return numerically distinct values when the values
  1589. # should be equal in theory. Test that `permutation_test` accounts
  1590. # for this in some way.
  1591. x = xp.asarray([1., 2., 4., 3.], dtype=xp.float64)
  1592. y = xp.asarray([2., 4., 6., 8.], dtype=xp.float64)
  1593. def statistic(x, y, axis):
  1594. return stats.pearsonr(x, y, axis=axis)[0]
  1595. res = stats.permutation_test((x, y), statistic,
  1596. permutation_type='pairings')
  1597. r, pvalue, null = res.statistic, res.pvalue, res.null_distribution
  1598. correct_p = 2 * float(xp.count_nonzero(null >= r - 1e-14)) / null.shape[0]
  1599. assert pvalue == correct_p == 1/3
  1600. # Compare against other exact correlation tests using R corr.test
  1601. # options(digits=16)
  1602. # x = c(1, 2, 4, 3)
  1603. # y = c(2, 4, 6, 8)
  1604. # cor.test(x, y, alternative = "t", method = "spearman") # 0.333333333
  1605. # cor.test(x, y, alternative = "t", method = "kendall") # 0.333333333
  1606. def test_all_partitions_concatenated():
  1607. # make sure that _all_paritions_concatenated produces the correct number
  1608. # of partitions of the data into samples of the given sizes and that
  1609. # all are unique
  1610. n = np.array([3, 2, 4], dtype=int)
  1611. nc = np.cumsum(n)
  1612. all_partitions = set()
  1613. counter = 0
  1614. for partition_concatenated in _resampling._all_partitions_concatenated(n):
  1615. counter += 1
  1616. partitioning = np.split(partition_concatenated, nc[:-1])
  1617. all_partitions.add(tuple([frozenset(i) for i in partitioning]))
  1618. expected = np.prod([special.binom(sum(n[i:]), sum(n[i+1:]))
  1619. for i in range(len(n)-1)])
  1620. assert_equal(counter, expected)
  1621. assert_equal(len(all_partitions), expected)
  1622. @pytest.mark.parametrize('fun_name',
  1623. ['bootstrap', 'permutation_test', 'monte_carlo_test'])
  1624. def test_parameter_vectorized(fun_name):
  1625. # Check that parameter `vectorized` is working as desired for all
  1626. # resampling functions. Results don't matter; just don't fail asserts.
  1627. rng = np.random.default_rng(75245098234592)
  1628. sample = rng.random(size=10)
  1629. def rvs(size): # needed by `monte_carlo_test`
  1630. return stats.norm.rvs(size=size, random_state=rng)
  1631. fun_options = {'bootstrap': {'data': (sample,), 'rng': rng,
  1632. 'method': 'percentile'},
  1633. 'permutation_test': {'data': (sample,), 'rng': rng,
  1634. 'permutation_type': 'samples'},
  1635. 'monte_carlo_test': {'sample': sample, 'rvs': rvs}}
  1636. common_options = {'n_resamples': 100}
  1637. fun = getattr(stats, fun_name)
  1638. options = fun_options[fun_name]
  1639. options.update(common_options)
  1640. def statistic(x, axis):
  1641. assert x.ndim > 1 or np.array_equal(x, sample)
  1642. return np.mean(x, axis=axis)
  1643. fun(statistic=statistic, vectorized=None, **options)
  1644. fun(statistic=statistic, vectorized=True, **options)
  1645. def statistic(x):
  1646. assert x.ndim == 1
  1647. return np.mean(x)
  1648. fun(statistic=statistic, vectorized=None, **options)
  1649. fun(statistic=statistic, vectorized=False, **options)
  1650. class TestMonteCarloMethod:
  1651. def test_rvs_and_random_state(self):
  1652. message = "Use of `rvs` and `rng` are mutually exclusive."
  1653. rng = np.random.default_rng(34982345)
  1654. with pytest.raises(ValueError, match=message):
  1655. stats.MonteCarloMethod(rvs=rng.random, rng=rng)