_hypotests.py 85 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160
  1. from collections import namedtuple
  2. from dataclasses import dataclass
  3. import math
  4. import numpy as np
  5. import warnings
  6. from itertools import combinations
  7. import scipy.stats
  8. from scipy.optimize import shgo
  9. from . import distributions
  10. from ._common import ConfidenceInterval
  11. from ._continuous_distns import norm
  12. from scipy._lib._array_api import (xp_capabilities, array_namespace, xp_size,
  13. xp_promote, xp_result_type, xp_copy, is_numpy)
  14. import scipy._lib.array_api_extra as xpx
  15. from scipy.special import gamma, kv, gammaln
  16. from scipy.fft import ifft
  17. from ._stats_pythran import _a_ij_Aij_Dij2
  18. from ._stats_pythran import (
  19. _concordant_pairs as _P, _discordant_pairs as _Q
  20. )
  21. from ._axis_nan_policy import _axis_nan_policy_factory
  22. from scipy.stats import _stats_py
  23. __all__ = ['epps_singleton_2samp', 'cramervonmises', 'somersd',
  24. 'barnard_exact', 'boschloo_exact', 'cramervonmises_2samp',
  25. 'tukey_hsd', 'poisson_means_test']
  26. Epps_Singleton_2sampResult = namedtuple('Epps_Singleton_2sampResult',
  27. ('statistic', 'pvalue'))
  28. @xp_capabilities(skip_backends=[("dask.array", "lazy -> no _axis_nan_policy"),
  29. ("jax.numpy", "lazy -> no _axis_nan_policy")])
  30. @_axis_nan_policy_factory(Epps_Singleton_2sampResult, n_samples=2, too_small=4)
  31. def epps_singleton_2samp(x, y, t=(0.4, 0.8), *, axis=0):
  32. """Compute the Epps-Singleton (ES) test statistic.
  33. Test the null hypothesis that two samples have the same underlying
  34. probability distribution.
  35. Parameters
  36. ----------
  37. x, y : array-like
  38. The two samples of observations to be tested. Input must not have more
  39. than one dimension. Samples can have different lengths, but both
  40. must have at least five observations.
  41. t : array-like, optional
  42. The points (t1, ..., tn) where the empirical characteristic function is
  43. to be evaluated. It should be positive distinct numbers. The default
  44. value (0.4, 0.8) is proposed in [1]_. Input must not have more than
  45. one dimension.
  46. axis : int or tuple of ints, default: 0
  47. If an int or tuple of ints, the axis or axes of the input along which
  48. to compute the statistic. The statistic of each axis-slice (e.g. row)
  49. of the input will appear in a corresponding element of the output.
  50. If ``None``, the input will be raveled before computing the statistic.
  51. Returns
  52. -------
  53. statistic : float
  54. The test statistic.
  55. pvalue : float
  56. The associated p-value based on the asymptotic chi2-distribution.
  57. See Also
  58. --------
  59. ks_2samp, anderson_ksamp
  60. Notes
  61. -----
  62. Testing whether two samples are generated by the same underlying
  63. distribution is a classical question in statistics. A widely used test is
  64. the Kolmogorov-Smirnov (KS) test which relies on the empirical
  65. distribution function. Epps and Singleton introduce a test based on the
  66. empirical characteristic function in [1]_.
  67. One advantage of the ES test compared to the KS test is that is does
  68. not assume a continuous distribution. In [1]_, the authors conclude
  69. that the test also has a higher power than the KS test in many
  70. examples. They recommend the use of the ES test for discrete samples as
  71. well as continuous samples with at least 25 observations each, whereas
  72. `anderson_ksamp` is recommended for smaller sample sizes in the
  73. continuous case.
  74. The p-value is computed from the asymptotic distribution of the test
  75. statistic which follows a `chi2` distribution. If the sample size of both
  76. `x` and `y` is below 25, the small sample correction proposed in [1]_ is
  77. applied to the test statistic.
  78. The default values of `t` are determined in [1]_ by considering
  79. various distributions and finding good values that lead to a high power
  80. of the test in general. Table III in [1]_ gives the optimal values for
  81. the distributions tested in that study. The values of `t` are scaled by
  82. the semi-interquartile range in the implementation, see [1]_.
  83. References
  84. ----------
  85. .. [1] T. W. Epps and K. J. Singleton, "An omnibus test for the two-sample
  86. problem using the empirical characteristic function", Journal of
  87. Statistical Computation and Simulation 26, p. 177--203, 1986.
  88. .. [2] S. J. Goerg and J. Kaiser, "Nonparametric testing of distributions
  89. - the Epps-Singleton two-sample test using the empirical characteristic
  90. function", The Stata Journal 9(3), p. 454--465, 2009.
  91. """
  92. xp = array_namespace(x, y)
  93. # x and y are converted to arrays by the decorator
  94. # and `axis` is guaranteed to be -1.
  95. x, y = xp_promote(x, y, force_floating=True, xp=xp)
  96. t = xp.asarray(t, dtype=x.dtype)
  97. # check if x and y are valid inputs
  98. nx, ny = x.shape[-1], y.shape[-1]
  99. if (nx < 5) or (ny < 5): # only used by test_axis_nan_policy
  100. raise ValueError('x and y should have at least 5 elements, but len(x) '
  101. f'= {nx} and len(y) = {ny}.')
  102. n = nx + ny
  103. # check if t is valid
  104. if t.ndim > 1:
  105. raise ValueError(f't must be 1d, but t.ndim equals {t.ndim}.')
  106. if xp.any(t <= 0):
  107. raise ValueError('t must contain positive elements only.')
  108. # Previously, non-finite input caused an error in linalg functions.
  109. # To prevent an issue in one slice from halting the calculation, replace non-finite
  110. # values with a harmless one, and replace results with NaN at the end.
  111. i_x = ~xp.isfinite(x)
  112. i_y = ~xp.isfinite(y)
  113. # Ideally we would avoid copying all data here; see
  114. # discussion in data-apis/array-api-extra#506.
  115. x = xp.where(i_x, 1., x)
  116. y = xp.where(i_y, 1., y)
  117. invalid_result = xp.any(i_x, axis=-1) | xp.any(i_y, axis=-1)
  118. # rescale t with semi-iqr as proposed in [1]; import iqr here to avoid
  119. # circular import
  120. from scipy.stats import iqr
  121. sigma = iqr(xp.concat((x, y), axis=-1), axis=-1, keepdims=True) / 2
  122. ts = xp.reshape(t, (-1,) + (1,)*x.ndim) / sigma
  123. # covariance estimation of ES test
  124. gx = xp.concat((xp.cos(ts*x), xp.sin(ts*x)), axis=0)
  125. gy = xp.concat((xp.cos(ts*y), xp.sin(ts*y)), axis=0)
  126. gx, gy = xp.moveaxis(gx, 0, -2), xp.moveaxis(gy, 0, -2)
  127. cov_x = xpx.cov(gx) * (nx-1)/nx # the test uses biased cov-estimate
  128. cov_y = xpx.cov(gy) * (ny-1)/ny
  129. cov_x, cov_y = xp.astype(cov_x, x.dtype), xp.astype(cov_y, y.dtype)
  130. est_cov = (n/nx)*cov_x + (n/ny)*cov_y
  131. est_cov_inv = xp.linalg.pinv(est_cov)
  132. r = xp.asarray(xp.linalg.matrix_rank(est_cov_inv), dtype=est_cov_inv.dtype)
  133. if xp.any(r < 2*xp_size(t)):
  134. warnings.warn('Estimated covariance matrix does not have full rank. '
  135. 'This indicates a bad choice of the input t and the '
  136. 'test might not be consistent.', # see p. 183 in [1]_
  137. stacklevel=2)
  138. # compute test statistic w distributed asympt. as chisquare with df=r
  139. g_diff = xp.mean(gx, axis=-1, keepdims=True) - xp.mean(gy, axis=-1, keepdims=True)
  140. w = n*xp.matmul(xp.matrix_transpose(g_diff), xp.matmul(est_cov_inv, g_diff))
  141. w = w[..., 0, 0]
  142. # apply small-sample correction
  143. if (max(nx, ny) < 25):
  144. corr = 1.0/(1.0 + n**(-0.45) + 10.1*(nx**(-1.7) + ny**(-1.7)))
  145. w *= corr
  146. chi2 = _stats_py._SimpleChi2(r)
  147. p = _stats_py._get_pvalue(w, chi2, alternative='greater', symmetric=False, xp=xp)
  148. w = xpx.at(w)[invalid_result].set(xp.nan)
  149. p = xpx.at(p)[invalid_result].set(xp.nan)
  150. w = w[()] if w.ndim == 0 else w
  151. p = p[()] if p.ndim == 0 else p
  152. return Epps_Singleton_2sampResult(w, p)
  153. @xp_capabilities(np_only=True)
  154. def poisson_means_test(k1, n1, k2, n2, *, diff=0, alternative='two-sided'):
  155. r"""
  156. Performs the Poisson means test, AKA the "E-test".
  157. This is a test of the null hypothesis that the difference between means of
  158. two Poisson distributions is `diff`. The samples are provided as the
  159. number of events `k1` and `k2` observed within measurement intervals
  160. (e.g. of time, space, number of observations) of sizes `n1` and `n2`.
  161. Parameters
  162. ----------
  163. k1 : int
  164. Number of events observed from distribution 1.
  165. n1: float
  166. Size of sample from distribution 1.
  167. k2 : int
  168. Number of events observed from distribution 2.
  169. n2 : float
  170. Size of sample from distribution 2.
  171. diff : float, default=0
  172. The hypothesized difference in means between the distributions
  173. underlying the samples.
  174. alternative : {'two-sided', 'less', 'greater'}, optional
  175. Defines the alternative hypothesis.
  176. The following options are available (default is 'two-sided'):
  177. * 'two-sided': the difference between distribution means is not
  178. equal to `diff`
  179. * 'less': the difference between distribution means is less than
  180. `diff`
  181. * 'greater': the difference between distribution means is greater
  182. than `diff`
  183. Returns
  184. -------
  185. statistic : float
  186. The test statistic (see [1]_ equation 3.3).
  187. pvalue : float
  188. The probability of achieving such an extreme value of the test
  189. statistic under the null hypothesis.
  190. Notes
  191. -----
  192. Let:
  193. .. math:: X_1 \sim \mbox{Poisson}(\mathtt{n1}\lambda_1)
  194. be a random variable independent of
  195. .. math:: X_2 \sim \mbox{Poisson}(\mathtt{n2}\lambda_2)
  196. and let ``k1`` and ``k2`` be the observed values of :math:`X_1`
  197. and :math:`X_2`, respectively. Then `poisson_means_test` uses the number
  198. of observed events ``k1`` and ``k2`` from samples of size ``n1`` and
  199. ``n2``, respectively, to test the null hypothesis that
  200. .. math::
  201. H_0: \lambda_1 - \lambda_2 = \mathtt{diff}
  202. A benefit of the E-test is that it has good power for small sample sizes,
  203. which can reduce sampling costs [1]_. It has been evaluated and determined
  204. to be more powerful than the comparable C-test, sometimes referred to as
  205. the Poisson exact test.
  206. References
  207. ----------
  208. .. [1] Krishnamoorthy, K., & Thomson, J. (2004). A more powerful test for
  209. comparing two Poisson means. Journal of Statistical Planning and
  210. Inference, 119(1), 23-35.
  211. .. [2] Przyborowski, J., & Wilenski, H. (1940). Homogeneity of results in
  212. testing samples from Poisson series: With an application to testing
  213. clover seed for dodder. Biometrika, 31(3/4), 313-323.
  214. Examples
  215. --------
  216. Suppose that a gardener wishes to test the number of dodder (weed) seeds
  217. in a sack of clover seeds that they buy from a seed company. It has
  218. previously been established that the number of dodder seeds in clover
  219. follows the Poisson distribution.
  220. A 100 gram sample is drawn from the sack before being shipped to the
  221. gardener. The sample is analyzed, and it is found to contain no dodder
  222. seeds; that is, `k1` is 0. However, upon arrival, the gardener draws
  223. another 100 gram sample from the sack. This time, three dodder seeds are
  224. found in the sample; that is, `k2` is 3. The gardener would like to
  225. know if the difference is significant and not due to chance. The
  226. null hypothesis is that the difference between the two samples is merely
  227. due to chance, or that :math:`\lambda_1 - \lambda_2 = \mathtt{diff}`
  228. where :math:`\mathtt{diff} = 0`. The alternative hypothesis is that the
  229. difference is not due to chance, or :math:`\lambda_1 - \lambda_2 \ne 0`.
  230. The gardener selects a significance level of 5% to reject the null
  231. hypothesis in favor of the alternative [2]_.
  232. >>> import scipy.stats as stats
  233. >>> res = stats.poisson_means_test(0, 100, 3, 100)
  234. >>> res.statistic, res.pvalue
  235. (-1.7320508075688772, 0.08837900929018157)
  236. The p-value is .088, indicating a near 9% chance of observing a value of
  237. the test statistic under the null hypothesis. This exceeds 5%, so the
  238. gardener does not reject the null hypothesis as the difference cannot be
  239. regarded as significant at this level.
  240. """
  241. _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative)
  242. # "for a given k_1 and k_2, an estimate of \lambda_2 is given by" [1] (3.4)
  243. lmbd_hat2 = ((k1 + k2) / (n1 + n2) - diff * n1 / (n1 + n2))
  244. # "\hat{\lambda_{2k}} may be less than or equal to zero ... and in this
  245. # case the null hypothesis cannot be rejected ... [and] it is not necessary
  246. # to compute the p-value". [1] page 26 below eq. (3.6).
  247. if lmbd_hat2 <= 0:
  248. return _stats_py.SignificanceResult(0, 1)
  249. # The unbiased variance estimate [1] (3.2)
  250. var = k1 / (n1 ** 2) + k2 / (n2 ** 2)
  251. # The _observed_ pivot statistic from the input. It follows the
  252. # unnumbered equation following equation (3.3) This is used later in
  253. # comparison with the computed pivot statistics in an indicator function.
  254. t_k1k2 = (k1 / n1 - k2 / n2 - diff) / np.sqrt(var)
  255. # Equation (3.5) of [1] is lengthy, so it is broken into several parts,
  256. # beginning here. Note that the probability mass function of poisson is
  257. # exp^(-\mu)*\mu^k/k!, so and this is called with shape \mu, here noted
  258. # here as nlmbd_hat*. The strategy for evaluating the double summation in
  259. # (3.5) is to create two arrays of the values of the two products inside
  260. # the summation and then broadcast them together into a matrix, and then
  261. # sum across the entire matrix.
  262. # Compute constants (as seen in the first and second separated products in
  263. # (3.5).). (This is the shape (\mu) parameter of the poisson distribution.)
  264. nlmbd_hat1 = n1 * (lmbd_hat2 + diff)
  265. nlmbd_hat2 = n2 * lmbd_hat2
  266. # Determine summation bounds for tail ends of distribution rather than
  267. # summing to infinity. `x1*` is for the outer sum and `x2*` is the inner
  268. # sum.
  269. x1_lb, x1_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat1)
  270. x2_lb, x2_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat2)
  271. # Construct arrays to function as the x_1 and x_2 counters on the summation
  272. # in (3.5). `x1` is in columns and `x2` is in rows to allow for
  273. # broadcasting.
  274. x1 = np.arange(x1_lb, x1_ub + 1)
  275. x2 = np.arange(x2_lb, x2_ub + 1)[:, None]
  276. # These are the two products in equation (3.5) with `prob_x1` being the
  277. # first (left side) and `prob_x2` being the second (right side). (To
  278. # make as clear as possible: the 1st contains a "+ d" term, the 2nd does
  279. # not.)
  280. prob_x1 = distributions.poisson.pmf(x1, nlmbd_hat1)
  281. prob_x2 = distributions.poisson.pmf(x2, nlmbd_hat2)
  282. # compute constants for use in the "pivot statistic" per the
  283. # unnumbered equation following (3.3).
  284. lmbd_x1 = x1 / n1
  285. lmbd_x2 = x2 / n2
  286. lmbds_diff = lmbd_x1 - lmbd_x2 - diff
  287. var_x1x2 = lmbd_x1 / n1 + lmbd_x2 / n2
  288. # This is the 'pivot statistic' for use in the indicator of the summation
  289. # (left side of "I[.]").
  290. with np.errstate(invalid='ignore', divide='ignore'):
  291. t_x1x2 = lmbds_diff / np.sqrt(var_x1x2)
  292. # `[indicator]` implements the "I[.] ... the indicator function" per
  293. # the paragraph following equation (3.5).
  294. if alternative == 'two-sided':
  295. indicator = np.abs(t_x1x2) >= np.abs(t_k1k2)
  296. elif alternative == 'less':
  297. indicator = t_x1x2 <= t_k1k2
  298. else:
  299. indicator = t_x1x2 >= t_k1k2
  300. # Multiply all combinations of the products together, exclude terms
  301. # based on the `indicator` and then sum. (3.5)
  302. pvalue = np.sum((prob_x1 * prob_x2)[indicator])
  303. return _stats_py.SignificanceResult(t_k1k2, pvalue)
  304. def _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative):
  305. # """check for valid types and values of input to `poisson_mean_test`."""
  306. if k1 != int(k1) or k2 != int(k2):
  307. raise TypeError('`k1` and `k2` must be integers.')
  308. count_err = '`k1` and `k2` must be greater than or equal to 0.'
  309. if k1 < 0 or k2 < 0:
  310. raise ValueError(count_err)
  311. if n1 <= 0 or n2 <= 0:
  312. raise ValueError('`n1` and `n2` must be greater than 0.')
  313. if diff < 0:
  314. raise ValueError('diff must be greater than or equal to 0.')
  315. alternatives = {'two-sided', 'less', 'greater'}
  316. if alternative.lower() not in alternatives:
  317. raise ValueError(f"Alternative must be one of '{alternatives}'.")
  318. class CramerVonMisesResult:
  319. def __init__(self, statistic, pvalue):
  320. self.statistic = statistic
  321. self.pvalue = pvalue
  322. def __repr__(self):
  323. return (f"{self.__class__.__name__}(statistic={self.statistic}, "
  324. f"pvalue={self.pvalue})")
  325. def _psi1_mod(x, *, xp=None):
  326. """
  327. psi1 is defined in equation 1.10 in Csörgő, S. and Faraway, J. (1996).
  328. This implements a modified version by excluding the term V(x) / 12
  329. (here: _cdf_cvm_inf(x) / 12) to avoid evaluating _cdf_cvm_inf(x)
  330. twice in _cdf_cvm.
  331. Implementation based on MAPLE code of Julian Faraway and R code of the
  332. function pCvM in the package goftest (v1.1.1), permission granted
  333. by Adrian Baddeley. Main difference in the implementation: the code
  334. here keeps adding terms of the series until the terms are small enough.
  335. """
  336. xp = array_namespace(x) if xp is None else xp
  337. def _ed2(y):
  338. z = y**2 / 4
  339. z_ = np.asarray(z)
  340. b = xp.asarray(kv(1/4, z_) + kv(3/4, z_))
  341. return xp.exp(-z) * (y/2)**(3/2) * b / math.sqrt(np.pi)
  342. def _ed3(y):
  343. z = y**2 / 4
  344. z_ = np.asarray(z)
  345. c = xp.exp(-z) / math.sqrt(np.pi)
  346. kv_terms = xp.asarray(2*kv(1/4, z_)
  347. + 3*kv(3/4, z_) - kv(5/4, z_))
  348. return c * (y/2)**(5/2) * kv_terms
  349. def _Ak(k, x):
  350. m = 2*k + 1
  351. sx = 2 * xp.sqrt(x)
  352. y1 = x**(3/4)
  353. y2 = x**(5/4)
  354. gamma_kp1_2 = float(gamma(k + 1 / 2))
  355. gamma_kp3_2 = float(gamma(k + 3 / 2))
  356. e1 = m * gamma_kp1_2 * _ed2((4 * k + 3)/sx) / (9 * y1)
  357. e2 = gamma_kp1_2 * _ed3((4 * k + 1) / sx) / (72 * y2)
  358. e3 = 2 * (m + 2) * gamma_kp3_2 * _ed3((4 * k + 5) / sx) / (12 * y2)
  359. e4 = 7 * m * gamma_kp1_2 * _ed2((4 * k + 1) / sx) / (144 * y1)
  360. e5 = 7 * m * gamma_kp1_2 * _ed2((4 * k + 5) / sx) / (144 * y1)
  361. return e1 + e2 + e3 + e4 + e5
  362. x = xp.asarray(x)
  363. tot = xp.zeros_like(x)
  364. cond = xp.ones_like(x, dtype=xp.bool)
  365. k = 0
  366. while xp.any(cond):
  367. gamma_kp1 = float(gamma(k + 1))
  368. z = -_Ak(k, x[cond]) / (xp.pi * gamma_kp1)
  369. tot = xpx.at(tot)[cond].set(tot[cond] + z)
  370. # For float32 arithmetic, the tolerance may need to be adjusted or the
  371. # algorithm may prove to be unsuitable.
  372. cond = xpx.at(cond)[xp_copy(cond)].set(xp.abs(z) >= 1e-7)
  373. k += 1
  374. return tot
  375. def _cdf_cvm_inf(x, *, xp=None):
  376. """
  377. Calculate the cdf of the Cramér-von Mises statistic (infinite sample size).
  378. See equation 1.2 in Csörgő, S. and Faraway, J. (1996).
  379. Implementation based on MAPLE code of Julian Faraway and R code of the
  380. function pCvM in the package goftest (v1.1.1), permission granted
  381. by Adrian Baddeley. Main difference in the implementation: the code
  382. here keeps adding terms of the series until the terms are small enough.
  383. The function is not expected to be accurate for large values of x, say
  384. x > 4, when the cdf is very close to 1.
  385. """
  386. xp = array_namespace(x) if xp is None else xp
  387. x = xp.asarray(x)
  388. def term(x, k):
  389. # this expression can be found in [2], second line of (1.3)
  390. u = math.exp(gammaln(k + 0.5) - gammaln(k+1)) / (xp.pi**1.5 * xp.sqrt(x))
  391. y = 4*k + 1
  392. q = y**2 / (16*x)
  393. b = xp.asarray(kv(0.25, np.asarray(q)), dtype=u.dtype) # not automatic?
  394. return u * math.sqrt(y) * xp.exp(-q) * b
  395. tot = xp.zeros_like(x, dtype=x.dtype)
  396. cond = xp.ones_like(x, dtype=xp.bool)
  397. k = 0
  398. while xp.any(cond):
  399. z = term(x[cond], k)
  400. # tot[cond] = tot[cond] + z
  401. tot = xpx.at(tot)[cond].add(z)
  402. # cond[cond] = np.abs(z) >= 1e-7
  403. cond = xpx.at(cond)[xp_copy(cond)].set(xp.abs(z) >= 1e-7) # torch needs copy
  404. k += 1
  405. return tot
  406. def _cdf_cvm(x, n=None, *, xp=None):
  407. """
  408. Calculate the cdf of the Cramér-von Mises statistic for a finite sample
  409. size n. If N is None, use the asymptotic cdf (n=inf).
  410. See equation 1.8 in Csörgő, S. and Faraway, J. (1996) for finite samples,
  411. 1.2 for the asymptotic cdf.
  412. The function is not expected to be accurate for large values of x, say
  413. x > 2, when the cdf is very close to 1 and it might return values > 1
  414. in that case, e.g. _cdf_cvm(2.0, 12) = 1.0000027556716846. Moreover, it
  415. is not accurate for small values of n, especially close to the bounds of
  416. the distribution's domain, [1/(12*n), n/3], where the value jumps to 0
  417. and 1, respectively. These are limitations of the approximation by Csörgő
  418. and Faraway (1996) implemented in this function.
  419. """
  420. xp = array_namespace(x) if xp is None else xp
  421. x = xp.asarray(x)
  422. if n is None:
  423. y = _cdf_cvm_inf(x, xp=xp)
  424. else:
  425. # support of the test statistic is [12/n, n/3], see 1.1 in [2]
  426. y = xp.zeros_like(x, dtype=x.dtype)
  427. sup = (1./(12*n) < x) & (x < n/3.)
  428. # note: _psi1_mod does not include the term _cdf_cvm_inf(x) / 12
  429. # therefore, we need to add it here
  430. y = xpx.at(y)[sup].set(_cdf_cvm_inf(x[sup], xp=xp) * (1 + 1./(12*n))
  431. + _psi1_mod(x[sup], xp=xp) / n)
  432. y = xpx.at(y)[x >= n/3].set(1.)
  433. return y[()] if y.ndim == 0 else y
  434. def _cvm_result_to_tuple(res, _):
  435. return res.statistic, res.pvalue
  436. @xp_capabilities(cpu_only=True, # needs special function `kv`
  437. skip_backends=[('dask.array', 'typical dask issues')], jax_jit=False)
  438. @_axis_nan_policy_factory(CramerVonMisesResult, n_samples=1, too_small=1,
  439. result_to_tuple=_cvm_result_to_tuple)
  440. def cramervonmises(rvs, cdf, args=(), *, axis=0):
  441. r"""Perform the one-sample Cramér-von Mises test for goodness of fit.
  442. This performs a test of the goodness of fit of a cumulative distribution
  443. function (cdf) :math:`F` compared to the empirical distribution function
  444. :math:`F_n` of observed random variates :math:`X_1, ..., X_n` that are
  445. assumed to be independent and identically distributed ([1]_).
  446. The null hypothesis is that the :math:`X_i` have cumulative distribution
  447. :math:`F`.
  448. The test statistic :math:`T` is defined as in [1]_, where :math:`\omega^2`
  449. is the Cramér-von Mises criterion and :math:`x_i` are the observed values.
  450. .. math::
  451. T = n\omega^2 =
  452. \frac{1}{12n} + \sum_{i=1}^n \left[ \frac{2i-1}{2n} - F(x_i) \right]^2
  453. Parameters
  454. ----------
  455. rvs : array_like
  456. A 1-D array of observed values of the random variables :math:`X_i`.
  457. The sample must contain at least two observations.
  458. cdf : str or callable
  459. The cumulative distribution function :math:`F` to test the
  460. observations against. If a string, it should be the name of a
  461. distribution in `scipy.stats`. If a callable, that callable is used
  462. to calculate the cdf: ``cdf(x, *args) -> float``.
  463. args : tuple, optional
  464. Distribution parameters. These are assumed to be known; see Notes.
  465. axis : int or tuple of ints, default: 0
  466. If an int or tuple of ints, the axis or axes of the input along which
  467. to compute the statistic. The statistic of each axis-slice (e.g. row)
  468. of the input will appear in a corresponding element of the output.
  469. If ``None``, the input will be raveled before computing the statistic.
  470. Returns
  471. -------
  472. res : object with attributes
  473. statistic : float
  474. Cramér-von Mises statistic :math:`T`.
  475. pvalue : float
  476. The p-value.
  477. See Also
  478. --------
  479. kstest, cramervonmises_2samp
  480. Notes
  481. -----
  482. .. versionadded:: 1.6.0
  483. The p-value relies on the approximation given by equation 1.8 in [2]_.
  484. It is important to keep in mind that the p-value is only accurate if
  485. one tests a simple hypothesis, i.e. the parameters of the reference
  486. distribution are known. If the parameters are estimated from the data
  487. (composite hypothesis), the computed p-value is not reliable.
  488. References
  489. ----------
  490. .. [1] Cramér-von Mises criterion, Wikipedia,
  491. https://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion
  492. .. [2] Csörgő, S. and Faraway, J. (1996). The Exact and Asymptotic
  493. Distribution of Cramér-von Mises Statistics. Journal of the
  494. Royal Statistical Society, pp. 221-234.
  495. Examples
  496. --------
  497. Suppose we wish to test whether data generated by ``scipy.stats.norm.rvs``
  498. were, in fact, drawn from the standard normal distribution. We choose a
  499. significance level of ``alpha=0.05``.
  500. >>> import numpy as np
  501. >>> from scipy import stats
  502. >>> rng = np.random.default_rng(165417232101553420507139617764912913465)
  503. >>> x = stats.norm.rvs(size=500, random_state=rng)
  504. >>> res = stats.cramervonmises(x, 'norm')
  505. >>> res.statistic, res.pvalue
  506. (0.1072085112565724, 0.5508482238203407)
  507. The p-value exceeds our chosen significance level, so we do not
  508. reject the null hypothesis that the observed sample is drawn from the
  509. standard normal distribution.
  510. Now suppose we wish to check whether the same samples shifted by 2.1 is
  511. consistent with being drawn from a normal distribution with a mean of 2.
  512. >>> y = x + 2.1
  513. >>> res = stats.cramervonmises(y, 'norm', args=(2,))
  514. >>> res.statistic, res.pvalue
  515. (0.8364446265294695, 0.00596286797008283)
  516. Here we have used the `args` keyword to specify the mean (``loc``)
  517. of the normal distribution to test the data against. This is equivalent
  518. to the following, in which we create a frozen normal distribution with
  519. mean 2.1, then pass its ``cdf`` method as an argument.
  520. >>> frozen_dist = stats.norm(loc=2)
  521. >>> res = stats.cramervonmises(y, frozen_dist.cdf)
  522. >>> res.statistic, res.pvalue
  523. (0.8364446265294695, 0.00596286797008283)
  524. In either case, we would reject the null hypothesis that the observed
  525. sample is drawn from a normal distribution with a mean of 2 (and default
  526. variance of 1) because the p-value is less than our chosen
  527. significance level.
  528. """
  529. # `_axis_nan_policy` decorator ensures `axis=-1`
  530. xp = array_namespace(rvs)
  531. if isinstance(cdf, str) and is_numpy(xp):
  532. cdf = getattr(distributions, cdf).cdf
  533. elif isinstance(cdf, str):
  534. message = "`cdf` must be a callable if `rvs` is a non-NumPy array."
  535. raise ValueError(message)
  536. n = rvs.shape[-1]
  537. if n <= 1: # only needed for `test_axis_nan_policy.py`; not user-facing
  538. raise ValueError('The sample must contain at least two observations.')
  539. rvs, n = xp_promote(rvs, n, force_floating=True, xp=xp)
  540. vals = xp.sort(rvs, axis=-1)
  541. cdfvals = cdf(vals, *args)
  542. u = (2*xp.arange(1, n+1, dtype=n.dtype) - 1)/(2*n)
  543. w = 1/(12*n) + xp.sum((u - cdfvals)**2, axis=-1)
  544. # avoid small negative values that can occur due to the approximation
  545. p = xp.clip(1. - _cdf_cvm(w, n), 0., None)
  546. return CramerVonMisesResult(statistic=w, pvalue=p)
  547. def _get_wilcoxon_distr(n):
  548. """
  549. Distribution of probability of the Wilcoxon ranksum statistic r_plus (sum
  550. of ranks of positive differences).
  551. Returns an array with the probabilities of all the possible ranks
  552. r = 0, ..., n*(n+1)/2
  553. """
  554. c = np.ones(1, dtype=np.float64)
  555. for k in range(1, n + 1):
  556. prev_c = c
  557. c = np.zeros(k * (k + 1) // 2 + 1, dtype=np.float64)
  558. m = len(prev_c)
  559. c[:m] = prev_c * 0.5
  560. c[-m:] += prev_c * 0.5
  561. return c
  562. def _get_wilcoxon_distr2(n):
  563. """
  564. Distribution of probability of the Wilcoxon ranksum statistic r_plus (sum
  565. of ranks of positive differences).
  566. Returns an array with the probabilities of all the possible ranks
  567. r = 0, ..., n*(n+1)/2
  568. This is a slower reference function
  569. References
  570. ----------
  571. .. [1] 1. Harris T, Hardin JW. Exact Wilcoxon Signed-Rank and Wilcoxon
  572. Mann-Whitney Ranksum Tests. The Stata Journal. 2013;13(2):337-343.
  573. """
  574. ai = np.arange(1, n+1)[:, None]
  575. t = n*(n+1)/2
  576. q = 2*t
  577. j = np.arange(q)
  578. theta = 2*np.pi/q*j
  579. phi_sp = np.prod(np.cos(theta*ai), axis=0)
  580. phi_s = np.exp(1j*theta*t) * phi_sp
  581. p = np.real(ifft(phi_s))
  582. res = np.zeros(int(t)+1)
  583. res[:-1:] = p[::2]
  584. res[0] /= 2
  585. res[-1] = res[0]
  586. return res
  587. def _tau_b(A):
  588. """Calculate Kendall's tau-b and p-value from contingency table."""
  589. # See [2] 2.2 and 4.2
  590. # contingency table must be truly 2D
  591. if A.shape[0] == 1 or A.shape[1] == 1:
  592. return np.nan, np.nan
  593. NA = A.sum()
  594. PA = _P(A)
  595. QA = _Q(A)
  596. Sri2 = (A.sum(axis=1)**2).sum()
  597. Scj2 = (A.sum(axis=0)**2).sum()
  598. denominator = (NA**2 - Sri2)*(NA**2 - Scj2)
  599. tau = (PA-QA)/(denominator)**0.5
  600. numerator = 4*(_a_ij_Aij_Dij2(A) - (PA - QA)**2 / NA)
  601. s02_tau_b = numerator/denominator
  602. if s02_tau_b == 0: # Avoid divide by zero
  603. return tau, 0
  604. Z = tau/s02_tau_b**0.5
  605. p = 2*norm.sf(abs(Z)) # 2-sided p-value
  606. return tau, p
  607. def _somers_d(A, alternative='two-sided'):
  608. """Calculate Somers' D and p-value from contingency table."""
  609. # See [3] page 1740
  610. # contingency table must be truly 2D
  611. if A.shape[0] <= 1 or A.shape[1] <= 1:
  612. return np.nan, np.nan
  613. NA = A.sum()
  614. NA2 = NA**2
  615. PA = _P(A)
  616. QA = _Q(A)
  617. Sri2 = (A.sum(axis=1)**2).sum()
  618. d = (PA - QA)/(NA2 - Sri2)
  619. S = _a_ij_Aij_Dij2(A) - (PA-QA)**2/NA
  620. with np.errstate(divide='ignore'):
  621. Z = (PA - QA)/(4*(S))**0.5
  622. norm = _stats_py._SimpleNormal()
  623. p = _stats_py._get_pvalue(Z, norm, alternative, xp=np)
  624. return d, p
  625. @dataclass
  626. class SomersDResult:
  627. statistic: float
  628. pvalue: float
  629. table: np.ndarray
  630. @xp_capabilities(np_only=True)
  631. def somersd(x, y=None, alternative='two-sided'):
  632. r"""Calculates Somers' D, an asymmetric measure of ordinal association.
  633. Like Kendall's :math:`\tau`, Somers' :math:`D` is a measure of the
  634. correspondence between two rankings. Both statistics consider the
  635. difference between the number of concordant and discordant pairs in two
  636. rankings :math:`X` and :math:`Y`, and both are normalized such that values
  637. close to 1 indicate strong agreement and values close to -1 indicate
  638. strong disagreement. They differ in how they are normalized. To show the
  639. relationship, Somers' :math:`D` can be defined in terms of Kendall's
  640. :math:`\tau_a`:
  641. .. math::
  642. D(Y|X) = \frac{\tau_a(X, Y)}{\tau_a(X, X)}
  643. Suppose the first ranking :math:`X` has :math:`r` distinct ranks and the
  644. second ranking :math:`Y` has :math:`s` distinct ranks. These two lists of
  645. :math:`n` rankings can also be viewed as an :math:`r \times s` contingency
  646. table in which element :math:`i, j` is the number of rank pairs with rank
  647. :math:`i` in ranking :math:`X` and rank :math:`j` in ranking :math:`Y`.
  648. Accordingly, `somersd` also allows the input data to be supplied as a
  649. single, 2D contingency table instead of as two separate, 1D rankings.
  650. Note that the definition of Somers' :math:`D` is asymmetric: in general,
  651. :math:`D(Y|X) \neq D(X|Y)`. ``somersd(x, y)`` calculates Somers'
  652. :math:`D(Y|X)`: the "row" variable :math:`X` is treated as an independent
  653. variable, and the "column" variable :math:`Y` is dependent. For Somers'
  654. :math:`D(X|Y)`, swap the input lists or transpose the input table.
  655. Parameters
  656. ----------
  657. x : array_like
  658. 1D array of rankings, treated as the (row) independent variable.
  659. Alternatively, a 2D contingency table.
  660. y : array_like, optional
  661. If `x` is a 1D array of rankings, `y` is a 1D array of rankings of the
  662. same length, treated as the (column) dependent variable.
  663. If `x` is 2D, `y` is ignored.
  664. alternative : {'two-sided', 'less', 'greater'}, optional
  665. Defines the alternative hypothesis. Default is 'two-sided'.
  666. The following options are available:
  667. * 'two-sided': the rank correlation is nonzero
  668. * 'less': the rank correlation is negative (less than zero)
  669. * 'greater': the rank correlation is positive (greater than zero)
  670. Returns
  671. -------
  672. res : SomersDResult
  673. A `SomersDResult` object with the following fields:
  674. statistic : float
  675. The Somers' :math:`D` statistic.
  676. pvalue : float
  677. The p-value for a hypothesis test whose null
  678. hypothesis is an absence of association, :math:`D=0`.
  679. See notes for more information.
  680. table : 2D array
  681. The contingency table formed from rankings `x` and `y` (or the
  682. provided contingency table, if `x` is a 2D array)
  683. See Also
  684. --------
  685. kendalltau : Calculates Kendall's tau, another correlation measure.
  686. weightedtau : Computes a weighted version of Kendall's tau.
  687. spearmanr : Calculates a Spearman rank-order correlation coefficient.
  688. pearsonr : Calculates a Pearson correlation coefficient.
  689. Notes
  690. -----
  691. This function follows the contingency table approach of [2]_ and
  692. [3]_. *p*-values are computed based on an asymptotic approximation of
  693. the test statistic distribution under the null hypothesis :math:`D=0`.
  694. Theoretically, hypothesis tests based on Kendall's :math:`tau` and Somers'
  695. :math:`D` should be identical.
  696. However, the *p*-values returned by `kendalltau` are based
  697. on the null hypothesis of *independence* between :math:`X` and :math:`Y`
  698. (i.e. the population from which pairs in :math:`X` and :math:`Y` are
  699. sampled contains equal numbers of all possible pairs), which is more
  700. specific than the null hypothesis :math:`D=0` used here. If the null
  701. hypothesis of independence is desired, it is acceptable to use the
  702. *p*-value returned by `kendalltau` with the statistic returned by
  703. `somersd` and vice versa. For more information, see [2]_.
  704. Contingency tables are formatted according to the convention used by
  705. SAS and R: the first ranking supplied (``x``) is the "row" variable, and
  706. the second ranking supplied (``y``) is the "column" variable. This is
  707. opposite the convention of Somers' original paper [1]_.
  708. References
  709. ----------
  710. .. [1] Robert H. Somers, "A New Asymmetric Measure of Association for
  711. Ordinal Variables", *American Sociological Review*, Vol. 27, No. 6,
  712. pp. 799--811, 1962.
  713. .. [2] Morton B. Brown and Jacqueline K. Benedetti, "Sampling Behavior of
  714. Tests for Correlation in Two-Way Contingency Tables", *Journal of
  715. the American Statistical Association* Vol. 72, No. 358, pp.
  716. 309--315, 1977.
  717. .. [3] SAS Institute, Inc., "The FREQ Procedure (Book Excerpt)",
  718. *SAS/STAT 9.2 User's Guide, Second Edition*, SAS Publishing, 2009.
  719. .. [4] Laerd Statistics, "Somers' d using SPSS Statistics", *SPSS
  720. Statistics Tutorials and Statistical Guides*,
  721. https://statistics.laerd.com/spss-tutorials/somers-d-using-spss-statistics.php,
  722. Accessed July 31, 2020.
  723. Examples
  724. --------
  725. We calculate Somers' D for the example given in [4]_, in which a hotel
  726. chain owner seeks to determine the association between hotel room
  727. cleanliness and customer satisfaction. The independent variable, hotel
  728. room cleanliness, is ranked on an ordinal scale: "below average (1)",
  729. "average (2)", or "above average (3)". The dependent variable, customer
  730. satisfaction, is ranked on a second scale: "very dissatisfied (1)",
  731. "moderately dissatisfied (2)", "neither dissatisfied nor satisfied (3)",
  732. "moderately satisfied (4)", or "very satisfied (5)". 189 customers
  733. respond to the survey, and the results are cast into a contingency table
  734. with the hotel room cleanliness as the "row" variable and customer
  735. satisfaction as the "column" variable.
  736. +-----+-----+-----+-----+-----+-----+
  737. | | (1) | (2) | (3) | (4) | (5) |
  738. +=====+=====+=====+=====+=====+=====+
  739. | (1) | 27 | 25 | 14 | 7 | 0 |
  740. +-----+-----+-----+-----+-----+-----+
  741. | (2) | 7 | 14 | 18 | 35 | 12 |
  742. +-----+-----+-----+-----+-----+-----+
  743. | (3) | 1 | 3 | 2 | 7 | 17 |
  744. +-----+-----+-----+-----+-----+-----+
  745. For example, 27 customers assigned their room a cleanliness ranking of
  746. "below average (1)" and a corresponding satisfaction of "very
  747. dissatisfied (1)". We perform the analysis as follows.
  748. >>> from scipy.stats import somersd
  749. >>> table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]
  750. >>> res = somersd(table)
  751. >>> res.statistic
  752. 0.6032766111513396
  753. >>> res.pvalue
  754. 1.0007091191074533e-27
  755. The value of the Somers' D statistic is approximately 0.6, indicating
  756. a positive correlation between room cleanliness and customer satisfaction
  757. in the sample.
  758. The *p*-value is very small, indicating a very small probability of
  759. observing such an extreme value of the statistic under the null
  760. hypothesis that the statistic of the entire population (from which
  761. our sample of 189 customers is drawn) is zero. This supports the
  762. alternative hypothesis that the true value of Somers' D for the population
  763. is nonzero.
  764. """
  765. x, y = np.array(x), np.array(y)
  766. if x.ndim == 1:
  767. if x.size != y.size:
  768. raise ValueError("Rankings must be of equal length.")
  769. table = scipy.stats.contingency.crosstab(x, y)[1]
  770. elif x.ndim == 2:
  771. if np.any(x < 0):
  772. raise ValueError("All elements of the contingency table must be "
  773. "non-negative.")
  774. if np.any(x != x.astype(int)):
  775. raise ValueError("All elements of the contingency table must be "
  776. "integer.")
  777. if x.nonzero()[0].size < 2:
  778. raise ValueError("At least two elements of the contingency table "
  779. "must be nonzero.")
  780. table = x
  781. else:
  782. raise ValueError("x must be either a 1D or 2D array")
  783. # The table type is converted to a float to avoid an integer overflow
  784. d, p = _somers_d(table.astype(float), alternative)
  785. # add alias for consistency with other correlation functions
  786. res = SomersDResult(d, p, table)
  787. res.correlation = d
  788. return res
  789. # This could be combined with `_all_partitions` in `_resampling.py`
  790. def _all_partitions(nx, ny):
  791. """
  792. Partition a set of indices into two fixed-length sets in all possible ways
  793. Partition a set of indices 0 ... nx + ny - 1 into two sets of length nx and
  794. ny in all possible ways (ignoring order of elements).
  795. """
  796. z = np.arange(nx+ny)
  797. for c in combinations(z, nx):
  798. x = np.array(c)
  799. mask = np.ones(nx+ny, bool)
  800. mask[x] = False
  801. y = z[mask]
  802. yield x, y
  803. def _compute_log_combinations(n):
  804. """Compute all log combination of C(n, k)."""
  805. gammaln_arr = gammaln(np.arange(n + 1) + 1)
  806. return gammaln(n + 1) - gammaln_arr - gammaln_arr[::-1]
  807. @dataclass
  808. class BarnardExactResult:
  809. statistic: float
  810. pvalue: float
  811. @xp_capabilities(np_only=True)
  812. def barnard_exact(table, alternative="two-sided", pooled=True, n=32):
  813. r"""Perform a Barnard exact test on a 2x2 contingency table.
  814. Parameters
  815. ----------
  816. table : array_like of ints
  817. A 2x2 contingency table. Elements should be non-negative integers.
  818. alternative : {'two-sided', 'less', 'greater'}, optional
  819. Defines the null and alternative hypotheses. Default is 'two-sided'.
  820. Please see explanations in the Notes section below.
  821. pooled : bool, optional
  822. Whether to compute score statistic with pooled variance (as in
  823. Student's t-test, for example) or unpooled variance (as in Welch's
  824. t-test). Default is ``True``.
  825. n : int, optional
  826. Number of sampling points used in the construction of the sampling
  827. method. Note that this argument will automatically be converted to
  828. the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to
  829. select sample points. Default is 32. Must be positive. In most cases,
  830. 32 points is enough to reach good precision. More points comes at
  831. performance cost.
  832. Returns
  833. -------
  834. ber : BarnardExactResult
  835. A result object with the following attributes.
  836. statistic : float
  837. The Wald statistic with pooled or unpooled variance, depending
  838. on the user choice of `pooled`.
  839. pvalue : float
  840. P-value, the probability of obtaining a distribution at least as
  841. extreme as the one that was actually observed, assuming that the
  842. null hypothesis is true.
  843. See Also
  844. --------
  845. chi2_contingency : Chi-square test of independence of variables in a
  846. contingency table.
  847. fisher_exact : Fisher exact test on a 2x2 contingency table.
  848. boschloo_exact : Boschloo's exact test on a 2x2 contingency table,
  849. which is an uniformly more powerful alternative to Fisher's exact test.
  850. Notes
  851. -----
  852. Barnard's test is an exact test used in the analysis of contingency
  853. tables. It examines the association of two categorical variables, and
  854. is a more powerful alternative than Fisher's exact test
  855. for 2x2 contingency tables.
  856. Let's define :math:`X_0` a 2x2 matrix representing the observed sample,
  857. where each column stores the binomial experiment, as in the example
  858. below. Let's also define :math:`p_1, p_2` the theoretical binomial
  859. probabilities for :math:`x_{11}` and :math:`x_{12}`. When using
  860. Barnard exact test, we can assert three different null hypotheses :
  861. - :math:`H_0 : p_1 \geq p_2` versus :math:`H_1 : p_1 < p_2`,
  862. with `alternative` = "less"
  863. - :math:`H_0 : p_1 \leq p_2` versus :math:`H_1 : p_1 > p_2`,
  864. with `alternative` = "greater"
  865. - :math:`H_0 : p_1 = p_2` versus :math:`H_1 : p_1 \neq p_2`,
  866. with `alternative` = "two-sided" (default one)
  867. In order to compute Barnard's exact test, we are using the Wald
  868. statistic [3]_ with pooled or unpooled variance.
  869. Under the default assumption that both variances are equal
  870. (``pooled = True``), the statistic is computed as:
  871. .. math::
  872. T(X) = \frac{
  873. \hat{p}_1 - \hat{p}_2
  874. }{
  875. \sqrt{
  876. \hat{p}(1 - \hat{p})
  877. (\frac{1}{c_1} +
  878. \frac{1}{c_2})
  879. }
  880. }
  881. with :math:`\hat{p}_1, \hat{p}_2` and :math:`\hat{p}` the estimator of
  882. :math:`p_1, p_2` and :math:`p`, the latter being the combined probability,
  883. given the assumption that :math:`p_1 = p_2`.
  884. If this assumption is invalid (``pooled = False``), the statistic is:
  885. .. math::
  886. T(X) = \frac{
  887. \hat{p}_1 - \hat{p}_2
  888. }{
  889. \sqrt{
  890. \frac{\hat{p}_1 (1 - \hat{p}_1)}{c_1} +
  891. \frac{\hat{p}_2 (1 - \hat{p}_2)}{c_2}
  892. }
  893. }
  894. The p-value is then computed as:
  895. .. math::
  896. \sum
  897. \binom{c_1}{x_{11}}
  898. \binom{c_2}{x_{12}}
  899. \pi^{x_{11} + x_{12}}
  900. (1 - \pi)^{t - x_{11} - x_{12}}
  901. where the sum is over all 2x2 contingency tables :math:`X` such that:
  902. * :math:`T(X) \leq T(X_0)` when `alternative` = "less",
  903. * :math:`T(X) \geq T(X_0)` when `alternative` = "greater", or
  904. * :math:`T(X) \geq |T(X_0)|` when `alternative` = "two-sided".
  905. Above, :math:`c_1, c_2` are the sum of the columns 1 and 2,
  906. and :math:`t` the total (sum of the 4 sample's element).
  907. The returned p-value is the maximum p-value taken over the nuisance
  908. parameter :math:`\pi`, where :math:`0 \leq \pi \leq 1`.
  909. This function's complexity is :math:`O(n c_1 c_2)`, where `n` is the
  910. number of sample points.
  911. References
  912. ----------
  913. .. [1] Barnard, G. A. "Significance Tests for 2x2 Tables". *Biometrika*.
  914. 34.1/2 (1947): 123-138. :doi:`dpgkg3`
  915. .. [2] Mehta, Cyrus R., and Pralay Senchaudhuri. "Conditional versus
  916. unconditional exact tests for comparing two binomials."
  917. *Cytel Software Corporation* 675 (2003): 1-5.
  918. .. [3] "Wald Test". *Wikipedia*. https://en.wikipedia.org/wiki/Wald_test
  919. Examples
  920. --------
  921. An example use of Barnard's test is presented in [2]_.
  922. Consider the following example of a vaccine efficacy study
  923. (Chan, 1998). In a randomized clinical trial of 30 subjects, 15 were
  924. inoculated with a recombinant DNA influenza vaccine and the 15 were
  925. inoculated with a placebo. Twelve of the 15 subjects in the placebo
  926. group (80%) eventually became infected with influenza whereas for the
  927. vaccine group, only 7 of the 15 subjects (47%) became infected. The
  928. data are tabulated as a 2 x 2 table::
  929. Vaccine Placebo
  930. Yes 7 12
  931. No 8 3
  932. When working with statistical hypothesis testing, we usually use a
  933. threshold probability or significance level upon which we decide
  934. to reject the null hypothesis :math:`H_0`. Suppose we choose the common
  935. significance level of 5%.
  936. Our alternative hypothesis is that the vaccine will lower the chance of
  937. becoming infected with the virus; that is, the probability :math:`p_1` of
  938. catching the virus with the vaccine will be *less than* the probability
  939. :math:`p_2` of catching the virus without the vaccine. Therefore, we call
  940. `barnard_exact` with the ``alternative="less"`` option:
  941. >>> import scipy.stats as stats
  942. >>> res = stats.barnard_exact([[7, 12], [8, 3]], alternative="less")
  943. >>> res.statistic
  944. -1.894
  945. >>> res.pvalue
  946. 0.03407
  947. Under the null hypothesis that the vaccine will not lower the chance of
  948. becoming infected, the probability of obtaining test results at least as
  949. extreme as the observed data is approximately 3.4%. Since this p-value is
  950. less than our chosen significance level, we have evidence to reject
  951. :math:`H_0` in favor of the alternative.
  952. Suppose we had used Fisher's exact test instead:
  953. >>> _, pvalue = stats.fisher_exact([[7, 12], [8, 3]], alternative="less")
  954. >>> pvalue
  955. 0.0640
  956. With the same threshold significance of 5%, we would not have been able
  957. to reject the null hypothesis in favor of the alternative. As stated in
  958. [2]_, Barnard's test is uniformly more powerful than Fisher's exact test
  959. because Barnard's test does not condition on any margin. Fisher's test
  960. should only be used when both sets of marginals are fixed.
  961. """
  962. if n <= 0:
  963. raise ValueError(
  964. "Number of points `n` must be strictly positive, "
  965. f"found {n!r}"
  966. )
  967. table = np.asarray(table, dtype=np.int64)
  968. if not table.shape == (2, 2):
  969. raise ValueError("The input `table` must be of shape (2, 2).")
  970. if np.any(table < 0):
  971. raise ValueError("All values in `table` must be nonnegative.")
  972. if 0 in table.sum(axis=0):
  973. # If both values in column are zero, the p-value is 1 and
  974. # the score's statistic is NaN.
  975. return BarnardExactResult(np.nan, 1.0)
  976. total_col_1, total_col_2 = table.sum(axis=0)
  977. x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(-1, 1)
  978. x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(1, -1)
  979. # We need to calculate the wald statistics for each combination of x1 and
  980. # x2.
  981. p1, p2 = x1 / total_col_1, x2 / total_col_2
  982. if pooled:
  983. p = (x1 + x2) / (total_col_1 + total_col_2)
  984. variances = p * (1 - p) * (1 / total_col_1 + 1 / total_col_2)
  985. else:
  986. variances = p1 * (1 - p1) / total_col_1 + p2 * (1 - p2) / total_col_2
  987. # To avoid warning when dividing by 0
  988. with np.errstate(divide="ignore", invalid="ignore"):
  989. wald_statistic = np.divide((p1 - p2), np.sqrt(variances))
  990. wald_statistic[p1 == p2] = 0 # Removing NaN values
  991. wald_stat_obs = wald_statistic[table[0, 0], table[0, 1]]
  992. if alternative == "two-sided":
  993. index_arr = np.abs(wald_statistic) >= abs(wald_stat_obs)
  994. elif alternative == "less":
  995. index_arr = wald_statistic <= wald_stat_obs
  996. elif alternative == "greater":
  997. index_arr = wald_statistic >= wald_stat_obs
  998. else:
  999. msg = (
  1000. "`alternative` should be one of {'two-sided', 'less', 'greater'},"
  1001. f" found {alternative!r}"
  1002. )
  1003. raise ValueError(msg)
  1004. x1_sum_x2 = x1 + x2
  1005. x1_log_comb = _compute_log_combinations(total_col_1)
  1006. x2_log_comb = _compute_log_combinations(total_col_2)
  1007. x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]
  1008. result = shgo(
  1009. _get_binomial_log_p_value_with_nuisance_param,
  1010. args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),
  1011. bounds=((0, 1),),
  1012. n=n,
  1013. sampling_method="sobol",
  1014. )
  1015. # result.fun is the negative log pvalue and therefore needs to be
  1016. # changed before return
  1017. p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)
  1018. return BarnardExactResult(wald_stat_obs, p_value)
  1019. @dataclass
  1020. class BoschlooExactResult:
  1021. statistic: float
  1022. pvalue: float
  1023. @xp_capabilities(np_only=True)
  1024. def boschloo_exact(table, alternative="two-sided", n=32):
  1025. r"""Perform Boschloo's exact test on a 2x2 contingency table.
  1026. Parameters
  1027. ----------
  1028. table : array_like of ints
  1029. A 2x2 contingency table. Elements should be non-negative integers.
  1030. alternative : {'two-sided', 'less', 'greater'}, optional
  1031. Defines the null and alternative hypotheses. Default is 'two-sided'.
  1032. Please see explanations in the Notes section below.
  1033. n : int, optional
  1034. Number of sampling points used in the construction of the sampling
  1035. method. Note that this argument will automatically be converted to
  1036. the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to
  1037. select sample points. Default is 32. Must be positive. In most cases,
  1038. 32 points is enough to reach good precision. More points comes at
  1039. performance cost.
  1040. Returns
  1041. -------
  1042. ber : BoschlooExactResult
  1043. A result object with the following attributes.
  1044. statistic : float
  1045. The statistic used in Boschloo's test; that is, the p-value
  1046. from Fisher's exact test.
  1047. pvalue : float
  1048. P-value, the probability of obtaining a distribution at least as
  1049. extreme as the one that was actually observed, assuming that the
  1050. null hypothesis is true.
  1051. See Also
  1052. --------
  1053. chi2_contingency : Chi-square test of independence of variables in a
  1054. contingency table.
  1055. fisher_exact : Fisher exact test on a 2x2 contingency table.
  1056. barnard_exact : Barnard's exact test, which is a more powerful alternative
  1057. than Fisher's exact test for 2x2 contingency tables.
  1058. Notes
  1059. -----
  1060. Boschloo's test is an exact test used in the analysis of contingency
  1061. tables. It examines the association of two categorical variables, and
  1062. is a uniformly more powerful alternative to Fisher's exact test
  1063. for 2x2 contingency tables.
  1064. Boschloo's exact test uses the p-value of Fisher's exact test as a
  1065. statistic, and Boschloo's p-value is the probability under the null
  1066. hypothesis of observing such an extreme value of this statistic.
  1067. Let's define :math:`X_0` a 2x2 matrix representing the observed sample,
  1068. where each column stores the binomial experiment, as in the example
  1069. below. Let's also define :math:`p_1, p_2` the theoretical binomial
  1070. probabilities for :math:`x_{11}` and :math:`x_{12}`. When using
  1071. Boschloo exact test, we can assert three different alternative hypotheses:
  1072. - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 < p_2`,
  1073. with `alternative` = "less"
  1074. - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 > p_2`,
  1075. with `alternative` = "greater"
  1076. - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 \neq p_2`,
  1077. with `alternative` = "two-sided" (default)
  1078. There are multiple conventions for computing a two-sided p-value when the
  1079. null distribution is asymmetric. Here, we apply the convention that the
  1080. p-value of a two-sided test is twice the minimum of the p-values of the
  1081. one-sided tests (clipped to 1.0). Note that `fisher_exact` follows a
  1082. different convention, so for a given `table`, the statistic reported by
  1083. `boschloo_exact` may differ from the p-value reported by `fisher_exact`
  1084. when ``alternative='two-sided'``.
  1085. .. versionadded:: 1.7.0
  1086. References
  1087. ----------
  1088. .. [1] R.D. Boschloo. "Raised conditional level of significance for the
  1089. 2 x 2-table when testing the equality of two probabilities",
  1090. Statistica Neerlandica, 24(1), 1970
  1091. .. [2] "Boschloo's test", Wikipedia,
  1092. https://en.wikipedia.org/wiki/Boschloo%27s_test
  1093. .. [3] Lise M. Saari et al. "Employee attitudes and job satisfaction",
  1094. Human Resource Management, 43(4), 395-407, 2004,
  1095. :doi:`10.1002/hrm.20032`.
  1096. Examples
  1097. --------
  1098. In the following example, we consider the article "Employee
  1099. attitudes and job satisfaction" [3]_
  1100. which reports the results of a survey from 63 scientists and 117 college
  1101. professors. Of the 63 scientists, 31 said they were very satisfied with
  1102. their jobs, whereas 74 of the college professors were very satisfied
  1103. with their work. Is this significant evidence that college
  1104. professors are happier with their work than scientists?
  1105. The following table summarizes the data mentioned above::
  1106. college professors scientists
  1107. Very Satisfied 74 31
  1108. Dissatisfied 43 32
  1109. When working with statistical hypothesis testing, we usually use a
  1110. threshold probability or significance level upon which we decide
  1111. to reject the null hypothesis :math:`H_0`. Suppose we choose the common
  1112. significance level of 5%.
  1113. Our alternative hypothesis is that college professors are truly more
  1114. satisfied with their work than scientists. Therefore, we expect
  1115. :math:`p_1` the proportion of very satisfied college professors to be
  1116. greater than :math:`p_2`, the proportion of very satisfied scientists.
  1117. We thus call `boschloo_exact` with the ``alternative="greater"`` option:
  1118. >>> import scipy.stats as stats
  1119. >>> res = stats.boschloo_exact([[74, 31], [43, 32]], alternative="greater")
  1120. >>> res.statistic
  1121. 0.0483
  1122. >>> res.pvalue
  1123. 0.0355
  1124. Under the null hypothesis that scientists are happier in their work than
  1125. college professors, the probability of obtaining test
  1126. results at least as extreme as the observed data is approximately 3.55%.
  1127. Since this p-value is less than our chosen significance level, we have
  1128. evidence to reject :math:`H_0` in favor of the alternative hypothesis.
  1129. """
  1130. hypergeom = distributions.hypergeom
  1131. if n <= 0:
  1132. raise ValueError(
  1133. "Number of points `n` must be strictly positive,"
  1134. f" found {n!r}"
  1135. )
  1136. table = np.asarray(table, dtype=np.int64)
  1137. if not table.shape == (2, 2):
  1138. raise ValueError("The input `table` must be of shape (2, 2).")
  1139. if np.any(table < 0):
  1140. raise ValueError("All values in `table` must be nonnegative.")
  1141. if 0 in table.sum(axis=0):
  1142. # If both values in column are zero, the p-value is 1 and
  1143. # the score's statistic is NaN.
  1144. return BoschlooExactResult(np.nan, np.nan)
  1145. total_col_1, total_col_2 = table.sum(axis=0)
  1146. total = total_col_1 + total_col_2
  1147. x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(1, -1)
  1148. x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(-1, 1)
  1149. x1_sum_x2 = x1 + x2
  1150. if alternative == 'less':
  1151. pvalues = hypergeom.cdf(x1, total, x1_sum_x2, total_col_1).T
  1152. elif alternative == 'greater':
  1153. # Same formula as the 'less' case, but with the second column.
  1154. pvalues = hypergeom.cdf(x2, total, x1_sum_x2, total_col_2).T
  1155. elif alternative == 'two-sided':
  1156. boschloo_less = boschloo_exact(table, alternative="less", n=n)
  1157. boschloo_greater = boschloo_exact(table, alternative="greater", n=n)
  1158. res = (
  1159. boschloo_less if boschloo_less.pvalue < boschloo_greater.pvalue
  1160. else boschloo_greater
  1161. )
  1162. # Two-sided p-value is defined as twice the minimum of the one-sided
  1163. # p-values
  1164. pvalue = np.clip(2 * res.pvalue, a_min=0, a_max=1)
  1165. return BoschlooExactResult(res.statistic, pvalue)
  1166. else:
  1167. msg = (
  1168. f"`alternative` should be one of {'two-sided', 'less', 'greater'},"
  1169. f" found {alternative!r}"
  1170. )
  1171. raise ValueError(msg)
  1172. fisher_stat = pvalues[table[0, 0], table[0, 1]]
  1173. # fisher_stat * (1+1e-13) guards us from small numerical error. It is
  1174. # equivalent to np.isclose with relative tol of 1e-13 and absolute tol of 0
  1175. # For more throughout explanations, see gh-14178
  1176. index_arr = pvalues <= fisher_stat * (1+1e-13)
  1177. x1, x2, x1_sum_x2 = x1.T, x2.T, x1_sum_x2.T
  1178. x1_log_comb = _compute_log_combinations(total_col_1)
  1179. x2_log_comb = _compute_log_combinations(total_col_2)
  1180. x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]
  1181. result = shgo(
  1182. _get_binomial_log_p_value_with_nuisance_param,
  1183. args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),
  1184. bounds=((0, 1),),
  1185. n=n,
  1186. sampling_method="sobol",
  1187. )
  1188. # result.fun is the negative log pvalue and therefore needs to be
  1189. # changed before return
  1190. p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)
  1191. return BoschlooExactResult(fisher_stat, p_value)
  1192. def _get_binomial_log_p_value_with_nuisance_param(
  1193. nuisance_param, x1_sum_x2, x1_sum_x2_log_comb, index_arr
  1194. ):
  1195. r"""
  1196. Compute the log pvalue in respect of a nuisance parameter considering
  1197. a 2x2 sample space.
  1198. Parameters
  1199. ----------
  1200. nuisance_param : float
  1201. nuisance parameter used in the computation of the maximisation of
  1202. the p-value. Must be between 0 and 1
  1203. x1_sum_x2 : ndarray
  1204. Sum of x1 and x2 inside barnard_exact
  1205. x1_sum_x2_log_comb : ndarray
  1206. sum of the log combination of x1 and x2
  1207. index_arr : ndarray of boolean
  1208. Returns
  1209. -------
  1210. p_value : float
  1211. Return the maximum p-value considering every nuisance parameter
  1212. between 0 and 1
  1213. Notes
  1214. -----
  1215. Both Barnard's test and Boschloo's test iterate over a nuisance parameter
  1216. :math:`\pi \in [0, 1]` to find the maximum p-value. To search this
  1217. maxima, this function return the negative log pvalue with respect to the
  1218. nuisance parameter passed in params. This negative log p-value is then
  1219. used in `shgo` to find the minimum negative pvalue which is our maximum
  1220. pvalue.
  1221. Also, to compute the different combination used in the
  1222. p-values' computation formula, this function uses `gammaln` which is
  1223. more tolerant for large value than `scipy.special.comb`. `gammaln` gives
  1224. a log combination. For the little precision loss, performances are
  1225. improved a lot.
  1226. """
  1227. t1, t2 = x1_sum_x2.shape
  1228. n = t1 + t2 - 2
  1229. with np.errstate(divide="ignore", invalid="ignore"):
  1230. log_nuisance = np.log(
  1231. nuisance_param,
  1232. out=np.zeros_like(nuisance_param),
  1233. where=nuisance_param >= 0,
  1234. )
  1235. log_1_minus_nuisance = np.log(
  1236. 1 - nuisance_param,
  1237. out=np.zeros_like(nuisance_param),
  1238. where=1 - nuisance_param >= 0,
  1239. )
  1240. nuisance_power_x1_x2 = log_nuisance * x1_sum_x2
  1241. nuisance_power_x1_x2[(x1_sum_x2 == 0)[:, :]] = 0
  1242. nuisance_power_n_minus_x1_x2 = log_1_minus_nuisance * (n - x1_sum_x2)
  1243. nuisance_power_n_minus_x1_x2[(x1_sum_x2 == n)[:, :]] = 0
  1244. tmp_log_values_arr = (
  1245. x1_sum_x2_log_comb
  1246. + nuisance_power_x1_x2
  1247. + nuisance_power_n_minus_x1_x2
  1248. )
  1249. tmp_values_from_index = tmp_log_values_arr[index_arr]
  1250. # To avoid dividing by zero in log function and getting inf value,
  1251. # values are centered according to the max
  1252. max_value = tmp_values_from_index.max()
  1253. # To have better result's precision, the log pvalue is taken here.
  1254. # Indeed, pvalue is included inside [0, 1] interval. Passing the
  1255. # pvalue to log makes the interval a lot bigger ([-inf, 0]), and thus
  1256. # help us to achieve better precision
  1257. with np.errstate(divide="ignore", invalid="ignore"):
  1258. log_probs = np.exp(tmp_values_from_index - max_value).sum()
  1259. log_pvalue = max_value + np.log(
  1260. log_probs,
  1261. out=np.full_like(log_probs, -np.inf),
  1262. where=log_probs > 0,
  1263. )
  1264. # Since shgo find the minima, minus log pvalue is returned
  1265. return -log_pvalue
  1266. @np.vectorize(otypes=[np.float64])
  1267. def _pval_cvm_2samp_exact(s, m, n):
  1268. """
  1269. Compute the exact p-value of the Cramer-von Mises two-sample test
  1270. for a given value s of the test statistic.
  1271. m and n are the sizes of the samples.
  1272. [1] Y. Xiao, A. Gordon, and A. Yakovlev, "A C++ Program for
  1273. the Cramér-Von Mises Two-Sample Test", J. Stat. Soft.,
  1274. vol. 17, no. 8, pp. 1-15, Dec. 2006.
  1275. [2] T. W. Anderson "On the Distribution of the Two-Sample Cramer-von Mises
  1276. Criterion," The Annals of Mathematical Statistics, Ann. Math. Statist.
  1277. 33(3), 1148-1159, (September, 1962)
  1278. """
  1279. # [1, p. 3]
  1280. lcm = np.lcm(m, n)
  1281. # [1, p. 4], below eq. 3
  1282. a = lcm // m
  1283. b = lcm // n
  1284. # Combine Eq. 9 in [2] with Eq. 2 in [1] and solve for $\zeta$
  1285. # Hint: `s` is $U$ in [2], and $T_2$ in [1] is $T$ in [2]
  1286. mn = m * n
  1287. zeta = lcm ** 2 * (m + n) * (6 * s - mn * (4 * mn - 1)) // (6 * mn ** 2)
  1288. # bound maximum value that may appear in `gs` (remember both rows!)
  1289. zeta_bound = lcm**2 * (m + n) # bound elements in row 1
  1290. combinations = math.comb(m + n, m) # sum of row 2
  1291. max_gs = max(zeta_bound, combinations)
  1292. dtype = np.min_scalar_type(max_gs)
  1293. # the frequency table of $g_{u, v}^+$ defined in [1, p. 6]
  1294. gs = ([np.array([[0], [1]], dtype=dtype)]
  1295. + [np.empty((2, 0), dtype=dtype) for _ in range(m)])
  1296. for u in range(n + 1):
  1297. next_gs = []
  1298. tmp = np.empty((2, 0), dtype=dtype)
  1299. for v, g in enumerate(gs):
  1300. # Calculate g recursively with eq. 11 in [1]. Even though it
  1301. # doesn't look like it, this also does 12/13 (all of Algorithm 1).
  1302. vi, i0, i1 = np.intersect1d(tmp[0], g[0], return_indices=True)
  1303. tmp = np.concatenate([
  1304. np.stack([vi, tmp[1, i0] + g[1, i1]]),
  1305. np.delete(tmp, i0, 1),
  1306. np.delete(g, i1, 1)
  1307. ], 1)
  1308. res = (a * v - b * u) ** 2
  1309. tmp[0] += res.astype(dtype)
  1310. next_gs.append(tmp)
  1311. gs = next_gs
  1312. value, freq = gs[m]
  1313. return np.float64(np.sum(freq[value >= zeta]) / combinations)
  1314. def _pval_cvm_2samp_asymptotic(t, N, nx, ny, k, *, xp):
  1315. # compute expected value and variance of T (eq. 11 and 14 in [2])
  1316. et = (1 + 1 / N) / 6
  1317. vt = (N + 1) * (4 * k * N - 3 * (nx ** 2 + ny ** 2) - 2 * k)
  1318. vt = vt / (45 * N ** 2 * 4 * k)
  1319. # computed the normalized statistic (eq. 15 in [2])
  1320. tn = 1 / 6 + (t - et) / math.sqrt(45 * vt)
  1321. # approximate distribution of tn with limiting distribution
  1322. # of the one-sample test statistic
  1323. # if tn < 0.003, the _cdf_cvm_inf(tn) < 1.28*1e-18, return 1.0 directly
  1324. p = xpx.apply_where(tn >= 0.003,
  1325. (tn,),
  1326. lambda tn: xp.clip(1. - _cdf_cvm_inf(tn, xp=xp), 0.),
  1327. fill_value = 1.)
  1328. return p
  1329. @xp_capabilities(skip_backends=[('cupy', 'needs rankdata'),
  1330. ('dask.array', 'needs rankdata')],
  1331. cpu_only=True, jax_jit=False)
  1332. @_axis_nan_policy_factory(CramerVonMisesResult, n_samples=2, too_small=1,
  1333. result_to_tuple=_cvm_result_to_tuple)
  1334. def cramervonmises_2samp(x, y, method='auto', *, axis=0):
  1335. r"""Perform the two-sample Cramér-von Mises test for goodness of fit.
  1336. This is the two-sample version of the Cramér-von Mises test ([1]_):
  1337. for two independent samples :math:`X_1, ..., X_n` and
  1338. :math:`Y_1, ..., Y_m`, the null hypothesis is that the samples
  1339. come from the same (unspecified) continuous distribution.
  1340. The test statistic :math:`T` is defined as in [1]_:
  1341. .. math::
  1342. T = \frac{nm}{n+m}\omega^2 =
  1343. \frac{U}{n m (n+m)} - \frac{4 m n - 1}{6(m+n)}
  1344. where :math:`U` is defined as below, and :math:`\omega^2` is the Cramér-von
  1345. Mises criterion. The function :math:`r(\cdot)` here denotes the rank of the
  1346. observed values :math:`x_i` and :math:`y_j` within the pooled sample of size
  1347. :math:`n + m`, with ties assigned mid-rank values:
  1348. .. math::
  1349. U = n \sum_{i=1}^n (r(x_i)-i)^2 + m \sum_{j=1}^m (r(y_j)-j)^2
  1350. Parameters
  1351. ----------
  1352. x : array_like
  1353. A 1-D array of observed values of the random variables :math:`X_i`.
  1354. Must contain at least two observations.
  1355. y : array_like
  1356. A 1-D array of observed values of the random variables :math:`Y_i`.
  1357. Must contain at least two observations.
  1358. method : {'auto', 'asymptotic', 'exact'}, optional
  1359. The method used to compute the p-value, see Notes for details.
  1360. The default is 'auto'.
  1361. axis : int or tuple of ints, default: 0
  1362. If an int or tuple of ints, the axis or axes of the input along which
  1363. to compute the statistic. The statistic of each axis-slice (e.g. row)
  1364. of the input will appear in a corresponding element of the output.
  1365. If ``None``, the input will be raveled before computing the statistic.
  1366. Returns
  1367. -------
  1368. res : object with attributes
  1369. statistic : float
  1370. Cramér-von Mises statistic :math:`T`.
  1371. pvalue : float
  1372. The p-value.
  1373. See Also
  1374. --------
  1375. cramervonmises, anderson_ksamp, epps_singleton_2samp, ks_2samp
  1376. Notes
  1377. -----
  1378. .. versionadded:: 1.7.0
  1379. The statistic is computed according to equation 9 in [2]_. The
  1380. calculation of the p-value depends on the keyword `method`:
  1381. - ``asymptotic``: The p-value is approximated by using the limiting
  1382. distribution of the test statistic.
  1383. - ``exact``: The exact p-value is computed by enumerating all
  1384. possible combinations of the test statistic, see [2]_.
  1385. If ``method='auto'``, the exact approach is used
  1386. if both samples contain equal to or less than 20 observations,
  1387. otherwise the asymptotic distribution is used.
  1388. If the underlying distribution is not continuous, the p-value is likely to
  1389. be conservative (Section 6.2 in [3]_). When ranking the data to compute
  1390. the test statistic, midranks are used if there are ties.
  1391. References
  1392. ----------
  1393. .. [1] https://en.wikipedia.org/wiki/Cramer-von_Mises_criterion
  1394. .. [2] Anderson, T.W. (1962). On the distribution of the two-sample
  1395. Cramer-von-Mises criterion. The Annals of Mathematical
  1396. Statistics, pp. 1148-1159.
  1397. .. [3] Conover, W.J., Practical Nonparametric Statistics, 1971.
  1398. Examples
  1399. --------
  1400. Suppose we wish to test whether two samples generated by
  1401. ``scipy.stats.norm.rvs`` have the same distribution. We choose a
  1402. significance level of alpha=0.05.
  1403. >>> import numpy as np
  1404. >>> from scipy import stats
  1405. >>> rng = np.random.default_rng()
  1406. >>> x = stats.norm.rvs(size=100, random_state=rng)
  1407. >>> y = stats.norm.rvs(size=70, random_state=rng)
  1408. >>> res = stats.cramervonmises_2samp(x, y)
  1409. >>> res.statistic, res.pvalue
  1410. (0.29376470588235293, 0.1412873014573014)
  1411. The p-value exceeds our chosen significance level, so we do not
  1412. reject the null hypothesis that the observed samples are drawn from the
  1413. same distribution.
  1414. For small sample sizes, one can compute the exact p-values:
  1415. >>> x = stats.norm.rvs(size=7, random_state=rng)
  1416. >>> y = stats.t.rvs(df=2, size=6, random_state=rng)
  1417. >>> res = stats.cramervonmises_2samp(x, y, method='exact')
  1418. >>> res.statistic, res.pvalue
  1419. (0.197802197802198, 0.31643356643356646)
  1420. The p-value based on the asymptotic distribution is a good approximation
  1421. even though the sample size is small.
  1422. >>> res = stats.cramervonmises_2samp(x, y, method='asymptotic')
  1423. >>> res.statistic, res.pvalue
  1424. (0.197802197802198, 0.2966041181527128)
  1425. Independent of the method, one would not reject the null hypothesis at the
  1426. chosen significance level in this example.
  1427. """
  1428. xp = array_namespace(x, y)
  1429. nx = x.shape[-1]
  1430. ny = y.shape[-1]
  1431. if nx <= 1 or ny <= 1: # only needed for testing / `test_axis_nan_policy`
  1432. raise ValueError('x and y must contain at least two observations.')
  1433. if method not in ['auto', 'exact', 'asymptotic']:
  1434. raise ValueError('method must be either auto, exact or asymptotic.')
  1435. if method == 'auto':
  1436. if max(nx, ny) > 20:
  1437. method = 'asymptotic'
  1438. else:
  1439. method = 'exact'
  1440. # axis=-1 is guaranteed by _axis_nan_policy decorator
  1441. xa = xp.sort(x, axis=-1)
  1442. ya = xp.sort(y, axis=-1)
  1443. # get ranks of x and y in the pooled sample
  1444. z = xp.concat([xa, ya], axis=-1)
  1445. # in case of ties, use midrank (see [1])
  1446. r = scipy.stats.rankdata(z, method='average', axis=-1)
  1447. dtype = xp_result_type(x, y, force_floating=True, xp=xp)
  1448. r = xp.astype(r, dtype, copy=False)
  1449. rx = r[..., :nx]
  1450. ry = r[..., nx:]
  1451. # compute U (eq. 10 in [2])
  1452. u = (nx * xp.sum((rx - xp.arange(1, nx+1, dtype=dtype))**2, axis=-1)
  1453. + ny * xp.sum((ry - xp.arange(1, ny+1, dtype=dtype))**2, axis=-1))
  1454. # compute T (eq. 9 in [2])
  1455. k, N = nx*ny, nx + ny
  1456. t = u / (k*N) - (4*k - 1)/(6*N)
  1457. if method == 'exact':
  1458. p = xp.asarray(_pval_cvm_2samp_exact(np.asarray(u), nx, ny), dtype=dtype)
  1459. else:
  1460. p = _pval_cvm_2samp_asymptotic(t, N, nx, ny, k, xp=xp)
  1461. t = t[()] if t.ndim == 0 else t
  1462. p = p[()] if p.ndim == 0 else p
  1463. return CramerVonMisesResult(statistic=t, pvalue=p)
  1464. class TukeyHSDResult:
  1465. """Result of `scipy.stats.tukey_hsd`.
  1466. Attributes
  1467. ----------
  1468. statistic : float ndarray
  1469. The computed statistic of the test for each comparison. The element
  1470. at index ``(i, j)`` is the statistic for the comparison between groups
  1471. ``i`` and ``j``.
  1472. pvalue : float ndarray
  1473. The associated p-value from the studentized range distribution. The
  1474. element at index ``(i, j)`` is the p-value for the comparison
  1475. between groups ``i`` and ``j``.
  1476. Notes
  1477. -----
  1478. The string representation of this object displays the most recently
  1479. calculated confidence interval, and if none have been previously
  1480. calculated, it will evaluate ``confidence_interval()``.
  1481. References
  1482. ----------
  1483. .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's
  1484. Method."
  1485. https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
  1486. 28 November 2020.
  1487. .. [2] P. A. Games and J. F. Howell, "Pairwise Multiple Comparison Procedures
  1488. with Unequal N's and/or Variances: A Monte Carlo Study," Journal of
  1489. Educational Statistics, vol. 1, no. 2, pp. 113-125, Jun. 1976,
  1490. doi: https://doi.org/10.3102/10769986001002113.
  1491. """
  1492. def __init__(self, statistic, pvalue, _ntreatments, _df, _stand_err):
  1493. self.statistic = statistic
  1494. self.pvalue = pvalue
  1495. self._ntreatments = _ntreatments
  1496. self._df = _df
  1497. self._stand_err = _stand_err
  1498. self._ci = None
  1499. self._ci_cl = None
  1500. def __str__(self):
  1501. # Note: `__str__` prints the confidence intervals from the most
  1502. # recent call to `confidence_interval`. If it has not been called,
  1503. # it will be called with the default CL of .95.
  1504. if self._ci is None:
  1505. self.confidence_interval(confidence_level=.95)
  1506. s = ("Pairwise Group Comparisons"
  1507. f" ({self._ci_cl*100:.1f}% Confidence Interval)\n")
  1508. s += "Comparison Statistic p-value Lower CI Upper CI\n"
  1509. for i, j in np.ndindex(self.pvalue.shape):
  1510. if i != j:
  1511. s += (f" ({i} - {j}) {self.statistic[i, j]:>10.3f}"
  1512. f"{self.pvalue[i, j]:>10.3f}"
  1513. f"{self._ci.low[i, j]:>10.3f}"
  1514. f"{self._ci.high[i, j]:>10.3f}\n")
  1515. return s
  1516. def confidence_interval(self, confidence_level=.95):
  1517. """Compute the confidence interval for the specified confidence level.
  1518. Parameters
  1519. ----------
  1520. confidence_level : float, optional
  1521. Confidence level for the computed confidence interval
  1522. of the estimated proportion. Default is .95.
  1523. Returns
  1524. -------
  1525. ci : ``ConfidenceInterval`` object
  1526. The object has attributes ``low`` and ``high`` that hold the
  1527. lower and upper bounds of the confidence intervals for each
  1528. comparison. The high and low values are accessible for each
  1529. comparison at index ``(i, j)`` between groups ``i`` and ``j``.
  1530. References
  1531. ----------
  1532. .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1.
  1533. Tukey's Method."
  1534. https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
  1535. 28 November 2020.
  1536. .. [2] P. A. Games and J. F. Howell, "Pairwise Multiple Comparison Procedures
  1537. with Unequal N's and/or Variances: A Monte Carlo Study," Journal of
  1538. Educational Statistics, vol. 1, no. 2, pp. 113-125, Jun. 1976,
  1539. doi: https://doi.org/10.3102/10769986001002113.
  1540. Examples
  1541. --------
  1542. >>> from scipy.stats import tukey_hsd
  1543. >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
  1544. >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
  1545. >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
  1546. >>> result = tukey_hsd(group0, group1, group2)
  1547. >>> ci = result.confidence_interval()
  1548. >>> ci.low
  1549. array([[-3.649159, -8.249159, -3.909159],
  1550. [ 0.950841, -3.649159, 0.690841],
  1551. [-3.389159, -7.989159, -3.649159]])
  1552. >>> ci.high
  1553. array([[ 3.649159, -0.950841, 3.389159],
  1554. [ 8.249159, 3.649159, 7.989159],
  1555. [ 3.909159, -0.690841, 3.649159]])
  1556. """
  1557. # check to see if the supplied confidence level matches that of the
  1558. # previously computed CI.
  1559. if (self._ci is not None and self._ci_cl is not None and
  1560. confidence_level == self._ci_cl):
  1561. return self._ci
  1562. if not 0 < confidence_level < 1:
  1563. raise ValueError("Confidence level must be between 0 and 1.")
  1564. # determine the critical value of the studentized range using the
  1565. # appropriate confidence level, number of treatments, and degrees
  1566. # of freedom. See [1] "Confidence limits for Tukey's method" / [2] p.117
  1567. # "H0 was rejected if...". Note that in the cases of unequal sample sizes,
  1568. # there will be a criterion for each group comparison.
  1569. params = (confidence_level, self._ntreatments, self._df)
  1570. srd = distributions.studentized_range.ppf(*params)
  1571. # also called maximum critical value, the confidence_radius is the
  1572. # studentized range critical value * the square root of mean square
  1573. # error over the sample size.
  1574. confidence_radius = srd * self._stand_err
  1575. # the confidence levels are determined by the
  1576. # `mean_differences` +- `confidence_radius`
  1577. upper_conf = self.statistic + confidence_radius
  1578. lower_conf = self.statistic - confidence_radius
  1579. self._ci = ConfidenceInterval(low=lower_conf, high=upper_conf)
  1580. self._ci_cl = confidence_level
  1581. return self._ci
  1582. def _tukey_hsd_iv(args, equal_var):
  1583. if (len(args)) < 2:
  1584. raise ValueError("There must be more than 1 treatment.")
  1585. if not isinstance(equal_var, bool):
  1586. raise TypeError("Expected a boolean value for 'equal_var'")
  1587. args = [np.asarray(arg) for arg in args]
  1588. for arg in args:
  1589. if arg.ndim != 1:
  1590. raise ValueError("Input samples must be one-dimensional.")
  1591. if arg.size <= 1:
  1592. raise ValueError("Input sample size must be greater than one.")
  1593. if np.isinf(arg).any():
  1594. raise ValueError("Input samples must be finite.")
  1595. return args
  1596. @xp_capabilities(np_only=True)
  1597. def tukey_hsd(*args, equal_var=True):
  1598. """Perform Tukey's HSD test for equality of means over multiple treatments.
  1599. Tukey's honestly significant difference (HSD) test performs pairwise
  1600. comparison of means for a set of samples. Whereas ANOVA (e.g. `f_oneway`)
  1601. assesses whether the true means underlying each sample are identical,
  1602. Tukey's HSD is a post hoc test used to compare the mean of each sample
  1603. to the mean of each other sample.
  1604. The null hypothesis is that the distributions underlying the samples all
  1605. have the same mean. The test statistic, which is computed for every
  1606. possible pairing of samples, is simply the difference between the sample
  1607. means. For each pair, the p-value is the probability under the null
  1608. hypothesis (and other assumptions; see notes) of observing such an extreme
  1609. value of the statistic, considering that many pairwise comparisons are
  1610. being performed. Confidence intervals for the difference between each pair
  1611. of means are also available.
  1612. Parameters
  1613. ----------
  1614. sample1, sample2, ... : array_like
  1615. The sample measurements for each group. There must be at least
  1616. two arguments.
  1617. equal_var: bool, optional
  1618. If True (default) and equal sample size, perform Tukey-HSD test [6].
  1619. If True and unequal sample size, perform Tukey-Kramer test [4]_.
  1620. If False, perform Games-Howell test [7]_, which does not assume equal variances.
  1621. Returns
  1622. -------
  1623. result : `~scipy.stats._result_classes.TukeyHSDResult` instance
  1624. The return value is an object with the following attributes:
  1625. statistic : float ndarray
  1626. The computed statistic of the test for each comparison. The element
  1627. at index ``(i, j)`` is the statistic for the comparison between
  1628. groups ``i`` and ``j``.
  1629. pvalue : float ndarray
  1630. The computed p-value of the test for each comparison. The element
  1631. at index ``(i, j)`` is the p-value for the comparison between
  1632. groups ``i`` and ``j``.
  1633. The object has the following methods:
  1634. confidence_interval(confidence_level=0.95):
  1635. Compute the confidence interval for the specified confidence level.
  1636. See Also
  1637. --------
  1638. dunnett : performs comparison of means against a control group.
  1639. Notes
  1640. -----
  1641. The use of this test relies on several assumptions.
  1642. 1. The observations are independent within and among groups.
  1643. 2. The observations within each group are normally distributed.
  1644. 3. The distributions from which the samples are drawn have the same finite
  1645. variance.
  1646. The original formulation of the test was for samples of equal size drawn from
  1647. populations assumed to have equal variances [6]_. In case of unequal sample sizes,
  1648. the test uses the Tukey-Kramer method [4]_. When equal variances are not assumed
  1649. (``equal_var=False``), the test uses the Games-Howell method [7]_.
  1650. References
  1651. ----------
  1652. .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's
  1653. Method."
  1654. https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
  1655. 28 November 2020.
  1656. .. [2] Abdi, Herve & Williams, Lynne. (2021). "Tukey's Honestly Significant
  1657. Difference (HSD) Test."
  1658. https://personal.utdallas.edu/~herve/abdi-HSD2010-pretty.pdf
  1659. .. [3] "One-Way ANOVA Using SAS PROC ANOVA & PROC GLM." SAS
  1660. Tutorials, 2007, www.stattutorials.com/SAS/TUTORIAL-PROC-GLM.htm.
  1661. .. [4] Kramer, Clyde Young. "Extension of Multiple Range Tests to Group
  1662. Means with Unequal Numbers of Replications." Biometrics, vol. 12,
  1663. no. 3, 1956, pp. 307-310. JSTOR, www.jstor.org/stable/3001469.
  1664. Accessed 25 May 2021.
  1665. .. [5] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.3.3.
  1666. The ANOVA table and tests of hypotheses about means"
  1667. https://www.itl.nist.gov/div898/handbook/prc/section4/prc433.htm,
  1668. 2 June 2021.
  1669. .. [6] Tukey, John W. "Comparing Individual Means in the Analysis of
  1670. Variance." Biometrics, vol. 5, no. 2, 1949, pp. 99-114. JSTOR,
  1671. www.jstor.org/stable/3001913. Accessed 14 June 2021.
  1672. .. [7] P. A. Games and J. F. Howell, "Pairwise Multiple Comparison Procedures
  1673. with Unequal N's and/or Variances: A Monte Carlo Study," Journal of
  1674. Educational Statistics, vol. 1, no. 2, pp. 113-125, Jun. 1976,
  1675. doi: https://doi.org/10.3102/10769986001002113.
  1676. Examples
  1677. --------
  1678. Here are some data comparing the time to relief of three brands of
  1679. headache medicine, reported in minutes. Data adapted from [3]_.
  1680. >>> import numpy as np
  1681. >>> from scipy.stats import tukey_hsd
  1682. >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
  1683. >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
  1684. >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
  1685. We would like to see if the means between any of the groups are
  1686. significantly different. First, visually examine a box and whisker plot.
  1687. >>> import matplotlib.pyplot as plt
  1688. >>> fig, ax = plt.subplots(1, 1)
  1689. >>> ax.boxplot([group0, group1, group2])
  1690. >>> ax.set_xticklabels(["group0", "group1", "group2"]) # doctest: +SKIP
  1691. >>> ax.set_ylabel("mean") # doctest: +SKIP
  1692. >>> plt.show()
  1693. From the box and whisker plot, we can see overlap in the interquartile
  1694. ranges group 1 to group 2 and group 3, but we can apply the ``tukey_hsd``
  1695. test to determine if the difference between means is significant. We
  1696. set a significance level of .05 to reject the null hypothesis.
  1697. >>> res = tukey_hsd(group0, group1, group2)
  1698. >>> print(res)
  1699. Pairwise Group Comparisons (95.0% Confidence Interval)
  1700. Comparison Statistic p-value Lower CI Upper CI
  1701. (0 - 1) -4.600 0.014 -8.249 -0.951
  1702. (0 - 2) -0.260 0.980 -3.909 3.389
  1703. (1 - 0) 4.600 0.014 0.951 8.249
  1704. (1 - 2) 4.340 0.020 0.691 7.989
  1705. (2 - 0) 0.260 0.980 -3.389 3.909
  1706. (2 - 1) -4.340 0.020 -7.989 -0.691
  1707. The null hypothesis is that each group has the same mean. The p-value for
  1708. comparisons between ``group0`` and ``group1`` as well as ``group1`` and
  1709. ``group2`` do not exceed .05, so we reject the null hypothesis that they
  1710. have the same means. The p-value of the comparison between ``group0``
  1711. and ``group2`` exceeds .05, so we accept the null hypothesis that there
  1712. is not a significant difference between their means.
  1713. We can also compute the confidence interval associated with our chosen
  1714. confidence level.
  1715. >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
  1716. >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
  1717. >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
  1718. >>> result = tukey_hsd(group0, group1, group2)
  1719. >>> conf = res.confidence_interval(confidence_level=.99)
  1720. >>> for ((i, j), l) in np.ndenumerate(conf.low):
  1721. ... # filter out self comparisons
  1722. ... if i != j:
  1723. ... h = conf.high[i,j]
  1724. ... print(f"({i} - {j}) {l:>6.3f} {h:>6.3f}")
  1725. (0 - 1) -9.480 0.280
  1726. (0 - 2) -5.140 4.620
  1727. (1 - 0) -0.280 9.480
  1728. (1 - 2) -0.540 9.220
  1729. (2 - 0) -4.620 5.140
  1730. (2 - 1) -9.220 0.540
  1731. """
  1732. args = _tukey_hsd_iv(args, equal_var)
  1733. ntreatments = len(args)
  1734. means = np.asarray([np.mean(arg) for arg in args])
  1735. nsamples_treatments = np.asarray([a.size for a in args])
  1736. nobs = np.sum(nsamples_treatments)
  1737. vars_ = np.asarray([np.var(arg, ddof=1) for arg in args])
  1738. if equal_var:
  1739. # determine mean square error [5]. Note that this is sometimes called
  1740. # mean square error within.
  1741. mse = (np.sum(vars_ * (nsamples_treatments - 1)) / (nobs - ntreatments))
  1742. # The calculation of the standard error differs when treatments differ in
  1743. # size. See ("Unequal sample sizes")[1].
  1744. if np.unique(nsamples_treatments).size == 1:
  1745. # all input groups are the same length, so only one value needs to be
  1746. # calculated [1].
  1747. normalize = 2 / nsamples_treatments[0]
  1748. else:
  1749. # to compare groups of differing sizes, we must compute a variance
  1750. # value for each individual comparison. Use broadcasting to get the
  1751. # resulting matrix. [3], verified against [4] (page 308).
  1752. normalize = 1 / nsamples_treatments + 1 / nsamples_treatments[None].T
  1753. # the standard error is used in the computation of the tukey criterion and
  1754. # finding the p-values.
  1755. stand_err = np.sqrt(normalize * mse / 2)
  1756. df = nobs - ntreatments
  1757. else:
  1758. # `stand_err` is the denominator of the Behrens-Fisher statistic ($v$)
  1759. # with a factor of $\sqrt{2}$. Compare [7] p.116 "t-solution rejects H0 if...",
  1760. # [7] p. 117 "H0 was rejected", and definition of `t_stat` below.
  1761. sj2_nj = vars_ / nsamples_treatments
  1762. si2_ni = sj2_nj[:, np.newaxis]
  1763. stand_err = np.sqrt(si2_ni + sj2_nj) / 2**0.5
  1764. # `df` is the Welch degree of freedom $\nu$.
  1765. # See [7] p. 116 "and the degrees of freedom, $\nu$, are given by...".
  1766. njm1 = nsamples_treatments - 1
  1767. nim1 = njm1[:, np.newaxis]
  1768. df = (si2_ni + sj2_nj)**2 / (si2_ni**2 / nim1 + sj2_nj**2 / njm1)
  1769. # the mean difference is the test statistic.
  1770. mean_differences = means[None].T - means
  1771. # Calculate the t-statistic to use within the survival function of the
  1772. # studentized range to get the p-value.
  1773. t_stat = np.abs(mean_differences) / stand_err
  1774. params = t_stat, ntreatments, df
  1775. pvalues = distributions.studentized_range.sf(*params)
  1776. return TukeyHSDResult(mean_differences, pvalues, ntreatments,
  1777. df, stand_err)