_multicomp.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. import warnings
  2. from collections.abc import Sequence
  3. from dataclasses import dataclass, field
  4. from typing import TYPE_CHECKING, Literal
  5. import numpy as np
  6. from scipy import stats
  7. from scipy.optimize import minimize_scalar
  8. from scipy.stats._common import ConfidenceInterval
  9. from scipy.stats._qmc import check_random_state
  10. from scipy.stats._stats_py import _var
  11. from scipy._lib._array_api import xp_capabilities
  12. from scipy._lib._util import _transition_to_rng, DecimalNumber, SeedType
  13. if TYPE_CHECKING:
  14. import numpy.typing as npt
  15. __all__ = [
  16. 'dunnett'
  17. ]
  18. @dataclass
  19. class DunnettResult:
  20. """Result object returned by `scipy.stats.dunnett`.
  21. Attributes
  22. ----------
  23. statistic : float ndarray
  24. The computed statistic of the test for each comparison. The element
  25. at index ``i`` is the statistic for the comparison between
  26. groups ``i`` and the control.
  27. pvalue : float ndarray
  28. The computed p-value of the test for each comparison. The element
  29. at index ``i`` is the p-value for the comparison between
  30. group ``i`` and the control.
  31. """
  32. statistic: np.ndarray
  33. pvalue: np.ndarray
  34. _alternative: Literal['two-sided', 'less', 'greater'] = field(repr=False)
  35. _rho: np.ndarray = field(repr=False)
  36. _df: int = field(repr=False)
  37. _std: float = field(repr=False)
  38. _mean_samples: np.ndarray = field(repr=False)
  39. _mean_control: np.ndarray = field(repr=False)
  40. _n_samples: np.ndarray = field(repr=False)
  41. _n_control: int = field(repr=False)
  42. _rng: SeedType = field(repr=False)
  43. _ci: ConfidenceInterval | None = field(default=None, repr=False)
  44. _ci_cl: DecimalNumber | None = field(default=None, repr=False)
  45. def __str__(self):
  46. # Note: `__str__` prints the confidence intervals from the most
  47. # recent call to `confidence_interval`. If it has not been called,
  48. # it will be called with the default CL of .95.
  49. if self._ci is None:
  50. self.confidence_interval(confidence_level=.95)
  51. s = (
  52. "Dunnett's test"
  53. f" ({self._ci_cl*100:.1f}% Confidence Interval)\n"
  54. "Comparison Statistic p-value Lower CI Upper CI\n"
  55. )
  56. for i in range(self.pvalue.size):
  57. s += (f" (Sample {i} - Control) {self.statistic[i]:>10.3f}"
  58. f"{self.pvalue[i]:>10.3f}"
  59. f"{self._ci.low[i]:>10.3f}"
  60. f"{self._ci.high[i]:>10.3f}\n")
  61. return s
  62. def _allowance(
  63. self, confidence_level: DecimalNumber = 0.95, tol: DecimalNumber = 1e-3
  64. ) -> float:
  65. """Allowance.
  66. It is the quantity to add/subtract from the observed difference
  67. between the means of observed groups and the mean of the control
  68. group. The result gives confidence limits.
  69. Parameters
  70. ----------
  71. confidence_level : float, optional
  72. Confidence level for the computed confidence interval.
  73. Default is .95.
  74. tol : float, optional
  75. A tolerance for numerical optimization: the allowance will produce
  76. a confidence within ``10*tol*(1 - confidence_level)`` of the
  77. specified level, or a warning will be emitted. Tight tolerances
  78. may be impractical due to noisy evaluation of the objective.
  79. Default is 1e-3.
  80. Returns
  81. -------
  82. allowance : float
  83. Allowance around the mean.
  84. """
  85. alpha = 1 - confidence_level
  86. def pvalue_from_stat(statistic):
  87. statistic = np.array(statistic)
  88. sf = _pvalue_dunnett(
  89. rho=self._rho, df=self._df,
  90. statistic=statistic, alternative=self._alternative,
  91. rng=self._rng
  92. )
  93. return abs(sf - alpha)/alpha
  94. # Evaluation of `pvalue_from_stat` is noisy due to the use of RQMC to
  95. # evaluate `multivariate_t.cdf`. `minimize_scalar` is not designed
  96. # to tolerate a noisy objective function and may fail to find the
  97. # minimum accurately. We mitigate this possibility with the validation
  98. # step below, but implementation of a noise-tolerant root finder or
  99. # minimizer would be a welcome enhancement. See gh-18150.
  100. res = minimize_scalar(pvalue_from_stat, method='brent', tol=tol)
  101. critical_value = res.x
  102. # validation
  103. # tol*10 because tol=1e-3 means we tolerate a 1% change at most
  104. if res.success is False or res.fun >= tol*10:
  105. warnings.warn(
  106. "Computation of the confidence interval did not converge to "
  107. "the desired level. The confidence level corresponding with "
  108. f"the returned interval is approximately {alpha*(1+res.fun)}.",
  109. stacklevel=3
  110. )
  111. # From [1] p. 1101 between (1) and (3)
  112. allowance = critical_value*self._std*np.sqrt(
  113. 1/self._n_samples + 1/self._n_control
  114. )
  115. return abs(allowance)
  116. def confidence_interval(
  117. self, confidence_level: DecimalNumber = 0.95
  118. ) -> ConfidenceInterval:
  119. """Compute the confidence interval for the specified confidence level.
  120. Parameters
  121. ----------
  122. confidence_level : float, optional
  123. Confidence level for the computed confidence interval.
  124. Default is .95.
  125. Returns
  126. -------
  127. ci : ``ConfidenceInterval`` object
  128. The object has attributes ``low`` and ``high`` that hold the
  129. lower and upper bounds of the confidence intervals for each
  130. comparison. The high and low values are accessible for each
  131. comparison at index ``i`` for each group ``i``.
  132. """
  133. # check to see if the supplied confidence level matches that of the
  134. # previously computed CI.
  135. if (self._ci is not None) and (confidence_level == self._ci_cl):
  136. return self._ci
  137. if not (0 < confidence_level < 1):
  138. raise ValueError("Confidence level must be between 0 and 1.")
  139. allowance = self._allowance(confidence_level=confidence_level)
  140. diff_means = self._mean_samples - self._mean_control
  141. low = diff_means-allowance
  142. high = diff_means+allowance
  143. if self._alternative == 'greater':
  144. high = [np.inf] * len(diff_means)
  145. elif self._alternative == 'less':
  146. low = [-np.inf] * len(diff_means)
  147. self._ci_cl = confidence_level
  148. self._ci = ConfidenceInterval(
  149. low=low,
  150. high=high
  151. )
  152. return self._ci
  153. @xp_capabilities(np_only=True)
  154. @_transition_to_rng('random_state', replace_doc=False)
  155. def dunnett(
  156. *samples: "npt.ArrayLike", # noqa: D417
  157. control: "npt.ArrayLike",
  158. alternative: Literal['two-sided', 'less', 'greater'] = "two-sided",
  159. rng: SeedType = None
  160. ) -> DunnettResult:
  161. """Dunnett's test: multiple comparisons of means against a control group.
  162. This is an implementation of Dunnett's original, single-step test as
  163. described in [1]_.
  164. Parameters
  165. ----------
  166. sample1, sample2, ... : 1D array_like
  167. The sample measurements for each experimental group.
  168. control : 1D array_like
  169. The sample measurements for the control group.
  170. alternative : {'two-sided', 'less', 'greater'}, optional
  171. Defines the alternative hypothesis.
  172. The null hypothesis is that the means of the distributions underlying
  173. the samples and control are equal. The following alternative
  174. hypotheses are available (default is 'two-sided'):
  175. * 'two-sided': the means of the distributions underlying the samples
  176. and control are unequal.
  177. * 'less': the means of the distributions underlying the samples
  178. are less than the mean of the distribution underlying the control.
  179. * 'greater': the means of the distributions underlying the
  180. samples are greater than the mean of the distribution underlying
  181. the control.
  182. rng : `numpy.random.Generator`, optional
  183. Pseudorandom number generator state. When `rng` is None, a new
  184. `numpy.random.Generator` is created using entropy from the
  185. operating system. Types other than `numpy.random.Generator` are
  186. passed to `numpy.random.default_rng` to instantiate a ``Generator``.
  187. .. versionchanged:: 1.15.0
  188. As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
  189. transition from use of `numpy.random.RandomState` to
  190. `numpy.random.Generator`, this keyword was changed from `random_state` to
  191. `rng`. For an interim period, both keywords will continue to work, although
  192. only one may be specified at a time. After the interim period, function
  193. calls using the `random_state` keyword will emit warnings. Following a
  194. deprecation period, the `random_state` keyword will be removed.
  195. Returns
  196. -------
  197. res : `~scipy.stats._result_classes.DunnettResult`
  198. An object containing attributes:
  199. statistic : float ndarray
  200. The computed statistic of the test for each comparison. The element
  201. at index ``i`` is the statistic for the comparison between
  202. groups ``i`` and the control.
  203. pvalue : float ndarray
  204. The computed p-value of the test for each comparison. The element
  205. at index ``i`` is the p-value for the comparison between
  206. group ``i`` and the control.
  207. And the following method:
  208. confidence_interval(confidence_level=0.95) :
  209. Compute the difference in means of the groups
  210. with the control +- the allowance.
  211. See Also
  212. --------
  213. tukey_hsd : performs pairwise comparison of means.
  214. :ref:`hypothesis_dunnett` : Extended example
  215. Notes
  216. -----
  217. Like the independent-sample t-test, Dunnett's test [1]_ is used to make
  218. inferences about the means of distributions from which samples were drawn.
  219. However, when multiple t-tests are performed at a fixed significance level,
  220. the "family-wise error rate" - the probability of incorrectly rejecting the
  221. null hypothesis in at least one test - will exceed the significance level.
  222. Dunnett's test is designed to perform multiple comparisons while
  223. controlling the family-wise error rate.
  224. Dunnett's test compares the means of multiple experimental groups
  225. against a single control group. Tukey's Honestly Significant Difference Test
  226. is another multiple-comparison test that controls the family-wise error
  227. rate, but `tukey_hsd` performs *all* pairwise comparisons between groups.
  228. When pairwise comparisons between experimental groups are not needed,
  229. Dunnett's test is preferable due to its higher power.
  230. The use of this test relies on several assumptions.
  231. 1. The observations are independent within and among groups.
  232. 2. The observations within each group are normally distributed.
  233. 3. The distributions from which the samples are drawn have the same finite
  234. variance.
  235. References
  236. ----------
  237. .. [1] Dunnett, Charles W. (1955) "A Multiple Comparison Procedure for
  238. Comparing Several Treatments with a Control." Journal of the American
  239. Statistical Association, 50:272, 1096-1121,
  240. :doi:`10.1080/01621459.1955.10501294`
  241. .. [2] Thomson, M. L., & Short, M. D. (1969). Mucociliary function in
  242. health, chronic obstructive airway disease, and asbestosis. Journal
  243. of applied physiology, 26(5), 535-539.
  244. :doi:`10.1152/jappl.1969.26.5.535`
  245. Examples
  246. --------
  247. We'll use data from [2]_, Table 1. The null hypothesis is that the means of
  248. the distributions underlying the samples and control are equal.
  249. First, we test that the means of the distributions underlying the samples
  250. and control are unequal (``alternative='two-sided'``, the default).
  251. >>> import numpy as np
  252. >>> from scipy.stats import dunnett
  253. >>> samples = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
  254. >>> control = [2.9, 3.0, 2.5, 2.6, 3.2]
  255. >>> res = dunnett(*samples, control=control)
  256. >>> res.statistic
  257. array([ 0.90874545, -0.05007117])
  258. >>> res.pvalue
  259. array([0.58325114, 0.99819341])
  260. Now, we test that the means of the distributions underlying the samples are
  261. greater than the mean of the distribution underlying the control.
  262. >>> res = dunnett(*samples, control=control, alternative='greater')
  263. >>> res.statistic
  264. array([ 0.90874545, -0.05007117])
  265. >>> res.pvalue
  266. array([0.30230596, 0.69115597])
  267. For a more detailed example, see :ref:`hypothesis_dunnett`.
  268. """
  269. samples_, control_, rng = _iv_dunnett(
  270. samples=samples, control=control,
  271. alternative=alternative, rng=rng
  272. )
  273. rho, df, n_group, n_samples, n_control = _params_dunnett(
  274. samples=samples_, control=control_
  275. )
  276. statistic, std, mean_control, mean_samples = _statistic_dunnett(
  277. samples_, control_, df, n_samples, n_control
  278. )
  279. pvalue = _pvalue_dunnett(
  280. rho=rho, df=df, statistic=statistic, alternative=alternative, rng=rng
  281. )
  282. return DunnettResult(
  283. statistic=statistic, pvalue=pvalue,
  284. _alternative=alternative,
  285. _rho=rho, _df=df, _std=std,
  286. _mean_samples=mean_samples,
  287. _mean_control=mean_control,
  288. _n_samples=n_samples,
  289. _n_control=n_control,
  290. _rng=rng
  291. )
  292. def _iv_dunnett(
  293. samples: Sequence["npt.ArrayLike"],
  294. control: "npt.ArrayLike",
  295. alternative: Literal['two-sided', 'less', 'greater'],
  296. rng: SeedType
  297. ) -> tuple[list[np.ndarray], np.ndarray, SeedType]:
  298. """Input validation for Dunnett's test."""
  299. rng = check_random_state(rng)
  300. if alternative not in {'two-sided', 'less', 'greater'}:
  301. raise ValueError(
  302. "alternative must be 'less', 'greater' or 'two-sided'"
  303. )
  304. ndim_msg = "Control and samples groups must be 1D arrays"
  305. n_obs_msg = "Control and samples groups must have at least 1 observation"
  306. control = np.asarray(control)
  307. samples_ = [np.asarray(sample) for sample in samples]
  308. # samples checks
  309. samples_control: list[np.ndarray] = samples_ + [control]
  310. for sample in samples_control:
  311. if sample.ndim > 1:
  312. raise ValueError(ndim_msg)
  313. if sample.size < 1:
  314. raise ValueError(n_obs_msg)
  315. return samples_, control, rng
  316. def _params_dunnett(
  317. samples: list[np.ndarray], control: np.ndarray
  318. ) -> tuple[np.ndarray, int, int, np.ndarray, int]:
  319. """Specific parameters for Dunnett's test.
  320. Degree of freedom is the number of observations minus the number of groups
  321. including the control.
  322. """
  323. n_samples = np.array([sample.size for sample in samples])
  324. # From [1] p. 1100 d.f. = (sum N)-(p+1)
  325. n_sample = n_samples.sum()
  326. n_control = control.size
  327. n = n_sample + n_control
  328. n_groups = len(samples)
  329. df = n - n_groups - 1
  330. # From [1] p. 1103 rho_ij = 1/sqrt((N0/Ni+1)(N0/Nj+1))
  331. rho = n_control/n_samples + 1
  332. rho = 1/np.sqrt(rho[:, None] * rho[None, :])
  333. np.fill_diagonal(rho, 1)
  334. return rho, df, n_groups, n_samples, n_control
  335. def _statistic_dunnett(
  336. samples: list[np.ndarray], control: np.ndarray, df: int,
  337. n_samples: np.ndarray, n_control: int
  338. ) -> tuple[np.ndarray, float, np.ndarray, np.ndarray]:
  339. """Statistic of Dunnett's test.
  340. Computation based on the original single-step test from [1].
  341. """
  342. mean_control = np.mean(control)
  343. mean_samples = np.array([np.mean(sample) for sample in samples])
  344. all_samples = [control] + samples
  345. all_means = np.concatenate([[mean_control], mean_samples])
  346. # Variance estimate s^2 from [1] Eq. 1
  347. s2 = np.sum([_var(sample, mean=mean)*sample.size
  348. for sample, mean in zip(all_samples, all_means)]) / df
  349. std = np.sqrt(s2)
  350. # z score inferred from [1] unlabeled equation after Eq. 1
  351. z = (mean_samples - mean_control) / np.sqrt(1/n_samples + 1/n_control)
  352. return z / std, std, mean_control, mean_samples
  353. def _pvalue_dunnett(
  354. rho: np.ndarray, df: int, statistic: np.ndarray,
  355. alternative: Literal['two-sided', 'less', 'greater'],
  356. rng: SeedType = None
  357. ) -> np.ndarray:
  358. """pvalue from the multivariate t-distribution.
  359. Critical values come from the multivariate student-t distribution.
  360. """
  361. statistic = statistic.reshape(-1, 1)
  362. mvt = stats.multivariate_t(shape=rho, df=df, seed=rng)
  363. if alternative == "two-sided":
  364. statistic = abs(statistic)
  365. pvalue = 1 - mvt.cdf(statistic, lower_limit=-statistic)
  366. elif alternative == "greater":
  367. pvalue = 1 - mvt.cdf(statistic, lower_limit=-np.inf)
  368. else:
  369. pvalue = 1 - mvt.cdf(np.inf, lower_limit=statistic)
  370. return np.atleast_1d(pvalue)