| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451 |
- import warnings
- from collections.abc import Sequence
- from dataclasses import dataclass, field
- from typing import TYPE_CHECKING, Literal
- import numpy as np
- from scipy import stats
- from scipy.optimize import minimize_scalar
- from scipy.stats._common import ConfidenceInterval
- from scipy.stats._qmc import check_random_state
- from scipy.stats._stats_py import _var
- from scipy._lib._array_api import xp_capabilities
- from scipy._lib._util import _transition_to_rng, DecimalNumber, SeedType
- if TYPE_CHECKING:
- import numpy.typing as npt
- __all__ = [
- 'dunnett'
- ]
- @dataclass
- class DunnettResult:
- """Result object returned by `scipy.stats.dunnett`.
- Attributes
- ----------
- statistic : float ndarray
- The computed statistic of the test for each comparison. The element
- at index ``i`` is the statistic for the comparison between
- groups ``i`` and the control.
- pvalue : float ndarray
- The computed p-value of the test for each comparison. The element
- at index ``i`` is the p-value for the comparison between
- group ``i`` and the control.
- """
- statistic: np.ndarray
- pvalue: np.ndarray
- _alternative: Literal['two-sided', 'less', 'greater'] = field(repr=False)
- _rho: np.ndarray = field(repr=False)
- _df: int = field(repr=False)
- _std: float = field(repr=False)
- _mean_samples: np.ndarray = field(repr=False)
- _mean_control: np.ndarray = field(repr=False)
- _n_samples: np.ndarray = field(repr=False)
- _n_control: int = field(repr=False)
- _rng: SeedType = field(repr=False)
- _ci: ConfidenceInterval | None = field(default=None, repr=False)
- _ci_cl: DecimalNumber | None = field(default=None, repr=False)
- def __str__(self):
- # Note: `__str__` prints the confidence intervals from the most
- # recent call to `confidence_interval`. If it has not been called,
- # it will be called with the default CL of .95.
- if self._ci is None:
- self.confidence_interval(confidence_level=.95)
- s = (
- "Dunnett's test"
- f" ({self._ci_cl*100:.1f}% Confidence Interval)\n"
- "Comparison Statistic p-value Lower CI Upper CI\n"
- )
- for i in range(self.pvalue.size):
- s += (f" (Sample {i} - Control) {self.statistic[i]:>10.3f}"
- f"{self.pvalue[i]:>10.3f}"
- f"{self._ci.low[i]:>10.3f}"
- f"{self._ci.high[i]:>10.3f}\n")
- return s
- def _allowance(
- self, confidence_level: DecimalNumber = 0.95, tol: DecimalNumber = 1e-3
- ) -> float:
- """Allowance.
- It is the quantity to add/subtract from the observed difference
- between the means of observed groups and the mean of the control
- group. The result gives confidence limits.
- Parameters
- ----------
- confidence_level : float, optional
- Confidence level for the computed confidence interval.
- Default is .95.
- tol : float, optional
- A tolerance for numerical optimization: the allowance will produce
- a confidence within ``10*tol*(1 - confidence_level)`` of the
- specified level, or a warning will be emitted. Tight tolerances
- may be impractical due to noisy evaluation of the objective.
- Default is 1e-3.
- Returns
- -------
- allowance : float
- Allowance around the mean.
- """
- alpha = 1 - confidence_level
- def pvalue_from_stat(statistic):
- statistic = np.array(statistic)
- sf = _pvalue_dunnett(
- rho=self._rho, df=self._df,
- statistic=statistic, alternative=self._alternative,
- rng=self._rng
- )
- return abs(sf - alpha)/alpha
- # Evaluation of `pvalue_from_stat` is noisy due to the use of RQMC to
- # evaluate `multivariate_t.cdf`. `minimize_scalar` is not designed
- # to tolerate a noisy objective function and may fail to find the
- # minimum accurately. We mitigate this possibility with the validation
- # step below, but implementation of a noise-tolerant root finder or
- # minimizer would be a welcome enhancement. See gh-18150.
- res = minimize_scalar(pvalue_from_stat, method='brent', tol=tol)
- critical_value = res.x
- # validation
- # tol*10 because tol=1e-3 means we tolerate a 1% change at most
- if res.success is False or res.fun >= tol*10:
- warnings.warn(
- "Computation of the confidence interval did not converge to "
- "the desired level. The confidence level corresponding with "
- f"the returned interval is approximately {alpha*(1+res.fun)}.",
- stacklevel=3
- )
- # From [1] p. 1101 between (1) and (3)
- allowance = critical_value*self._std*np.sqrt(
- 1/self._n_samples + 1/self._n_control
- )
- return abs(allowance)
- def confidence_interval(
- self, confidence_level: DecimalNumber = 0.95
- ) -> ConfidenceInterval:
- """Compute the confidence interval for the specified confidence level.
- Parameters
- ----------
- confidence_level : float, optional
- Confidence level for the computed confidence interval.
- Default is .95.
- Returns
- -------
- ci : ``ConfidenceInterval`` object
- The object has attributes ``low`` and ``high`` that hold the
- lower and upper bounds of the confidence intervals for each
- comparison. The high and low values are accessible for each
- comparison at index ``i`` for each group ``i``.
- """
- # check to see if the supplied confidence level matches that of the
- # previously computed CI.
- if (self._ci is not None) and (confidence_level == self._ci_cl):
- return self._ci
- if not (0 < confidence_level < 1):
- raise ValueError("Confidence level must be between 0 and 1.")
- allowance = self._allowance(confidence_level=confidence_level)
- diff_means = self._mean_samples - self._mean_control
- low = diff_means-allowance
- high = diff_means+allowance
- if self._alternative == 'greater':
- high = [np.inf] * len(diff_means)
- elif self._alternative == 'less':
- low = [-np.inf] * len(diff_means)
- self._ci_cl = confidence_level
- self._ci = ConfidenceInterval(
- low=low,
- high=high
- )
- return self._ci
- @xp_capabilities(np_only=True)
- @_transition_to_rng('random_state', replace_doc=False)
- def dunnett(
- *samples: "npt.ArrayLike", # noqa: D417
- control: "npt.ArrayLike",
- alternative: Literal['two-sided', 'less', 'greater'] = "two-sided",
- rng: SeedType = None
- ) -> DunnettResult:
- """Dunnett's test: multiple comparisons of means against a control group.
- This is an implementation of Dunnett's original, single-step test as
- described in [1]_.
- Parameters
- ----------
- sample1, sample2, ... : 1D array_like
- The sample measurements for each experimental group.
- control : 1D array_like
- The sample measurements for the control group.
- alternative : {'two-sided', 'less', 'greater'}, optional
- Defines the alternative hypothesis.
- The null hypothesis is that the means of the distributions underlying
- the samples and control are equal. The following alternative
- hypotheses are available (default is 'two-sided'):
- * 'two-sided': the means of the distributions underlying the samples
- and control are unequal.
- * 'less': the means of the distributions underlying the samples
- are less than the mean of the distribution underlying the control.
- * 'greater': the means of the distributions underlying the
- samples are greater than the mean of the distribution underlying
- the control.
- rng : `numpy.random.Generator`, optional
- Pseudorandom number generator state. When `rng` is None, a new
- `numpy.random.Generator` is created using entropy from the
- operating system. Types other than `numpy.random.Generator` are
- passed to `numpy.random.default_rng` to instantiate a ``Generator``.
- .. versionchanged:: 1.15.0
- As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
- transition from use of `numpy.random.RandomState` to
- `numpy.random.Generator`, this keyword was changed from `random_state` to
- `rng`. For an interim period, both keywords will continue to work, although
- only one may be specified at a time. After the interim period, function
- calls using the `random_state` keyword will emit warnings. Following a
- deprecation period, the `random_state` keyword will be removed.
- Returns
- -------
- res : `~scipy.stats._result_classes.DunnettResult`
- An object containing attributes:
- statistic : float ndarray
- The computed statistic of the test for each comparison. The element
- at index ``i`` is the statistic for the comparison between
- groups ``i`` and the control.
- pvalue : float ndarray
- The computed p-value of the test for each comparison. The element
- at index ``i`` is the p-value for the comparison between
- group ``i`` and the control.
- And the following method:
- confidence_interval(confidence_level=0.95) :
- Compute the difference in means of the groups
- with the control +- the allowance.
- See Also
- --------
- tukey_hsd : performs pairwise comparison of means.
- :ref:`hypothesis_dunnett` : Extended example
- Notes
- -----
- Like the independent-sample t-test, Dunnett's test [1]_ is used to make
- inferences about the means of distributions from which samples were drawn.
- However, when multiple t-tests are performed at a fixed significance level,
- the "family-wise error rate" - the probability of incorrectly rejecting the
- null hypothesis in at least one test - will exceed the significance level.
- Dunnett's test is designed to perform multiple comparisons while
- controlling the family-wise error rate.
- Dunnett's test compares the means of multiple experimental groups
- against a single control group. Tukey's Honestly Significant Difference Test
- is another multiple-comparison test that controls the family-wise error
- rate, but `tukey_hsd` performs *all* pairwise comparisons between groups.
- When pairwise comparisons between experimental groups are not needed,
- Dunnett's test is preferable due to its higher power.
- The use of this test relies on several assumptions.
- 1. The observations are independent within and among groups.
- 2. The observations within each group are normally distributed.
- 3. The distributions from which the samples are drawn have the same finite
- variance.
- References
- ----------
- .. [1] Dunnett, Charles W. (1955) "A Multiple Comparison Procedure for
- Comparing Several Treatments with a Control." Journal of the American
- Statistical Association, 50:272, 1096-1121,
- :doi:`10.1080/01621459.1955.10501294`
- .. [2] Thomson, M. L., & Short, M. D. (1969). Mucociliary function in
- health, chronic obstructive airway disease, and asbestosis. Journal
- of applied physiology, 26(5), 535-539.
- :doi:`10.1152/jappl.1969.26.5.535`
- Examples
- --------
- We'll use data from [2]_, Table 1. The null hypothesis is that the means of
- the distributions underlying the samples and control are equal.
- First, we test that the means of the distributions underlying the samples
- and control are unequal (``alternative='two-sided'``, the default).
- >>> import numpy as np
- >>> from scipy.stats import dunnett
- >>> samples = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
- >>> control = [2.9, 3.0, 2.5, 2.6, 3.2]
- >>> res = dunnett(*samples, control=control)
- >>> res.statistic
- array([ 0.90874545, -0.05007117])
- >>> res.pvalue
- array([0.58325114, 0.99819341])
- Now, we test that the means of the distributions underlying the samples are
- greater than the mean of the distribution underlying the control.
- >>> res = dunnett(*samples, control=control, alternative='greater')
- >>> res.statistic
- array([ 0.90874545, -0.05007117])
- >>> res.pvalue
- array([0.30230596, 0.69115597])
- For a more detailed example, see :ref:`hypothesis_dunnett`.
- """
- samples_, control_, rng = _iv_dunnett(
- samples=samples, control=control,
- alternative=alternative, rng=rng
- )
- rho, df, n_group, n_samples, n_control = _params_dunnett(
- samples=samples_, control=control_
- )
- statistic, std, mean_control, mean_samples = _statistic_dunnett(
- samples_, control_, df, n_samples, n_control
- )
- pvalue = _pvalue_dunnett(
- rho=rho, df=df, statistic=statistic, alternative=alternative, rng=rng
- )
- return DunnettResult(
- statistic=statistic, pvalue=pvalue,
- _alternative=alternative,
- _rho=rho, _df=df, _std=std,
- _mean_samples=mean_samples,
- _mean_control=mean_control,
- _n_samples=n_samples,
- _n_control=n_control,
- _rng=rng
- )
- def _iv_dunnett(
- samples: Sequence["npt.ArrayLike"],
- control: "npt.ArrayLike",
- alternative: Literal['two-sided', 'less', 'greater'],
- rng: SeedType
- ) -> tuple[list[np.ndarray], np.ndarray, SeedType]:
- """Input validation for Dunnett's test."""
- rng = check_random_state(rng)
- if alternative not in {'two-sided', 'less', 'greater'}:
- raise ValueError(
- "alternative must be 'less', 'greater' or 'two-sided'"
- )
- ndim_msg = "Control and samples groups must be 1D arrays"
- n_obs_msg = "Control and samples groups must have at least 1 observation"
- control = np.asarray(control)
- samples_ = [np.asarray(sample) for sample in samples]
- # samples checks
- samples_control: list[np.ndarray] = samples_ + [control]
- for sample in samples_control:
- if sample.ndim > 1:
- raise ValueError(ndim_msg)
- if sample.size < 1:
- raise ValueError(n_obs_msg)
- return samples_, control, rng
- def _params_dunnett(
- samples: list[np.ndarray], control: np.ndarray
- ) -> tuple[np.ndarray, int, int, np.ndarray, int]:
- """Specific parameters for Dunnett's test.
- Degree of freedom is the number of observations minus the number of groups
- including the control.
- """
- n_samples = np.array([sample.size for sample in samples])
- # From [1] p. 1100 d.f. = (sum N)-(p+1)
- n_sample = n_samples.sum()
- n_control = control.size
- n = n_sample + n_control
- n_groups = len(samples)
- df = n - n_groups - 1
- # From [1] p. 1103 rho_ij = 1/sqrt((N0/Ni+1)(N0/Nj+1))
- rho = n_control/n_samples + 1
- rho = 1/np.sqrt(rho[:, None] * rho[None, :])
- np.fill_diagonal(rho, 1)
- return rho, df, n_groups, n_samples, n_control
- def _statistic_dunnett(
- samples: list[np.ndarray], control: np.ndarray, df: int,
- n_samples: np.ndarray, n_control: int
- ) -> tuple[np.ndarray, float, np.ndarray, np.ndarray]:
- """Statistic of Dunnett's test.
- Computation based on the original single-step test from [1].
- """
- mean_control = np.mean(control)
- mean_samples = np.array([np.mean(sample) for sample in samples])
- all_samples = [control] + samples
- all_means = np.concatenate([[mean_control], mean_samples])
- # Variance estimate s^2 from [1] Eq. 1
- s2 = np.sum([_var(sample, mean=mean)*sample.size
- for sample, mean in zip(all_samples, all_means)]) / df
- std = np.sqrt(s2)
- # z score inferred from [1] unlabeled equation after Eq. 1
- z = (mean_samples - mean_control) / np.sqrt(1/n_samples + 1/n_control)
- return z / std, std, mean_control, mean_samples
- def _pvalue_dunnett(
- rho: np.ndarray, df: int, statistic: np.ndarray,
- alternative: Literal['two-sided', 'less', 'greater'],
- rng: SeedType = None
- ) -> np.ndarray:
- """pvalue from the multivariate t-distribution.
- Critical values come from the multivariate student-t distribution.
- """
- statistic = statistic.reshape(-1, 1)
- mvt = stats.multivariate_t(shape=rho, df=df, seed=rng)
- if alternative == "two-sided":
- statistic = abs(statistic)
- pvalue = 1 - mvt.cdf(statistic, lower_limit=-statistic)
- elif alternative == "greater":
- pvalue = 1 - mvt.cdf(statistic, lower_limit=-np.inf)
- else:
- pvalue = 1 - mvt.cdf(np.inf, lower_limit=statistic)
- return np.atleast_1d(pvalue)
|