| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405 |
- import copy
- import numpy as np
- import pytest
- from numpy.testing import assert_allclose
- from scipy import stats
- from scipy.stats._multicomp import _pvalue_dunnett, DunnettResult
- class TestDunnett:
- # For the following tests, p-values were computed using Matlab, e.g.
- # sample = [18. 15. 18. 16. 17. 15. 14. 14. 14. 15. 15....
- # 14. 15. 14. 22. 18. 21. 21. 10. 10. 11. 9....
- # 25. 26. 17.5 16. 15.5 14.5 22. 22. 24. 22.5 29....
- # 24.5 20. 18. 18.5 17.5 26.5 13. 16.5 13. 13. 13....
- # 28. 27. 34. 31. 29. 27. 24. 23. 38. 36. 25....
- # 38. 26. 22. 36. 27. 27. 32. 28. 31....
- # 24. 27. 33. 32. 28. 19. 37. 31. 36. 36....
- # 34. 38. 32. 38. 32....
- # 26. 24. 26. 25. 29. 29.5 16.5 36. 44....
- # 25. 27. 19....
- # 25. 20....
- # 28.];
- # j = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
- # 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
- # 0 0 0 0...
- # 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1...
- # 2 2 2 2 2 2 2 2 2...
- # 3 3 3...
- # 4 4...
- # 5];
- # [~, ~, stats] = anova1(sample, j, "off");
- # [results, ~, ~, gnames] = multcompare(stats, ...
- # "CriticalValueType", "dunnett", ...
- # "Approximate", false);
- # tbl = array2table(results, "VariableNames", ...
- # ["Group", "Control Group", "Lower Limit", ...
- # "Difference", "Upper Limit", "P-value"]);
- # tbl.("Group") = gnames(tbl.("Group"));
- # tbl.("Control Group") = gnames(tbl.("Control Group"))
- # Matlab doesn't report the statistic, so the statistics were
- # computed using R multcomp `glht`, e.g.:
- # library(multcomp)
- # options(digits=16)
- # control < - c(18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0,
- # 15.0, 15.0, 14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0,
- # 10.0, 10.0, 11.0, 9.0, 25.0, 26.0, 17.5, 16.0, 15.5,
- # 14.5, 22.0, 22.0, 24.0, 22.5, 29.0, 24.5, 20.0, 18.0,
- # 18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0, 28.0,
- # 27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0,
- # 25.0, 38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0,
- # 31.0)
- # t < - c(24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
- # 34.0, 38.0, 32.0, 38.0, 32.0)
- # w < - c(26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0)
- # x < - c(25.0, 27.0, 19.0)
- # y < - c(25.0, 20.0)
- # z < - c(28.0)
- #
- # groups = factor(rep(c("control", "t", "w", "x", "y", "z"),
- # times=c(length(control), length(t), length(w),
- # length(x), length(y), length(z))))
- # df < - data.frame(response=c(control, t, w, x, y, z),
- # group=groups)
- # model < - aov(response
- # ~group, data = df)
- # test < - glht(model=model,
- # linfct=mcp(group="Dunnett"),
- # alternative="g")
- # summary(test)
- # confint(test)
- # p-values agreed with those produced by Matlab to at least atol=1e-3
- # From Matlab's documentation on multcompare
- samples_1 = [
- [
- 24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
- 34.0, 38.0, 32.0, 38.0, 32.0
- ],
- [26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0],
- [25.0, 27.0, 19.0],
- [25.0, 20.0],
- [28.0]
- ]
- control_1 = [
- 18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0, 15.0, 15.0,
- 14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0, 10.0, 10.0, 11.0, 9.0,
- 25.0, 26.0, 17.5, 16.0, 15.5, 14.5, 22.0, 22.0, 24.0, 22.5, 29.0,
- 24.5, 20.0, 18.0, 18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0,
- 28.0, 27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0, 25.0,
- 38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0, 31.0
- ]
- pvalue_1 = [4.727e-06, 0.022346, 0.97912, 0.99953, 0.86579] # Matlab
- # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
- p_1_twosided = [1e-4, 0.02237, 0.97913, 0.99953, 0.86583]
- p_1_greater = [1e-4, 0.011217, 0.768500, 0.896991, 0.577211]
- p_1_less = [1, 1, 0.99660, 0.98398, .99953]
- statistic_1 = [5.27356, 2.91270, 0.60831, 0.27002, 0.96637]
- ci_1_twosided = [[5.3633917835622, 0.7296142201217, -8.3879817106607,
- -11.9090753452911, -11.7655021543469],
- [15.9709832164378, 13.8936496687672, 13.4556900439941,
- 14.6434503452911, 25.4998771543469]]
- ci_1_greater = [5.9036402398526, 1.4000632918725, -7.2754756323636,
- -10.5567456382391, -9.8675629499576]
- ci_1_less = [15.4306165948619, 13.2230539537359, 12.3429406339544,
- 13.2908248513211, 23.6015228251660]
- pvalues_1 = dict(twosided=p_1_twosided, less=p_1_less, greater=p_1_greater)
- cis_1 = dict(twosided=ci_1_twosided, less=ci_1_less, greater=ci_1_greater)
- case_1 = dict(samples=samples_1, control=control_1, statistic=statistic_1,
- pvalues=pvalues_1, cis=cis_1)
- # From Dunnett1955 comparing with R's DescTools: DunnettTest
- samples_2 = [[9.76, 8.80, 7.68, 9.36], [12.80, 9.68, 12.16, 9.20, 10.55]]
- control_2 = [7.40, 8.50, 7.20, 8.24, 9.84, 8.32]
- pvalue_2 = [0.6201, 0.0058]
- # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
- p_2_twosided = [0.6201020, 0.0058254]
- p_2_greater = [0.3249776, 0.0029139]
- p_2_less = [0.91676, 0.99984]
- statistic_2 = [0.85703, 3.69375]
- ci_2_twosided = [[-1.2564116462124, 0.8396273539789],
- [2.5564116462124, 4.4163726460211]]
- ci_2_greater = [-0.9588591188156, 1.1187563667543]
- ci_2_less = [2.2588591188156, 4.1372436332457]
- pvalues_2 = dict(twosided=p_2_twosided, less=p_2_less, greater=p_2_greater)
- cis_2 = dict(twosided=ci_2_twosided, less=ci_2_less, greater=ci_2_greater)
- case_2 = dict(samples=samples_2, control=control_2, statistic=statistic_2,
- pvalues=pvalues_2, cis=cis_2)
- samples_3 = [[55, 64, 64], [55, 49, 52], [50, 44, 41]]
- control_3 = [55, 47, 48]
- pvalue_3 = [0.0364, 0.8966, 0.4091]
- # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
- p_3_twosided = [0.036407, 0.896539, 0.409295]
- p_3_greater = [0.018277, 0.521109, 0.981892]
- p_3_less = [0.99944, 0.90054, 0.20974]
- statistic_3 = [3.09073, 0.56195, -1.40488]
- ci_3_twosided = [[0.7529028025053, -8.2470971974947, -15.2470971974947],
- [21.2470971974947, 12.2470971974947, 5.2470971974947]]
- ci_3_greater = [2.4023682323149, -6.5976317676851, -13.5976317676851]
- ci_3_less = [19.5984402363662, 10.5984402363662, 3.5984402363662]
- pvalues_3 = dict(twosided=p_3_twosided, less=p_3_less, greater=p_3_greater)
- cis_3 = dict(twosided=ci_3_twosided, less=ci_3_less, greater=ci_3_greater)
- case_3 = dict(samples=samples_3, control=control_3, statistic=statistic_3,
- pvalues=pvalues_3, cis=cis_3)
- # From Thomson and Short,
- # Mucociliary function in health, chronic obstructive airway disease,
- # and asbestosis, Journal of Applied Physiology, 1969. Table 1
- # Comparing with R's DescTools: DunnettTest
- samples_4 = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
- control_4 = [2.9, 3.0, 2.5, 2.6, 3.2]
- pvalue_4 = [0.5832, 0.9982]
- # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
- p_4_twosided = [0.58317, 0.99819]
- p_4_greater = [0.30225, 0.69115]
- p_4_less = [0.91929, 0.65212]
- statistic_4 = [0.90875, -0.05007]
- ci_4_twosided = [[-0.6898153448579, -1.0333456251632],
- [1.4598153448579, 0.9933456251632]]
- ci_4_greater = [-0.5186459268412, -0.8719655502147 ]
- ci_4_less = [1.2886459268412, 0.8319655502147]
- pvalues_4 = dict(twosided=p_4_twosided, less=p_4_less, greater=p_4_greater)
- cis_4 = dict(twosided=ci_4_twosided, less=ci_4_less, greater=ci_4_greater)
- case_4 = dict(samples=samples_4, control=control_4, statistic=statistic_4,
- pvalues=pvalues_4, cis=cis_4)
- @pytest.mark.parametrize(
- 'rho, n_groups, df, statistic, pvalue, alternative',
- [
- # From Dunnett1955
- # Tables 1a and 1b pages 1117-1118
- (0.5, 1, 10, 1.81, 0.05, "greater"), # different than two-sided
- (0.5, 3, 10, 2.34, 0.05, "greater"),
- (0.5, 2, 30, 1.99, 0.05, "greater"),
- (0.5, 5, 30, 2.33, 0.05, "greater"),
- (0.5, 4, 12, 3.32, 0.01, "greater"),
- (0.5, 7, 12, 3.56, 0.01, "greater"),
- (0.5, 2, 60, 2.64, 0.01, "greater"),
- (0.5, 4, 60, 2.87, 0.01, "greater"),
- (0.5, 4, 60, [2.87, 2.21], [0.01, 0.05], "greater"),
- # Tables 2a and 2b pages 1119-1120
- (0.5, 1, 10, 2.23, 0.05, "two-sided"), # two-sided
- (0.5, 3, 10, 2.81, 0.05, "two-sided"),
- (0.5, 2, 30, 2.32, 0.05, "two-sided"),
- (0.5, 3, 20, 2.57, 0.05, "two-sided"),
- (0.5, 4, 12, 3.76, 0.01, "two-sided"),
- (0.5, 7, 12, 4.08, 0.01, "two-sided"),
- (0.5, 2, 60, 2.90, 0.01, "two-sided"),
- (0.5, 4, 60, 3.14, 0.01, "two-sided"),
- (0.5, 4, 60, [3.14, 2.55], [0.01, 0.05], "two-sided"),
- ],
- )
- def test_critical_values(
- self, rho, n_groups, df, statistic, pvalue, alternative
- ):
- rng = np.random.default_rng(165250594791731684851746311027739134893)
- rho = np.full((n_groups, n_groups), rho)
- np.fill_diagonal(rho, 1)
- statistic = np.array(statistic)
- res = _pvalue_dunnett(
- rho=rho, df=df, statistic=statistic,
- alternative=alternative,
- rng=rng
- )
- assert_allclose(res, pvalue, atol=5e-3)
- @pytest.mark.parametrize(
- 'samples, control, pvalue, statistic',
- [
- (samples_1, control_1, pvalue_1, statistic_1),
- (samples_2, control_2, pvalue_2, statistic_2),
- (samples_3, control_3, pvalue_3, statistic_3),
- (samples_4, control_4, pvalue_4, statistic_4),
- ]
- )
- def test_basic(self, samples, control, pvalue, statistic):
- rng = np.random.default_rng(11681140010308601919115036826969764808)
- res = stats.dunnett(*samples, control=control, rng=rng)
- assert isinstance(res, DunnettResult)
- assert_allclose(res.statistic, statistic, rtol=5e-5)
- assert_allclose(res.pvalue, pvalue, rtol=1e-2, atol=1e-4)
- @pytest.mark.parametrize(
- 'alternative',
- ['two-sided', 'less', 'greater']
- )
- def test_ttest_ind(self, alternative):
- # check that `dunnett` agrees with `ttest_ind`
- # when there are only two groups
- rng = np.random.default_rng(114184017807316971636137493526995620351)
- for _ in range(10):
- sample = rng.integers(-100, 100, size=(10,))
- control = rng.integers(-100, 100, size=(10,))
- # preserve use of old random_state during SPEC 7 transition
- res = stats.dunnett(
- sample, control=control,
- alternative=alternative, random_state=rng
- )
- ref = stats.ttest_ind(
- sample, control,
- alternative=alternative
- )
- assert_allclose(res.statistic, ref.statistic, rtol=1e-3, atol=1e-5)
- assert_allclose(res.pvalue, ref.pvalue, rtol=1e-3, atol=1e-5)
- @pytest.mark.parametrize(
- 'alternative, pvalue',
- [
- ('less', [0, 1]),
- ('greater', [1, 0]),
- ('two-sided', [0, 0]),
- ]
- )
- def test_alternatives(self, alternative, pvalue):
- rng = np.random.default_rng(114184017807316971636137493526995620351)
- # width of 20 and min diff between samples/control is 60
- # and maximal diff would be 100
- sample_less = rng.integers(0, 20, size=(10,))
- control = rng.integers(80, 100, size=(10,))
- sample_greater = rng.integers(160, 180, size=(10,))
- res = stats.dunnett(
- sample_less, sample_greater, control=control,
- alternative=alternative, rng=rng
- )
- assert_allclose(res.pvalue, pvalue, atol=1e-7)
- ci = res.confidence_interval()
- # two-sided is comparable for high/low
- if alternative == 'less':
- assert np.isneginf(ci.low).all()
- assert -100 < ci.high[0] < -60
- assert 60 < ci.high[1] < 100
- elif alternative == 'greater':
- assert -100 < ci.low[0] < -60
- assert 60 < ci.low[1] < 100
- assert np.isposinf(ci.high).all()
- elif alternative == 'two-sided':
- assert -100 < ci.low[0] < -60
- assert 60 < ci.low[1] < 100
- assert -100 < ci.high[0] < -60
- assert 60 < ci.high[1] < 100
- @pytest.mark.parametrize("case", [case_1, case_2, case_3, case_4])
- @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided'])
- def test_against_R_multicomp_glht(self, case, alternative):
- rng = np.random.default_rng(189117774084579816190295271136455278291)
- samples = case['samples']
- control = case['control']
- alternatives = {'less': 'less', 'greater': 'greater',
- 'two-sided': 'twosided'}
- p_ref = case['pvalues'][alternative.replace('-', '')]
- res = stats.dunnett(*samples, control=control, alternative=alternative,
- rng=rng)
- # atol can't be tighter because R reports some pvalues as "< 1e-4"
- assert_allclose(res.pvalue, p_ref, rtol=5e-3, atol=1e-4)
- ci_ref = case['cis'][alternatives[alternative]]
- if alternative == "greater":
- ci_ref = [ci_ref, np.inf]
- elif alternative == "less":
- ci_ref = [-np.inf, ci_ref]
- assert res._ci is None
- assert res._ci_cl is None
- ci = res.confidence_interval(confidence_level=0.95)
- assert_allclose(ci.low, ci_ref[0], rtol=5e-3, atol=1e-5)
- assert_allclose(ci.high, ci_ref[1], rtol=5e-3, atol=1e-5)
- # re-run to use the cached value "is" to check id as same object
- assert res._ci is ci
- assert res._ci_cl == 0.95
- ci_ = res.confidence_interval(confidence_level=0.95)
- assert ci_ is ci
- @pytest.mark.parametrize('alternative', ["two-sided", "less", "greater"])
- def test_str(self, alternative):
- rng = np.random.default_rng(189117774084579816190295271136455278291)
- res = stats.dunnett(
- *self.samples_3, control=self.control_3, alternative=alternative,
- rng=rng
- )
- # check some str output
- res_str = str(res)
- assert '(Sample 2 - Control)' in res_str
- assert '95.0%' in res_str
- if alternative == 'less':
- assert '-inf' in res_str
- assert '19.' in res_str
- elif alternative == 'greater':
- assert 'inf' in res_str
- assert '-13.' in res_str
- else:
- assert 'inf' not in res_str
- assert '21.' in res_str
- def test_warnings(self):
- rng = np.random.default_rng(189117774084579816190295271136455278291)
- res = stats.dunnett(
- *self.samples_3, control=self.control_3, rng=rng
- )
- msg = r"Computation of the confidence interval did not converge"
- with pytest.warns(UserWarning, match=msg):
- res._allowance(tol=1e-5)
- def test_raises(self):
- samples, control = self.samples_3, self.control_3
- # alternative
- with pytest.raises(ValueError, match="alternative must be"):
- stats.dunnett(*samples, control=control, alternative='bob')
- # 2D for a sample
- samples_ = copy.deepcopy(samples)
- samples_[0] = [samples_[0]]
- with pytest.raises(ValueError, match="must be 1D arrays"):
- stats.dunnett(*samples_, control=control)
- # 2D for control
- control_ = copy.deepcopy(control)
- control_ = [control_]
- with pytest.raises(ValueError, match="must be 1D arrays"):
- stats.dunnett(*samples, control=control_)
- # No obs in a sample
- samples_ = copy.deepcopy(samples)
- samples_[1] = []
- with pytest.raises(ValueError, match="at least 1 observation"):
- stats.dunnett(*samples_, control=control)
- # No obs in control
- control_ = []
- with pytest.raises(ValueError, match="at least 1 observation"):
- stats.dunnett(*samples, control=control_)
- res = stats.dunnett(*samples, control=control)
- with pytest.raises(ValueError, match="Confidence level must"):
- res.confidence_interval(confidence_level=3)
- @pytest.mark.filterwarnings("ignore:Computation of the confidence")
- @pytest.mark.parametrize('n_samples', [1, 2, 3])
- def test_shapes(self, n_samples):
- rng = np.random.default_rng(689448934110805334)
- samples = rng.normal(size=(n_samples, 10))
- control = rng.normal(size=10)
- res = stats.dunnett(*samples, control=control, rng=rng)
- assert res.statistic.shape == (n_samples,)
- assert res.pvalue.shape == (n_samples,)
- ci = res.confidence_interval()
- assert ci.low.shape == (n_samples,)
- assert ci.high.shape == (n_samples,)
|