test_mgc.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import pytest
  2. from pytest import raises as assert_raises, warns as assert_warns
  3. import numpy as np
  4. from numpy.testing import assert_approx_equal, assert_allclose, assert_equal
  5. from scipy.spatial.distance import cdist
  6. from scipy import stats
  7. class TestMGCErrorWarnings:
  8. """ Tests errors and warnings derived from MGC.
  9. """
  10. def test_error_notndarray(self):
  11. # raises error if x or y is not a ndarray
  12. x = np.arange(20)
  13. y = [5] * 20
  14. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  15. assert_raises(ValueError, stats.multiscale_graphcorr, y, x)
  16. def test_error_shape(self):
  17. # raises error if number of samples different (n)
  18. x = np.arange(100).reshape(25, 4)
  19. y = x.reshape(10, 10)
  20. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  21. def test_error_lowsamples(self):
  22. # raises error if samples are low (< 3)
  23. x = np.arange(3)
  24. y = np.arange(3)
  25. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  26. def test_error_nans(self):
  27. # raises error if inputs contain NaNs
  28. x = np.arange(20, dtype=float)
  29. x[0] = np.nan
  30. assert_raises(ValueError, stats.multiscale_graphcorr, x, x)
  31. y = np.arange(20)
  32. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  33. def test_error_wrongdisttype(self):
  34. # raises error if metric is not a function
  35. x = np.arange(20)
  36. compute_distance = 0
  37. assert_raises(ValueError, stats.multiscale_graphcorr, x, x,
  38. compute_distance=compute_distance)
  39. @pytest.mark.parametrize("reps", [
  40. -1, # reps is negative
  41. '1', # reps is not integer
  42. ])
  43. def test_error_reps(self, reps):
  44. # raises error if reps is negative
  45. x = np.arange(20)
  46. assert_raises(ValueError, stats.multiscale_graphcorr, x, x, reps=reps)
  47. def test_warns_reps(self):
  48. # raises warning when reps is less than 1000
  49. x = np.arange(20)
  50. reps = 100
  51. assert_warns(RuntimeWarning, stats.multiscale_graphcorr, x, x, reps=reps)
  52. def test_error_infty(self):
  53. # raises error if input contains infinities
  54. x = np.arange(20)
  55. y = np.ones(20) * np.inf
  56. assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
  57. class TestMGCStat:
  58. """ Test validity of MGC test statistic
  59. """
  60. def setup_method(self):
  61. self.rng = np.random.default_rng(1266219746)
  62. def _simulations(self, samps=100, dims=1, sim_type="", rng=None):
  63. rng = rng or self.rng
  64. # linear simulation
  65. if sim_type == "linear":
  66. x = rng.uniform(-1, 1, size=(samps, 1))
  67. y = x + 0.3 * rng.random(size=(x.size, 1))
  68. # spiral simulation
  69. elif sim_type == "nonlinear":
  70. unif = np.array(self.rng.uniform(0, 5, size=(samps, 1)))
  71. x = unif * np.cos(np.pi * unif)
  72. y = (unif * np.sin(np.pi * unif) +
  73. 0.4*self.rng.random(size=(x.size, 1)))
  74. # independence (tests type I simulation)
  75. elif sim_type == "independence":
  76. u = self.rng.standard_normal(size=(samps, 1))
  77. v = self.rng.standard_normal(size=(samps, 1))
  78. u_2 = self.rng.binomial(1, p=0.5, size=(samps, 1))
  79. v_2 = self.rng.binomial(1, p=0.5, size=(samps, 1))
  80. x = u/3 + 2*u_2 - 1
  81. y = v/3 + 2*v_2 - 1
  82. # raises error if not approved sim_type
  83. else:
  84. raise ValueError("sim_type must be linear, nonlinear, or "
  85. "independence")
  86. # add dimensions of noise for higher dimensions
  87. if dims > 1:
  88. dims_noise = self.rng.standard_normal(size=(samps, dims-1))
  89. x = np.concatenate((x, dims_noise), axis=1)
  90. return x, y
  91. @pytest.mark.xslow
  92. @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
  93. # Reference values produced by `multiscale_graphcorr` ->
  94. # this only guards against unintended changes
  95. ("linear", 0.970137188683, 0.000999000999), # test linear simulation
  96. ("nonlinear", 0.149843048137, 0.000999000999), # test spiral simulation
  97. ("independence", -0.003965353120, 0.4675324675) # test independence simulation
  98. ])
  99. def test_oned(self, sim_type, obs_stat, obs_pvalue):
  100. # generate x and y
  101. rng = np.random.default_rng(8157117705)
  102. x, y = self._simulations(samps=100, dims=1, sim_type=sim_type, rng=rng)
  103. # test stat and pvalue
  104. stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=rng)
  105. assert_allclose(stat, obs_stat)
  106. assert_allclose(pvalue, obs_pvalue)
  107. @pytest.mark.xslow
  108. @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
  109. # Reference values produced by `multiscale_graphcorr` ->
  110. # this only guards against unintended changes
  111. ("linear", 0.193063672972, 0.000999000999), # test linear simulation
  112. ("nonlinear", 0.010042844939, 0.216783216783), # test spiral simulation
  113. ])
  114. def test_fived(self, sim_type, obs_stat, obs_pvalue):
  115. # generate x and y
  116. rng = np.random.default_rng(8157117705)
  117. x, y = self._simulations(samps=100, dims=5, sim_type=sim_type, rng=rng)
  118. # test stat and pvalue
  119. stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=rng)
  120. assert_allclose(stat, obs_stat)
  121. assert_allclose(pvalue, obs_pvalue)
  122. @pytest.mark.xslow
  123. def test_twosamp(self):
  124. # generate x and y
  125. x = self.rng.binomial(100, 0.5, size=(100, 5))
  126. y = self.rng.standard_normal(size=(80, 5))
  127. # test stat and pvalue
  128. stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=self.rng)
  129. assert_approx_equal(stat, 1.0, significant=1)
  130. assert_approx_equal(pvalue, 0.001, significant=1)
  131. # generate x and y
  132. y = self.rng.standard_normal(size=(100, 5))
  133. # test stat and pvalue
  134. stat, pvalue, _ = stats.multiscale_graphcorr(x, y, is_twosamp=True,
  135. random_state=self.rng)
  136. assert_approx_equal(stat, 1.0, significant=1)
  137. assert_approx_equal(pvalue, 0.001, significant=1)
  138. @pytest.mark.xslow
  139. def test_workers(self):
  140. # generate x and y
  141. x, y = self._simulations(samps=100, dims=1, sim_type="linear")
  142. # test stat and pvalue
  143. stat, pvalue, _ = stats.multiscale_graphcorr(x, y, workers=2,
  144. random_state=self.rng)
  145. assert_approx_equal(stat, 0.97, significant=1)
  146. assert_approx_equal(pvalue, 0.001, significant=1)
  147. @pytest.mark.xslow
  148. def test_random_state(self):
  149. # generate x and y
  150. x, y = self._simulations(samps=100, dims=1, sim_type="linear")
  151. # test stat and pvalue
  152. stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
  153. assert_approx_equal(stat, 0.97, significant=1)
  154. assert_approx_equal(pvalue, 0.001, significant=1)
  155. @pytest.mark.xslow
  156. def test_dist_perm(self):
  157. # generate x and y
  158. x, y = self._simulations(samps=100, dims=1, sim_type="nonlinear")
  159. distx = cdist(x, x, metric="euclidean")
  160. disty = cdist(y, y, metric="euclidean")
  161. stat_dist, pvalue_dist, _ = stats.multiscale_graphcorr(distx, disty,
  162. compute_distance=None,
  163. random_state=1)
  164. assert_approx_equal(stat_dist, 0.163, significant=1)
  165. assert_approx_equal(pvalue_dist, 0.001, significant=1)
  166. @pytest.mark.fail_slow(20) # all other tests are XSLOW; we need at least one to run
  167. @pytest.mark.slow
  168. def test_pvalue_literature(self):
  169. # generate x and y
  170. x, y = self._simulations(samps=100, dims=1, sim_type="linear")
  171. # test stat and pvalue
  172. _, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
  173. assert_allclose(pvalue, 1/1001)
  174. @pytest.mark.xslow
  175. def test_alias(self):
  176. # generate x and y
  177. x, y = self._simulations(samps=100, dims=1, sim_type="linear")
  178. res = stats.multiscale_graphcorr(x, y, random_state=1)
  179. assert_equal(res.stat, res.statistic)