# test_mstats_extras.py (7.1 KB)
import numpy as np
import numpy.ma as ma
import scipy.stats.mstats as ms
from numpy.testing import (assert_equal, assert_almost_equal, assert_,
                           assert_allclose)


  6. def test_compare_medians_ms():
  7. x = np.arange(7)
  8. y = x + 10
  9. assert_almost_equal(ms.compare_medians_ms(x, y), 0)
  10. y2 = np.linspace(0, 1, num=10)
  11. assert_almost_equal(ms.compare_medians_ms(x, y2), 0.017116406778)
  12. def test_hdmedian():
  13. # 1-D array
  14. x = ma.arange(11)
  15. assert_allclose(ms.hdmedian(x), 5, rtol=1e-14)
  16. x.mask = ma.make_mask(x)
  17. x.mask[:7] = False
  18. assert_allclose(ms.hdmedian(x), 3, rtol=1e-14)
  19. # Check that `var` keyword returns a value. TODO: check whether returned
  20. # value is actually correct.
  21. assert_(ms.hdmedian(x, var=True).size == 2)
  22. # 2-D array
  23. x2 = ma.arange(22).reshape((11, 2))
  24. assert_allclose(ms.hdmedian(x2, axis=0), [10, 11])
  25. x2.mask = ma.make_mask(x2)
  26. x2.mask[:7, :] = False
  27. assert_allclose(ms.hdmedian(x2, axis=0), [6, 7])
  28. def test_rsh():
  29. rng = np.random.default_rng(806795795)
  30. x = rng.standard_normal(100)
  31. res = ms.rsh(x)
  32. # Just a sanity check that the code runs and output shape is correct.
  33. # TODO: check that implementation is correct.
  34. assert_(res.shape == x.shape)
  35. # Check points keyword
  36. res = ms.rsh(x, points=[0, 1.])
  37. assert_(res.size == 2)
  38. def test_mjci():
  39. # Tests the Marits-Jarrett estimator
  40. data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
  41. 296,299,306,376,428,515,666,1310,2611])
  42. assert_almost_equal(ms.mjci(data),[55.76819,45.84028,198.87875],5)
  43. def test_trimmed_mean_ci():
  44. # Tests the confidence intervals of the trimmed mean.
  45. data = ma.array([545,555,558,572,575,576,578,580,
  46. 594,605,635,651,653,661,666])
  47. assert_almost_equal(ms.trimmed_mean(data,0.2), 596.2, 1)
  48. assert_equal(np.round(ms.trimmed_mean_ci(data,(0.2,0.2)),1),
  49. [561.8, 630.6])
  50. def test_idealfourths():
  51. # Tests ideal-fourths
  52. test = np.arange(100)
  53. assert_almost_equal(np.asarray(ms.idealfourths(test)),
  54. [24.416667,74.583333],6)
  55. test_2D = test.repeat(3).reshape(-1,3)
  56. assert_almost_equal(ms.idealfourths(test_2D, axis=0),
  57. [[24.416667,24.416667,24.416667],
  58. [74.583333,74.583333,74.583333]],6)
  59. assert_almost_equal(ms.idealfourths(test_2D, axis=1),
  60. test.repeat(2).reshape(-1,2))
  61. test = [0, 0]
  62. _result = ms.idealfourths(test)
  63. assert_(np.isnan(_result).all())
  64. class TestQuantiles:
  65. data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
  66. 0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
  67. 0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
  68. 0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
  69. 0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
  70. 0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
  71. 0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
  72. 0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
  73. 0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
  74. 0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
  75. 0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
  76. 0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
  77. 0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
  78. 0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
  79. 0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
  80. 0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
  81. 0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
  82. 0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
  83. 0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
  84. 0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
  85. def test_hdquantiles(self):
  86. data = self.data
  87. assert_almost_equal(ms.hdquantiles(data,[0., 1.]),
  88. [0.006514031, 0.995309248])
  89. hdq = ms.hdquantiles(data,[0.25, 0.5, 0.75])
  90. assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])
  91. data = np.array(data).reshape(10,10)
  92. hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0)
  93. assert_almost_equal(hdq[:,0], ms.hdquantiles(data[:,0],[0.25,0.5,0.75]))
  94. assert_almost_equal(hdq[:,-1], ms.hdquantiles(data[:,-1],[0.25,0.5,0.75]))
  95. hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0,var=True)
  96. assert_almost_equal(hdq[...,0],
  97. ms.hdquantiles(data[:,0],[0.25,0.5,0.75],var=True))
  98. assert_almost_equal(hdq[...,-1],
  99. ms.hdquantiles(data[:,-1],[0.25,0.5,0.75], var=True))
  100. def test_hdquantiles_sd(self):
  101. # Standard deviation is a jackknife estimator, so we can check if
  102. # the efficient version (hdquantiles_sd) matches a rudimentary,
  103. # but clear version here.
  104. hd_std_errs = ms.hdquantiles_sd(self.data)
  105. # jacknnife standard error, Introduction to the Bootstrap Eq. 11.5
  106. n = len(self.data)
  107. jdata = np.broadcast_to(self.data, (n, n))
  108. jselector = np.logical_not(np.eye(n)) # leave out one sample each row
  109. jdata = jdata[jselector].reshape(n, n-1)
  110. jdist = ms.hdquantiles(jdata, axis=1)
  111. jdist_mean = np.mean(jdist, axis=0)
  112. jstd = ((n-1)/n * np.sum((jdist - jdist_mean)**2, axis=0))**.5
  113. assert_almost_equal(hd_std_errs, jstd)
  114. # Test actual values for good measure
  115. assert_almost_equal(hd_std_errs, [0.0379258, 0.0380656, 0.0380013])
  116. two_data_points = ms.hdquantiles_sd([1, 2])
  117. assert_almost_equal(two_data_points, [0.5, 0.5, 0.5])
  118. def test_mquantiles_cimj(self):
  119. # Only test that code runs, implementation not checked for correctness
  120. ci_lower, ci_upper = ms.mquantiles_cimj(self.data)
  121. assert_(ci_lower.size == ci_upper.size == 3)
  122. def test_median_cihs():
  123. # Basic test against R library EnvStats function `eqnpar`, e.g.
  124. # library(EnvStats)
  125. # options(digits=8)
  126. # x = c(0.88612955, 0.35242375, 0.66240904, 0.94617974, 0.10929913,
  127. # 0.76699506, 0.88550655, 0.62763754, 0.76818588, 0.68506508,
  128. # 0.88043148, 0.03911248, 0.93805564, 0.95326961, 0.25291112,
  129. # 0.16128487, 0.49784577, 0.24588924, 0.6597, 0.92239679)
  130. # eqnpar(x, p=0.5,
  131. # ci.method = "interpolate", approx.conf.level = 0.95, ci = TRUE)
  132. rng = np.random.default_rng(8824288259505800535)
  133. x = rng.random(size=20)
  134. assert_allclose(ms.median_cihs(x), (0.38663198, 0.88431272))
  135. # SciPy's 90% CI upper limit doesn't match that of EnvStats eqnpar. SciPy
  136. # doesn't look wrong, and it agrees with a different reference,
  137. # `median_confint_hs` from `hoehleatsu/quantileCI`.
  138. # In (e.g.) Colab with R runtime:
  139. # devtools::install_github("hoehleatsu/quantileCI")
  140. # library(quantileCI)
  141. # median_confint_hs(x=x, conf.level=0.90, interpolate=TRUE)
  142. assert_allclose(ms.median_cihs(x, 0.1), (0.48319773366, 0.88094268050))