test_entropy.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. import math
  2. import pytest
  3. from pytest import raises as assert_raises
  4. import numpy as np
  5. from scipy import stats
  6. from scipy.stats import norm, expon # type: ignore[attr-defined]
  7. from scipy._lib._array_api import make_xp_test_case
  8. from scipy._lib._array_api_no_0d import (xp_assert_close, xp_assert_equal,
  9. xp_assert_less)
  10. skip_xp_backends = pytest.mark.skip_xp_backends
  11. @make_xp_test_case(stats.entropy)
  12. class TestEntropy:
  13. def test_entropy_positive(self, xp):
  14. # See ticket #497
  15. pk = xp.asarray([0.5, 0.2, 0.3])
  16. qk = xp.asarray([0.1, 0.25, 0.65])
  17. eself = stats.entropy(pk, pk)
  18. edouble = stats.entropy(pk, qk)
  19. xp_assert_equal(eself, xp.asarray(0.))
  20. xp_assert_less(-edouble, xp.asarray(0.))
  21. def test_entropy_base(self, xp):
  22. pk = xp.ones(16)
  23. S = stats.entropy(pk, base=2.)
  24. xp_assert_less(xp.abs(S - 4.), xp.asarray(1.e-5))
  25. qk = xp.ones(16)
  26. qk = xp.where(xp.arange(16) < 8, 2., qk)
  27. S = stats.entropy(pk, qk)
  28. S2 = stats.entropy(pk, qk, base=2.)
  29. xp_assert_less(xp.abs(S/S2 - math.log(2.)), xp.asarray(1.e-5))
  30. def test_entropy_zero(self, xp):
  31. # Test for PR-479
  32. x = xp.asarray([0., 1., 2.])
  33. xp_assert_close(stats.entropy(x),
  34. xp.asarray(0.63651416829481278))
  35. def test_entropy_2d(self, xp):
  36. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  37. qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
  38. xp_assert_close(stats.entropy(pk, qk),
  39. xp.asarray([0.1933259, 0.18609809]))
  40. def test_entropy_2d_zero(self, xp):
  41. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  42. qk = xp.asarray([[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]])
  43. xp_assert_close(stats.entropy(pk, qk),
  44. xp.asarray([xp.inf, 0.18609809]))
  45. pk = xp.asarray([[0.0, 0.2], [0.6, 0.3], [0.3, 0.5]])
  46. xp_assert_close(stats.entropy(pk, qk),
  47. xp.asarray([0.17403988, 0.18609809]))
  48. def test_entropy_base_2d_nondefault_axis(self, xp):
  49. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  50. xp_assert_close(stats.entropy(pk, axis=1),
  51. xp.asarray([0.63651417, 0.63651417, 0.66156324]))
  52. def test_entropy_2d_nondefault_axis(self, xp):
  53. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  54. qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
  55. xp_assert_close(stats.entropy(pk, qk, axis=1),
  56. xp.asarray([0.23104906, 0.23104906, 0.12770641]))
  57. def test_entropy_raises_value_error(self, xp):
  58. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  59. qk = xp.asarray([[0.1, 0.2], [0.6, 0.3]])
  60. message = "Array shapes are incompatible for broadcasting."
  61. with pytest.raises(ValueError, match=message):
  62. stats.entropy(pk, qk)
  63. def test_base_entropy_with_axis_0_is_equal_to_default(self, xp):
  64. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  65. xp_assert_close(stats.entropy(pk, axis=0),
  66. stats.entropy(pk))
  67. def test_entropy_with_axis_0_is_equal_to_default(self, xp):
  68. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  69. qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
  70. xp_assert_close(stats.entropy(pk, qk, axis=0),
  71. stats.entropy(pk, qk))
  72. def test_base_entropy_transposed(self, xp):
  73. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  74. xp_assert_close(stats.entropy(pk.T),
  75. stats.entropy(pk, axis=1))
  76. def test_entropy_transposed(self, xp):
  77. pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
  78. qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
  79. xp_assert_close(stats.entropy(pk.T, qk.T),
  80. stats.entropy(pk, qk, axis=1))
  81. def test_entropy_broadcasting(self, xp):
  82. rng = np.random.default_rng(74187315492831452)
  83. x = xp.asarray(rng.random(3))
  84. y = xp.asarray(rng.random((2, 1)))
  85. res = stats.entropy(x, y, axis=-1)
  86. xp_assert_equal(res[0], stats.entropy(x, y[0, ...]))
  87. xp_assert_equal(res[1], stats.entropy(x, y[1, ...]))
  88. def test_entropy_shape_mismatch(self, xp):
  89. x = xp.ones((10, 1, 12))
  90. y = xp.ones((11, 2))
  91. message = "Array shapes are incompatible for broadcasting."
  92. with pytest.raises(ValueError, match=message):
  93. stats.entropy(x, y)
  94. def test_input_validation(self, xp):
  95. x = xp.ones(10)
  96. message = "`base` must be a positive number."
  97. with pytest.raises(ValueError, match=message):
  98. stats.entropy(x, base=-2)
  99. @make_xp_test_case(stats.differential_entropy)
  100. class TestDifferentialEntropy:
  101. """
  102. Vasicek results are compared with the R package vsgoftest.
  103. # library(vsgoftest)
  104. #
  105. # samp <- c(<values>)
  106. # entropy.estimate(x = samp, window = <window_length>)
  107. """
  108. methods = pytest.mark.parametrize('method', [
  109. "vasicek",
  110. "van es",
  111. pytest.param(
  112. "correa",
  113. marks=[
  114. skip_xp_backends("array_api_strict", reason="Invalid fancy indexing"),
  115. skip_xp_backends("dask.array", reason="Invalid fancy indexing"),
  116. ],
  117. ),
  118. "ebrahimi",
  119. ])
  120. def test_differential_entropy_vasicek(self, xp):
  121. random_state = np.random.RandomState(0)
  122. values = random_state.standard_normal(100)
  123. values = xp.asarray(values.tolist())
  124. entropy = stats.differential_entropy(values, method='vasicek')
  125. xp_assert_close(entropy, xp.asarray(1.342551187000946))
  126. entropy = stats.differential_entropy(values, window_length=1,
  127. method='vasicek')
  128. xp_assert_close(entropy, xp.asarray(1.122044177725947))
  129. entropy = stats.differential_entropy(values, window_length=8,
  130. method='vasicek')
  131. xp_assert_close(entropy, xp.asarray(1.349401487550325))
  132. def test_differential_entropy_vasicek_2d_nondefault_axis(self, xp):
  133. random_state = np.random.RandomState(0)
  134. values = random_state.standard_normal((3, 100))
  135. values = xp.asarray(values.tolist())
  136. entropy = stats.differential_entropy(values, axis=1, method='vasicek')
  137. ref = xp.asarray([1.342551187000946, 1.341825903922332, 1.293774601883585])
  138. xp_assert_close(entropy, ref)
  139. entropy = stats.differential_entropy(values, axis=1, window_length=1,
  140. method='vasicek')
  141. ref = xp.asarray([1.122044177725947, 1.10294413850758, 1.129615790292772])
  142. xp_assert_close(entropy, ref)
  143. entropy = stats.differential_entropy(values, axis=1, window_length=8,
  144. method='vasicek')
  145. ref = xp.asarray([1.349401487550325, 1.338514126301301, 1.292331889365405])
  146. xp_assert_close(entropy, ref)
  147. def test_differential_entropy_raises_value_error(self, xp):
  148. random_state = np.random.RandomState(0)
  149. values = random_state.standard_normal((3, 100))
  150. values = xp.asarray(values.tolist())
  151. error_str = (
  152. r"Window length \({window_length}\) must be positive and less "
  153. r"than half the sample size \({sample_size}\)."
  154. )
  155. sample_size = values.shape[1]
  156. for window_length in {-1, 0, sample_size//2, sample_size}:
  157. formatted_error_str = error_str.format(
  158. window_length=window_length,
  159. sample_size=sample_size,
  160. )
  161. with assert_raises(ValueError, match=formatted_error_str):
  162. stats.differential_entropy(
  163. values,
  164. window_length=window_length,
  165. axis=1,
  166. )
  167. def test_base_differential_entropy_with_axis_0_is_equal_to_default(self, xp):
  168. random_state = np.random.RandomState(0)
  169. values = random_state.standard_normal((100, 3))
  170. values = xp.asarray(values.tolist())
  171. entropy = stats.differential_entropy(values, axis=0)
  172. default_entropy = stats.differential_entropy(values)
  173. xp_assert_close(entropy, default_entropy)
  174. def test_base_differential_entropy_transposed(self, xp):
  175. random_state = np.random.RandomState(0)
  176. values = random_state.standard_normal((3, 100))
  177. values = xp.asarray(values.tolist())
  178. xp_assert_close(
  179. stats.differential_entropy(values.T),
  180. stats.differential_entropy(values, axis=1),
  181. )
  182. def test_input_validation(self, xp):
  183. x = np.ones(10)
  184. x = xp.asarray(x.tolist())
  185. message = "`base` must be a positive number or `None`."
  186. with pytest.raises(ValueError, match=message):
  187. stats.differential_entropy(x, base=-2)
  188. message = "`method` must be one of..."
  189. with pytest.raises(ValueError, match=message):
  190. stats.differential_entropy(x, method='ekki-ekki')
  191. def test_window_length_is_none(self, xp):
  192. rng = np.random.default_rng(358923459826738562)
  193. x = xp.asarray(rng.random(size=10))
  194. ref = stats.differential_entropy(x)
  195. res = stats.differential_entropy(x, window_length=None)
  196. xp_assert_close(res, ref, rtol=0.005)
  197. @methods
  198. def test_consistency(self, method, xp):
  199. # test that method is a consistent estimator
  200. n = 10000 if method == 'correa' else 1000000
  201. rvs = stats.norm.rvs(size=n, random_state=0)
  202. rvs = xp.asarray(rvs.tolist())
  203. expected = xp.asarray(float(stats.norm.entropy()))
  204. res = stats.differential_entropy(rvs, method=method)
  205. xp_assert_close(res, expected, rtol=0.005)
  206. # values from differential_entropy reference [6], table 1, n=50, m=7
  207. norm_rmse_std_cases = { # method: (RMSE, STD)
  208. 'vasicek': (0.198, 0.109),
  209. 'van es': (0.212, 0.110),
  210. 'correa': (0.135, 0.112),
  211. 'ebrahimi': (0.128, 0.109)
  212. }
  213. # values from differential_entropy reference [6], table 2, n=50, m=7
  214. expon_rmse_std_cases = { # method: (RMSE, STD)
  215. 'vasicek': (0.194, 0.148),
  216. 'van es': (0.179, 0.149),
  217. 'correa': (0.155, 0.152),
  218. 'ebrahimi': (0.151, 0.148)
  219. }
  220. rmse_std_cases = {norm: norm_rmse_std_cases,
  221. expon: expon_rmse_std_cases}
  222. @methods
  223. @pytest.mark.parametrize('dist', [norm, expon])
  224. def test_rmse_std(self, method, dist, xp):
  225. # test that RMSE and standard deviation of estimators matches values
  226. # given in differential_entropy reference [6]. Incidentally, also
  227. # tests vectorization.
  228. reps, n, m = 10000, 50, 7
  229. expected = self.rmse_std_cases[dist][method]
  230. rmse_expected, std_expected = xp.asarray(expected[0]), xp.asarray(expected[1])
  231. rvs = dist.rvs(size=(reps, n), random_state=0)
  232. rvs = xp.asarray(rvs.tolist())
  233. true_entropy = xp.asarray(float(dist.entropy()))
  234. res = stats.differential_entropy(rvs, window_length=m,
  235. method=method, axis=-1)
  236. xp_assert_close(xp.sqrt(xp.mean((res - true_entropy)**2)),
  237. rmse_expected, atol=0.005)
  238. xp_assert_close(xp.std(res, correction=0), std_expected, atol=0.002)
  239. @pytest.mark.parametrize('n, method', [
  240. (8, 'van es'),
  241. (12, 'ebrahimi'),
  242. (1001, 'vasicek')
  243. ])
  244. def test_method_auto(self, n, method, xp):
  245. rvs = stats.norm.rvs(size=(n,), random_state=0)
  246. rvs = xp.asarray(rvs.tolist())
  247. res1 = stats.differential_entropy(rvs)
  248. res2 = stats.differential_entropy(rvs, method=method)
  249. xp_assert_equal(res1, res2)
  250. @methods
  251. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  252. def test_dtypes_gh21192(self, xp, method, dtype):
  253. # gh-21192 noted a change in the output of method='ebrahimi'
  254. # with integer input. Check that the output is consistent regardless
  255. # of input dtype.
  256. x = [1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 8, 9, 10, 11]
  257. dtype_in = getattr(xp, str(dtype), None)
  258. dtype_out = getattr(xp, str(dtype), xp.asarray(1.).dtype)
  259. res = stats.differential_entropy(xp.asarray(x, dtype=dtype_in), method=method)
  260. ref = stats.differential_entropy(xp.asarray(x, dtype=xp.float64), method=method)
  261. xp_assert_close(res, xp.asarray(ref, dtype=dtype_out)[()])