# test_generator_mt19937_regressions.py (7.9 KB)

import gc

from numpy.testing import (assert_, assert_array_equal)
import numpy as np
import pytest
from numpy.random import Generator, MT19937

  5. class TestRegression:
  6. def setup_method(self):
  7. self.mt19937 = Generator(MT19937(121263137472525314065))
  8. def test_vonmises_range(self):
  9. # Make sure generated random variables are in [-pi, pi].
  10. # Regression test for ticket #986.
  11. for mu in np.linspace(-7., 7., 5):
  12. r = self.mt19937.vonmises(mu, 1, 50)
  13. assert_(np.all(r > -np.pi) and np.all(r <= np.pi))
  14. def test_hypergeometric_range(self):
  15. # Test for ticket #921
  16. assert_(np.all(self.mt19937.hypergeometric(3, 18, 11, size=10) < 4))
  17. assert_(np.all(self.mt19937.hypergeometric(18, 3, 11, size=10) > 0))
  18. # Test for ticket #5623
  19. args = (2**20 - 2, 2**20 - 2, 2**20 - 2) # Check for 32-bit systems
  20. assert_(self.mt19937.hypergeometric(*args) > 0)
  21. def test_logseries_convergence(self):
  22. # Test for ticket #923
  23. N = 1000
  24. rvsn = self.mt19937.logseries(0.8, size=N)
  25. # these two frequency counts should be close to theoretical
  26. # numbers with this large sample
  27. # theoretical large N result is 0.49706795
  28. freq = np.sum(rvsn == 1) / N
  29. msg = f'Frequency was {freq:f}, should be > 0.45'
  30. assert_(freq > 0.45, msg)
  31. # theoretical large N result is 0.19882718
  32. freq = np.sum(rvsn == 2) / N
  33. msg = f'Frequency was {freq:f}, should be < 0.23'
  34. assert_(freq < 0.23, msg)
  35. def test_shuffle_mixed_dimension(self):
  36. # Test for trac ticket #2074
  37. for t in [[1, 2, 3, None],
  38. [(1, 1), (2, 2), (3, 3), None],
  39. [1, (2, 2), (3, 3), None],
  40. [(1, 1), 2, 3, None]]:
  41. mt19937 = Generator(MT19937(12345))
  42. shuffled = np.array(t, dtype=object)
  43. mt19937.shuffle(shuffled)
  44. expected = np.array([t[2], t[0], t[3], t[1]], dtype=object)
  45. assert_array_equal(np.array(shuffled, dtype=object), expected)
  46. def test_call_within_randomstate(self):
  47. # Check that custom BitGenerator does not call into global state
  48. res = np.array([1, 8, 0, 1, 5, 3, 3, 8, 1, 4])
  49. for i in range(3):
  50. mt19937 = Generator(MT19937(i))
  51. m = Generator(MT19937(4321))
  52. # If m.state is not honored, the result will change
  53. assert_array_equal(m.choice(10, size=10, p=np.ones(10)/10.), res)
  54. def test_multivariate_normal_size_types(self):
  55. # Test for multivariate_normal issue with 'size' argument.
  56. # Check that the multivariate_normal size argument can be a
  57. # numpy integer.
  58. self.mt19937.multivariate_normal([0], [[0]], size=1)
  59. self.mt19937.multivariate_normal([0], [[0]], size=np.int_(1))
  60. self.mt19937.multivariate_normal([0], [[0]], size=np.int64(1))
  61. def test_beta_small_parameters(self):
  62. # Test that beta with small a and b parameters does not produce
  63. # NaNs due to roundoff errors causing 0 / 0, gh-5851
  64. x = self.mt19937.beta(0.0001, 0.0001, size=100)
  65. assert_(not np.any(np.isnan(x)), 'Nans in mt19937.beta')
  66. def test_beta_very_small_parameters(self):
  67. # gh-24203: beta would hang with very small parameters.
  68. self.mt19937.beta(1e-49, 1e-40)
  69. def test_beta_ridiculously_small_parameters(self):
  70. # gh-24266: beta would generate nan when the parameters
  71. # were subnormal or a small multiple of the smallest normal.
  72. tiny = np.finfo(1.0).tiny
  73. x = self.mt19937.beta(tiny/32, tiny/40, size=50)
  74. assert not np.any(np.isnan(x))
  75. def test_beta_expected_zero_frequency(self):
  76. # gh-24475: For small a and b (e.g. a=0.0025, b=0.0025), beta
  77. # would generate too many zeros.
  78. a = 0.0025
  79. b = 0.0025
  80. n = 1000000
  81. x = self.mt19937.beta(a, b, size=n)
  82. nzeros = np.count_nonzero(x == 0)
  83. # beta CDF at x = np.finfo(np.double).smallest_subnormal/2
  84. # is p = 0.0776169083131899, e.g,
  85. #
  86. # import numpy as np
  87. # from mpmath import mp
  88. # mp.dps = 160
  89. # x = mp.mpf(np.finfo(np.float64).smallest_subnormal)/2
  90. # # CDF of the beta distribution at x:
  91. # p = mp.betainc(a, b, x1=0, x2=x, regularized=True)
  92. # n = 1000000
  93. # exprected_freq = float(n*p)
  94. #
  95. expected_freq = 77616.90831318991
  96. assert 0.95*expected_freq < nzeros < 1.05*expected_freq
  97. def test_choice_sum_of_probs_tolerance(self):
  98. # The sum of probs should be 1.0 with some tolerance.
  99. # For low precision dtypes the tolerance was too tight.
  100. # See numpy github issue 6123.
  101. a = [1, 2, 3]
  102. counts = [4, 4, 2]
  103. for dt in np.float16, np.float32, np.float64:
  104. probs = np.array(counts, dtype=dt) / sum(counts)
  105. c = self.mt19937.choice(a, p=probs)
  106. assert_(c in a)
  107. with pytest.raises(ValueError):
  108. self.mt19937.choice(a, p=probs*0.9)
  109. def test_shuffle_of_array_of_different_length_strings(self):
  110. # Test that permuting an array of different length strings
  111. # will not cause a segfault on garbage collection
  112. # Tests gh-7710
  113. a = np.array(['a', 'a' * 1000])
  114. for _ in range(100):
  115. self.mt19937.shuffle(a)
  116. # Force Garbage Collection - should not segfault.
  117. import gc
  118. gc.collect()
  119. def test_shuffle_of_array_of_objects(self):
  120. # Test that permuting an array of objects will not cause
  121. # a segfault on garbage collection.
  122. # See gh-7719
  123. a = np.array([np.arange(1), np.arange(4)], dtype=object)
  124. for _ in range(1000):
  125. self.mt19937.shuffle(a)
  126. # Force Garbage Collection - should not segfault.
  127. import gc
  128. gc.collect()
  129. def test_permutation_subclass(self):
  130. class N(np.ndarray):
  131. pass
  132. mt19937 = Generator(MT19937(1))
  133. orig = np.arange(3).view(N)
  134. perm = mt19937.permutation(orig)
  135. assert_array_equal(perm, np.array([2, 0, 1]))
  136. assert_array_equal(orig, np.arange(3).view(N))
  137. class M:
  138. a = np.arange(5)
  139. def __array__(self, dtype=None, copy=None):
  140. return self.a
  141. mt19937 = Generator(MT19937(1))
  142. m = M()
  143. perm = mt19937.permutation(m)
  144. assert_array_equal(perm, np.array([4, 1, 3, 0, 2]))
  145. assert_array_equal(m.__array__(), np.arange(5))
  146. def test_gamma_0(self):
  147. assert self.mt19937.standard_gamma(0.0) == 0.0
  148. assert_array_equal(self.mt19937.standard_gamma([0.0]), 0.0)
  149. actual = self.mt19937.standard_gamma([0.0], dtype='float')
  150. expected = np.array([0.], dtype=np.float32)
  151. assert_array_equal(actual, expected)
  152. def test_geometric_tiny_prob(self):
  153. # Regression test for gh-17007.
  154. # When p = 1e-30, the probability that a sample will exceed 2**63-1
  155. # is 0.9999999999907766, so we expect the result to be all 2**63-1.
  156. assert_array_equal(self.mt19937.geometric(p=1e-30, size=3),
  157. np.iinfo(np.int64).max)
  158. def test_zipf_large_parameter(self):
  159. # Regression test for part of gh-9829: a call such as rng.zipf(10000)
  160. # would hang.
  161. n = 8
  162. sample = self.mt19937.zipf(10000, size=n)
  163. assert_array_equal(sample, np.ones(n, dtype=np.int64))
  164. def test_zipf_a_near_1(self):
  165. # Regression test for gh-9829: a call such as rng.zipf(1.0000000000001)
  166. # would hang.
  167. n = 100000
  168. sample = self.mt19937.zipf(1.0000000000001, size=n)
  169. # Not much of a test, but let's do something more than verify that
  170. # it doesn't hang. Certainly for a monotonically decreasing
  171. # discrete distribution truncated to signed 64 bit integers, more
  172. # than half should be less than 2**62.
  173. assert np.count_nonzero(sample < 2**62) > n/2