test_discrete_distns.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747
  1. import pytest
  2. import itertools
  3. import warnings
  4. from scipy import stats
  5. from scipy.stats import (betabinom, betanbinom, hypergeom, nhypergeom,
  6. bernoulli, boltzmann, skellam, zipf, zipfian, binom,
  7. nbinom, nchypergeom_fisher, nchypergeom_wallenius,
  8. randint, poisson_binom)
  9. import numpy as np
  10. from numpy.testing import (
  11. assert_almost_equal, assert_equal, assert_allclose
  12. )
  13. from scipy.special import binom as special_binom
  14. from scipy.optimize import root_scalar
  15. from scipy.integrate import quad
  16. # The expected values were computed with Wolfram Alpha, using
  17. # the expression CDF[HypergeometricDistribution[N, n, M], k].
  18. @pytest.mark.parametrize('k, M, n, N, expected, rtol',
  19. [(3, 10, 4, 5,
  20. 0.9761904761904762, 1e-15),
  21. (107, 10000, 3000, 215,
  22. 0.9999999997226765, 1e-15),
  23. (10, 10000, 3000, 215,
  24. 2.681682217692179e-21, 5e-11)])
  25. def test_hypergeom_cdf(k, M, n, N, expected, rtol):
  26. p = hypergeom.cdf(k, M, n, N)
  27. assert_allclose(p, expected, rtol=rtol)
  28. # The expected values were computed with Wolfram Alpha, using
  29. # the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
  30. @pytest.mark.parametrize('k, M, n, N, expected, rtol',
  31. [(25, 10000, 3000, 215,
  32. 0.9999999999052958, 1e-15),
  33. (125, 10000, 3000, 215,
  34. 1.4416781705752128e-18, 5e-11)])
  35. def test_hypergeom_sf(k, M, n, N, expected, rtol):
  36. p = hypergeom.sf(k, M, n, N)
  37. assert_allclose(p, expected, rtol=rtol)
  38. def test_hypergeom_logpmf():
  39. # symmetries test
  40. # f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
  41. k = 5
  42. N = 50
  43. K = 10
  44. n = 5
  45. logpmf1 = hypergeom.logpmf(k, N, K, n)
  46. logpmf2 = hypergeom.logpmf(n - k, N, N - K, n)
  47. logpmf3 = hypergeom.logpmf(K - k, N, K, N - n)
  48. logpmf4 = hypergeom.logpmf(k, N, n, K)
  49. assert_almost_equal(logpmf1, logpmf2, decimal=12)
  50. assert_almost_equal(logpmf1, logpmf3, decimal=12)
  51. assert_almost_equal(logpmf1, logpmf4, decimal=12)
  52. # test related distribution
  53. # Bernoulli distribution if n = 1
  54. k = 1
  55. N = 10
  56. K = 7
  57. n = 1
  58. hypergeom_logpmf = hypergeom.logpmf(k, N, K, n)
  59. bernoulli_logpmf = bernoulli.logpmf(k, K/N)
  60. assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12)
  61. def test_nhypergeom_pmf():
  62. # test with hypergeom
  63. M, n, r = 45, 13, 8
  64. k = 6
  65. NHG = nhypergeom.pmf(k, M, n, r)
  66. HG = hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
  67. assert_allclose(HG, NHG, rtol=1e-10)
  68. def test_nhypergeom_pmfcdf():
  69. # test pmf and cdf with arbitrary values.
  70. M = 8
  71. n = 3
  72. r = 4
  73. support = np.arange(n+1)
  74. pmf = nhypergeom.pmf(support, M, n, r)
  75. cdf = nhypergeom.cdf(support, M, n, r)
  76. assert_allclose(pmf, [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
  77. assert_allclose(cdf, [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
  78. def test_nhypergeom_r0():
  79. # test with `r = 0`.
  80. M = 10
  81. n = 3
  82. r = 0
  83. pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r)
  84. assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13)
  85. def test_nhypergeom_rvs_shape():
  86. # Check that when given a size with more dimensions than the
  87. # dimensions of the broadcast parameters, rvs returns an array
  88. # with the correct shape.
  89. x = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3))
  90. assert x.shape == (5, 1, 2, 3)
  91. def test_nhypergeom_accuracy():
  92. # Check that nhypergeom.rvs post-gh-13431 gives the same values as
  93. # inverse transform sampling
  94. rng = np.random.RandomState(0)
  95. x = nhypergeom.rvs(22, 7, 11, size=100, random_state=rng)
  96. rng = np.random.RandomState(0)
  97. p = rng.uniform(size=100)
  98. y = nhypergeom.ppf(p, 22, 7, 11)
  99. assert_equal(x, y)
  100. def test_boltzmann_upper_bound():
  101. k = np.arange(-3, 5)
  102. N = 1
  103. p = boltzmann.pmf(k, 0.123, N)
  104. expected = k == 0
  105. assert_equal(p, expected)
  106. lam = np.log(2)
  107. N = 3
  108. p = boltzmann.pmf(k, lam, N)
  109. expected = [0, 0, 0, 4/7, 2/7, 1/7, 0, 0]
  110. assert_allclose(p, expected, rtol=1e-13)
  111. c = boltzmann.cdf(k, lam, N)
  112. expected = [0, 0, 0, 4/7, 6/7, 1, 1, 1]
  113. assert_allclose(c, expected, rtol=1e-13)
  114. def test_betabinom_a_and_b_unity():
  115. # test limiting case that betabinom(n, 1, 1) is a discrete uniform
  116. # distribution from 0 to n
  117. n = 20
  118. k = np.arange(n + 1)
  119. p = betabinom(n, 1, 1).pmf(k)
  120. expected = np.repeat(1 / (n + 1), n + 1)
  121. assert_almost_equal(p, expected)
  122. @pytest.mark.parametrize('dtypes', itertools.product(*[(int, float)]*3))
  123. def test_betabinom_stats_a_and_b_integers_gh18026(dtypes):
  124. # gh-18026 reported that `betabinom` kurtosis calculation fails when some
  125. # parameters are integers. Check that this is resolved.
  126. n_type, a_type, b_type = dtypes
  127. n, a, b = n_type(10), a_type(2), b_type(3)
  128. assert_allclose(betabinom.stats(n, a, b, moments='k'), -0.6904761904761907)
  129. def test_betabinom_bernoulli():
  130. # test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
  131. a = 2.3
  132. b = 0.63
  133. k = np.arange(2)
  134. p = betabinom(1, a, b).pmf(k)
  135. expected = bernoulli(a / (a + b)).pmf(k)
  136. assert_almost_equal(p, expected)
  137. def test_issue_10317():
  138. alpha, n, p = 0.9, 10, 1
  139. assert_equal(nbinom.interval(confidence=alpha, n=n, p=p), (0, 0))
  140. def test_issue_11134():
  141. alpha, n, p = 0.95, 10, 0
  142. assert_equal(binom.interval(confidence=alpha, n=n, p=p), (0, 0))
  143. def test_issue_7406():
  144. rng = np.random.default_rng(4763112764)
  145. assert_equal(binom.ppf(rng.random(10), 0, 0.5), 0)
  146. # Also check that endpoints (q=0, q=1) are correct
  147. assert_equal(binom.ppf(0, 0, 0.5), -1)
  148. assert_equal(binom.ppf(1, 0, 0.5), 0)
  149. def test_issue_5122():
  150. rng = np.random.default_rng(8312492117)
  151. p = 0
  152. n = rng.integers(100, size=10)
  153. x = 0
  154. ppf = binom.ppf(x, n, p)
  155. assert_equal(ppf, -1)
  156. x = np.linspace(0.01, 0.99, 10)
  157. ppf = binom.ppf(x, n, p)
  158. assert_equal(ppf, 0)
  159. x = 1
  160. ppf = binom.ppf(x, n, p)
  161. assert_equal(ppf, n)
  162. def test_issue_1603():
  163. assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0)
  164. def test_issue_5503():
  165. p = 0.5
  166. x = np.logspace(3, 14, 12)
  167. assert_allclose(binom.cdf(x, 2*x, p), 0.5, atol=1e-2)
  168. @pytest.mark.parametrize('x, n, p, cdf_desired', [
  169. (300, 1000, 3/10, 0.51559351981411995636),
  170. (3000, 10000, 3/10, 0.50493298381929698016),
  171. (30000, 100000, 3/10, 0.50156000591726422864),
  172. (300000, 1000000, 3/10, 0.50049331906666960038),
  173. (3000000, 10000000, 3/10, 0.50015600124585261196),
  174. (30000000, 100000000, 3/10, 0.50004933192735230102),
  175. (30010000, 100000000, 3/10, 0.98545384016570790717),
  176. (29990000, 100000000, 3/10, 0.01455017177985268670),
  177. (29950000, 100000000, 3/10, 5.02250963487432024943e-28),
  178. (300_000_000, 1_000_000_000, 3/10, 0.50001560012523938),
  179. (3_000_000_000, 10_000_000_000, 3/10, 0.50000493319275)
  180. ])
  181. def test_issue_5503pt2(x, n, p, cdf_desired):
  182. assert_allclose(binom.cdf(x, n, p), cdf_desired)
  183. def test_issue_5503pt3():
  184. # From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]
  185. assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384)
  186. def test_issue_6682():
  187. # Reference value from R:
  188. # options(digits=16)
  189. # print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
  190. assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35)
  191. def test_issue_19747():
  192. # test that negative k does not raise an error in nbinom.logcdf
  193. result = nbinom.logcdf([5, -1, 1], 5, 0.5)
  194. reference = [-0.47313352, -np.inf, -2.21297293]
  195. assert_allclose(result, reference)
  196. def test_boost_divide_by_zero_issue_15101():
  197. n = 1000
  198. p = 0.01
  199. k = 996
  200. assert_allclose(binom.pmf(k, n, p), 0.0)
  201. def test_skellam_gh11474():
  202. # test issue reported in gh-11474 caused by `cdfchn`
  203. mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
  204. cdf = skellam.cdf(0, mu, mu)
  205. # generated in R
  206. # library(skellam)
  207. # options(digits = 16)
  208. # mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
  209. # pskellam(0, mu, mu, TRUE)
  210. cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
  211. 0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
  212. 0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
  213. assert_allclose(cdf, cdf_expected)
  214. class TestZipfian:
  215. def test_zipfian_asymptotic(self):
  216. # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo
  217. a = 6.5
  218. N = 10000000
  219. k = np.arange(1, 21)
  220. assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a))
  221. assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a))
  222. assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a))
  223. assert_allclose(zipfian.stats(a, N, moments='msvk'),
  224. zipf.stats(a, moments='msvk'))
  225. def test_zipfian_continuity(self):
  226. # test that zipfian(0.999999, n) ~ zipfian(1.000001, n)
  227. # (a = 1 switches between methods of calculating harmonic sum)
  228. alt1, agt1 = 0.99999999, 1.00000001
  229. N = 30
  230. k = np.arange(1, N + 1)
  231. assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N),
  232. rtol=5e-7)
  233. assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N),
  234. rtol=5e-7)
  235. assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N),
  236. rtol=5e-7)
  237. assert_allclose(zipfian.stats(alt1, N, moments='msvk'),
  238. zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7)
  239. def test_zipfian_R(self):
  240. # test against R VGAM package
  241. # library(VGAM)
  242. # k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
  243. # a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
  244. # 4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
  245. # n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
  246. # pmf <- dzipf(k, N = n, shape = a)
  247. # cdf <- pzipf(k, N = n, shape = a)
  248. # print(pmf)
  249. # print(cdf)
  250. rng = np.random.RandomState(0)
  251. k = rng.randint(1, 20, size=10)
  252. a = rng.rand(10)*10 + 1
  253. n = rng.randint(1, 100, size=10)
  254. pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
  255. 3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
  256. 5.860662e-06, 1.053948e-04]
  257. cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
  258. 0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
  259. # skip the first point; zipUC is not accurate for low a, n
  260. assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6)
  261. assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5)
  262. rng = np.random.RandomState(0)
  263. naive_tests = np.vstack((np.logspace(-2, 1, 10),
  264. rng.randint(2, 40, 10))).T
  265. @pytest.mark.parametrize("a, n", naive_tests)
  266. def test_zipfian_naive(self, a, n):
  267. # test against bare-bones implementation
  268. @np.vectorize
  269. def Hns(n, s):
  270. """Naive implementation of harmonic sum"""
  271. return (1/np.arange(1, n+1)**s).sum()
  272. @np.vectorize
  273. def pzip(k, a, n):
  274. """Naive implementation of zipfian pmf"""
  275. if k < 1 or k > n:
  276. return 0.
  277. else:
  278. return 1 / k**a / Hns(n, a)
  279. k = np.arange(n+1)
  280. pmf = pzip(k, a, n)
  281. cdf = np.cumsum(pmf)
  282. mean = np.average(k, weights=pmf)
  283. var = np.average((k - mean)**2, weights=pmf)
  284. std = var**0.5
  285. skew = np.average(((k-mean)/std)**3, weights=pmf)
  286. kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3
  287. assert_allclose(zipfian.pmf(k, a, n), pmf)
  288. assert_allclose(zipfian.cdf(k, a, n), cdf)
  289. assert_allclose(zipfian.stats(a, n, moments="mvsk"),
  290. [mean, var, skew, kurtosis])
  291. def test_pmf_integer_k(self):
  292. k = np.arange(0, 1000)
  293. k_int32 = k.astype(np.int32)
  294. dist = zipfian(111, 22)
  295. pmf = dist.pmf(k)
  296. pmf_k_int32 = dist.pmf(k_int32)
  297. assert_equal(pmf, pmf_k_int32)
  298. # Reference values were computed with mpmath.
  299. @pytest.mark.parametrize(
  300. 'k, a, n, ref',
  301. [(3, 1 + 1e-12, 10, 0.11380571738244807),
  302. (995, 1 + 1e-9, 1000, 0.0001342634472310051)]
  303. )
  304. def test_pmf_against_mpmath(self, k, a, n, ref):
  305. p = zipfian.pmf(k, a, n)
  306. assert_allclose(p, ref, 5e-16)
  307. # Reference values were computed with mpmath.
  308. @pytest.mark.parametrize(
  309. 'k, a, n, ref',
  310. [(4990, 1.25, 5000, 5.780138225335147e-05),
  311. (9998, 3.5, 10000, 1.775352757966365e-14),
  312. (50000, 3.5, 100000, 5.227803621367486e-13),
  313. (10, 6.0, 100, 1.523108153557902e-06),
  314. (95, 6.0, 100, 5.572438078308601e-12)]
  315. )
  316. def test_sf_against_mpmath(self, k, a, n, ref):
  317. sf = zipfian.sf(k, a, n)
  318. assert_allclose(sf, ref, rtol=8e-15)
  319. # Reference values were computed with mpmath.
  320. @pytest.mark.parametrize(
  321. 'a, n, ref',
  322. [(1 + 1e-9, 100, 19.27756356649707),
  323. (1 + 1e-7, 100000, 8271.194454731552),
  324. (1.001, 3, 1.6360225521183804)]
  325. )
  326. def test_mean_against_mpmath(self, a, n, ref):
  327. m = zipfian.mean(a, n)
  328. assert_allclose(m, ref, rtol=8e-15)
  329. def test_ridiculously_large_n(self):
  330. # This should return nan with no errors or warnings.
  331. a = 2.5
  332. n = 1e100
  333. p = zipfian.pmf(10, a, n)
  334. assert_equal(p, np.nan)
  335. class TestNCH:
  336. def setup_method(self):
  337. rng = np.random.default_rng(7162434334)
  338. shape = (2, 4, 3)
  339. max_m = 100
  340. m1 = rng.integers(1, max_m, size=shape) # red balls
  341. m2 = rng.integers(1, max_m, size=shape) # white balls
  342. N = m1 + m2 # total balls
  343. n = randint.rvs(0, N, size=N.shape, random_state=rng) # number of draws
  344. xl = np.maximum(0, n-m2) # lower bound of support
  345. xu = np.minimum(n, m1) # upper bound of support
  346. x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
  347. odds = rng.random(x.shape)*2
  348. self.x, self.N, self.m1, self.n, self.odds = x, N, m1, n, odds
  349. # test output is more readable when function names (strings) are passed
  350. @pytest.mark.parametrize('dist_name',
  351. ['nchypergeom_fisher', 'nchypergeom_wallenius'])
  352. def test_nch_hypergeom(self, dist_name):
  353. # Both noncentral hypergeometric distributions reduce to the
  354. # hypergeometric distribution when odds = 1
  355. dists = {'nchypergeom_fisher': nchypergeom_fisher,
  356. 'nchypergeom_wallenius': nchypergeom_wallenius}
  357. dist = dists[dist_name]
  358. x, N, m1, n = self.x, self.N, self.m1, self.n
  359. assert_allclose(dist.pmf(x, N, m1, n, odds=1),
  360. hypergeom.pmf(x, N, m1, n))
  361. def test_nchypergeom_fisher_naive(self):
  362. # test against a very simple implementation
  363. x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds
  364. @np.vectorize
  365. def pmf_mean_var(x, N, m1, n, w):
  366. # simple implementation of nchypergeom_fisher pmf
  367. m2 = N - m1
  368. xl = np.maximum(0, n-m2)
  369. xu = np.minimum(n, m1)
  370. def f(x):
  371. t1 = special_binom(m1, x)
  372. t2 = special_binom(m2, n - x)
  373. return t1 * t2 * w**x
  374. def P(k):
  375. return sum(f(y)*y**k for y in range(xl, xu + 1))
  376. P0 = P(0)
  377. P1 = P(1)
  378. P2 = P(2)
  379. pmf = f(x) / P0
  380. mean = P1 / P0
  381. var = P2 / P0 - (P1 / P0)**2
  382. return pmf, mean, var
  383. pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
  384. assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
  385. assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
  386. mean)
  387. assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
  388. var)
  389. def test_nchypergeom_wallenius_naive(self):
  390. # test against a very simple implementation
  391. rng = np.random.RandomState(2)
  392. shape = (2, 4, 3)
  393. max_m = 100
  394. m1 = rng.randint(1, max_m, size=shape)
  395. m2 = rng.randint(1, max_m, size=shape)
  396. N = m1 + m2
  397. n = randint.rvs(0, N, size=N.shape, random_state=rng)
  398. xl = np.maximum(0, n-m2)
  399. xu = np.minimum(n, m1)
  400. x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
  401. w = rng.rand(*x.shape)*2
  402. def support(N, m1, n, w):
  403. m2 = N - m1
  404. xl = np.maximum(0, n-m2)
  405. xu = np.minimum(n, m1)
  406. return xl, xu
  407. @np.vectorize
  408. def mean(N, m1, n, w):
  409. m2 = N - m1
  410. xl, xu = support(N, m1, n, w)
  411. def fun(u):
  412. return u/m1 + (1 - (n-u)/m2)**w - 1
  413. return root_scalar(fun, bracket=(xl, xu)).root
  414. with warnings.catch_warnings():
  415. warnings.filterwarnings("ignore", category=RuntimeWarning,
  416. message="invalid value encountered in mean")
  417. assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
  418. mean(N, m1, n, w), rtol=2e-2)
  419. @np.vectorize
  420. def variance(N, m1, n, w):
  421. m2 = N - m1
  422. u = mean(N, m1, n, w)
  423. a = u * (m1 - u)
  424. b = (n-u)*(u + m2 - n)
  425. return N*a*b / ((N-1) * (m1*b + m2*a))
  426. with warnings.catch_warnings():
  427. warnings.filterwarnings("ignore", category=RuntimeWarning,
  428. message="invalid value encountered in mean")
  429. assert_allclose(
  430. nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
  431. variance(N, m1, n, w),
  432. rtol=5e-2
  433. )
  434. @np.vectorize
  435. def pmf(x, N, m1, n, w):
  436. m2 = N - m1
  437. xl, xu = support(N, m1, n, w)
  438. def integrand(t):
  439. D = w*(m1 - x) + (m2 - (n-x))
  440. res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
  441. return res
  442. def f(x):
  443. t1 = special_binom(m1, x)
  444. t2 = special_binom(m2, n - x)
  445. the_integral = quad(integrand, 0, 1,
  446. epsrel=1e-16, epsabs=1e-16)
  447. return t1 * t2 * the_integral[0]
  448. return f(x)
  449. pmf0 = pmf(x, N, m1, n, w)
  450. pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)
  451. atol, rtol = 1e-6, 1e-6
  452. i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
  453. assert i.sum() > np.prod(shape) / 2 # works at least half the time
  454. # for those that fail, discredit the naive implementation
  455. for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
  456. # get the support
  457. m2 = N - m1
  458. xl, xu = support(N, m1, n, w)
  459. x = np.arange(xl, xu + 1)
  460. # calculate sum of pmf over the support
  461. # the naive implementation is very wrong in these cases
  462. assert pmf(x, N, m1, n, w).sum() < .5
  463. assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)
  464. def test_wallenius_against_mpmath(self):
  465. # precompute data with mpmath since naive implementation above
  466. # is not reliable. See source code in gh-13330.
  467. M = 50
  468. n = 30
  469. N = 20
  470. odds = 2.25
  471. # Expected results, computed with mpmath.
  472. sup = np.arange(21)
  473. pmf = np.array([3.699003068656875e-20,
  474. 5.89398584245431e-17,
  475. 2.1594437742911123e-14,
  476. 3.221458044649955e-12,
  477. 2.4658279241205077e-10,
  478. 1.0965862603981212e-08,
  479. 3.057890479665704e-07,
  480. 5.622818831643761e-06,
  481. 7.056482841531681e-05,
  482. 0.000618899425358671,
  483. 0.003854172932571669,
  484. 0.01720592676256026,
  485. 0.05528844897093792,
  486. 0.12772363313574242,
  487. 0.21065898367825722,
  488. 0.24465958845359234,
  489. 0.1955114898110033,
  490. 0.10355390084949237,
  491. 0.03414490375225675,
  492. 0.006231989845775931,
  493. 0.0004715577304677075])
  494. mean = 14.808018384813426
  495. var = 2.6085975877923717
  496. # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
  497. # has only three digits of accuracy (~ 2.1511e-14).
  498. assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
  499. rtol=1e-13, atol=1e-13)
  500. assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
  501. mean, rtol=1e-13)
  502. assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
  503. var, rtol=1e-11)
  504. @pytest.mark.parametrize('dist_name',
  505. ['nchypergeom_fisher', 'nchypergeom_wallenius'])
  506. def test_rvs_shape(self, dist_name):
  507. # Check that when given a size with more dimensions than the
  508. # dimensions of the broadcast parameters, rvs returns an array
  509. # with the correct shape.
  510. dists = {'nchypergeom_fisher': nchypergeom_fisher,
  511. 'nchypergeom_wallenius': nchypergeom_wallenius}
  512. dist = dists[dist_name]
  513. x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
  514. assert x.shape == (5, 1, 2, 3)
  515. @pytest.mark.parametrize("mu, q, expected",
  516. [[10, 120, -1.240089881791596e-38],
  517. [1500, 0, -86.61466680572661]])
  518. def test_nbinom_11465(mu, q, expected):
  519. # test nbinom.logcdf at extreme tails
  520. size = 20
  521. n, p = size, size/(size+mu)
  522. # In R:
  523. # options(digits=16)
  524. # pnbinom(mu=10, size=20, q=120, log.p=TRUE)
  525. assert_allclose(nbinom.logcdf(q, n, p), expected)
  526. def test_gh_17146():
  527. # Check that discrete distributions return PMF of zero at non-integral x.
  528. # See gh-17146.
  529. x = np.linspace(0, 1, 11)
  530. p = 0.8
  531. pmf = bernoulli(p).pmf(x)
  532. i = (x % 1 == 0)
  533. assert_allclose(pmf[-1], p)
  534. assert_allclose(pmf[0], 1-p)
  535. assert_equal(pmf[~i], 0)
  536. class TestBetaNBinom:
  537. @pytest.mark.parametrize('x, n, a, b, ref',
  538. [[5, 5e6, 5, 20, 1.1520944824139114e-107],
  539. [100, 50, 5, 20, 0.002855762954310226],
  540. [10000, 1000, 5, 20, 1.9648515726019154e-05]])
  541. def test_betanbinom_pmf(self, x, n, a, b, ref):
  542. # test that PMF stays accurate in the distribution tails
  543. # reference values computed with mpmath
  544. # from mpmath import mp
  545. # mp.dps = 500
  546. # def betanbinom_pmf(k, n, a, b):
  547. # k = mp.mpf(k)
  548. # a = mp.mpf(a)
  549. # b = mp.mpf(b)
  550. # n = mp.mpf(n)
  551. # return float(mp.binomial(n + k - mp.one, k)
  552. # * mp.beta(a + n, b + k) / mp.beta(a, b))
  553. assert_allclose(betanbinom.pmf(x, n, a, b), ref, rtol=1e-10)
  554. @pytest.mark.parametrize('n, a, b, ref',
  555. [[10000, 5000, 50, 0.12841520515722202],
  556. [10, 9, 9, 7.9224400871459695],
  557. [100, 1000, 10, 1.5849602176622748]])
  558. def test_betanbinom_kurtosis(self, n, a, b, ref):
  559. # reference values were computed via mpmath
  560. # from mpmath import mp
  561. # def kurtosis_betanegbinom(n, a, b):
  562. # n = mp.mpf(n)
  563. # a = mp.mpf(a)
  564. # b = mp.mpf(b)
  565. # four = mp.mpf(4.)
  566. # mean = n * b / (a - mp.one)
  567. # var = (n * b * (n + a - 1.) * (a + b - 1.)
  568. # / ((a - 2.) * (a - 1.)**2.))
  569. # def f(k):
  570. # return (mp.binomial(n + k - mp.one, k)
  571. # * mp.beta(a + n, b + k) / mp.beta(a, b)
  572. # * (k - mean)**four)
  573. # fourth_moment = mp.nsum(f, [0, mp.inf])
  574. # return float(fourth_moment/var**2 - 3.)
  575. assert_allclose(betanbinom.stats(n, a, b, moments="k"),
  576. ref, rtol=3e-15)
  577. class TestZipf:
  578. def test_gh20692(self):
  579. # test that int32 data for k generates same output as double
  580. k = np.arange(0, 1000)
  581. k_int32 = k.astype(np.int32)
  582. dist = zipf(9)
  583. pmf = dist.pmf(k)
  584. pmf_k_int32 = dist.pmf(k_int32)
  585. assert_equal(pmf, pmf_k_int32)
  586. def test_gh20048():
  587. # gh-20048 reported an infinite loop in _drv2_ppfsingle
  588. # check that the one identified is resolved
  589. class test_dist_gen(stats.rv_discrete):
  590. def _cdf(self, k):
  591. return min(k / 100, 0.99)
  592. test_dist = test_dist_gen(b=np.inf)
  593. message = "Arguments that bracket..."
  594. with pytest.raises(RuntimeError, match=message):
  595. test_dist.ppf(0.999)
  596. class TestPoissonBinomial:
  597. def test_pmf(self):
  598. # Test pmf against R `poisbinom` to confirm that this is indeed the Poisson
  599. # binomial distribution. Consistency of other methods and all other behavior
  600. # should be covered by generic tests. (If not, please add a generic test.)
  601. # Like many other distributions, no special attempt is made to be more
  602. # accurate than the usual formulas provide, so we use default tolerances.
  603. #
  604. # library(poisbinom)
  605. # options(digits=16)
  606. # k = c(0, 1, 2, 3, 4)
  607. # p = c(0.9480654803913988, 0.052428488100509374,
  608. # 0.25863527358887417, 0.057764076043633206)
  609. # dpoisbinom(k, p)
  610. rng = np.random.default_rng(259823598254)
  611. n = rng.integers(10) # 4
  612. k = np.arange(n + 1)
  613. p = rng.random(n) # [0.9480654803913988, 0.052428488100509374,
  614. # 0.25863527358887417, 0.057764076043633206]
  615. res = poisson_binom.pmf(k, p)
  616. ref = [0.0343763443678060318, 0.6435428452689714307, 0.2936345519235536994,
  617. 0.0277036647503902354, 0.0007425936892786034]
  618. assert_allclose(res, ref)
  619. class TestRandInt:
  620. def test_gh19759(self):
  621. # test zero PMF values within the support reported by gh-19759
  622. a = -354
  623. max_range = abs(a)
  624. all_b_1 = [a + 2 ** 31 + i for i in range(max_range)]
  625. res = randint.pmf(325, a, all_b_1)
  626. assert (res > 0).all()
  627. ref = 1 / (np.asarray(all_b_1, dtype=np.float64) - a)
  628. assert_allclose(res, ref)