| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747 |
- import pytest
- import itertools
- import warnings
- from scipy import stats
- from scipy.stats import (betabinom, betanbinom, hypergeom, nhypergeom,
- bernoulli, boltzmann, skellam, zipf, zipfian, binom,
- nbinom, nchypergeom_fisher, nchypergeom_wallenius,
- randint, poisson_binom)
- import numpy as np
- from numpy.testing import (
- assert_almost_equal, assert_equal, assert_allclose
- )
- from scipy.special import binom as special_binom
- from scipy.optimize import root_scalar
- from scipy.integrate import quad
- # The expected values were computed with Wolfram Alpha, using
- # the expression CDF[HypergeometricDistribution[N, n, M], k].
- @pytest.mark.parametrize('k, M, n, N, expected, rtol',
- [(3, 10, 4, 5,
- 0.9761904761904762, 1e-15),
- (107, 10000, 3000, 215,
- 0.9999999997226765, 1e-15),
- (10, 10000, 3000, 215,
- 2.681682217692179e-21, 5e-11)])
- def test_hypergeom_cdf(k, M, n, N, expected, rtol):
- p = hypergeom.cdf(k, M, n, N)
- assert_allclose(p, expected, rtol=rtol)
- # The expected values were computed with Wolfram Alpha, using
- # the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
- @pytest.mark.parametrize('k, M, n, N, expected, rtol',
- [(25, 10000, 3000, 215,
- 0.9999999999052958, 1e-15),
- (125, 10000, 3000, 215,
- 1.4416781705752128e-18, 5e-11)])
- def test_hypergeom_sf(k, M, n, N, expected, rtol):
- p = hypergeom.sf(k, M, n, N)
- assert_allclose(p, expected, rtol=rtol)
- def test_hypergeom_logpmf():
- # symmetries test
- # f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
- k = 5
- N = 50
- K = 10
- n = 5
- logpmf1 = hypergeom.logpmf(k, N, K, n)
- logpmf2 = hypergeom.logpmf(n - k, N, N - K, n)
- logpmf3 = hypergeom.logpmf(K - k, N, K, N - n)
- logpmf4 = hypergeom.logpmf(k, N, n, K)
- assert_almost_equal(logpmf1, logpmf2, decimal=12)
- assert_almost_equal(logpmf1, logpmf3, decimal=12)
- assert_almost_equal(logpmf1, logpmf4, decimal=12)
- # test related distribution
- # Bernoulli distribution if n = 1
- k = 1
- N = 10
- K = 7
- n = 1
- hypergeom_logpmf = hypergeom.logpmf(k, N, K, n)
- bernoulli_logpmf = bernoulli.logpmf(k, K/N)
- assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12)
- def test_nhypergeom_pmf():
- # test with hypergeom
- M, n, r = 45, 13, 8
- k = 6
- NHG = nhypergeom.pmf(k, M, n, r)
- HG = hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
- assert_allclose(HG, NHG, rtol=1e-10)
- def test_nhypergeom_pmfcdf():
- # test pmf and cdf with arbitrary values.
- M = 8
- n = 3
- r = 4
- support = np.arange(n+1)
- pmf = nhypergeom.pmf(support, M, n, r)
- cdf = nhypergeom.cdf(support, M, n, r)
- assert_allclose(pmf, [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
- assert_allclose(cdf, [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
- def test_nhypergeom_r0():
- # test with `r = 0`.
- M = 10
- n = 3
- r = 0
- pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r)
- assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13)
- def test_nhypergeom_rvs_shape():
- # Check that when given a size with more dimensions than the
- # dimensions of the broadcast parameters, rvs returns an array
- # with the correct shape.
- x = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3))
- assert x.shape == (5, 1, 2, 3)
- def test_nhypergeom_accuracy():
- # Check that nhypergeom.rvs post-gh-13431 gives the same values as
- # inverse transform sampling
- rng = np.random.RandomState(0)
- x = nhypergeom.rvs(22, 7, 11, size=100, random_state=rng)
- rng = np.random.RandomState(0)
- p = rng.uniform(size=100)
- y = nhypergeom.ppf(p, 22, 7, 11)
- assert_equal(x, y)
- def test_boltzmann_upper_bound():
- k = np.arange(-3, 5)
- N = 1
- p = boltzmann.pmf(k, 0.123, N)
- expected = k == 0
- assert_equal(p, expected)
- lam = np.log(2)
- N = 3
- p = boltzmann.pmf(k, lam, N)
- expected = [0, 0, 0, 4/7, 2/7, 1/7, 0, 0]
- assert_allclose(p, expected, rtol=1e-13)
- c = boltzmann.cdf(k, lam, N)
- expected = [0, 0, 0, 4/7, 6/7, 1, 1, 1]
- assert_allclose(c, expected, rtol=1e-13)
- def test_betabinom_a_and_b_unity():
- # test limiting case that betabinom(n, 1, 1) is a discrete uniform
- # distribution from 0 to n
- n = 20
- k = np.arange(n + 1)
- p = betabinom(n, 1, 1).pmf(k)
- expected = np.repeat(1 / (n + 1), n + 1)
- assert_almost_equal(p, expected)
- @pytest.mark.parametrize('dtypes', itertools.product(*[(int, float)]*3))
- def test_betabinom_stats_a_and_b_integers_gh18026(dtypes):
- # gh-18026 reported that `betabinom` kurtosis calculation fails when some
- # parameters are integers. Check that this is resolved.
- n_type, a_type, b_type = dtypes
- n, a, b = n_type(10), a_type(2), b_type(3)
- assert_allclose(betabinom.stats(n, a, b, moments='k'), -0.6904761904761907)
- def test_betabinom_bernoulli():
- # test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
- a = 2.3
- b = 0.63
- k = np.arange(2)
- p = betabinom(1, a, b).pmf(k)
- expected = bernoulli(a / (a + b)).pmf(k)
- assert_almost_equal(p, expected)
- def test_issue_10317():
- alpha, n, p = 0.9, 10, 1
- assert_equal(nbinom.interval(confidence=alpha, n=n, p=p), (0, 0))
- def test_issue_11134():
- alpha, n, p = 0.95, 10, 0
- assert_equal(binom.interval(confidence=alpha, n=n, p=p), (0, 0))
- def test_issue_7406():
- rng = np.random.default_rng(4763112764)
- assert_equal(binom.ppf(rng.random(10), 0, 0.5), 0)
- # Also check that endpoints (q=0, q=1) are correct
- assert_equal(binom.ppf(0, 0, 0.5), -1)
- assert_equal(binom.ppf(1, 0, 0.5), 0)
- def test_issue_5122():
- rng = np.random.default_rng(8312492117)
- p = 0
- n = rng.integers(100, size=10)
- x = 0
- ppf = binom.ppf(x, n, p)
- assert_equal(ppf, -1)
- x = np.linspace(0.01, 0.99, 10)
- ppf = binom.ppf(x, n, p)
- assert_equal(ppf, 0)
- x = 1
- ppf = binom.ppf(x, n, p)
- assert_equal(ppf, n)
- def test_issue_1603():
- assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0)
- def test_issue_5503():
- p = 0.5
- x = np.logspace(3, 14, 12)
- assert_allclose(binom.cdf(x, 2*x, p), 0.5, atol=1e-2)
- @pytest.mark.parametrize('x, n, p, cdf_desired', [
- (300, 1000, 3/10, 0.51559351981411995636),
- (3000, 10000, 3/10, 0.50493298381929698016),
- (30000, 100000, 3/10, 0.50156000591726422864),
- (300000, 1000000, 3/10, 0.50049331906666960038),
- (3000000, 10000000, 3/10, 0.50015600124585261196),
- (30000000, 100000000, 3/10, 0.50004933192735230102),
- (30010000, 100000000, 3/10, 0.98545384016570790717),
- (29990000, 100000000, 3/10, 0.01455017177985268670),
- (29950000, 100000000, 3/10, 5.02250963487432024943e-28),
- (300_000_000, 1_000_000_000, 3/10, 0.50001560012523938),
- (3_000_000_000, 10_000_000_000, 3/10, 0.50000493319275)
- ])
- def test_issue_5503pt2(x, n, p, cdf_desired):
- assert_allclose(binom.cdf(x, n, p), cdf_desired)
- def test_issue_5503pt3():
- # From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]
- assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384)
- def test_issue_6682():
- # Reference value from R:
- # options(digits=16)
- # print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
- assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35)
- def test_issue_19747():
- # test that negative k does not raise an error in nbinom.logcdf
- result = nbinom.logcdf([5, -1, 1], 5, 0.5)
- reference = [-0.47313352, -np.inf, -2.21297293]
- assert_allclose(result, reference)
- def test_boost_divide_by_zero_issue_15101():
- n = 1000
- p = 0.01
- k = 996
- assert_allclose(binom.pmf(k, n, p), 0.0)
- def test_skellam_gh11474():
- # test issue reported in gh-11474 caused by `cdfchn`
- mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
- cdf = skellam.cdf(0, mu, mu)
- # generated in R
- # library(skellam)
- # options(digits = 16)
- # mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
- # pskellam(0, mu, mu, TRUE)
- cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
- 0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
- 0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
- assert_allclose(cdf, cdf_expected)
- class TestZipfian:
- def test_zipfian_asymptotic(self):
- # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo
- a = 6.5
- N = 10000000
- k = np.arange(1, 21)
- assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a))
- assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a))
- assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a))
- assert_allclose(zipfian.stats(a, N, moments='msvk'),
- zipf.stats(a, moments='msvk'))
- def test_zipfian_continuity(self):
- # test that zipfian(0.999999, n) ~ zipfian(1.000001, n)
- # (a = 1 switches between methods of calculating harmonic sum)
- alt1, agt1 = 0.99999999, 1.00000001
- N = 30
- k = np.arange(1, N + 1)
- assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N),
- rtol=5e-7)
- assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N),
- rtol=5e-7)
- assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N),
- rtol=5e-7)
- assert_allclose(zipfian.stats(alt1, N, moments='msvk'),
- zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7)
- def test_zipfian_R(self):
- # test against R VGAM package
- # library(VGAM)
- # k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
- # a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
- # 4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
- # n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
- # pmf <- dzipf(k, N = n, shape = a)
- # cdf <- pzipf(k, N = n, shape = a)
- # print(pmf)
- # print(cdf)
- rng = np.random.RandomState(0)
- k = rng.randint(1, 20, size=10)
- a = rng.rand(10)*10 + 1
- n = rng.randint(1, 100, size=10)
- pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
- 3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
- 5.860662e-06, 1.053948e-04]
- cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
- 0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
- # skip the first point; zipUC is not accurate for low a, n
- assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6)
- assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5)
- rng = np.random.RandomState(0)
- naive_tests = np.vstack((np.logspace(-2, 1, 10),
- rng.randint(2, 40, 10))).T
- @pytest.mark.parametrize("a, n", naive_tests)
- def test_zipfian_naive(self, a, n):
- # test against bare-bones implementation
- @np.vectorize
- def Hns(n, s):
- """Naive implementation of harmonic sum"""
- return (1/np.arange(1, n+1)**s).sum()
- @np.vectorize
- def pzip(k, a, n):
- """Naive implementation of zipfian pmf"""
- if k < 1 or k > n:
- return 0.
- else:
- return 1 / k**a / Hns(n, a)
- k = np.arange(n+1)
- pmf = pzip(k, a, n)
- cdf = np.cumsum(pmf)
- mean = np.average(k, weights=pmf)
- var = np.average((k - mean)**2, weights=pmf)
- std = var**0.5
- skew = np.average(((k-mean)/std)**3, weights=pmf)
- kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3
- assert_allclose(zipfian.pmf(k, a, n), pmf)
- assert_allclose(zipfian.cdf(k, a, n), cdf)
- assert_allclose(zipfian.stats(a, n, moments="mvsk"),
- [mean, var, skew, kurtosis])
- def test_pmf_integer_k(self):
- k = np.arange(0, 1000)
- k_int32 = k.astype(np.int32)
- dist = zipfian(111, 22)
- pmf = dist.pmf(k)
- pmf_k_int32 = dist.pmf(k_int32)
- assert_equal(pmf, pmf_k_int32)
- # Reference values were computed with mpmath.
- @pytest.mark.parametrize(
- 'k, a, n, ref',
- [(3, 1 + 1e-12, 10, 0.11380571738244807),
- (995, 1 + 1e-9, 1000, 0.0001342634472310051)]
- )
- def test_pmf_against_mpmath(self, k, a, n, ref):
- p = zipfian.pmf(k, a, n)
- assert_allclose(p, ref, 5e-16)
- # Reference values were computed with mpmath.
- @pytest.mark.parametrize(
- 'k, a, n, ref',
- [(4990, 1.25, 5000, 5.780138225335147e-05),
- (9998, 3.5, 10000, 1.775352757966365e-14),
- (50000, 3.5, 100000, 5.227803621367486e-13),
- (10, 6.0, 100, 1.523108153557902e-06),
- (95, 6.0, 100, 5.572438078308601e-12)]
- )
- def test_sf_against_mpmath(self, k, a, n, ref):
- sf = zipfian.sf(k, a, n)
- assert_allclose(sf, ref, rtol=8e-15)
- # Reference values were computed with mpmath.
- @pytest.mark.parametrize(
- 'a, n, ref',
- [(1 + 1e-9, 100, 19.27756356649707),
- (1 + 1e-7, 100000, 8271.194454731552),
- (1.001, 3, 1.6360225521183804)]
- )
- def test_mean_against_mpmath(self, a, n, ref):
- m = zipfian.mean(a, n)
- assert_allclose(m, ref, rtol=8e-15)
- def test_ridiculously_large_n(self):
- # This should return nan with no errors or warnings.
- a = 2.5
- n = 1e100
- p = zipfian.pmf(10, a, n)
- assert_equal(p, np.nan)
- class TestNCH:
- def setup_method(self):
- rng = np.random.default_rng(7162434334)
- shape = (2, 4, 3)
- max_m = 100
- m1 = rng.integers(1, max_m, size=shape) # red balls
- m2 = rng.integers(1, max_m, size=shape) # white balls
- N = m1 + m2 # total balls
- n = randint.rvs(0, N, size=N.shape, random_state=rng) # number of draws
- xl = np.maximum(0, n-m2) # lower bound of support
- xu = np.minimum(n, m1) # upper bound of support
- x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
- odds = rng.random(x.shape)*2
- self.x, self.N, self.m1, self.n, self.odds = x, N, m1, n, odds
- # test output is more readable when function names (strings) are passed
- @pytest.mark.parametrize('dist_name',
- ['nchypergeom_fisher', 'nchypergeom_wallenius'])
- def test_nch_hypergeom(self, dist_name):
- # Both noncentral hypergeometric distributions reduce to the
- # hypergeometric distribution when odds = 1
- dists = {'nchypergeom_fisher': nchypergeom_fisher,
- 'nchypergeom_wallenius': nchypergeom_wallenius}
- dist = dists[dist_name]
- x, N, m1, n = self.x, self.N, self.m1, self.n
- assert_allclose(dist.pmf(x, N, m1, n, odds=1),
- hypergeom.pmf(x, N, m1, n))
- def test_nchypergeom_fisher_naive(self):
- # test against a very simple implementation
- x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds
- @np.vectorize
- def pmf_mean_var(x, N, m1, n, w):
- # simple implementation of nchypergeom_fisher pmf
- m2 = N - m1
- xl = np.maximum(0, n-m2)
- xu = np.minimum(n, m1)
- def f(x):
- t1 = special_binom(m1, x)
- t2 = special_binom(m2, n - x)
- return t1 * t2 * w**x
- def P(k):
- return sum(f(y)*y**k for y in range(xl, xu + 1))
- P0 = P(0)
- P1 = P(1)
- P2 = P(2)
- pmf = f(x) / P0
- mean = P1 / P0
- var = P2 / P0 - (P1 / P0)**2
- return pmf, mean, var
- pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
- assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
- assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
- mean)
- assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
- var)
- def test_nchypergeom_wallenius_naive(self):
- # test against a very simple implementation
- rng = np.random.RandomState(2)
- shape = (2, 4, 3)
- max_m = 100
- m1 = rng.randint(1, max_m, size=shape)
- m2 = rng.randint(1, max_m, size=shape)
- N = m1 + m2
- n = randint.rvs(0, N, size=N.shape, random_state=rng)
- xl = np.maximum(0, n-m2)
- xu = np.minimum(n, m1)
- x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
- w = rng.rand(*x.shape)*2
- def support(N, m1, n, w):
- m2 = N - m1
- xl = np.maximum(0, n-m2)
- xu = np.minimum(n, m1)
- return xl, xu
- @np.vectorize
- def mean(N, m1, n, w):
- m2 = N - m1
- xl, xu = support(N, m1, n, w)
- def fun(u):
- return u/m1 + (1 - (n-u)/m2)**w - 1
- return root_scalar(fun, bracket=(xl, xu)).root
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore", category=RuntimeWarning,
- message="invalid value encountered in mean")
- assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
- mean(N, m1, n, w), rtol=2e-2)
- @np.vectorize
- def variance(N, m1, n, w):
- m2 = N - m1
- u = mean(N, m1, n, w)
- a = u * (m1 - u)
- b = (n-u)*(u + m2 - n)
- return N*a*b / ((N-1) * (m1*b + m2*a))
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore", category=RuntimeWarning,
- message="invalid value encountered in mean")
- assert_allclose(
- nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
- variance(N, m1, n, w),
- rtol=5e-2
- )
- @np.vectorize
- def pmf(x, N, m1, n, w):
- m2 = N - m1
- xl, xu = support(N, m1, n, w)
- def integrand(t):
- D = w*(m1 - x) + (m2 - (n-x))
- res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
- return res
- def f(x):
- t1 = special_binom(m1, x)
- t2 = special_binom(m2, n - x)
- the_integral = quad(integrand, 0, 1,
- epsrel=1e-16, epsabs=1e-16)
- return t1 * t2 * the_integral[0]
- return f(x)
- pmf0 = pmf(x, N, m1, n, w)
- pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)
- atol, rtol = 1e-6, 1e-6
- i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
- assert i.sum() > np.prod(shape) / 2 # works at least half the time
- # for those that fail, discredit the naive implementation
- for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
- # get the support
- m2 = N - m1
- xl, xu = support(N, m1, n, w)
- x = np.arange(xl, xu + 1)
- # calculate sum of pmf over the support
- # the naive implementation is very wrong in these cases
- assert pmf(x, N, m1, n, w).sum() < .5
- assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)
- def test_wallenius_against_mpmath(self):
- # precompute data with mpmath since naive implementation above
- # is not reliable. See source code in gh-13330.
- M = 50
- n = 30
- N = 20
- odds = 2.25
- # Expected results, computed with mpmath.
- sup = np.arange(21)
- pmf = np.array([3.699003068656875e-20,
- 5.89398584245431e-17,
- 2.1594437742911123e-14,
- 3.221458044649955e-12,
- 2.4658279241205077e-10,
- 1.0965862603981212e-08,
- 3.057890479665704e-07,
- 5.622818831643761e-06,
- 7.056482841531681e-05,
- 0.000618899425358671,
- 0.003854172932571669,
- 0.01720592676256026,
- 0.05528844897093792,
- 0.12772363313574242,
- 0.21065898367825722,
- 0.24465958845359234,
- 0.1955114898110033,
- 0.10355390084949237,
- 0.03414490375225675,
- 0.006231989845775931,
- 0.0004715577304677075])
- mean = 14.808018384813426
- var = 2.6085975877923717
- # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
- # has only three digits of accuracy (~ 2.1511e-14).
- assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
- rtol=1e-13, atol=1e-13)
- assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
- mean, rtol=1e-13)
- assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
- var, rtol=1e-11)
- @pytest.mark.parametrize('dist_name',
- ['nchypergeom_fisher', 'nchypergeom_wallenius'])
- def test_rvs_shape(self, dist_name):
- # Check that when given a size with more dimensions than the
- # dimensions of the broadcast parameters, rvs returns an array
- # with the correct shape.
- dists = {'nchypergeom_fisher': nchypergeom_fisher,
- 'nchypergeom_wallenius': nchypergeom_wallenius}
- dist = dists[dist_name]
- x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
- assert x.shape == (5, 1, 2, 3)
- @pytest.mark.parametrize("mu, q, expected",
- [[10, 120, -1.240089881791596e-38],
- [1500, 0, -86.61466680572661]])
- def test_nbinom_11465(mu, q, expected):
- # test nbinom.logcdf at extreme tails
- size = 20
- n, p = size, size/(size+mu)
- # In R:
- # options(digits=16)
- # pnbinom(mu=10, size=20, q=120, log.p=TRUE)
- assert_allclose(nbinom.logcdf(q, n, p), expected)
- def test_gh_17146():
- # Check that discrete distributions return PMF of zero at non-integral x.
- # See gh-17146.
- x = np.linspace(0, 1, 11)
- p = 0.8
- pmf = bernoulli(p).pmf(x)
- i = (x % 1 == 0)
- assert_allclose(pmf[-1], p)
- assert_allclose(pmf[0], 1-p)
- assert_equal(pmf[~i], 0)
- class TestBetaNBinom:
- @pytest.mark.parametrize('x, n, a, b, ref',
- [[5, 5e6, 5, 20, 1.1520944824139114e-107],
- [100, 50, 5, 20, 0.002855762954310226],
- [10000, 1000, 5, 20, 1.9648515726019154e-05]])
- def test_betanbinom_pmf(self, x, n, a, b, ref):
- # test that PMF stays accurate in the distribution tails
- # reference values computed with mpmath
- # from mpmath import mp
- # mp.dps = 500
- # def betanbinom_pmf(k, n, a, b):
- # k = mp.mpf(k)
- # a = mp.mpf(a)
- # b = mp.mpf(b)
- # n = mp.mpf(n)
- # return float(mp.binomial(n + k - mp.one, k)
- # * mp.beta(a + n, b + k) / mp.beta(a, b))
- assert_allclose(betanbinom.pmf(x, n, a, b), ref, rtol=1e-10)
- @pytest.mark.parametrize('n, a, b, ref',
- [[10000, 5000, 50, 0.12841520515722202],
- [10, 9, 9, 7.9224400871459695],
- [100, 1000, 10, 1.5849602176622748]])
- def test_betanbinom_kurtosis(self, n, a, b, ref):
- # reference values were computed via mpmath
- # from mpmath import mp
- # def kurtosis_betanegbinom(n, a, b):
- # n = mp.mpf(n)
- # a = mp.mpf(a)
- # b = mp.mpf(b)
- # four = mp.mpf(4.)
- # mean = n * b / (a - mp.one)
- # var = (n * b * (n + a - 1.) * (a + b - 1.)
- # / ((a - 2.) * (a - 1.)**2.))
- # def f(k):
- # return (mp.binomial(n + k - mp.one, k)
- # * mp.beta(a + n, b + k) / mp.beta(a, b)
- # * (k - mean)**four)
- # fourth_moment = mp.nsum(f, [0, mp.inf])
- # return float(fourth_moment/var**2 - 3.)
- assert_allclose(betanbinom.stats(n, a, b, moments="k"),
- ref, rtol=3e-15)
- class TestZipf:
- def test_gh20692(self):
- # test that int32 data for k generates same output as double
- k = np.arange(0, 1000)
- k_int32 = k.astype(np.int32)
- dist = zipf(9)
- pmf = dist.pmf(k)
- pmf_k_int32 = dist.pmf(k_int32)
- assert_equal(pmf, pmf_k_int32)
- def test_gh20048():
- # gh-20048 reported an infinite loop in _drv2_ppfsingle
- # check that the one identified is resolved
- class test_dist_gen(stats.rv_discrete):
- def _cdf(self, k):
- return min(k / 100, 0.99)
- test_dist = test_dist_gen(b=np.inf)
- message = "Arguments that bracket..."
- with pytest.raises(RuntimeError, match=message):
- test_dist.ppf(0.999)
- class TestPoissonBinomial:
- def test_pmf(self):
- # Test pmf against R `poisbinom` to confirm that this is indeed the Poisson
- # binomial distribution. Consistency of other methods and all other behavior
- # should be covered by generic tests. (If not, please add a generic test.)
- # Like many other distributions, no special attempt is made to be more
- # accurate than the usual formulas provide, so we use default tolerances.
- #
- # library(poisbinom)
- # options(digits=16)
- # k = c(0, 1, 2, 3, 4)
- # p = c(0.9480654803913988, 0.052428488100509374,
- # 0.25863527358887417, 0.057764076043633206)
- # dpoisbinom(k, p)
- rng = np.random.default_rng(259823598254)
- n = rng.integers(10) # 4
- k = np.arange(n + 1)
- p = rng.random(n) # [0.9480654803913988, 0.052428488100509374,
- # 0.25863527358887417, 0.057764076043633206]
- res = poisson_binom.pmf(k, p)
- ref = [0.0343763443678060318, 0.6435428452689714307, 0.2936345519235536994,
- 0.0277036647503902354, 0.0007425936892786034]
- assert_allclose(res, ref)
- class TestRandInt:
- def test_gh19759(self):
- # test zero PMF values within the support reported by gh-19759
- a = -354
- max_range = abs(a)
- all_b_1 = [a + 2 ** 31 + i for i in range(max_range)]
- res = randint.pmf(325, a, all_b_1)
- assert (res > 0).all()
- ref = 1 / (np.asarray(all_b_1, dtype=np.float64) - a)
- assert_allclose(res, ref)
|