_discrete_distns.py 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096
  1. #
  2. # Author: Travis Oliphant 2002-2011 with contributions from
  3. # SciPy Developers 2004-2011
  4. #
  5. from functools import partial
  6. from scipy import special
  7. from scipy.special import entr, logsumexp, betaln, gammaln as gamln
  8. import scipy.special._ufuncs as scu
  9. from scipy._lib._util import rng_integers
  10. import scipy._lib.array_api_extra as xpx
  11. from scipy.interpolate import interp1d
  12. from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh
  13. import numpy as np
  14. from ._distn_infrastructure import (rv_discrete, get_distribution_names,
  15. _vectorize_rvs_over_shapes,
  16. _ShapeInfo, _isintegral,
  17. rv_discrete_frozen)
  18. from ._biasedurn import (_PyFishersNCHypergeometric,
  19. _PyWalleniusNCHypergeometric,
  20. _PyStochasticLib3)
  21. from ._stats_pythran import _poisson_binom
  22. class binom_gen(rv_discrete):
  23. r"""A binomial discrete random variable.
  24. %(before_notes)s
  25. Notes
  26. -----
  27. The probability mass function for `binom` is:
  28. .. math::
  29. f(k) = \binom{n}{k} p^k (1-p)^{n-k}
  30. for :math:`k \in \{0, 1, \dots, n\}`, :math:`0 \leq p \leq 1`
  31. `binom` takes :math:`n` and :math:`p` as shape parameters,
  32. where :math:`p` is the probability of a single success
  33. and :math:`1-p` is the probability of a single failure.
  34. This distribution uses routines from the Boost Math C++ library for
  35. the computation of the ``pmf``, ``cdf``, ``sf``, ``ppf`` and ``isf``
  36. methods. [1]_
  37. %(after_notes)s
  38. References
  39. ----------
  40. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  41. %(example)s
  42. See Also
  43. --------
  44. hypergeom, nbinom, nhypergeom
  45. """
  46. def _shape_info(self):
  47. return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
  48. _ShapeInfo("p", False, (0, 1), (True, True))]
  49. def _rvs(self, n, p, size=None, random_state=None):
  50. return random_state.binomial(n, p, size)
  51. def _argcheck(self, n, p):
  52. return (n >= 0) & _isintegral(n) & (p >= 0) & (p <= 1)
  53. def _get_support(self, n, p):
  54. return self.a, n
  55. def _logpmf(self, x, n, p):
  56. k = floor(x)
  57. combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
  58. return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)
  59. def _pmf(self, x, n, p):
  60. # binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)
  61. return scu._binom_pmf(x, n, p)
  62. def _cdf(self, x, n, p):
  63. k = floor(x)
  64. return scu._binom_cdf(k, n, p)
  65. def _sf(self, x, n, p):
  66. k = floor(x)
  67. return scu._binom_sf(k, n, p)
  68. def _isf(self, x, n, p):
  69. return scu._binom_isf(x, n, p)
  70. def _ppf(self, q, n, p):
  71. return scu._binom_ppf(q, n, p)
  72. def _stats(self, n, p, moments='mv'):
  73. mu = n * p
  74. var = mu - n * np.square(p)
  75. g1, g2 = None, None
  76. if 's' in moments:
  77. pq = p - np.square(p)
  78. npq_sqrt = np.sqrt(n * pq)
  79. t1 = np.reciprocal(npq_sqrt)
  80. t2 = (2.0 * p) / npq_sqrt
  81. g1 = t1 - t2
  82. if 'k' in moments:
  83. pq = p - np.square(p)
  84. npq = n * pq
  85. t1 = np.reciprocal(npq)
  86. t2 = 6.0/n
  87. g2 = t1 - t2
  88. return mu, var, g1, g2
  89. def _entropy(self, n, p):
  90. k = np.r_[0:n + 1]
  91. vals = self._pmf(k, n, p)
  92. return np.sum(entr(vals), axis=0)
  93. binom = binom_gen(name='binom')
  94. class bernoulli_gen(binom_gen):
  95. r"""A Bernoulli discrete random variable.
  96. %(before_notes)s
  97. Notes
  98. -----
  99. The probability mass function for `bernoulli` is:
  100. .. math::
  101. f(k) = \begin{cases}1-p &\text{if } k = 0\\
  102. p &\text{if } k = 1\end{cases}
  103. for :math:`k` in :math:`\{0, 1\}`, :math:`0 \leq p \leq 1`
  104. `bernoulli` takes :math:`p` as shape parameter,
  105. where :math:`p` is the probability of a single success
  106. and :math:`1-p` is the probability of a single failure.
  107. %(after_notes)s
  108. %(example)s
  109. """
  110. def _shape_info(self):
  111. return [_ShapeInfo("p", False, (0, 1), (True, True))]
  112. def _rvs(self, p, size=None, random_state=None):
  113. return binom_gen._rvs(self, 1, p, size=size, random_state=random_state)
  114. def _argcheck(self, p):
  115. return (p >= 0) & (p <= 1)
  116. def _get_support(self, p):
  117. # Overrides binom_gen._get_support!x
  118. return self.a, self.b
  119. def _logpmf(self, x, p):
  120. return binom._logpmf(x, 1, p)
  121. def _pmf(self, x, p):
  122. # bernoulli.pmf(k) = 1-p if k = 0
  123. # = p if k = 1
  124. return binom._pmf(x, 1, p)
  125. def _cdf(self, x, p):
  126. return binom._cdf(x, 1, p)
  127. def _sf(self, x, p):
  128. return binom._sf(x, 1, p)
  129. def _isf(self, x, p):
  130. return binom._isf(x, 1, p)
  131. def _ppf(self, q, p):
  132. return binom._ppf(q, 1, p)
  133. def _stats(self, p):
  134. return binom._stats(1, p)
  135. def _entropy(self, p):
  136. return entr(p) + entr(1-p)
  137. bernoulli = bernoulli_gen(b=1, name='bernoulli')
  138. class betabinom_gen(rv_discrete):
  139. r"""A beta-binomial discrete random variable.
  140. %(before_notes)s
  141. Notes
  142. -----
  143. The beta-binomial distribution is a binomial distribution with a
  144. probability of success `p` that follows a beta distribution.
  145. The probability mass function for `betabinom` is:
  146. .. math::
  147. f(k) = \binom{n}{k} \frac{B(k + a, n - k + b)}{B(a, b)}
  148. for :math:`k \in \{0, 1, \dots, n\}`, :math:`n \geq 0`, :math:`a > 0`,
  149. :math:`b > 0`, where :math:`B(a, b)` is the beta function.
  150. `betabinom` takes :math:`n`, :math:`a`, and :math:`b` as shape parameters.
  151. %(after_notes)s
  152. References
  153. ----------
  154. .. [1] https://en.wikipedia.org/wiki/Beta-binomial_distribution
  155. .. versionadded:: 1.4.0
  156. See Also
  157. --------
  158. beta, binom
  159. %(example)s
  160. """
  161. def _shape_info(self):
  162. return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
  163. _ShapeInfo("a", False, (0, np.inf), (False, False)),
  164. _ShapeInfo("b", False, (0, np.inf), (False, False))]
  165. def _rvs(self, n, a, b, size=None, random_state=None):
  166. p = random_state.beta(a, b, size)
  167. return random_state.binomial(n, p, size)
  168. def _get_support(self, n, a, b):
  169. return 0, n
  170. def _argcheck(self, n, a, b):
  171. return (n >= 0) & _isintegral(n) & (a > 0) & (b > 0)
  172. def _logpmf(self, x, n, a, b):
  173. k = floor(x)
  174. combiln = -log(n + 1) - betaln(n - k + 1, k + 1)
  175. return combiln + betaln(k + a, n - k + b) - betaln(a, b)
  176. def _pmf(self, x, n, a, b):
  177. return exp(self._logpmf(x, n, a, b))
  178. def _stats(self, n, a, b, moments='mv'):
  179. e_p = a / (a + b)
  180. e_q = 1 - e_p
  181. mu = n * e_p
  182. var = n * (a + b + n) * e_p * e_q / (a + b + 1)
  183. g1, g2 = None, None
  184. if 's' in moments:
  185. g1 = 1.0 / sqrt(var)
  186. g1 *= (a + b + 2 * n) * (b - a)
  187. g1 /= (a + b + 2) * (a + b)
  188. if 'k' in moments:
  189. g2 = (a + b).astype(e_p.dtype)
  190. g2 *= (a + b - 1 + 6 * n)
  191. g2 += 3 * a * b * (n - 2)
  192. g2 += 6 * n ** 2
  193. g2 -= 3 * e_p * b * n * (6 - n)
  194. g2 -= 18 * e_p * e_q * n ** 2
  195. g2 *= (a + b) ** 2 * (1 + a + b)
  196. g2 /= (n * a * b * (a + b + 2) * (a + b + 3) * (a + b + n))
  197. g2 -= 3
  198. return mu, var, g1, g2
  199. betabinom = betabinom_gen(name='betabinom')
  200. class nbinom_gen(rv_discrete):
  201. r"""A negative binomial discrete random variable.
  202. %(before_notes)s
  203. Notes
  204. -----
  205. Negative binomial distribution describes a sequence of i.i.d. Bernoulli
  206. trials, repeated until a predefined, non-random number of successes occurs.
  207. The probability mass function of the number of failures for `nbinom` is:
  208. .. math::
  209. f(k) = \binom{k+n-1}{n-1} p^n (1-p)^k
  210. for :math:`k \ge 0`, :math:`0 < p \leq 1`
  211. `nbinom` takes :math:`n` and :math:`p` as shape parameters where :math:`n`
  212. is the number of successes, :math:`p` is the probability of a single
  213. success, and :math:`1-p` is the probability of a single failure.
  214. Another common parameterization of the negative binomial distribution is
  215. in terms of the mean number of failures :math:`\mu` to achieve :math:`n`
  216. successes. The mean :math:`\mu` is related to the probability of success
  217. as
  218. .. math::
  219. p = \frac{n}{n + \mu}
  220. The number of successes :math:`n` may also be specified in terms of a
  221. "dispersion", "heterogeneity", or "aggregation" parameter :math:`\alpha`,
  222. which relates the mean :math:`\mu` to the variance :math:`\sigma^2`,
  223. e.g. :math:`\sigma^2 = \mu + \alpha \mu^2`. Regardless of the convention
  224. used for :math:`\alpha`,
  225. .. math::
  226. p &= \frac{\mu}{\sigma^2} \\
  227. n &= \frac{\mu^2}{\sigma^2 - \mu}
  228. This distribution uses routines from the Boost Math C++ library for
  229. the computation of the ``pmf``, ``cdf``, ``sf``, ``ppf``, ``isf``
  230. and ``stats`` methods. [1]_
  231. %(after_notes)s
  232. References
  233. ----------
  234. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  235. %(example)s
  236. See Also
  237. --------
  238. hypergeom, binom, nhypergeom
  239. """
  240. def _shape_info(self):
  241. return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
  242. _ShapeInfo("p", False, (0, 1), (True, True))]
  243. def _rvs(self, n, p, size=None, random_state=None):
  244. return random_state.negative_binomial(n, p, size)
  245. def _argcheck(self, n, p):
  246. return (n > 0) & (p > 0) & (p <= 1)
  247. def _pmf(self, x, n, p):
  248. # nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
  249. return scu._nbinom_pmf(x, n, p)
  250. def _logpmf(self, x, n, p):
  251. coeff = gamln(n+x) - gamln(x+1) - gamln(n)
  252. return coeff + n*log(p) + special.xlog1py(x, -p)
  253. def _cdf(self, x, n, p):
  254. k = floor(x)
  255. return scu._nbinom_cdf(k, n, p)
  256. def _logcdf(self, x, n, p):
  257. k = floor(x)
  258. k, n, p = np.broadcast_arrays(k, n, p)
  259. cdf = self._cdf(k, n, p)
  260. cond = cdf > 0.5
  261. def f1(k, n, p):
  262. return np.log1p(-special.betainc(k + 1, n, 1 - p))
  263. # do calc in place
  264. logcdf = cdf
  265. with np.errstate(divide='ignore'):
  266. logcdf[cond] = f1(k[cond], n[cond], p[cond])
  267. logcdf[~cond] = np.log(cdf[~cond])
  268. return logcdf
  269. def _sf(self, x, n, p):
  270. k = floor(x)
  271. return scu._nbinom_sf(k, n, p)
  272. def _isf(self, x, n, p):
  273. with np.errstate(over='ignore'): # see gh-17432
  274. return scu._nbinom_isf(x, n, p)
  275. def _ppf(self, q, n, p):
  276. with np.errstate(over='ignore'): # see gh-17432
  277. return scu._nbinom_ppf(q, n, p)
  278. def _stats(self, n, p):
  279. return (
  280. scu._nbinom_mean(n, p),
  281. scu._nbinom_variance(n, p),
  282. scu._nbinom_skewness(n, p),
  283. scu._nbinom_kurtosis_excess(n, p),
  284. )
  285. nbinom = nbinom_gen(name='nbinom')
  286. class betanbinom_gen(rv_discrete):
  287. r"""A beta-negative-binomial discrete random variable.
  288. %(before_notes)s
  289. Notes
  290. -----
  291. The beta-negative-binomial distribution is a negative binomial
  292. distribution with a probability of success `p` that follows a
  293. beta distribution.
  294. The probability mass function for `betanbinom` is:
  295. .. math::
  296. f(k) = \binom{n + k - 1}{k} \frac{B(a + n, b + k)}{B(a, b)}
  297. for :math:`k \ge 0`, :math:`n \geq 0`, :math:`a > 0`,
  298. :math:`b > 0`, where :math:`B(a, b)` is the beta function.
  299. `betanbinom` takes :math:`n`, :math:`a`, and :math:`b` as shape parameters.
  300. %(after_notes)s
  301. References
  302. ----------
  303. .. [1] https://en.wikipedia.org/wiki/Beta_negative_binomial_distribution
  304. .. versionadded:: 1.12.0
  305. See Also
  306. --------
  307. betabinom : Beta binomial distribution
  308. %(example)s
  309. """
  310. def _shape_info(self):
  311. return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
  312. _ShapeInfo("a", False, (0, np.inf), (False, False)),
  313. _ShapeInfo("b", False, (0, np.inf), (False, False))]
  314. def _rvs(self, n, a, b, size=None, random_state=None):
  315. p = random_state.beta(a, b, size)
  316. return random_state.negative_binomial(n, p, size)
  317. def _argcheck(self, n, a, b):
  318. return (n >= 0) & _isintegral(n) & (a > 0) & (b > 0)
  319. def _logpmf(self, x, n, a, b):
  320. k = floor(x)
  321. combiln = -np.log(n + k) - betaln(n, k + 1)
  322. return combiln + betaln(a + n, b + k) - betaln(a, b)
  323. def _pmf(self, x, n, a, b):
  324. return exp(self._logpmf(x, n, a, b))
  325. def _stats(self, n, a, b, moments='mv'):
  326. # reference: Wolfram Alpha input
  327. # BetaNegativeBinomialDistribution[a, b, n]
  328. def mean(n, a, b):
  329. return n * b / (a - 1.)
  330. mu = xpx.apply_where(a > 1, (n, a, b), mean, fill_value=np.inf)
  331. def var(n, a, b):
  332. return (n * b * (n + a - 1.) * (a + b - 1.)
  333. / ((a - 2.) * (a - 1.)**2.))
  334. var = xpx.apply_where(a > 2, (n, a, b), var, fill_value=np.inf)
  335. g1, g2 = None, None
  336. def skew(n, a, b):
  337. return ((2 * n + a - 1.) * (2 * b + a - 1.)
  338. / (a - 3.) / sqrt(n * b * (n + a - 1.) * (b + a - 1.)
  339. / (a - 2.)))
  340. if 's' in moments:
  341. g1 = xpx.apply_where(a > 3, (n, a, b), skew, fill_value=np.inf)
  342. def kurtosis(n, a, b):
  343. term = (a - 2.)
  344. term_2 = ((a - 1.)**2. * (a**2. + a * (6 * b - 1.)
  345. + 6. * (b - 1.) * b)
  346. + 3. * n**2. * ((a + 5.) * b**2. + (a + 5.)
  347. * (a - 1.) * b + 2. * (a - 1.)**2)
  348. + 3 * (a - 1.) * n
  349. * ((a + 5.) * b**2. + (a + 5.) * (a - 1.) * b
  350. + 2. * (a - 1.)**2.))
  351. denominator = ((a - 4.) * (a - 3.) * b * n
  352. * (a + b - 1.) * (a + n - 1.))
  353. # Wolfram Alpha uses Pearson kurtosis, so we subtract 3 to get
  354. # scipy's Fisher kurtosis
  355. return term * term_2 / denominator - 3.
  356. if 'k' in moments:
  357. g2 = xpx.apply_where(a > 4, (n, a, b), kurtosis, fill_value=np.inf)
  358. return mu, var, g1, g2
  359. betanbinom = betanbinom_gen(name='betanbinom')
  360. class geom_gen(rv_discrete):
  361. r"""A geometric discrete random variable.
  362. %(before_notes)s
  363. Notes
  364. -----
  365. The probability mass function for `geom` is:
  366. .. math::
  367. f(k) = (1-p)^{k-1} p
  368. for :math:`k \ge 1`, :math:`0 < p \leq 1`
  369. `geom` takes :math:`p` as shape parameter,
  370. where :math:`p` is the probability of a single success
  371. and :math:`1-p` is the probability of a single failure.
  372. Note that when drawing random samples, the probability of observations that exceed
  373. ``np.iinfo(np.int64).max`` increases rapidly as $p$ decreases below $10^{-17}$. For
  374. $p < 10^{-20}$, almost all observations would exceed the maximum ``int64``; however,
  375. the output dtype is always ``int64``, so these values are clipped to the maximum.
  376. %(after_notes)s
  377. See Also
  378. --------
  379. planck
  380. %(example)s
  381. """
  382. def _shape_info(self):
  383. return [_ShapeInfo("p", False, (0, 1), (True, True))]
  384. def _rvs(self, p, size=None, random_state=None):
  385. res = random_state.geometric(p, size=size)
  386. # RandomState.geometric can wrap around to negative values; make behavior
  387. # consistent with Generator.geometric by replacing with maximum integer.
  388. max_int = np.iinfo(res.dtype).max
  389. return np.where(res < 0, max_int, res)
  390. def _argcheck(self, p):
  391. return (p <= 1) & (p > 0)
  392. def _pmf(self, k, p):
  393. return np.power(1-p, k-1) * p
  394. def _logpmf(self, k, p):
  395. return special.xlog1py(k - 1, -p) + log(p)
  396. def _cdf(self, x, p):
  397. k = floor(x)
  398. return -expm1(log1p(-p)*k)
  399. def _sf(self, x, p):
  400. return np.exp(self._logsf(x, p))
  401. def _logsf(self, x, p):
  402. k = floor(x)
  403. return k*log1p(-p)
  404. def _ppf(self, q, p):
  405. vals = ceil(log1p(-q) / log1p(-p))
  406. temp = self._cdf(vals-1, p)
  407. return np.where((temp >= q) & (vals > 0), vals-1, vals)
  408. def _stats(self, p):
  409. mu = 1.0/p
  410. qr = 1.0-p
  411. var = qr / p / p
  412. g1 = (2.0-p) / sqrt(qr)
  413. g2 = np.polyval([1, -6, 6], p)/(1.0-p)
  414. return mu, var, g1, g2
  415. def _entropy(self, p):
  416. return -np.log(p) - np.log1p(-p) * (1.0-p) / p
  417. geom = geom_gen(a=1, name='geom', longname="A geometric")
  418. class hypergeom_gen(rv_discrete):
  419. r"""A hypergeometric discrete random variable.
  420. The hypergeometric distribution models drawing objects from a bin.
  421. `M` is the total number of objects, `n` is total number of Type I objects.
  422. The random variate represents the number of Type I objects in `N` drawn
  423. without replacement from the total population.
  424. %(before_notes)s
  425. Notes
  426. -----
  427. The symbols used to denote the shape parameters (`M`, `n`, and `N`) are not
  428. universally accepted. See the Examples for a clarification of the
  429. definitions used here.
  430. The probability mass function is defined as,
  431. .. math:: p(k, M, n, N) = \frac{\binom{n}{k} \binom{M - n}{N - k}}
  432. {\binom{M}{N}}
  433. for :math:`k \in [\max(0, N - M + n), \min(n, N)]`, where the binomial
  434. coefficients are defined as,
  435. .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
  436. This distribution uses routines from the Boost Math C++ library for
  437. the computation of the ``pmf``, ``cdf``, ``sf`` and ``stats`` methods. [1]_
  438. %(after_notes)s
  439. References
  440. ----------
  441. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  442. Examples
  443. --------
  444. >>> import numpy as np
  445. >>> from scipy.stats import hypergeom
  446. >>> import matplotlib.pyplot as plt
  447. Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
  448. we want to know the probability of finding a given number of dogs if we
  449. choose at random 12 of the 20 animals, we can initialize a frozen
  450. distribution and plot the probability mass function:
  451. >>> [M, n, N] = [20, 7, 12]
  452. >>> rv = hypergeom(M, n, N)
  453. >>> x = np.arange(0, n+1)
  454. >>> pmf_dogs = rv.pmf(x)
  455. >>> fig = plt.figure()
  456. >>> ax = fig.add_subplot(111)
  457. >>> ax.plot(x, pmf_dogs, 'bo')
  458. >>> ax.vlines(x, 0, pmf_dogs, lw=2)
  459. >>> ax.set_xlabel('# of dogs in our group of chosen animals')
  460. >>> ax.set_ylabel('hypergeom PMF')
  461. >>> plt.show()
  462. Instead of using a frozen distribution we can also use `hypergeom`
  463. methods directly. To for example obtain the cumulative distribution
  464. function, use:
  465. >>> prb = hypergeom.cdf(x, M, n, N)
  466. And to generate random numbers:
  467. >>> R = hypergeom.rvs(M, n, N, size=10)
  468. See Also
  469. --------
  470. nhypergeom, binom, nbinom
  471. """
  472. def _shape_info(self):
  473. return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
  474. _ShapeInfo("n", True, (0, np.inf), (True, False)),
  475. _ShapeInfo("N", True, (0, np.inf), (True, False))]
  476. def _rvs(self, M, n, N, size=None, random_state=None):
  477. return random_state.hypergeometric(n, M-n, N, size=size)
  478. def _get_support(self, M, n, N):
  479. return np.maximum(N-(M-n), 0), np.minimum(n, N)
  480. def _argcheck(self, M, n, N):
  481. cond = (M > 0) & (n >= 0) & (N >= 0)
  482. cond &= (n <= M) & (N <= M)
  483. cond &= _isintegral(M) & _isintegral(n) & _isintegral(N)
  484. return cond
  485. def _logpmf(self, k, M, n, N):
  486. tot, good = M, n
  487. bad = tot - good
  488. result = (betaln(good+1, 1) + betaln(bad+1, 1) + betaln(tot-N+1, N+1) -
  489. betaln(k+1, good-k+1) - betaln(N-k+1, bad-N+k+1) -
  490. betaln(tot+1, 1))
  491. return result
  492. def _pmf(self, k, M, n, N):
  493. return scu._hypergeom_pmf(k, n, N, M)
  494. def _cdf(self, k, M, n, N):
  495. return scu._hypergeom_cdf(k, n, N, M)
  496. def _stats(self, M, n, N):
  497. M, n, N = 1. * M, 1. * n, 1. * N
  498. m = M - n
  499. # Boost kurtosis_excess doesn't return the same as the value
  500. # computed here.
  501. g2 = M * (M + 1) - 6. * N * (M - N) - 6. * n * m
  502. g2 *= (M - 1) * M * M
  503. g2 += 6. * n * N * (M - N) * m * (5. * M - 6)
  504. g2 /= n * N * (M - N) * m * (M - 2.) * (M - 3.)
  505. return (
  506. scu._hypergeom_mean(n, N, M),
  507. scu._hypergeom_variance(n, N, M),
  508. scu._hypergeom_skewness(n, N, M),
  509. g2,
  510. )
  511. def _entropy(self, M, n, N):
  512. k = np.r_[N - (M - n):min(n, N) + 1]
  513. vals = self.pmf(k, M, n, N)
  514. return np.sum(entr(vals), axis=0)
  515. def _sf(self, k, M, n, N):
  516. return scu._hypergeom_sf(k, n, N, M)
  517. def _logsf(self, k, M, n, N):
  518. res = []
  519. for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
  520. if (quant + 0.5) * (tot + 0.5) < (good - 0.5) * (draw - 0.5):
  521. # Less terms to sum if we calculate log(1-cdf)
  522. res.append(log1p(-exp(self.logcdf(quant, tot, good, draw))))
  523. else:
  524. # Integration over probability mass function using logsumexp
  525. k2 = np.arange(quant + 1, draw + 1)
  526. res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
  527. return np.asarray(res)
  528. def _logcdf(self, k, M, n, N):
  529. res = []
  530. for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
  531. if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
  532. # Less terms to sum if we calculate log(1-sf)
  533. res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
  534. else:
  535. # Integration over probability mass function using logsumexp
  536. k2 = np.arange(0, quant + 1)
  537. res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
  538. return np.asarray(res)
  539. hypergeom = hypergeom_gen(name='hypergeom')
  540. class nhypergeom_gen(rv_discrete):
  541. r"""A negative hypergeometric discrete random variable.
  542. Consider a box containing :math:`M` balls:, :math:`n` red and
  543. :math:`M-n` blue. We randomly sample balls from the box, one
  544. at a time and *without* replacement, until we have picked :math:`r`
  545. blue balls. `nhypergeom` is the distribution of the number of
  546. red balls :math:`k` we have picked.
  547. %(before_notes)s
  548. Notes
  549. -----
  550. The symbols used to denote the shape parameters (`M`, `n`, and `r`) are not
  551. universally accepted. See the Examples for a clarification of the
  552. definitions used here.
  553. The probability mass function is defined as,
  554. .. math:: f(k; M, n, r) = \frac{{{k+r-1}\choose{k}}{{M-r-k}\choose{n-k}}}
  555. {{M \choose n}}
  556. for :math:`k \in [0, n]`, :math:`n \in [0, M]`, :math:`r \in [0, M-n]`,
  557. and the binomial coefficient is:
  558. .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
  559. It is equivalent to observing :math:`k` successes in :math:`k+r-1`
  560. samples with :math:`k+r`'th sample being a failure. The former
  561. can be modelled as a hypergeometric distribution. The probability
  562. of the latter is simply the number of failures remaining
  563. :math:`M-n-(r-1)` divided by the size of the remaining population
  564. :math:`M-(k+r-1)`. This relationship can be shown as:
  565. .. math:: NHG(k;M,n,r) = HG(k;M,n,k+r-1)\frac{(M-n-(r-1))}{(M-(k+r-1))}
  566. where :math:`NHG` is probability mass function (PMF) of the
  567. negative hypergeometric distribution and :math:`HG` is the
  568. PMF of the hypergeometric distribution.
  569. %(after_notes)s
  570. Examples
  571. --------
  572. >>> import numpy as np
  573. >>> from scipy.stats import nhypergeom
  574. >>> import matplotlib.pyplot as plt
  575. Suppose we have a collection of 20 animals, of which 7 are dogs.
  576. Then if we want to know the probability of finding a given number
  577. of dogs (successes) in a sample with exactly 12 animals that
  578. aren't dogs (failures), we can initialize a frozen distribution
  579. and plot the probability mass function:
  580. >>> M, n, r = [20, 7, 12]
  581. >>> rv = nhypergeom(M, n, r)
  582. >>> x = np.arange(0, n+2)
  583. >>> pmf_dogs = rv.pmf(x)
  584. >>> fig = plt.figure()
  585. >>> ax = fig.add_subplot(111)
  586. >>> ax.plot(x, pmf_dogs, 'bo')
  587. >>> ax.vlines(x, 0, pmf_dogs, lw=2)
  588. >>> ax.set_xlabel('# of dogs in our group with given 12 failures')
  589. >>> ax.set_ylabel('nhypergeom PMF')
  590. >>> plt.show()
  591. Instead of using a frozen distribution we can also use `nhypergeom`
  592. methods directly. To for example obtain the probability mass
  593. function, use:
  594. >>> prb = nhypergeom.pmf(x, M, n, r)
  595. And to generate random numbers:
  596. >>> R = nhypergeom.rvs(M, n, r, size=10)
  597. To verify the relationship between `hypergeom` and `nhypergeom`, use:
  598. >>> from scipy.stats import hypergeom, nhypergeom
  599. >>> M, n, r = 45, 13, 8
  600. >>> k = 6
  601. >>> nhypergeom.pmf(k, M, n, r)
  602. 0.06180776620271643
  603. >>> hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
  604. 0.06180776620271644
  605. See Also
  606. --------
  607. hypergeom, binom, nbinom
  608. References
  609. ----------
  610. .. [1] Negative Hypergeometric Distribution on Wikipedia
  611. https://en.wikipedia.org/wiki/Negative_hypergeometric_distribution
  612. .. [2] Negative Hypergeometric Distribution from
  613. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Negativehypergeometric.pdf
  614. """
  615. def _shape_info(self):
  616. return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
  617. _ShapeInfo("n", True, (0, np.inf), (True, False)),
  618. _ShapeInfo("r", True, (0, np.inf), (True, False))]
  619. def _get_support(self, M, n, r):
  620. return 0, n
  621. def _argcheck(self, M, n, r):
  622. cond = (n >= 0) & (n <= M) & (r >= 0) & (r <= M-n)
  623. cond &= _isintegral(M) & _isintegral(n) & _isintegral(r)
  624. return cond
  625. def _rvs(self, M, n, r, size=None, random_state=None):
  626. @_vectorize_rvs_over_shapes
  627. def _rvs1(M, n, r, size, random_state):
  628. # invert cdf by calculating all values in support, scalar M, n, r
  629. a, b = self.support(M, n, r)
  630. ks = np.arange(a, b+1)
  631. cdf = self.cdf(ks, M, n, r)
  632. ppf = interp1d(cdf, ks, kind='next', fill_value='extrapolate')
  633. rvs = ppf(random_state.uniform(size=size)).astype(int)
  634. if size is None:
  635. return rvs.item()
  636. return rvs
  637. return _rvs1(M, n, r, size=size, random_state=random_state)
  638. def _logpmf(self, k, M, n, r):
  639. return xpx.apply_where(
  640. (r != 0) | (k != 0), (k, M, n, r),
  641. lambda k, M, n, r:
  642. (-betaln(k+1, r) + betaln(k+r, 1)
  643. - betaln(n-k+1, M-r-n+1) + betaln(M-r-k+1, 1)
  644. + betaln(n+1, M-n+1) - betaln(M+1, 1)),
  645. fill_value=0.0)
  646. def _pmf(self, k, M, n, r):
  647. # same as the following but numerically more precise
  648. # return comb(k+r-1, k) * comb(M-r-k, n-k) / comb(M, n)
  649. return exp(self._logpmf(k, M, n, r))
  650. def _stats(self, M, n, r):
  651. # Promote the datatype to at least float
  652. # mu = rn / (M-n+1)
  653. M, n, r = 1.*M, 1.*n, 1.*r
  654. mu = r*n / (M-n+1)
  655. var = r*(M+1)*n / ((M-n+1)*(M-n+2)) * (1 - r / (M-n+1))
  656. # The skew and kurtosis are mathematically
  657. # intractable so return `None`. See [2]_.
  658. g1, g2 = None, None
  659. return mu, var, g1, g2
  660. nhypergeom = nhypergeom_gen(name='nhypergeom')
  661. # FIXME: Fails _cdfvec
  662. class logser_gen(rv_discrete):
  663. r"""A Logarithmic (Log-Series, Series) discrete random variable.
  664. %(before_notes)s
  665. Notes
  666. -----
  667. The probability mass function for `logser` is:
  668. .. math::
  669. f(k) = - \frac{p^k}{k \log(1-p)}
  670. for :math:`k \ge 1`, :math:`0 < p < 1`
  671. `logser` takes :math:`p` as shape parameter,
  672. where :math:`p` is the probability of a single success
  673. and :math:`1-p` is the probability of a single failure.
  674. %(after_notes)s
  675. %(example)s
  676. """
  677. def _shape_info(self):
  678. return [_ShapeInfo("p", False, (0, 1), (True, True))]
  679. def _rvs(self, p, size=None, random_state=None):
  680. # looks wrong for p>0.5, too few k=1
  681. # trying to use generic is worse, no k=1 at all
  682. return random_state.logseries(p, size=size)
  683. def _argcheck(self, p):
  684. return (p > 0) & (p < 1)
  685. def _pmf(self, k, p):
  686. # logser.pmf(k) = - p**k / (k*log(1-p))
  687. return -np.power(p, k) * 1.0 / k / special.log1p(-p)
  688. def _sf(self, k, p):
  689. tiny = 1e-100
  690. # Ideally, this is the unregularized beta function with `b=0`. We don't have
  691. # an unregularized beta function (yet, although we could get it from Boost),
  692. # and neither technically support `b=0` - despite the function being accurate
  693. # for `b` super close to zero. See https://github.com/scipy/scipy/issues/3890.
  694. return -special.betainc(k+1, tiny, p) * special.beta(k+1, tiny) / np.log1p(-p)
  695. def _stats(self, p):
  696. r = special.log1p(-p)
  697. mu = p / (p - 1.0) / r
  698. mu2p = -p / r / (p - 1.0)**2
  699. var = mu2p - mu*mu
  700. mu3p = -p / r * (1.0+p) / (1.0 - p)**3
  701. mu3 = mu3p - 3*mu*mu2p + 2*mu**3
  702. g1 = mu3 / np.power(var, 1.5)
  703. mu4p = -p / r * (
  704. 1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
  705. mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4
  706. g2 = mu4 / var**2 - 3.0
  707. return mu, var, g1, g2
  708. logser = logser_gen(a=1, name='logser', longname='A logarithmic')
  709. class poisson_gen(rv_discrete):
  710. r"""A Poisson discrete random variable.
  711. %(before_notes)s
  712. Notes
  713. -----
  714. The probability mass function for `poisson` is:
  715. .. math::
  716. f(k) = \exp(-\mu) \frac{\mu^k}{k!}
  717. for :math:`k \ge 0`.
  718. `poisson` takes :math:`\mu \geq 0` as shape parameter.
  719. When :math:`\mu = 0`, the ``pmf`` method
  720. returns ``1.0`` at quantile :math:`k = 0`.
  721. %(after_notes)s
  722. %(example)s
  723. """
  724. def _shape_info(self):
  725. return [_ShapeInfo("mu", False, (0, np.inf), (True, False))]
  726. # Override rv_discrete._argcheck to allow mu=0.
  727. def _argcheck(self, mu):
  728. return mu >= 0
  729. def _rvs(self, mu, size=None, random_state=None):
  730. return random_state.poisson(mu, size)
  731. def _logpmf(self, k, mu):
  732. Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
  733. return Pk
  734. def _pmf(self, k, mu):
  735. # poisson.pmf(k) = exp(-mu) * mu**k / k!
  736. return exp(self._logpmf(k, mu))
  737. def _cdf(self, x, mu):
  738. k = floor(x)
  739. return special.pdtr(k, mu)
  740. def _sf(self, x, mu):
  741. k = floor(x)
  742. return special.pdtrc(k, mu)
  743. def _ppf(self, q, mu):
  744. vals = ceil(special.pdtrik(q, mu))
  745. vals1 = np.maximum(vals - 1, 0)
  746. temp = special.pdtr(vals1, mu)
  747. return np.where(temp >= q, vals1, vals)
  748. def _stats(self, mu):
  749. var = mu
  750. tmp = np.asarray(mu)
  751. mu_nonzero = tmp > 0
  752. g1 = xpx.apply_where(mu_nonzero, tmp, lambda x: sqrt(1.0/x), fill_value=np.inf)
  753. g2 = xpx.apply_where(mu_nonzero, tmp, lambda x: 1.0/x, fill_value=np.inf)
  754. return mu, var, g1, g2
  755. poisson = poisson_gen(name="poisson", longname='A Poisson')
  756. class planck_gen(rv_discrete):
  757. r"""A Planck discrete exponential random variable.
  758. %(before_notes)s
  759. Notes
  760. -----
  761. The probability mass function for `planck` is:
  762. .. math::
  763. f(k) = (1-\exp(-\lambda)) \exp(-\lambda k)
  764. for :math:`k \ge 0` and :math:`\lambda > 0`.
  765. `planck` takes :math:`\lambda` as shape parameter. The Planck distribution
  766. can be written as a geometric distribution (`geom`) with
  767. :math:`p = 1 - \exp(-\lambda)` shifted by ``loc = -1``.
  768. %(after_notes)s
  769. See Also
  770. --------
  771. geom
  772. %(example)s
  773. """
  774. def _shape_info(self):
  775. return [_ShapeInfo("lambda_", False, (0, np.inf), (False, False))]
  776. def _argcheck(self, lambda_):
  777. return lambda_ > 0
  778. def _pmf(self, k, lambda_):
  779. return -expm1(-lambda_)*exp(-lambda_*k)
  780. def _cdf(self, x, lambda_):
  781. k = floor(x)
  782. return -expm1(-lambda_*(k+1))
  783. def _sf(self, x, lambda_):
  784. return exp(self._logsf(x, lambda_))
  785. def _logsf(self, x, lambda_):
  786. k = floor(x)
  787. return -lambda_*(k+1)
  788. def _ppf(self, q, lambda_):
  789. vals = ceil(-1.0/lambda_ * log1p(-q)-1)
  790. vals1 = (vals-1).clip(*(self._get_support(lambda_)))
  791. temp = self._cdf(vals1, lambda_)
  792. return np.where(temp >= q, vals1, vals)
  793. def _rvs(self, lambda_, size=None, random_state=None):
  794. # use relation to geometric distribution for sampling
  795. p = -expm1(-lambda_)
  796. return random_state.geometric(p, size=size) - 1.0
  797. def _stats(self, lambda_):
  798. mu = 1/expm1(lambda_)
  799. var = exp(-lambda_)/(expm1(-lambda_))**2
  800. g1 = 2*cosh(lambda_/2.0)
  801. g2 = 4+2*cosh(lambda_)
  802. return mu, var, g1, g2
  803. def _entropy(self, lambda_):
  804. C = -expm1(-lambda_)
  805. return lambda_*exp(-lambda_)/C - log(C)
  806. planck = planck_gen(a=0, name='planck', longname='A discrete exponential ')
  807. class boltzmann_gen(rv_discrete):
  808. r"""A Boltzmann (Truncated Discrete Exponential) random variable.
  809. %(before_notes)s
  810. Notes
  811. -----
  812. The probability mass function for `boltzmann` is:
  813. .. math::
  814. f(k) = (1-\exp(-\lambda)) \exp(-\lambda k) / (1-\exp(-\lambda N))
  815. for :math:`k = 0,..., N-1`.
  816. `boltzmann` takes :math:`\lambda > 0` and :math:`N > 0` as shape parameters.
  817. %(after_notes)s
  818. %(example)s
  819. """
  820. def _shape_info(self):
  821. return [_ShapeInfo("lambda_", False, (0, np.inf), (False, False)),
  822. _ShapeInfo("N", True, (0, np.inf), (False, False))]
  823. def _argcheck(self, lambda_, N):
  824. return (lambda_ > 0) & (N > 0) & _isintegral(N)
  825. def _get_support(self, lambda_, N):
  826. return self.a, N - 1
  827. def _pmf(self, k, lambda_, N):
  828. # boltzmann.pmf(k) =
  829. # (1-exp(-lambda_)*exp(-lambda_*k)/(1-exp(-lambda_*N))
  830. fact = (1-exp(-lambda_))/(1-exp(-lambda_*N))
  831. return fact*exp(-lambda_*k)
  832. def _cdf(self, x, lambda_, N):
  833. k = floor(x)
  834. return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))
  835. def _ppf(self, q, lambda_, N):
  836. qnew = q*(1-exp(-lambda_*N))
  837. vals = ceil(-1.0/lambda_ * log(1-qnew)-1)
  838. vals1 = (vals-1).clip(0.0, np.inf)
  839. temp = self._cdf(vals1, lambda_, N)
  840. return np.where(temp >= q, vals1, vals)
  841. def _stats(self, lambda_, N):
  842. z = exp(-lambda_)
  843. zN = exp(-lambda_*N)
  844. mu = z/(1.0-z)-N*zN/(1-zN)
  845. var = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
  846. trm = (1-zN)/(1-z)
  847. trm2 = (z*trm**2 - N*N*zN)
  848. g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
  849. g1 = g1 / trm2**(1.5)
  850. g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
  851. g2 = g2 / trm2 / trm2
  852. return mu, var, g1, g2
  853. boltzmann = boltzmann_gen(name='boltzmann', a=0,
  854. longname='A truncated discrete exponential ')
  855. class randint_gen(rv_discrete):
  856. r"""A uniform discrete random variable.
  857. %(before_notes)s
  858. Notes
  859. -----
  860. The probability mass function for `randint` is:
  861. .. math::
  862. f(k) = \frac{1}{\texttt{high} - \texttt{low}}
  863. for :math:`k \in \{\texttt{low}, \dots, \texttt{high} - 1\}`.
  864. `randint` takes :math:`\texttt{low}` and :math:`\texttt{high}` as shape
  865. parameters.
  866. %(after_notes)s
  867. Examples
  868. --------
  869. >>> import numpy as np
  870. >>> from scipy.stats import randint
  871. >>> import matplotlib.pyplot as plt
  872. >>> fig, ax = plt.subplots(1, 1)
  873. Calculate the first four moments:
  874. >>> low, high = 7, 31
  875. >>> mean, var, skew, kurt = randint.stats(low, high, moments='mvsk')
  876. Display the probability mass function (``pmf``):
  877. >>> x = np.arange(low - 5, high + 5)
  878. >>> ax.plot(x, randint.pmf(x, low, high), 'bo', ms=8, label='randint pmf')
  879. >>> ax.vlines(x, 0, randint.pmf(x, low, high), colors='b', lw=5, alpha=0.5)
  880. Alternatively, the distribution object can be called (as a function) to
  881. fix the shape and location. This returns a "frozen" RV object holding the
  882. given parameters fixed.
  883. Freeze the distribution and display the frozen ``pmf``:
  884. >>> rv = randint(low, high)
  885. >>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-',
  886. ... lw=1, label='frozen pmf')
  887. >>> ax.legend(loc='lower center')
  888. >>> plt.show()
  889. Check the relationship between the cumulative distribution function
  890. (``cdf``) and its inverse, the percent point function (``ppf``):
  891. >>> q = np.arange(low, high)
  892. >>> p = randint.cdf(q, low, high)
  893. >>> np.allclose(q, randint.ppf(p, low, high))
  894. True
  895. Generate random numbers:
  896. >>> r = randint.rvs(low, high, size=1000)
  897. """
  898. def _shape_info(self):
  899. return [_ShapeInfo("low", True, (-np.inf, np.inf), (False, False)),
  900. _ShapeInfo("high", True, (-np.inf, np.inf), (False, False))]
  901. def _argcheck(self, low, high):
  902. return (high > low) & _isintegral(low) & _isintegral(high)
  903. def _get_support(self, low, high):
  904. return low, high-1
  905. def _pmf(self, k, low, high):
  906. # randint.pmf(k) = 1./(high - low)
  907. p = np.ones_like(k) / (np.asarray(high, dtype=np.int64) - low)
  908. return np.where((k >= low) & (k < high), p, 0.)
  909. def _cdf(self, x, low, high):
  910. k = floor(x)
  911. return (k - low + 1.) / (high - low)
  912. def _ppf(self, q, low, high):
  913. vals = ceil(q * (high - low) + low) - 1
  914. vals1 = (vals - 1).clip(low, high)
  915. temp = self._cdf(vals1, low, high)
  916. return np.where(temp >= q, vals1, vals)
  917. def _stats(self, low, high):
  918. m2, m1 = np.asarray(high), np.asarray(low)
  919. mu = (m2 + m1 - 1.0) / 2
  920. d = m2 - m1
  921. var = (d*d - 1) / 12.0
  922. g1 = 0.0
  923. g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)
  924. return mu, var, g1, g2
  925. def _rvs(self, low, high, size=None, random_state=None):
  926. """An array of *size* random integers >= ``low`` and < ``high``."""
  927. if np.asarray(low).size == 1 and np.asarray(high).size == 1:
  928. # no need to vectorize in that case
  929. return rng_integers(random_state, low, high, size=size)
  930. if size is not None:
  931. # NumPy's RandomState.randint() doesn't broadcast its arguments.
  932. # Use `broadcast_to()` to extend the shapes of low and high
  933. # up to size. Then we can use the numpy.vectorize'd
  934. # randint without needing to pass it a `size` argument.
  935. low = np.broadcast_to(low, size)
  936. high = np.broadcast_to(high, size)
  937. randint = np.vectorize(partial(rng_integers, random_state),
  938. otypes=[np.dtype(int)])
  939. return randint(low, high)
  940. def _entropy(self, low, high):
  941. return log(high - low)
  942. randint = randint_gen(name='randint', longname='A discrete uniform '
  943. '(random integer)')
  944. # FIXME: problems sampling.
  945. class zipf_gen(rv_discrete):
  946. r"""A Zipf (Zeta) discrete random variable.
  947. %(before_notes)s
  948. See Also
  949. --------
  950. zipfian
  951. Notes
  952. -----
  953. The probability mass function for `zipf` is:
  954. .. math::
  955. f(k, a) = \frac{1}{\zeta(a) k^a}
  956. for :math:`k \ge 1`, :math:`a > 1`.
  957. `zipf` takes :math:`a > 1` as shape parameter. :math:`\zeta` is the
  958. Riemann zeta function (`scipy.special.zeta`)
  959. The Zipf distribution is also known as the zeta distribution, which is
  960. a special case of the Zipfian distribution (`zipfian`).
  961. %(after_notes)s
  962. References
  963. ----------
  964. .. [1] "Zeta Distribution", Wikipedia,
  965. https://en.wikipedia.org/wiki/Zeta_distribution
  966. %(example)s
  967. Confirm that `zipf` is the large `n` limit of `zipfian`.
  968. >>> import numpy as np
  969. >>> from scipy.stats import zipf, zipfian
  970. >>> k = np.arange(11)
  971. >>> np.allclose(zipf.pmf(k, a), zipfian.pmf(k, a, n=10000000))
  972. True
  973. """
  974. def _shape_info(self):
  975. return [_ShapeInfo("a", False, (1, np.inf), (False, False))]
  976. def _rvs(self, a, size=None, random_state=None):
  977. return random_state.zipf(a, size=size)
  978. def _argcheck(self, a):
  979. return a > 1
  980. def _pmf(self, k, a):
  981. k = k.astype(np.float64)
  982. # zipf.pmf(k, a) = 1/(zeta(a) * k**a)
  983. Pk = 1.0 / special.zeta(a, 1) * k**-a
  984. return Pk
  985. def _munp(self, n, a):
  986. return xpx.apply_where(
  987. a > n + 1, (a, n),
  988. lambda a, n: special.zeta(a - n, 1) / special.zeta(a, 1),
  989. fill_value=np.inf)
  990. zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')
  991. class zipfian_gen(rv_discrete):
  992. r"""A Zipfian discrete random variable.
  993. %(before_notes)s
  994. See Also
  995. --------
  996. zipf
  997. Notes
  998. -----
  999. The probability mass function for `zipfian` is:
  1000. .. math::
  1001. f(k, a, n) = \frac{1}{H_{n,a} k^a}
  1002. for :math:`k \in \{1, 2, \dots, n-1, n\}`, :math:`a \ge 0`,
  1003. :math:`n \in \{1, 2, 3, \dots\}`.
  1004. `zipfian` takes :math:`a` and :math:`n` as shape parameters.
  1005. :math:`H_{n,a}` is the :math:`n`:sup:`th` generalized harmonic
  1006. number of order :math:`a`.
  1007. The SciPy implementation of this distribution requires :math:`1 \le n \le 2^{53}`.
  1008. For larger values of :math:`n`, the `zipfian` methods (`pmf`, `cdf`, `mean`, etc.)
  1009. will return `nan`.
  1010. When :math:`a > 1`, the Zipfian distribution reduces to the Zipf (zeta)
  1011. distribution as :math:`n \rightarrow \infty`.
  1012. %(after_notes)s
  1013. References
  1014. ----------
  1015. .. [1] "Zipf's Law", Wikipedia, https://en.wikipedia.org/wiki/Zipf's_law
  1016. .. [2] Larry Leemis, "Zipf Distribution", Univariate Distribution
  1017. Relationships. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
  1018. %(example)s
  1019. Confirm that `zipfian` reduces to `zipf` for large `n`, ``a > 1``.
  1020. >>> import numpy as np
  1021. >>> from scipy.stats import zipf, zipfian
  1022. >>> k = np.arange(11)
  1023. >>> np.allclose(zipfian.pmf(k, a=3.5, n=10000000), zipf.pmf(k, a=3.5))
  1024. True
  1025. """
  1026. def _shape_info(self):
  1027. return [_ShapeInfo("a", False, (0, np.inf), (True, False)),
  1028. _ShapeInfo("n", True, (0, np.inf), (False, False))]
  1029. def _argcheck(self, a, n):
  1030. # The upper bound on n is for practical numerical reasons. The numerical
  1031. # methods for computing the PMF, CDF and SF involve sums over range(1, n+1)
  1032. # when a <= 1, so there is no way they can be computed for extremely large
  1033. # n--even 2**53 is ridiculously large for those calculations. The bound is
  1034. # also required to ensure that the loops compute the sums correctly when
  1035. # the inputs are double precision instead of integers.
  1036. #
  1037. # n may be an integer or a float, but the value must be an integer in the
  1038. # range 1 <= n <= 2**53. The expression below clips n to the accepted range
  1039. # before attempting to cast to integer to avoid warnings generated by an
  1040. # input such as n=1e100. The extra `np.asarray()` wrapper avoids the error
  1041. # that arises with an input such as `n=2**100`.
  1042. return ((a >= 0) &
  1043. (n == np.asarray(np.clip(n, 1, 2**53)).astype(dtype=np.int64)))
  1044. def _get_support(self, a, n):
  1045. return 1, np.floor(n)
  1046. def _pmf(self, k, a, n):
  1047. k = np.floor(k)
  1048. n = np.floor(n)
  1049. return scu._normalized_gen_harmonic(k, k, n, a)
  1050. def _cdf(self, k, a, n):
  1051. k = np.floor(k)
  1052. n = np.floor(n)
  1053. return scu._normalized_gen_harmonic(1, k, n, a)
  1054. def _sf(self, k, a, n):
  1055. k = np.floor(k)
  1056. n = np.floor(n)
  1057. return scu._normalized_gen_harmonic(k + 1, n, n, a)
  1058. def _stats(self, a, n):
  1059. n = np.floor(n)
  1060. # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
  1061. Hna = scu._gen_harmonic(n, a)
  1062. Hna1 = scu._gen_harmonic(n, a-1)
  1063. Hna2 = scu._gen_harmonic(n, a-2)
  1064. Hna3 = scu._gen_harmonic(n, a-3)
  1065. Hna4 = scu._gen_harmonic(n, a-4)
  1066. mu1 = Hna1/Hna
  1067. mu2n = (Hna2*Hna - Hna1**2)
  1068. mu2d = Hna**2
  1069. mu2 = mu2n / mu2d
  1070. g1 = (Hna3/Hna - 3*Hna1*Hna2/Hna**2 + 2*Hna1**3/Hna**3)/mu2**(3/2)
  1071. g2 = (Hna**3*Hna4 - 4*Hna**2*Hna1*Hna3 + 6*Hna*Hna1**2*Hna2
  1072. - 3*Hna1**4) / mu2n**2
  1073. g2 -= 3
  1074. return mu1, mu2, g1, g2
  1075. zipfian = zipfian_gen(a=1, name='zipfian', longname='A Zipfian')
  1076. class dlaplace_gen(rv_discrete):
  1077. r"""A Laplacian discrete random variable.
  1078. %(before_notes)s
  1079. Notes
  1080. -----
  1081. The probability mass function for `dlaplace` is:
  1082. .. math::
  1083. f(k) = \tanh(a/2) \exp(-a |k|)
  1084. for integers :math:`k` and :math:`a > 0`.
  1085. `dlaplace` takes :math:`a` as shape parameter.
  1086. %(after_notes)s
  1087. %(example)s
  1088. """
  1089. def _shape_info(self):
  1090. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  1091. def _pmf(self, k, a):
  1092. # dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
  1093. return tanh(a/2.0) * exp(-a * abs(k))
  1094. def _cdf(self, x, a):
  1095. k = floor(x)
  1096. def f1(k, a):
  1097. return 1.0 - exp(-a * k) / (exp(a) + 1)
  1098. def f2(k, a):
  1099. return exp(a * (k + 1)) / (exp(a) + 1)
  1100. return xpx.apply_where(k >= 0, (k, a), f1, f2)
  1101. def _ppf(self, q, a):
  1102. const = 1 + exp(a)
  1103. vals = ceil(np.where(q < 1.0 / (1 + exp(-a)),
  1104. log(q*const) / a - 1,
  1105. -log((1-q) * const) / a))
  1106. vals1 = vals - 1
  1107. return np.where(self._cdf(vals1, a) >= q, vals1, vals)
  1108. def _stats(self, a):
  1109. ea = exp(a)
  1110. mu2 = 2.*ea/(ea-1.)**2
  1111. mu4 = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
  1112. return 0., mu2, 0., mu4/mu2**2 - 3.
  1113. def _entropy(self, a):
  1114. return a / sinh(a) - log(tanh(a/2.0))
  1115. def _rvs(self, a, size=None, random_state=None):
  1116. # The discrete Laplace is equivalent to the two-sided geometric
  1117. # distribution with PMF:
  1118. # f(k) = (1 - alpha)/(1 + alpha) * alpha^abs(k)
  1119. # Reference:
  1120. # https://www.sciencedirect.com/science/
  1121. # article/abs/pii/S0378375804003519
  1122. # Furthermore, the two-sided geometric distribution is
  1123. # equivalent to the difference between two iid geometric
  1124. # distributions.
  1125. # Reference (page 179):
  1126. # https://pdfs.semanticscholar.org/61b3/
  1127. # b99f466815808fd0d03f5d2791eea8b541a1.pdf
  1128. # Thus, we can leverage the following:
  1129. # 1) alpha = e^-a
  1130. # 2) probability_of_success = 1 - alpha (Bernoulli trial)
  1131. probOfSuccess = -np.expm1(-np.asarray(a))
  1132. x = random_state.geometric(probOfSuccess, size=size)
  1133. y = random_state.geometric(probOfSuccess, size=size)
  1134. return x - y
  1135. dlaplace = dlaplace_gen(a=-np.inf,
  1136. name='dlaplace', longname='A discrete Laplacian')
  1137. class poisson_binom_gen(rv_discrete):
  1138. r"""A Poisson Binomial discrete random variable.
  1139. %(before_notes)s
  1140. See Also
  1141. --------
  1142. binom
  1143. Notes
  1144. -----
  1145. The probability mass function for `poisson_binom` is:
  1146. .. math::
  1147. f(k; p_1, p_2, ..., p_n) = \sum_{A \in F_k} \prod_{i \in A} p_i \prod_{j \in A^C} 1 - p_j
  1148. where :math:`k \in \{0, 1, \dots, n-1, n\}`, :math:`F_k` is the set of all
  1149. subsets of :math:`k` integers that can be selected :math:`\{0, 1, \dots, n-1, n\}`,
  1150. and :math:`A^C` is the complement of a set :math:`A`.
  1151. `poisson_binom` accepts a single array argument ``p`` for shape parameters
  1152. :math:`0 ≤ p_i ≤ 1`, where the last axis corresponds with the index :math:`i` and
  1153. any others are for batch dimensions. Broadcasting behaves according to the usual
  1154. rules except that the last axis of ``p`` is ignored. Instances of this class do
  1155. not support serialization/unserialization.
  1156. %(after_notes)s
  1157. References
  1158. ----------
  1159. .. [1] "Poisson binomial distribution", Wikipedia,
  1160. https://en.wikipedia.org/wiki/Poisson_binomial_distribution
  1161. .. [2] Biscarri, William, Sihai Dave Zhao, and Robert J. Brunner. "A simple and
  1162. fast method for computing the Poisson binomial distribution function".
  1163. Computational Statistics & Data Analysis 122 (2018) 92-100.
  1164. :doi:`10.1016/j.csda.2018.01.007`
  1165. %(example)s
  1166. """ # noqa: E501
  1167. def _shape_info(self):
  1168. # message = 'Fitting is not implemented for this distribution."
  1169. # raise NotImplementedError(message)
  1170. return []
  1171. def _argcheck(self, *args):
  1172. p = np.stack(args, axis=0)
  1173. conds = (0 <= p) & (p <= 1)
  1174. return np.all(conds, axis=0)
  1175. def _rvs(self, *args, size=None, random_state=None):
  1176. # convenient to work along the last axis here to avoid interference with `size`
  1177. p = np.stack(args, axis=-1)
  1178. # Size passed by the user is the *shape of the returned array*, so it won't
  1179. # contain the length of the last axis of p.
  1180. size = (p.shape if size is None else
  1181. (size, 1) if np.isscalar(size) else tuple(size) + (1,))
  1182. size = np.broadcast_shapes(p.shape, size)
  1183. return bernoulli._rvs(p, size=size, random_state=random_state).sum(axis=-1)
  1184. def _get_support(self, *args):
  1185. return 0, len(args)
  1186. def _pmf(self, k, *args):
  1187. k = np.atleast_1d(k).astype(np.int64)
  1188. k, *args = np.broadcast_arrays(k, *args)
  1189. args = np.asarray(args, dtype=np.float64)
  1190. return _poisson_binom(k, args, 'pmf')
  1191. def _cdf(self, k, *args):
  1192. k = np.atleast_1d(k).astype(np.int64)
  1193. k, *args = np.broadcast_arrays(k, *args)
  1194. args = np.asarray(args, dtype=np.float64)
  1195. return _poisson_binom(k, args, 'cdf')
  1196. def _stats(self, *args, **kwds):
  1197. p = np.stack(args, axis=0)
  1198. mean = np.sum(p, axis=0)
  1199. var = np.sum(p * (1-p), axis=0)
  1200. return (mean, var, None, None)
  1201. def __call__(self, *args, **kwds):
  1202. return poisson_binomial_frozen(self, *args, **kwds)
  1203. poisson_binom = poisson_binom_gen(name='poisson_binom', longname='A Poisson binomial',
  1204. shapes='p')
  1205. # The _parse_args methods don't work with vector-valued shape parameters, so we rewrite
  1206. # them. Note that `p` is accepted as an array with the index `i` of `p_i` corresponding
  1207. # with the last axis; we return it as a tuple (p_1, p_2, ..., p_n) so that it looks
  1208. # like `n` scalar (or arrays of scalar-valued) shape parameters to the infrastructure.
  1209. def _parse_args_rvs(self, p, loc=0, size=None):
  1210. return tuple(np.moveaxis(p, -1, 0)), loc, 1.0, size
  1211. def _parse_args_stats(self, p, loc=0, moments='mv'):
  1212. return tuple(np.moveaxis(p, -1, 0)), loc, 1.0, moments
  1213. def _parse_args(self, p, loc=0):
  1214. return tuple(np.moveaxis(p, -1, 0)), loc, 1.0
  1215. # The infrastructure manually binds these methods to the instance, so
  1216. # we can only override them by manually binding them, too.
  1217. _pb_obj, _pb_cls = poisson_binom, poisson_binom_gen # shorter names (for PEP8)
  1218. poisson_binom._parse_args_rvs = _parse_args_rvs.__get__(_pb_obj, _pb_cls)
  1219. poisson_binom._parse_args_stats = _parse_args_stats.__get__(_pb_obj, _pb_cls)
  1220. poisson_binom._parse_args = _parse_args.__get__(_pb_obj, _pb_cls)
  1221. class poisson_binomial_frozen(rv_discrete_frozen):
  1222. # copied from rv_frozen; we just need to bind the `_parse_args` methods
  1223. def __init__(self, dist, *args, **kwds): # verbatim
  1224. self.args = args # verbatim
  1225. self.kwds = kwds # verbatim
  1226. # create a new instance # verbatim
  1227. self.dist = dist.__class__(**dist._updated_ctor_param()) # verbatim
  1228. # Here is the only modification
  1229. self.dist._parse_args_rvs = _parse_args_rvs.__get__(_pb_obj, _pb_cls)
  1230. self.dist._parse_args_stats = _parse_args_stats.__get__(_pb_obj, _pb_cls)
  1231. self.dist._parse_args = _parse_args.__get__(_pb_obj, _pb_cls)
  1232. shapes, _, _ = self.dist._parse_args(*args, **kwds) # verbatim
  1233. self.a, self.b = self.dist._get_support(*shapes) # verbatim
  1234. def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
  1235. a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
  1236. # Here's the modification: we pass all args (including `loc`) into the `args`
  1237. # parameter of `expect` so the shape only goes through `_parse_args` once.
  1238. return self.dist.expect(func, self.args, loc, lb, ub, conditional, **kwds)
  1239. class skellam_gen(rv_discrete):
  1240. r"""A Skellam discrete random variable.
  1241. %(before_notes)s
  1242. Notes
  1243. -----
  1244. Probability distribution of the difference of two correlated or
  1245. uncorrelated Poisson random variables.
  1246. Let :math:`k_1` and :math:`k_2` be two Poisson-distributed r.v. with
  1247. expected values :math:`\lambda_1` and :math:`\lambda_2`. Then,
  1248. :math:`k_1 - k_2` follows a Skellam distribution with parameters
  1249. :math:`\mu_1 = \lambda_1 - \rho \sqrt{\lambda_1 \lambda_2}` and
  1250. :math:`\mu_2 = \lambda_2 - \rho \sqrt{\lambda_1 \lambda_2}`, where
  1251. :math:`\rho` is the correlation coefficient between :math:`k_1` and
  1252. :math:`k_2`. If the two Poisson-distributed r.v. are independent then
  1253. :math:`\rho = 0`.
  1254. Parameters :math:`\mu_1` and :math:`\mu_2` must be strictly positive.
  1255. For details see: https://en.wikipedia.org/wiki/Skellam_distribution
  1256. `skellam` takes :math:`\mu_1` and :math:`\mu_2` as shape parameters.
  1257. %(after_notes)s
  1258. %(example)s
  1259. """
  1260. def _shape_info(self):
  1261. return [_ShapeInfo("mu1", False, (0, np.inf), (False, False)),
  1262. _ShapeInfo("mu2", False, (0, np.inf), (False, False))]
  1263. def _rvs(self, mu1, mu2, size=None, random_state=None):
  1264. n = size
  1265. return (random_state.poisson(mu1, n) -
  1266. random_state.poisson(mu2, n))
  1267. def _pmf(self, x, mu1, mu2):
  1268. with np.errstate(over='ignore'): # see gh-17432
  1269. px = np.where(x < 0,
  1270. scu._ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2,
  1271. scu._ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2)
  1272. # ncx2.pdf() returns nan's for extremely low probabilities
  1273. return px
  1274. def _cdf(self, x, mu1, mu2):
  1275. x = floor(x)
  1276. with np.errstate(over='ignore'): # see gh-17432
  1277. px = np.where(x < 0,
  1278. special.chndtr(2*mu2, -2*x, 2*mu1),
  1279. scu._ncx2_sf(2*mu1, 2*(x+1), 2*mu2))
  1280. return px
  1281. def _stats(self, mu1, mu2):
  1282. mean = mu1 - mu2
  1283. var = mu1 + mu2
  1284. g1 = mean / sqrt((var)**3)
  1285. g2 = 1 / var
  1286. return mean, var, g1, g2
  1287. skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')
  1288. class yulesimon_gen(rv_discrete):
  1289. r"""A Yule-Simon discrete random variable.
  1290. %(before_notes)s
  1291. Notes
  1292. -----
  1293. The probability mass function for the `yulesimon` is:
  1294. .. math::
  1295. f(k) = \alpha B(k, \alpha+1)
  1296. for :math:`k=1,2,3,...`, where :math:`\alpha>0`.
  1297. Here :math:`B` refers to the `scipy.special.beta` function.
  1298. The sampling of random variates is based on pg 553, Section 6.3 of [1]_.
  1299. Our notation maps to the referenced logic via :math:`\alpha=a-1`.
  1300. For details see the wikipedia entry [2]_.
  1301. References
  1302. ----------
  1303. .. [1] Devroye, Luc. "Non-uniform Random Variate Generation",
  1304. (1986) Springer, New York.
  1305. .. [2] https://en.wikipedia.org/wiki/Yule-Simon_distribution
  1306. %(after_notes)s
  1307. %(example)s
  1308. """
  1309. def _shape_info(self):
  1310. return [_ShapeInfo("alpha", False, (0, np.inf), (False, False))]
  1311. def _rvs(self, alpha, size=None, random_state=None):
  1312. E1 = random_state.standard_exponential(size)
  1313. E2 = random_state.standard_exponential(size)
  1314. ans = ceil(-E1 / log1p(-exp(-E2 / alpha)))
  1315. return ans
  1316. def _pmf(self, x, alpha):
  1317. return alpha * special.beta(x, alpha + 1)
  1318. def _argcheck(self, alpha):
  1319. return (alpha > 0)
  1320. def _logpmf(self, x, alpha):
  1321. return log(alpha) + special.betaln(x, alpha + 1)
  1322. def _cdf(self, x, alpha):
  1323. return 1 - x * special.beta(x, alpha + 1)
  1324. def _sf(self, x, alpha):
  1325. return x * special.beta(x, alpha + 1)
  1326. def _logsf(self, x, alpha):
  1327. return log(x) + special.betaln(x, alpha + 1)
  1328. def _stats(self, alpha):
  1329. mu = np.where(alpha <= 1, np.inf, alpha / (alpha - 1))
  1330. mu2 = np.where(alpha > 2,
  1331. alpha**2 / ((alpha - 2.0) * (alpha - 1)**2),
  1332. np.inf)
  1333. mu2 = np.where(alpha <= 1, np.nan, mu2)
  1334. g1 = np.where(alpha > 3,
  1335. sqrt(alpha - 2) * (alpha + 1)**2 / (alpha * (alpha - 3)),
  1336. np.inf)
  1337. g1 = np.where(alpha <= 2, np.nan, g1)
  1338. g2 = np.where(alpha > 4,
  1339. alpha + 3 + ((11 * alpha**3 - 49 * alpha - 22) /
  1340. (alpha * (alpha - 4) * (alpha - 3))),
  1341. np.inf)
  1342. g2 = np.where(alpha <= 2, np.nan, g2)
  1343. return mu, mu2, g1, g2
  1344. yulesimon = yulesimon_gen(name='yulesimon', a=1)
  1345. class _nchypergeom_gen(rv_discrete):
  1346. r"""A noncentral hypergeometric discrete random variable.
  1347. For subclassing by nchypergeom_fisher_gen and nchypergeom_wallenius_gen.
  1348. """
  1349. rvs_name = None
  1350. dist = None
  1351. def _shape_info(self):
  1352. return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
  1353. _ShapeInfo("n", True, (0, np.inf), (True, False)),
  1354. _ShapeInfo("N", True, (0, np.inf), (True, False)),
  1355. _ShapeInfo("odds", False, (0, np.inf), (False, False))]
  1356. def _get_support(self, M, n, N, odds):
  1357. N, m1, n = M, n, N # follow Wikipedia notation
  1358. m2 = N - m1
  1359. x_min = np.maximum(0, n - m2)
  1360. x_max = np.minimum(n, m1)
  1361. return x_min, x_max
  1362. def _argcheck(self, M, n, N, odds):
  1363. M, n = np.asarray(M), np.asarray(n),
  1364. N, odds = np.asarray(N), np.asarray(odds)
  1365. cond1 = (~np.isnan(M)) & (M.astype(int) == M) & (M >= 0)
  1366. cond2 = (~np.isnan(n)) & (n.astype(int) == n) & (n >= 0)
  1367. cond3 = (~np.isnan(N)) & (N.astype(int) == N) & (N >= 0)
  1368. cond4 = odds > 0
  1369. cond5 = N <= M
  1370. cond6 = n <= M
  1371. return cond1 & cond2 & cond3 & cond4 & cond5 & cond6
  1372. def _rvs(self, M, n, N, odds, size=None, random_state=None):
  1373. @_vectorize_rvs_over_shapes
  1374. def _rvs1(M, n, N, odds, size, random_state):
  1375. if np.isnan(M) | np.isnan(n) | np.isnan(N):
  1376. return np.full(size, np.nan)
  1377. length = np.prod(size)
  1378. urn = _PyStochasticLib3()
  1379. rv_gen = getattr(urn, self.rvs_name)
  1380. rvs = rv_gen(N, n, M, odds, length, random_state)
  1381. rvs = rvs.reshape(size)
  1382. return rvs
  1383. return _rvs1(M, n, N, odds, size=size, random_state=random_state)
  1384. def _pmf(self, x, M, n, N, odds):
  1385. x, M, n, N, odds = np.broadcast_arrays(x, M, n, N, odds)
  1386. if x.size == 0: # np.vectorize doesn't work with zero size input
  1387. return np.empty_like(x)
  1388. @np.vectorize
  1389. def _pmf1(x, M, n, N, odds):
  1390. if np.isnan(x) | np.isnan(M) | np.isnan(n) | np.isnan(N):
  1391. return np.nan
  1392. urn = self.dist(N, n, M, odds, 1e-12)
  1393. return urn.probability(x)
  1394. return _pmf1(x, M, n, N, odds)
  1395. def _stats(self, M, n, N, odds, moments='mv'):
  1396. @np.vectorize
  1397. def _moments1(M, n, N, odds):
  1398. if np.isnan(M) | np.isnan(n) | np.isnan(N):
  1399. return np.nan, np.nan
  1400. urn = self.dist(N, n, M, odds, 1e-12)
  1401. return urn.moments()
  1402. m, v = (_moments1(M, n, N, odds) if ("m" in moments or "v" in moments)
  1403. else (None, None))
  1404. s, k = None, None
  1405. return m, v, s, k
  1406. class nchypergeom_fisher_gen(_nchypergeom_gen):
  1407. r"""A Fisher's noncentral hypergeometric discrete random variable.
  1408. Fisher's noncentral hypergeometric distribution models drawing objects of
  1409. two types from a bin. `M` is the total number of objects, `n` is the
  1410. number of Type I objects, and `odds` is the odds ratio: the odds of
  1411. selecting a Type I object rather than a Type II object when there is only
  1412. one object of each type.
  1413. The random variate represents the number of Type I objects drawn if we
  1414. take a handful of objects from the bin at once and find out afterwards
  1415. that we took `N` objects.
  1416. %(before_notes)s
  1417. See Also
  1418. --------
  1419. nchypergeom_wallenius, hypergeom, nhypergeom
  1420. Notes
  1421. -----
  1422. Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
  1423. with parameters `N`, `n`, and `M` (respectively) as defined above.
  1424. The probability mass function is defined as
  1425. .. math::
  1426. p(x; M, n, N, \omega) =
  1427. \frac{\binom{n}{x}\binom{M - n}{N-x}\omega^x}{P_0},
  1428. for
  1429. :math:`x \in [x_l, x_u]`,
  1430. :math:`M \in {\mathbb N}`,
  1431. :math:`n \in [0, M]`,
  1432. :math:`N \in [0, M]`,
  1433. :math:`\omega > 0`,
  1434. where
  1435. :math:`x_l = \max(0, N - (M - n))`,
  1436. :math:`x_u = \min(N, n)`,
  1437. .. math::
  1438. P_0 = \sum_{y=x_l}^{x_u} \binom{n}{y}\binom{M - n}{N-y}\omega^y,
  1439. and the binomial coefficients are defined as
  1440. .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
  1441. `nchypergeom_fisher` uses the BiasedUrn package by Agner Fog with
  1442. permission for it to be distributed under SciPy's license.
  1443. The symbols used to denote the shape parameters (`N`, `n`, and `M`) are not
  1444. universally accepted; they are chosen for consistency with `hypergeom`.
  1445. Note that Fisher's noncentral hypergeometric distribution is distinct
  1446. from Wallenius' noncentral hypergeometric distribution, which models
  1447. drawing a pre-determined `N` objects from a bin one by one.
  1448. When the odds ratio is unity, however, both distributions reduce to the
  1449. ordinary hypergeometric distribution.
  1450. %(after_notes)s
  1451. References
  1452. ----------
  1453. .. [1] Agner Fog, "Biased Urn Theory".
  1454. https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf
  1455. .. [2] "Fisher's noncentral hypergeometric distribution", Wikipedia,
  1456. https://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution
  1457. %(example)s
  1458. """
  1459. rvs_name = "rvs_fisher"
  1460. dist = _PyFishersNCHypergeometric
  1461. nchypergeom_fisher = nchypergeom_fisher_gen(
  1462. name='nchypergeom_fisher',
  1463. longname="A Fisher's noncentral hypergeometric")
  1464. class nchypergeom_wallenius_gen(_nchypergeom_gen):
  1465. r"""A Wallenius' noncentral hypergeometric discrete random variable.
  1466. Wallenius' noncentral hypergeometric distribution models drawing objects of
  1467. two types from a bin. `M` is the total number of objects, `n` is the
  1468. number of Type I objects, and `odds` is the odds ratio: the odds of
  1469. selecting a Type I object rather than a Type II object when there is only
  1470. one object of each type.
  1471. The random variate represents the number of Type I objects drawn if we
  1472. draw a pre-determined `N` objects from a bin one by one.
  1473. %(before_notes)s
  1474. See Also
  1475. --------
  1476. nchypergeom_fisher, hypergeom, nhypergeom
  1477. Notes
  1478. -----
  1479. Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
  1480. with parameters `N`, `n`, and `M` (respectively) as defined above.
  1481. The probability mass function is defined as
  1482. .. math::
  1483. p(x; N, n, M) = \binom{n}{x} \binom{M - n}{N-x}
  1484. \int_0^1 \left(1-t^{\omega/D}\right)^x\left(1-t^{1/D}\right)^{N-x} dt
  1485. for
  1486. :math:`x \in [x_l, x_u]`,
  1487. :math:`M \in {\mathbb N}`,
  1488. :math:`n \in [0, M]`,
  1489. :math:`N \in [0, M]`,
  1490. :math:`\omega > 0`,
  1491. where
  1492. :math:`x_l = \max(0, N - (M - n))`,
  1493. :math:`x_u = \min(N, n)`,
  1494. .. math::
  1495. D = \omega(n - x) + ((M - n)-(N-x)),
  1496. and the binomial coefficients are defined as
  1497. .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.
  1498. `nchypergeom_wallenius` uses the BiasedUrn package by Agner Fog with
  1499. permission for it to be distributed under SciPy's license.
  1500. The symbols used to denote the shape parameters (`N`, `n`, and `M`) are not
  1501. universally accepted; they are chosen for consistency with `hypergeom`.
  1502. Note that Wallenius' noncentral hypergeometric distribution is distinct
  1503. from Fisher's noncentral hypergeometric distribution, which models
  1504. take a handful of objects from the bin at once, finding out afterwards
  1505. that `N` objects were taken.
  1506. When the odds ratio is unity, however, both distributions reduce to the
  1507. ordinary hypergeometric distribution.
  1508. %(after_notes)s
  1509. References
  1510. ----------
  1511. .. [1] Agner Fog, "Biased Urn Theory".
  1512. https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf
  1513. .. [2] "Wallenius' noncentral hypergeometric distribution", Wikipedia,
  1514. https://en.wikipedia.org/wiki/Wallenius'_noncentral_hypergeometric_distribution
  1515. %(example)s
  1516. """
  1517. rvs_name = "rvs_wallenius"
  1518. dist = _PyWalleniusNCHypergeometric
  1519. nchypergeom_wallenius = nchypergeom_wallenius_gen(
  1520. name='nchypergeom_wallenius',
  1521. longname="A Wallenius' noncentral hypergeometric")
  1522. # Collect names of classes and objects in this module.
  1523. pairs = list(globals().copy().items())
  1524. _distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)
  1525. __all__ = _distn_names + _distn_gen_names