test_sampling.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448
  1. import threading
  2. import pickle
  3. import pytest
  4. from copy import deepcopy
  5. import platform
  6. import sys
  7. import math
  8. import warnings
  9. import numpy as np
  10. from numpy.testing import assert_allclose, assert_equal
  11. from scipy.stats.sampling import (
  12. TransformedDensityRejection,
  13. DiscreteAliasUrn,
  14. DiscreteGuideTable,
  15. NumericalInversePolynomial,
  16. NumericalInverseHermite,
  17. RatioUniforms,
  18. SimpleRatioUniforms,
  19. UNURANError
  20. )
  21. from pytest import raises as assert_raises
  22. from scipy import stats
  23. from scipy import special
  24. from scipy.stats import chisquare, cramervonmises
  25. from scipy.stats._distr_params import distdiscrete, distcont
  26. from scipy._lib._util import check_random_state
  27. # common test data: this data can be shared between all the tests.
  28. # Normal distribution shared between all the continuous methods
  29. class StandardNormal:
  30. def pdf(self, x):
  31. # normalization constant needed for NumericalInverseHermite
  32. return 1./np.sqrt(2.*np.pi) * np.exp(-0.5 * x*x)
  33. def dpdf(self, x):
  34. return 1./np.sqrt(2.*np.pi) * -x * np.exp(-0.5 * x*x)
  35. def cdf(self, x):
  36. return special.ndtr(x)
  37. all_methods = [
  38. ("TransformedDensityRejection", {"dist": StandardNormal()}),
  39. ("DiscreteAliasUrn", {"dist": [0.02, 0.18, 0.8]}),
  40. ("DiscreteGuideTable", {"dist": [0.02, 0.18, 0.8]}),
  41. ("NumericalInversePolynomial", {"dist": StandardNormal()}),
  42. ("NumericalInverseHermite", {"dist": StandardNormal()}),
  43. ("SimpleRatioUniforms", {"dist": StandardNormal(), "mode": 0})
  44. ]
  45. if (sys.implementation.name == 'pypy'
  46. and sys.implementation.version < (7, 3, 10)):
  47. # changed in PyPy for v7.3.10
  48. floaterr = r"unsupported operand type for float\(\): 'list'"
  49. else:
  50. floaterr = r"must be real number, not list"
  51. # Make sure an internal error occurs in UNU.RAN when invalid callbacks are
  52. # passed. Moreover, different generators throw different error messages.
  53. # So, in case of an `UNURANError`, we do not validate the error message.
  54. bad_pdfs_common = [
  55. # Negative PDF
  56. (lambda x: -x, UNURANError, r"..."),
  57. # Returning wrong type
  58. (lambda x: [], TypeError, floaterr),
  59. # Undefined name inside the function
  60. (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa: F821, E501
  61. # Infinite value returned => Overflow error.
  62. (lambda x: np.inf, UNURANError, r"..."),
  63. # NaN value => internal error in UNU.RAN
  64. (lambda x: np.nan, UNURANError, r"..."),
  65. # signature of PDF wrong
  66. (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
  67. ]
  68. # same approach for dpdf
  69. bad_dpdf_common = [
  70. # Infinite value returned.
  71. (lambda x: np.inf, UNURANError, r"..."),
  72. # NaN value => internal error in UNU.RAN
  73. (lambda x: np.nan, UNURANError, r"..."),
  74. # Returning wrong type
  75. (lambda x: [], TypeError, floaterr),
  76. # Undefined name inside the function
  77. (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa: F821, E501
  78. # signature of dPDF wrong
  79. (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
  80. ]
  81. # same approach for logpdf
  82. bad_logpdfs_common = [
  83. # Returning wrong type
  84. (lambda x: [], TypeError, floaterr),
  85. # Undefined name inside the function
  86. (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa: F821, E501
  87. # Infinite value returned => Overflow error.
  88. (lambda x: np.inf, UNURANError, r"..."),
  89. # NaN value => internal error in UNU.RAN
  90. (lambda x: np.nan, UNURANError, r"..."),
  91. # signature of logpdf wrong
  92. (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
  93. ]
  94. bad_pv_common = [
  95. ([], r"must contain at least one element"),
  96. ([[1.0, 0.0]], r"wrong number of dimensions \(expected 1, got 2\)"),
  97. ([0.2, 0.4, np.nan, 0.8], r"must contain only finite / non-nan values"),
  98. ([0.2, 0.4, np.inf, 0.8], r"must contain only finite / non-nan values"),
  99. ([0.0, 0.0], r"must contain at least one non-zero value"),
  100. ]
  101. # size of the domains is incorrect
  102. bad_sized_domains = [
  103. # > 2 elements in the domain
  104. ((1, 2, 3), ValueError, r"must be a length 2 tuple"),
  105. # empty domain
  106. ((), ValueError, r"must be a length 2 tuple")
  107. ]
  108. # domain values are incorrect
  109. bad_domains = [
  110. ((2, 1), UNURANError, r"left >= right"),
  111. ((1, 1), UNURANError, r"left >= right"),
  112. ]
  113. # infinite and nan values present in domain.
  114. inf_nan_domains = [
  115. # left >= right
  116. ((10, 10), UNURANError, r"left >= right"),
  117. ((np.inf, np.inf), UNURANError, r"left >= right"),
  118. ((-np.inf, -np.inf), UNURANError, r"left >= right"),
  119. ((np.inf, -np.inf), UNURANError, r"left >= right"),
  120. # Also include nans in some of the domains.
  121. ((-np.inf, np.nan), ValueError, r"only non-nan values"),
  122. ((np.nan, np.inf), ValueError, r"only non-nan values")
  123. ]
  124. # `nan` values present in domain. Some distributions don't support
  125. # infinite tails, so don't mix the nan values with infinities.
  126. nan_domains = [
  127. ((0, np.nan), ValueError, r"only non-nan values"),
  128. ((np.nan, np.nan), ValueError, r"only non-nan values")
  129. ]
  130. # all the methods should throw errors for nan, bad sized, and bad valued
  131. # domains.
  132. @pytest.mark.parametrize("domain, err, msg",
  133. bad_domains + bad_sized_domains +
  134. nan_domains) # type: ignore[operator]
  135. @pytest.mark.parametrize("method, kwargs", all_methods)
  136. def test_bad_domain(domain, err, msg, method, kwargs):
  137. Method = getattr(stats.sampling, method)
  138. with pytest.raises(err, match=msg):
  139. Method(**kwargs, domain=domain)
  140. @pytest.mark.parametrize("method, kwargs", all_methods)
  141. def test_random_state(method, kwargs):
  142. Method = getattr(stats.sampling, method)
  143. # simple seed that works for any version of NumPy
  144. seed = 123
  145. rng1 = Method(**kwargs, random_state=seed)
  146. rng2 = Method(**kwargs, random_state=seed)
  147. assert_equal(rng1.rvs(100), rng2.rvs(100))
  148. # global seed
  149. rng = np.random.RandomState(123)
  150. rng1 = Method(**kwargs)
  151. rvs1 = rng1.rvs(100, random_state=rng)
  152. np.random.seed(None) # valid use of np.random.seed
  153. rng2 = Method(**kwargs, random_state=123)
  154. rvs2 = rng2.rvs(100)
  155. assert_equal(rvs1, rvs2)
  156. # Generator seed for new NumPy
  157. # when a RandomState is given, it should take the bitgen_t
  158. # member of the class and create a Generator instance.
  159. seed1 = np.random.RandomState(np.random.MT19937(123))
  160. seed2 = np.random.Generator(np.random.MT19937(123))
  161. rng1 = Method(**kwargs, random_state=seed1)
  162. rng2 = Method(**kwargs, random_state=seed2)
  163. assert_equal(rng1.rvs(100), rng2.rvs(100))
  164. def test_set_random_state():
  165. rng1 = TransformedDensityRejection(StandardNormal(), random_state=123)
  166. rng2 = TransformedDensityRejection(StandardNormal())
  167. rng2.set_random_state(123)
  168. assert_equal(rng1.rvs(100), rng2.rvs(100))
  169. rng = TransformedDensityRejection(StandardNormal(), random_state=123)
  170. rvs1 = rng.rvs(100)
  171. rng.set_random_state(123)
  172. rvs2 = rng.rvs(100)
  173. assert_equal(rvs1, rvs2)
  174. def test_threading_behaviour():
  175. # Test if the API is thread-safe.
  176. # This verifies if the lock mechanism and the use of `PyErr_Occurred`
  177. # is correct.
  178. errors = {"err1": None, "err2": None}
  179. class Distribution:
  180. def __init__(self, pdf_msg):
  181. self.pdf_msg = pdf_msg
  182. def pdf(self, x):
  183. if 49.9 < x < 50.0:
  184. raise ValueError(self.pdf_msg)
  185. return x
  186. def dpdf(self, x):
  187. return 1
  188. def func1():
  189. dist = Distribution('foo')
  190. rng = TransformedDensityRejection(dist, domain=(10, 100),
  191. random_state=12)
  192. try:
  193. rng.rvs(100000)
  194. except ValueError as e:
  195. errors['err1'] = e.args[0]
  196. def func2():
  197. dist = Distribution('bar')
  198. rng = TransformedDensityRejection(dist, domain=(10, 100),
  199. random_state=2)
  200. try:
  201. rng.rvs(100000)
  202. except ValueError as e:
  203. errors['err2'] = e.args[0]
  204. t1 = threading.Thread(target=func1)
  205. t2 = threading.Thread(target=func2)
  206. t1.start()
  207. t2.start()
  208. t1.join()
  209. t2.join()
  210. assert errors['err1'] == 'foo'
  211. assert errors['err2'] == 'bar'
  212. @pytest.mark.parametrize("method, kwargs", all_methods)
  213. def test_pickle(method, kwargs):
  214. Method = getattr(stats.sampling, method)
  215. rng1 = Method(**kwargs, random_state=123)
  216. obj = pickle.dumps(rng1)
  217. rng2 = pickle.loads(obj)
  218. assert_equal(rng1.rvs(100), rng2.rvs(100))
  219. @pytest.mark.parametrize("size", [None, 0, (0, ), 1, (10, 3), (2, 3, 4, 5),
  220. (0, 0), (0, 1)])
  221. def test_rvs_size(size):
  222. # As the `rvs` method is present in the base class and shared between
  223. # all the classes, we can just test with one of the methods.
  224. rng = TransformedDensityRejection(StandardNormal())
  225. if size is None:
  226. assert np.isscalar(rng.rvs(size))
  227. else:
  228. if np.isscalar(size):
  229. size = (size, )
  230. assert rng.rvs(size).shape == size
  231. def test_with_scipy_distribution():
  232. # test if the setup works with SciPy's rv_frozen distributions
  233. dist = stats.norm()
  234. urng = np.random.default_rng(0)
  235. rng = NumericalInverseHermite(dist, random_state=urng)
  236. u = np.linspace(0, 1, num=100)
  237. check_cont_samples(rng, dist, dist.stats())
  238. assert_allclose(dist.ppf(u), rng.ppf(u))
  239. # test if it works with `loc` and `scale`
  240. dist = stats.norm(loc=10., scale=5.)
  241. rng = NumericalInverseHermite(dist, random_state=urng)
  242. check_cont_samples(rng, dist, dist.stats())
  243. assert_allclose(dist.ppf(u), rng.ppf(u))
  244. # check for discrete distributions
  245. dist = stats.binom(10, 0.2)
  246. rng = DiscreteAliasUrn(dist, random_state=urng)
  247. domain = dist.support()
  248. pv = dist.pmf(np.arange(domain[0], domain[1]+1))
  249. check_discr_samples(rng, pv, dist.stats())
  250. def check_cont_samples(rng, dist, mv_ex, rtol=1e-7, atol=1e-1):
  251. rvs = rng.rvs(100000)
  252. mv = rvs.mean(), rvs.var()
  253. # test the moments only if the variance is finite
  254. if np.isfinite(mv_ex[1]):
  255. assert_allclose(mv, mv_ex, rtol=rtol, atol=atol)
  256. # Cramer Von Mises test for goodness-of-fit
  257. rvs = rng.rvs(500)
  258. dist.cdf = np.vectorize(dist.cdf)
  259. pval = cramervonmises(rvs, dist.cdf).pvalue
  260. assert pval > 0.1
  261. def check_discr_samples(rng, pv, mv_ex, rtol=1e-3, atol=1e-1):
  262. rvs = rng.rvs(100000)
  263. # test if the first few moments match
  264. mv = rvs.mean(), rvs.var()
  265. assert_allclose(mv, mv_ex, rtol=rtol, atol=atol)
  266. # normalize
  267. pv = pv / pv.sum()
  268. # chi-squared test for goodness-of-fit
  269. obs_freqs = np.zeros_like(pv)
  270. _, freqs = np.unique(rvs, return_counts=True)
  271. freqs = freqs / freqs.sum()
  272. obs_freqs[:freqs.size] = freqs
  273. pval = chisquare(obs_freqs, pv).pvalue
  274. assert pval > 0.1
  275. def test_warning_center_not_in_domain():
  276. # UNURAN will warn if the center provided or the one computed w/o the
  277. # domain is outside of the domain
  278. msg = "102 : center moved into domain of distribution"
  279. with pytest.warns(RuntimeWarning, match=msg):
  280. NumericalInversePolynomial(StandardNormal(), center=0, domain=(3, 5))
  281. with pytest.warns(RuntimeWarning, match=msg):
  282. NumericalInversePolynomial(StandardNormal(), domain=(3, 5))
  283. @pytest.mark.parametrize('method', ["SimpleRatioUniforms",
  284. "NumericalInversePolynomial",
  285. "TransformedDensityRejection"])
  286. def test_error_mode_not_in_domain(method):
  287. # UNURAN raises an error if the mode is not in the domain
  288. # the behavior is different compared to the case that center is not in the
  289. # domain. mode is supposed to be the exact value, center can be an
  290. # approximate value
  291. Method = getattr(stats.sampling, method)
  292. msg = "17 : mode not in domain"
  293. with pytest.raises(UNURANError, match=msg):
  294. Method(StandardNormal(), mode=0, domain=(3, 5))
  295. @pytest.mark.parametrize('method', ["NumericalInverseHermite",
  296. "NumericalInversePolynomial"])
  297. class TestQRVS:
  298. def test_input_validation(self, method):
  299. match = "`qmc_engine` must be an instance of..."
  300. with pytest.raises(ValueError, match=match):
  301. Method = getattr(stats.sampling, method)
  302. gen = Method(StandardNormal())
  303. gen.qrvs(qmc_engine=0)
  304. # issues with QMCEngines and old NumPy
  305. Method = getattr(stats.sampling, method)
  306. gen = Method(StandardNormal())
  307. match = "`d` must be consistent with dimension of `qmc_engine`."
  308. with pytest.raises(ValueError, match=match):
  309. gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2))
  310. qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)]
  311. # `size=None` should not add anything to the shape, `size=1` should
  312. sizes = [(None, tuple()), (1, (1,)), (4, (4,)),
  313. ((4,), (4,)), ((2, 4), (2, 4))] # type: ignore
  314. # Neither `d=None` nor `d=1` should add anything to the shape
  315. ds = [(None, tuple()), (1, tuple()), (3, (3,))]
  316. @pytest.mark.parametrize('qrng', qrngs)
  317. @pytest.mark.parametrize('size_in, size_out', sizes)
  318. @pytest.mark.parametrize('d_in, d_out', ds)
  319. @pytest.mark.thread_unsafe(reason="fails in parallel")
  320. def test_QRVS_shape_consistency(self, qrng, size_in, size_out,
  321. d_in, d_out, method):
  322. w32 = sys.platform == "win32" and platform.architecture()[0] == "32bit"
  323. if w32 and method == "NumericalInversePolynomial":
  324. pytest.xfail("NumericalInversePolynomial.qrvs fails for Win "
  325. "32-bit")
  326. dist = StandardNormal()
  327. Method = getattr(stats.sampling, method)
  328. gen = Method(dist)
  329. # If d and qrng.d are inconsistent, an error is raised
  330. if d_in is not None and qrng is not None and qrng.d != d_in:
  331. match = "`d` must be consistent with dimension of `qmc_engine`."
  332. with pytest.raises(ValueError, match=match):
  333. gen.qrvs(size_in, d=d_in, qmc_engine=qrng)
  334. return
  335. # Sometimes d is really determined by qrng
  336. if d_in is None and qrng is not None and qrng.d != 1:
  337. d_out = (qrng.d,)
  338. shape_expected = size_out + d_out
  339. qrng2 = deepcopy(qrng)
  340. qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng)
  341. if size_in is not None:
  342. assert qrvs.shape == shape_expected
  343. if qrng2 is not None:
  344. uniform = qrng2.random(np.prod(size_in) or 1)
  345. qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected)
  346. assert_allclose(qrvs, qrvs2, atol=1e-12)
  347. def test_QRVS_size_tuple(self, method):
  348. # QMCEngine samples are always of shape (n, d). When `size` is a tuple,
  349. # we set `n = prod(size)` in the call to qmc_engine.random, transform
  350. # the sample, and reshape it to the final dimensions. When we reshape,
  351. # we need to be careful, because the _columns_ of the sample returned
  352. # by a QMCEngine are "independent"-ish, but the elements within the
  353. # columns are not. We need to make sure that this doesn't get mixed up
  354. # by reshaping: qrvs[..., i] should remain "independent"-ish of
  355. # qrvs[..., i+1], but the elements within qrvs[..., i] should be
  356. # transformed from the same low-discrepancy sequence.
  357. dist = StandardNormal()
  358. Method = getattr(stats.sampling, method)
  359. gen = Method(dist)
  360. size = (3, 4)
  361. d = 5
  362. qrng = stats.qmc.Halton(d, seed=0)
  363. qrng2 = stats.qmc.Halton(d, seed=0)
  364. uniform = qrng2.random(np.prod(size))
  365. qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng)
  366. qrvs2 = stats.norm.ppf(uniform)
  367. for i in range(d):
  368. sample = qrvs[..., i]
  369. sample2 = qrvs2[:, i].reshape(size)
  370. assert_allclose(sample, sample2, atol=1e-12)
  371. class TestTransformedDensityRejection:
  372. # Simple Custom Distribution
  373. class dist0:
  374. def pdf(self, x):
  375. return 3/4 * (1-x*x)
  376. def dpdf(self, x):
  377. return 3/4 * (-2*x)
  378. def cdf(self, x):
  379. return 3/4 * (x - x**3/3 + 2/3)
  380. def support(self):
  381. return -1, 1
  382. # Standard Normal Distribution
  383. class dist1:
  384. def pdf(self, x):
  385. return stats.norm._pdf(x / 0.1)
  386. def dpdf(self, x):
  387. return -x / 0.01 * stats.norm._pdf(x / 0.1)
  388. def cdf(self, x):
  389. return stats.norm._cdf(x / 0.1)
  390. # pdf with piecewise linear function as transformed density
  391. # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
  392. # (from file t_tdr_ps.c)
  393. class dist2:
  394. def __init__(self, shift):
  395. self.shift = shift
  396. def pdf(self, x):
  397. x -= self.shift
  398. y = 1. / (abs(x) + 1.)
  399. return 0.5 * y * y
  400. def dpdf(self, x):
  401. x -= self.shift
  402. y = 1. / (abs(x) + 1.)
  403. y = y * y * y
  404. return y if (x < 0.) else -y
  405. def cdf(self, x):
  406. x -= self.shift
  407. if x <= 0.:
  408. return 0.5 / (1. - x)
  409. else:
  410. return 1. - 0.5 / (1. + x)
  411. dists = [dist0(), dist1(), dist2(0.), dist2(10000.)]
  412. # exact mean and variance of the distributions in the list dists
  413. mv0 = [0., 4./15.]
  414. mv1 = [0., 0.01]
  415. mv2 = [0., np.inf]
  416. mv3 = [10000., np.inf]
  417. mvs = [mv0, mv1, mv2, mv3]
  418. @pytest.mark.parametrize("dist, mv_ex",
  419. zip(dists, mvs))
  420. @pytest.mark.thread_unsafe(reason="deadlocks for unknown reasons")
  421. def test_basic(self, dist, mv_ex):
  422. with warnings.catch_warnings():
  423. # filter the warnings thrown by UNU.RAN
  424. warnings.simplefilter("ignore", RuntimeWarning)
  425. rng = TransformedDensityRejection(dist, random_state=42)
  426. check_cont_samples(rng, dist, mv_ex)
  427. # PDF 0 everywhere => bad construction points
  428. bad_pdfs = [(lambda x: 0, UNURANError, r"50 : bad construction points.")]
  429. bad_pdfs += bad_pdfs_common # type: ignore[arg-type]
  430. @pytest.mark.parametrize("pdf, err, msg", bad_pdfs)
  431. def test_bad_pdf(self, pdf, err, msg):
  432. class dist:
  433. pass
  434. dist.pdf = pdf
  435. dist.dpdf = lambda x: 1 # an arbitrary dPDF
  436. with pytest.raises(err, match=msg):
  437. TransformedDensityRejection(dist)
  438. @pytest.mark.parametrize("dpdf, err, msg", bad_dpdf_common)
  439. def test_bad_dpdf(self, dpdf, err, msg):
  440. class dist:
  441. pass
  442. dist.pdf = lambda x: x
  443. dist.dpdf = dpdf
  444. with pytest.raises(err, match=msg):
  445. TransformedDensityRejection(dist, domain=(1, 10))
  446. # test domains with inf + nan in them. need to write a custom test for
  447. # this because not all methods support infinite tails.
  448. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  449. def test_inf_nan_domains(self, domain, err, msg):
  450. with pytest.raises(err, match=msg):
  451. TransformedDensityRejection(StandardNormal(), domain=domain)
  452. @pytest.mark.parametrize("construction_points", [-1, 0, 0.1])
  453. def test_bad_construction_points_scalar(self, construction_points):
  454. with pytest.raises(ValueError, match=r"`construction_points` must be "
  455. r"a positive integer."):
  456. TransformedDensityRejection(
  457. StandardNormal(), construction_points=construction_points
  458. )
  459. def test_bad_construction_points_array(self):
  460. # empty array
  461. construction_points = []
  462. with pytest.raises(ValueError, match=r"`construction_points` must "
  463. r"either be a "
  464. r"scalar or a non-empty array."):
  465. TransformedDensityRejection(
  466. StandardNormal(), construction_points=construction_points
  467. )
  468. # construction_points not monotonically increasing
  469. construction_points = [1, 1, 1, 1, 1, 1]
  470. with pytest.warns(RuntimeWarning, match=r"33 : starting points not "
  471. r"strictly monotonically "
  472. r"increasing"):
  473. TransformedDensityRejection(
  474. StandardNormal(), construction_points=construction_points
  475. )
  476. # construction_points containing nans
  477. construction_points = [np.nan, np.nan, np.nan]
  478. with pytest.raises(UNURANError, match=r"50 : bad construction "
  479. r"points."):
  480. TransformedDensityRejection(
  481. StandardNormal(), construction_points=construction_points
  482. )
  483. # construction_points out of domain
  484. construction_points = [-10, 10]
  485. with pytest.warns(RuntimeWarning, match=r"50 : starting point out of "
  486. r"domain"):
  487. TransformedDensityRejection(
  488. StandardNormal(), domain=(-3, 3),
  489. construction_points=construction_points
  490. )
  491. @pytest.mark.parametrize("c", [-1., np.nan, np.inf, 0.1, 1.])
  492. def test_bad_c(self, c):
  493. msg = r"`c` must either be -0.5 or 0."
  494. with pytest.raises(ValueError, match=msg):
  495. TransformedDensityRejection(StandardNormal(), c=-1.)
  496. u = [np.linspace(0, 1, num=1000), [], [[]], [np.nan],
  497. [-np.inf, np.nan, np.inf], 0,
  498. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]]
  499. @pytest.mark.parametrize("u", u)
  500. def test_ppf_hat(self, u):
  501. # Increase the `max_squeeze_hat_ratio` so the ppf_hat is more
  502. # accurate.
  503. rng = TransformedDensityRejection(StandardNormal(),
  504. max_squeeze_hat_ratio=0.9999)
  505. # Older versions of NumPy throw RuntimeWarnings for comparisons
  506. # with nan.
  507. with warnings.catch_warnings():
  508. msg = "invalid value encountered in "
  509. warnings.filterwarnings("ignore", msg + "greater", RuntimeWarning)
  510. warnings.filterwarnings("ignore", msg + "greater_equal", RuntimeWarning)
  511. warnings.filterwarnings("ignore", msg + "less", RuntimeWarning)
  512. warnings.filterwarnings("ignore", msg + "less_equal", RuntimeWarning)
  513. res = rng.ppf_hat(u)
  514. expected = stats.norm.ppf(u)
  515. assert_allclose(res, expected, rtol=1e-3, atol=1e-5)
  516. assert res.shape == expected.shape
  517. def test_bad_dist(self):
  518. # Empty distribution
  519. class dist:
  520. ...
  521. msg = r"`pdf` required but not found."
  522. with pytest.raises(ValueError, match=msg):
  523. TransformedDensityRejection(dist)
  524. # dPDF not present in dist
  525. class dist:
  526. pdf = lambda x: 1-x*x # noqa: E731
  527. msg = r"`dpdf` required but not found."
  528. with pytest.raises(ValueError, match=msg):
  529. TransformedDensityRejection(dist)
  530. class TestDiscreteAliasUrn:
  531. # DAU fails on these probably because of large domains and small
  532. # computation errors in PMF. Mean/SD match but chi-squared test fails.
  533. basic_fail_dists = {
  534. 'nchypergeom_fisher', # numerical errors on tails
  535. 'nchypergeom_wallenius', # numerical errors on tails
  536. 'randint' # fails on 32-bit ubuntu
  537. }
  538. @pytest.mark.parametrize("distname, params", distdiscrete)
  539. def test_basic(self, distname, params):
  540. if distname in self.basic_fail_dists:
  541. msg = ("DAU fails on these probably because of large domains "
  542. "and small computation errors in PMF.")
  543. pytest.skip(msg)
  544. if not isinstance(distname, str):
  545. dist = distname
  546. else:
  547. dist = getattr(stats, distname)
  548. dist = dist(*params)
  549. domain = dist.support()
  550. if not np.isfinite(domain[1] - domain[0]):
  551. # DAU only works with finite domain. So, skip the distributions
  552. # with infinite tails.
  553. pytest.skip("DAU only works with a finite domain.")
  554. k = np.arange(domain[0], domain[1]+1)
  555. pv = dist.pmf(k)
  556. mv_ex = dist.stats('mv')
  557. rng = DiscreteAliasUrn(dist, random_state=42)
  558. check_discr_samples(rng, pv, mv_ex)
  559. # Can't use bad_pmf_common here as we evaluate PMF early on to avoid
  560. # unhelpful errors from UNU.RAN.
  561. bad_pmf = [
  562. # inf returned
  563. (lambda x: np.inf, ValueError,
  564. r"must contain only finite / non-nan values"),
  565. # nan returned
  566. (lambda x: np.nan, ValueError,
  567. r"must contain only finite / non-nan values"),
  568. # all zeros
  569. (lambda x: 0.0, ValueError,
  570. r"must contain at least one non-zero value"),
  571. # Undefined name inside the function
  572. (lambda x: foo, NameError, # type: ignore[name-defined] # noqa: F821
  573. r"name 'foo' is not defined"),
  574. # Returning wrong type.
  575. (lambda x: [], ValueError,
  576. r"setting an array element with a sequence."),
  577. # probabilities < 0
  578. (lambda x: -x, UNURANError,
  579. r"50 : probability < 0"),
  580. # signature of PMF wrong
  581. (lambda: 1.0, TypeError,
  582. r"takes 0 positional arguments but 1 was given")
  583. ]
  584. @pytest.mark.parametrize("pmf, err, msg", bad_pmf)
  585. def test_bad_pmf(self, pmf, err, msg):
  586. class dist:
  587. pass
  588. dist.pmf = pmf
  589. with pytest.raises(err, match=msg):
  590. DiscreteAliasUrn(dist, domain=(1, 10))
  591. @pytest.mark.parametrize("pv", [[0.18, 0.02, 0.8],
  592. [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]])
  593. def test_sampling_with_pv(self, pv):
  594. pv = np.asarray(pv, dtype=np.float64)
  595. rng = DiscreteAliasUrn(pv, random_state=123)
  596. rng.rvs(100_000)
  597. pv = pv / pv.sum()
  598. variates = np.arange(0, len(pv))
  599. # test if the first few moments match
  600. m_expected = np.average(variates, weights=pv)
  601. v_expected = np.average((variates - m_expected) ** 2, weights=pv)
  602. mv_expected = m_expected, v_expected
  603. check_discr_samples(rng, pv, mv_expected)
  604. @pytest.mark.parametrize("pv, msg", bad_pv_common)
  605. def test_bad_pv(self, pv, msg):
  606. with pytest.raises(ValueError, match=msg):
  607. DiscreteAliasUrn(pv)
  608. # DAU doesn't support infinite tails. So, it should throw an error when
  609. # inf is present in the domain.
  610. inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf),
  611. (0, np.inf), (-np.inf, 0)]
  612. @pytest.mark.parametrize("domain", inf_domain)
  613. def test_inf_domain(self, domain):
  614. with pytest.raises(ValueError, match=r"must be finite"):
  615. DiscreteAliasUrn(stats.binom(10, 0.2), domain=domain)
  616. def test_bad_urn_factor(self):
  617. with pytest.warns(RuntimeWarning, match=r"relative urn size < 1."):
  618. DiscreteAliasUrn([0.5, 0.5], urn_factor=-1)
  619. def test_bad_args(self):
  620. msg = (r"`domain` must be provided when the "
  621. r"probability vector is not available.")
  622. class dist:
  623. def pmf(self, x):
  624. return x
  625. with pytest.raises(ValueError, match=msg):
  626. DiscreteAliasUrn(dist)
  627. def test_gh19359(self):
  628. pv = special.softmax(np.ones((1533,)))
  629. rng = DiscreteAliasUrn(pv, random_state=42)
  630. # check the correctness
  631. check_discr_samples(rng, pv, (1532 / 2, (1532**2 - 1) / 12),
  632. rtol=5e-3)
  633. class TestNumericalInversePolynomial:
  634. # Simple Custom Distribution
  635. class dist0:
  636. def pdf(self, x):
  637. return 3/4 * (1-x*x)
  638. def cdf(self, x):
  639. return 3/4 * (x - x**3/3 + 2/3)
  640. def support(self):
  641. return -1, 1
  642. # Standard Normal Distribution
  643. class dist1:
  644. def pdf(self, x):
  645. return stats.norm._pdf(x / 0.1)
  646. def cdf(self, x):
  647. return stats.norm._cdf(x / 0.1)
  648. # Sin 2 distribution
  649. # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 1
  650. # f(x) = <
  651. # \ 0 otherwise
  652. # Taken from UNU.RAN test suite (from file t_pinv.c)
  653. class dist2:
  654. def pdf(self, x):
  655. return 0.05 + 0.45 * (1 + np.sin(2*np.pi*x))
  656. def cdf(self, x):
  657. return (0.05*(x + 1) +
  658. 0.9*(1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) /
  659. (4.*np.pi))
  660. def support(self):
  661. return -1, 1
  662. # Sin 10 distribution
  663. # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 5
  664. # f(x) = <
  665. # \ 0 otherwise
  666. # Taken from UNU.RAN test suite (from file t_pinv.c)
  667. class dist3:
  668. def pdf(self, x):
  669. return 0.2 * (0.05 + 0.45 * (1 + np.sin(2*np.pi*x)))
  670. def cdf(self, x):
  671. return x/10. + 0.5 + 0.09/(2*np.pi) * (np.cos(10*np.pi) -
  672. np.cos(2*np.pi*x))
  673. def support(self):
  674. return -5, 5
  675. dists = [dist0(), dist1(), dist2(), dist3()]
  676. # exact mean and variance of the distributions in the list dists
  677. mv0 = [0., 4./15.]
  678. mv1 = [0., 0.01]
  679. mv2 = [-0.45/np.pi, 2/3*0.5 - 0.45**2/np.pi**2]
  680. mv3 = [-0.45/np.pi, 0.2 * 250/3 * 0.5 - 0.45**2/np.pi**2]
  681. mvs = [mv0, mv1, mv2, mv3]
  682. @pytest.mark.thread_unsafe(reason="deadlocks for unknown reasons")
  683. @pytest.mark.parametrize("dist, mv_ex",
  684. zip(dists, mvs))
  685. def test_basic(self, dist, mv_ex):
  686. rng = NumericalInversePolynomial(dist, random_state=42)
  687. check_cont_samples(rng, dist, mv_ex)
  688. @pytest.mark.xslow
  689. @pytest.mark.parametrize("distname, params", distcont)
  690. def test_basic_all_scipy_dists(self, distname, params):
  691. very_slow_dists = ['anglit', 'gausshyper', 'kappa4',
  692. 'ksone', 'kstwo', 'levy_l',
  693. 'levy_stable', 'studentized_range',
  694. 'trapezoid', 'triang', 'vonmises']
  695. # for these distributions, some assertions fail due to minor
  696. # numerical differences. They can be avoided either by changing
  697. # the seed or by increasing the u_resolution.
  698. fail_dists = ['chi2', 'fatiguelife', 'gibrat',
  699. 'halfgennorm', 'lognorm', 'ncf',
  700. 'ncx2', 'pareto', 't']
  701. # for these distributions, skip the check for agreement between sample
  702. # moments and true moments. We cannot expect them to pass due to the
  703. # high variance of sample moments.
  704. skip_sample_moment_check = ['rel_breitwigner']
  705. if distname in very_slow_dists:
  706. pytest.skip(f"PINV too slow for {distname}")
  707. if distname in fail_dists:
  708. pytest.skip(f"PINV fails for {distname}")
  709. dist = (getattr(stats, distname)
  710. if isinstance(distname, str)
  711. else distname)
  712. dist = dist(*params)
  713. with warnings.catch_warnings():
  714. warnings.simplefilter("ignore", RuntimeWarning)
  715. rng = NumericalInversePolynomial(dist, random_state=42)
  716. if distname in skip_sample_moment_check:
  717. return
  718. check_cont_samples(rng, dist, [dist.mean(), dist.var()])
  719. @pytest.mark.parametrize("pdf, err, msg", bad_pdfs_common)
  720. def test_bad_pdf(self, pdf, err, msg):
  721. class dist:
  722. pass
  723. dist.pdf = pdf
  724. with pytest.raises(err, match=msg):
  725. NumericalInversePolynomial(dist, domain=[0, 5])
  726. @pytest.mark.parametrize("logpdf, err, msg", bad_logpdfs_common)
  727. def test_bad_logpdf(self, logpdf, err, msg):
  728. class dist:
  729. pass
  730. dist.logpdf = logpdf
  731. with pytest.raises(err, match=msg):
  732. NumericalInversePolynomial(dist, domain=[0, 5])
  733. # test domains with inf + nan in them. need to write a custom test for
  734. # this because not all methods support infinite tails.
  735. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  736. def test_inf_nan_domains(self, domain, err, msg):
  737. with pytest.raises(err, match=msg):
  738. NumericalInversePolynomial(StandardNormal(), domain=domain)
  739. u = [
  740. # test if quantile 0 and 1 return -inf and inf respectively and check
  741. # the correctness of the PPF for equidistant points between 0 and 1.
  742. np.linspace(0, 1, num=10000),
  743. # test the PPF method for empty arrays
  744. [], [[]],
  745. # test if nans and infs return nan result.
  746. [np.nan], [-np.inf, np.nan, np.inf],
  747. # test if a scalar is returned for a scalar input.
  748. 0,
  749. # test for arrays with nans, values greater than 1 and less than 0,
  750. # and some valid values.
  751. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
  752. ]
  753. @pytest.mark.parametrize("u", u)
  754. def test_ppf(self, u):
  755. dist = StandardNormal()
  756. rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
  757. # Older versions of NumPy throw RuntimeWarnings for comparisons
  758. # with nan.
  759. with warnings.catch_warnings():
  760. msg = "invalid value encountered in "
  761. warnings.filterwarnings("ignore", msg + "greater", RuntimeWarning)
  762. warnings.filterwarnings("ignore", msg + "greater_equal", RuntimeWarning)
  763. warnings.filterwarnings("ignore", msg + "less", RuntimeWarning)
  764. warnings.filterwarnings("ignore", msg + "less_equal", RuntimeWarning)
  765. res = rng.ppf(u)
  766. expected = stats.norm.ppf(u)
  767. assert_allclose(res, expected, rtol=1e-11, atol=1e-11)
  768. assert res.shape == expected.shape
  769. x = [np.linspace(-10, 10, num=10000), [], [[]], [np.nan],
  770. [-np.inf, np.nan, np.inf], 0,
  771. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-np.inf, 3, 4]]]
  772. @pytest.mark.parametrize("x", x)
  773. def test_cdf(self, x):
  774. dist = StandardNormal()
  775. rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
  776. # Older versions of NumPy throw RuntimeWarnings for comparisons
  777. # with nan.
  778. with warnings.catch_warnings():
  779. msg = "invalid value encountered in "
  780. warnings.filterwarnings("ignore", msg + "greater", RuntimeWarning)
  781. warnings.filterwarnings("ignore", msg + "greater_equal", RuntimeWarning)
  782. warnings.filterwarnings("ignore", msg + "less", RuntimeWarning)
  783. warnings.filterwarnings("ignore", msg + "less_equal", RuntimeWarning)
  784. res = rng.cdf(x)
  785. expected = stats.norm.cdf(x)
  786. assert_allclose(res, expected, rtol=1e-11, atol=1e-11)
  787. assert res.shape == expected.shape
  788. @pytest.mark.slow
  789. def test_u_error(self):
  790. dist = StandardNormal()
  791. rng = NumericalInversePolynomial(dist, u_resolution=1e-10)
  792. max_error, mae = rng.u_error()
  793. assert max_error < 1e-10
  794. assert mae <= max_error
  795. rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
  796. max_error, mae = rng.u_error()
  797. assert max_error < 1e-14
  798. assert mae <= max_error
  799. bad_orders = [1, 4.5, 20, np.inf, np.nan]
  800. bad_u_resolution = [1e-20, 1e-1, np.inf, np.nan]
  801. @pytest.mark.parametrize("order", bad_orders)
  802. def test_bad_orders(self, order):
  803. dist = StandardNormal()
  804. msg = r"`order` must be an integer in the range \[3, 17\]."
  805. with pytest.raises(ValueError, match=msg):
  806. NumericalInversePolynomial(dist, order=order)
  807. @pytest.mark.parametrize("u_resolution", bad_u_resolution)
  808. def test_bad_u_resolution(self, u_resolution):
  809. msg = r"`u_resolution` must be between 1e-15 and 1e-5."
  810. with pytest.raises(ValueError, match=msg):
  811. NumericalInversePolynomial(StandardNormal(),
  812. u_resolution=u_resolution)
  813. def test_bad_args(self):
  814. class BadDist:
  815. def cdf(self, x):
  816. return stats.norm._cdf(x)
  817. dist = BadDist()
  818. msg = r"Either of the methods `pdf` or `logpdf` must be specified"
  819. with pytest.raises(ValueError, match=msg):
  820. rng = NumericalInversePolynomial(dist)
  821. dist = StandardNormal()
  822. rng = NumericalInversePolynomial(dist)
  823. msg = r"`sample_size` must be greater than or equal to 1000."
  824. with pytest.raises(ValueError, match=msg):
  825. rng.u_error(10)
  826. class Distribution:
  827. def pdf(self, x):
  828. return np.exp(-0.5 * x*x)
  829. dist = Distribution()
  830. rng = NumericalInversePolynomial(dist)
  831. msg = r"Exact CDF required but not found."
  832. with pytest.raises(ValueError, match=msg):
  833. rng.u_error()
  834. def test_logpdf_pdf_consistency(self):
  835. # 1. check that PINV works with pdf and logpdf only
  836. # 2. check that generated ppf is the same (up to a small tolerance)
  837. class MyDist:
  838. pass
  839. # create generator from dist with only pdf
  840. dist_pdf = MyDist()
  841. dist_pdf.pdf = lambda x: math.exp(-x*x/2)
  842. rng1 = NumericalInversePolynomial(dist_pdf)
  843. # create dist with only logpdf
  844. dist_logpdf = MyDist()
  845. dist_logpdf.logpdf = lambda x: -x*x/2
  846. rng2 = NumericalInversePolynomial(dist_logpdf)
  847. q = np.linspace(1e-5, 1-1e-5, num=100)
  848. assert_allclose(rng1.ppf(q), rng2.ppf(q))
  849. class TestNumericalInverseHermite:
  850. # / (1 +sin(2 Pi x))/2 if |x| <= 1
  851. # f(x) = <
  852. # \ 0 otherwise
  853. # Taken from UNU.RAN test suite (from file t_hinv.c)
  854. class dist0:
  855. def pdf(self, x):
  856. return 0.5*(1. + np.sin(2.*np.pi*x))
  857. def dpdf(self, x):
  858. return np.pi*np.cos(2.*np.pi*x)
  859. def cdf(self, x):
  860. return (1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) / (4.*np.pi)
  861. def support(self):
  862. return -1, 1
  863. # / Max(sin(2 Pi x)),0)Pi/2 if -1 < x <0.5
  864. # f(x) = <
  865. # \ 0 otherwise
  866. # Taken from UNU.RAN test suite (from file t_hinv.c)
  867. class dist1:
  868. def pdf(self, x):
  869. if (x <= -0.5):
  870. return np.sin((2. * np.pi) * x) * 0.5 * np.pi
  871. if (x < 0.):
  872. return 0.
  873. if (x <= 0.5):
  874. return np.sin((2. * np.pi) * x) * 0.5 * np.pi
  875. def dpdf(self, x):
  876. if (x <= -0.5):
  877. return np.cos((2. * np.pi) * x) * np.pi * np.pi
  878. if (x < 0.):
  879. return 0.
  880. if (x <= 0.5):
  881. return np.cos((2. * np.pi) * x) * np.pi * np.pi
  882. def cdf(self, x):
  883. if (x <= -0.5):
  884. return 0.25 * (1 - np.cos((2. * np.pi) * x))
  885. if (x < 0.):
  886. return 0.5
  887. if (x <= 0.5):
  888. return 0.75 - 0.25 * np.cos((2. * np.pi) * x)
  889. def support(self):
  890. return -1, 0.5
  891. dists = [dist0(), dist1()]
  892. # exact mean and variance of the distributions in the list dists
  893. mv0 = [-1/(2*np.pi), 1/3 - 1/(4*np.pi*np.pi)]
  894. mv1 = [-1/4, 3/8-1/(2*np.pi*np.pi) - 1/16]
  895. mvs = [mv0, mv1]
  896. @pytest.mark.parametrize("dist, mv_ex",
  897. zip(dists, mvs))
  898. @pytest.mark.parametrize("order", [3, 5])
  899. @pytest.mark.thread_unsafe
  900. def test_basic(self, dist, mv_ex, order):
  901. rng = NumericalInverseHermite(dist, order=order, random_state=42)
  902. check_cont_samples(rng, dist, mv_ex)
  903. # test domains with inf + nan in them. need to write a custom test for
  904. # this because not all methods support infinite tails.
  905. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  906. def test_inf_nan_domains(self, domain, err, msg):
  907. with pytest.raises(err, match=msg):
  908. NumericalInverseHermite(StandardNormal(), domain=domain)
  909. def basic_test_all_scipy_dists(self, distname, shapes):
  910. slow_dists = {'ksone', 'kstwo', 'levy_stable', 'skewnorm'}
  911. fail_dists = {'beta', 'gausshyper', 'geninvgauss', 'ncf', 'nct',
  912. 'norminvgauss', 'genhyperbolic', 'studentized_range',
  913. 'vonmises', 'kappa4', 'invgauss', 'wald'}
  914. if distname in slow_dists:
  915. pytest.skip("Distribution is too slow")
  916. if distname in fail_dists:
  917. # specific reasons documented in gh-13319
  918. # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
  919. pytest.xfail("Fails - usually due to inaccurate CDF/PDF")
  920. rng = np.random.default_rng(0)
  921. dist = getattr(stats, distname)(*shapes)
  922. fni = NumericalInverseHermite(dist)
  923. x = rng.random(10)
  924. p_tol = np.max(np.abs(dist.ppf(x)-fni.ppf(x))/np.abs(dist.ppf(x)))
  925. u_tol = np.max(np.abs(dist.cdf(fni.ppf(x)) - x))
  926. assert p_tol < 1e-8
  927. assert u_tol < 1e-12
  928. @pytest.mark.filterwarnings('ignore::RuntimeWarning')
  929. @pytest.mark.xslow
  930. @pytest.mark.parametrize(("distname", "shapes"), distcont)
  931. def test_basic_all_scipy_dists(self, distname, shapes):
  932. # if distname == "truncnorm":
  933. # pytest.skip("Tested separately")
  934. self.basic_test_all_scipy_dists(distname, shapes)
  935. @pytest.mark.fail_slow(5)
  936. @pytest.mark.filterwarnings('ignore::RuntimeWarning')
  937. @pytest.mark.parallel_threads_limit(4) # Very slow
  938. def test_basic_truncnorm_gh17155(self):
  939. self.basic_test_all_scipy_dists("truncnorm", (0.1, 2))
  940. def test_input_validation(self):
  941. match = r"`order` must be either 1, 3, or 5."
  942. with pytest.raises(ValueError, match=match):
  943. NumericalInverseHermite(StandardNormal(), order=2)
  944. match = "`cdf` required but not found"
  945. with pytest.raises(ValueError, match=match):
  946. NumericalInverseHermite("norm")
  947. match = "could not convert string to float"
  948. with pytest.raises(ValueError, match=match):
  949. NumericalInverseHermite(StandardNormal(),
  950. u_resolution='ekki')
  951. rngs = [None, 0, np.random.RandomState(0)]
  952. rngs.append(np.random.default_rng(0)) # type: ignore
  953. sizes = [(None, tuple()), (8, (8,)), ((4, 5, 6), (4, 5, 6))]
  954. @pytest.mark.parametrize('rng', rngs)
  955. @pytest.mark.parametrize('size_in, size_out', sizes)
  956. @pytest.mark.thread_unsafe
  957. def test_RVS(self, rng, size_in, size_out):
  958. dist = StandardNormal()
  959. fni = NumericalInverseHermite(dist)
  960. rng2 = deepcopy(rng)
  961. rvs = fni.rvs(size=size_in, random_state=rng)
  962. if size_in is not None:
  963. assert rvs.shape == size_out
  964. if rng2 is not None:
  965. rng2 = check_random_state(rng2)
  966. uniform = rng2.uniform(size=size_in)
  967. rvs2 = stats.norm.ppf(uniform)
  968. assert_allclose(rvs, rvs2)
  969. def test_inaccurate_CDF(self):
  970. # CDF function with inaccurate tail cannot be inverted; see gh-13319
  971. # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
  972. shapes = (2.3098496451481823, 0.6268795430096368)
  973. match = ("98 : one or more intervals very short; possibly due to "
  974. "numerical problems with a pole or very flat tail")
  975. # fails with default tol
  976. with pytest.warns(RuntimeWarning, match=match):
  977. NumericalInverseHermite(stats.beta(*shapes))
  978. # no error with coarser tol
  979. NumericalInverseHermite(stats.beta(*shapes), u_resolution=1e-8)
  980. def test_custom_distribution(self):
  981. dist1 = StandardNormal()
  982. fni1 = NumericalInverseHermite(dist1)
  983. dist2 = stats.norm()
  984. fni2 = NumericalInverseHermite(dist2)
  985. assert_allclose(fni1.rvs(random_state=0), fni2.rvs(random_state=0))
  986. u = [
  987. # check the correctness of the PPF for equidistant points between
  988. # 0.02 and 0.98.
  989. np.linspace(0., 1., num=10000),
  990. # test the PPF method for empty arrays
  991. [], [[]],
  992. # test if nans and infs return nan result.
  993. [np.nan], [-np.inf, np.nan, np.inf],
  994. # test if a scalar is returned for a scalar input.
  995. 0,
  996. # test for arrays with nans, values greater than 1 and less than 0,
  997. # and some valid values.
  998. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
  999. ]
  1000. @pytest.mark.parametrize("u", u)
  1001. def test_ppf(self, u):
  1002. dist = StandardNormal()
  1003. rng = NumericalInverseHermite(dist, u_resolution=1e-12)
  1004. # Older versions of NumPy throw RuntimeWarnings for comparisons
  1005. # with nan.
  1006. with warnings.catch_warnings():
  1007. msg = "invalid value encountered in "
  1008. warnings.filterwarnings("ignore", msg + "greater", RuntimeWarning)
  1009. warnings.filterwarnings("ignore", msg + "greater_equal", RuntimeWarning)
  1010. warnings.filterwarnings("ignore", msg + "less", RuntimeWarning)
  1011. warnings.filterwarnings("ignore", msg + "less_equal", RuntimeWarning)
  1012. res = rng.ppf(u)
  1013. expected = stats.norm.ppf(u)
  1014. assert_allclose(res, expected, rtol=1e-9, atol=3e-10)
  1015. assert res.shape == expected.shape
  1016. @pytest.mark.slow
  1017. def test_u_error(self):
  1018. dist = StandardNormal()
  1019. rng = NumericalInverseHermite(dist, u_resolution=1e-10)
  1020. max_error, mae = rng.u_error()
  1021. assert max_error < 1e-10
  1022. assert mae <= max_error
  1023. with warnings.catch_warnings():
  1024. # ignore warning about u-resolution being too small.
  1025. warnings.simplefilter("ignore", RuntimeWarning)
  1026. rng = NumericalInverseHermite(dist, u_resolution=1e-14)
  1027. max_error, mae = rng.u_error()
  1028. assert max_error < 1e-14
  1029. assert mae <= max_error
  1030. class TestDiscreteGuideTable:
  1031. basic_fail_dists = {
  1032. 'nchypergeom_fisher', # numerical errors on tails
  1033. 'nchypergeom_wallenius', # numerical errors on tails
  1034. 'randint' # fails on 32-bit ubuntu
  1035. }
  1036. def test_guide_factor_gt3_raises_warning(self):
  1037. pv = [0.1, 0.3, 0.6]
  1038. urng = np.random.default_rng()
  1039. with pytest.warns(RuntimeWarning):
  1040. DiscreteGuideTable(pv, random_state=urng, guide_factor=7)
  1041. def test_guide_factor_zero_raises_warning(self):
  1042. pv = [0.1, 0.3, 0.6]
  1043. urng = np.random.default_rng()
  1044. with pytest.warns(RuntimeWarning):
  1045. DiscreteGuideTable(pv, random_state=urng, guide_factor=0)
  1046. def test_negative_guide_factor_raises_warning(self):
  1047. # This occurs from the UNU.RAN wrapper automatically.
  1048. # however it already gives a useful warning
  1049. # Here we just test that a warning is raised.
  1050. pv = [0.1, 0.3, 0.6]
  1051. urng = np.random.default_rng()
  1052. with pytest.warns(RuntimeWarning):
  1053. DiscreteGuideTable(pv, random_state=urng, guide_factor=-1)
  1054. @pytest.mark.parametrize("distname, params", distdiscrete)
  1055. def test_basic(self, distname, params):
  1056. if distname in self.basic_fail_dists:
  1057. msg = ("DGT fails on these probably because of large domains "
  1058. "and small computation errors in PMF.")
  1059. pytest.skip(msg)
  1060. if not isinstance(distname, str):
  1061. dist = distname
  1062. else:
  1063. dist = getattr(stats, distname)
  1064. dist = dist(*params)
  1065. domain = dist.support()
  1066. if not np.isfinite(domain[1] - domain[0]):
  1067. # DGT only works with finite domain. So, skip the distributions
  1068. # with infinite tails.
  1069. pytest.skip("DGT only works with a finite domain.")
  1070. k = np.arange(domain[0], domain[1]+1)
  1071. pv = dist.pmf(k)
  1072. mv_ex = dist.stats('mv')
  1073. rng = DiscreteGuideTable(dist, random_state=42)
  1074. check_discr_samples(rng, pv, mv_ex)
  1075. u = [
  1076. # the correctness of the PPF for equidistant points between 0 and 1.
  1077. np.linspace(0, 1, num=10000),
  1078. # test the PPF method for empty arrays
  1079. [], [[]],
  1080. # test if nans and infs return nan result.
  1081. [np.nan], [-np.inf, np.nan, np.inf],
  1082. # test if a scalar is returned for a scalar input.
  1083. 0,
  1084. # test for arrays with nans, values greater than 1 and less than 0,
  1085. # and some valid values.
  1086. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
  1087. ]
  1088. @pytest.mark.parametrize('u', u)
  1089. def test_ppf(self, u):
  1090. n, p = 4, 0.1
  1091. dist = stats.binom(n, p)
  1092. rng = DiscreteGuideTable(dist, random_state=42)
  1093. # Older versions of NumPy throw RuntimeWarnings for comparisons
  1094. # with nan.
  1095. with warnings.catch_warnings():
  1096. msg = "invalid value encountered in "
  1097. warnings.filterwarnings("ignore", msg + "greater", RuntimeWarning)
  1098. warnings.filterwarnings("ignore", msg + "greater_equal", RuntimeWarning)
  1099. warnings.filterwarnings("ignore", msg + "less", RuntimeWarning)
  1100. warnings.filterwarnings("ignore", msg + "less_equal", RuntimeWarning)
  1101. res = rng.ppf(u)
  1102. expected = stats.binom.ppf(u, n, p)
  1103. assert_equal(res.shape, expected.shape)
  1104. assert_equal(res, expected)
  1105. @pytest.mark.parametrize("pv, msg", bad_pv_common)
  1106. def test_bad_pv(self, pv, msg):
  1107. with pytest.raises(ValueError, match=msg):
  1108. DiscreteGuideTable(pv)
  1109. # DGT doesn't support infinite tails. So, it should throw an error when
  1110. # inf is present in the domain.
  1111. inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf),
  1112. (0, np.inf), (-np.inf, 0)]
  1113. @pytest.mark.parametrize("domain", inf_domain)
  1114. def test_inf_domain(self, domain):
  1115. with pytest.raises(ValueError, match=r"must be finite"):
  1116. DiscreteGuideTable(stats.binom(10, 0.2), domain=domain)
  1117. class TestSimpleRatioUniforms:
  1118. # pdf with piecewise linear function as transformed density
  1119. # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
  1120. # (from file t_srou.c)
  1121. class dist:
  1122. def __init__(self, shift):
  1123. self.shift = shift
  1124. self.mode = shift
  1125. def pdf(self, x):
  1126. x -= self.shift
  1127. y = 1. / (abs(x) + 1.)
  1128. return 0.5 * y * y
  1129. def cdf(self, x):
  1130. x -= self.shift
  1131. if x <= 0.:
  1132. return 0.5 / (1. - x)
  1133. else:
  1134. return 1. - 0.5 / (1. + x)
  1135. dists = [dist(0.), dist(10000.)]
  1136. # exact mean and variance of the distributions in the list dists
  1137. mv1 = [0., np.inf]
  1138. mv2 = [10000., np.inf]
  1139. mvs = [mv1, mv2]
  1140. @pytest.mark.parametrize("dist, mv_ex",
  1141. zip(dists, mvs))
  1142. @pytest.mark.thread_unsafe
  1143. def test_basic(self, dist, mv_ex):
  1144. rng = SimpleRatioUniforms(dist, mode=dist.mode, random_state=42)
  1145. check_cont_samples(rng, dist, mv_ex)
  1146. rng = SimpleRatioUniforms(dist, mode=dist.mode,
  1147. cdf_at_mode=dist.cdf(dist.mode),
  1148. random_state=42)
  1149. check_cont_samples(rng, dist, mv_ex)
  1150. # test domains with inf + nan in them. need to write a custom test for
  1151. # this because not all methods support infinite tails.
  1152. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  1153. def test_inf_nan_domains(self, domain, err, msg):
  1154. with pytest.raises(err, match=msg):
  1155. SimpleRatioUniforms(StandardNormal(), domain=domain)
  1156. def test_bad_args(self):
  1157. # pdf_area < 0
  1158. with pytest.raises(ValueError, match=r"`pdf_area` must be > 0"):
  1159. SimpleRatioUniforms(StandardNormal(), mode=0, pdf_area=-1)
  1160. class TestRatioUniforms:
  1161. def test_rv_generation(self):
  1162. # use KS test to check distribution of rvs
  1163. # normal distribution
  1164. f = stats.norm.pdf
  1165. v = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
  1166. u = np.sqrt(f(0))
  1167. gen = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=12345)
  1168. assert_equal(stats.kstest(gen.rvs(2500), 'norm')[1] > 0.25, True)
  1169. # exponential distribution
  1170. gen = RatioUniforms(lambda x: np.exp(-x), umax=1,
  1171. vmin=0, vmax=2*np.exp(-1), random_state=12345)
  1172. assert_equal(stats.kstest(gen.rvs(1000), 'expon')[1] > 0.25, True)
  1173. def test_shape(self):
  1174. # test shape of return value depending on size parameter
  1175. f = stats.norm.pdf
  1176. v = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
  1177. u = np.sqrt(f(0))
  1178. gen1 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=1234)
  1179. gen2 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=1234)
  1180. gen3 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=1234)
  1181. r1, r2, r3 = gen1.rvs(3), gen2.rvs((3,)), gen3.rvs((3, 1))
  1182. assert_equal(r1, r2)
  1183. assert_equal(r2, r3.flatten())
  1184. assert_equal(r1.shape, (3,))
  1185. assert_equal(r3.shape, (3, 1))
  1186. gen4 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=12)
  1187. gen5 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=12)
  1188. r4, r5 = gen4.rvs(size=(3, 3, 3)), gen5.rvs(size=27)
  1189. assert_equal(r4.flatten(), r5)
  1190. assert_equal(r4.shape, (3, 3, 3))
  1191. gen6 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=1234)
  1192. gen7 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=1234)
  1193. gen8 = RatioUniforms(f, umax=u, vmin=-v, vmax=v, random_state=1234)
  1194. r6, r7, r8 = gen6.rvs(), gen7.rvs(1), gen8.rvs((1,))
  1195. assert_equal(r6, r7)
  1196. assert_equal(r7, r8)
  1197. def test_random_state(self):
  1198. f = stats.norm.pdf
  1199. v = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
  1200. umax = np.sqrt(f(0))
  1201. gen1 = RatioUniforms(f, umax=umax, vmin=-v, vmax=v, random_state=1234)
  1202. r1 = gen1.rvs(10)
  1203. rng = np.random.RandomState(1234)
  1204. gen2 = RatioUniforms(f, umax=umax, vmin=-v, vmax=v, random_state=rng)
  1205. r2 = gen2.rvs(10)
  1206. assert_equal(r1, r2)
  1207. def test_exceptions(self):
  1208. f = stats.norm.pdf
  1209. # need vmin < vmax
  1210. with assert_raises(ValueError, match="vmin must be smaller than vmax"):
  1211. RatioUniforms(pdf=f, umax=1, vmin=3, vmax=1)
  1212. with assert_raises(ValueError, match="vmin must be smaller than vmax"):
  1213. RatioUniforms(pdf=f, umax=1, vmin=1, vmax=1)
  1214. # need umax > 0
  1215. with assert_raises(ValueError, match="umax must be positive"):
  1216. RatioUniforms(pdf=f, umax=-1, vmin=1, vmax=3)
  1217. with assert_raises(ValueError, match="umax must be positive"):
  1218. RatioUniforms(pdf=f, umax=0, vmin=1, vmax=3)