_distn_infrastructure.py 150 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228
  1. #
  2. # Author: Travis Oliphant 2002-2011 with contributions from
  3. # SciPy Developers 2004-2011
  4. #
  5. from scipy._lib._util import getfullargspec_no_self as _getfullargspec
  6. import sys
  7. import keyword
  8. import re
  9. import types
  10. import warnings
  11. from itertools import zip_longest
  12. from scipy._lib import doccer
  13. from ._distr_params import distcont, distdiscrete
  14. from scipy._lib._util import check_random_state
  15. import scipy._lib.array_api_extra as xpx
  16. from scipy.special import comb, entr
  17. # for root finding for continuous distribution ppf, and maximum likelihood
  18. # estimation
  19. from scipy import optimize
  20. # for functions of continuous distributions (e.g. moments, entropy, cdf)
  21. from scipy import integrate
  22. # to approximate the pdf of a continuous distribution given its cdf
  23. from scipy.stats._finite_differences import _derivative
  24. # for scipy.stats.entropy. Attempts to import just that function or file
  25. # have cause import problems
  26. from scipy import stats
  27. from numpy import (arange, putmask, ones, shape, ndarray, zeros, floor,
  28. logical_and, log, sqrt, place, argmax, vectorize, asarray,
  29. nan, inf, isinf, empty)
  30. import numpy as np
  31. from ._constants import _XMAX, _LOGXMAX
  32. from ._censored_data import CensoredData
  33. from scipy.stats._warnings_errors import FitError
  34. # These are the docstring parts used for substitution in specific
  35. # distribution docstrings
  36. docheaders = {'methods': """\nMethods\n-------\n""",
  37. 'notes': """\nNotes\n-----\n""",
  38. 'examples': """\nExamples\n--------\n"""}
  39. _doc_rvs = """\
  40. rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)
  41. Random variates.
  42. """
  43. _doc_pdf = """\
  44. pdf(x, %(shapes)s, loc=0, scale=1)
  45. Probability density function.
  46. """
  47. _doc_logpdf = """\
  48. logpdf(x, %(shapes)s, loc=0, scale=1)
  49. Log of the probability density function.
  50. """
  51. _doc_pmf = """\
  52. pmf(k, %(shapes)s, loc=0, scale=1)
  53. Probability mass function.
  54. """
  55. _doc_logpmf = """\
  56. logpmf(k, %(shapes)s, loc=0, scale=1)
  57. Log of the probability mass function.
  58. """
  59. _doc_cdf = """\
  60. cdf(x, %(shapes)s, loc=0, scale=1)
  61. Cumulative distribution function.
  62. """
  63. _doc_logcdf = """\
  64. logcdf(x, %(shapes)s, loc=0, scale=1)
  65. Log of the cumulative distribution function.
  66. """
  67. _doc_sf = """\
  68. sf(x, %(shapes)s, loc=0, scale=1)
  69. Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
  70. """ # noqa: E501
  71. _doc_logsf = """\
  72. logsf(x, %(shapes)s, loc=0, scale=1)
  73. Log of the survival function.
  74. """
  75. _doc_ppf = """\
  76. ppf(q, %(shapes)s, loc=0, scale=1)
  77. Percent point function (inverse of ``cdf`` --- percentiles).
  78. """
  79. _doc_isf = """\
  80. isf(q, %(shapes)s, loc=0, scale=1)
  81. Inverse survival function (inverse of ``sf``).
  82. """
  83. _doc_moment = """\
  84. moment(order, %(shapes)s, loc=0, scale=1)
  85. Non-central moment of the specified order.
  86. """
  87. _doc_stats = """\
  88. stats(%(shapes)s, loc=0, scale=1, moments='mv')
  89. Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
  90. """
  91. _doc_entropy = """\
  92. entropy(%(shapes)s, loc=0, scale=1)
  93. (Differential) entropy of the RV.
  94. """
  95. _doc_fit = """\
  96. fit(data)
  97. Parameter estimates for generic data.
  98. See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
  99. keyword arguments.
  100. """ # noqa: E501
  101. _doc_expect = """\
  102. expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
  103. Expected value of a function (of one argument) with respect to the distribution.
  104. """ # noqa: E501
  105. _doc_expect_discrete = """\
  106. expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)
  107. Expected value of a function (of one argument) with respect to the distribution.
  108. """
  109. _doc_median = """\
  110. median(%(shapes)s, loc=0, scale=1)
  111. Median of the distribution.
  112. """
  113. _doc_mean = """\
  114. mean(%(shapes)s, loc=0, scale=1)
  115. Mean of the distribution.
  116. """
  117. _doc_var = """\
  118. var(%(shapes)s, loc=0, scale=1)
  119. Variance of the distribution.
  120. """
  121. _doc_std = """\
  122. std(%(shapes)s, loc=0, scale=1)
  123. Standard deviation of the distribution.
  124. """
  125. _doc_interval = """\
  126. interval(confidence, %(shapes)s, loc=0, scale=1)
  127. Confidence interval with equal areas around the median.
  128. """
  129. _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
  130. _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
  131. _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
  132. _doc_stats, _doc_entropy, _doc_fit,
  133. _doc_expect, _doc_median,
  134. _doc_mean, _doc_var, _doc_std, _doc_interval])
  135. _doc_default_longsummary = """\
  136. As an instance of the `rv_continuous` class, `%(name)s` object inherits from it
  137. a collection of generic methods (see below for the full list),
  138. and completes them with details specific for this particular distribution.
  139. """
  140. _doc_default_frozen_note = """
  141. Alternatively, the object may be called (as a function) to fix the shape,
  142. location, and scale parameters returning a "frozen" continuous RV object:
  143. rv = %(name)s(%(shapes)s, loc=0, scale=1)
  144. - Frozen RV object with the same methods but holding the given shape,
  145. location, and scale fixed.
  146. """
  147. _doc_default_example = """\
  148. Examples
  149. --------
  150. >>> import numpy as np
  151. >>> from scipy.stats import %(name)s
  152. >>> import matplotlib.pyplot as plt
  153. >>> fig, ax = plt.subplots(1, 1)
  154. Get the support:
  155. %(set_vals_stmt)s
  156. >>> lb, ub = %(name)s.support(%(shapes)s)
  157. Calculate the first four moments:
  158. >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
  159. Display the probability density function (``pdf``):
  160. >>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
  161. ... %(name)s.ppf(0.99, %(shapes)s), 100)
  162. >>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
  163. ... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')
  164. Alternatively, the distribution object can be called (as a function)
  165. to fix the shape, location and scale parameters. This returns a "frozen"
  166. RV object holding the given parameters fixed.
  167. Freeze the distribution and display the frozen ``pdf``:
  168. >>> rv = %(name)s(%(shapes)s)
  169. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  170. Check accuracy of ``cdf`` and ``ppf``:
  171. >>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
  172. >>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
  173. True
  174. Generate random numbers:
  175. >>> r = %(name)s.rvs(%(shapes)s, size=1000)
  176. And compare the histogram:
  177. >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
  178. >>> ax.set_xlim([x[0], x[-1]])
  179. >>> ax.legend(loc='best', frameon=False)
  180. >>> plt.show()
  181. """
  182. _doc_default_locscale = """\
  183. The probability density above is defined in the "standardized" form. To shift
  184. and/or scale the distribution use the ``loc`` and ``scale`` parameters.
  185. Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically
  186. equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with
  187. ``y = (x - loc) / scale``. Note that shifting the location of a distribution
  188. does not make it a "noncentral" distribution; noncentral generalizations of
  189. some distributions are available in separate classes.
  190. """
  191. _doc_default = ''.join([_doc_default_longsummary,
  192. _doc_allmethods,
  193. '\n',
  194. _doc_default_example])
  195. _doc_default_before_notes = ''.join([_doc_default_longsummary,
  196. _doc_allmethods])
  197. docdict = {
  198. 'rvs': _doc_rvs,
  199. 'pdf': _doc_pdf,
  200. 'logpdf': _doc_logpdf,
  201. 'cdf': _doc_cdf,
  202. 'logcdf': _doc_logcdf,
  203. 'sf': _doc_sf,
  204. 'logsf': _doc_logsf,
  205. 'ppf': _doc_ppf,
  206. 'isf': _doc_isf,
  207. 'stats': _doc_stats,
  208. 'entropy': _doc_entropy,
  209. 'fit': _doc_fit,
  210. 'moment': _doc_moment,
  211. 'expect': _doc_expect,
  212. 'interval': _doc_interval,
  213. 'mean': _doc_mean,
  214. 'std': _doc_std,
  215. 'var': _doc_var,
  216. 'median': _doc_median,
  217. 'allmethods': _doc_allmethods,
  218. 'longsummary': _doc_default_longsummary,
  219. 'frozennote': _doc_default_frozen_note,
  220. 'example': _doc_default_example,
  221. 'default': _doc_default,
  222. 'before_notes': _doc_default_before_notes,
  223. 'after_notes': _doc_default_locscale
  224. }
  225. # Reuse common content between continuous and discrete docs, change some
  226. # minor bits.
  227. docdict_discrete = docdict.copy()
  228. docdict_discrete['pmf'] = _doc_pmf
  229. docdict_discrete['logpmf'] = _doc_logpmf
  230. docdict_discrete['expect'] = _doc_expect_discrete
  231. _doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
  232. 'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',
  233. 'mean', 'var', 'std', 'interval']
  234. for obj in _doc_disc_methods:
  235. docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')
  236. _doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']
  237. for obj in _doc_disc_methods_err_varname:
  238. docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')
  239. docdict_discrete.pop('pdf')
  240. docdict_discrete.pop('logpdf')
  241. _doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])
  242. docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods
  243. docdict_discrete['longsummary'] = _doc_default_longsummary.replace(
  244. 'rv_continuous', 'rv_discrete')
  245. _doc_default_frozen_note = """
  246. Alternatively, the object may be called (as a function) to fix the shape and
  247. location parameters returning a "frozen" discrete RV object:
  248. rv = %(name)s(%(shapes)s, loc=0)
  249. - Frozen RV object with the same methods but holding the given shape and
  250. location fixed.
  251. """
  252. docdict_discrete['frozennote'] = _doc_default_frozen_note
  253. _doc_default_discrete_example = """\
  254. Examples
  255. --------
  256. >>> import numpy as np
  257. >>> from scipy.stats import %(name)s
  258. >>> import matplotlib.pyplot as plt
  259. >>> fig, ax = plt.subplots(1, 1)
  260. Get the support:
  261. %(set_vals_stmt)s
  262. >>> lb, ub = %(name)s.support(%(shapes)s)
  263. Calculate the first four moments:
  264. >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
  265. Display the probability mass function (``pmf``):
  266. >>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
  267. ... %(name)s.ppf(0.99, %(shapes)s))
  268. >>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
  269. >>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)
  270. Alternatively, the distribution object can be called (as a function)
  271. to fix the shape and location. This returns a "frozen" RV object holding
  272. the given parameters fixed.
  273. Freeze the distribution and display the frozen ``pmf``:
  274. >>> rv = %(name)s(%(shapes)s)
  275. >>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
  276. ... label='frozen pmf')
  277. >>> ax.legend(loc='best', frameon=False)
  278. >>> plt.show()
  279. Check accuracy of ``cdf`` and ``ppf``:
  280. >>> prob = %(name)s.cdf(x, %(shapes)s)
  281. >>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
  282. True
  283. Generate random numbers:
  284. >>> r = %(name)s.rvs(%(shapes)s, size=1000)
  285. """
  286. _doc_default_discrete_locscale = """\
  287. The probability mass function above is defined in the "standardized" form.
  288. To shift distribution use the ``loc`` parameter.
  289. Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically
  290. equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.
  291. """
  292. docdict_discrete['example'] = _doc_default_discrete_example
  293. docdict_discrete['after_notes'] = _doc_default_discrete_locscale
  294. _doc_default_before_notes = ''.join([docdict_discrete['longsummary'],
  295. docdict_discrete['allmethods']])
  296. docdict_discrete['before_notes'] = _doc_default_before_notes
  297. _doc_default_disc = ''.join([docdict_discrete['longsummary'],
  298. docdict_discrete['allmethods'],
  299. docdict_discrete['frozennote'],
  300. docdict_discrete['example']])
  301. docdict_discrete['default'] = _doc_default_disc
  302. # clean up all the separate docstring elements, we do not need them anymore
  303. for obj in [s for s in dir() if s.startswith('_doc_')]:
  304. exec('del ' + obj)
  305. del obj
  306. def _moment(data, n, mu=None):
  307. if mu is None:
  308. mu = data.mean()
  309. return ((data - mu)**n).mean()
  310. def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):
  311. if (n == 0):
  312. return 1.0
  313. elif (n == 1):
  314. if mu is None:
  315. val = moment_func(1, *args)
  316. else:
  317. val = mu
  318. elif (n == 2):
  319. if mu2 is None or mu is None:
  320. val = moment_func(2, *args)
  321. else:
  322. val = mu2 + mu*mu
  323. elif (n == 3):
  324. if g1 is None or mu2 is None or mu is None:
  325. val = moment_func(3, *args)
  326. else:
  327. mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment
  328. val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment
  329. elif (n == 4):
  330. if g1 is None or g2 is None or mu2 is None or mu is None:
  331. val = moment_func(4, *args)
  332. else:
  333. mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
  334. mu3 = g1*np.power(mu2, 1.5) # 3rd central moment
  335. val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu
  336. else:
  337. val = moment_func(n, *args)
  338. return val
  339. def _skew(data):
  340. """
  341. skew is third central moment / variance**(1.5)
  342. """
  343. data = np.ravel(data)
  344. mu = data.mean()
  345. m2 = ((data - mu)**2).mean()
  346. m3 = ((data - mu)**3).mean()
  347. return m3 / np.power(m2, 1.5)
  348. def _kurtosis(data):
  349. """Fisher's excess kurtosis is fourth central moment / variance**2 - 3."""
  350. data = np.ravel(data)
  351. mu = data.mean()
  352. m2 = ((data - mu)**2).mean()
  353. m4 = ((data - mu)**4).mean()
  354. return m4 / m2**2 - 3
  355. def _vectorize_rvs_over_shapes(_rvs1):
  356. """Decorator that vectorizes _rvs method to work on ndarray shapes"""
  357. # _rvs1 must be a _function_ that accepts _scalar_ args as positional
  358. # arguments, `size` and `random_state` as keyword arguments.
  359. # _rvs1 must return a random variate array with shape `size`. If `size` is
  360. # None, _rvs1 must return a scalar.
  361. # When applied to _rvs1, this decorator broadcasts ndarray args
  362. # and loops over them, calling _rvs1 for each set of scalar args.
  363. # For usage example, see _nchypergeom_gen
  364. def _rvs(*args, size, random_state):
  365. _rvs1_size, _rvs1_indices = _check_shape(args[0].shape, size)
  366. size = np.array(size)
  367. _rvs1_size = np.array(_rvs1_size)
  368. _rvs1_indices = np.array(_rvs1_indices)
  369. if np.all(_rvs1_indices): # all args are scalars
  370. return _rvs1(*args, size, random_state)
  371. out = np.empty(size)
  372. # out.shape can mix dimensions associated with arg_shape and _rvs1_size
  373. # Sort them to arg_shape + _rvs1_size for easy indexing of dimensions
  374. # corresponding with the different sets of scalar args
  375. j0 = np.arange(out.ndim)
  376. j1 = np.hstack((j0[~_rvs1_indices], j0[_rvs1_indices]))
  377. out = np.moveaxis(out, j1, j0)
  378. for i in np.ndindex(*size[~_rvs1_indices]):
  379. # arg can be squeezed because singleton dimensions will be
  380. # associated with _rvs1_size, not arg_shape per _check_shape
  381. out[i] = _rvs1(*[np.squeeze(arg)[i] for arg in args],
  382. _rvs1_size, random_state)
  383. return np.moveaxis(out, j0, j1) # move axes back before returning
  384. return _rvs
  385. def _fit_determine_optimizer(optimizer):
  386. if not callable(optimizer) and isinstance(optimizer, str):
  387. if not optimizer.startswith('fmin_'):
  388. optimizer = "fmin_"+optimizer
  389. if optimizer == 'fmin_':
  390. optimizer = 'fmin'
  391. try:
  392. optimizer = getattr(optimize, optimizer)
  393. except AttributeError as e:
  394. raise ValueError(f"{optimizer} is not a valid optimizer") from e
  395. return optimizer
  396. def _isintegral(x):
  397. return x == np.round(x)
  398. def _sum_finite(x):
  399. """
  400. For a 1D array x, return a tuple containing the sum of the
  401. finite values of x and the number of nonfinite values.
  402. This is a utility function used when evaluating the negative
  403. loglikelihood for a distribution and an array of samples.
  404. Examples
  405. --------
  406. >>> import numpy as np
  407. >>> from scipy.stats._distn_infrastructure import _sum_finite
  408. >>> tot, nbad = _sum_finite(np.array([-2, -np.inf, 5, 1]))
  409. >>> tot
  410. 4.0
  411. >>> nbad
  412. 1
  413. """
  414. finite_x = np.isfinite(x)
  415. bad_count = finite_x.size - np.count_nonzero(finite_x)
  416. return np.sum(x[finite_x]), bad_count
  417. # Frozen RV class
  418. class rv_frozen:
  419. # generic type compatibility with scipy-stubs
  420. __class_getitem__ = classmethod(types.GenericAlias)
  421. def __init__(self, dist, *args, **kwds):
  422. self.args = args
  423. self.kwds = kwds
  424. # create a new instance
  425. self.dist = dist.__class__(**dist._updated_ctor_param())
  426. shapes, _, _ = self.dist._parse_args(*args, **kwds)
  427. self.a, self.b = self.dist._get_support(*shapes)
  428. @property
  429. def random_state(self):
  430. return self.dist._random_state
  431. @random_state.setter
  432. def random_state(self, seed):
  433. self.dist._random_state = check_random_state(seed)
  434. def cdf(self, x):
  435. return self.dist.cdf(x, *self.args, **self.kwds)
  436. def logcdf(self, x):
  437. return self.dist.logcdf(x, *self.args, **self.kwds)
  438. def ppf(self, q):
  439. return self.dist.ppf(q, *self.args, **self.kwds)
  440. def isf(self, q):
  441. return self.dist.isf(q, *self.args, **self.kwds)
  442. def rvs(self, size=None, random_state=None):
  443. kwds = self.kwds.copy()
  444. kwds.update({'size': size, 'random_state': random_state})
  445. return self.dist.rvs(*self.args, **kwds)
  446. def sf(self, x):
  447. return self.dist.sf(x, *self.args, **self.kwds)
  448. def logsf(self, x):
  449. return self.dist.logsf(x, *self.args, **self.kwds)
  450. def stats(self, moments='mv'):
  451. kwds = self.kwds.copy()
  452. kwds.update({'moments': moments})
  453. return self.dist.stats(*self.args, **kwds)
  454. def median(self):
  455. return self.dist.median(*self.args, **self.kwds)
  456. def mean(self):
  457. return self.dist.mean(*self.args, **self.kwds)
  458. def var(self):
  459. return self.dist.var(*self.args, **self.kwds)
  460. def std(self):
  461. return self.dist.std(*self.args, **self.kwds)
  462. def moment(self, order=None):
  463. return self.dist.moment(order, *self.args, **self.kwds)
  464. def entropy(self):
  465. return self.dist.entropy(*self.args, **self.kwds)
  466. def interval(self, confidence=None):
  467. return self.dist.interval(confidence, *self.args, **self.kwds)
  468. def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
  469. # expect method only accepts shape parameters as positional args
  470. # hence convert self.args, self.kwds, also loc/scale
  471. # See the .expect method docstrings for the meaning of
  472. # other parameters.
  473. a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
  474. if isinstance(self.dist, rv_discrete):
  475. return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)
  476. else:
  477. return self.dist.expect(func, a, loc, scale, lb, ub,
  478. conditional, **kwds)
  479. def support(self):
  480. return self.dist.support(*self.args, **self.kwds)
  481. class rv_discrete_frozen(rv_frozen):
  482. def pmf(self, k):
  483. return self.dist.pmf(k, *self.args, **self.kwds)
  484. def logpmf(self, k): # No error
  485. return self.dist.logpmf(k, *self.args, **self.kwds)
  486. class rv_continuous_frozen(rv_frozen):
  487. def pdf(self, x):
  488. return self.dist.pdf(x, *self.args, **self.kwds)
  489. def logpdf(self, x):
  490. return self.dist.logpdf(x, *self.args, **self.kwds)
  491. def argsreduce(cond, *args):
  492. """Clean arguments to:
  493. 1. Ensure all arguments are iterable (arrays of dimension at least one
  494. 2. If cond != True and size > 1, ravel(args[i]) where ravel(condition) is
  495. True, in 1D.
  496. Return list of processed arguments.
  497. Examples
  498. --------
  499. >>> import numpy as np
  500. >>> from scipy.stats._distn_infrastructure import argsreduce
  501. >>> rng = np.random.default_rng()
  502. >>> A = rng.random((4, 5))
  503. >>> B = 2
  504. >>> C = rng.random((1, 5))
  505. >>> cond = np.ones(A.shape)
  506. >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
  507. >>> A1.shape
  508. (4, 5)
  509. >>> B1.shape
  510. (1,)
  511. >>> C1.shape
  512. (1, 5)
  513. >>> cond[2,:] = 0
  514. >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
  515. >>> A1.shape
  516. (15,)
  517. >>> B1.shape
  518. (1,)
  519. >>> C1.shape
  520. (15,)
  521. """
  522. # some distributions assume arguments are iterable.
  523. newargs = np.atleast_1d(*args)
  524. # np.atleast_1d returns an array if only one argument, or a list of arrays
  525. # if more than one argument.
  526. if not isinstance(newargs, (list | tuple)):
  527. newargs = (newargs,)
  528. if np.all(cond):
  529. # broadcast arrays with cond
  530. *newargs, cond = np.broadcast_arrays(*newargs, cond)
  531. return [arg.ravel() for arg in newargs]
  532. s = cond.shape
  533. # np.extract returns flattened arrays, which are not broadcastable together
  534. # unless they are either the same size or size == 1.
  535. return [(arg if np.size(arg) == 1
  536. else np.extract(cond, np.broadcast_to(arg, s)))
  537. for arg in newargs]
  538. parse_arg_template = """
  539. def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):
  540. return (%(shape_arg_str)s), %(locscale_out)s
  541. def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):
  542. return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)
  543. def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):
  544. return (%(shape_arg_str)s), %(locscale_out)s, moments
  545. """
  546. class rv_generic:
  547. """Class which encapsulates common functionality between rv_discrete
  548. and rv_continuous.
  549. """
  550. def __init__(self, seed=None):
  551. super().__init__()
  552. # figure out if _stats signature has 'moments' keyword
  553. sig = _getfullargspec(self._stats)
  554. self._stats_has_moments = ((sig.varkw is not None) or
  555. ('moments' in sig.args) or
  556. ('moments' in sig.kwonlyargs))
  557. self._random_state = check_random_state(seed)
  558. @property
  559. def random_state(self):
  560. """Get or set the generator object for generating random variates.
  561. If `random_state` is None (or `np.random`), the
  562. `numpy.random.RandomState` singleton is used.
  563. If `random_state` is an int, a new ``RandomState`` instance is used,
  564. seeded with `random_state`.
  565. If `random_state` is already a ``Generator`` or ``RandomState``
  566. instance, that instance is used.
  567. """
  568. return self._random_state
  569. @random_state.setter
  570. def random_state(self, seed):
  571. self._random_state = check_random_state(seed)
  572. def __setstate__(self, state):
  573. try:
  574. self.__dict__.update(state)
  575. # attaches the dynamically created methods on each instance.
  576. # if a subclass overrides rv_generic.__setstate__, or implements
  577. # it's own _attach_methods, then it must make sure that
  578. # _attach_argparser_methods is called.
  579. self._attach_methods()
  580. except ValueError:
  581. # reconstitute an old pickle scipy<1.6, that contains
  582. # (_ctor_param, random_state) as state
  583. self._ctor_param = state[0]
  584. self._random_state = state[1]
  585. self.__init__()
  586. def _attach_methods(self):
  587. """Attaches dynamically created methods to the rv_* instance.
  588. This method must be overridden by subclasses, and must itself call
  589. _attach_argparser_methods. This method is called in __init__ in
  590. subclasses, and in __setstate__
  591. """
  592. raise NotImplementedError
  593. def _attach_argparser_methods(self):
  594. """
  595. Generates the argument-parsing functions dynamically and attaches
  596. them to the instance.
  597. Should be called from `_attach_methods`, typically in __init__ and
  598. during unpickling (__setstate__)
  599. """
  600. ns = {}
  601. exec(self._parse_arg_template, ns)
  602. # NB: attach to the instance, not class
  603. for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:
  604. setattr(self, name, types.MethodType(ns[name], self))
  605. def _construct_argparser(
  606. self, meths_to_inspect, locscale_in, locscale_out):
  607. """Construct the parser string for the shape arguments.
  608. This method should be called in __init__ of a class for each
  609. distribution. It creates the `_parse_arg_template` attribute that is
  610. then used by `_attach_argparser_methods` to dynamically create and
  611. attach the `_parse_args`, `_parse_args_stats`, `_parse_args_rvs`
  612. methods to the instance.
  613. If self.shapes is a non-empty string, interprets it as a
  614. comma-separated list of shape parameters.
  615. Otherwise inspects the call signatures of `meths_to_inspect`
  616. and constructs the argument-parsing functions from these.
  617. In this case also sets `shapes` and `numargs`.
  618. """
  619. if self.shapes:
  620. # sanitize the user-supplied shapes
  621. if not isinstance(self.shapes, str):
  622. raise TypeError('shapes must be a string.')
  623. shapes = self.shapes.replace(',', ' ').split()
  624. for field in shapes:
  625. if keyword.iskeyword(field):
  626. raise SyntaxError('keywords cannot be used as shapes.')
  627. if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):
  628. raise SyntaxError(
  629. 'shapes must be valid python identifiers')
  630. else:
  631. # find out the call signatures (_pdf, _cdf etc), deduce shape
  632. # arguments. Generic methods only have 'self, x', any further args
  633. # are shapes.
  634. shapes_list = []
  635. for meth in meths_to_inspect:
  636. shapes_args = _getfullargspec(meth) # NB does not contain self
  637. args = shapes_args.args[1:] # peel off 'x', too
  638. if args:
  639. shapes_list.append(args)
  640. # *args or **kwargs are not allowed w/automatic shapes
  641. if shapes_args.varargs is not None:
  642. raise TypeError(
  643. '*args are not allowed w/out explicit shapes')
  644. if shapes_args.varkw is not None:
  645. raise TypeError(
  646. '**kwds are not allowed w/out explicit shapes')
  647. if shapes_args.kwonlyargs:
  648. raise TypeError(
  649. 'kwonly args are not allowed w/out explicit shapes')
  650. if shapes_args.defaults is not None:
  651. raise TypeError('defaults are not allowed for shapes')
  652. if shapes_list:
  653. shapes = shapes_list[0]
  654. # make sure the signatures are consistent
  655. for item in shapes_list:
  656. if item != shapes:
  657. raise TypeError('Shape arguments are inconsistent.')
  658. else:
  659. shapes = []
  660. # have the arguments, construct the method from template
  661. shapes_str = ', '.join(shapes) + ', ' if shapes else '' # NB: not None
  662. dct = dict(shape_arg_str=shapes_str,
  663. locscale_in=locscale_in,
  664. locscale_out=locscale_out,
  665. )
  666. # this string is used by _attach_argparser_methods
  667. self._parse_arg_template = parse_arg_template % dct
  668. self.shapes = ', '.join(shapes) if shapes else None
  669. if not hasattr(self, 'numargs'):
  670. # allows more general subclassing with *args
  671. self.numargs = len(shapes)
  672. def _construct_doc(self, docdict, shapes_vals=None):
  673. """Construct the instance docstring with string substitutions."""
  674. if sys.flags.optimize > 1:
  675. # if run with -OO, docstrings are stripped
  676. # see https://docs.python.org/3/using/cmdline.html#cmdoption-OO
  677. return
  678. tempdict = docdict.copy()
  679. tempdict['name'] = self.name or 'distname'
  680. tempdict['shapes'] = self.shapes or ''
  681. if shapes_vals is None:
  682. shapes_vals = ()
  683. try:
  684. vals = ', '.join(f'{val:.3g}' for val in shapes_vals)
  685. except TypeError:
  686. vals = ', '.join(f'{val}' for val in shapes_vals)
  687. tempdict['vals'] = vals
  688. tempdict['shapes_'] = self.shapes or ''
  689. if self.shapes and self.numargs == 1:
  690. tempdict['shapes_'] += ','
  691. if self.shapes:
  692. tempdict['set_vals_stmt'] = f'>>> {self.shapes} = {vals}'
  693. else:
  694. tempdict['set_vals_stmt'] = ''
  695. if self.shapes is None:
  696. # remove shapes from call parameters if there are none
  697. for item in ['default', 'before_notes']:
  698. tempdict[item] = tempdict[item].replace(
  699. "\n%(shapes)s : array_like\n shape parameters", "")
  700. for i in range(2):
  701. if self.shapes is None:
  702. # necessary because we use %(shapes)s in two forms (w w/o ", ")
  703. self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
  704. try:
  705. self.__doc__ = doccer.docformat(self.__doc__, tempdict)
  706. except TypeError as e:
  707. raise Exception("Unable to construct docstring for "
  708. f"distribution \"{self.name}\": {repr(e)}") from e
  709. # correct for empty shapes
  710. self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')
  711. def _construct_default_doc(self, longname=None,
  712. docdict=None, discrete='continuous'):
  713. """Construct instance docstring from the default template."""
  714. if sys.flags.optimize > 1:
  715. # if run with -OO, docstrings are stripped
  716. # see https://docs.python.org/3/using/cmdline.html#cmdoption-OO
  717. return
  718. if longname is None:
  719. longname = 'A'
  720. self.__doc__ = ''.join([f'{longname} {discrete} random variable.',
  721. '\n\n%(before_notes)s\n', docheaders['notes'],
  722. '\n%(example)s'])
  723. self._construct_doc(docdict)
  724. def freeze(self, *args, **kwds):
  725. """Freeze the distribution for the given arguments.
  726. Parameters
  727. ----------
  728. arg1, arg2, arg3,... : array_like
  729. The shape parameter(s) for the distribution. Should include all
  730. the non-optional arguments, may include ``loc`` and ``scale``.
  731. Returns
  732. -------
  733. rv_frozen : rv_frozen instance
  734. The frozen distribution.
  735. """
  736. if isinstance(self, rv_continuous):
  737. return rv_continuous_frozen(self, *args, **kwds)
  738. else:
  739. return rv_discrete_frozen(self, *args, **kwds)
  740. def __call__(self, *args, **kwds):
  741. return self.freeze(*args, **kwds)
  742. __call__.__doc__ = freeze.__doc__
  743. # The actual calculation functions (no basic checking need be done)
  744. # If these are defined, the others won't be looked at.
  745. # Otherwise, the other set can be defined.
  746. def _stats(self, *args, **kwds):
  747. return None, None, None, None
  748. # Noncentral moments (also known as the moment about the origin).
  749. # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime".
  750. # The primed mu is a widely used notation for the noncentral moment.
  751. def _munp(self, n, *args):
  752. # Silence floating point warnings from integration.
  753. with np.errstate(all='ignore'):
  754. vals = self.generic_moment(n, *args)
  755. return vals
  756. def _argcheck_rvs(self, *args, **kwargs):
  757. # Handle broadcasting and size validation of the rvs method.
  758. # Subclasses should not have to override this method.
  759. # The rule is that if `size` is not None, then `size` gives the
  760. # shape of the result (integer values of `size` are treated as
  761. # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)
  762. #
  763. # `args` is expected to contain the shape parameters (if any), the
  764. # location and the scale in a flat tuple (e.g. if there are two
  765. # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).
  766. # The only keyword argument expected is 'size'.
  767. size = kwargs.get('size', None)
  768. all_bcast = np.broadcast_arrays(*args)
  769. def squeeze_left(a):
  770. while a.ndim > 0 and a.shape[0] == 1:
  771. a = a[0]
  772. return a
  773. # Eliminate trivial leading dimensions. In the convention
  774. # used by numpy's random variate generators, trivial leading
  775. # dimensions are effectively ignored. In other words, when `size`
  776. # is given, trivial leading dimensions of the broadcast parameters
  777. # in excess of the number of dimensions in size are ignored, e.g.
  778. # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)
  779. # array([ 1.00104267, 3.00422496, 4.99799278])
  780. # If `size` is not given, the exact broadcast shape is preserved:
  781. # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])
  782. # array([[[[ 1.00862899, 3.00061431, 4.99867122]]]])
  783. #
  784. all_bcast = [squeeze_left(a) for a in all_bcast]
  785. bcast_shape = all_bcast[0].shape
  786. bcast_ndim = all_bcast[0].ndim
  787. if size is None:
  788. size_ = bcast_shape
  789. else:
  790. size_ = tuple(np.atleast_1d(size))
  791. # Check compatibility of size_ with the broadcast shape of all
  792. # the parameters. This check is intended to be consistent with
  793. # how the numpy random variate generators (e.g. np.random.normal,
  794. # np.random.beta) handle their arguments. The rule is that, if size
  795. # is given, it determines the shape of the output. Broadcasting
  796. # can't change the output size.
  797. # This is the standard broadcasting convention of extending the
  798. # shape with fewer dimensions with enough dimensions of length 1
  799. # so that the two shapes have the same number of dimensions.
  800. ndiff = bcast_ndim - len(size_)
  801. if ndiff < 0:
  802. bcast_shape = (1,)*(-ndiff) + bcast_shape
  803. elif ndiff > 0:
  804. size_ = (1,)*ndiff + size_
  805. # This compatibility test is not standard. In "regular" broadcasting,
  806. # two shapes are compatible if for each dimension, the lengths are the
  807. # same or one of the lengths is 1. Here, the length of a dimension in
  808. # size_ must not be less than the corresponding length in bcast_shape.
  809. ok = all([bcdim == 1 or bcdim == szdim
  810. for (bcdim, szdim) in zip(bcast_shape, size_)])
  811. if not ok:
  812. raise ValueError("size does not match the broadcast shape of "
  813. f"the parameters. {size}, {size_}, {bcast_shape}")
  814. param_bcast = all_bcast[:-2]
  815. loc_bcast = all_bcast[-2]
  816. scale_bcast = all_bcast[-1]
  817. return param_bcast, loc_bcast, scale_bcast, size_
  818. # These are the methods you must define (standard form functions)
  819. # NB: generic _pdf, _logpdf, _cdf are different for
  820. # rv_continuous and rv_discrete hence are defined in there
  821. def _argcheck(self, *args):
  822. """Default check for correct values on args and keywords.
  823. Returns condition array of 1's where arguments are correct and
  824. 0's where they are not.
  825. """
  826. cond = 1
  827. for arg in args:
  828. cond = logical_and(cond, (asarray(arg) > 0))
  829. return cond
  830. def _get_support(self, *args, **kwargs):
  831. """Return the support of the (unscaled, unshifted) distribution.
  832. *Must* be overridden by distributions which have support dependent
  833. upon the shape parameters of the distribution. Any such override
  834. *must not* set or change any of the class members, as these members
  835. are shared amongst all instances of the distribution.
  836. Parameters
  837. ----------
  838. arg1, arg2, ... : array_like
  839. The shape parameter(s) for the distribution (see docstring of the
  840. instance object for more information).
  841. Returns
  842. -------
  843. a, b : numeric (float, or int or +/-np.inf)
  844. end-points of the distribution's support for the specified
  845. shape parameters.
  846. """
  847. return self.a, self.b
  848. def _support_mask(self, x, *args):
  849. a, b = self._get_support(*args)
  850. with np.errstate(invalid='ignore'):
  851. return (a <= x) & (x <= b)
  852. def _open_support_mask(self, x, *args):
  853. a, b = self._get_support(*args)
  854. with np.errstate(invalid='ignore'):
  855. return (a < x) & (x < b)
  856. def _rvs(self, *args, size=None, random_state=None):
  857. # This method must handle size being a tuple, and it must
  858. # properly broadcast *args and size. size might be
  859. # an empty tuple, which means a scalar random variate is to be
  860. # generated.
  861. # Use basic inverse cdf algorithm for RV generation as default.
  862. U = random_state.uniform(size=size)
  863. Y = self._ppf(U, *args)
  864. return Y
  865. def _logcdf(self, x, *args):
  866. with np.errstate(divide='ignore'):
  867. return log(self._cdf(x, *args))
  868. def _sf(self, x, *args):
  869. return 1.0-self._cdf(x, *args)
  870. def _logsf(self, x, *args):
  871. with np.errstate(divide='ignore'):
  872. return log(self._sf(x, *args))
  873. def _ppf(self, q, *args):
  874. return self._ppfvec(q, *args)
  875. def _isf(self, q, *args):
  876. return self._ppf(1.0-q, *args) # use correct _ppf for subclasses
  877. # These are actually called, and should not be overwritten if you
  878. # want to keep error checking.
  879. def rvs(self, *args, **kwds):
  880. """Random variates of given type.
  881. Parameters
  882. ----------
  883. arg1, arg2, arg3,... : array_like
  884. The shape parameter(s) for the distribution (see docstring of the
  885. instance object for more information).
  886. loc : array_like, optional
  887. Location parameter (default=0).
  888. scale : array_like, optional
  889. Scale parameter (default=1).
  890. size : int or tuple of ints, optional
  891. Defining number of random variates (default is 1).
  892. random_state : {None, int, `numpy.random.Generator`,
  893. `numpy.random.RandomState`}, optional
  894. If `random_state` is None (or `np.random`), the
  895. `numpy.random.RandomState` singleton is used.
  896. If `random_state` is an int, a new ``RandomState`` instance is
  897. used, seeded with `random_state`.
  898. If `random_state` is already a ``Generator`` or ``RandomState``
  899. instance, that instance is used.
  900. Returns
  901. -------
  902. rvs : ndarray or scalar
  903. Random variates of given `size`.
  904. """
  905. discrete = kwds.pop('discrete', None)
  906. rndm = kwds.pop('random_state', None)
  907. args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
  908. cond = logical_and(self._argcheck(*args), (scale >= 0))
  909. if not np.all(cond):
  910. message = ("Domain error in arguments. The `scale` parameter must "
  911. "be positive for all distributions, and many "
  912. "distributions have restrictions on shape parameters. "
  913. f"Please see the `scipy.stats.{self.name}` "
  914. "documentation for details.")
  915. raise ValueError(message)
  916. if np.all(scale == 0):
  917. return loc*ones(size, 'd')
  918. # extra gymnastics needed for a custom random_state
  919. if rndm is not None:
  920. random_state_saved = self._random_state
  921. random_state = check_random_state(rndm)
  922. else:
  923. random_state = self._random_state
  924. vals = self._rvs(*args, size=size, random_state=random_state)
  925. vals = vals * scale + loc
  926. # do not forget to restore the _random_state
  927. if rndm is not None:
  928. self._random_state = random_state_saved
  929. # Cast to int if discrete
  930. if discrete and not isinstance(self, rv_sample):
  931. if size == ():
  932. vals = int(vals)
  933. else:
  934. vals = vals.astype(np.int64)
  935. return vals
  936. def stats(self, *args, **kwds):
  937. """Some statistics of the given RV.
  938. Parameters
  939. ----------
  940. arg1, arg2, arg3,... : array_like
  941. The shape parameter(s) for the distribution (see docstring of the
  942. instance object for more information)
  943. loc : array_like, optional
  944. location parameter (default=0)
  945. scale : array_like, optional (continuous RVs only)
  946. scale parameter (default=1)
  947. moments : str, optional
  948. composed of letters ['mvsk'] defining which moments to compute:
  949. 'm' = mean,
  950. 'v' = variance,
  951. 's' = (Fisher's) skew,
  952. 'k' = (Fisher's) kurtosis.
  953. (default is 'mv')
  954. Returns
  955. -------
  956. stats : sequence
  957. of requested moments.
  958. """
  959. args, loc, scale, moments = self._parse_args_stats(*args, **kwds)
  960. # scale = 1 by construction for discrete RVs
  961. loc, scale = map(asarray, (loc, scale))
  962. args = tuple(map(asarray, args))
  963. cond = self._argcheck(*args) & (scale > 0) & (loc == loc)
  964. output = []
  965. default = np.full(shape(cond), fill_value=self.badvalue)
  966. # Use only entries that are valid in calculation
  967. if np.any(cond):
  968. goodargs = argsreduce(cond, *(args+(scale, loc)))
  969. scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
  970. if self._stats_has_moments:
  971. mu, mu2, g1, g2 = self._stats(*goodargs,
  972. **{'moments': moments})
  973. else:
  974. mu, mu2, g1, g2 = self._stats(*goodargs)
  975. if 'm' in moments:
  976. if mu is None:
  977. mu = self._munp(1, *goodargs)
  978. out0 = default.copy()
  979. place(out0, cond, mu * scale + loc)
  980. output.append(out0)
  981. if 'v' in moments:
  982. if mu2 is None:
  983. mu2p = self._munp(2, *goodargs)
  984. if mu is None:
  985. mu = self._munp(1, *goodargs)
  986. # if mean is inf then var is also inf
  987. with np.errstate(invalid='ignore'):
  988. mu2 = np.where(~np.isinf(mu), mu2p - mu**2, np.inf)
  989. out0 = default.copy()
  990. place(out0, cond, mu2 * scale * scale)
  991. output.append(out0)
  992. if 's' in moments:
  993. if g1 is None:
  994. mu3p = self._munp(3, *goodargs)
  995. if mu is None:
  996. mu = self._munp(1, *goodargs)
  997. if mu2 is None:
  998. mu2p = self._munp(2, *goodargs)
  999. with np.errstate(invalid='ignore'):
  1000. mu2 = mu2p - mu * mu
  1001. with np.errstate(invalid='ignore'):
  1002. mu3 = (-mu*mu - 3*mu2)*mu + mu3p
  1003. g1 = mu3 / np.power(mu2, 1.5)
  1004. out0 = default.copy()
  1005. place(out0, cond, g1)
  1006. output.append(out0)
  1007. if 'k' in moments:
  1008. if g2 is None:
  1009. mu4p = self._munp(4, *goodargs)
  1010. if mu is None:
  1011. mu = self._munp(1, *goodargs)
  1012. if mu2 is None:
  1013. mu2p = self._munp(2, *goodargs)
  1014. with np.errstate(invalid='ignore'):
  1015. mu2 = mu2p - mu * mu
  1016. if g1 is None:
  1017. mu3 = None
  1018. else:
  1019. # (mu2**1.5) breaks down for nan and inf
  1020. mu3 = g1 * np.power(mu2, 1.5)
  1021. if mu3 is None:
  1022. mu3p = self._munp(3, *goodargs)
  1023. with np.errstate(invalid='ignore'):
  1024. mu3 = (-mu * mu - 3 * mu2) * mu + mu3p
  1025. with np.errstate(invalid='ignore'):
  1026. mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p
  1027. g2 = mu4 / mu2**2.0 - 3.0
  1028. out0 = default.copy()
  1029. place(out0, cond, g2)
  1030. output.append(out0)
  1031. else: # no valid args
  1032. output = [default.copy() for _ in moments]
  1033. output = [out[()] for out in output]
  1034. if len(output) == 1:
  1035. return output[0]
  1036. else:
  1037. return tuple(output)
  1038. def entropy(self, *args, **kwds):
  1039. """Differential entropy of the RV.
  1040. Parameters
  1041. ----------
  1042. arg1, arg2, arg3,... : array_like
  1043. The shape parameter(s) for the distribution (see docstring of the
  1044. instance object for more information).
  1045. loc : array_like, optional
  1046. Location parameter (default=0).
  1047. scale : array_like, optional (continuous distributions only).
  1048. Scale parameter (default=1).
  1049. Notes
  1050. -----
  1051. Entropy is defined base `e`:
  1052. >>> import numpy as np
  1053. >>> from scipy.stats._distn_infrastructure import rv_discrete
  1054. >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5)))
  1055. >>> np.allclose(drv.entropy(), np.log(2.0))
  1056. True
  1057. """
  1058. args, loc, scale = self._parse_args(*args, **kwds)
  1059. # NB: for discrete distributions scale=1 by construction in _parse_args
  1060. loc, scale = map(asarray, (loc, scale))
  1061. args = tuple(map(asarray, args))
  1062. cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
  1063. output = zeros(shape(cond0), 'd')
  1064. place(output, (1-cond0), self.badvalue)
  1065. goodargs = argsreduce(cond0, scale, *args)
  1066. goodscale = goodargs[0]
  1067. goodargs = goodargs[1:]
  1068. place(output, cond0, self.vecentropy(*goodargs) + log(goodscale))
  1069. return output[()]
  1070. def moment(self, order, *args, **kwds):
  1071. """non-central moment of distribution of specified order.
  1072. Parameters
  1073. ----------
  1074. order : int, order >= 1
  1075. Order of moment.
  1076. arg1, arg2, arg3,... : float
  1077. The shape parameter(s) for the distribution (see docstring of the
  1078. instance object for more information).
  1079. loc : array_like, optional
  1080. location parameter (default=0)
  1081. scale : array_like, optional
  1082. scale parameter (default=1)
  1083. """
  1084. n = order
  1085. shapes, loc, scale = self._parse_args(*args, **kwds)
  1086. args = np.broadcast_arrays(*(*shapes, loc, scale))
  1087. *shapes, loc, scale = args
  1088. i0 = np.logical_and(self._argcheck(*shapes), scale > 0)
  1089. i1 = np.logical_and(i0, loc == 0)
  1090. i2 = np.logical_and(i0, loc != 0)
  1091. args = argsreduce(i0, *shapes, loc, scale)
  1092. *shapes, loc, scale = args
  1093. if (floor(n) != n):
  1094. raise ValueError("Moment must be an integer.")
  1095. if (n < 0):
  1096. raise ValueError("Moment must be positive.")
  1097. mu, mu2, g1, g2 = None, None, None, None
  1098. if (n > 0) and (n < 5):
  1099. if self._stats_has_moments:
  1100. mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'mvsk'}[n]}
  1101. else:
  1102. mdict = {}
  1103. mu, mu2, g1, g2 = self._stats(*shapes, **mdict)
  1104. val = np.empty(loc.shape) # val needs to be indexed by loc
  1105. val[...] = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, shapes)
  1106. # Convert to transformed X = L + S*Y
  1107. # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n)
  1108. result = zeros(i0.shape)
  1109. place(result, ~i0, self.badvalue)
  1110. if i1.any():
  1111. res1 = scale[loc == 0]**n * val[loc == 0]
  1112. place(result, i1, res1)
  1113. if i2.any():
  1114. mom = [mu, mu2, g1, g2]
  1115. arrs = [i for i in mom if i is not None]
  1116. idx = [i for i in range(4) if mom[i] is not None]
  1117. if any(idx):
  1118. arrs = argsreduce(loc != 0, *arrs)
  1119. j = 0
  1120. for i in idx:
  1121. mom[i] = arrs[j]
  1122. j += 1
  1123. mu, mu2, g1, g2 = mom
  1124. args = argsreduce(loc != 0, *shapes, loc, scale, val)
  1125. *shapes, loc, scale, val = args
  1126. res2 = zeros(loc.shape, dtype='d')
  1127. fac = scale / loc
  1128. for k in range(n):
  1129. valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp,
  1130. shapes)
  1131. res2 += comb(n, k, exact=True)*fac**k * valk
  1132. res2 += fac**n * val
  1133. res2 *= loc**n
  1134. place(result, i2, res2)
  1135. return result[()]
  1136. def median(self, *args, **kwds):
  1137. """Median of the distribution.
  1138. Parameters
  1139. ----------
  1140. arg1, arg2, arg3,... : array_like
  1141. The shape parameter(s) for the distribution (see docstring of the
  1142. instance object for more information)
  1143. loc : array_like, optional
  1144. Location parameter, Default is 0.
  1145. scale : array_like, optional
  1146. Scale parameter, Default is 1.
  1147. Returns
  1148. -------
  1149. median : float
  1150. The median of the distribution.
  1151. See Also
  1152. --------
  1153. rv_discrete.ppf
  1154. Inverse of the CDF
  1155. """
  1156. return self.ppf(0.5, *args, **kwds)
  1157. def mean(self, *args, **kwds):
  1158. """Mean of the distribution.
  1159. Parameters
  1160. ----------
  1161. arg1, arg2, arg3,... : array_like
  1162. The shape parameter(s) for the distribution (see docstring of the
  1163. instance object for more information)
  1164. loc : array_like, optional
  1165. location parameter (default=0)
  1166. scale : array_like, optional
  1167. scale parameter (default=1)
  1168. Returns
  1169. -------
  1170. mean : float
  1171. the mean of the distribution
  1172. """
  1173. kwds['moments'] = 'm'
  1174. res = self.stats(*args, **kwds)
  1175. if isinstance(res, ndarray) and res.ndim == 0:
  1176. return res[()]
  1177. return res
  1178. def var(self, *args, **kwds):
  1179. """Variance of the distribution.
  1180. Parameters
  1181. ----------
  1182. arg1, arg2, arg3,... : array_like
  1183. The shape parameter(s) for the distribution (see docstring of the
  1184. instance object for more information)
  1185. loc : array_like, optional
  1186. location parameter (default=0)
  1187. scale : array_like, optional
  1188. scale parameter (default=1)
  1189. Returns
  1190. -------
  1191. var : float
  1192. the variance of the distribution
  1193. """
  1194. kwds['moments'] = 'v'
  1195. res = self.stats(*args, **kwds)
  1196. if isinstance(res, ndarray) and res.ndim == 0:
  1197. return res[()]
  1198. return res
  1199. def std(self, *args, **kwds):
  1200. """Standard deviation of the distribution.
  1201. Parameters
  1202. ----------
  1203. arg1, arg2, arg3,... : array_like
  1204. The shape parameter(s) for the distribution (see docstring of the
  1205. instance object for more information)
  1206. loc : array_like, optional
  1207. location parameter (default=0)
  1208. scale : array_like, optional
  1209. scale parameter (default=1)
  1210. Returns
  1211. -------
  1212. std : float
  1213. standard deviation of the distribution
  1214. """
  1215. kwds['moments'] = 'v'
  1216. res = sqrt(self.stats(*args, **kwds))
  1217. return res
  1218. def interval(self, confidence, *args, **kwds):
  1219. """Confidence interval with equal areas around the median.
  1220. Parameters
  1221. ----------
  1222. confidence : array_like of float
  1223. Probability that an rv will be drawn from the returned range.
  1224. Each value should be in the range [0, 1].
  1225. arg1, arg2, ... : array_like
  1226. The shape parameter(s) for the distribution (see docstring of the
  1227. instance object for more information).
  1228. loc : array_like, optional
  1229. location parameter, Default is 0.
  1230. scale : array_like, optional
  1231. scale parameter, Default is 1.
  1232. Returns
  1233. -------
  1234. a, b : ndarray of float
  1235. end-points of range that contain ``100 * alpha %`` of the rv's
  1236. possible values.
  1237. Notes
  1238. -----
  1239. This is implemented as ``ppf([p_tail, 1-p_tail])``, where
  1240. ``ppf`` is the inverse cumulative distribution function and
  1241. ``p_tail = (1-confidence)/2``. Suppose ``[c, d]`` is the support of a
  1242. discrete distribution; then ``ppf([0, 1]) == (c-1, d)``. Therefore,
  1243. when ``confidence=1`` and the distribution is discrete, the left end
  1244. of the interval will be beyond the support of the distribution.
  1245. For discrete distributions, the interval will limit the probability
  1246. in each tail to be less than or equal to ``p_tail`` (usually
  1247. strictly less).
  1248. """
  1249. alpha = confidence
  1250. alpha = asarray(alpha)
  1251. if np.any((alpha > 1) | (alpha < 0)):
  1252. raise ValueError("alpha must be between 0 and 1 inclusive")
  1253. q1 = (1.0-alpha)/2
  1254. q2 = (1.0+alpha)/2
  1255. a = self.ppf(q1, *args, **kwds)
  1256. b = self.ppf(q2, *args, **kwds)
  1257. return a, b
  1258. def support(self, *args, **kwargs):
  1259. """Support of the distribution.
  1260. Parameters
  1261. ----------
  1262. arg1, arg2, ... : array_like
  1263. The shape parameter(s) for the distribution (see docstring of the
  1264. instance object for more information).
  1265. loc : array_like, optional
  1266. location parameter, Default is 0.
  1267. scale : array_like, optional
  1268. scale parameter, Default is 1.
  1269. Returns
  1270. -------
  1271. a, b : array_like
  1272. end-points of the distribution's support.
  1273. """
  1274. args, loc, scale = self._parse_args(*args, **kwargs)
  1275. arrs = np.broadcast_arrays(*args, loc, scale)
  1276. args, loc, scale = arrs[:-2], arrs[-2], arrs[-1]
  1277. cond = self._argcheck(*args) & (scale > 0)
  1278. _a, _b = self._get_support(*args)
  1279. if cond.all():
  1280. return _a * scale + loc, _b * scale + loc
  1281. elif cond.ndim == 0:
  1282. return self.badvalue, self.badvalue
  1283. # promote bounds to at least float to fill in the badvalue
  1284. _a, _b = np.asarray(_a).astype('d'), np.asarray(_b).astype('d')
  1285. out_a, out_b = _a * scale + loc, _b * scale + loc
  1286. place(out_a, 1-cond, self.badvalue)
  1287. place(out_b, 1-cond, self.badvalue)
  1288. return out_a, out_b
  1289. def nnlf(self, theta, x):
  1290. """Negative loglikelihood function.
  1291. Notes
  1292. -----
  1293. This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the
  1294. parameters (including loc and scale).
  1295. """
  1296. loc, scale, args = self._unpack_loc_scale(theta)
  1297. if not self._argcheck(*args) or scale <= 0:
  1298. return inf
  1299. x = (asarray(x)-loc) / scale
  1300. n_log_scale = len(x) * log(scale)
  1301. if np.any(~self._support_mask(x, *args)):
  1302. return inf
  1303. return self._nnlf(x, *args) + n_log_scale
  1304. def _nnlf(self, x, *args):
  1305. return -np.sum(self._logpxf(x, *args), axis=0)
  1306. def _nlff_and_penalty(self, x, args, log_fitfun):
  1307. # negative log fit function
  1308. cond0 = ~self._support_mask(x, *args)
  1309. n_bad = np.count_nonzero(cond0, axis=0)
  1310. if n_bad > 0:
  1311. x = argsreduce(~cond0, x)[0]
  1312. logff = log_fitfun(x, *args)
  1313. finite_logff = np.isfinite(logff)
  1314. n_bad += np.sum(~finite_logff, axis=0)
  1315. if n_bad > 0:
  1316. penalty = n_bad * log(_XMAX) * 100
  1317. return -np.sum(logff[finite_logff], axis=0) + penalty
  1318. return -np.sum(logff, axis=0)
  1319. def _penalized_nnlf(self, theta, x):
  1320. """Penalized negative loglikelihood function.
  1321. i.e., - sum (log pdf(x, theta), axis=0) + penalty
  1322. where theta are the parameters (including loc and scale)
  1323. """
  1324. loc, scale, args = self._unpack_loc_scale(theta)
  1325. if not self._argcheck(*args) or scale <= 0:
  1326. return inf
  1327. x = asarray((x-loc) / scale)
  1328. n_log_scale = len(x) * log(scale)
  1329. return self._nlff_and_penalty(x, args, self._logpxf) + n_log_scale
  1330. def _penalized_nlpsf(self, theta, x):
  1331. """Penalized negative log product spacing function.
  1332. i.e., - sum (log (diff (cdf (x, theta))), axis=0) + penalty
  1333. where theta are the parameters (including loc and scale)
  1334. Follows reference [1] of scipy.stats.fit
  1335. """
  1336. loc, scale, args = self._unpack_loc_scale(theta)
  1337. if not self._argcheck(*args) or scale <= 0:
  1338. return inf
  1339. x = (np.sort(x) - loc)/scale
  1340. def log_psf(x, *args):
  1341. x, lj = np.unique(x, return_counts=True) # fast for sorted x
  1342. cdf_data = self._cdf(x, *args) if x.size else []
  1343. if not (x.size and 1 - cdf_data[-1] <= 0):
  1344. cdf = np.concatenate(([0], cdf_data, [1]))
  1345. lj = np.concatenate((lj, [1]))
  1346. else:
  1347. cdf = np.concatenate(([0], cdf_data))
  1348. # here we could use logcdf w/ logsumexp trick to take differences,
  1349. # but in the context of the method, it seems unlikely to matter
  1350. return lj * np.log(np.diff(cdf) / lj)
  1351. return self._nlff_and_penalty(x, args, log_psf)
  1352. class _ShapeInfo:
  1353. def __init__(self, name, integrality=False, domain=(-np.inf, np.inf),
  1354. inclusive=(True, True)):
  1355. self.name = name
  1356. self.integrality = integrality
  1357. self.endpoints = domain
  1358. self.inclusive = inclusive
  1359. domain = list(domain)
  1360. if np.isfinite(domain[0]) and not inclusive[0]:
  1361. domain[0] = np.nextafter(domain[0], np.inf)
  1362. if np.isfinite(domain[1]) and not inclusive[1]:
  1363. domain[1] = np.nextafter(domain[1], -np.inf)
  1364. self.domain = domain
  1365. def _get_fixed_fit_value(kwds, names):
  1366. """
  1367. Given names such as ``['f0', 'fa', 'fix_a']``, check that there is
  1368. at most one non-None value in `kwds` associated with those names.
  1369. Return that value, or None if none of the names occur in `kwds`.
  1370. As a side effect, all occurrences of those names in `kwds` are
  1371. removed.
  1372. """
  1373. vals = [(name, kwds.pop(name)) for name in names if name in kwds]
  1374. if len(vals) > 1:
  1375. repeated = [name for name, val in vals]
  1376. raise ValueError("fit method got multiple keyword arguments to "
  1377. "specify the same fixed parameter: " +
  1378. ', '.join(repeated))
  1379. return vals[0][1] if vals else None
  1380. # continuous random variables: implement maybe later
  1381. #
  1382. # hf --- Hazard Function (PDF / SF)
  1383. # chf --- Cumulative hazard function (-log(SF))
  1384. # psf --- Probability sparsity function (reciprocal of the pdf) in
  1385. # units of percent-point-function (as a function of q).
  1386. # Also, the derivative of the percent-point function.
  1387. class rv_continuous(rv_generic):
  1388. """A generic continuous random variable class meant for subclassing.
  1389. `rv_continuous` is a base class to construct specific distribution classes
  1390. and instances for continuous random variables. It cannot be used
  1391. directly as a distribution.
  1392. Parameters
  1393. ----------
  1394. momtype : int, optional
  1395. The type of generic moment calculation to use: 0 for pdf, 1 (default)
  1396. for ppf.
  1397. a : float, optional
  1398. Lower bound of the support of the distribution, default is minus
  1399. infinity.
  1400. b : float, optional
  1401. Upper bound of the support of the distribution, default is plus
  1402. infinity.
  1403. xtol : float, optional
  1404. The tolerance for fixed point calculation for generic ppf.
  1405. badvalue : float, optional
  1406. The value in a result arrays that indicates a value that for which
  1407. some argument restriction is violated, default is np.nan.
  1408. name : str, optional
  1409. The name of the instance. This string is used to construct the default
  1410. example for distributions.
  1411. longname : str, optional
  1412. This string is used as part of the first line of the docstring returned
  1413. when a subclass has no docstring of its own. Note: `longname` exists
  1414. for backwards compatibility, do not use for new subclasses.
  1415. shapes : str, optional
  1416. The shape of the distribution. For example ``"m, n"`` for a
  1417. distribution that takes two integers as the two shape arguments for all
  1418. its methods. If not provided, shape parameters will be inferred from
  1419. the signature of the private methods, ``_pdf`` and ``_cdf`` of the
  1420. instance.
  1421. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  1422. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  1423. singleton is used.
  1424. If `seed` is an int, a new ``RandomState`` instance is used,
  1425. seeded with `seed`.
  1426. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  1427. that instance is used.
  1428. Attributes
  1429. ----------
  1430. a, b : float, optional
  1431. Lower/upper bound of the support of the unshifted/unscaled distribution.
  1432. This value is unaffected by the `loc` and `scale` parameters.
  1433. To calculate the support of the shifted/scaled distribution,
  1434. use the `support` method.
  1435. Methods
  1436. -------
  1437. rvs
  1438. pdf
  1439. logpdf
  1440. cdf
  1441. logcdf
  1442. sf
  1443. logsf
  1444. ppf
  1445. isf
  1446. moment
  1447. stats
  1448. entropy
  1449. expect
  1450. median
  1451. mean
  1452. std
  1453. var
  1454. interval
  1455. __call__
  1456. fit
  1457. fit_loc_scale
  1458. nnlf
  1459. support
  1460. Notes
  1461. -----
  1462. Public methods of an instance of a distribution class (e.g., ``pdf``,
  1463. ``cdf``) check their arguments and pass valid arguments to private,
  1464. computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid
  1465. if it is within the support of the distribution.
  1466. Whether a shape parameter is valid is decided by an ``_argcheck`` method
  1467. (which defaults to checking that its arguments are strictly positive.)
  1468. **Subclassing**
  1469. New random variables can be defined by subclassing the `rv_continuous` class
  1470. and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized
  1471. to location 0 and scale 1).
  1472. If positive argument checking is not correct for your RV
  1473. then you will also need to re-define the ``_argcheck`` method.
  1474. For most of the scipy.stats distributions, the support interval doesn't
  1475. depend on the shape parameters. ``x`` being in the support interval is
  1476. equivalent to ``self.a <= x <= self.b``. If either of the endpoints of
  1477. the support do depend on the shape parameters, then
  1478. i) the distribution must implement the ``_get_support`` method; and
  1479. ii) those dependent endpoints must be omitted from the distribution's
  1480. call to the ``rv_continuous`` initializer.
  1481. Correct, but potentially slow defaults exist for the remaining
  1482. methods but for speed and/or accuracy you can over-ride::
  1483. _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf
  1484. The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``,
  1485. applied to a uniform random variate. In order to generate random variates
  1486. efficiently, either the default ``_ppf`` needs to be overwritten (e.g.
  1487. if the inverse cdf can expressed in an explicit form) or a sampling
  1488. method needs to be implemented in a custom ``_rvs`` method.
  1489. If possible, you should override ``_isf``, ``_sf`` or ``_logsf``.
  1490. The main reason would be to improve numerical accuracy: for example,
  1491. the survival function ``_sf`` is computed as ``1 - _cdf`` which can
  1492. result in loss of precision if ``_cdf(x)`` is close to one.
  1493. **Methods that can be overwritten by subclasses**
  1494. ::
  1495. _rvs
  1496. _pdf
  1497. _cdf
  1498. _sf
  1499. _ppf
  1500. _isf
  1501. _stats
  1502. _munp
  1503. _entropy
  1504. _argcheck
  1505. _get_support
  1506. There are additional (internal and private) generic methods that can
  1507. be useful for cross-checking and for debugging, but might work in all
  1508. cases when directly called.
  1509. A note on ``shapes``: subclasses need not specify them explicitly. In this
  1510. case, `shapes` will be automatically deduced from the signatures of the
  1511. overridden methods (`pdf`, `cdf` etc).
  1512. If, for some reason, you prefer to avoid relying on introspection, you can
  1513. specify ``shapes`` explicitly as an argument to the instance constructor.
  1514. **Frozen Distributions**
  1515. Normally, you must provide shape parameters (and, optionally, location and
  1516. scale parameters to each call of a method of a distribution.
  1517. Alternatively, the object may be called (as a function) to fix the shape,
  1518. location, and scale parameters returning a "frozen" continuous RV object:
  1519. rv = generic(<shape(s)>, loc=0, scale=1)
  1520. `rv_frozen` object with the same methods but holding the given shape,
  1521. location, and scale fixed
  1522. **Statistics**
  1523. Statistics are computed using numerical integration by default.
  1524. For speed you can redefine this using ``_stats``:
  1525. - take shape parameters and return mu, mu2, g1, g2
  1526. - If you can't compute one of these, return it as None
  1527. - Can also be defined with a keyword argument ``moments``, which is a
  1528. string composed of "m", "v", "s", and/or "k".
  1529. Only the components appearing in string should be computed and
  1530. returned in the order "m", "v", "s", or "k" with missing values
  1531. returned as None.
  1532. Alternatively, you can override ``_munp``, which takes ``n`` and shape
  1533. parameters and returns the n-th non-central moment of the distribution.
  1534. **Deepcopying / Pickling**
  1535. If a distribution or frozen distribution is deepcopied (pickled/unpickled,
  1536. etc.), any underlying random number generator is deepcopied with it. An
  1537. implication is that if a distribution relies on the singleton RandomState
  1538. before copying, it will rely on a copy of that random state after copying,
  1539. and ``np.random.seed`` will no longer control the state.
  1540. Examples
  1541. --------
  1542. To create a new Gaussian distribution, we would do the following:
  1543. >>> from scipy.stats import rv_continuous
  1544. >>> class gaussian_gen(rv_continuous):
  1545. ... "Gaussian distribution"
  1546. ... def _pdf(self, x):
  1547. ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi)
  1548. >>> gaussian = gaussian_gen(name='gaussian')
  1549. ``scipy.stats`` distributions are *instances*, so here we subclass
  1550. `rv_continuous` and create an instance. With this, we now have
  1551. a fully functional distribution with all relevant methods automagically
  1552. generated by the framework.
  1553. Note that above we defined a standard normal distribution, with zero mean
  1554. and unit variance. Shifting and scaling of the distribution can be done
  1555. by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)``
  1556. essentially computes ``y = (x - loc) / scale`` and
  1557. ``gaussian._pdf(y) / scale``.
  1558. """
  1559. def __init__(self, momtype=1, a=None, b=None, xtol=1e-14,
  1560. badvalue=None, name=None, longname=None,
  1561. shapes=None, seed=None):
  1562. super().__init__(seed)
  1563. # save the ctor parameters, cf generic freeze
  1564. self._ctor_param = dict(
  1565. momtype=momtype, a=a, b=b, xtol=xtol,
  1566. badvalue=badvalue, name=name, longname=longname,
  1567. shapes=shapes, seed=seed)
  1568. if badvalue is None:
  1569. badvalue = nan
  1570. if name is None:
  1571. name = 'Distribution'
  1572. self.badvalue = badvalue
  1573. self.name = name
  1574. self.a = a
  1575. self.b = b
  1576. if a is None:
  1577. self.a = -inf
  1578. if b is None:
  1579. self.b = inf
  1580. self.xtol = xtol
  1581. self.moment_type = momtype
  1582. self.shapes = shapes
  1583. self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],
  1584. locscale_in='loc=0, scale=1',
  1585. locscale_out='loc, scale')
  1586. self._attach_methods()
  1587. if longname is None:
  1588. if name[0] in ['aeiouAEIOU']:
  1589. hstr = "An "
  1590. else:
  1591. hstr = "A "
  1592. longname = hstr + name
  1593. if sys.flags.optimize < 2:
  1594. # Skip adding docstrings if interpreter is run with -OO
  1595. if self.__doc__ is None:
  1596. self._construct_default_doc(longname=longname,
  1597. docdict=docdict,
  1598. discrete='continuous')
  1599. else:
  1600. dct = dict(distcont)
  1601. self._construct_doc(docdict, dct.get(self.name))
  1602. def __getstate__(self):
  1603. dct = self.__dict__.copy()
  1604. # these methods will be remade in __setstate__
  1605. # _random_state attribute is taken care of by rv_generic
  1606. attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
  1607. "_cdfvec", "_ppfvec", "vecentropy", "generic_moment"]
  1608. [dct.pop(attr, None) for attr in attrs]
  1609. return dct
  1610. def _attach_methods(self):
  1611. """
  1612. Attaches dynamically created methods to the rv_continuous instance.
  1613. """
  1614. # _attach_methods is responsible for calling _attach_argparser_methods
  1615. self._attach_argparser_methods()
  1616. # nin correction
  1617. self._ppfvec = vectorize(self._ppf_single, otypes='d')
  1618. self._ppfvec.nin = self.numargs + 1
  1619. self.vecentropy = vectorize(self._entropy, otypes='d')
  1620. self._cdfvec = vectorize(self._cdf_single, otypes='d')
  1621. self._cdfvec.nin = self.numargs + 1
  1622. if self.moment_type == 0:
  1623. self.generic_moment = vectorize(self._mom0_sc, otypes='d')
  1624. else:
  1625. self.generic_moment = vectorize(self._mom1_sc, otypes='d')
  1626. # Because of the *args argument of _mom0_sc, vectorize cannot count the
  1627. # number of arguments correctly.
  1628. self.generic_moment.nin = self.numargs + 1
  1629. def _updated_ctor_param(self):
  1630. """Return the current version of _ctor_param, possibly updated by user.
  1631. Used by freezing.
  1632. Keep this in sync with the signature of __init__.
  1633. """
  1634. dct = self._ctor_param.copy()
  1635. dct['a'] = self.a
  1636. dct['b'] = self.b
  1637. dct['xtol'] = self.xtol
  1638. dct['badvalue'] = self.badvalue
  1639. dct['name'] = self.name
  1640. dct['shapes'] = self.shapes
  1641. return dct
  1642. def _ppf_to_solve(self, x, q, *args):
  1643. return self.cdf(*(x, )+args)-q
  1644. def _ppf_single(self, q, *args):
  1645. factor = 10.
  1646. left, right = self._get_support(*args)
  1647. if np.isinf(left):
  1648. left = min(-factor, right)
  1649. while self._ppf_to_solve(left, q, *args) > 0.:
  1650. left, right = left * factor, left
  1651. # left is now such that cdf(left) <= q
  1652. # if right has changed, then cdf(right) > q
  1653. if np.isinf(right):
  1654. right = max(factor, left)
  1655. while self._ppf_to_solve(right, q, *args) < 0.:
  1656. left, right = right, right * factor
  1657. # right is now such that cdf(right) >= q
  1658. return optimize.brentq(self._ppf_to_solve,
  1659. left, right, args=(q,)+args, xtol=self.xtol)
  1660. # moment from definition
  1661. def _mom_integ0(self, x, m, *args):
  1662. return x**m * self.pdf(x, *args)
  1663. def _mom0_sc(self, m, *args):
  1664. _a, _b = self._get_support(*args)
  1665. return integrate.quad(self._mom_integ0, _a, _b,
  1666. args=(m,)+args)[0]
  1667. # moment calculated using ppf
  1668. def _mom_integ1(self, q, m, *args):
  1669. return (self.ppf(q, *args))**m
  1670. def _mom1_sc(self, m, *args):
  1671. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  1672. def _pdf(self, x, *args):
  1673. return _derivative(self._cdf, x, dx=1e-5, args=args, order=5)
  1674. # Could also define any of these
  1675. def _logpdf(self, x, *args):
  1676. p = self._pdf(x, *args)
  1677. with np.errstate(divide='ignore'):
  1678. return log(p)
  1679. def _logpxf(self, x, *args):
  1680. # continuous distributions have PDF, discrete have PMF, but sometimes
  1681. # the distinction doesn't matter. This lets us use `_logpxf` for both
  1682. # discrete and continuous distributions.
  1683. return self._logpdf(x, *args)
  1684. def _cdf_single(self, x, *args):
  1685. _a, _b = self._get_support(*args)
  1686. return integrate.quad(self._pdf, _a, x, args=args)[0]
  1687. def _cdf(self, x, *args):
  1688. return self._cdfvec(x, *args)
  1689. def _logcdf(self, x, *args):
  1690. median = self._ppf(0.5, *args)
  1691. with np.errstate(divide='ignore'):
  1692. return xpx.apply_where(
  1693. x < median, (x,) + args,
  1694. lambda x, *args: np.log(self._cdf(x, *args)),
  1695. lambda x, *args: np.log1p(-self._sf(x, *args)))
  1696. def _logsf(self, x, *args):
  1697. median = self._ppf(0.5, *args)
  1698. with np.errstate(divide='ignore'):
  1699. return xpx.apply_where(
  1700. x > median, (x,) + args,
  1701. lambda x, *args: np.log(self._sf(x, *args)),
  1702. lambda x, *args: np.log1p(-self._cdf(x, *args)))
  1703. # generic _argcheck, _sf, _ppf, _isf, _rvs are defined
  1704. # in rv_generic
  1705. def pdf(self, x, *args, **kwds):
  1706. """Probability density function at x of the given RV.
  1707. Parameters
  1708. ----------
  1709. x : array_like
  1710. quantiles
  1711. arg1, arg2, arg3,... : array_like
  1712. The shape parameter(s) for the distribution (see docstring of the
  1713. instance object for more information)
  1714. loc : array_like, optional
  1715. location parameter (default=0)
  1716. scale : array_like, optional
  1717. scale parameter (default=1)
  1718. Returns
  1719. -------
  1720. pdf : ndarray
  1721. Probability density function evaluated at x
  1722. """
  1723. args, loc, scale = self._parse_args(*args, **kwds)
  1724. x, loc, scale = map(asarray, (x, loc, scale))
  1725. args = tuple(map(asarray, args))
  1726. dtyp = np.promote_types(x.dtype, np.float64)
  1727. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1728. cond0 = self._argcheck(*args) & (scale > 0)
  1729. cond1 = self._support_mask(x, *args) & (scale > 0)
  1730. cond = cond0 & cond1
  1731. output = zeros(shape(cond), dtyp)
  1732. putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
  1733. if np.any(cond):
  1734. goodargs = argsreduce(cond, *((x,)+args+(scale,)))
  1735. scale, goodargs = goodargs[-1], goodargs[:-1]
  1736. place(output, cond, self._pdf(*goodargs) / scale)
  1737. if output.ndim == 0:
  1738. return output[()]
  1739. return output
  1740. def logpdf(self, x, *args, **kwds):
  1741. """Log of the probability density function at x of the given RV.
  1742. This uses a more numerically accurate calculation if available.
  1743. Parameters
  1744. ----------
  1745. x : array_like
  1746. quantiles
  1747. arg1, arg2, arg3,... : array_like
  1748. The shape parameter(s) for the distribution (see docstring of the
  1749. instance object for more information)
  1750. loc : array_like, optional
  1751. location parameter (default=0)
  1752. scale : array_like, optional
  1753. scale parameter (default=1)
  1754. Returns
  1755. -------
  1756. logpdf : array_like
  1757. Log of the probability density function evaluated at x
  1758. """
  1759. args, loc, scale = self._parse_args(*args, **kwds)
  1760. x, loc, scale = map(asarray, (x, loc, scale))
  1761. args = tuple(map(asarray, args))
  1762. dtyp = np.promote_types(x.dtype, np.float64)
  1763. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1764. cond0 = self._argcheck(*args) & (scale > 0)
  1765. cond1 = self._support_mask(x, *args) & (scale > 0)
  1766. cond = cond0 & cond1
  1767. output = empty(shape(cond), dtyp)
  1768. output.fill(-inf)
  1769. putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
  1770. if np.any(cond):
  1771. goodargs = argsreduce(cond, *((x,)+args+(scale,)))
  1772. scale, goodargs = goodargs[-1], goodargs[:-1]
  1773. place(output, cond, self._logpdf(*goodargs) - log(scale))
  1774. if output.ndim == 0:
  1775. return output[()]
  1776. return output
  1777. def cdf(self, x, *args, **kwds):
  1778. """
  1779. Cumulative distribution function of the given RV.
  1780. Parameters
  1781. ----------
  1782. x : array_like
  1783. quantiles
  1784. arg1, arg2, arg3,... : array_like
  1785. The shape parameter(s) for the distribution (see docstring of the
  1786. instance object for more information)
  1787. loc : array_like, optional
  1788. location parameter (default=0)
  1789. scale : array_like, optional
  1790. scale parameter (default=1)
  1791. Returns
  1792. -------
  1793. cdf : ndarray
  1794. Cumulative distribution function evaluated at `x`
  1795. """
  1796. args, loc, scale = self._parse_args(*args, **kwds)
  1797. x, loc, scale = map(asarray, (x, loc, scale))
  1798. args = tuple(map(asarray, args))
  1799. _a, _b = self._get_support(*args)
  1800. dtyp = np.promote_types(x.dtype, np.float64)
  1801. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1802. cond0 = self._argcheck(*args) & (scale > 0)
  1803. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1804. cond2 = (x >= np.asarray(_b)) & cond0
  1805. cond = cond0 & cond1
  1806. output = zeros(shape(cond), dtyp)
  1807. place(output, (1-cond0)+np.isnan(x), self.badvalue)
  1808. place(output, cond2, 1.0)
  1809. if np.any(cond): # call only if at least 1 entry
  1810. goodargs = argsreduce(cond, *((x,)+args))
  1811. place(output, cond, self._cdf(*goodargs))
  1812. if output.ndim == 0:
  1813. return output[()]
  1814. return output
  1815. def logcdf(self, x, *args, **kwds):
  1816. """Log of the cumulative distribution function at x of the given RV.
  1817. Parameters
  1818. ----------
  1819. x : array_like
  1820. quantiles
  1821. arg1, arg2, arg3,... : array_like
  1822. The shape parameter(s) for the distribution (see docstring of the
  1823. instance object for more information)
  1824. loc : array_like, optional
  1825. location parameter (default=0)
  1826. scale : array_like, optional
  1827. scale parameter (default=1)
  1828. Returns
  1829. -------
  1830. logcdf : array_like
  1831. Log of the cumulative distribution function evaluated at x
  1832. """
  1833. args, loc, scale = self._parse_args(*args, **kwds)
  1834. x, loc, scale = map(asarray, (x, loc, scale))
  1835. args = tuple(map(asarray, args))
  1836. _a, _b = self._get_support(*args)
  1837. dtyp = np.promote_types(x.dtype, np.float64)
  1838. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1839. cond0 = self._argcheck(*args) & (scale > 0)
  1840. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1841. cond2 = (x >= _b) & cond0
  1842. cond = cond0 & cond1
  1843. output = empty(shape(cond), dtyp)
  1844. output.fill(-inf)
  1845. place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue)
  1846. place(output, cond2, 0.0)
  1847. if np.any(cond): # call only if at least 1 entry
  1848. goodargs = argsreduce(cond, *((x,)+args))
  1849. place(output, cond, self._logcdf(*goodargs))
  1850. if output.ndim == 0:
  1851. return output[()]
  1852. return output
  1853. def sf(self, x, *args, **kwds):
  1854. """Survival function (1 - `cdf`) at x of the given RV.
  1855. Parameters
  1856. ----------
  1857. x : array_like
  1858. quantiles
  1859. arg1, arg2, arg3,... : array_like
  1860. The shape parameter(s) for the distribution (see docstring of the
  1861. instance object for more information)
  1862. loc : array_like, optional
  1863. location parameter (default=0)
  1864. scale : array_like, optional
  1865. scale parameter (default=1)
  1866. Returns
  1867. -------
  1868. sf : array_like
  1869. Survival function evaluated at x
  1870. """
  1871. args, loc, scale = self._parse_args(*args, **kwds)
  1872. x, loc, scale = map(asarray, (x, loc, scale))
  1873. args = tuple(map(asarray, args))
  1874. _a, _b = self._get_support(*args)
  1875. dtyp = np.promote_types(x.dtype, np.float64)
  1876. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1877. cond0 = self._argcheck(*args) & (scale > 0)
  1878. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1879. cond2 = cond0 & (x <= _a)
  1880. cond = cond0 & cond1
  1881. output = zeros(shape(cond), dtyp)
  1882. place(output, (1-cond0)+np.isnan(x), self.badvalue)
  1883. place(output, cond2, 1.0)
  1884. if np.any(cond):
  1885. goodargs = argsreduce(cond, *((x,)+args))
  1886. place(output, cond, self._sf(*goodargs))
  1887. if output.ndim == 0:
  1888. return output[()]
  1889. return output
  1890. def logsf(self, x, *args, **kwds):
  1891. """Log of the survival function of the given RV.
  1892. Returns the log of the "survival function," defined as (1 - `cdf`),
  1893. evaluated at `x`.
  1894. Parameters
  1895. ----------
  1896. x : array_like
  1897. quantiles
  1898. arg1, arg2, arg3,... : array_like
  1899. The shape parameter(s) for the distribution (see docstring of the
  1900. instance object for more information)
  1901. loc : array_like, optional
  1902. location parameter (default=0)
  1903. scale : array_like, optional
  1904. scale parameter (default=1)
  1905. Returns
  1906. -------
  1907. logsf : ndarray
  1908. Log of the survival function evaluated at `x`.
  1909. """
  1910. args, loc, scale = self._parse_args(*args, **kwds)
  1911. x, loc, scale = map(asarray, (x, loc, scale))
  1912. args = tuple(map(asarray, args))
  1913. _a, _b = self._get_support(*args)
  1914. dtyp = np.promote_types(x.dtype, np.float64)
  1915. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1916. cond0 = self._argcheck(*args) & (scale > 0)
  1917. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1918. cond2 = cond0 & (x <= _a)
  1919. cond = cond0 & cond1
  1920. output = empty(shape(cond), dtyp)
  1921. output.fill(-inf)
  1922. place(output, (1-cond0)+np.isnan(x), self.badvalue)
  1923. place(output, cond2, 0.0)
  1924. if np.any(cond):
  1925. goodargs = argsreduce(cond, *((x,)+args))
  1926. place(output, cond, self._logsf(*goodargs))
  1927. if output.ndim == 0:
  1928. return output[()]
  1929. return output
  1930. def ppf(self, q, *args, **kwds):
  1931. """Percent point function (inverse of `cdf`) at q of the given RV.
  1932. Parameters
  1933. ----------
  1934. q : array_like
  1935. lower tail probability
  1936. arg1, arg2, arg3,... : array_like
  1937. The shape parameter(s) for the distribution (see docstring of the
  1938. instance object for more information)
  1939. loc : array_like, optional
  1940. location parameter (default=0)
  1941. scale : array_like, optional
  1942. scale parameter (default=1)
  1943. Returns
  1944. -------
  1945. x : array_like
  1946. quantile corresponding to the lower tail probability q.
  1947. """
  1948. args, loc, scale = self._parse_args(*args, **kwds)
  1949. q, loc, scale = map(asarray, (q, loc, scale))
  1950. args = tuple(map(asarray, args))
  1951. _a, _b = self._get_support(*args)
  1952. cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
  1953. cond1 = (0 < q) & (q < 1)
  1954. cond2 = cond0 & (q == 0)
  1955. cond3 = cond0 & (q == 1)
  1956. cond = cond0 & cond1
  1957. output = np.full(shape(cond), fill_value=self.badvalue)
  1958. lower_bound = _a * scale + loc
  1959. upper_bound = _b * scale + loc
  1960. place(output, cond2, argsreduce(cond2, lower_bound)[0])
  1961. place(output, cond3, argsreduce(cond3, upper_bound)[0])
  1962. if np.any(cond): # call only if at least 1 entry
  1963. goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
  1964. scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
  1965. place(output, cond, self._ppf(*goodargs) * scale + loc)
  1966. if output.ndim == 0:
  1967. return output[()]
  1968. return output
  1969. def isf(self, q, *args, **kwds):
  1970. """Inverse survival function (inverse of `sf`) at q of the given RV.
  1971. Parameters
  1972. ----------
  1973. q : array_like
  1974. upper tail probability
  1975. arg1, arg2, arg3,... : array_like
  1976. The shape parameter(s) for the distribution (see docstring of the
  1977. instance object for more information)
  1978. loc : array_like, optional
  1979. location parameter (default=0)
  1980. scale : array_like, optional
  1981. scale parameter (default=1)
  1982. Returns
  1983. -------
  1984. x : ndarray or scalar
  1985. Quantile corresponding to the upper tail probability q.
  1986. """
  1987. args, loc, scale = self._parse_args(*args, **kwds)
  1988. q, loc, scale = map(asarray, (q, loc, scale))
  1989. args = tuple(map(asarray, args))
  1990. _a, _b = self._get_support(*args)
  1991. cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
  1992. cond1 = (0 < q) & (q < 1)
  1993. cond2 = cond0 & (q == 1)
  1994. cond3 = cond0 & (q == 0)
  1995. cond = cond0 & cond1
  1996. output = np.full(shape(cond), fill_value=self.badvalue)
  1997. lower_bound = _a * scale + loc
  1998. upper_bound = _b * scale + loc
  1999. place(output, cond2, argsreduce(cond2, lower_bound)[0])
  2000. place(output, cond3, argsreduce(cond3, upper_bound)[0])
  2001. if np.any(cond):
  2002. goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
  2003. scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
  2004. place(output, cond, self._isf(*goodargs) * scale + loc)
  2005. if output.ndim == 0:
  2006. return output[()]
  2007. return output
  2008. def _unpack_loc_scale(self, theta):
  2009. try:
  2010. loc = theta[-2]
  2011. scale = theta[-1]
  2012. args = tuple(theta[:-2])
  2013. except IndexError as e:
  2014. raise ValueError("Not enough input arguments.") from e
  2015. return loc, scale, args
  2016. def _nnlf_and_penalty(self, x, args):
  2017. """
  2018. Compute the penalized negative log-likelihood for the
  2019. "standardized" data (i.e. already shifted by loc and
  2020. scaled by scale) for the shape parameters in `args`.
  2021. `x` can be a 1D numpy array or a CensoredData instance.
  2022. """
  2023. if isinstance(x, CensoredData):
  2024. # Filter out the data that is not in the support.
  2025. xs = x._supported(*self._get_support(*args))
  2026. n_bad = len(x) - len(xs)
  2027. i1, i2 = xs._interval.T
  2028. terms = [
  2029. # logpdf of the noncensored data.
  2030. self._logpdf(xs._uncensored, *args),
  2031. # logcdf of the left-censored data.
  2032. self._logcdf(xs._left, *args),
  2033. # logsf of the right-censored data.
  2034. self._logsf(xs._right, *args),
  2035. # log of probability of the interval-censored data.
  2036. np.log(self._delta_cdf(i1, i2, *args)),
  2037. ]
  2038. else:
  2039. cond0 = ~self._support_mask(x, *args)
  2040. n_bad = np.count_nonzero(cond0)
  2041. if n_bad > 0:
  2042. x = argsreduce(~cond0, x)[0]
  2043. terms = [self._logpdf(x, *args)]
  2044. totals, bad_counts = zip(*[_sum_finite(term) for term in terms])
  2045. total = sum(totals)
  2046. n_bad += sum(bad_counts)
  2047. return -total + n_bad * _LOGXMAX * 100
  2048. def _penalized_nnlf(self, theta, x):
  2049. """Penalized negative loglikelihood function.
  2050. i.e., - sum (log pdf(x, theta), axis=0) + penalty
  2051. where theta are the parameters (including loc and scale)
  2052. """
  2053. loc, scale, args = self._unpack_loc_scale(theta)
  2054. if not self._argcheck(*args) or scale <= 0:
  2055. return inf
  2056. if isinstance(x, CensoredData):
  2057. x = (x - loc) / scale
  2058. n_log_scale = (len(x) - x.num_censored()) * log(scale)
  2059. else:
  2060. x = (x - loc) / scale
  2061. n_log_scale = len(x) * log(scale)
  2062. return self._nnlf_and_penalty(x, args) + n_log_scale
  2063. def _fitstart(self, data, args=None):
  2064. """Starting point for fit (shape arguments + loc + scale)."""
  2065. if args is None:
  2066. args = (1.0,)*self.numargs
  2067. loc, scale = self._fit_loc_scale_support(data, *args)
  2068. return args + (loc, scale)
  2069. def _reduce_func(self, args, kwds, data=None):
  2070. """
  2071. Return the (possibly reduced) function to optimize in order to find MLE
  2072. estimates for the .fit method.
  2073. """
  2074. # Convert fixed shape parameters to the standard numeric form: e.g. for
  2075. # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
  2076. # for `f0`, `fa` or 'fix_a'. The following converts the latter two
  2077. # into the first (numeric) form.
  2078. shapes = []
  2079. if self.shapes:
  2080. shapes = self.shapes.replace(',', ' ').split()
  2081. for j, s in enumerate(shapes):
  2082. key = 'f' + str(j)
  2083. names = [key, 'f' + s, 'fix_' + s]
  2084. val = _get_fixed_fit_value(kwds, names)
  2085. if val is not None:
  2086. kwds[key] = val
  2087. args = list(args)
  2088. Nargs = len(args)
  2089. fixedn = []
  2090. names = [f'f{n}' for n in range(Nargs - 2)] + ['floc', 'fscale']
  2091. x0 = []
  2092. for n, key in enumerate(names):
  2093. if key in kwds:
  2094. fixedn.append(n)
  2095. args[n] = kwds.pop(key)
  2096. else:
  2097. x0.append(args[n])
  2098. methods = {"mle", "mm"}
  2099. method = kwds.pop('method', "mle").lower()
  2100. if method == "mm":
  2101. n_params = len(shapes) + 2 - len(fixedn)
  2102. exponents = (np.arange(1, n_params+1))[:, np.newaxis]
  2103. data_moments = np.sum(data[None, :]**exponents/len(data), axis=1)
  2104. def objective(theta, x):
  2105. return self._moment_error(theta, x, data_moments)
  2106. elif method == "mle":
  2107. objective = self._penalized_nnlf
  2108. else:
  2109. raise ValueError(f"Method '{method}' not available; "
  2110. f"must be one of {methods}")
  2111. if len(fixedn) == 0:
  2112. func = objective
  2113. restore = None
  2114. else:
  2115. if len(fixedn) == Nargs:
  2116. raise ValueError(
  2117. "All parameters fixed. There is nothing to optimize.")
  2118. def restore(args, theta):
  2119. # Replace with theta for all numbers not in fixedn
  2120. # This allows the non-fixed values to vary, but
  2121. # we still call self.nnlf with all parameters.
  2122. i = 0
  2123. for n in range(Nargs):
  2124. if n not in fixedn:
  2125. args[n] = theta[i]
  2126. i += 1
  2127. return args
  2128. def func(theta, x):
  2129. newtheta = restore(args[:], theta)
  2130. return objective(newtheta, x)
  2131. return x0, func, restore, args
  2132. def _moment_error(self, theta, x, data_moments):
  2133. loc, scale, args = self._unpack_loc_scale(theta)
  2134. if not self._argcheck(*args) or scale <= 0:
  2135. return inf
  2136. dist_moments = np.array([self.moment(i+1, *args, loc=loc, scale=scale)
  2137. for i in range(len(data_moments))])
  2138. if np.any(np.isnan(dist_moments)):
  2139. raise ValueError("Method of moments encountered a non-finite "
  2140. "distribution moment and cannot continue. "
  2141. "Consider trying method='MLE'.")
  2142. return (((data_moments - dist_moments) /
  2143. np.maximum(np.abs(data_moments), 1e-8))**2).sum()
  2144. def fit(self, data, *args, **kwds):
  2145. r"""
  2146. Return estimates of shape (if applicable), location, and scale
  2147. parameters from data. The default estimation method is Maximum
  2148. Likelihood Estimation (MLE), but Method of Moments (MM)
  2149. is also available.
  2150. Starting estimates for the fit are given by input arguments;
  2151. for any arguments not provided with starting estimates,
  2152. ``self._fitstart(data)`` is called to generate such.
  2153. One can hold some parameters fixed to specific values by passing in
  2154. keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
  2155. and ``floc`` and ``fscale`` (for location and scale parameters,
  2156. respectively).
  2157. Parameters
  2158. ----------
  2159. data : array_like or `CensoredData` instance
  2160. Data to use in estimating the distribution parameters.
  2161. arg1, arg2, arg3,... : floats, optional
  2162. Starting value(s) for any shape-characterizing arguments (those not
  2163. provided will be determined by a call to ``_fitstart(data)``).
  2164. No default value.
  2165. **kwds : floats, optional
  2166. - `loc`: initial guess of the distribution's location parameter.
  2167. - `scale`: initial guess of the distribution's scale parameter.
  2168. Special keyword arguments are recognized as holding certain
  2169. parameters fixed:
  2170. - f0, ..., fn : hold respective shape parameters fixed.
  2171. Alternatively, shape parameters to fix can be specified by name.
  2172. For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a``
  2173. are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
  2174. equivalent to ``f1``.
  2175. - floc : hold location parameter fixed to specified value.
  2176. - fscale : hold scale parameter fixed to specified value.
  2177. - optimizer : The optimizer to use. The optimizer must take
  2178. ``func`` and starting position as the first two arguments,
  2179. plus ``args`` (for extra arguments to pass to the
  2180. function to be optimized) and ``disp``.
  2181. The ``fit`` method calls the optimizer with ``disp=0`` to suppress output.
  2182. The optimizer must return the estimated parameters.
  2183. - method : The method to use. The default is "MLE" (Maximum
  2184. Likelihood Estimate); "MM" (Method of Moments)
  2185. is also available.
  2186. Raises
  2187. ------
  2188. TypeError, ValueError
  2189. If an input is invalid
  2190. `~scipy.stats.FitError`
  2191. If fitting fails or the fit produced would be invalid
  2192. Returns
  2193. -------
  2194. parameter_tuple : tuple of floats
  2195. Estimates for any shape parameters (if applicable), followed by
  2196. those for location and scale. For most random variables, shape
  2197. statistics will be returned, but there are exceptions (e.g.
  2198. ``norm``).
  2199. Notes
  2200. -----
  2201. With ``method="MLE"`` (default), the fit is computed by minimizing
  2202. the negative log-likelihood function. A large, finite penalty
  2203. (rather than infinite negative log-likelihood) is applied for
  2204. observations beyond the support of the distribution.
  2205. With ``method="MM"``, the fit is computed by minimizing the L2 norm
  2206. of the relative errors between the first *k* raw (about zero) data
  2207. moments and the corresponding distribution moments, where *k* is the
  2208. number of non-fixed parameters.
  2209. More precisely, the objective function is::
  2210. (((data_moments - dist_moments)
  2211. / np.maximum(np.abs(data_moments), 1e-8))**2).sum()
  2212. where the constant ``1e-8`` avoids division by zero in case of
  2213. vanishing data moments. Typically, this error norm can be reduced to
  2214. zero.
  2215. Note that the standard method of moments can produce parameters for
  2216. which some data are outside the support of the fitted distribution;
  2217. this implementation does nothing to prevent this.
  2218. For either method,
  2219. the returned answer is not guaranteed to be globally optimal; it
  2220. may only be locally optimal, or the optimization may fail altogether.
  2221. If the data contain any of ``np.nan``, ``np.inf``, or ``-np.inf``,
  2222. the `fit` method will raise a ``RuntimeError``.
  2223. When passing a ``CensoredData`` instance to ``data``, the log-likelihood
  2224. function is defined as:
  2225. .. math::
  2226. l(\pmb{\theta}; k) & = \sum
  2227. \log(f(k_u; \pmb{\theta}))
  2228. + \sum
  2229. \log(F(k_l; \pmb{\theta})) \\
  2230. & + \sum
  2231. \log(1 - F(k_r; \pmb{\theta})) \\
  2232. & + \sum
  2233. \log(F(k_{\text{high}, i}; \pmb{\theta})
  2234. - F(k_{\text{low}, i}; \pmb{\theta}))
  2235. where :math:`f` and :math:`F` are the pdf and cdf, respectively, of the
  2236. function being fitted, :math:`\pmb{\theta}` is the parameter vector,
  2237. :math:`u` are the indices of uncensored observations,
  2238. :math:`l` are the indices of left-censored observations,
  2239. :math:`r` are the indices of right-censored observations,
  2240. subscripts "low"/"high" denote endpoints of interval-censored observations, and
  2241. :math:`i` are the indices of interval-censored observations.
  2242. Examples
  2243. --------
  2244. Generate some data to fit: draw random variates from the `beta`
  2245. distribution
  2246. >>> import numpy as np
  2247. >>> from scipy.stats import beta
  2248. >>> a, b = 1., 2.
  2249. >>> rng = np.random.default_rng(172786373191770012695001057628748821561)
  2250. >>> x = beta.rvs(a, b, size=1000, random_state=rng)
  2251. Now we can fit all four parameters (``a``, ``b``, ``loc`` and
  2252. ``scale``):
  2253. >>> a1, b1, loc1, scale1 = beta.fit(x)
  2254. >>> a1, b1, loc1, scale1
  2255. (1.0198945204435628, 1.9484708982737828, 4.372241314917588e-05, 0.9979078845964814)
  2256. The fit can be done also using a custom optimizer:
  2257. >>> from scipy.optimize import minimize
  2258. >>> def custom_optimizer(func, x0, args=(), disp=0):
  2259. ... res = minimize(func, x0, args, method="slsqp", options={"disp": disp})
  2260. ... if res.success:
  2261. ... return res.x
  2262. ... raise RuntimeError('optimization routine failed')
  2263. >>> a1, b1, loc1, scale1 = beta.fit(x, method="MLE", optimizer=custom_optimizer)
  2264. >>> a1, b1, loc1, scale1
  2265. (1.0198821087258905, 1.948484145914738, 4.3705304486881485e-05, 0.9979104663953395)
  2266. We can also use some prior knowledge about the dataset: let's keep
  2267. ``loc`` and ``scale`` fixed:
  2268. >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)
  2269. >>> loc1, scale1
  2270. (0, 1)
  2271. We can also keep shape parameters fixed by using ``f``-keywords. To
  2272. keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
  2273. equivalently, ``fa=1``:
  2274. >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)
  2275. >>> a1
  2276. 1
  2277. Not all distributions return estimates for the shape parameters.
  2278. ``norm`` for example just returns estimates for location and scale:
  2279. >>> from scipy.stats import norm
  2280. >>> x = norm.rvs(a, b, size=1000, random_state=123)
  2281. >>> loc1, scale1 = norm.fit(x)
  2282. >>> loc1, scale1
  2283. (0.92087172783841631, 2.0015750750324668)
  2284. """ # noqa: E501
  2285. method = kwds.get('method', "mle").lower()
  2286. censored = isinstance(data, CensoredData)
  2287. if censored:
  2288. if method != 'mle':
  2289. raise ValueError('For censored data, the method must'
  2290. ' be "MLE".')
  2291. if data.num_censored() == 0:
  2292. # There are no censored values in data, so replace the
  2293. # CensoredData instance with a regular array.
  2294. data = data._uncensored
  2295. censored = False
  2296. Narg = len(args)
  2297. if Narg > self.numargs:
  2298. raise TypeError("Too many input arguments.")
  2299. # Check the finiteness of data only if data is not an instance of
  2300. # CensoredData. The arrays in a CensoredData instance have already
  2301. # been validated.
  2302. if not censored:
  2303. # Note: `ravel()` is called for backwards compatibility.
  2304. data = np.asarray(data).ravel()
  2305. if not np.isfinite(data).all():
  2306. raise ValueError("The data contains non-finite values.")
  2307. start = [None]*2
  2308. if (Narg < self.numargs) or not ('loc' in kwds and
  2309. 'scale' in kwds):
  2310. # get distribution specific starting locations
  2311. start = self._fitstart(data)
  2312. args += start[Narg:-2]
  2313. loc = kwds.pop('loc', start[-2])
  2314. scale = kwds.pop('scale', start[-1])
  2315. args += (loc, scale)
  2316. x0, func, restore, args = self._reduce_func(args, kwds, data=data)
  2317. optimizer = kwds.pop('optimizer', optimize.fmin)
  2318. # convert string to function in scipy.optimize
  2319. optimizer = _fit_determine_optimizer(optimizer)
  2320. # by now kwds must be empty, since everybody took what they needed
  2321. if kwds:
  2322. raise TypeError(f"Unknown arguments: {kwds}.")
  2323. # In some cases, method of moments can be done with fsolve/root
  2324. # instead of an optimizer, but sometimes no solution exists,
  2325. # especially when the user fixes parameters. Minimizing the sum
  2326. # of squares of the error generalizes to these cases.
  2327. vals = optimizer(func, x0, args=(data,), disp=0)
  2328. obj = func(vals, data)
  2329. if restore is not None:
  2330. vals = restore(args, vals)
  2331. vals = tuple(vals)
  2332. loc, scale, shapes = self._unpack_loc_scale(vals)
  2333. if not (np.all(self._argcheck(*shapes)) and scale > 0):
  2334. raise FitError("Optimization converged to parameters that are "
  2335. "outside the range allowed by the distribution.")
  2336. if method == 'mm':
  2337. if not np.isfinite(obj):
  2338. raise FitError("Optimization failed: either a data moment "
  2339. "or fitted distribution moment is "
  2340. "non-finite.")
  2341. return vals
  2342. def _fit_loc_scale_support(self, data, *args):
  2343. """Estimate loc and scale parameters from data accounting for support.
  2344. Parameters
  2345. ----------
  2346. data : array_like
  2347. Data to fit.
  2348. arg1, arg2, arg3,... : array_like
  2349. The shape parameter(s) for the distribution (see docstring of the
  2350. instance object for more information).
  2351. Returns
  2352. -------
  2353. Lhat : float
  2354. Estimated location parameter for the data.
  2355. Shat : float
  2356. Estimated scale parameter for the data.
  2357. """
  2358. if isinstance(data, CensoredData):
  2359. # For this estimate, "uncensor" the data by taking the
  2360. # given endpoints as the data for the left- or right-censored
  2361. # data, and the mean for the interval-censored data.
  2362. data = data._uncensor()
  2363. else:
  2364. data = np.asarray(data)
  2365. # Estimate location and scale according to the method of moments.
  2366. loc_hat, scale_hat = self.fit_loc_scale(data, *args)
  2367. # Compute the support according to the shape parameters.
  2368. self._argcheck(*args)
  2369. _a, _b = self._get_support(*args)
  2370. a, b = _a, _b
  2371. support_width = b - a
  2372. # If the support is empty then return the moment-based estimates.
  2373. if support_width <= 0:
  2374. return loc_hat, scale_hat
  2375. # Compute the proposed support according to the loc and scale
  2376. # estimates.
  2377. a_hat = loc_hat + a * scale_hat
  2378. b_hat = loc_hat + b * scale_hat
  2379. # Use the moment-based estimates if they are compatible with the data.
  2380. data_a = np.min(data)
  2381. data_b = np.max(data)
  2382. if a_hat < data_a and data_b < b_hat:
  2383. return loc_hat, scale_hat
  2384. # Otherwise find other estimates that are compatible with the data.
  2385. data_width = data_b - data_a
  2386. rel_margin = 0.1
  2387. margin = data_width * rel_margin
  2388. # For a finite interval, both the location and scale
  2389. # should have interesting values.
  2390. if support_width < np.inf:
  2391. loc_hat = (data_a - a) - margin
  2392. scale_hat = (data_width + 2 * margin) / support_width
  2393. return loc_hat, scale_hat
  2394. # For a one-sided interval, use only an interesting location parameter.
  2395. if a > -np.inf:
  2396. return (data_a - a) - margin, 1
  2397. elif b < np.inf:
  2398. return (data_b - b) + margin, 1
  2399. else:
  2400. raise RuntimeError
  2401. def fit_loc_scale(self, data, *args):
  2402. """
  2403. Estimate loc and scale parameters from data using 1st and 2nd moments.
  2404. Parameters
  2405. ----------
  2406. data : array_like
  2407. Data to fit.
  2408. arg1, arg2, arg3,... : array_like
  2409. The shape parameter(s) for the distribution (see docstring of the
  2410. instance object for more information).
  2411. Returns
  2412. -------
  2413. Lhat : float
  2414. Estimated location parameter for the data.
  2415. Shat : float
  2416. Estimated scale parameter for the data.
  2417. """
  2418. mu, mu2 = self.stats(*args, **{'moments': 'mv'})
  2419. tmp = asarray(data)
  2420. muhat = tmp.mean()
  2421. mu2hat = tmp.var()
  2422. Shat = sqrt(mu2hat / mu2)
  2423. with np.errstate(invalid='ignore'):
  2424. Lhat = muhat - Shat*mu
  2425. if not np.isfinite(Lhat):
  2426. Lhat = 0
  2427. if not (np.isfinite(Shat) and (0 < Shat)):
  2428. Shat = 1
  2429. return Lhat, Shat
  2430. def _entropy(self, *args):
  2431. def integ(x):
  2432. val = self._pdf(x, *args)
  2433. return entr(val)
  2434. # upper limit is often inf, so suppress warnings when integrating
  2435. _a, _b = self._get_support(*args)
  2436. with np.errstate(over='ignore'):
  2437. h = integrate.quad(integ, _a, _b)[0]
  2438. if not np.isnan(h):
  2439. return h
  2440. else:
  2441. # try with different limits if integration problems
  2442. low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
  2443. if np.isinf(_b):
  2444. upper = upp
  2445. else:
  2446. upper = _b
  2447. if np.isinf(_a):
  2448. lower = low
  2449. else:
  2450. lower = _a
  2451. return integrate.quad(integ, lower, upper)[0]
  2452. def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
  2453. conditional=False, **kwds):
  2454. """Calculate expected value of a function with respect to the
  2455. distribution by numerical integration.
  2456. The expected value of a function ``f(x)`` with respect to a
  2457. distribution ``dist`` is defined as::
  2458. ub
  2459. E[f(x)] = Integral(f(x) * dist.pdf(x)),
  2460. lb
  2461. where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)``
  2462. distribution. If the bounds ``lb`` and ``ub`` correspond to the
  2463. support of the distribution, e.g. ``[-inf, inf]`` in the default
  2464. case, then the integral is the unrestricted expectation of ``f(x)``.
  2465. Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0``
  2466. outside a finite interval in which case the expectation is
  2467. calculated within the finite range ``[lb, ub]``.
  2468. Parameters
  2469. ----------
  2470. func : callable, optional
  2471. Function for which integral is calculated. Takes only one argument.
  2472. The default is the identity mapping f(x) = x.
  2473. args : tuple, optional
  2474. Shape parameters of the distribution.
  2475. loc : float, optional
  2476. Location parameter (default=0).
  2477. scale : float, optional
  2478. Scale parameter (default=1).
  2479. lb, ub : scalar, optional
  2480. Lower and upper bound for integration. Default is set to the
  2481. support of the distribution.
  2482. conditional : bool, optional
  2483. If True, the integral is corrected by the conditional probability
  2484. of the integration interval. The return value is the expectation
  2485. of the function, conditional on being in the given interval.
  2486. Default is False.
  2487. Additional keyword arguments are passed to the integration routine.
  2488. Returns
  2489. -------
  2490. expect : float
  2491. The calculated expected value.
  2492. Notes
  2493. -----
  2494. The integration behavior of this function is inherited from
  2495. `scipy.integrate.quad`. Neither this function nor
  2496. `scipy.integrate.quad` can verify whether the integral exists or is
  2497. finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and
  2498. ``cauchy(0).expect()`` returns ``0.0``.
  2499. Likewise, the accuracy of results is not verified by the function.
  2500. `scipy.integrate.quad` is typically reliable for integrals that are
  2501. numerically favorable, but it is not guaranteed to converge
  2502. to a correct value for all possible intervals and integrands. This
  2503. function is provided for convenience; for critical applications,
  2504. check results against other integration methods.
  2505. The function is not vectorized.
  2506. Examples
  2507. --------
  2508. To understand the effect of the bounds of integration consider
  2509. >>> from scipy.stats import expon
  2510. >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0)
  2511. 0.6321205588285578
  2512. This is close to
  2513. >>> expon(1).cdf(2.0) - expon(1).cdf(0.0)
  2514. 0.6321205588285577
  2515. If ``conditional=True``
  2516. >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True)
  2517. 1.0000000000000002
  2518. The slight deviation from 1 is due to numerical integration.
  2519. The integrand can be treated as a complex-valued function
  2520. by passing ``complex_func=True`` to `scipy.integrate.quad` .
  2521. >>> import numpy as np
  2522. >>> from scipy.stats import vonmises
  2523. >>> res = vonmises(loc=2, kappa=1).expect(lambda x: np.exp(1j*x),
  2524. ... complex_func=True)
  2525. >>> res
  2526. (-0.18576377217422957+0.40590124735052263j)
  2527. >>> np.angle(res) # location of the (circular) distribution
  2528. 2.0
  2529. """
  2530. lockwds = {'loc': loc,
  2531. 'scale': scale}
  2532. self._argcheck(*args)
  2533. _a, _b = self._get_support(*args)
  2534. if func is None:
  2535. def fun(x, *args):
  2536. return x * self.pdf(x, *args, **lockwds)
  2537. else:
  2538. def fun(x, *args):
  2539. return func(x) * self.pdf(x, *args, **lockwds)
  2540. if lb is None:
  2541. lb = loc + _a * scale
  2542. if ub is None:
  2543. ub = loc + _b * scale
  2544. cdf_bounds = self.cdf([lb, ub], *args, **lockwds)
  2545. invfac = cdf_bounds[1] - cdf_bounds[0]
  2546. kwds['args'] = args
  2547. # split interval to help integrator w/ infinite support; see gh-8928
  2548. alpha = 0.05 # split body from tails at probability mass `alpha`
  2549. inner_bounds = np.array([alpha, 1-alpha])
  2550. cdf_inner_bounds = cdf_bounds[0] + invfac * inner_bounds
  2551. c, d = loc + self._ppf(cdf_inner_bounds, *args) * scale
  2552. # Do not silence warnings from integration.
  2553. lbc = integrate.quad(fun, lb, c, **kwds)[0]
  2554. cd = integrate.quad(fun, c, d, **kwds)[0]
  2555. dub = integrate.quad(fun, d, ub, **kwds)[0]
  2556. vals = (lbc + cd + dub)
  2557. if conditional:
  2558. vals /= invfac
  2559. return np.array(vals)[()] # make it a numpy scalar like other methods
  2560. def _param_info(self):
  2561. shape_info = self._shape_info()
  2562. loc_info = _ShapeInfo("loc", False, (-np.inf, np.inf), (False, False))
  2563. scale_info = _ShapeInfo("scale", False, (0, np.inf), (False, False))
  2564. param_info = shape_info + [loc_info, scale_info]
  2565. return param_info
  2566. # For now, _delta_cdf is a private method.
  2567. def _delta_cdf(self, x1, x2, *args, loc=0, scale=1):
  2568. """
  2569. Compute CDF(x2) - CDF(x1).
  2570. Where x1 is greater than the median, compute SF(x1) - SF(x2),
  2571. otherwise compute CDF(x2) - CDF(x1).
  2572. This function is only useful if `dist.sf(x, ...)` has an implementation
  2573. that is numerically more accurate than `1 - dist.cdf(x, ...)`.
  2574. """
  2575. cdf1 = self.cdf(x1, *args, loc=loc, scale=scale)
  2576. # Possible optimizations (needs investigation-these might not be
  2577. # better):
  2578. # * Use xpx.apply_where instead of np.where
  2579. # * Instead of cdf1 > 0.5, compare x1 to the median.
  2580. result = np.where(cdf1 > 0.5,
  2581. (self.sf(x1, *args, loc=loc, scale=scale)
  2582. - self.sf(x2, *args, loc=loc, scale=scale)),
  2583. self.cdf(x2, *args, loc=loc, scale=scale) - cdf1)
  2584. if result.ndim == 0:
  2585. result = result[()]
  2586. return result
  2587. # Helpers for the discrete distributions
  2588. def _drv2_moment(self, n, *args):
  2589. """Non-central moment of discrete distribution."""
  2590. def fun(x):
  2591. return np.power(x, n) * self._pmf(x, *args)
  2592. _a, _b = self._get_support(*args)
  2593. return _expect(fun, _a, _b, self._ppf(0.5, *args), self.inc)
  2594. def _drv2_ppfsingle(self, q, *args): # Use basic bisection algorithm
  2595. _a, _b = self._get_support(*args)
  2596. b = _b
  2597. a = _a
  2598. step = 10
  2599. if isinf(b): # Be sure ending point is > q
  2600. b = float(max(100*q, 10))
  2601. while 1:
  2602. if b >= _b:
  2603. qb = 1.0
  2604. break
  2605. qb = self._cdf(b, *args)
  2606. if (qb < q):
  2607. b += step
  2608. step *= 2
  2609. else:
  2610. break
  2611. else:
  2612. qb = 1.0
  2613. step = 10
  2614. if isinf(a): # be sure starting point < q
  2615. a = float(min(-100*q, -10))
  2616. while 1:
  2617. if a <= _a:
  2618. qb = 0.0
  2619. break
  2620. qa = self._cdf(a, *args)
  2621. if (qa > q):
  2622. a -= step
  2623. step *= 2
  2624. else:
  2625. break
  2626. else:
  2627. qa = self._cdf(a, *args)
  2628. if np.isinf(a) or np.isinf(b):
  2629. message = "Arguments that bracket the requested quantile could not be found."
  2630. raise RuntimeError(message)
  2631. # maximum number of bisections within the normal float64s
  2632. # maxiter = int(np.log2(finfo.max) - np.log2(finfo.smallest_normal))
  2633. maxiter = 2046
  2634. for i in range(maxiter):
  2635. if (qa == q):
  2636. return a
  2637. if (qb == q):
  2638. return b
  2639. if b <= a+1:
  2640. if qa > q:
  2641. return a
  2642. else:
  2643. return b
  2644. c = int((a+b)/2.0)
  2645. qc = self._cdf(c, *args)
  2646. if (qc < q):
  2647. if a != c:
  2648. a = c
  2649. else:
  2650. raise RuntimeError('updating stopped, endless loop')
  2651. qa = qc
  2652. elif (qc > q):
  2653. if b != c:
  2654. b = c
  2655. else:
  2656. raise RuntimeError('updating stopped, endless loop')
  2657. qb = qc
  2658. else:
  2659. return c
  2660. # Must over-ride one of _pmf or _cdf or pass in
  2661. # x_k, p(x_k) lists in initialization
  2662. class rv_discrete(rv_generic):
  2663. """A generic discrete random variable class meant for subclassing.
  2664. `rv_discrete` is a base class to construct specific distribution classes
  2665. and instances for discrete random variables. It can also be used
  2666. to construct an arbitrary distribution defined by a list of support
  2667. points and corresponding probabilities.
  2668. Parameters
  2669. ----------
  2670. a : float, optional
  2671. Lower bound of the support of the distribution, default: 0
  2672. b : float, optional
  2673. Upper bound of the support of the distribution, default: plus infinity
  2674. moment_tol : float, optional
  2675. The tolerance for the generic calculation of moments.
  2676. values : tuple of two array_like, optional
  2677. ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero
  2678. probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk``
  2679. and ``pk`` must have the same shape, and ``xk`` must be unique.
  2680. inc : integer, optional
  2681. Increment for the support of the distribution.
  2682. Default is 1. (other values have not been tested)
  2683. badvalue : float, optional
  2684. The value in a result arrays that indicates a value that for which
  2685. some argument restriction is violated, default is np.nan.
  2686. name : str, optional
  2687. The name of the instance. This string is used to construct the default
  2688. example for distributions.
  2689. longname : str, optional
  2690. This string is used as part of the first line of the docstring returned
  2691. when a subclass has no docstring of its own. Note: `longname` exists
  2692. for backwards compatibility, do not use for new subclasses.
  2693. shapes : str, optional
  2694. The shape of the distribution. For example "m, n" for a distribution
  2695. that takes two integers as the two shape arguments for all its methods
  2696. If not provided, shape parameters will be inferred from
  2697. the signatures of the private methods, ``_pmf`` and ``_cdf`` of
  2698. the instance.
  2699. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  2700. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2701. singleton is used.
  2702. If `seed` is an int, a new ``RandomState`` instance is used,
  2703. seeded with `seed`.
  2704. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  2705. that instance is used.
  2706. Attributes
  2707. ----------
  2708. a, b : float, optional
  2709. Lower/upper bound of the support of the unshifted/unscaled distribution.
  2710. This value is unaffected by the `loc` and `scale` parameters.
  2711. To calculate the support of the shifted/scaled distribution,
  2712. use the `support` method.
  2713. Methods
  2714. -------
  2715. rvs
  2716. pmf
  2717. logpmf
  2718. cdf
  2719. logcdf
  2720. sf
  2721. logsf
  2722. ppf
  2723. isf
  2724. moment
  2725. stats
  2726. entropy
  2727. expect
  2728. median
  2729. mean
  2730. std
  2731. var
  2732. interval
  2733. __call__
  2734. support
  2735. Notes
  2736. -----
  2737. This class is similar to `rv_continuous`. Whether a shape parameter is
  2738. valid is decided by an ``_argcheck`` method (which defaults to checking
  2739. that its arguments are strictly positive.)
  2740. The main differences are as follows.
  2741. - The support of the distribution is a set of integers.
  2742. - Instead of the probability density function, ``pdf`` (and the
  2743. corresponding private ``_pdf``), this class defines the
  2744. *probability mass function*, `pmf` (and the corresponding
  2745. private ``_pmf``.)
  2746. - There is no ``scale`` parameter.
  2747. - The default implementations of methods (e.g. ``_cdf``) are not designed
  2748. for distributions with support that is unbounded below (i.e.
  2749. ``a=-np.inf``), so they must be overridden.
  2750. To create a new discrete distribution, we would do the following:
  2751. >>> from scipy.stats import rv_discrete
  2752. >>> class poisson_gen(rv_discrete):
  2753. ... "Poisson distribution"
  2754. ... def _pmf(self, k, mu):
  2755. ... return exp(-mu) * mu**k / factorial(k)
  2756. and create an instance::
  2757. >>> poisson = poisson_gen(name="poisson")
  2758. Note that above we defined the Poisson distribution in the standard form.
  2759. Shifting the distribution can be done by providing the ``loc`` parameter
  2760. to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)``
  2761. delegates the work to ``poisson._pmf(x-loc, mu)``.
  2762. **Discrete distributions from a list of probabilities**
  2763. Alternatively, you can construct an arbitrary discrete rv defined
  2764. on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the
  2765. ``values`` keyword argument to the `rv_discrete` constructor.
  2766. **Deepcopying / Pickling**
  2767. If a distribution or frozen distribution is deepcopied (pickled/unpickled,
  2768. etc.), any underlying random number generator is deepcopied with it. An
  2769. implication is that if a distribution relies on the singleton RandomState
  2770. before copying, it will rely on a copy of that random state after copying,
  2771. and ``np.random.seed`` will no longer control the state.
  2772. Examples
  2773. --------
  2774. Custom made discrete distribution:
  2775. >>> import numpy as np
  2776. >>> from scipy import stats
  2777. >>> xk = np.arange(7)
  2778. >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)
  2779. >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))
  2780. >>>
  2781. >>> import matplotlib.pyplot as plt
  2782. >>> fig, ax = plt.subplots(1, 1)
  2783. >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
  2784. >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
  2785. >>> plt.show()
  2786. Random number generation:
  2787. >>> R = custm.rvs(size=100)
  2788. """
  2789. def __new__(cls, a=0, b=inf, name=None, badvalue=None,
  2790. moment_tol=1e-8, values=None, inc=1, longname=None,
  2791. shapes=None, seed=None):
  2792. if values is not None:
  2793. # dispatch to a subclass
  2794. return super().__new__(rv_sample)
  2795. else:
  2796. # business as usual
  2797. return super().__new__(cls)
  2798. def __init__(self, a=0, b=inf, name=None, badvalue=None,
  2799. moment_tol=1e-8, values=None, inc=1, longname=None,
  2800. shapes=None, seed=None):
  2801. super().__init__(seed)
  2802. # cf generic freeze
  2803. self._ctor_param = dict(
  2804. a=a, b=b, name=name, badvalue=badvalue,
  2805. moment_tol=moment_tol, values=values, inc=inc,
  2806. longname=longname, shapes=shapes, seed=seed)
  2807. if badvalue is None:
  2808. badvalue = nan
  2809. self.badvalue = badvalue
  2810. self.a = a
  2811. self.b = b
  2812. self.moment_tol = moment_tol
  2813. self.inc = inc
  2814. self.shapes = shapes
  2815. if values is not None:
  2816. raise ValueError("rv_discrete.__init__(..., values != None, ...)")
  2817. self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],
  2818. locscale_in='loc=0',
  2819. # scale=1 for discrete RVs
  2820. locscale_out='loc, 1')
  2821. self._attach_methods()
  2822. self._construct_docstrings(name, longname)
  2823. def __getstate__(self):
  2824. dct = self.__dict__.copy()
  2825. # these methods will be remade in __setstate__
  2826. attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
  2827. "_cdfvec", "_ppfvec", "generic_moment"]
  2828. [dct.pop(attr, None) for attr in attrs]
  2829. return dct
  2830. def _attach_methods(self):
  2831. """Attaches dynamically created methods to the rv_discrete instance."""
  2832. self._cdfvec = vectorize(self._cdf_single, otypes='d')
  2833. self.vecentropy = vectorize(self._entropy)
  2834. # _attach_methods is responsible for calling _attach_argparser_methods
  2835. self._attach_argparser_methods()
  2836. # nin correction needs to be after we know numargs
  2837. # correct nin for generic moment vectorization
  2838. _vec_generic_moment = vectorize(_drv2_moment, otypes='d')
  2839. _vec_generic_moment.nin = self.numargs + 2
  2840. self.generic_moment = types.MethodType(_vec_generic_moment, self)
  2841. # correct nin for ppf vectorization
  2842. _vppf = vectorize(_drv2_ppfsingle, otypes='d')
  2843. _vppf.nin = self.numargs + 2
  2844. self._ppfvec = types.MethodType(_vppf, self)
  2845. # now that self.numargs is defined, we can adjust nin
  2846. self._cdfvec.nin = self.numargs + 1
  2847. def _construct_docstrings(self, name, longname):
  2848. if name is None:
  2849. name = 'Distribution'
  2850. self.name = name
  2851. # generate docstring for subclass instances
  2852. if longname is None:
  2853. if name[0] in ['aeiouAEIOU']:
  2854. hstr = "An "
  2855. else:
  2856. hstr = "A "
  2857. longname = hstr + name
  2858. if sys.flags.optimize < 2:
  2859. # Skip adding docstrings if interpreter is run with -OO
  2860. if self.__doc__ is None:
  2861. self._construct_default_doc(longname=longname,
  2862. docdict=docdict_discrete,
  2863. discrete='discrete')
  2864. else:
  2865. dct = dict(distdiscrete)
  2866. self._construct_doc(docdict_discrete, dct.get(self.name))
  2867. # discrete RV do not have the scale parameter, remove it
  2868. self.__doc__ = self.__doc__.replace(
  2869. '\n scale : array_like, '
  2870. 'optional\n scale parameter (default=1)', '')
  2871. def _updated_ctor_param(self):
  2872. """Return the current version of _ctor_param, possibly updated by user.
  2873. Used by freezing.
  2874. Keep this in sync with the signature of __init__.
  2875. """
  2876. dct = self._ctor_param.copy()
  2877. dct['a'] = self.a
  2878. dct['b'] = self.b
  2879. dct['badvalue'] = self.badvalue
  2880. dct['moment_tol'] = self.moment_tol
  2881. dct['inc'] = self.inc
  2882. dct['name'] = self.name
  2883. dct['shapes'] = self.shapes
  2884. return dct
  2885. def _nonzero(self, k, *args):
  2886. return floor(k) == k
  2887. def _pmf(self, k, *args):
  2888. return self._cdf(k, *args) - self._cdf(k-1, *args)
  2889. def _logpmf(self, k, *args):
  2890. with np.errstate(divide='ignore'):
  2891. return log(self._pmf(k, *args))
  2892. def _logpxf(self, k, *args):
  2893. # continuous distributions have PDF, discrete have PMF, but sometimes
  2894. # the distinction doesn't matter. This lets us use `_logpxf` for both
  2895. # discrete and continuous distributions.
  2896. return self._logpmf(k, *args)
  2897. def _unpack_loc_scale(self, theta):
  2898. try:
  2899. loc = theta[-1]
  2900. scale = 1
  2901. args = tuple(theta[:-1])
  2902. except IndexError as e:
  2903. raise ValueError("Not enough input arguments.") from e
  2904. return loc, scale, args
  2905. def _cdf_single(self, k, *args):
  2906. _a, _b = self._get_support(*args)
  2907. m = arange(int(_a), k+1)
  2908. return np.sum(self._pmf(m, *args), axis=0)
  2909. def _cdf(self, x, *args):
  2910. k = floor(x).astype(np.float64)
  2911. return self._cdfvec(k, *args)
  2912. # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic
  2913. def rvs(self, *args, **kwargs):
  2914. """Random variates of given type.
  2915. Parameters
  2916. ----------
  2917. arg1, arg2, arg3,... : array_like
  2918. The shape parameter(s) for the distribution (see docstring of the
  2919. instance object for more information).
  2920. loc : array_like, optional
  2921. Location parameter (default=0).
  2922. size : int or tuple of ints, optional
  2923. Defining number of random variates (Default is 1). Note that `size`
  2924. has to be given as keyword, not as positional argument.
  2925. random_state : {None, int, `numpy.random.Generator`,
  2926. `numpy.random.RandomState`}, optional
  2927. If `random_state` is None (or `np.random`), the
  2928. `numpy.random.RandomState` singleton is used.
  2929. If `random_state` is an int, a new ``RandomState`` instance is
  2930. used, seeded with `random_state`.
  2931. If `random_state` is already a ``Generator`` or ``RandomState``
  2932. instance, that instance is used.
  2933. Returns
  2934. -------
  2935. rvs : ndarray or scalar
  2936. Random variates of given `size`.
  2937. """
  2938. kwargs['discrete'] = True
  2939. return super().rvs(*args, **kwargs)
  2940. def pmf(self, k, *args, **kwds):
  2941. """Probability mass function at k of the given RV.
  2942. Parameters
  2943. ----------
  2944. k : array_like
  2945. Quantiles.
  2946. arg1, arg2, arg3,... : array_like
  2947. The shape parameter(s) for the distribution (see docstring of the
  2948. instance object for more information)
  2949. loc : array_like, optional
  2950. Location parameter (default=0).
  2951. Returns
  2952. -------
  2953. pmf : array_like
  2954. Probability mass function evaluated at k
  2955. """
  2956. args, loc, _ = self._parse_args(*args, **kwds)
  2957. k, loc = map(asarray, (k, loc))
  2958. args = tuple(map(asarray, args))
  2959. _a, _b = self._get_support(*args)
  2960. k = asarray(k-loc)
  2961. cond0 = self._argcheck(*args)
  2962. cond1 = (k >= _a) & (k <= _b)
  2963. if not isinstance(self, rv_sample):
  2964. cond1 = cond1 & self._nonzero(k, *args)
  2965. cond = cond0 & cond1
  2966. output = zeros(shape(cond), 'd')
  2967. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  2968. if np.any(cond):
  2969. goodargs = argsreduce(cond, *((k,)+args))
  2970. place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
  2971. if output.ndim == 0:
  2972. return output[()]
  2973. return output
  2974. def logpmf(self, k, *args, **kwds):
  2975. """Log of the probability mass function at k of the given RV.
  2976. Parameters
  2977. ----------
  2978. k : array_like
  2979. Quantiles.
  2980. arg1, arg2, arg3,... : array_like
  2981. The shape parameter(s) for the distribution (see docstring of the
  2982. instance object for more information).
  2983. loc : array_like, optional
  2984. Location parameter. Default is 0.
  2985. Returns
  2986. -------
  2987. logpmf : array_like
  2988. Log of the probability mass function evaluated at k.
  2989. """
  2990. args, loc, _ = self._parse_args(*args, **kwds)
  2991. k, loc = map(asarray, (k, loc))
  2992. args = tuple(map(asarray, args))
  2993. _a, _b = self._get_support(*args)
  2994. k = asarray(k-loc)
  2995. cond0 = self._argcheck(*args)
  2996. cond1 = (k >= _a) & (k <= _b)
  2997. if not isinstance(self, rv_sample):
  2998. cond1 = cond1 & self._nonzero(k, *args)
  2999. cond = cond0 & cond1
  3000. output = empty(shape(cond), 'd')
  3001. output.fill(-inf)
  3002. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3003. if np.any(cond):
  3004. goodargs = argsreduce(cond, *((k,)+args))
  3005. place(output, cond, self._logpmf(*goodargs))
  3006. if output.ndim == 0:
  3007. return output[()]
  3008. return output
  3009. def cdf(self, k, *args, **kwds):
  3010. """Cumulative distribution function of the given RV.
  3011. Parameters
  3012. ----------
  3013. k : array_like, int
  3014. Quantiles.
  3015. arg1, arg2, arg3,... : array_like
  3016. The shape parameter(s) for the distribution (see docstring of the
  3017. instance object for more information).
  3018. loc : array_like, optional
  3019. Location parameter (default=0).
  3020. Returns
  3021. -------
  3022. cdf : ndarray
  3023. Cumulative distribution function evaluated at `k`.
  3024. """
  3025. args, loc, _ = self._parse_args(*args, **kwds)
  3026. k, loc = map(asarray, (k, loc))
  3027. args = tuple(map(asarray, args))
  3028. _a, _b = self._get_support(*args)
  3029. k = asarray(k-loc)
  3030. cond0 = self._argcheck(*args)
  3031. cond1 = (k >= _a) & (k < _b)
  3032. cond2 = (k >= _b)
  3033. cond3 = np.isneginf(k)
  3034. cond = cond0 & cond1 & np.isfinite(k)
  3035. output = zeros(shape(cond), 'd')
  3036. place(output, cond2*(cond0 == cond0), 1.0)
  3037. place(output, cond3*(cond0 == cond0), 0.0)
  3038. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3039. if np.any(cond):
  3040. goodargs = argsreduce(cond, *((k,)+args))
  3041. place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
  3042. if output.ndim == 0:
  3043. return output[()]
  3044. return output
  3045. def logcdf(self, k, *args, **kwds):
  3046. """Log of the cumulative distribution function at k of the given RV.
  3047. Parameters
  3048. ----------
  3049. k : array_like, int
  3050. Quantiles.
  3051. arg1, arg2, arg3,... : array_like
  3052. The shape parameter(s) for the distribution (see docstring of the
  3053. instance object for more information).
  3054. loc : array_like, optional
  3055. Location parameter (default=0).
  3056. Returns
  3057. -------
  3058. logcdf : array_like
  3059. Log of the cumulative distribution function evaluated at k.
  3060. """
  3061. args, loc, _ = self._parse_args(*args, **kwds)
  3062. k, loc = map(asarray, (k, loc))
  3063. args = tuple(map(asarray, args))
  3064. _a, _b = self._get_support(*args)
  3065. k = asarray(k-loc)
  3066. cond0 = self._argcheck(*args)
  3067. cond1 = (k >= _a) & (k < _b)
  3068. cond2 = (k >= _b)
  3069. cond = cond0 & cond1
  3070. output = empty(shape(cond), 'd')
  3071. output.fill(-inf)
  3072. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3073. place(output, cond2*(cond0 == cond0), 0.0)
  3074. if np.any(cond):
  3075. goodargs = argsreduce(cond, *((k,)+args))
  3076. place(output, cond, self._logcdf(*goodargs))
  3077. if output.ndim == 0:
  3078. return output[()]
  3079. return output
  3080. def sf(self, k, *args, **kwds):
  3081. """Survival function (1 - `cdf`) at k of the given RV.
  3082. Parameters
  3083. ----------
  3084. k : array_like
  3085. Quantiles.
  3086. arg1, arg2, arg3,... : array_like
  3087. The shape parameter(s) for the distribution (see docstring of the
  3088. instance object for more information).
  3089. loc : array_like, optional
  3090. Location parameter (default=0).
  3091. Returns
  3092. -------
  3093. sf : array_like
  3094. Survival function evaluated at k.
  3095. """
  3096. args, loc, _ = self._parse_args(*args, **kwds)
  3097. k, loc = map(asarray, (k, loc))
  3098. args = tuple(map(asarray, args))
  3099. _a, _b = self._get_support(*args)
  3100. k = asarray(k-loc)
  3101. cond0 = self._argcheck(*args)
  3102. cond1 = (k >= _a) & (k < _b)
  3103. cond2 = ((k < _a) | np.isneginf(k)) & cond0
  3104. cond = cond0 & cond1 & np.isfinite(k)
  3105. output = zeros(shape(cond), 'd')
  3106. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3107. place(output, cond2, 1.0)
  3108. if np.any(cond):
  3109. goodargs = argsreduce(cond, *((k,)+args))
  3110. place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
  3111. if output.ndim == 0:
  3112. return output[()]
  3113. return output
  3114. def logsf(self, k, *args, **kwds):
  3115. """Log of the survival function of the given RV.
  3116. Returns the log of the "survival function," defined as 1 - `cdf`,
  3117. evaluated at `k`.
  3118. Parameters
  3119. ----------
  3120. k : array_like
  3121. Quantiles.
  3122. arg1, arg2, arg3,... : array_like
  3123. The shape parameter(s) for the distribution (see docstring of the
  3124. instance object for more information).
  3125. loc : array_like, optional
  3126. Location parameter (default=0).
  3127. Returns
  3128. -------
  3129. logsf : ndarray
  3130. Log of the survival function evaluated at `k`.
  3131. """
  3132. args, loc, _ = self._parse_args(*args, **kwds)
  3133. k, loc = map(asarray, (k, loc))
  3134. args = tuple(map(asarray, args))
  3135. _a, _b = self._get_support(*args)
  3136. k = asarray(k-loc)
  3137. cond0 = self._argcheck(*args)
  3138. cond1 = (k >= _a) & (k < _b)
  3139. cond2 = (k < _a) & cond0
  3140. cond = cond0 & cond1
  3141. output = empty(shape(cond), 'd')
  3142. output.fill(-inf)
  3143. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3144. place(output, cond2, 0.0)
  3145. if np.any(cond):
  3146. goodargs = argsreduce(cond, *((k,)+args))
  3147. place(output, cond, self._logsf(*goodargs))
  3148. if output.ndim == 0:
  3149. return output[()]
  3150. return output
  3151. def ppf(self, q, *args, **kwds):
  3152. """Percent point function (inverse of `cdf`) at q of the given RV.
  3153. Parameters
  3154. ----------
  3155. q : array_like
  3156. Lower tail probability.
  3157. arg1, arg2, arg3,... : array_like
  3158. The shape parameter(s) for the distribution (see docstring of the
  3159. instance object for more information).
  3160. loc : array_like, optional
  3161. Location parameter (default=0).
  3162. Returns
  3163. -------
  3164. k : array_like
  3165. Quantile corresponding to the lower tail probability, q.
  3166. Notes
  3167. -----
  3168. For discrete distributions, the `cdf` is not strictly invertible. By convention,
  3169. this method returns the minimum value `k` for which the `cdf` at `k` is at
  3170. least `q`. There is one exception: the `ppf` of ``0`` is ``a-1``,
  3171. where ``a`` is the left endpoint of the support.
  3172. """
  3173. args, loc, _ = self._parse_args(*args, **kwds)
  3174. q, loc = map(asarray, (q, loc))
  3175. args = tuple(map(asarray, args))
  3176. _a, _b = self._get_support(*args)
  3177. cond0 = self._argcheck(*args) & (loc == loc)
  3178. cond1 = (q > 0) & (q < 1)
  3179. cond2 = (q == 0) & cond0
  3180. cond3 = (q == 1) & cond0
  3181. cond = cond0 & cond1
  3182. output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
  3183. # output type 'd' to handle nin and inf
  3184. place(output, cond2, argsreduce(cond2, _a-1 + loc)[0])
  3185. place(output, cond3, argsreduce(cond3, _b + loc)[0])
  3186. if np.any(cond):
  3187. goodargs = argsreduce(cond, *((q,)+args+(loc,)))
  3188. loc, goodargs = goodargs[-1], goodargs[:-1]
  3189. place(output, cond, self._ppf(*goodargs) + loc)
  3190. if output.ndim == 0:
  3191. return output[()]
  3192. return output
  3193. def isf(self, q, *args, **kwds):
  3194. """Inverse survival function (inverse of `sf`) at q of the given RV.
  3195. Parameters
  3196. ----------
  3197. q : array_like
  3198. Upper tail probability.
  3199. arg1, arg2, arg3,... : array_like
  3200. The shape parameter(s) for the distribution (see docstring of the
  3201. instance object for more information).
  3202. loc : array_like, optional
  3203. Location parameter (default=0).
  3204. Returns
  3205. -------
  3206. k : ndarray or scalar
  3207. Quantile corresponding to the upper tail probability, q.
  3208. Notes
  3209. -----
  3210. For discrete distributions, the `sf` is not strictly invertible. By convention,
  3211. this method returns the minimum value `k` for which the `sf` at `k` is
  3212. no greater than `q`. There is one exception: the `isf` of ``1`` is ``a-1``,
  3213. where ``a`` is the left endpoint of the support.
  3214. """
  3215. args, loc, _ = self._parse_args(*args, **kwds)
  3216. q, loc = map(asarray, (q, loc))
  3217. args = tuple(map(asarray, args))
  3218. _a, _b = self._get_support(*args)
  3219. cond0 = self._argcheck(*args) & (loc == loc)
  3220. cond1 = (q > 0) & (q < 1)
  3221. cond2 = (q == 1) & cond0
  3222. cond3 = (q == 0) & cond0
  3223. cond = cond0 & cond1
  3224. # same problem as with ppf; copied from ppf and changed
  3225. output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
  3226. # output type 'd' to handle nin and inf
  3227. lower_bound = _a - 1 + loc
  3228. upper_bound = _b + loc
  3229. place(output, cond2, argsreduce(cond2, lower_bound)[0])
  3230. place(output, cond3, argsreduce(cond3, upper_bound)[0])
  3231. # call place only if at least 1 valid argument
  3232. if np.any(cond):
  3233. goodargs = argsreduce(cond, *((q,)+args+(loc,)))
  3234. loc, goodargs = goodargs[-1], goodargs[:-1]
  3235. # PB same as ticket 766
  3236. place(output, cond, self._isf(*goodargs) + loc)
  3237. if output.ndim == 0:
  3238. return output[()]
  3239. return output
  3240. def _entropy(self, *args):
  3241. if hasattr(self, 'pk'):
  3242. return stats.entropy(self.pk)
  3243. else:
  3244. _a, _b = self._get_support(*args)
  3245. return _expect(lambda x: entr(self._pmf(x, *args)),
  3246. _a, _b, self._ppf(0.5, *args), self.inc)
  3247. def expect(self, func=None, args=(), loc=0, lb=None, ub=None,
  3248. conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):
  3249. """
  3250. Calculate expected value of a function with respect to the distribution
  3251. for discrete distribution by numerical summation.
  3252. Parameters
  3253. ----------
  3254. func : callable, optional
  3255. Function for which the expectation value is calculated.
  3256. Takes only one argument.
  3257. The default is the identity mapping f(k) = k.
  3258. args : tuple, optional
  3259. Shape parameters of the distribution.
  3260. loc : float, optional
  3261. Location parameter.
  3262. Default is 0.
  3263. lb, ub : int, optional
  3264. Lower and upper bound for the summation, default is set to the
  3265. support of the distribution, inclusive (``lb <= k <= ub``).
  3266. conditional : bool, optional
  3267. If true then the expectation is corrected by the conditional
  3268. probability of the summation interval. The return value is the
  3269. expectation of the function, `func`, conditional on being in
  3270. the given interval (k such that ``lb <= k <= ub``).
  3271. Default is False.
  3272. maxcount : int, optional
  3273. Maximal number of terms to evaluate (to avoid an endless loop for
  3274. an infinite sum). Default is 1000.
  3275. tolerance : float, optional
  3276. Absolute tolerance for the summation. Default is 1e-10.
  3277. chunksize : int, optional
  3278. Iterate over the support of a distributions in chunks of this size.
  3279. Default is 32.
  3280. Returns
  3281. -------
  3282. expect : float
  3283. Expected value.
  3284. Notes
  3285. -----
  3286. For heavy-tailed distributions, the expected value may or
  3287. may not exist,
  3288. depending on the function, `func`. If it does exist, but the
  3289. sum converges
  3290. slowly, the accuracy of the result may be rather low. For instance, for
  3291. ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.
  3292. increasing `maxcount` and/or `chunksize` may improve the result,
  3293. but may also make zipf very slow.
  3294. The function is not vectorized.
  3295. """
  3296. # Although `args` is just the shape parameters, `poisson_binom` needs this
  3297. # to split the vector-valued shape into a tuple of separate shapes
  3298. args, _, _ = self._parse_args(*args)
  3299. if func is None:
  3300. def fun(x):
  3301. # loc and args from outer scope
  3302. return (x+loc)*self._pmf(x, *args)
  3303. else:
  3304. def fun(x):
  3305. # loc and args from outer scope
  3306. return func(x+loc)*self._pmf(x, *args)
  3307. # used pmf because _pmf does not check support in randint and there
  3308. # might be problems(?) with correct self.a, self.b at this stage maybe
  3309. # not anymore, seems to work now with _pmf
  3310. _a, _b = self._get_support(*args)
  3311. if lb is None:
  3312. lb = _a
  3313. else:
  3314. lb = lb - loc # convert bound for standardized distribution
  3315. if ub is None:
  3316. ub = _b
  3317. else:
  3318. ub = ub - loc # convert bound for standardized distribution
  3319. if conditional:
  3320. invfac = self.sf(lb-1, *args) - self.sf(ub, *args)
  3321. else:
  3322. invfac = 1.0
  3323. if isinstance(self, rv_sample):
  3324. res = self._expect(fun, lb, ub)
  3325. return res / invfac
  3326. # iterate over the support, starting from the median
  3327. x0 = self._ppf(0.5, *args)
  3328. res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)
  3329. return res / invfac
  3330. def _param_info(self):
  3331. shape_info = self._shape_info()
  3332. loc_info = _ShapeInfo("loc", True, (-np.inf, np.inf), (False, False))
  3333. param_info = shape_info + [loc_info]
  3334. return param_info
  3335. def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,
  3336. chunksize=32):
  3337. """Helper for computing the expectation value of `fun`."""
  3338. # short-circuit if the support size is small enough
  3339. if (ub - lb) <= chunksize:
  3340. supp = np.arange(lb, ub+1, inc)
  3341. vals = fun(supp)
  3342. return np.sum(vals)
  3343. # otherwise, iterate starting from x0
  3344. if x0 < lb:
  3345. x0 = lb
  3346. if x0 > ub:
  3347. x0 = ub
  3348. count, tot = 0, 0.
  3349. # iterate over [x0, ub] inclusive
  3350. for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):
  3351. count += x.size
  3352. delta = np.sum(fun(x))
  3353. tot += delta
  3354. if abs(delta) < tolerance * x.size:
  3355. break
  3356. if count > maxcount:
  3357. warnings.warn('expect(): sum did not converge',
  3358. RuntimeWarning, stacklevel=3)
  3359. return tot
  3360. # iterate over [lb, x0)
  3361. for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):
  3362. count += x.size
  3363. delta = np.sum(fun(x))
  3364. tot += delta
  3365. if abs(delta) < tolerance * x.size:
  3366. break
  3367. if count > maxcount:
  3368. warnings.warn('expect(): sum did not converge',
  3369. RuntimeWarning, stacklevel=3)
  3370. break
  3371. return tot
  3372. def _iter_chunked(x0, x1, chunksize=4, inc=1):
  3373. """Iterate from x0 to x1 in chunks of chunksize and steps inc.
  3374. x0 must be finite, x1 need not be. In the latter case, the iterator is
  3375. infinite.
  3376. Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards
  3377. (make sure to set inc < 0.)
  3378. >>> from scipy.stats._distn_infrastructure import _iter_chunked
  3379. >>> [x for x in _iter_chunked(2, 5, inc=2)]
  3380. [array([2, 4])]
  3381. >>> [x for x in _iter_chunked(2, 11, inc=2)]
  3382. [array([2, 4, 6, 8]), array([10])]
  3383. >>> [x for x in _iter_chunked(2, -5, inc=-2)]
  3384. [array([ 2, 0, -2, -4])]
  3385. >>> [x for x in _iter_chunked(2, -9, inc=-2)]
  3386. [array([ 2, 0, -2, -4]), array([-6, -8])]
  3387. """
  3388. if inc == 0:
  3389. raise ValueError('Cannot increment by zero.')
  3390. if chunksize <= 0:
  3391. raise ValueError(f'Chunk size must be positive; got {chunksize}.')
  3392. s = 1 if inc > 0 else -1
  3393. stepsize = abs(chunksize * inc)
  3394. x = np.copy(x0)
  3395. while (x - x1) * inc < 0:
  3396. delta = min(stepsize, abs(x - x1))
  3397. step = delta * s
  3398. supp = np.arange(x, x + step, inc)
  3399. x += step
  3400. yield supp
  3401. class rv_sample(rv_discrete):
  3402. """A 'sample' discrete distribution defined by the support and values.
  3403. The ctor ignores most of the arguments, only needs the `values` argument.
  3404. """
  3405. def __init__(self, a=0, b=inf, name=None, badvalue=None,
  3406. moment_tol=1e-8, values=None, inc=1, longname=None,
  3407. shapes=None, seed=None):
  3408. super(rv_discrete, self).__init__(seed)
  3409. if values is None:
  3410. raise ValueError("rv_sample.__init__(..., values=None,...)")
  3411. # cf generic freeze
  3412. self._ctor_param = dict(
  3413. a=a, b=b, name=name, badvalue=badvalue,
  3414. moment_tol=moment_tol, values=values, inc=inc,
  3415. longname=longname, shapes=shapes, seed=seed)
  3416. if badvalue is None:
  3417. badvalue = nan
  3418. self.badvalue = badvalue
  3419. self.moment_tol = moment_tol
  3420. self.inc = inc
  3421. self.shapes = shapes
  3422. self.vecentropy = self._entropy
  3423. xk, pk = values
  3424. if np.shape(xk) != np.shape(pk):
  3425. raise ValueError("xk and pk must have the same shape.")
  3426. if np.less(pk, 0.0).any():
  3427. raise ValueError("All elements of pk must be non-negative.")
  3428. if not np.allclose(np.sum(pk), 1):
  3429. raise ValueError("The sum of provided pk is not 1.")
  3430. if not len(set(np.ravel(xk))) == np.size(xk):
  3431. raise ValueError("xk may not contain duplicate values.")
  3432. indx = np.argsort(np.ravel(xk))
  3433. self.xk = np.take(np.ravel(xk), indx, 0)
  3434. self.pk = np.take(np.ravel(pk), indx, 0)
  3435. self.a = self.xk[0]
  3436. self.b = self.xk[-1]
  3437. self.qvals = np.cumsum(self.pk, axis=0)
  3438. self.shapes = ' ' # bypass inspection
  3439. self._construct_argparser(meths_to_inspect=[self._pmf],
  3440. locscale_in='loc=0',
  3441. # scale=1 for discrete RVs
  3442. locscale_out='loc, 1')
  3443. self._attach_methods()
  3444. self._construct_docstrings(name, longname)
  3445. def __getstate__(self):
  3446. dct = self.__dict__.copy()
  3447. # these methods will be remade in rv_generic.__setstate__,
  3448. # which calls rv_generic._attach_methods
  3449. attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs"]
  3450. [dct.pop(attr, None) for attr in attrs]
  3451. return dct
  3452. def _attach_methods(self):
  3453. """Attaches dynamically created argparser methods."""
  3454. self._attach_argparser_methods()
  3455. def _get_support(self, *args):
  3456. """Return the support of the (unscaled, unshifted) distribution.
  3457. Parameters
  3458. ----------
  3459. arg1, arg2, ... : array_like
  3460. The shape parameter(s) for the distribution (see docstring of the
  3461. instance object for more information).
  3462. Returns
  3463. -------
  3464. a, b : numeric (float, or int or +/-np.inf)
  3465. end-points of the distribution's support.
  3466. """
  3467. return self.a, self.b
  3468. def _pmf(self, x):
  3469. return np.select([x == k for k in self.xk],
  3470. [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)
  3471. def _cdf(self, x):
  3472. xx, xxk = np.broadcast_arrays(x[:, None], self.xk)
  3473. indx = np.argmax(xxk > xx, axis=-1) - 1
  3474. return self.qvals[indx]
  3475. def _ppf(self, q):
  3476. qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)
  3477. indx = argmax(sqq >= qq, axis=-1)
  3478. return self.xk[indx]
  3479. def _rvs(self, size=None, random_state=None):
  3480. # Need to define it explicitly, otherwise .rvs() with size=None
  3481. # fails due to explicit broadcasting in _ppf
  3482. U = random_state.uniform(size=size)
  3483. if size is None:
  3484. U = np.array(U, ndmin=1)
  3485. Y = self._ppf(U)[0]
  3486. else:
  3487. Y = self._ppf(U)
  3488. return Y
  3489. def _entropy(self):
  3490. return stats.entropy(self.pk)
  3491. def generic_moment(self, n):
  3492. n = asarray(n)
  3493. return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)
  3494. def _expect(self, fun, lb, ub, *args, **kwds):
  3495. # ignore all args, just do a brute force summation
  3496. supp = self.xk[(lb <= self.xk) & (self.xk <= ub)]
  3497. vals = fun(supp)
  3498. return np.sum(vals)
  3499. def _check_shape(argshape, size):
  3500. """
  3501. This is a utility function used by `_rvs()` in the class geninvgauss_gen.
  3502. It compares the tuple argshape to the tuple size.
  3503. Parameters
  3504. ----------
  3505. argshape : tuple of integers
  3506. Shape of the arguments.
  3507. size : tuple of integers or integer
  3508. Size argument of rvs().
  3509. Returns
  3510. -------
  3511. The function returns two tuples, scalar_shape and bc.
  3512. scalar_shape : tuple
  3513. Shape to which the 1-d array of random variates returned by
  3514. _rvs_scalar() is converted when it is copied into the
  3515. output array of _rvs().
  3516. bc : tuple of booleans
  3517. bc is an tuple the same length as size. bc[j] is True if the data
  3518. associated with that index is generated in one call of _rvs_scalar().
  3519. """
  3520. scalar_shape = []
  3521. bc = []
  3522. for argdim, sizedim in zip_longest(argshape[::-1], size[::-1],
  3523. fillvalue=1):
  3524. if sizedim > argdim or (argdim == sizedim == 1):
  3525. scalar_shape.append(sizedim)
  3526. bc.append(True)
  3527. else:
  3528. bc.append(False)
  3529. return tuple(scalar_shape[::-1]), tuple(bc[::-1])
  3530. def get_distribution_names(namespace_pairs, rv_base_class):
  3531. """Collect names of statistical distributions and their generators.
  3532. Parameters
  3533. ----------
  3534. namespace_pairs : sequence
  3535. A snapshot of (name, value) pairs in the namespace of a module.
  3536. rv_base_class : class
  3537. The base class of random variable generator classes in a module.
  3538. Returns
  3539. -------
  3540. distn_names : list of strings
  3541. Names of the statistical distributions.
  3542. distn_gen_names : list of strings
  3543. Names of the generators of the statistical distributions.
  3544. Note that these are not simply the names of the statistical
  3545. distributions, with a _gen suffix added.
  3546. """
  3547. distn_names = []
  3548. distn_gen_names = []
  3549. for name, value in namespace_pairs:
  3550. if name.startswith('_'):
  3551. continue
  3552. if name.endswith('_gen') and issubclass(value, rv_base_class):
  3553. distn_gen_names.append(name)
  3554. if isinstance(value, rv_base_class):
  3555. distn_names.append(name)
  3556. return distn_names, distn_gen_names