| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228 |
- #
- # Author: Travis Oliphant 2002-2011 with contributions from
- # SciPy Developers 2004-2011
- #
- from scipy._lib._util import getfullargspec_no_self as _getfullargspec
- import sys
- import keyword
- import re
- import types
- import warnings
- from itertools import zip_longest
- from scipy._lib import doccer
- from ._distr_params import distcont, distdiscrete
- from scipy._lib._util import check_random_state
- import scipy._lib.array_api_extra as xpx
- from scipy.special import comb, entr
- # for root finding for continuous distribution ppf, and maximum likelihood
- # estimation
- from scipy import optimize
- # for functions of continuous distributions (e.g. moments, entropy, cdf)
- from scipy import integrate
- # to approximate the pdf of a continuous distribution given its cdf
- from scipy.stats._finite_differences import _derivative
- # for scipy.stats.entropy. Attempts to import just that function or file
- # have cause import problems
- from scipy import stats
- from numpy import (arange, putmask, ones, shape, ndarray, zeros, floor,
- logical_and, log, sqrt, place, argmax, vectorize, asarray,
- nan, inf, isinf, empty)
- import numpy as np
- from ._constants import _XMAX, _LOGXMAX
- from ._censored_data import CensoredData
- from scipy.stats._warnings_errors import FitError
- # These are the docstring parts used for substitution in specific
- # distribution docstrings
- docheaders = {'methods': """\nMethods\n-------\n""",
- 'notes': """\nNotes\n-----\n""",
- 'examples': """\nExamples\n--------\n"""}
- _doc_rvs = """\
- rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)
- Random variates.
- """
- _doc_pdf = """\
- pdf(x, %(shapes)s, loc=0, scale=1)
- Probability density function.
- """
- _doc_logpdf = """\
- logpdf(x, %(shapes)s, loc=0, scale=1)
- Log of the probability density function.
- """
- _doc_pmf = """\
- pmf(k, %(shapes)s, loc=0, scale=1)
- Probability mass function.
- """
- _doc_logpmf = """\
- logpmf(k, %(shapes)s, loc=0, scale=1)
- Log of the probability mass function.
- """
- _doc_cdf = """\
- cdf(x, %(shapes)s, loc=0, scale=1)
- Cumulative distribution function.
- """
- _doc_logcdf = """\
- logcdf(x, %(shapes)s, loc=0, scale=1)
- Log of the cumulative distribution function.
- """
- _doc_sf = """\
- sf(x, %(shapes)s, loc=0, scale=1)
- Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
- """ # noqa: E501
- _doc_logsf = """\
- logsf(x, %(shapes)s, loc=0, scale=1)
- Log of the survival function.
- """
- _doc_ppf = """\
- ppf(q, %(shapes)s, loc=0, scale=1)
- Percent point function (inverse of ``cdf`` --- percentiles).
- """
- _doc_isf = """\
- isf(q, %(shapes)s, loc=0, scale=1)
- Inverse survival function (inverse of ``sf``).
- """
- _doc_moment = """\
- moment(order, %(shapes)s, loc=0, scale=1)
- Non-central moment of the specified order.
- """
- _doc_stats = """\
- stats(%(shapes)s, loc=0, scale=1, moments='mv')
- Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
- """
- _doc_entropy = """\
- entropy(%(shapes)s, loc=0, scale=1)
- (Differential) entropy of the RV.
- """
- _doc_fit = """\
- fit(data)
- Parameter estimates for generic data.
- See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
- keyword arguments.
- """ # noqa: E501
- _doc_expect = """\
- expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
- Expected value of a function (of one argument) with respect to the distribution.
- """ # noqa: E501
- _doc_expect_discrete = """\
- expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)
- Expected value of a function (of one argument) with respect to the distribution.
- """
- _doc_median = """\
- median(%(shapes)s, loc=0, scale=1)
- Median of the distribution.
- """
- _doc_mean = """\
- mean(%(shapes)s, loc=0, scale=1)
- Mean of the distribution.
- """
- _doc_var = """\
- var(%(shapes)s, loc=0, scale=1)
- Variance of the distribution.
- """
- _doc_std = """\
- std(%(shapes)s, loc=0, scale=1)
- Standard deviation of the distribution.
- """
- _doc_interval = """\
- interval(confidence, %(shapes)s, loc=0, scale=1)
- Confidence interval with equal areas around the median.
- """
- _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
- _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
- _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
- _doc_stats, _doc_entropy, _doc_fit,
- _doc_expect, _doc_median,
- _doc_mean, _doc_var, _doc_std, _doc_interval])
- _doc_default_longsummary = """\
- As an instance of the `rv_continuous` class, `%(name)s` object inherits from it
- a collection of generic methods (see below for the full list),
- and completes them with details specific for this particular distribution.
- """
- _doc_default_frozen_note = """
- Alternatively, the object may be called (as a function) to fix the shape,
- location, and scale parameters returning a "frozen" continuous RV object:
- rv = %(name)s(%(shapes)s, loc=0, scale=1)
- - Frozen RV object with the same methods but holding the given shape,
- location, and scale fixed.
- """
- _doc_default_example = """\
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import %(name)s
- >>> import matplotlib.pyplot as plt
- >>> fig, ax = plt.subplots(1, 1)
- Get the support:
- %(set_vals_stmt)s
- >>> lb, ub = %(name)s.support(%(shapes)s)
- Calculate the first four moments:
- >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
- Display the probability density function (``pdf``):
- >>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
- ... %(name)s.ppf(0.99, %(shapes)s), 100)
- >>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
- ... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')
- Alternatively, the distribution object can be called (as a function)
- to fix the shape, location and scale parameters. This returns a "frozen"
- RV object holding the given parameters fixed.
- Freeze the distribution and display the frozen ``pdf``:
- >>> rv = %(name)s(%(shapes)s)
- >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
- Check accuracy of ``cdf`` and ``ppf``:
- >>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
- >>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
- True
- Generate random numbers:
- >>> r = %(name)s.rvs(%(shapes)s, size=1000)
- And compare the histogram:
- >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
- >>> ax.set_xlim([x[0], x[-1]])
- >>> ax.legend(loc='best', frameon=False)
- >>> plt.show()
- """
- _doc_default_locscale = """\
- The probability density above is defined in the "standardized" form. To shift
- and/or scale the distribution use the ``loc`` and ``scale`` parameters.
- Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically
- equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with
- ``y = (x - loc) / scale``. Note that shifting the location of a distribution
- does not make it a "noncentral" distribution; noncentral generalizations of
- some distributions are available in separate classes.
- """
- _doc_default = ''.join([_doc_default_longsummary,
- _doc_allmethods,
- '\n',
- _doc_default_example])
- _doc_default_before_notes = ''.join([_doc_default_longsummary,
- _doc_allmethods])
- docdict = {
- 'rvs': _doc_rvs,
- 'pdf': _doc_pdf,
- 'logpdf': _doc_logpdf,
- 'cdf': _doc_cdf,
- 'logcdf': _doc_logcdf,
- 'sf': _doc_sf,
- 'logsf': _doc_logsf,
- 'ppf': _doc_ppf,
- 'isf': _doc_isf,
- 'stats': _doc_stats,
- 'entropy': _doc_entropy,
- 'fit': _doc_fit,
- 'moment': _doc_moment,
- 'expect': _doc_expect,
- 'interval': _doc_interval,
- 'mean': _doc_mean,
- 'std': _doc_std,
- 'var': _doc_var,
- 'median': _doc_median,
- 'allmethods': _doc_allmethods,
- 'longsummary': _doc_default_longsummary,
- 'frozennote': _doc_default_frozen_note,
- 'example': _doc_default_example,
- 'default': _doc_default,
- 'before_notes': _doc_default_before_notes,
- 'after_notes': _doc_default_locscale
- }
- # Reuse common content between continuous and discrete docs, change some
- # minor bits.
- docdict_discrete = docdict.copy()
- docdict_discrete['pmf'] = _doc_pmf
- docdict_discrete['logpmf'] = _doc_logpmf
- docdict_discrete['expect'] = _doc_expect_discrete
- _doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
- 'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',
- 'mean', 'var', 'std', 'interval']
- for obj in _doc_disc_methods:
- docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')
- _doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']
- for obj in _doc_disc_methods_err_varname:
- docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')
- docdict_discrete.pop('pdf')
- docdict_discrete.pop('logpdf')
- _doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])
- docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods
- docdict_discrete['longsummary'] = _doc_default_longsummary.replace(
- 'rv_continuous', 'rv_discrete')
- _doc_default_frozen_note = """
- Alternatively, the object may be called (as a function) to fix the shape and
- location parameters returning a "frozen" discrete RV object:
- rv = %(name)s(%(shapes)s, loc=0)
- - Frozen RV object with the same methods but holding the given shape and
- location fixed.
- """
- docdict_discrete['frozennote'] = _doc_default_frozen_note
- _doc_default_discrete_example = """\
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import %(name)s
- >>> import matplotlib.pyplot as plt
- >>> fig, ax = plt.subplots(1, 1)
- Get the support:
- %(set_vals_stmt)s
- >>> lb, ub = %(name)s.support(%(shapes)s)
- Calculate the first four moments:
- >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
- Display the probability mass function (``pmf``):
- >>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
- ... %(name)s.ppf(0.99, %(shapes)s))
- >>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
- >>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)
- Alternatively, the distribution object can be called (as a function)
- to fix the shape and location. This returns a "frozen" RV object holding
- the given parameters fixed.
- Freeze the distribution and display the frozen ``pmf``:
- >>> rv = %(name)s(%(shapes)s)
- >>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
- ... label='frozen pmf')
- >>> ax.legend(loc='best', frameon=False)
- >>> plt.show()
- Check accuracy of ``cdf`` and ``ppf``:
- >>> prob = %(name)s.cdf(x, %(shapes)s)
- >>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
- True
- Generate random numbers:
- >>> r = %(name)s.rvs(%(shapes)s, size=1000)
- """
- _doc_default_discrete_locscale = """\
- The probability mass function above is defined in the "standardized" form.
- To shift distribution use the ``loc`` parameter.
- Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically
- equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.
- """
- docdict_discrete['example'] = _doc_default_discrete_example
- docdict_discrete['after_notes'] = _doc_default_discrete_locscale
- _doc_default_before_notes = ''.join([docdict_discrete['longsummary'],
- docdict_discrete['allmethods']])
- docdict_discrete['before_notes'] = _doc_default_before_notes
- _doc_default_disc = ''.join([docdict_discrete['longsummary'],
- docdict_discrete['allmethods'],
- docdict_discrete['frozennote'],
- docdict_discrete['example']])
- docdict_discrete['default'] = _doc_default_disc
- # clean up all the separate docstring elements, we do not need them anymore
- for obj in [s for s in dir() if s.startswith('_doc_')]:
- exec('del ' + obj)
- del obj
- def _moment(data, n, mu=None):
- if mu is None:
- mu = data.mean()
- return ((data - mu)**n).mean()
- def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):
- if (n == 0):
- return 1.0
- elif (n == 1):
- if mu is None:
- val = moment_func(1, *args)
- else:
- val = mu
- elif (n == 2):
- if mu2 is None or mu is None:
- val = moment_func(2, *args)
- else:
- val = mu2 + mu*mu
- elif (n == 3):
- if g1 is None or mu2 is None or mu is None:
- val = moment_func(3, *args)
- else:
- mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment
- val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment
- elif (n == 4):
- if g1 is None or g2 is None or mu2 is None or mu is None:
- val = moment_func(4, *args)
- else:
- mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
- mu3 = g1*np.power(mu2, 1.5) # 3rd central moment
- val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu
- else:
- val = moment_func(n, *args)
- return val
- def _skew(data):
- """
- skew is third central moment / variance**(1.5)
- """
- data = np.ravel(data)
- mu = data.mean()
- m2 = ((data - mu)**2).mean()
- m3 = ((data - mu)**3).mean()
- return m3 / np.power(m2, 1.5)
- def _kurtosis(data):
- """Fisher's excess kurtosis is fourth central moment / variance**2 - 3."""
- data = np.ravel(data)
- mu = data.mean()
- m2 = ((data - mu)**2).mean()
- m4 = ((data - mu)**4).mean()
- return m4 / m2**2 - 3
- def _vectorize_rvs_over_shapes(_rvs1):
- """Decorator that vectorizes _rvs method to work on ndarray shapes"""
- # _rvs1 must be a _function_ that accepts _scalar_ args as positional
- # arguments, `size` and `random_state` as keyword arguments.
- # _rvs1 must return a random variate array with shape `size`. If `size` is
- # None, _rvs1 must return a scalar.
- # When applied to _rvs1, this decorator broadcasts ndarray args
- # and loops over them, calling _rvs1 for each set of scalar args.
- # For usage example, see _nchypergeom_gen
- def _rvs(*args, size, random_state):
- _rvs1_size, _rvs1_indices = _check_shape(args[0].shape, size)
- size = np.array(size)
- _rvs1_size = np.array(_rvs1_size)
- _rvs1_indices = np.array(_rvs1_indices)
- if np.all(_rvs1_indices): # all args are scalars
- return _rvs1(*args, size, random_state)
- out = np.empty(size)
- # out.shape can mix dimensions associated with arg_shape and _rvs1_size
- # Sort them to arg_shape + _rvs1_size for easy indexing of dimensions
- # corresponding with the different sets of scalar args
- j0 = np.arange(out.ndim)
- j1 = np.hstack((j0[~_rvs1_indices], j0[_rvs1_indices]))
- out = np.moveaxis(out, j1, j0)
- for i in np.ndindex(*size[~_rvs1_indices]):
- # arg can be squeezed because singleton dimensions will be
- # associated with _rvs1_size, not arg_shape per _check_shape
- out[i] = _rvs1(*[np.squeeze(arg)[i] for arg in args],
- _rvs1_size, random_state)
- return np.moveaxis(out, j0, j1) # move axes back before returning
- return _rvs
- def _fit_determine_optimizer(optimizer):
- if not callable(optimizer) and isinstance(optimizer, str):
- if not optimizer.startswith('fmin_'):
- optimizer = "fmin_"+optimizer
- if optimizer == 'fmin_':
- optimizer = 'fmin'
- try:
- optimizer = getattr(optimize, optimizer)
- except AttributeError as e:
- raise ValueError(f"{optimizer} is not a valid optimizer") from e
- return optimizer
- def _isintegral(x):
- return x == np.round(x)
- def _sum_finite(x):
- """
- For a 1D array x, return a tuple containing the sum of the
- finite values of x and the number of nonfinite values.
- This is a utility function used when evaluating the negative
- loglikelihood for a distribution and an array of samples.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats._distn_infrastructure import _sum_finite
- >>> tot, nbad = _sum_finite(np.array([-2, -np.inf, 5, 1]))
- >>> tot
- 4.0
- >>> nbad
- 1
- """
- finite_x = np.isfinite(x)
- bad_count = finite_x.size - np.count_nonzero(finite_x)
- return np.sum(x[finite_x]), bad_count
- # Frozen RV class
- class rv_frozen:
- # generic type compatibility with scipy-stubs
- __class_getitem__ = classmethod(types.GenericAlias)
- def __init__(self, dist, *args, **kwds):
- self.args = args
- self.kwds = kwds
- # create a new instance
- self.dist = dist.__class__(**dist._updated_ctor_param())
- shapes, _, _ = self.dist._parse_args(*args, **kwds)
- self.a, self.b = self.dist._get_support(*shapes)
- @property
- def random_state(self):
- return self.dist._random_state
- @random_state.setter
- def random_state(self, seed):
- self.dist._random_state = check_random_state(seed)
- def cdf(self, x):
- return self.dist.cdf(x, *self.args, **self.kwds)
- def logcdf(self, x):
- return self.dist.logcdf(x, *self.args, **self.kwds)
- def ppf(self, q):
- return self.dist.ppf(q, *self.args, **self.kwds)
- def isf(self, q):
- return self.dist.isf(q, *self.args, **self.kwds)
- def rvs(self, size=None, random_state=None):
- kwds = self.kwds.copy()
- kwds.update({'size': size, 'random_state': random_state})
- return self.dist.rvs(*self.args, **kwds)
- def sf(self, x):
- return self.dist.sf(x, *self.args, **self.kwds)
- def logsf(self, x):
- return self.dist.logsf(x, *self.args, **self.kwds)
- def stats(self, moments='mv'):
- kwds = self.kwds.copy()
- kwds.update({'moments': moments})
- return self.dist.stats(*self.args, **kwds)
- def median(self):
- return self.dist.median(*self.args, **self.kwds)
- def mean(self):
- return self.dist.mean(*self.args, **self.kwds)
- def var(self):
- return self.dist.var(*self.args, **self.kwds)
- def std(self):
- return self.dist.std(*self.args, **self.kwds)
- def moment(self, order=None):
- return self.dist.moment(order, *self.args, **self.kwds)
- def entropy(self):
- return self.dist.entropy(*self.args, **self.kwds)
- def interval(self, confidence=None):
- return self.dist.interval(confidence, *self.args, **self.kwds)
- def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
- # expect method only accepts shape parameters as positional args
- # hence convert self.args, self.kwds, also loc/scale
- # See the .expect method docstrings for the meaning of
- # other parameters.
- a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
- if isinstance(self.dist, rv_discrete):
- return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)
- else:
- return self.dist.expect(func, a, loc, scale, lb, ub,
- conditional, **kwds)
- def support(self):
- return self.dist.support(*self.args, **self.kwds)
- class rv_discrete_frozen(rv_frozen):
- def pmf(self, k):
- return self.dist.pmf(k, *self.args, **self.kwds)
- def logpmf(self, k): # No error
- return self.dist.logpmf(k, *self.args, **self.kwds)
- class rv_continuous_frozen(rv_frozen):
- def pdf(self, x):
- return self.dist.pdf(x, *self.args, **self.kwds)
- def logpdf(self, x):
- return self.dist.logpdf(x, *self.args, **self.kwds)
- def argsreduce(cond, *args):
- """Clean arguments to:
- 1. Ensure all arguments are iterable (arrays of dimension at least one
- 2. If cond != True and size > 1, ravel(args[i]) where ravel(condition) is
- True, in 1D.
- Return list of processed arguments.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats._distn_infrastructure import argsreduce
- >>> rng = np.random.default_rng()
- >>> A = rng.random((4, 5))
- >>> B = 2
- >>> C = rng.random((1, 5))
- >>> cond = np.ones(A.shape)
- >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
- >>> A1.shape
- (4, 5)
- >>> B1.shape
- (1,)
- >>> C1.shape
- (1, 5)
- >>> cond[2,:] = 0
- >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
- >>> A1.shape
- (15,)
- >>> B1.shape
- (1,)
- >>> C1.shape
- (15,)
- """
- # some distributions assume arguments are iterable.
- newargs = np.atleast_1d(*args)
- # np.atleast_1d returns an array if only one argument, or a list of arrays
- # if more than one argument.
- if not isinstance(newargs, (list | tuple)):
- newargs = (newargs,)
- if np.all(cond):
- # broadcast arrays with cond
- *newargs, cond = np.broadcast_arrays(*newargs, cond)
- return [arg.ravel() for arg in newargs]
- s = cond.shape
- # np.extract returns flattened arrays, which are not broadcastable together
- # unless they are either the same size or size == 1.
- return [(arg if np.size(arg) == 1
- else np.extract(cond, np.broadcast_to(arg, s)))
- for arg in newargs]
- parse_arg_template = """
- def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):
- return (%(shape_arg_str)s), %(locscale_out)s
- def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):
- return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)
- def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):
- return (%(shape_arg_str)s), %(locscale_out)s, moments
- """
- class rv_generic:
- """Class which encapsulates common functionality between rv_discrete
- and rv_continuous.
- """
- def __init__(self, seed=None):
- super().__init__()
- # figure out if _stats signature has 'moments' keyword
- sig = _getfullargspec(self._stats)
- self._stats_has_moments = ((sig.varkw is not None) or
- ('moments' in sig.args) or
- ('moments' in sig.kwonlyargs))
- self._random_state = check_random_state(seed)
- @property
- def random_state(self):
- """Get or set the generator object for generating random variates.
- If `random_state` is None (or `np.random`), the
- `numpy.random.RandomState` singleton is used.
- If `random_state` is an int, a new ``RandomState`` instance is used,
- seeded with `random_state`.
- If `random_state` is already a ``Generator`` or ``RandomState``
- instance, that instance is used.
- """
- return self._random_state
- @random_state.setter
- def random_state(self, seed):
- self._random_state = check_random_state(seed)
- def __setstate__(self, state):
- try:
- self.__dict__.update(state)
- # attaches the dynamically created methods on each instance.
- # if a subclass overrides rv_generic.__setstate__, or implements
- # it's own _attach_methods, then it must make sure that
- # _attach_argparser_methods is called.
- self._attach_methods()
- except ValueError:
- # reconstitute an old pickle scipy<1.6, that contains
- # (_ctor_param, random_state) as state
- self._ctor_param = state[0]
- self._random_state = state[1]
- self.__init__()
- def _attach_methods(self):
- """Attaches dynamically created methods to the rv_* instance.
- This method must be overridden by subclasses, and must itself call
- _attach_argparser_methods. This method is called in __init__ in
- subclasses, and in __setstate__
- """
- raise NotImplementedError
- def _attach_argparser_methods(self):
- """
- Generates the argument-parsing functions dynamically and attaches
- them to the instance.
- Should be called from `_attach_methods`, typically in __init__ and
- during unpickling (__setstate__)
- """
- ns = {}
- exec(self._parse_arg_template, ns)
- # NB: attach to the instance, not class
- for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:
- setattr(self, name, types.MethodType(ns[name], self))
- def _construct_argparser(
- self, meths_to_inspect, locscale_in, locscale_out):
- """Construct the parser string for the shape arguments.
- This method should be called in __init__ of a class for each
- distribution. It creates the `_parse_arg_template` attribute that is
- then used by `_attach_argparser_methods` to dynamically create and
- attach the `_parse_args`, `_parse_args_stats`, `_parse_args_rvs`
- methods to the instance.
- If self.shapes is a non-empty string, interprets it as a
- comma-separated list of shape parameters.
- Otherwise inspects the call signatures of `meths_to_inspect`
- and constructs the argument-parsing functions from these.
- In this case also sets `shapes` and `numargs`.
- """
- if self.shapes:
- # sanitize the user-supplied shapes
- if not isinstance(self.shapes, str):
- raise TypeError('shapes must be a string.')
- shapes = self.shapes.replace(',', ' ').split()
- for field in shapes:
- if keyword.iskeyword(field):
- raise SyntaxError('keywords cannot be used as shapes.')
- if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):
- raise SyntaxError(
- 'shapes must be valid python identifiers')
- else:
- # find out the call signatures (_pdf, _cdf etc), deduce shape
- # arguments. Generic methods only have 'self, x', any further args
- # are shapes.
- shapes_list = []
- for meth in meths_to_inspect:
- shapes_args = _getfullargspec(meth) # NB does not contain self
- args = shapes_args.args[1:] # peel off 'x', too
- if args:
- shapes_list.append(args)
- # *args or **kwargs are not allowed w/automatic shapes
- if shapes_args.varargs is not None:
- raise TypeError(
- '*args are not allowed w/out explicit shapes')
- if shapes_args.varkw is not None:
- raise TypeError(
- '**kwds are not allowed w/out explicit shapes')
- if shapes_args.kwonlyargs:
- raise TypeError(
- 'kwonly args are not allowed w/out explicit shapes')
- if shapes_args.defaults is not None:
- raise TypeError('defaults are not allowed for shapes')
- if shapes_list:
- shapes = shapes_list[0]
- # make sure the signatures are consistent
- for item in shapes_list:
- if item != shapes:
- raise TypeError('Shape arguments are inconsistent.')
- else:
- shapes = []
- # have the arguments, construct the method from template
- shapes_str = ', '.join(shapes) + ', ' if shapes else '' # NB: not None
- dct = dict(shape_arg_str=shapes_str,
- locscale_in=locscale_in,
- locscale_out=locscale_out,
- )
- # this string is used by _attach_argparser_methods
- self._parse_arg_template = parse_arg_template % dct
- self.shapes = ', '.join(shapes) if shapes else None
- if not hasattr(self, 'numargs'):
- # allows more general subclassing with *args
- self.numargs = len(shapes)
- def _construct_doc(self, docdict, shapes_vals=None):
- """Construct the instance docstring with string substitutions."""
- if sys.flags.optimize > 1:
- # if run with -OO, docstrings are stripped
- # see https://docs.python.org/3/using/cmdline.html#cmdoption-OO
- return
- tempdict = docdict.copy()
- tempdict['name'] = self.name or 'distname'
- tempdict['shapes'] = self.shapes or ''
- if shapes_vals is None:
- shapes_vals = ()
- try:
- vals = ', '.join(f'{val:.3g}' for val in shapes_vals)
- except TypeError:
- vals = ', '.join(f'{val}' for val in shapes_vals)
- tempdict['vals'] = vals
- tempdict['shapes_'] = self.shapes or ''
- if self.shapes and self.numargs == 1:
- tempdict['shapes_'] += ','
- if self.shapes:
- tempdict['set_vals_stmt'] = f'>>> {self.shapes} = {vals}'
- else:
- tempdict['set_vals_stmt'] = ''
- if self.shapes is None:
- # remove shapes from call parameters if there are none
- for item in ['default', 'before_notes']:
- tempdict[item] = tempdict[item].replace(
- "\n%(shapes)s : array_like\n shape parameters", "")
- for i in range(2):
- if self.shapes is None:
- # necessary because we use %(shapes)s in two forms (w w/o ", ")
- self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
- try:
- self.__doc__ = doccer.docformat(self.__doc__, tempdict)
- except TypeError as e:
- raise Exception("Unable to construct docstring for "
- f"distribution \"{self.name}\": {repr(e)}") from e
- # correct for empty shapes
- self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')
- def _construct_default_doc(self, longname=None,
- docdict=None, discrete='continuous'):
- """Construct instance docstring from the default template."""
- if sys.flags.optimize > 1:
- # if run with -OO, docstrings are stripped
- # see https://docs.python.org/3/using/cmdline.html#cmdoption-OO
- return
- if longname is None:
- longname = 'A'
- self.__doc__ = ''.join([f'{longname} {discrete} random variable.',
- '\n\n%(before_notes)s\n', docheaders['notes'],
- '\n%(example)s'])
- self._construct_doc(docdict)
- def freeze(self, *args, **kwds):
- """Freeze the distribution for the given arguments.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution. Should include all
- the non-optional arguments, may include ``loc`` and ``scale``.
- Returns
- -------
- rv_frozen : rv_frozen instance
- The frozen distribution.
- """
- if isinstance(self, rv_continuous):
- return rv_continuous_frozen(self, *args, **kwds)
- else:
- return rv_discrete_frozen(self, *args, **kwds)
- def __call__(self, *args, **kwds):
- return self.freeze(*args, **kwds)
- __call__.__doc__ = freeze.__doc__
- # The actual calculation functions (no basic checking need be done)
- # If these are defined, the others won't be looked at.
- # Otherwise, the other set can be defined.
- def _stats(self, *args, **kwds):
- return None, None, None, None
- # Noncentral moments (also known as the moment about the origin).
- # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime".
- # The primed mu is a widely used notation for the noncentral moment.
- def _munp(self, n, *args):
- # Silence floating point warnings from integration.
- with np.errstate(all='ignore'):
- vals = self.generic_moment(n, *args)
- return vals
- def _argcheck_rvs(self, *args, **kwargs):
- # Handle broadcasting and size validation of the rvs method.
- # Subclasses should not have to override this method.
- # The rule is that if `size` is not None, then `size` gives the
- # shape of the result (integer values of `size` are treated as
- # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)
- #
- # `args` is expected to contain the shape parameters (if any), the
- # location and the scale in a flat tuple (e.g. if there are two
- # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).
- # The only keyword argument expected is 'size'.
- size = kwargs.get('size', None)
- all_bcast = np.broadcast_arrays(*args)
- def squeeze_left(a):
- while a.ndim > 0 and a.shape[0] == 1:
- a = a[0]
- return a
- # Eliminate trivial leading dimensions. In the convention
- # used by numpy's random variate generators, trivial leading
- # dimensions are effectively ignored. In other words, when `size`
- # is given, trivial leading dimensions of the broadcast parameters
- # in excess of the number of dimensions in size are ignored, e.g.
- # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)
- # array([ 1.00104267, 3.00422496, 4.99799278])
- # If `size` is not given, the exact broadcast shape is preserved:
- # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])
- # array([[[[ 1.00862899, 3.00061431, 4.99867122]]]])
- #
- all_bcast = [squeeze_left(a) for a in all_bcast]
- bcast_shape = all_bcast[0].shape
- bcast_ndim = all_bcast[0].ndim
- if size is None:
- size_ = bcast_shape
- else:
- size_ = tuple(np.atleast_1d(size))
- # Check compatibility of size_ with the broadcast shape of all
- # the parameters. This check is intended to be consistent with
- # how the numpy random variate generators (e.g. np.random.normal,
- # np.random.beta) handle their arguments. The rule is that, if size
- # is given, it determines the shape of the output. Broadcasting
- # can't change the output size.
- # This is the standard broadcasting convention of extending the
- # shape with fewer dimensions with enough dimensions of length 1
- # so that the two shapes have the same number of dimensions.
- ndiff = bcast_ndim - len(size_)
- if ndiff < 0:
- bcast_shape = (1,)*(-ndiff) + bcast_shape
- elif ndiff > 0:
- size_ = (1,)*ndiff + size_
- # This compatibility test is not standard. In "regular" broadcasting,
- # two shapes are compatible if for each dimension, the lengths are the
- # same or one of the lengths is 1. Here, the length of a dimension in
- # size_ must not be less than the corresponding length in bcast_shape.
- ok = all([bcdim == 1 or bcdim == szdim
- for (bcdim, szdim) in zip(bcast_shape, size_)])
- if not ok:
- raise ValueError("size does not match the broadcast shape of "
- f"the parameters. {size}, {size_}, {bcast_shape}")
- param_bcast = all_bcast[:-2]
- loc_bcast = all_bcast[-2]
- scale_bcast = all_bcast[-1]
- return param_bcast, loc_bcast, scale_bcast, size_
- # These are the methods you must define (standard form functions)
- # NB: generic _pdf, _logpdf, _cdf are different for
- # rv_continuous and rv_discrete hence are defined in there
- def _argcheck(self, *args):
- """Default check for correct values on args and keywords.
- Returns condition array of 1's where arguments are correct and
- 0's where they are not.
- """
- cond = 1
- for arg in args:
- cond = logical_and(cond, (asarray(arg) > 0))
- return cond
- def _get_support(self, *args, **kwargs):
- """Return the support of the (unscaled, unshifted) distribution.
- *Must* be overridden by distributions which have support dependent
- upon the shape parameters of the distribution. Any such override
- *must not* set or change any of the class members, as these members
- are shared amongst all instances of the distribution.
- Parameters
- ----------
- arg1, arg2, ... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- Returns
- -------
- a, b : numeric (float, or int or +/-np.inf)
- end-points of the distribution's support for the specified
- shape parameters.
- """
- return self.a, self.b
- def _support_mask(self, x, *args):
- a, b = self._get_support(*args)
- with np.errstate(invalid='ignore'):
- return (a <= x) & (x <= b)
- def _open_support_mask(self, x, *args):
- a, b = self._get_support(*args)
- with np.errstate(invalid='ignore'):
- return (a < x) & (x < b)
- def _rvs(self, *args, size=None, random_state=None):
- # This method must handle size being a tuple, and it must
- # properly broadcast *args and size. size might be
- # an empty tuple, which means a scalar random variate is to be
- # generated.
- # Use basic inverse cdf algorithm for RV generation as default.
- U = random_state.uniform(size=size)
- Y = self._ppf(U, *args)
- return Y
- def _logcdf(self, x, *args):
- with np.errstate(divide='ignore'):
- return log(self._cdf(x, *args))
- def _sf(self, x, *args):
- return 1.0-self._cdf(x, *args)
- def _logsf(self, x, *args):
- with np.errstate(divide='ignore'):
- return log(self._sf(x, *args))
- def _ppf(self, q, *args):
- return self._ppfvec(q, *args)
- def _isf(self, q, *args):
- return self._ppf(1.0-q, *args) # use correct _ppf for subclasses
- # These are actually called, and should not be overwritten if you
- # want to keep error checking.
- def rvs(self, *args, **kwds):
- """Random variates of given type.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- scale : array_like, optional
- Scale parameter (default=1).
- size : int or tuple of ints, optional
- Defining number of random variates (default is 1).
- random_state : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `random_state` is None (or `np.random`), the
- `numpy.random.RandomState` singleton is used.
- If `random_state` is an int, a new ``RandomState`` instance is
- used, seeded with `random_state`.
- If `random_state` is already a ``Generator`` or ``RandomState``
- instance, that instance is used.
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of given `size`.
- """
- discrete = kwds.pop('discrete', None)
- rndm = kwds.pop('random_state', None)
- args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
- cond = logical_and(self._argcheck(*args), (scale >= 0))
- if not np.all(cond):
- message = ("Domain error in arguments. The `scale` parameter must "
- "be positive for all distributions, and many "
- "distributions have restrictions on shape parameters. "
- f"Please see the `scipy.stats.{self.name}` "
- "documentation for details.")
- raise ValueError(message)
- if np.all(scale == 0):
- return loc*ones(size, 'd')
- # extra gymnastics needed for a custom random_state
- if rndm is not None:
- random_state_saved = self._random_state
- random_state = check_random_state(rndm)
- else:
- random_state = self._random_state
- vals = self._rvs(*args, size=size, random_state=random_state)
- vals = vals * scale + loc
- # do not forget to restore the _random_state
- if rndm is not None:
- self._random_state = random_state_saved
- # Cast to int if discrete
- if discrete and not isinstance(self, rv_sample):
- if size == ():
- vals = int(vals)
- else:
- vals = vals.astype(np.int64)
- return vals
- def stats(self, *args, **kwds):
- """Some statistics of the given RV.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional (continuous RVs only)
- scale parameter (default=1)
- moments : str, optional
- composed of letters ['mvsk'] defining which moments to compute:
- 'm' = mean,
- 'v' = variance,
- 's' = (Fisher's) skew,
- 'k' = (Fisher's) kurtosis.
- (default is 'mv')
- Returns
- -------
- stats : sequence
- of requested moments.
- """
- args, loc, scale, moments = self._parse_args_stats(*args, **kwds)
- # scale = 1 by construction for discrete RVs
- loc, scale = map(asarray, (loc, scale))
- args = tuple(map(asarray, args))
- cond = self._argcheck(*args) & (scale > 0) & (loc == loc)
- output = []
- default = np.full(shape(cond), fill_value=self.badvalue)
- # Use only entries that are valid in calculation
- if np.any(cond):
- goodargs = argsreduce(cond, *(args+(scale, loc)))
- scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
- if self._stats_has_moments:
- mu, mu2, g1, g2 = self._stats(*goodargs,
- **{'moments': moments})
- else:
- mu, mu2, g1, g2 = self._stats(*goodargs)
- if 'm' in moments:
- if mu is None:
- mu = self._munp(1, *goodargs)
- out0 = default.copy()
- place(out0, cond, mu * scale + loc)
- output.append(out0)
- if 'v' in moments:
- if mu2 is None:
- mu2p = self._munp(2, *goodargs)
- if mu is None:
- mu = self._munp(1, *goodargs)
- # if mean is inf then var is also inf
- with np.errstate(invalid='ignore'):
- mu2 = np.where(~np.isinf(mu), mu2p - mu**2, np.inf)
- out0 = default.copy()
- place(out0, cond, mu2 * scale * scale)
- output.append(out0)
- if 's' in moments:
- if g1 is None:
- mu3p = self._munp(3, *goodargs)
- if mu is None:
- mu = self._munp(1, *goodargs)
- if mu2 is None:
- mu2p = self._munp(2, *goodargs)
- with np.errstate(invalid='ignore'):
- mu2 = mu2p - mu * mu
- with np.errstate(invalid='ignore'):
- mu3 = (-mu*mu - 3*mu2)*mu + mu3p
- g1 = mu3 / np.power(mu2, 1.5)
- out0 = default.copy()
- place(out0, cond, g1)
- output.append(out0)
- if 'k' in moments:
- if g2 is None:
- mu4p = self._munp(4, *goodargs)
- if mu is None:
- mu = self._munp(1, *goodargs)
- if mu2 is None:
- mu2p = self._munp(2, *goodargs)
- with np.errstate(invalid='ignore'):
- mu2 = mu2p - mu * mu
- if g1 is None:
- mu3 = None
- else:
- # (mu2**1.5) breaks down for nan and inf
- mu3 = g1 * np.power(mu2, 1.5)
- if mu3 is None:
- mu3p = self._munp(3, *goodargs)
- with np.errstate(invalid='ignore'):
- mu3 = (-mu * mu - 3 * mu2) * mu + mu3p
- with np.errstate(invalid='ignore'):
- mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p
- g2 = mu4 / mu2**2.0 - 3.0
- out0 = default.copy()
- place(out0, cond, g2)
- output.append(out0)
- else: # no valid args
- output = [default.copy() for _ in moments]
- output = [out[()] for out in output]
- if len(output) == 1:
- return output[0]
- else:
- return tuple(output)
- def entropy(self, *args, **kwds):
- """Differential entropy of the RV.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- scale : array_like, optional (continuous distributions only).
- Scale parameter (default=1).
- Notes
- -----
- Entropy is defined base `e`:
- >>> import numpy as np
- >>> from scipy.stats._distn_infrastructure import rv_discrete
- >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5)))
- >>> np.allclose(drv.entropy(), np.log(2.0))
- True
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- # NB: for discrete distributions scale=1 by construction in _parse_args
- loc, scale = map(asarray, (loc, scale))
- args = tuple(map(asarray, args))
- cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
- output = zeros(shape(cond0), 'd')
- place(output, (1-cond0), self.badvalue)
- goodargs = argsreduce(cond0, scale, *args)
- goodscale = goodargs[0]
- goodargs = goodargs[1:]
- place(output, cond0, self.vecentropy(*goodargs) + log(goodscale))
- return output[()]
- def moment(self, order, *args, **kwds):
- """non-central moment of distribution of specified order.
- Parameters
- ----------
- order : int, order >= 1
- Order of moment.
- arg1, arg2, arg3,... : float
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- """
- n = order
- shapes, loc, scale = self._parse_args(*args, **kwds)
- args = np.broadcast_arrays(*(*shapes, loc, scale))
- *shapes, loc, scale = args
- i0 = np.logical_and(self._argcheck(*shapes), scale > 0)
- i1 = np.logical_and(i0, loc == 0)
- i2 = np.logical_and(i0, loc != 0)
- args = argsreduce(i0, *shapes, loc, scale)
- *shapes, loc, scale = args
- if (floor(n) != n):
- raise ValueError("Moment must be an integer.")
- if (n < 0):
- raise ValueError("Moment must be positive.")
- mu, mu2, g1, g2 = None, None, None, None
- if (n > 0) and (n < 5):
- if self._stats_has_moments:
- mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'mvsk'}[n]}
- else:
- mdict = {}
- mu, mu2, g1, g2 = self._stats(*shapes, **mdict)
- val = np.empty(loc.shape) # val needs to be indexed by loc
- val[...] = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, shapes)
- # Convert to transformed X = L + S*Y
- # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n)
- result = zeros(i0.shape)
- place(result, ~i0, self.badvalue)
- if i1.any():
- res1 = scale[loc == 0]**n * val[loc == 0]
- place(result, i1, res1)
- if i2.any():
- mom = [mu, mu2, g1, g2]
- arrs = [i for i in mom if i is not None]
- idx = [i for i in range(4) if mom[i] is not None]
- if any(idx):
- arrs = argsreduce(loc != 0, *arrs)
- j = 0
- for i in idx:
- mom[i] = arrs[j]
- j += 1
- mu, mu2, g1, g2 = mom
- args = argsreduce(loc != 0, *shapes, loc, scale, val)
- *shapes, loc, scale, val = args
- res2 = zeros(loc.shape, dtype='d')
- fac = scale / loc
- for k in range(n):
- valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp,
- shapes)
- res2 += comb(n, k, exact=True)*fac**k * valk
- res2 += fac**n * val
- res2 *= loc**n
- place(result, i2, res2)
- return result[()]
- def median(self, *args, **kwds):
- """Median of the distribution.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- Location parameter, Default is 0.
- scale : array_like, optional
- Scale parameter, Default is 1.
- Returns
- -------
- median : float
- The median of the distribution.
- See Also
- --------
- rv_discrete.ppf
- Inverse of the CDF
- """
- return self.ppf(0.5, *args, **kwds)
- def mean(self, *args, **kwds):
- """Mean of the distribution.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- mean : float
- the mean of the distribution
- """
- kwds['moments'] = 'm'
- res = self.stats(*args, **kwds)
- if isinstance(res, ndarray) and res.ndim == 0:
- return res[()]
- return res
- def var(self, *args, **kwds):
- """Variance of the distribution.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- var : float
- the variance of the distribution
- """
- kwds['moments'] = 'v'
- res = self.stats(*args, **kwds)
- if isinstance(res, ndarray) and res.ndim == 0:
- return res[()]
- return res
- def std(self, *args, **kwds):
- """Standard deviation of the distribution.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- std : float
- standard deviation of the distribution
- """
- kwds['moments'] = 'v'
- res = sqrt(self.stats(*args, **kwds))
- return res
- def interval(self, confidence, *args, **kwds):
- """Confidence interval with equal areas around the median.
- Parameters
- ----------
- confidence : array_like of float
- Probability that an rv will be drawn from the returned range.
- Each value should be in the range [0, 1].
- arg1, arg2, ... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- location parameter, Default is 0.
- scale : array_like, optional
- scale parameter, Default is 1.
- Returns
- -------
- a, b : ndarray of float
- end-points of range that contain ``100 * alpha %`` of the rv's
- possible values.
- Notes
- -----
- This is implemented as ``ppf([p_tail, 1-p_tail])``, where
- ``ppf`` is the inverse cumulative distribution function and
- ``p_tail = (1-confidence)/2``. Suppose ``[c, d]`` is the support of a
- discrete distribution; then ``ppf([0, 1]) == (c-1, d)``. Therefore,
- when ``confidence=1`` and the distribution is discrete, the left end
- of the interval will be beyond the support of the distribution.
- For discrete distributions, the interval will limit the probability
- in each tail to be less than or equal to ``p_tail`` (usually
- strictly less).
- """
- alpha = confidence
- alpha = asarray(alpha)
- if np.any((alpha > 1) | (alpha < 0)):
- raise ValueError("alpha must be between 0 and 1 inclusive")
- q1 = (1.0-alpha)/2
- q2 = (1.0+alpha)/2
- a = self.ppf(q1, *args, **kwds)
- b = self.ppf(q2, *args, **kwds)
- return a, b
- def support(self, *args, **kwargs):
- """Support of the distribution.
- Parameters
- ----------
- arg1, arg2, ... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- location parameter, Default is 0.
- scale : array_like, optional
- scale parameter, Default is 1.
- Returns
- -------
- a, b : array_like
- end-points of the distribution's support.
- """
- args, loc, scale = self._parse_args(*args, **kwargs)
- arrs = np.broadcast_arrays(*args, loc, scale)
- args, loc, scale = arrs[:-2], arrs[-2], arrs[-1]
- cond = self._argcheck(*args) & (scale > 0)
- _a, _b = self._get_support(*args)
- if cond.all():
- return _a * scale + loc, _b * scale + loc
- elif cond.ndim == 0:
- return self.badvalue, self.badvalue
- # promote bounds to at least float to fill in the badvalue
- _a, _b = np.asarray(_a).astype('d'), np.asarray(_b).astype('d')
- out_a, out_b = _a * scale + loc, _b * scale + loc
- place(out_a, 1-cond, self.badvalue)
- place(out_b, 1-cond, self.badvalue)
- return out_a, out_b
- def nnlf(self, theta, x):
- """Negative loglikelihood function.
- Notes
- -----
- This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the
- parameters (including loc and scale).
- """
- loc, scale, args = self._unpack_loc_scale(theta)
- if not self._argcheck(*args) or scale <= 0:
- return inf
- x = (asarray(x)-loc) / scale
- n_log_scale = len(x) * log(scale)
- if np.any(~self._support_mask(x, *args)):
- return inf
- return self._nnlf(x, *args) + n_log_scale
- def _nnlf(self, x, *args):
- return -np.sum(self._logpxf(x, *args), axis=0)
- def _nlff_and_penalty(self, x, args, log_fitfun):
- # negative log fit function
- cond0 = ~self._support_mask(x, *args)
- n_bad = np.count_nonzero(cond0, axis=0)
- if n_bad > 0:
- x = argsreduce(~cond0, x)[0]
- logff = log_fitfun(x, *args)
- finite_logff = np.isfinite(logff)
- n_bad += np.sum(~finite_logff, axis=0)
- if n_bad > 0:
- penalty = n_bad * log(_XMAX) * 100
- return -np.sum(logff[finite_logff], axis=0) + penalty
- return -np.sum(logff, axis=0)
- def _penalized_nnlf(self, theta, x):
- """Penalized negative loglikelihood function.
- i.e., - sum (log pdf(x, theta), axis=0) + penalty
- where theta are the parameters (including loc and scale)
- """
- loc, scale, args = self._unpack_loc_scale(theta)
- if not self._argcheck(*args) or scale <= 0:
- return inf
- x = asarray((x-loc) / scale)
- n_log_scale = len(x) * log(scale)
- return self._nlff_and_penalty(x, args, self._logpxf) + n_log_scale
- def _penalized_nlpsf(self, theta, x):
- """Penalized negative log product spacing function.
- i.e., - sum (log (diff (cdf (x, theta))), axis=0) + penalty
- where theta are the parameters (including loc and scale)
- Follows reference [1] of scipy.stats.fit
- """
- loc, scale, args = self._unpack_loc_scale(theta)
- if not self._argcheck(*args) or scale <= 0:
- return inf
- x = (np.sort(x) - loc)/scale
- def log_psf(x, *args):
- x, lj = np.unique(x, return_counts=True) # fast for sorted x
- cdf_data = self._cdf(x, *args) if x.size else []
- if not (x.size and 1 - cdf_data[-1] <= 0):
- cdf = np.concatenate(([0], cdf_data, [1]))
- lj = np.concatenate((lj, [1]))
- else:
- cdf = np.concatenate(([0], cdf_data))
- # here we could use logcdf w/ logsumexp trick to take differences,
- # but in the context of the method, it seems unlikely to matter
- return lj * np.log(np.diff(cdf) / lj)
- return self._nlff_and_penalty(x, args, log_psf)
- class _ShapeInfo:
- def __init__(self, name, integrality=False, domain=(-np.inf, np.inf),
- inclusive=(True, True)):
- self.name = name
- self.integrality = integrality
- self.endpoints = domain
- self.inclusive = inclusive
- domain = list(domain)
- if np.isfinite(domain[0]) and not inclusive[0]:
- domain[0] = np.nextafter(domain[0], np.inf)
- if np.isfinite(domain[1]) and not inclusive[1]:
- domain[1] = np.nextafter(domain[1], -np.inf)
- self.domain = domain
- def _get_fixed_fit_value(kwds, names):
- """
- Given names such as ``['f0', 'fa', 'fix_a']``, check that there is
- at most one non-None value in `kwds` associated with those names.
- Return that value, or None if none of the names occur in `kwds`.
- As a side effect, all occurrences of those names in `kwds` are
- removed.
- """
- vals = [(name, kwds.pop(name)) for name in names if name in kwds]
- if len(vals) > 1:
- repeated = [name for name, val in vals]
- raise ValueError("fit method got multiple keyword arguments to "
- "specify the same fixed parameter: " +
- ', '.join(repeated))
- return vals[0][1] if vals else None
- # continuous random variables: implement maybe later
- #
- # hf --- Hazard Function (PDF / SF)
- # chf --- Cumulative hazard function (-log(SF))
- # psf --- Probability sparsity function (reciprocal of the pdf) in
- # units of percent-point-function (as a function of q).
- # Also, the derivative of the percent-point function.
- class rv_continuous(rv_generic):
- """A generic continuous random variable class meant for subclassing.
- `rv_continuous` is a base class to construct specific distribution classes
- and instances for continuous random variables. It cannot be used
- directly as a distribution.
- Parameters
- ----------
- momtype : int, optional
- The type of generic moment calculation to use: 0 for pdf, 1 (default)
- for ppf.
- a : float, optional
- Lower bound of the support of the distribution, default is minus
- infinity.
- b : float, optional
- Upper bound of the support of the distribution, default is plus
- infinity.
- xtol : float, optional
- The tolerance for fixed point calculation for generic ppf.
- badvalue : float, optional
- The value in a result arrays that indicates a value that for which
- some argument restriction is violated, default is np.nan.
- name : str, optional
- The name of the instance. This string is used to construct the default
- example for distributions.
- longname : str, optional
- This string is used as part of the first line of the docstring returned
- when a subclass has no docstring of its own. Note: `longname` exists
- for backwards compatibility, do not use for new subclasses.
- shapes : str, optional
- The shape of the distribution. For example ``"m, n"`` for a
- distribution that takes two integers as the two shape arguments for all
- its methods. If not provided, shape parameters will be inferred from
- the signature of the private methods, ``_pdf`` and ``_cdf`` of the
- instance.
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance then
- that instance is used.
- Attributes
- ----------
- a, b : float, optional
- Lower/upper bound of the support of the unshifted/unscaled distribution.
- This value is unaffected by the `loc` and `scale` parameters.
- To calculate the support of the shifted/scaled distribution,
- use the `support` method.
- Methods
- -------
- rvs
- pdf
- logpdf
- cdf
- logcdf
- sf
- logsf
- ppf
- isf
- moment
- stats
- entropy
- expect
- median
- mean
- std
- var
- interval
- __call__
- fit
- fit_loc_scale
- nnlf
- support
- Notes
- -----
- Public methods of an instance of a distribution class (e.g., ``pdf``,
- ``cdf``) check their arguments and pass valid arguments to private,
- computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid
- if it is within the support of the distribution.
- Whether a shape parameter is valid is decided by an ``_argcheck`` method
- (which defaults to checking that its arguments are strictly positive.)
- **Subclassing**
- New random variables can be defined by subclassing the `rv_continuous` class
- and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized
- to location 0 and scale 1).
- If positive argument checking is not correct for your RV
- then you will also need to re-define the ``_argcheck`` method.
- For most of the scipy.stats distributions, the support interval doesn't
- depend on the shape parameters. ``x`` being in the support interval is
- equivalent to ``self.a <= x <= self.b``. If either of the endpoints of
- the support do depend on the shape parameters, then
- i) the distribution must implement the ``_get_support`` method; and
- ii) those dependent endpoints must be omitted from the distribution's
- call to the ``rv_continuous`` initializer.
- Correct, but potentially slow defaults exist for the remaining
- methods but for speed and/or accuracy you can over-ride::
- _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf
- The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``,
- applied to a uniform random variate. In order to generate random variates
- efficiently, either the default ``_ppf`` needs to be overwritten (e.g.
- if the inverse cdf can expressed in an explicit form) or a sampling
- method needs to be implemented in a custom ``_rvs`` method.
- If possible, you should override ``_isf``, ``_sf`` or ``_logsf``.
- The main reason would be to improve numerical accuracy: for example,
- the survival function ``_sf`` is computed as ``1 - _cdf`` which can
- result in loss of precision if ``_cdf(x)`` is close to one.
- **Methods that can be overwritten by subclasses**
- ::
- _rvs
- _pdf
- _cdf
- _sf
- _ppf
- _isf
- _stats
- _munp
- _entropy
- _argcheck
- _get_support
- There are additional (internal and private) generic methods that can
- be useful for cross-checking and for debugging, but might work in all
- cases when directly called.
- A note on ``shapes``: subclasses need not specify them explicitly. In this
- case, `shapes` will be automatically deduced from the signatures of the
- overridden methods (`pdf`, `cdf` etc).
- If, for some reason, you prefer to avoid relying on introspection, you can
- specify ``shapes`` explicitly as an argument to the instance constructor.
- **Frozen Distributions**
- Normally, you must provide shape parameters (and, optionally, location and
- scale parameters to each call of a method of a distribution.
- Alternatively, the object may be called (as a function) to fix the shape,
- location, and scale parameters returning a "frozen" continuous RV object:
- rv = generic(<shape(s)>, loc=0, scale=1)
- `rv_frozen` object with the same methods but holding the given shape,
- location, and scale fixed
- **Statistics**
- Statistics are computed using numerical integration by default.
- For speed you can redefine this using ``_stats``:
- - take shape parameters and return mu, mu2, g1, g2
- - If you can't compute one of these, return it as None
- - Can also be defined with a keyword argument ``moments``, which is a
- string composed of "m", "v", "s", and/or "k".
- Only the components appearing in string should be computed and
- returned in the order "m", "v", "s", or "k" with missing values
- returned as None.
- Alternatively, you can override ``_munp``, which takes ``n`` and shape
- parameters and returns the n-th non-central moment of the distribution.
- **Deepcopying / Pickling**
- If a distribution or frozen distribution is deepcopied (pickled/unpickled,
- etc.), any underlying random number generator is deepcopied with it. An
- implication is that if a distribution relies on the singleton RandomState
- before copying, it will rely on a copy of that random state after copying,
- and ``np.random.seed`` will no longer control the state.
- Examples
- --------
- To create a new Gaussian distribution, we would do the following:
- >>> from scipy.stats import rv_continuous
- >>> class gaussian_gen(rv_continuous):
- ... "Gaussian distribution"
- ... def _pdf(self, x):
- ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi)
- >>> gaussian = gaussian_gen(name='gaussian')
- ``scipy.stats`` distributions are *instances*, so here we subclass
- `rv_continuous` and create an instance. With this, we now have
- a fully functional distribution with all relevant methods automagically
- generated by the framework.
- Note that above we defined a standard normal distribution, with zero mean
- and unit variance. Shifting and scaling of the distribution can be done
- by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)``
- essentially computes ``y = (x - loc) / scale`` and
- ``gaussian._pdf(y) / scale``.
- """
- def __init__(self, momtype=1, a=None, b=None, xtol=1e-14,
- badvalue=None, name=None, longname=None,
- shapes=None, seed=None):
- super().__init__(seed)
- # save the ctor parameters, cf generic freeze
- self._ctor_param = dict(
- momtype=momtype, a=a, b=b, xtol=xtol,
- badvalue=badvalue, name=name, longname=longname,
- shapes=shapes, seed=seed)
- if badvalue is None:
- badvalue = nan
- if name is None:
- name = 'Distribution'
- self.badvalue = badvalue
- self.name = name
- self.a = a
- self.b = b
- if a is None:
- self.a = -inf
- if b is None:
- self.b = inf
- self.xtol = xtol
- self.moment_type = momtype
- self.shapes = shapes
- self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],
- locscale_in='loc=0, scale=1',
- locscale_out='loc, scale')
- self._attach_methods()
- if longname is None:
- if name[0] in ['aeiouAEIOU']:
- hstr = "An "
- else:
- hstr = "A "
- longname = hstr + name
- if sys.flags.optimize < 2:
- # Skip adding docstrings if interpreter is run with -OO
- if self.__doc__ is None:
- self._construct_default_doc(longname=longname,
- docdict=docdict,
- discrete='continuous')
- else:
- dct = dict(distcont)
- self._construct_doc(docdict, dct.get(self.name))
- def __getstate__(self):
- dct = self.__dict__.copy()
- # these methods will be remade in __setstate__
- # _random_state attribute is taken care of by rv_generic
- attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
- "_cdfvec", "_ppfvec", "vecentropy", "generic_moment"]
- [dct.pop(attr, None) for attr in attrs]
- return dct
- def _attach_methods(self):
- """
- Attaches dynamically created methods to the rv_continuous instance.
- """
- # _attach_methods is responsible for calling _attach_argparser_methods
- self._attach_argparser_methods()
- # nin correction
- self._ppfvec = vectorize(self._ppf_single, otypes='d')
- self._ppfvec.nin = self.numargs + 1
- self.vecentropy = vectorize(self._entropy, otypes='d')
- self._cdfvec = vectorize(self._cdf_single, otypes='d')
- self._cdfvec.nin = self.numargs + 1
- if self.moment_type == 0:
- self.generic_moment = vectorize(self._mom0_sc, otypes='d')
- else:
- self.generic_moment = vectorize(self._mom1_sc, otypes='d')
- # Because of the *args argument of _mom0_sc, vectorize cannot count the
- # number of arguments correctly.
- self.generic_moment.nin = self.numargs + 1
- def _updated_ctor_param(self):
- """Return the current version of _ctor_param, possibly updated by user.
- Used by freezing.
- Keep this in sync with the signature of __init__.
- """
- dct = self._ctor_param.copy()
- dct['a'] = self.a
- dct['b'] = self.b
- dct['xtol'] = self.xtol
- dct['badvalue'] = self.badvalue
- dct['name'] = self.name
- dct['shapes'] = self.shapes
- return dct
- def _ppf_to_solve(self, x, q, *args):
- return self.cdf(*(x, )+args)-q
- def _ppf_single(self, q, *args):
- factor = 10.
- left, right = self._get_support(*args)
- if np.isinf(left):
- left = min(-factor, right)
- while self._ppf_to_solve(left, q, *args) > 0.:
- left, right = left * factor, left
- # left is now such that cdf(left) <= q
- # if right has changed, then cdf(right) > q
- if np.isinf(right):
- right = max(factor, left)
- while self._ppf_to_solve(right, q, *args) < 0.:
- left, right = right, right * factor
- # right is now such that cdf(right) >= q
- return optimize.brentq(self._ppf_to_solve,
- left, right, args=(q,)+args, xtol=self.xtol)
- # moment from definition
- def _mom_integ0(self, x, m, *args):
- return x**m * self.pdf(x, *args)
- def _mom0_sc(self, m, *args):
- _a, _b = self._get_support(*args)
- return integrate.quad(self._mom_integ0, _a, _b,
- args=(m,)+args)[0]
- # moment calculated using ppf
- def _mom_integ1(self, q, m, *args):
- return (self.ppf(q, *args))**m
- def _mom1_sc(self, m, *args):
- return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
- def _pdf(self, x, *args):
- return _derivative(self._cdf, x, dx=1e-5, args=args, order=5)
- # Could also define any of these
- def _logpdf(self, x, *args):
- p = self._pdf(x, *args)
- with np.errstate(divide='ignore'):
- return log(p)
- def _logpxf(self, x, *args):
- # continuous distributions have PDF, discrete have PMF, but sometimes
- # the distinction doesn't matter. This lets us use `_logpxf` for both
- # discrete and continuous distributions.
- return self._logpdf(x, *args)
- def _cdf_single(self, x, *args):
- _a, _b = self._get_support(*args)
- return integrate.quad(self._pdf, _a, x, args=args)[0]
- def _cdf(self, x, *args):
- return self._cdfvec(x, *args)
- def _logcdf(self, x, *args):
- median = self._ppf(0.5, *args)
- with np.errstate(divide='ignore'):
- return xpx.apply_where(
- x < median, (x,) + args,
- lambda x, *args: np.log(self._cdf(x, *args)),
- lambda x, *args: np.log1p(-self._sf(x, *args)))
- def _logsf(self, x, *args):
- median = self._ppf(0.5, *args)
- with np.errstate(divide='ignore'):
- return xpx.apply_where(
- x > median, (x,) + args,
- lambda x, *args: np.log(self._sf(x, *args)),
- lambda x, *args: np.log1p(-self._cdf(x, *args)))
- # generic _argcheck, _sf, _ppf, _isf, _rvs are defined
- # in rv_generic
- def pdf(self, x, *args, **kwds):
- """Probability density function at x of the given RV.
- Parameters
- ----------
- x : array_like
- quantiles
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- pdf : ndarray
- Probability density function evaluated at x
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- x, loc, scale = map(asarray, (x, loc, scale))
- args = tuple(map(asarray, args))
- dtyp = np.promote_types(x.dtype, np.float64)
- x = np.asarray((x - loc)/scale, dtype=dtyp)
- cond0 = self._argcheck(*args) & (scale > 0)
- cond1 = self._support_mask(x, *args) & (scale > 0)
- cond = cond0 & cond1
- output = zeros(shape(cond), dtyp)
- putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
- if np.any(cond):
- goodargs = argsreduce(cond, *((x,)+args+(scale,)))
- scale, goodargs = goodargs[-1], goodargs[:-1]
- place(output, cond, self._pdf(*goodargs) / scale)
- if output.ndim == 0:
- return output[()]
- return output
- def logpdf(self, x, *args, **kwds):
- """Log of the probability density function at x of the given RV.
- This uses a more numerically accurate calculation if available.
- Parameters
- ----------
- x : array_like
- quantiles
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- logpdf : array_like
- Log of the probability density function evaluated at x
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- x, loc, scale = map(asarray, (x, loc, scale))
- args = tuple(map(asarray, args))
- dtyp = np.promote_types(x.dtype, np.float64)
- x = np.asarray((x - loc)/scale, dtype=dtyp)
- cond0 = self._argcheck(*args) & (scale > 0)
- cond1 = self._support_mask(x, *args) & (scale > 0)
- cond = cond0 & cond1
- output = empty(shape(cond), dtyp)
- output.fill(-inf)
- putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
- if np.any(cond):
- goodargs = argsreduce(cond, *((x,)+args+(scale,)))
- scale, goodargs = goodargs[-1], goodargs[:-1]
- place(output, cond, self._logpdf(*goodargs) - log(scale))
- if output.ndim == 0:
- return output[()]
- return output
- def cdf(self, x, *args, **kwds):
- """
- Cumulative distribution function of the given RV.
- Parameters
- ----------
- x : array_like
- quantiles
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- cdf : ndarray
- Cumulative distribution function evaluated at `x`
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- x, loc, scale = map(asarray, (x, loc, scale))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- dtyp = np.promote_types(x.dtype, np.float64)
- x = np.asarray((x - loc)/scale, dtype=dtyp)
- cond0 = self._argcheck(*args) & (scale > 0)
- cond1 = self._open_support_mask(x, *args) & (scale > 0)
- cond2 = (x >= np.asarray(_b)) & cond0
- cond = cond0 & cond1
- output = zeros(shape(cond), dtyp)
- place(output, (1-cond0)+np.isnan(x), self.badvalue)
- place(output, cond2, 1.0)
- if np.any(cond): # call only if at least 1 entry
- goodargs = argsreduce(cond, *((x,)+args))
- place(output, cond, self._cdf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def logcdf(self, x, *args, **kwds):
- """Log of the cumulative distribution function at x of the given RV.
- Parameters
- ----------
- x : array_like
- quantiles
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- logcdf : array_like
- Log of the cumulative distribution function evaluated at x
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- x, loc, scale = map(asarray, (x, loc, scale))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- dtyp = np.promote_types(x.dtype, np.float64)
- x = np.asarray((x - loc)/scale, dtype=dtyp)
- cond0 = self._argcheck(*args) & (scale > 0)
- cond1 = self._open_support_mask(x, *args) & (scale > 0)
- cond2 = (x >= _b) & cond0
- cond = cond0 & cond1
- output = empty(shape(cond), dtyp)
- output.fill(-inf)
- place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue)
- place(output, cond2, 0.0)
- if np.any(cond): # call only if at least 1 entry
- goodargs = argsreduce(cond, *((x,)+args))
- place(output, cond, self._logcdf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def sf(self, x, *args, **kwds):
- """Survival function (1 - `cdf`) at x of the given RV.
- Parameters
- ----------
- x : array_like
- quantiles
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- sf : array_like
- Survival function evaluated at x
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- x, loc, scale = map(asarray, (x, loc, scale))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- dtyp = np.promote_types(x.dtype, np.float64)
- x = np.asarray((x - loc)/scale, dtype=dtyp)
- cond0 = self._argcheck(*args) & (scale > 0)
- cond1 = self._open_support_mask(x, *args) & (scale > 0)
- cond2 = cond0 & (x <= _a)
- cond = cond0 & cond1
- output = zeros(shape(cond), dtyp)
- place(output, (1-cond0)+np.isnan(x), self.badvalue)
- place(output, cond2, 1.0)
- if np.any(cond):
- goodargs = argsreduce(cond, *((x,)+args))
- place(output, cond, self._sf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def logsf(self, x, *args, **kwds):
- """Log of the survival function of the given RV.
- Returns the log of the "survival function," defined as (1 - `cdf`),
- evaluated at `x`.
- Parameters
- ----------
- x : array_like
- quantiles
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- logsf : ndarray
- Log of the survival function evaluated at `x`.
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- x, loc, scale = map(asarray, (x, loc, scale))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- dtyp = np.promote_types(x.dtype, np.float64)
- x = np.asarray((x - loc)/scale, dtype=dtyp)
- cond0 = self._argcheck(*args) & (scale > 0)
- cond1 = self._open_support_mask(x, *args) & (scale > 0)
- cond2 = cond0 & (x <= _a)
- cond = cond0 & cond1
- output = empty(shape(cond), dtyp)
- output.fill(-inf)
- place(output, (1-cond0)+np.isnan(x), self.badvalue)
- place(output, cond2, 0.0)
- if np.any(cond):
- goodargs = argsreduce(cond, *((x,)+args))
- place(output, cond, self._logsf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def ppf(self, q, *args, **kwds):
- """Percent point function (inverse of `cdf`) at q of the given RV.
- Parameters
- ----------
- q : array_like
- lower tail probability
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- x : array_like
- quantile corresponding to the lower tail probability q.
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- q, loc, scale = map(asarray, (q, loc, scale))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
- cond1 = (0 < q) & (q < 1)
- cond2 = cond0 & (q == 0)
- cond3 = cond0 & (q == 1)
- cond = cond0 & cond1
- output = np.full(shape(cond), fill_value=self.badvalue)
- lower_bound = _a * scale + loc
- upper_bound = _b * scale + loc
- place(output, cond2, argsreduce(cond2, lower_bound)[0])
- place(output, cond3, argsreduce(cond3, upper_bound)[0])
- if np.any(cond): # call only if at least 1 entry
- goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
- scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
- place(output, cond, self._ppf(*goodargs) * scale + loc)
- if output.ndim == 0:
- return output[()]
- return output
- def isf(self, q, *args, **kwds):
- """Inverse survival function (inverse of `sf`) at q of the given RV.
- Parameters
- ----------
- q : array_like
- upper tail probability
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- location parameter (default=0)
- scale : array_like, optional
- scale parameter (default=1)
- Returns
- -------
- x : ndarray or scalar
- Quantile corresponding to the upper tail probability q.
- """
- args, loc, scale = self._parse_args(*args, **kwds)
- q, loc, scale = map(asarray, (q, loc, scale))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
- cond1 = (0 < q) & (q < 1)
- cond2 = cond0 & (q == 1)
- cond3 = cond0 & (q == 0)
- cond = cond0 & cond1
- output = np.full(shape(cond), fill_value=self.badvalue)
- lower_bound = _a * scale + loc
- upper_bound = _b * scale + loc
- place(output, cond2, argsreduce(cond2, lower_bound)[0])
- place(output, cond3, argsreduce(cond3, upper_bound)[0])
- if np.any(cond):
- goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
- scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
- place(output, cond, self._isf(*goodargs) * scale + loc)
- if output.ndim == 0:
- return output[()]
- return output
- def _unpack_loc_scale(self, theta):
- try:
- loc = theta[-2]
- scale = theta[-1]
- args = tuple(theta[:-2])
- except IndexError as e:
- raise ValueError("Not enough input arguments.") from e
- return loc, scale, args
- def _nnlf_and_penalty(self, x, args):
- """
- Compute the penalized negative log-likelihood for the
- "standardized" data (i.e. already shifted by loc and
- scaled by scale) for the shape parameters in `args`.
- `x` can be a 1D numpy array or a CensoredData instance.
- """
- if isinstance(x, CensoredData):
- # Filter out the data that is not in the support.
- xs = x._supported(*self._get_support(*args))
- n_bad = len(x) - len(xs)
- i1, i2 = xs._interval.T
- terms = [
- # logpdf of the noncensored data.
- self._logpdf(xs._uncensored, *args),
- # logcdf of the left-censored data.
- self._logcdf(xs._left, *args),
- # logsf of the right-censored data.
- self._logsf(xs._right, *args),
- # log of probability of the interval-censored data.
- np.log(self._delta_cdf(i1, i2, *args)),
- ]
- else:
- cond0 = ~self._support_mask(x, *args)
- n_bad = np.count_nonzero(cond0)
- if n_bad > 0:
- x = argsreduce(~cond0, x)[0]
- terms = [self._logpdf(x, *args)]
- totals, bad_counts = zip(*[_sum_finite(term) for term in terms])
- total = sum(totals)
- n_bad += sum(bad_counts)
- return -total + n_bad * _LOGXMAX * 100
- def _penalized_nnlf(self, theta, x):
- """Penalized negative loglikelihood function.
- i.e., - sum (log pdf(x, theta), axis=0) + penalty
- where theta are the parameters (including loc and scale)
- """
- loc, scale, args = self._unpack_loc_scale(theta)
- if not self._argcheck(*args) or scale <= 0:
- return inf
- if isinstance(x, CensoredData):
- x = (x - loc) / scale
- n_log_scale = (len(x) - x.num_censored()) * log(scale)
- else:
- x = (x - loc) / scale
- n_log_scale = len(x) * log(scale)
- return self._nnlf_and_penalty(x, args) + n_log_scale
- def _fitstart(self, data, args=None):
- """Starting point for fit (shape arguments + loc + scale)."""
- if args is None:
- args = (1.0,)*self.numargs
- loc, scale = self._fit_loc_scale_support(data, *args)
- return args + (loc, scale)
- def _reduce_func(self, args, kwds, data=None):
- """
- Return the (possibly reduced) function to optimize in order to find MLE
- estimates for the .fit method.
- """
- # Convert fixed shape parameters to the standard numeric form: e.g. for
- # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
- # for `f0`, `fa` or 'fix_a'. The following converts the latter two
- # into the first (numeric) form.
- shapes = []
- if self.shapes:
- shapes = self.shapes.replace(',', ' ').split()
- for j, s in enumerate(shapes):
- key = 'f' + str(j)
- names = [key, 'f' + s, 'fix_' + s]
- val = _get_fixed_fit_value(kwds, names)
- if val is not None:
- kwds[key] = val
- args = list(args)
- Nargs = len(args)
- fixedn = []
- names = [f'f{n}' for n in range(Nargs - 2)] + ['floc', 'fscale']
- x0 = []
- for n, key in enumerate(names):
- if key in kwds:
- fixedn.append(n)
- args[n] = kwds.pop(key)
- else:
- x0.append(args[n])
- methods = {"mle", "mm"}
- method = kwds.pop('method', "mle").lower()
- if method == "mm":
- n_params = len(shapes) + 2 - len(fixedn)
- exponents = (np.arange(1, n_params+1))[:, np.newaxis]
- data_moments = np.sum(data[None, :]**exponents/len(data), axis=1)
- def objective(theta, x):
- return self._moment_error(theta, x, data_moments)
- elif method == "mle":
- objective = self._penalized_nnlf
- else:
- raise ValueError(f"Method '{method}' not available; "
- f"must be one of {methods}")
- if len(fixedn) == 0:
- func = objective
- restore = None
- else:
- if len(fixedn) == Nargs:
- raise ValueError(
- "All parameters fixed. There is nothing to optimize.")
- def restore(args, theta):
- # Replace with theta for all numbers not in fixedn
- # This allows the non-fixed values to vary, but
- # we still call self.nnlf with all parameters.
- i = 0
- for n in range(Nargs):
- if n not in fixedn:
- args[n] = theta[i]
- i += 1
- return args
- def func(theta, x):
- newtheta = restore(args[:], theta)
- return objective(newtheta, x)
- return x0, func, restore, args
- def _moment_error(self, theta, x, data_moments):
- loc, scale, args = self._unpack_loc_scale(theta)
- if not self._argcheck(*args) or scale <= 0:
- return inf
- dist_moments = np.array([self.moment(i+1, *args, loc=loc, scale=scale)
- for i in range(len(data_moments))])
- if np.any(np.isnan(dist_moments)):
- raise ValueError("Method of moments encountered a non-finite "
- "distribution moment and cannot continue. "
- "Consider trying method='MLE'.")
- return (((data_moments - dist_moments) /
- np.maximum(np.abs(data_moments), 1e-8))**2).sum()
- def fit(self, data, *args, **kwds):
- r"""
- Return estimates of shape (if applicable), location, and scale
- parameters from data. The default estimation method is Maximum
- Likelihood Estimation (MLE), but Method of Moments (MM)
- is also available.
- Starting estimates for the fit are given by input arguments;
- for any arguments not provided with starting estimates,
- ``self._fitstart(data)`` is called to generate such.
- One can hold some parameters fixed to specific values by passing in
- keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
- and ``floc`` and ``fscale`` (for location and scale parameters,
- respectively).
- Parameters
- ----------
- data : array_like or `CensoredData` instance
- Data to use in estimating the distribution parameters.
- arg1, arg2, arg3,... : floats, optional
- Starting value(s) for any shape-characterizing arguments (those not
- provided will be determined by a call to ``_fitstart(data)``).
- No default value.
- **kwds : floats, optional
- - `loc`: initial guess of the distribution's location parameter.
- - `scale`: initial guess of the distribution's scale parameter.
- Special keyword arguments are recognized as holding certain
- parameters fixed:
- - f0, ..., fn : hold respective shape parameters fixed.
- Alternatively, shape parameters to fix can be specified by name.
- For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a``
- are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
- equivalent to ``f1``.
- - floc : hold location parameter fixed to specified value.
- - fscale : hold scale parameter fixed to specified value.
- - optimizer : The optimizer to use. The optimizer must take
- ``func`` and starting position as the first two arguments,
- plus ``args`` (for extra arguments to pass to the
- function to be optimized) and ``disp``.
- The ``fit`` method calls the optimizer with ``disp=0`` to suppress output.
- The optimizer must return the estimated parameters.
- - method : The method to use. The default is "MLE" (Maximum
- Likelihood Estimate); "MM" (Method of Moments)
- is also available.
- Raises
- ------
- TypeError, ValueError
- If an input is invalid
- `~scipy.stats.FitError`
- If fitting fails or the fit produced would be invalid
- Returns
- -------
- parameter_tuple : tuple of floats
- Estimates for any shape parameters (if applicable), followed by
- those for location and scale. For most random variables, shape
- statistics will be returned, but there are exceptions (e.g.
- ``norm``).
- Notes
- -----
- With ``method="MLE"`` (default), the fit is computed by minimizing
- the negative log-likelihood function. A large, finite penalty
- (rather than infinite negative log-likelihood) is applied for
- observations beyond the support of the distribution.
- With ``method="MM"``, the fit is computed by minimizing the L2 norm
- of the relative errors between the first *k* raw (about zero) data
- moments and the corresponding distribution moments, where *k* is the
- number of non-fixed parameters.
- More precisely, the objective function is::
- (((data_moments - dist_moments)
- / np.maximum(np.abs(data_moments), 1e-8))**2).sum()
- where the constant ``1e-8`` avoids division by zero in case of
- vanishing data moments. Typically, this error norm can be reduced to
- zero.
- Note that the standard method of moments can produce parameters for
- which some data are outside the support of the fitted distribution;
- this implementation does nothing to prevent this.
- For either method,
- the returned answer is not guaranteed to be globally optimal; it
- may only be locally optimal, or the optimization may fail altogether.
- If the data contain any of ``np.nan``, ``np.inf``, or ``-np.inf``,
- the `fit` method will raise a ``RuntimeError``.
- When passing a ``CensoredData`` instance to ``data``, the log-likelihood
- function is defined as:
- .. math::
- l(\pmb{\theta}; k) & = \sum
- \log(f(k_u; \pmb{\theta}))
- + \sum
- \log(F(k_l; \pmb{\theta})) \\
- & + \sum
- \log(1 - F(k_r; \pmb{\theta})) \\
- & + \sum
- \log(F(k_{\text{high}, i}; \pmb{\theta})
- - F(k_{\text{low}, i}; \pmb{\theta}))
- where :math:`f` and :math:`F` are the pdf and cdf, respectively, of the
- function being fitted, :math:`\pmb{\theta}` is the parameter vector,
- :math:`u` are the indices of uncensored observations,
- :math:`l` are the indices of left-censored observations,
- :math:`r` are the indices of right-censored observations,
- subscripts "low"/"high" denote endpoints of interval-censored observations, and
- :math:`i` are the indices of interval-censored observations.
- Examples
- --------
- Generate some data to fit: draw random variates from the `beta`
- distribution
- >>> import numpy as np
- >>> from scipy.stats import beta
- >>> a, b = 1., 2.
- >>> rng = np.random.default_rng(172786373191770012695001057628748821561)
- >>> x = beta.rvs(a, b, size=1000, random_state=rng)
- Now we can fit all four parameters (``a``, ``b``, ``loc`` and
- ``scale``):
- >>> a1, b1, loc1, scale1 = beta.fit(x)
- >>> a1, b1, loc1, scale1
- (1.0198945204435628, 1.9484708982737828, 4.372241314917588e-05, 0.9979078845964814)
- The fit can be done also using a custom optimizer:
- >>> from scipy.optimize import minimize
- >>> def custom_optimizer(func, x0, args=(), disp=0):
- ... res = minimize(func, x0, args, method="slsqp", options={"disp": disp})
- ... if res.success:
- ... return res.x
- ... raise RuntimeError('optimization routine failed')
- >>> a1, b1, loc1, scale1 = beta.fit(x, method="MLE", optimizer=custom_optimizer)
- >>> a1, b1, loc1, scale1
- (1.0198821087258905, 1.948484145914738, 4.3705304486881485e-05, 0.9979104663953395)
- We can also use some prior knowledge about the dataset: let's keep
- ``loc`` and ``scale`` fixed:
- >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)
- >>> loc1, scale1
- (0, 1)
- We can also keep shape parameters fixed by using ``f``-keywords. To
- keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
- equivalently, ``fa=1``:
- >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)
- >>> a1
- 1
- Not all distributions return estimates for the shape parameters.
- ``norm`` for example just returns estimates for location and scale:
- >>> from scipy.stats import norm
- >>> x = norm.rvs(a, b, size=1000, random_state=123)
- >>> loc1, scale1 = norm.fit(x)
- >>> loc1, scale1
- (0.92087172783841631, 2.0015750750324668)
- """ # noqa: E501
- method = kwds.get('method', "mle").lower()
- censored = isinstance(data, CensoredData)
- if censored:
- if method != 'mle':
- raise ValueError('For censored data, the method must'
- ' be "MLE".')
- if data.num_censored() == 0:
- # There are no censored values in data, so replace the
- # CensoredData instance with a regular array.
- data = data._uncensored
- censored = False
- Narg = len(args)
- if Narg > self.numargs:
- raise TypeError("Too many input arguments.")
- # Check the finiteness of data only if data is not an instance of
- # CensoredData. The arrays in a CensoredData instance have already
- # been validated.
- if not censored:
- # Note: `ravel()` is called for backwards compatibility.
- data = np.asarray(data).ravel()
- if not np.isfinite(data).all():
- raise ValueError("The data contains non-finite values.")
- start = [None]*2
- if (Narg < self.numargs) or not ('loc' in kwds and
- 'scale' in kwds):
- # get distribution specific starting locations
- start = self._fitstart(data)
- args += start[Narg:-2]
- loc = kwds.pop('loc', start[-2])
- scale = kwds.pop('scale', start[-1])
- args += (loc, scale)
- x0, func, restore, args = self._reduce_func(args, kwds, data=data)
- optimizer = kwds.pop('optimizer', optimize.fmin)
- # convert string to function in scipy.optimize
- optimizer = _fit_determine_optimizer(optimizer)
- # by now kwds must be empty, since everybody took what they needed
- if kwds:
- raise TypeError(f"Unknown arguments: {kwds}.")
- # In some cases, method of moments can be done with fsolve/root
- # instead of an optimizer, but sometimes no solution exists,
- # especially when the user fixes parameters. Minimizing the sum
- # of squares of the error generalizes to these cases.
- vals = optimizer(func, x0, args=(data,), disp=0)
- obj = func(vals, data)
- if restore is not None:
- vals = restore(args, vals)
- vals = tuple(vals)
- loc, scale, shapes = self._unpack_loc_scale(vals)
- if not (np.all(self._argcheck(*shapes)) and scale > 0):
- raise FitError("Optimization converged to parameters that are "
- "outside the range allowed by the distribution.")
- if method == 'mm':
- if not np.isfinite(obj):
- raise FitError("Optimization failed: either a data moment "
- "or fitted distribution moment is "
- "non-finite.")
- return vals
- def _fit_loc_scale_support(self, data, *args):
- """Estimate loc and scale parameters from data accounting for support.
- Parameters
- ----------
- data : array_like
- Data to fit.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- Returns
- -------
- Lhat : float
- Estimated location parameter for the data.
- Shat : float
- Estimated scale parameter for the data.
- """
- if isinstance(data, CensoredData):
- # For this estimate, "uncensor" the data by taking the
- # given endpoints as the data for the left- or right-censored
- # data, and the mean for the interval-censored data.
- data = data._uncensor()
- else:
- data = np.asarray(data)
- # Estimate location and scale according to the method of moments.
- loc_hat, scale_hat = self.fit_loc_scale(data, *args)
- # Compute the support according to the shape parameters.
- self._argcheck(*args)
- _a, _b = self._get_support(*args)
- a, b = _a, _b
- support_width = b - a
- # If the support is empty then return the moment-based estimates.
- if support_width <= 0:
- return loc_hat, scale_hat
- # Compute the proposed support according to the loc and scale
- # estimates.
- a_hat = loc_hat + a * scale_hat
- b_hat = loc_hat + b * scale_hat
- # Use the moment-based estimates if they are compatible with the data.
- data_a = np.min(data)
- data_b = np.max(data)
- if a_hat < data_a and data_b < b_hat:
- return loc_hat, scale_hat
- # Otherwise find other estimates that are compatible with the data.
- data_width = data_b - data_a
- rel_margin = 0.1
- margin = data_width * rel_margin
- # For a finite interval, both the location and scale
- # should have interesting values.
- if support_width < np.inf:
- loc_hat = (data_a - a) - margin
- scale_hat = (data_width + 2 * margin) / support_width
- return loc_hat, scale_hat
- # For a one-sided interval, use only an interesting location parameter.
- if a > -np.inf:
- return (data_a - a) - margin, 1
- elif b < np.inf:
- return (data_b - b) + margin, 1
- else:
- raise RuntimeError
- def fit_loc_scale(self, data, *args):
- """
- Estimate loc and scale parameters from data using 1st and 2nd moments.
- Parameters
- ----------
- data : array_like
- Data to fit.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- Returns
- -------
- Lhat : float
- Estimated location parameter for the data.
- Shat : float
- Estimated scale parameter for the data.
- """
- mu, mu2 = self.stats(*args, **{'moments': 'mv'})
- tmp = asarray(data)
- muhat = tmp.mean()
- mu2hat = tmp.var()
- Shat = sqrt(mu2hat / mu2)
- with np.errstate(invalid='ignore'):
- Lhat = muhat - Shat*mu
- if not np.isfinite(Lhat):
- Lhat = 0
- if not (np.isfinite(Shat) and (0 < Shat)):
- Shat = 1
- return Lhat, Shat
- def _entropy(self, *args):
- def integ(x):
- val = self._pdf(x, *args)
- return entr(val)
- # upper limit is often inf, so suppress warnings when integrating
- _a, _b = self._get_support(*args)
- with np.errstate(over='ignore'):
- h = integrate.quad(integ, _a, _b)[0]
- if not np.isnan(h):
- return h
- else:
- # try with different limits if integration problems
- low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
- if np.isinf(_b):
- upper = upp
- else:
- upper = _b
- if np.isinf(_a):
- lower = low
- else:
- lower = _a
- return integrate.quad(integ, lower, upper)[0]
- def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
- conditional=False, **kwds):
- """Calculate expected value of a function with respect to the
- distribution by numerical integration.
- The expected value of a function ``f(x)`` with respect to a
- distribution ``dist`` is defined as::
- ub
- E[f(x)] = Integral(f(x) * dist.pdf(x)),
- lb
- where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)``
- distribution. If the bounds ``lb`` and ``ub`` correspond to the
- support of the distribution, e.g. ``[-inf, inf]`` in the default
- case, then the integral is the unrestricted expectation of ``f(x)``.
- Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0``
- outside a finite interval in which case the expectation is
- calculated within the finite range ``[lb, ub]``.
- Parameters
- ----------
- func : callable, optional
- Function for which integral is calculated. Takes only one argument.
- The default is the identity mapping f(x) = x.
- args : tuple, optional
- Shape parameters of the distribution.
- loc : float, optional
- Location parameter (default=0).
- scale : float, optional
- Scale parameter (default=1).
- lb, ub : scalar, optional
- Lower and upper bound for integration. Default is set to the
- support of the distribution.
- conditional : bool, optional
- If True, the integral is corrected by the conditional probability
- of the integration interval. The return value is the expectation
- of the function, conditional on being in the given interval.
- Default is False.
- Additional keyword arguments are passed to the integration routine.
- Returns
- -------
- expect : float
- The calculated expected value.
- Notes
- -----
- The integration behavior of this function is inherited from
- `scipy.integrate.quad`. Neither this function nor
- `scipy.integrate.quad` can verify whether the integral exists or is
- finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and
- ``cauchy(0).expect()`` returns ``0.0``.
- Likewise, the accuracy of results is not verified by the function.
- `scipy.integrate.quad` is typically reliable for integrals that are
- numerically favorable, but it is not guaranteed to converge
- to a correct value for all possible intervals and integrands. This
- function is provided for convenience; for critical applications,
- check results against other integration methods.
- The function is not vectorized.
- Examples
- --------
- To understand the effect of the bounds of integration consider
- >>> from scipy.stats import expon
- >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0)
- 0.6321205588285578
- This is close to
- >>> expon(1).cdf(2.0) - expon(1).cdf(0.0)
- 0.6321205588285577
- If ``conditional=True``
- >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True)
- 1.0000000000000002
- The slight deviation from 1 is due to numerical integration.
- The integrand can be treated as a complex-valued function
- by passing ``complex_func=True`` to `scipy.integrate.quad` .
- >>> import numpy as np
- >>> from scipy.stats import vonmises
- >>> res = vonmises(loc=2, kappa=1).expect(lambda x: np.exp(1j*x),
- ... complex_func=True)
- >>> res
- (-0.18576377217422957+0.40590124735052263j)
- >>> np.angle(res) # location of the (circular) distribution
- 2.0
- """
- lockwds = {'loc': loc,
- 'scale': scale}
- self._argcheck(*args)
- _a, _b = self._get_support(*args)
- if func is None:
- def fun(x, *args):
- return x * self.pdf(x, *args, **lockwds)
- else:
- def fun(x, *args):
- return func(x) * self.pdf(x, *args, **lockwds)
- if lb is None:
- lb = loc + _a * scale
- if ub is None:
- ub = loc + _b * scale
- cdf_bounds = self.cdf([lb, ub], *args, **lockwds)
- invfac = cdf_bounds[1] - cdf_bounds[0]
- kwds['args'] = args
- # split interval to help integrator w/ infinite support; see gh-8928
- alpha = 0.05 # split body from tails at probability mass `alpha`
- inner_bounds = np.array([alpha, 1-alpha])
- cdf_inner_bounds = cdf_bounds[0] + invfac * inner_bounds
- c, d = loc + self._ppf(cdf_inner_bounds, *args) * scale
- # Do not silence warnings from integration.
- lbc = integrate.quad(fun, lb, c, **kwds)[0]
- cd = integrate.quad(fun, c, d, **kwds)[0]
- dub = integrate.quad(fun, d, ub, **kwds)[0]
- vals = (lbc + cd + dub)
- if conditional:
- vals /= invfac
- return np.array(vals)[()] # make it a numpy scalar like other methods
- def _param_info(self):
- shape_info = self._shape_info()
- loc_info = _ShapeInfo("loc", False, (-np.inf, np.inf), (False, False))
- scale_info = _ShapeInfo("scale", False, (0, np.inf), (False, False))
- param_info = shape_info + [loc_info, scale_info]
- return param_info
- # For now, _delta_cdf is a private method.
- def _delta_cdf(self, x1, x2, *args, loc=0, scale=1):
- """
- Compute CDF(x2) - CDF(x1).
- Where x1 is greater than the median, compute SF(x1) - SF(x2),
- otherwise compute CDF(x2) - CDF(x1).
- This function is only useful if `dist.sf(x, ...)` has an implementation
- that is numerically more accurate than `1 - dist.cdf(x, ...)`.
- """
- cdf1 = self.cdf(x1, *args, loc=loc, scale=scale)
- # Possible optimizations (needs investigation-these might not be
- # better):
- # * Use xpx.apply_where instead of np.where
- # * Instead of cdf1 > 0.5, compare x1 to the median.
- result = np.where(cdf1 > 0.5,
- (self.sf(x1, *args, loc=loc, scale=scale)
- - self.sf(x2, *args, loc=loc, scale=scale)),
- self.cdf(x2, *args, loc=loc, scale=scale) - cdf1)
- if result.ndim == 0:
- result = result[()]
- return result
- # Helpers for the discrete distributions
- def _drv2_moment(self, n, *args):
- """Non-central moment of discrete distribution."""
- def fun(x):
- return np.power(x, n) * self._pmf(x, *args)
- _a, _b = self._get_support(*args)
- return _expect(fun, _a, _b, self._ppf(0.5, *args), self.inc)
- def _drv2_ppfsingle(self, q, *args): # Use basic bisection algorithm
- _a, _b = self._get_support(*args)
- b = _b
- a = _a
- step = 10
- if isinf(b): # Be sure ending point is > q
- b = float(max(100*q, 10))
- while 1:
- if b >= _b:
- qb = 1.0
- break
- qb = self._cdf(b, *args)
- if (qb < q):
- b += step
- step *= 2
- else:
- break
- else:
- qb = 1.0
- step = 10
- if isinf(a): # be sure starting point < q
- a = float(min(-100*q, -10))
- while 1:
- if a <= _a:
- qb = 0.0
- break
- qa = self._cdf(a, *args)
- if (qa > q):
- a -= step
- step *= 2
- else:
- break
- else:
- qa = self._cdf(a, *args)
- if np.isinf(a) or np.isinf(b):
- message = "Arguments that bracket the requested quantile could not be found."
- raise RuntimeError(message)
- # maximum number of bisections within the normal float64s
- # maxiter = int(np.log2(finfo.max) - np.log2(finfo.smallest_normal))
- maxiter = 2046
- for i in range(maxiter):
- if (qa == q):
- return a
- if (qb == q):
- return b
- if b <= a+1:
- if qa > q:
- return a
- else:
- return b
- c = int((a+b)/2.0)
- qc = self._cdf(c, *args)
- if (qc < q):
- if a != c:
- a = c
- else:
- raise RuntimeError('updating stopped, endless loop')
- qa = qc
- elif (qc > q):
- if b != c:
- b = c
- else:
- raise RuntimeError('updating stopped, endless loop')
- qb = qc
- else:
- return c
- # Must over-ride one of _pmf or _cdf or pass in
- # x_k, p(x_k) lists in initialization
- class rv_discrete(rv_generic):
- """A generic discrete random variable class meant for subclassing.
- `rv_discrete` is a base class to construct specific distribution classes
- and instances for discrete random variables. It can also be used
- to construct an arbitrary distribution defined by a list of support
- points and corresponding probabilities.
- Parameters
- ----------
- a : float, optional
- Lower bound of the support of the distribution, default: 0
- b : float, optional
- Upper bound of the support of the distribution, default: plus infinity
- moment_tol : float, optional
- The tolerance for the generic calculation of moments.
- values : tuple of two array_like, optional
- ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero
- probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk``
- and ``pk`` must have the same shape, and ``xk`` must be unique.
- inc : integer, optional
- Increment for the support of the distribution.
- Default is 1. (other values have not been tested)
- badvalue : float, optional
- The value in a result arrays that indicates a value that for which
- some argument restriction is violated, default is np.nan.
- name : str, optional
- The name of the instance. This string is used to construct the default
- example for distributions.
- longname : str, optional
- This string is used as part of the first line of the docstring returned
- when a subclass has no docstring of its own. Note: `longname` exists
- for backwards compatibility, do not use for new subclasses.
- shapes : str, optional
- The shape of the distribution. For example "m, n" for a distribution
- that takes two integers as the two shape arguments for all its methods
- If not provided, shape parameters will be inferred from
- the signatures of the private methods, ``_pmf`` and ``_cdf`` of
- the instance.
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance then
- that instance is used.
- Attributes
- ----------
- a, b : float, optional
- Lower/upper bound of the support of the unshifted/unscaled distribution.
- This value is unaffected by the `loc` and `scale` parameters.
- To calculate the support of the shifted/scaled distribution,
- use the `support` method.
- Methods
- -------
- rvs
- pmf
- logpmf
- cdf
- logcdf
- sf
- logsf
- ppf
- isf
- moment
- stats
- entropy
- expect
- median
- mean
- std
- var
- interval
- __call__
- support
- Notes
- -----
- This class is similar to `rv_continuous`. Whether a shape parameter is
- valid is decided by an ``_argcheck`` method (which defaults to checking
- that its arguments are strictly positive.)
- The main differences are as follows.
- - The support of the distribution is a set of integers.
- - Instead of the probability density function, ``pdf`` (and the
- corresponding private ``_pdf``), this class defines the
- *probability mass function*, `pmf` (and the corresponding
- private ``_pmf``.)
- - There is no ``scale`` parameter.
- - The default implementations of methods (e.g. ``_cdf``) are not designed
- for distributions with support that is unbounded below (i.e.
- ``a=-np.inf``), so they must be overridden.
- To create a new discrete distribution, we would do the following:
- >>> from scipy.stats import rv_discrete
- >>> class poisson_gen(rv_discrete):
- ... "Poisson distribution"
- ... def _pmf(self, k, mu):
- ... return exp(-mu) * mu**k / factorial(k)
- and create an instance::
- >>> poisson = poisson_gen(name="poisson")
- Note that above we defined the Poisson distribution in the standard form.
- Shifting the distribution can be done by providing the ``loc`` parameter
- to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)``
- delegates the work to ``poisson._pmf(x-loc, mu)``.
- **Discrete distributions from a list of probabilities**
- Alternatively, you can construct an arbitrary discrete rv defined
- on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the
- ``values`` keyword argument to the `rv_discrete` constructor.
- **Deepcopying / Pickling**
- If a distribution or frozen distribution is deepcopied (pickled/unpickled,
- etc.), any underlying random number generator is deepcopied with it. An
- implication is that if a distribution relies on the singleton RandomState
- before copying, it will rely on a copy of that random state after copying,
- and ``np.random.seed`` will no longer control the state.
- Examples
- --------
- Custom made discrete distribution:
- >>> import numpy as np
- >>> from scipy import stats
- >>> xk = np.arange(7)
- >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)
- >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))
- >>>
- >>> import matplotlib.pyplot as plt
- >>> fig, ax = plt.subplots(1, 1)
- >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
- >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
- >>> plt.show()
- Random number generation:
- >>> R = custm.rvs(size=100)
- """
- def __new__(cls, a=0, b=inf, name=None, badvalue=None,
- moment_tol=1e-8, values=None, inc=1, longname=None,
- shapes=None, seed=None):
- if values is not None:
- # dispatch to a subclass
- return super().__new__(rv_sample)
- else:
- # business as usual
- return super().__new__(cls)
- def __init__(self, a=0, b=inf, name=None, badvalue=None,
- moment_tol=1e-8, values=None, inc=1, longname=None,
- shapes=None, seed=None):
- super().__init__(seed)
- # cf generic freeze
- self._ctor_param = dict(
- a=a, b=b, name=name, badvalue=badvalue,
- moment_tol=moment_tol, values=values, inc=inc,
- longname=longname, shapes=shapes, seed=seed)
- if badvalue is None:
- badvalue = nan
- self.badvalue = badvalue
- self.a = a
- self.b = b
- self.moment_tol = moment_tol
- self.inc = inc
- self.shapes = shapes
- if values is not None:
- raise ValueError("rv_discrete.__init__(..., values != None, ...)")
- self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],
- locscale_in='loc=0',
- # scale=1 for discrete RVs
- locscale_out='loc, 1')
- self._attach_methods()
- self._construct_docstrings(name, longname)
- def __getstate__(self):
- dct = self.__dict__.copy()
- # these methods will be remade in __setstate__
- attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
- "_cdfvec", "_ppfvec", "generic_moment"]
- [dct.pop(attr, None) for attr in attrs]
- return dct
- def _attach_methods(self):
- """Attaches dynamically created methods to the rv_discrete instance."""
- self._cdfvec = vectorize(self._cdf_single, otypes='d')
- self.vecentropy = vectorize(self._entropy)
- # _attach_methods is responsible for calling _attach_argparser_methods
- self._attach_argparser_methods()
- # nin correction needs to be after we know numargs
- # correct nin for generic moment vectorization
- _vec_generic_moment = vectorize(_drv2_moment, otypes='d')
- _vec_generic_moment.nin = self.numargs + 2
- self.generic_moment = types.MethodType(_vec_generic_moment, self)
- # correct nin for ppf vectorization
- _vppf = vectorize(_drv2_ppfsingle, otypes='d')
- _vppf.nin = self.numargs + 2
- self._ppfvec = types.MethodType(_vppf, self)
- # now that self.numargs is defined, we can adjust nin
- self._cdfvec.nin = self.numargs + 1
- def _construct_docstrings(self, name, longname):
- if name is None:
- name = 'Distribution'
- self.name = name
- # generate docstring for subclass instances
- if longname is None:
- if name[0] in ['aeiouAEIOU']:
- hstr = "An "
- else:
- hstr = "A "
- longname = hstr + name
- if sys.flags.optimize < 2:
- # Skip adding docstrings if interpreter is run with -OO
- if self.__doc__ is None:
- self._construct_default_doc(longname=longname,
- docdict=docdict_discrete,
- discrete='discrete')
- else:
- dct = dict(distdiscrete)
- self._construct_doc(docdict_discrete, dct.get(self.name))
- # discrete RV do not have the scale parameter, remove it
- self.__doc__ = self.__doc__.replace(
- '\n scale : array_like, '
- 'optional\n scale parameter (default=1)', '')
- def _updated_ctor_param(self):
- """Return the current version of _ctor_param, possibly updated by user.
- Used by freezing.
- Keep this in sync with the signature of __init__.
- """
- dct = self._ctor_param.copy()
- dct['a'] = self.a
- dct['b'] = self.b
- dct['badvalue'] = self.badvalue
- dct['moment_tol'] = self.moment_tol
- dct['inc'] = self.inc
- dct['name'] = self.name
- dct['shapes'] = self.shapes
- return dct
- def _nonzero(self, k, *args):
- return floor(k) == k
- def _pmf(self, k, *args):
- return self._cdf(k, *args) - self._cdf(k-1, *args)
- def _logpmf(self, k, *args):
- with np.errstate(divide='ignore'):
- return log(self._pmf(k, *args))
- def _logpxf(self, k, *args):
- # continuous distributions have PDF, discrete have PMF, but sometimes
- # the distinction doesn't matter. This lets us use `_logpxf` for both
- # discrete and continuous distributions.
- return self._logpmf(k, *args)
- def _unpack_loc_scale(self, theta):
- try:
- loc = theta[-1]
- scale = 1
- args = tuple(theta[:-1])
- except IndexError as e:
- raise ValueError("Not enough input arguments.") from e
- return loc, scale, args
- def _cdf_single(self, k, *args):
- _a, _b = self._get_support(*args)
- m = arange(int(_a), k+1)
- return np.sum(self._pmf(m, *args), axis=0)
- def _cdf(self, x, *args):
- k = floor(x).astype(np.float64)
- return self._cdfvec(k, *args)
- # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic
- def rvs(self, *args, **kwargs):
- """Random variates of given type.
- Parameters
- ----------
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- size : int or tuple of ints, optional
- Defining number of random variates (Default is 1). Note that `size`
- has to be given as keyword, not as positional argument.
- random_state : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `random_state` is None (or `np.random`), the
- `numpy.random.RandomState` singleton is used.
- If `random_state` is an int, a new ``RandomState`` instance is
- used, seeded with `random_state`.
- If `random_state` is already a ``Generator`` or ``RandomState``
- instance, that instance is used.
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of given `size`.
- """
- kwargs['discrete'] = True
- return super().rvs(*args, **kwargs)
- def pmf(self, k, *args, **kwds):
- """Probability mass function at k of the given RV.
- Parameters
- ----------
- k : array_like
- Quantiles.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information)
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- pmf : array_like
- Probability mass function evaluated at k
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- k, loc = map(asarray, (k, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- k = asarray(k-loc)
- cond0 = self._argcheck(*args)
- cond1 = (k >= _a) & (k <= _b)
- if not isinstance(self, rv_sample):
- cond1 = cond1 & self._nonzero(k, *args)
- cond = cond0 & cond1
- output = zeros(shape(cond), 'd')
- place(output, (1-cond0) + np.isnan(k), self.badvalue)
- if np.any(cond):
- goodargs = argsreduce(cond, *((k,)+args))
- place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
- if output.ndim == 0:
- return output[()]
- return output
- def logpmf(self, k, *args, **kwds):
- """Log of the probability mass function at k of the given RV.
- Parameters
- ----------
- k : array_like
- Quantiles.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter. Default is 0.
- Returns
- -------
- logpmf : array_like
- Log of the probability mass function evaluated at k.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- k, loc = map(asarray, (k, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- k = asarray(k-loc)
- cond0 = self._argcheck(*args)
- cond1 = (k >= _a) & (k <= _b)
- if not isinstance(self, rv_sample):
- cond1 = cond1 & self._nonzero(k, *args)
- cond = cond0 & cond1
- output = empty(shape(cond), 'd')
- output.fill(-inf)
- place(output, (1-cond0) + np.isnan(k), self.badvalue)
- if np.any(cond):
- goodargs = argsreduce(cond, *((k,)+args))
- place(output, cond, self._logpmf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def cdf(self, k, *args, **kwds):
- """Cumulative distribution function of the given RV.
- Parameters
- ----------
- k : array_like, int
- Quantiles.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- cdf : ndarray
- Cumulative distribution function evaluated at `k`.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- k, loc = map(asarray, (k, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- k = asarray(k-loc)
- cond0 = self._argcheck(*args)
- cond1 = (k >= _a) & (k < _b)
- cond2 = (k >= _b)
- cond3 = np.isneginf(k)
- cond = cond0 & cond1 & np.isfinite(k)
- output = zeros(shape(cond), 'd')
- place(output, cond2*(cond0 == cond0), 1.0)
- place(output, cond3*(cond0 == cond0), 0.0)
- place(output, (1-cond0) + np.isnan(k), self.badvalue)
- if np.any(cond):
- goodargs = argsreduce(cond, *((k,)+args))
- place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
- if output.ndim == 0:
- return output[()]
- return output
- def logcdf(self, k, *args, **kwds):
- """Log of the cumulative distribution function at k of the given RV.
- Parameters
- ----------
- k : array_like, int
- Quantiles.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- logcdf : array_like
- Log of the cumulative distribution function evaluated at k.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- k, loc = map(asarray, (k, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- k = asarray(k-loc)
- cond0 = self._argcheck(*args)
- cond1 = (k >= _a) & (k < _b)
- cond2 = (k >= _b)
- cond = cond0 & cond1
- output = empty(shape(cond), 'd')
- output.fill(-inf)
- place(output, (1-cond0) + np.isnan(k), self.badvalue)
- place(output, cond2*(cond0 == cond0), 0.0)
- if np.any(cond):
- goodargs = argsreduce(cond, *((k,)+args))
- place(output, cond, self._logcdf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def sf(self, k, *args, **kwds):
- """Survival function (1 - `cdf`) at k of the given RV.
- Parameters
- ----------
- k : array_like
- Quantiles.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- sf : array_like
- Survival function evaluated at k.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- k, loc = map(asarray, (k, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- k = asarray(k-loc)
- cond0 = self._argcheck(*args)
- cond1 = (k >= _a) & (k < _b)
- cond2 = ((k < _a) | np.isneginf(k)) & cond0
- cond = cond0 & cond1 & np.isfinite(k)
- output = zeros(shape(cond), 'd')
- place(output, (1-cond0) + np.isnan(k), self.badvalue)
- place(output, cond2, 1.0)
- if np.any(cond):
- goodargs = argsreduce(cond, *((k,)+args))
- place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
- if output.ndim == 0:
- return output[()]
- return output
- def logsf(self, k, *args, **kwds):
- """Log of the survival function of the given RV.
- Returns the log of the "survival function," defined as 1 - `cdf`,
- evaluated at `k`.
- Parameters
- ----------
- k : array_like
- Quantiles.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- logsf : ndarray
- Log of the survival function evaluated at `k`.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- k, loc = map(asarray, (k, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- k = asarray(k-loc)
- cond0 = self._argcheck(*args)
- cond1 = (k >= _a) & (k < _b)
- cond2 = (k < _a) & cond0
- cond = cond0 & cond1
- output = empty(shape(cond), 'd')
- output.fill(-inf)
- place(output, (1-cond0) + np.isnan(k), self.badvalue)
- place(output, cond2, 0.0)
- if np.any(cond):
- goodargs = argsreduce(cond, *((k,)+args))
- place(output, cond, self._logsf(*goodargs))
- if output.ndim == 0:
- return output[()]
- return output
- def ppf(self, q, *args, **kwds):
- """Percent point function (inverse of `cdf`) at q of the given RV.
- Parameters
- ----------
- q : array_like
- Lower tail probability.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- k : array_like
- Quantile corresponding to the lower tail probability, q.
- Notes
- -----
- For discrete distributions, the `cdf` is not strictly invertible. By convention,
- this method returns the minimum value `k` for which the `cdf` at `k` is at
- least `q`. There is one exception: the `ppf` of ``0`` is ``a-1``,
- where ``a`` is the left endpoint of the support.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- q, loc = map(asarray, (q, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- cond0 = self._argcheck(*args) & (loc == loc)
- cond1 = (q > 0) & (q < 1)
- cond2 = (q == 0) & cond0
- cond3 = (q == 1) & cond0
- cond = cond0 & cond1
- output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
- # output type 'd' to handle nin and inf
- place(output, cond2, argsreduce(cond2, _a-1 + loc)[0])
- place(output, cond3, argsreduce(cond3, _b + loc)[0])
- if np.any(cond):
- goodargs = argsreduce(cond, *((q,)+args+(loc,)))
- loc, goodargs = goodargs[-1], goodargs[:-1]
- place(output, cond, self._ppf(*goodargs) + loc)
- if output.ndim == 0:
- return output[()]
- return output
- def isf(self, q, *args, **kwds):
- """Inverse survival function (inverse of `sf`) at q of the given RV.
- Parameters
- ----------
- q : array_like
- Upper tail probability.
- arg1, arg2, arg3,... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- loc : array_like, optional
- Location parameter (default=0).
- Returns
- -------
- k : ndarray or scalar
- Quantile corresponding to the upper tail probability, q.
- Notes
- -----
- For discrete distributions, the `sf` is not strictly invertible. By convention,
- this method returns the minimum value `k` for which the `sf` at `k` is
- no greater than `q`. There is one exception: the `isf` of ``1`` is ``a-1``,
- where ``a`` is the left endpoint of the support.
- """
- args, loc, _ = self._parse_args(*args, **kwds)
- q, loc = map(asarray, (q, loc))
- args = tuple(map(asarray, args))
- _a, _b = self._get_support(*args)
- cond0 = self._argcheck(*args) & (loc == loc)
- cond1 = (q > 0) & (q < 1)
- cond2 = (q == 1) & cond0
- cond3 = (q == 0) & cond0
- cond = cond0 & cond1
- # same problem as with ppf; copied from ppf and changed
- output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
- # output type 'd' to handle nin and inf
- lower_bound = _a - 1 + loc
- upper_bound = _b + loc
- place(output, cond2, argsreduce(cond2, lower_bound)[0])
- place(output, cond3, argsreduce(cond3, upper_bound)[0])
- # call place only if at least 1 valid argument
- if np.any(cond):
- goodargs = argsreduce(cond, *((q,)+args+(loc,)))
- loc, goodargs = goodargs[-1], goodargs[:-1]
- # PB same as ticket 766
- place(output, cond, self._isf(*goodargs) + loc)
- if output.ndim == 0:
- return output[()]
- return output
- def _entropy(self, *args):
- if hasattr(self, 'pk'):
- return stats.entropy(self.pk)
- else:
- _a, _b = self._get_support(*args)
- return _expect(lambda x: entr(self._pmf(x, *args)),
- _a, _b, self._ppf(0.5, *args), self.inc)
- def expect(self, func=None, args=(), loc=0, lb=None, ub=None,
- conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):
- """
- Calculate expected value of a function with respect to the distribution
- for discrete distribution by numerical summation.
- Parameters
- ----------
- func : callable, optional
- Function for which the expectation value is calculated.
- Takes only one argument.
- The default is the identity mapping f(k) = k.
- args : tuple, optional
- Shape parameters of the distribution.
- loc : float, optional
- Location parameter.
- Default is 0.
- lb, ub : int, optional
- Lower and upper bound for the summation, default is set to the
- support of the distribution, inclusive (``lb <= k <= ub``).
- conditional : bool, optional
- If true then the expectation is corrected by the conditional
- probability of the summation interval. The return value is the
- expectation of the function, `func`, conditional on being in
- the given interval (k such that ``lb <= k <= ub``).
- Default is False.
- maxcount : int, optional
- Maximal number of terms to evaluate (to avoid an endless loop for
- an infinite sum). Default is 1000.
- tolerance : float, optional
- Absolute tolerance for the summation. Default is 1e-10.
- chunksize : int, optional
- Iterate over the support of a distributions in chunks of this size.
- Default is 32.
- Returns
- -------
- expect : float
- Expected value.
- Notes
- -----
- For heavy-tailed distributions, the expected value may or
- may not exist,
- depending on the function, `func`. If it does exist, but the
- sum converges
- slowly, the accuracy of the result may be rather low. For instance, for
- ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.
- increasing `maxcount` and/or `chunksize` may improve the result,
- but may also make zipf very slow.
- The function is not vectorized.
- """
- # Although `args` is just the shape parameters, `poisson_binom` needs this
- # to split the vector-valued shape into a tuple of separate shapes
- args, _, _ = self._parse_args(*args)
- if func is None:
- def fun(x):
- # loc and args from outer scope
- return (x+loc)*self._pmf(x, *args)
- else:
- def fun(x):
- # loc and args from outer scope
- return func(x+loc)*self._pmf(x, *args)
- # used pmf because _pmf does not check support in randint and there
- # might be problems(?) with correct self.a, self.b at this stage maybe
- # not anymore, seems to work now with _pmf
- _a, _b = self._get_support(*args)
- if lb is None:
- lb = _a
- else:
- lb = lb - loc # convert bound for standardized distribution
- if ub is None:
- ub = _b
- else:
- ub = ub - loc # convert bound for standardized distribution
- if conditional:
- invfac = self.sf(lb-1, *args) - self.sf(ub, *args)
- else:
- invfac = 1.0
- if isinstance(self, rv_sample):
- res = self._expect(fun, lb, ub)
- return res / invfac
- # iterate over the support, starting from the median
- x0 = self._ppf(0.5, *args)
- res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)
- return res / invfac
- def _param_info(self):
- shape_info = self._shape_info()
- loc_info = _ShapeInfo("loc", True, (-np.inf, np.inf), (False, False))
- param_info = shape_info + [loc_info]
- return param_info
- def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,
- chunksize=32):
- """Helper for computing the expectation value of `fun`."""
- # short-circuit if the support size is small enough
- if (ub - lb) <= chunksize:
- supp = np.arange(lb, ub+1, inc)
- vals = fun(supp)
- return np.sum(vals)
- # otherwise, iterate starting from x0
- if x0 < lb:
- x0 = lb
- if x0 > ub:
- x0 = ub
- count, tot = 0, 0.
- # iterate over [x0, ub] inclusive
- for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):
- count += x.size
- delta = np.sum(fun(x))
- tot += delta
- if abs(delta) < tolerance * x.size:
- break
- if count > maxcount:
- warnings.warn('expect(): sum did not converge',
- RuntimeWarning, stacklevel=3)
- return tot
- # iterate over [lb, x0)
- for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):
- count += x.size
- delta = np.sum(fun(x))
- tot += delta
- if abs(delta) < tolerance * x.size:
- break
- if count > maxcount:
- warnings.warn('expect(): sum did not converge',
- RuntimeWarning, stacklevel=3)
- break
- return tot
- def _iter_chunked(x0, x1, chunksize=4, inc=1):
- """Iterate from x0 to x1 in chunks of chunksize and steps inc.
- x0 must be finite, x1 need not be. In the latter case, the iterator is
- infinite.
- Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards
- (make sure to set inc < 0.)
- >>> from scipy.stats._distn_infrastructure import _iter_chunked
- >>> [x for x in _iter_chunked(2, 5, inc=2)]
- [array([2, 4])]
- >>> [x for x in _iter_chunked(2, 11, inc=2)]
- [array([2, 4, 6, 8]), array([10])]
- >>> [x for x in _iter_chunked(2, -5, inc=-2)]
- [array([ 2, 0, -2, -4])]
- >>> [x for x in _iter_chunked(2, -9, inc=-2)]
- [array([ 2, 0, -2, -4]), array([-6, -8])]
- """
- if inc == 0:
- raise ValueError('Cannot increment by zero.')
- if chunksize <= 0:
- raise ValueError(f'Chunk size must be positive; got {chunksize}.')
- s = 1 if inc > 0 else -1
- stepsize = abs(chunksize * inc)
- x = np.copy(x0)
- while (x - x1) * inc < 0:
- delta = min(stepsize, abs(x - x1))
- step = delta * s
- supp = np.arange(x, x + step, inc)
- x += step
- yield supp
- class rv_sample(rv_discrete):
- """A 'sample' discrete distribution defined by the support and values.
- The ctor ignores most of the arguments, only needs the `values` argument.
- """
- def __init__(self, a=0, b=inf, name=None, badvalue=None,
- moment_tol=1e-8, values=None, inc=1, longname=None,
- shapes=None, seed=None):
- super(rv_discrete, self).__init__(seed)
- if values is None:
- raise ValueError("rv_sample.__init__(..., values=None,...)")
- # cf generic freeze
- self._ctor_param = dict(
- a=a, b=b, name=name, badvalue=badvalue,
- moment_tol=moment_tol, values=values, inc=inc,
- longname=longname, shapes=shapes, seed=seed)
- if badvalue is None:
- badvalue = nan
- self.badvalue = badvalue
- self.moment_tol = moment_tol
- self.inc = inc
- self.shapes = shapes
- self.vecentropy = self._entropy
- xk, pk = values
- if np.shape(xk) != np.shape(pk):
- raise ValueError("xk and pk must have the same shape.")
- if np.less(pk, 0.0).any():
- raise ValueError("All elements of pk must be non-negative.")
- if not np.allclose(np.sum(pk), 1):
- raise ValueError("The sum of provided pk is not 1.")
- if not len(set(np.ravel(xk))) == np.size(xk):
- raise ValueError("xk may not contain duplicate values.")
- indx = np.argsort(np.ravel(xk))
- self.xk = np.take(np.ravel(xk), indx, 0)
- self.pk = np.take(np.ravel(pk), indx, 0)
- self.a = self.xk[0]
- self.b = self.xk[-1]
- self.qvals = np.cumsum(self.pk, axis=0)
- self.shapes = ' ' # bypass inspection
- self._construct_argparser(meths_to_inspect=[self._pmf],
- locscale_in='loc=0',
- # scale=1 for discrete RVs
- locscale_out='loc, 1')
- self._attach_methods()
- self._construct_docstrings(name, longname)
- def __getstate__(self):
- dct = self.__dict__.copy()
- # these methods will be remade in rv_generic.__setstate__,
- # which calls rv_generic._attach_methods
- attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs"]
- [dct.pop(attr, None) for attr in attrs]
- return dct
- def _attach_methods(self):
- """Attaches dynamically created argparser methods."""
- self._attach_argparser_methods()
- def _get_support(self, *args):
- """Return the support of the (unscaled, unshifted) distribution.
- Parameters
- ----------
- arg1, arg2, ... : array_like
- The shape parameter(s) for the distribution (see docstring of the
- instance object for more information).
- Returns
- -------
- a, b : numeric (float, or int or +/-np.inf)
- end-points of the distribution's support.
- """
- return self.a, self.b
- def _pmf(self, x):
- return np.select([x == k for k in self.xk],
- [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)
- def _cdf(self, x):
- xx, xxk = np.broadcast_arrays(x[:, None], self.xk)
- indx = np.argmax(xxk > xx, axis=-1) - 1
- return self.qvals[indx]
- def _ppf(self, q):
- qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)
- indx = argmax(sqq >= qq, axis=-1)
- return self.xk[indx]
- def _rvs(self, size=None, random_state=None):
- # Need to define it explicitly, otherwise .rvs() with size=None
- # fails due to explicit broadcasting in _ppf
- U = random_state.uniform(size=size)
- if size is None:
- U = np.array(U, ndmin=1)
- Y = self._ppf(U)[0]
- else:
- Y = self._ppf(U)
- return Y
- def _entropy(self):
- return stats.entropy(self.pk)
- def generic_moment(self, n):
- n = asarray(n)
- return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)
- def _expect(self, fun, lb, ub, *args, **kwds):
- # ignore all args, just do a brute force summation
- supp = self.xk[(lb <= self.xk) & (self.xk <= ub)]
- vals = fun(supp)
- return np.sum(vals)
- def _check_shape(argshape, size):
- """
- This is a utility function used by `_rvs()` in the class geninvgauss_gen.
- It compares the tuple argshape to the tuple size.
- Parameters
- ----------
- argshape : tuple of integers
- Shape of the arguments.
- size : tuple of integers or integer
- Size argument of rvs().
- Returns
- -------
- The function returns two tuples, scalar_shape and bc.
- scalar_shape : tuple
- Shape to which the 1-d array of random variates returned by
- _rvs_scalar() is converted when it is copied into the
- output array of _rvs().
- bc : tuple of booleans
- bc is an tuple the same length as size. bc[j] is True if the data
- associated with that index is generated in one call of _rvs_scalar().
- """
- scalar_shape = []
- bc = []
- for argdim, sizedim in zip_longest(argshape[::-1], size[::-1],
- fillvalue=1):
- if sizedim > argdim or (argdim == sizedim == 1):
- scalar_shape.append(sizedim)
- bc.append(True)
- else:
- bc.append(False)
- return tuple(scalar_shape[::-1]), tuple(bc[::-1])
- def get_distribution_names(namespace_pairs, rv_base_class):
- """Collect names of statistical distributions and their generators.
- Parameters
- ----------
- namespace_pairs : sequence
- A snapshot of (name, value) pairs in the namespace of a module.
- rv_base_class : class
- The base class of random variable generator classes in a module.
- Returns
- -------
- distn_names : list of strings
- Names of the statistical distributions.
- distn_gen_names : list of strings
- Names of the generators of the statistical distributions.
- Note that these are not simply the names of the statistical
- distributions, with a _gen suffix added.
- """
- distn_names = []
- distn_gen_names = []
- for name, value in namespace_pairs:
- if name.startswith('_'):
- continue
- if name.endswith('_gen') and issubclass(value, rv_base_class):
- distn_gen_names.append(name)
- if isinstance(value, rv_base_class):
- distn_names.append(name)
- return distn_names, distn_gen_names
|