| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672 |
- """
- .. _statsrefmanual:
- ==========================================
- Statistical functions (:mod:`scipy.stats`)
- ==========================================
- .. currentmodule:: scipy.stats
- This module contains a large number of probability distributions,
- summary and frequency statistics, correlation functions and statistical
- tests, masked statistics, kernel density estimation, quasi-Monte Carlo
- functionality, and more.
- Statistics is a very large area, and there are topics that are out of scope
- for SciPy and are covered by other packages. Some of the most important ones
- are:
- - `statsmodels <https://www.statsmodels.org/stable/index.html>`__:
- regression, linear models, time series analysis, extensions to topics
- also covered by ``scipy.stats``.
- - `Pandas <https://pandas.pydata.org/>`__: tabular data, time series
- functionality, interfaces to other statistical languages.
- - `PyMC <https://docs.pymc.io/>`__: Bayesian statistical
- modeling, probabilistic machine learning.
- - `scikit-learn <https://scikit-learn.org/>`__: classification, regression,
- model selection.
- - `Seaborn <https://seaborn.pydata.org/>`__: statistical data visualization.
- - `rpy2 <https://rpy2.github.io/>`__: Python to R bridge.
- Probability distributions
- =========================
- Each univariate distribution is an instance of a subclass of `rv_continuous`
- (`rv_discrete` for discrete distributions):
- .. autosummary::
- :toctree: generated/
- rv_continuous
- rv_discrete
- rv_histogram
- Continuous distributions
- ------------------------
- .. autosummary::
- :toctree: generated/
- alpha -- Alpha
- anglit -- Anglit
- arcsine -- Arcsine
- argus -- Argus
- beta -- Beta
- betaprime -- Beta Prime
- bradford -- Bradford
- burr -- Burr (Type III)
- burr12 -- Burr (Type XII)
- cauchy -- Cauchy
- chi -- Chi
- chi2 -- Chi-squared
- cosine -- Cosine
- crystalball -- Crystalball
- dgamma -- Double Gamma
- dpareto_lognorm -- Double Pareto Lognormal
- dweibull -- Double Weibull
- erlang -- Erlang
- expon -- Exponential
- exponnorm -- Exponentially Modified Normal
- exponweib -- Exponentiated Weibull
- exponpow -- Exponential Power
- f -- F (Snedecor F)
- fatiguelife -- Fatigue Life (Birnbaum-Saunders)
- fisk -- Fisk
- foldcauchy -- Folded Cauchy
- foldnorm -- Folded Normal
- genlogistic -- Generalized Logistic
- gennorm -- Generalized normal
- genpareto -- Generalized Pareto
- genexpon -- Generalized Exponential
- genextreme -- Generalized Extreme Value
- gausshyper -- Gauss Hypergeometric
- gamma -- Gamma
- gengamma -- Generalized gamma
- genhalflogistic -- Generalized Half Logistic
- genhyperbolic -- Generalized Hyperbolic
- geninvgauss -- Generalized Inverse Gaussian
- gibrat -- Gibrat
- gompertz -- Gompertz (Truncated Gumbel)
- gumbel_r -- Right Sided Gumbel, Log-Weibull, Fisher-Tippett, Extreme Value Type I
- gumbel_l -- Left Sided Gumbel, etc.
- halfcauchy -- Half Cauchy
- halflogistic -- Half Logistic
- halfnorm -- Half Normal
- halfgennorm -- Generalized Half Normal
- hypsecant -- Hyperbolic Secant
- invgamma -- Inverse Gamma
- invgauss -- Inverse Gaussian
- invweibull -- Inverse Weibull
- irwinhall -- Irwin-Hall
- jf_skew_t -- Jones and Faddy Skew-T
- johnsonsb -- Johnson SB
- johnsonsu -- Johnson SU
- kappa4 -- Kappa 4 parameter
- kappa3 -- Kappa 3 parameter
- ksone -- Distribution of Kolmogorov-Smirnov one-sided test statistic
- kstwo -- Distribution of Kolmogorov-Smirnov two-sided test statistic
- kstwobign -- Limiting Distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
- landau -- Landau
- laplace -- Laplace
- laplace_asymmetric -- Asymmetric Laplace
- levy -- Levy
- levy_l
- levy_stable
- logistic -- Logistic
- loggamma -- Log-Gamma
- loglaplace -- Log-Laplace (Log Double Exponential)
- lognorm -- Log-Normal
- loguniform -- Log-Uniform
- lomax -- Lomax (Pareto of the second kind)
- maxwell -- Maxwell
- mielke -- Mielke's Beta-Kappa
- moyal -- Moyal
- nakagami -- Nakagami
- ncx2 -- Non-central chi-squared
- ncf -- Non-central F
- nct -- Non-central Student's T
- norm -- Normal (Gaussian)
- norminvgauss -- Normal Inverse Gaussian
- pareto -- Pareto
- pearson3 -- Pearson type III
- powerlaw -- Power-function
- powerlognorm -- Power log normal
- powernorm -- Power normal
- rdist -- R-distribution
- rayleigh -- Rayleigh
- rel_breitwigner -- Relativistic Breit-Wigner
- rice -- Rice
- recipinvgauss -- Reciprocal Inverse Gaussian
- semicircular -- Semicircular
- skewcauchy -- Skew Cauchy
- skewnorm -- Skew normal
- studentized_range -- Studentized Range
- t -- Student's T
- trapezoid -- Trapezoidal
- triang -- Triangular
- truncexpon -- Truncated Exponential
- truncnorm -- Truncated Normal
- truncpareto -- Truncated Pareto
- truncweibull_min -- Truncated minimum Weibull distribution
- tukeylambda -- Tukey-Lambda
- uniform -- Uniform
- vonmises -- Von-Mises (Circular)
- vonmises_line -- Von-Mises (Line)
- wald -- Wald
- weibull_min -- Minimum Weibull (see Frechet)
- weibull_max -- Maximum Weibull (see Frechet)
- wrapcauchy -- Wrapped Cauchy
- The ``fit`` method of the univariate continuous distributions uses
- maximum likelihood estimation to fit the distribution to a data set.
- The ``fit`` method can accept regular data or *censored data*.
- Censored data is represented with instances of the `CensoredData`
- class.
- .. autosummary::
- :toctree: generated/
- CensoredData
- Multivariate distributions
- --------------------------
- .. autosummary::
- :toctree: generated/
- multivariate_normal -- Multivariate normal distribution
- matrix_normal -- Matrix normal distribution
- dirichlet -- Dirichlet
- dirichlet_multinomial -- Dirichlet multinomial distribution
- wishart -- Wishart
- invwishart -- Inverse Wishart
- multinomial -- Multinomial distribution
- special_ortho_group -- SO(N) group
- ortho_group -- O(N) group
- unitary_group -- U(N) group
- random_correlation -- random correlation matrices
- multivariate_t -- Multivariate t-distribution
- multivariate_hypergeom -- Multivariate hypergeometric distribution
- normal_inverse_gamma -- Normal-inverse-gamma distribution
- random_table -- Distribution of random tables with given marginals
- uniform_direction -- Uniform distribution on S(N-1)
- vonmises_fisher -- Von Mises-Fisher distribution
- matrix_t -- Matrix variate t distribution
- `scipy.stats.multivariate_normal` methods accept instances
- of the following class to represent the covariance.
- .. autosummary::
- :toctree: generated/
- Covariance -- Representation of a covariance matrix
- Discrete distributions
- ----------------------
- .. autosummary::
- :toctree: generated/
- bernoulli -- Bernoulli
- betabinom -- Beta-Binomial
- betanbinom -- Beta-Negative Binomial
- binom -- Binomial
- boltzmann -- Boltzmann (Truncated Discrete Exponential)
- dlaplace -- Discrete Laplacian
- geom -- Geometric
- hypergeom -- Hypergeometric
- logser -- Logarithmic (Log-Series, Series)
- nbinom -- Negative Binomial
- nchypergeom_fisher -- Fisher's Noncentral Hypergeometric
- nchypergeom_wallenius -- Wallenius's Noncentral Hypergeometric
- nhypergeom -- Negative Hypergeometric
- planck -- Planck (Discrete Exponential)
- poisson -- Poisson
- poisson_binom -- Poisson Binomial
- randint -- Discrete Uniform
- skellam -- Skellam
- yulesimon -- Yule-Simon
- zipf -- Zipf (Zeta)
- zipfian -- Zipfian
- An overview of statistical functions is given below. Many of these functions
- have a similar version in `scipy.stats.mstats` that works with masked arrays.
- Summary statistics
- ==================
- .. autosummary::
- :toctree: generated/
- describe -- Descriptive statistics
- gmean -- Geometric mean
- hmean -- Harmonic mean
- pmean -- Power mean
- kurtosis -- Fisher or Pearson kurtosis
- mode -- Modal value
- moment -- Central moment
- lmoment
- expectile -- Expectile
- skew -- Skewness
- kstat --
- kstatvar --
- tmean -- Truncated arithmetic mean
- tvar -- Truncated variance
- tmin --
- tmax --
- tstd --
- tsem --
- variation -- Coefficient of variation
- rankdata
- tiecorrect
- trim_mean
- gstd -- Geometric Standard Deviation
- iqr
- sem
- bayes_mvs
- mvsdist
- entropy
- differential_entropy
- median_abs_deviation
- Frequency statistics
- ====================
- .. autosummary::
- :toctree: generated/
- cumfreq
- quantile
- percentileofscore
- scoreatpercentile
- relfreq
- .. autosummary::
- :toctree: generated/
- binned_statistic -- Compute a binned statistic for a set of data.
- binned_statistic_2d -- Compute a 2-D binned statistic for a set of data.
- binned_statistic_dd -- Compute a d-D binned statistic for a set of data.
- .. _hypotests:
- Hypothesis Tests and related functions
- ======================================
- SciPy has many functions for performing hypothesis tests that return a
- test statistic and a p-value, and several of them return confidence intervals
- and/or other related information.
- The headings below are based on common uses of the functions within, but due to
- the wide variety of statistical procedures, any attempt at coarse-grained
- categorization will be imperfect. Also, note that tests within the same heading
- are not interchangeable in general (e.g. many have different distributional
- assumptions).
- One Sample Tests / Paired Sample Tests
- --------------------------------------
- One sample tests are typically used to assess whether a single sample was
- drawn from a specified distribution or a distribution with specified properties
- (e.g. zero mean).
- .. autosummary::
- :toctree: generated/
- ttest_1samp
- binomtest
- quantile_test
- skewtest
- kurtosistest
- normaltest
- jarque_bera
- shapiro
- anderson
- cramervonmises
- ks_1samp
- goodness_of_fit
- chisquare
- power_divergence
- Paired sample tests are often used to assess whether two samples were drawn
- from the same distribution; they differ from the independent sample tests below
- in that each observation in one sample is treated as paired with a
- closely-related observation in the other sample (e.g. when environmental
- factors are controlled between observations within a pair but not among pairs).
- They can also be interpreted or used as one-sample tests (e.g. tests on the
- mean or median of *differences* between paired observations).
- .. autosummary::
- :toctree: generated/
- ttest_rel
- wilcoxon
- Association/Correlation Tests
- -----------------------------
- These tests are often used to assess whether there is a relationship (e.g.
- linear) between paired observations in multiple samples or among the
- coordinates of multivariate observations.
- .. autosummary::
- :toctree: generated/
- linregress
- pearsonr
- spearmanrho
- pointbiserialr
- kendalltau
- chatterjeexi
- weightedtau
- somersd
- siegelslopes
- theilslopes
- page_trend_test
- multiscale_graphcorr
- spearmanr
- These association tests work with samples in the form of contingency
- tables. Supporting functions are available in `scipy.stats.contingency`.
- .. autosummary::
- :toctree: generated/
- chi2_contingency
- fisher_exact
- barnard_exact
- boschloo_exact
- Independent Sample Tests
- ------------------------
- Independent sample tests are typically used to assess whether multiple samples
- were independently drawn from the same distribution or different distributions
- with a shared property (e.g. equal means).
- Some tests are specifically for comparing two samples.
- .. autosummary::
- :toctree: generated/
- ttest_ind_from_stats
- poisson_means_test
- ttest_ind
- mannwhitneyu
- bws_test
- ranksums
- brunnermunzel
- mood
- ansari
- cramervonmises_2samp
- epps_singleton_2samp
- ks_2samp
- kstest
- Others are generalized to multiple samples.
- .. autosummary::
- :toctree: generated/
- f_oneway
- tukey_hsd
- dunnett
- kruskal
- alexandergovern
- fligner
- levene
- bartlett
- median_test
- friedmanchisquare
- anderson_ksamp
- Resampling and Monte Carlo Methods
- ----------------------------------
- The following functions can reproduce the p-value and confidence interval
- results of most of the functions above, and often produce accurate results in a
- wider variety of conditions. They can also be used to perform hypothesis tests
- and generate confidence intervals for custom statistics. This flexibility comes
- at the cost of greater computational requirements and stochastic results.
- .. autosummary::
- :toctree: generated/
- monte_carlo_test
- permutation_test
- bootstrap
- power
- Instances of the following object can be passed into some hypothesis test
- functions to perform a resampling or Monte Carlo version of the hypothesis
- test.
- .. autosummary::
- :toctree: generated/
- MonteCarloMethod
- PermutationMethod
- BootstrapMethod
- Multiple Hypothesis Testing and Meta-Analysis
- ---------------------------------------------
- These functions are for assessing the results of individual tests as a whole.
- Functions for performing specific multiple hypothesis tests (e.g. post hoc
- tests) are listed above.
- .. autosummary::
- :toctree: generated/
- combine_pvalues
- false_discovery_control
- The following functions are related to the tests above but do not belong in the
- above categories.
- Random Variables
- ================
- .. autosummary::
- :toctree: generated/
- make_distribution
- Normal
- Logistic
- Uniform
- Binomial
- Mixture
- order_statistic
- truncate
- abs
- exp
- log
- Quasi-Monte Carlo
- =================
- .. toctree::
- :maxdepth: 4
- stats.qmc
- Contingency Tables
- ==================
- .. toctree::
- :maxdepth: 4
- stats.contingency
- Masked statistics functions
- ===========================
- .. toctree::
- stats.mstats
- Other statistical functionality
- ===============================
- Transformations
- ---------------
- .. autosummary::
- :toctree: generated/
- boxcox
- boxcox_normmax
- boxcox_llf
- yeojohnson
- yeojohnson_normmax
- yeojohnson_llf
- obrientransform
- sigmaclip
- trimboth
- trim1
- zmap
- zscore
- gzscore
- Statistical distances
- ---------------------
- .. autosummary::
- :toctree: generated/
- wasserstein_distance
- wasserstein_distance_nd
- energy_distance
- Sampling
- --------
- .. toctree::
- :maxdepth: 4
- stats.sampling
- Fitting / Survival Analysis
- ---------------------------
- .. autosummary::
- :toctree: generated/
- fit
- ecdf
- logrank
- Directional statistical functions
- ---------------------------------
- .. autosummary::
- :toctree: generated/
- directional_stats
- circmean
- circvar
- circstd
- Sensitivity Analysis
- --------------------
- .. autosummary::
- :toctree: generated/
- sobol_indices
- Plot-tests
- ----------
- .. autosummary::
- :toctree: generated/
- ppcc_max
- ppcc_plot
- probplot
- boxcox_normplot
- yeojohnson_normplot
- Univariate and multivariate kernel density estimation
- -----------------------------------------------------
- .. autosummary::
- :toctree: generated/
- gaussian_kde
- Warnings / Errors used in :mod:`scipy.stats`
- --------------------------------------------
- .. autosummary::
- :toctree: generated/
- DegenerateDataWarning
- ConstantInputWarning
- NearConstantInputWarning
- FitError
- Result classes used in :mod:`scipy.stats`
- -----------------------------------------
- .. warning::
- These classes are private, but they are included here because instances
- of them are returned by other statistical functions. User import and
- instantiation is not supported.
- .. toctree::
- :maxdepth: 2
- stats._result_classes
- """ # noqa: E501
- from ._warnings_errors import (ConstantInputWarning, NearConstantInputWarning,
- DegenerateDataWarning, FitError)
- from ._stats_py import *
- from ._variation import variation
- from .distributions import *
- from ._morestats import *
- from ._multicomp import *
- from ._binomtest import binomtest
- from ._binned_statistic import *
- from ._kde import gaussian_kde
- from . import mstats
- from . import qmc
- from ._multivariate import *
- from . import contingency
- from .contingency import chi2_contingency
- from ._censored_data import CensoredData
- from ._resampling import (bootstrap, monte_carlo_test, permutation_test, power,
- MonteCarloMethod, PermutationMethod, BootstrapMethod)
- from ._entropy import *
- from ._hypotests import *
- from ._page_trend_test import page_trend_test
- from ._mannwhitneyu import mannwhitneyu
- from ._bws_test import bws_test
- from ._fit import fit, goodness_of_fit
- from ._covariance import Covariance
- from ._sensitivity_analysis import *
- from ._survival import *
- from ._distribution_infrastructure import (
- make_distribution, Mixture, order_statistic, truncate, exp, log, abs
- )
- from ._new_distributions import Normal, Logistic, Uniform, Binomial
- from ._mgc import multiscale_graphcorr
- from ._correlation import chatterjeexi, spearmanrho
- from ._quantile import quantile
- # Deprecated namespaces, to be removed in v2.0.0
- from . import (
- biasedurn, kde, morestats, mstats_basic, mstats_extras, mvn, stats
- )
- __all__ = [s for s in dir() if not s.startswith("_")] # Public API: every name bound so far that has no leading underscore (drops private names as well as dunders).
- from scipy._lib._testutils import PytestTester # Imported after __all__ is computed, so it is not listed there.
- test = PytestTester(__name__) # Bound after __all__ is computed, so `test` is not listed there either.
- del PytestTester # Only needed to build `test`; remove the name from the module namespace.
|