__init__.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. """
  2. .. _statsrefmanual:
  3. ==========================================
  4. Statistical functions (:mod:`scipy.stats`)
  5. ==========================================
  6. .. currentmodule:: scipy.stats
  7. This module contains a large number of probability distributions,
  8. summary and frequency statistics, correlation functions and statistical
  9. tests, masked statistics, kernel density estimation, quasi-Monte Carlo
  10. functionality, and more.
  11. Statistics is a very large area, and there are topics that are out of scope
  12. for SciPy and are covered by other packages. Some of the most important ones
  13. are:
  14. - `statsmodels <https://www.statsmodels.org/stable/index.html>`__:
  15. regression, linear models, time series analysis, extensions to topics
  16. also covered by ``scipy.stats``.
  17. - `Pandas <https://pandas.pydata.org/>`__: tabular data, time series
  18. functionality, interfaces to other statistical languages.
  19. - `PyMC <https://docs.pymc.io/>`__: Bayesian statistical
  20. modeling, probabilistic machine learning.
  21. - `scikit-learn <https://scikit-learn.org/>`__: classification, regression,
  22. model selection.
  23. - `Seaborn <https://seaborn.pydata.org/>`__: statistical data visualization.
  24. - `rpy2 <https://rpy2.github.io/>`__: Python to R bridge.
  25. Probability distributions
  26. =========================
  27. Each univariate distribution is an instance of a subclass of `rv_continuous`
  28. (`rv_discrete` for discrete distributions):
  29. .. autosummary::
  30. :toctree: generated/
  31. rv_continuous
  32. rv_discrete
  33. rv_histogram
  34. Continuous distributions
  35. ------------------------
  36. .. autosummary::
  37. :toctree: generated/
  38. alpha -- Alpha
  39. anglit -- Anglit
  40. arcsine -- Arcsine
  41. argus -- Argus
  42. beta -- Beta
  43. betaprime -- Beta Prime
  44. bradford -- Bradford
  45. burr -- Burr (Type III)
  46. burr12 -- Burr (Type XII)
  47. cauchy -- Cauchy
  48. chi -- Chi
  49. chi2 -- Chi-squared
  50. cosine -- Cosine
  51. crystalball -- Crystalball
  52. dgamma -- Double Gamma
  53. dpareto_lognorm -- Double Pareto Lognormal
  54. dweibull -- Double Weibull
  55. erlang -- Erlang
  56. expon -- Exponential
  57. exponnorm -- Exponentially Modified Normal
  58. exponweib -- Exponentiated Weibull
  59. exponpow -- Exponential Power
  60. f -- F (Snedecor F)
  61. fatiguelife -- Fatigue Life (Birnbaum-Saunders)
  62. fisk -- Fisk
  63. foldcauchy -- Folded Cauchy
  64. foldnorm -- Folded Normal
  65. genlogistic -- Generalized Logistic
  66. gennorm -- Generalized normal
  67. genpareto -- Generalized Pareto
  68. genexpon -- Generalized Exponential
  69. genextreme -- Generalized Extreme Value
  70. gausshyper -- Gauss Hypergeometric
  71. gamma -- Gamma
  72. gengamma -- Generalized gamma
  73. genhalflogistic -- Generalized Half Logistic
  74. genhyperbolic -- Generalized Hyperbolic
  75. geninvgauss -- Generalized Inverse Gaussian
  76. gibrat -- Gibrat
  77. gompertz -- Gompertz (Truncated Gumbel)
  78. gumbel_r -- Right Sided Gumbel, Log-Weibull, Fisher-Tippett, Extreme Value Type I
  79. gumbel_l -- Left Sided Gumbel, etc.
  80. halfcauchy -- Half Cauchy
  81. halflogistic -- Half Logistic
  82. halfnorm -- Half Normal
  83. halfgennorm -- Generalized Half Normal
  84. hypsecant -- Hyperbolic Secant
  85. invgamma -- Inverse Gamma
  86. invgauss -- Inverse Gaussian
  87. invweibull -- Inverse Weibull
  88. irwinhall -- Irwin-Hall
  89. jf_skew_t -- Jones and Faddy Skew-T
  90. johnsonsb -- Johnson SB
  91. johnsonsu -- Johnson SU
  92. kappa4 -- Kappa 4 parameter
  93. kappa3 -- Kappa 3 parameter
  94. ksone -- Distribution of Kolmogorov-Smirnov one-sided test statistic
  95. kstwo -- Distribution of Kolmogorov-Smirnov two-sided test statistic
  96. kstwobign -- Limiting Distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
  97. landau -- Landau
  98. laplace -- Laplace
  99. laplace_asymmetric -- Asymmetric Laplace
  100. levy -- Levy
  101. levy_l
  102. levy_stable
  103. logistic -- Logistic
  104. loggamma -- Log-Gamma
  105. loglaplace -- Log-Laplace (Log Double Exponential)
  106. lognorm -- Log-Normal
  107. loguniform -- Log-Uniform
  108. lomax -- Lomax (Pareto of the second kind)
  109. maxwell -- Maxwell
  110. mielke -- Mielke's Beta-Kappa
  111. moyal -- Moyal
  112. nakagami -- Nakagami
  113. ncx2 -- Non-central chi-squared
  114. ncf -- Non-central F
  115. nct -- Non-central Student's T
  116. norm -- Normal (Gaussian)
  117. norminvgauss -- Normal Inverse Gaussian
  118. pareto -- Pareto
  119. pearson3 -- Pearson type III
  120. powerlaw -- Power-function
  121. powerlognorm -- Power log normal
  122. powernorm -- Power normal
  123. rdist -- R-distribution
  124. rayleigh -- Rayleigh
  125. rel_breitwigner -- Relativistic Breit-Wigner
  126. rice -- Rice
  127. recipinvgauss -- Reciprocal Inverse Gaussian
  128. semicircular -- Semicircular
  129. skewcauchy -- Skew Cauchy
  130. skewnorm -- Skew normal
  131. studentized_range -- Studentized Range
  132. t -- Student's T
  133. trapezoid -- Trapezoidal
  134. triang -- Triangular
  135. truncexpon -- Truncated Exponential
  136. truncnorm -- Truncated Normal
  137. truncpareto -- Truncated Pareto
  138. truncweibull_min -- Truncated minimum Weibull distribution
  139. tukeylambda -- Tukey-Lambda
  140. uniform -- Uniform
  141. vonmises -- Von-Mises (Circular)
  142. vonmises_line -- Von-Mises (Line)
  143. wald -- Wald
  144. weibull_min -- Minimum Weibull (see Frechet)
  145. weibull_max -- Maximum Weibull (see Frechet)
  146. wrapcauchy -- Wrapped Cauchy
  147. The ``fit`` method of the univariate continuous distributions uses
  148. maximum likelihood estimation to fit the distribution to a data set.
  149. The ``fit`` method can accept regular data or *censored data*.
  150. Censored data is represented with instances of the `CensoredData`
  151. class.
  152. .. autosummary::
  153. :toctree: generated/
  154. CensoredData
  155. Multivariate distributions
  156. --------------------------
  157. .. autosummary::
  158. :toctree: generated/
  159. multivariate_normal -- Multivariate normal distribution
  160. matrix_normal -- Matrix normal distribution
  161. dirichlet -- Dirichlet
  162. dirichlet_multinomial -- Dirichlet multinomial distribution
  163. wishart -- Wishart
  164. invwishart -- Inverse Wishart
  165. multinomial -- Multinomial distribution
  166. special_ortho_group -- SO(N) group
  167. ortho_group -- O(N) group
  168. unitary_group -- U(N) group
  169. random_correlation -- random correlation matrices
  170. multivariate_t -- Multivariate t-distribution
  171. multivariate_hypergeom -- Multivariate hypergeometric distribution
  172. normal_inverse_gamma -- Normal-inverse-gamma distribution
  173. random_table -- Distribution of random tables with given marginals
  174. uniform_direction -- Uniform distribution on S(N-1)
  175. vonmises_fisher -- Von Mises-Fisher distribution
  176. matrix_t -- Matrix variate t distribution
  177. `scipy.stats.multivariate_normal` methods accept instances
  178. of the following class to represent the covariance.
  179. .. autosummary::
  180. :toctree: generated/
  181. Covariance -- Representation of a covariance matrix
  182. Discrete distributions
  183. ----------------------
  184. .. autosummary::
  185. :toctree: generated/
  186. bernoulli -- Bernoulli
  187. betabinom -- Beta-Binomial
  188. betanbinom -- Beta-Negative Binomial
  189. binom -- Binomial
  190. boltzmann -- Boltzmann (Truncated Discrete Exponential)
  191. dlaplace -- Discrete Laplacian
  192. geom -- Geometric
  193. hypergeom -- Hypergeometric
  194. logser -- Logarithmic (Log-Series, Series)
  195. nbinom -- Negative Binomial
  196. nchypergeom_fisher -- Fisher's Noncentral Hypergeometric
  197. nchypergeom_wallenius -- Wallenius's Noncentral Hypergeometric
  198. nhypergeom -- Negative Hypergeometric
  199. planck -- Planck (Discrete Exponential)
  200. poisson -- Poisson
  201. poisson_binom -- Poisson Binomial
  202. randint -- Discrete Uniform
  203. skellam -- Skellam
  204. yulesimon -- Yule-Simon
  205. zipf -- Zipf (Zeta)
  206. zipfian -- Zipfian
  207. An overview of statistical functions is given below. Many of these functions
  208. have a similar version in `scipy.stats.mstats` which works for masked arrays.
  209. Summary statistics
  210. ==================
  211. .. autosummary::
  212. :toctree: generated/
  213. describe -- Descriptive statistics
  214. gmean -- Geometric mean
  215. hmean -- Harmonic mean
  216. pmean -- Power mean
  217. kurtosis -- Fisher or Pearson kurtosis
  218. mode -- Modal value
  219. moment -- Central moment
  220. lmoment
  221. expectile -- Expectile
  222. skew -- Skewness
  223. kstat --
  224. kstatvar --
  225. tmean -- Truncated arithmetic mean
  226. tvar -- Truncated variance
  227. tmin --
  228. tmax --
  229. tstd --
  230. tsem --
  231. variation -- Coefficient of variation
  232. rankdata
  233. tiecorrect
  234. trim_mean
  235. gstd -- Geometric Standard Deviation
  236. iqr
  237. sem
  238. bayes_mvs
  239. mvsdist
  240. entropy
  241. differential_entropy
  242. median_abs_deviation
  243. Frequency statistics
  244. ====================
  245. .. autosummary::
  246. :toctree: generated/
  247. cumfreq
  248. quantile
  249. percentileofscore
  250. scoreatpercentile
  251. relfreq
  252. .. autosummary::
  253. :toctree: generated/
  254. binned_statistic -- Compute a binned statistic for a set of data.
  255. binned_statistic_2d -- Compute a 2-D binned statistic for a set of data.
  256. binned_statistic_dd -- Compute a d-D binned statistic for a set of data.
  257. .. _hypotests:
  258. Hypothesis Tests and related functions
  259. ======================================
  260. SciPy has many functions for performing hypothesis tests that return a
  261. test statistic and a p-value, and several of them return confidence intervals
  262. and/or other related information.
  263. The headings below are based on common uses of the functions within, but due to
  264. the wide variety of statistical procedures, any attempt at coarse-grained
  265. categorization will be imperfect. Also, note that tests within the same heading
  266. are not interchangeable in general (e.g. many have different distributional
  267. assumptions).
  268. One Sample Tests / Paired Sample Tests
  269. --------------------------------------
  270. One sample tests are typically used to assess whether a single sample was
  271. drawn from a specified distribution or a distribution with specified properties
  272. (e.g. zero mean).
  273. .. autosummary::
  274. :toctree: generated/
  275. ttest_1samp
  276. binomtest
  277. quantile_test
  278. skewtest
  279. kurtosistest
  280. normaltest
  281. jarque_bera
  282. shapiro
  283. anderson
  284. cramervonmises
  285. ks_1samp
  286. goodness_of_fit
  287. chisquare
  288. power_divergence
  289. Paired sample tests are often used to assess whether two samples were drawn
  290. from the same distribution; they differ from the independent sample tests below
  291. in that each observation in one sample is treated as paired with a
  292. closely-related observation in the other sample (e.g. when environmental
  293. factors are controlled between observations within a pair but not among pairs).
  294. They can also be interpreted or used as one-sample tests (e.g. tests on the
  295. mean or median of *differences* between paired observations).
  296. .. autosummary::
  297. :toctree: generated/
  298. ttest_rel
  299. wilcoxon
  300. Association/Correlation Tests
  301. -----------------------------
  302. These tests are often used to assess whether there is a relationship (e.g.
  303. linear) between paired observations in multiple samples or among the
  304. coordinates of multivariate observations.
  305. .. autosummary::
  306. :toctree: generated/
  307. linregress
  308. pearsonr
  309. spearmanrho
  310. pointbiserialr
  311. kendalltau
  312. chatterjeexi
  313. weightedtau
  314. somersd
  315. siegelslopes
  316. theilslopes
  317. page_trend_test
  318. multiscale_graphcorr
  319. spearmanr
  320. These association tests work with samples in the form of contingency
  321. tables. Supporting functions are available in `scipy.stats.contingency`.
  322. .. autosummary::
  323. :toctree: generated/
  324. chi2_contingency
  325. fisher_exact
  326. barnard_exact
  327. boschloo_exact
  328. Independent Sample Tests
  329. ------------------------
  330. Independent sample tests are typically used to assess whether multiple samples
  331. were independently drawn from the same distribution or different distributions
  332. with a shared property (e.g. equal means).
  333. Some tests are specifically for comparing two samples.
  334. .. autosummary::
  335. :toctree: generated/
  336. ttest_ind_from_stats
  337. poisson_means_test
  338. ttest_ind
  339. mannwhitneyu
  340. bws_test
  341. ranksums
  342. brunnermunzel
  343. mood
  344. ansari
  345. cramervonmises_2samp
  346. epps_singleton_2samp
  347. ks_2samp
  348. kstest
  349. Others are generalized to multiple samples.
  350. .. autosummary::
  351. :toctree: generated/
  352. f_oneway
  353. tukey_hsd
  354. dunnett
  355. kruskal
  356. alexandergovern
  357. fligner
  358. levene
  359. bartlett
  360. median_test
  361. friedmanchisquare
  362. anderson_ksamp
  363. Resampling and Monte Carlo Methods
  364. ----------------------------------
  365. The following functions can reproduce the p-value and confidence interval
  366. results of most of the functions above, and often produce accurate results in a
  367. wider variety of conditions. They can also be used to perform hypothesis tests
  368. and generate confidence intervals for custom statistics. This flexibility comes
  369. at the cost of greater computational requirements and stochastic results.
  370. .. autosummary::
  371. :toctree: generated/
  372. monte_carlo_test
  373. permutation_test
  374. bootstrap
  375. power
  376. Instances of the following object can be passed into some hypothesis test
  377. functions to perform a resampling or Monte Carlo version of the hypothesis
  378. test.
  379. .. autosummary::
  380. :toctree: generated/
  381. MonteCarloMethod
  382. PermutationMethod
  383. BootstrapMethod
  384. Multiple Hypothesis Testing and Meta-Analysis
  385. ---------------------------------------------
  386. These functions are for assessing the results of individual tests as a whole.
  387. Functions for performing specific multiple hypothesis tests (e.g. post hoc
  388. tests) are listed above.
  389. .. autosummary::
  390. :toctree: generated/
  391. combine_pvalues
  392. false_discovery_control
  393. The following functions are related to the tests above but do not belong in the
  394. above categories.
  395. Random Variables
  396. ================
  397. .. autosummary::
  398. :toctree: generated/
  399. make_distribution
  400. Normal
  401. Logistic
  402. Uniform
  403. Binomial
  404. Mixture
  405. order_statistic
  406. truncate
  407. abs
  408. exp
  409. log
  410. Quasi-Monte Carlo
  411. =================
  412. .. toctree::
  413. :maxdepth: 4
  414. stats.qmc
  415. Contingency Tables
  416. ==================
  417. .. toctree::
  418. :maxdepth: 4
  419. stats.contingency
  420. Masked statistics functions
  421. ===========================
  422. .. toctree::
  423. stats.mstats
  424. Other statistical functionality
  425. ===============================
  426. Transformations
  427. ---------------
  428. .. autosummary::
  429. :toctree: generated/
  430. boxcox
  431. boxcox_normmax
  432. boxcox_llf
  433. yeojohnson
  434. yeojohnson_normmax
  435. yeojohnson_llf
  436. obrientransform
  437. sigmaclip
  438. trimboth
  439. trim1
  440. zmap
  441. zscore
  442. gzscore
  443. Statistical distances
  444. ---------------------
  445. .. autosummary::
  446. :toctree: generated/
  447. wasserstein_distance
  448. wasserstein_distance_nd
  449. energy_distance
  450. Sampling
  451. --------
  452. .. toctree::
  453. :maxdepth: 4
  454. stats.sampling
  455. Fitting / Survival Analysis
  456. ---------------------------
  457. .. autosummary::
  458. :toctree: generated/
  459. fit
  460. ecdf
  461. logrank
  462. Directional statistical functions
  463. ---------------------------------
  464. .. autosummary::
  465. :toctree: generated/
  466. directional_stats
  467. circmean
  468. circvar
  469. circstd
  470. Sensitivity Analysis
  471. --------------------
  472. .. autosummary::
  473. :toctree: generated/
  474. sobol_indices
  475. Plot-tests
  476. ----------
  477. .. autosummary::
  478. :toctree: generated/
  479. ppcc_max
  480. ppcc_plot
  481. probplot
  482. boxcox_normplot
  483. yeojohnson_normplot
  484. Univariate and multivariate kernel density estimation
  485. -----------------------------------------------------
  486. .. autosummary::
  487. :toctree: generated/
  488. gaussian_kde
  489. Warnings / Errors used in :mod:`scipy.stats`
  490. --------------------------------------------
  491. .. autosummary::
  492. :toctree: generated/
  493. DegenerateDataWarning
  494. ConstantInputWarning
  495. NearConstantInputWarning
  496. FitError
  497. Result classes used in :mod:`scipy.stats`
  498. -----------------------------------------
  499. .. warning::
  500. These classes are private, but they are included here because instances
  501. of them are returned by other statistical functions. User import and
  502. instantiation is not supported.
  503. .. toctree::
  504. :maxdepth: 2
  505. stats._result_classes
  506. """ # noqa: E501
  507. from ._warnings_errors import (ConstantInputWarning, NearConstantInputWarning,
  508. DegenerateDataWarning, FitError)
  509. from ._stats_py import *
  510. from ._variation import variation
  511. from .distributions import *
  512. from ._morestats import *
  513. from ._multicomp import *
  514. from ._binomtest import binomtest
  515. from ._binned_statistic import *
  516. from ._kde import gaussian_kde
  517. from . import mstats
  518. from . import qmc
  519. from ._multivariate import *
  520. from . import contingency
  521. from .contingency import chi2_contingency
  522. from ._censored_data import CensoredData
  523. from ._resampling import (bootstrap, monte_carlo_test, permutation_test, power,
  524. MonteCarloMethod, PermutationMethod, BootstrapMethod)
  525. from ._entropy import *
  526. from ._hypotests import *
  527. from ._page_trend_test import page_trend_test
  528. from ._mannwhitneyu import mannwhitneyu
  529. from ._bws_test import bws_test
  530. from ._fit import fit, goodness_of_fit
  531. from ._covariance import Covariance
  532. from ._sensitivity_analysis import *
  533. from ._survival import *
  534. from ._distribution_infrastructure import (
  535. make_distribution, Mixture, order_statistic, truncate, exp, log, abs
  536. )
  537. from ._new_distributions import Normal, Logistic, Uniform, Binomial
  538. from ._mgc import multiscale_graphcorr
  539. from ._correlation import chatterjeexi, spearmanrho
  540. from ._quantile import quantile
  541. # Deprecated namespaces, to be removed in v2.0.0
  542. from . import (
  543. biasedurn, kde, morestats, mstats_basic, mstats_extras, mvn, stats
  544. )
  545. __all__ = [s for s in dir() if not s.startswith("_")] # Collect every name currently in the module namespace that does not start with "_" (this drops single-underscore names as well as dunders).
  546. from scipy._lib._testutils import PytestTester
  547. test = PytestTester(__name__) # Bound after __all__ is built, so this assignment does not add `test` to __all__.
  548. del PytestTester # Unbind the helper class so it does not remain as a module attribute.