test_kolmogorov.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. import itertools
  2. import numpy as np
  3. from numpy.testing import assert_
  4. from scipy.special._testutils import FuncData
  5. from scipy.special import kolmogorov, kolmogi, smirnov, smirnovi
  6. from scipy.special._ufuncs import (_kolmogc, _kolmogci, _kolmogp,
  7. _smirnovc, _smirnovci, _smirnovp)
  8. _rtol = 1e-10
  9. class TestSmirnov:
  10. def test_nan(self):
  11. assert_(np.isnan(smirnov(1, np.nan)))
  12. def test_basic(self):
  13. dataset = [(1, 0.1, 0.9),
  14. (1, 0.875, 0.125),
  15. (2, 0.875, 0.125 * 0.125),
  16. (3, 0.875, 0.125 * 0.125 * 0.125)]
  17. dataset = np.asarray(dataset)
  18. FuncData(
  19. smirnov, dataset, (0, 1), 2, rtol=_rtol
  20. ).check(dtypes=[int, float, float])
  21. dataset[:, -1] = 1 - dataset[:, -1]
  22. FuncData(
  23. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  24. ).check(dtypes=[int, float, float])
  25. def test_x_equals_0(self):
  26. dataset = [(n, 0, 1) for n in itertools.chain(range(2, 20), range(1010, 1020))]
  27. dataset = np.asarray(dataset)
  28. FuncData(
  29. smirnov, dataset, (0, 1), 2, rtol=_rtol
  30. ).check(dtypes=[int, float, float])
  31. dataset[:, -1] = 1 - dataset[:, -1]
  32. FuncData(
  33. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  34. ).check(dtypes=[int, float, float])
  35. def test_x_equals_1(self):
  36. dataset = [(n, 1, 0) for n in itertools.chain(range(2, 20), range(1010, 1020))]
  37. dataset = np.asarray(dataset)
  38. FuncData(
  39. smirnov, dataset, (0, 1), 2, rtol=_rtol
  40. ).check(dtypes=[int, float, float])
  41. dataset[:, -1] = 1 - dataset[:, -1]
  42. FuncData(
  43. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  44. ).check(dtypes=[int, float, float])
  45. def test_x_equals_0point5(self):
  46. dataset = [(1, 0.5, 0.5),
  47. (2, 0.5, 0.25),
  48. (3, 0.5, 0.166666666667),
  49. (4, 0.5, 0.09375),
  50. (5, 0.5, 0.056),
  51. (6, 0.5, 0.0327932098765),
  52. (7, 0.5, 0.0191958707681),
  53. (8, 0.5, 0.0112953186035),
  54. (9, 0.5, 0.00661933257355),
  55. (10, 0.5, 0.003888705)]
  56. dataset = np.asarray(dataset)
  57. FuncData(
  58. smirnov, dataset, (0, 1), 2, rtol=_rtol
  59. ).check(dtypes=[int, float, float])
  60. dataset[:, -1] = 1 - dataset[:, -1]
  61. FuncData(
  62. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  63. ).check(dtypes=[int, float, float])
  64. def test_n_equals_1(self):
  65. x = np.linspace(0, 1, 101, endpoint=True)
  66. dataset = np.column_stack([[1]*len(x), x, 1-x])
  67. FuncData(
  68. smirnov, dataset, (0, 1), 2, rtol=_rtol
  69. ).check(dtypes=[int, float, float])
  70. dataset[:, -1] = 1 - dataset[:, -1]
  71. FuncData(
  72. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  73. ).check(dtypes=[int, float, float])
  74. def test_n_equals_2(self):
  75. x = np.linspace(0.5, 1, 101, endpoint=True)
  76. p = np.power(1-x, 2)
  77. n = np.array([2] * len(x))
  78. dataset = np.column_stack([n, x, p])
  79. FuncData(
  80. smirnov, dataset, (0, 1), 2, rtol=_rtol
  81. ).check(dtypes=[int, float, float])
  82. dataset[:, -1] = 1 - dataset[:, -1]
  83. FuncData(
  84. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  85. ).check(dtypes=[int, float, float])
  86. def test_n_equals_3(self):
  87. x = np.linspace(0.7, 1, 31, endpoint=True)
  88. p = np.power(1-x, 3)
  89. n = np.array([3] * len(x))
  90. dataset = np.column_stack([n, x, p])
  91. FuncData(
  92. smirnov, dataset, (0, 1), 2, rtol=_rtol
  93. ).check(dtypes=[int, float, float])
  94. dataset[:, -1] = 1 - dataset[:, -1]
  95. FuncData(
  96. _smirnovc, dataset, (0, 1), 2, rtol=_rtol
  97. ).check(dtypes=[int, float, float])
  98. def test_n_large(self):
  99. # test for large values of n
  100. # Probabilities should go down as n goes up
  101. x = 0.4
  102. pvals = np.array([smirnov(n, x) for n in range(400, 1100, 20)])
  103. dfs = np.diff(pvals)
  104. assert_(np.all(dfs <= 0), msg=f'Not all diffs negative {dfs}')
  105. class TestSmirnovi:
  106. def test_nan(self):
  107. assert_(np.isnan(smirnovi(1, np.nan)))
  108. def test_basic(self):
  109. dataset = [(1, 0.4, 0.6),
  110. (1, 0.6, 0.4),
  111. (1, 0.99, 0.01),
  112. (1, 0.01, 0.99),
  113. (2, 0.125 * 0.125, 0.875),
  114. (3, 0.125 * 0.125 * 0.125, 0.875),
  115. (10, 1.0 / 16 ** 10, 1 - 1.0 / 16)]
  116. dataset = np.asarray(dataset)
  117. FuncData(
  118. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  119. ).check(dtypes=[int, float, float])
  120. dataset[:, 1] = 1 - dataset[:, 1]
  121. FuncData(
  122. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  123. ).check(dtypes=[int, float, float])
  124. def test_x_equals_0(self):
  125. dataset = [(n, 0, 1) for n in itertools.chain(range(2, 20), range(1010, 1020))]
  126. dataset = np.asarray(dataset)
  127. FuncData(
  128. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  129. ).check(dtypes=[int, float, float])
  130. dataset[:, 1] = 1 - dataset[:, 1]
  131. FuncData(
  132. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  133. ).check(dtypes=[int, float, float])
  134. def test_x_equals_1(self):
  135. dataset = [(n, 1, 0) for n in itertools.chain(range(2, 20), range(1010, 1020))]
  136. dataset = np.asarray(dataset)
  137. FuncData(
  138. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  139. ).check(dtypes=[int, float, float])
  140. dataset[:, 1] = 1 - dataset[:, 1]
  141. FuncData(
  142. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  143. ).check(dtypes=[int, float, float])
  144. def test_n_equals_1(self):
  145. pp = np.linspace(0, 1, 101, endpoint=True)
  146. # dataset = np.array([(1, p, 1-p) for p in pp])
  147. dataset = np.column_stack([[1]*len(pp), pp, 1-pp])
  148. FuncData(
  149. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  150. ).check(dtypes=[int, float, float])
  151. dataset[:, 1] = 1 - dataset[:, 1]
  152. FuncData(
  153. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  154. ).check(dtypes=[int, float, float])
  155. def test_n_equals_2(self):
  156. x = np.linspace(0.5, 1, 101, endpoint=True)
  157. p = np.power(1-x, 2)
  158. n = np.array([2] * len(x))
  159. dataset = np.column_stack([n, p, x])
  160. FuncData(
  161. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  162. ).check(dtypes=[int, float, float])
  163. dataset[:, 1] = 1 - dataset[:, 1]
  164. FuncData(
  165. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  166. ).check(dtypes=[int, float, float])
  167. def test_n_equals_3(self):
  168. x = np.linspace(0.7, 1, 31, endpoint=True)
  169. p = np.power(1-x, 3)
  170. n = np.array([3] * len(x))
  171. dataset = np.column_stack([n, p, x])
  172. FuncData(
  173. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  174. ).check(dtypes=[int, float, float])
  175. dataset[:, 1] = 1 - dataset[:, 1]
  176. FuncData(
  177. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  178. ).check(dtypes=[int, float, float])
  179. def test_round_trip(self):
  180. def _sm_smi(n, p):
  181. return smirnov(n, smirnovi(n, p))
  182. def _smc_smci(n, p):
  183. return _smirnovc(n, _smirnovci(n, p))
  184. dataset = [(1, 0.4, 0.4),
  185. (1, 0.6, 0.6),
  186. (2, 0.875, 0.875),
  187. (3, 0.875, 0.875),
  188. (3, 0.125, 0.125),
  189. (10, 0.999, 0.999),
  190. (10, 0.0001, 0.0001)]
  191. dataset = np.asarray(dataset)
  192. FuncData(
  193. _sm_smi, dataset, (0, 1), 2, rtol=_rtol
  194. ).check(dtypes=[int, float, float])
  195. FuncData(
  196. _smc_smci, dataset, (0, 1), 2, rtol=_rtol
  197. ).check(dtypes=[int, float, float])
  198. def test_x_equals_0point5(self):
  199. dataset = [(1, 0.5, 0.5),
  200. (2, 0.5, 0.366025403784),
  201. (2, 0.25, 0.5),
  202. (3, 0.5, 0.297156508177),
  203. (4, 0.5, 0.255520481121),
  204. (5, 0.5, 0.234559536069),
  205. (6, 0.5, 0.21715965898),
  206. (7, 0.5, 0.202722580034),
  207. (8, 0.5, 0.190621765256),
  208. (9, 0.5, 0.180363501362),
  209. (10, 0.5, 0.17157867006)]
  210. dataset = np.asarray(dataset)
  211. FuncData(
  212. smirnovi, dataset, (0, 1), 2, rtol=_rtol
  213. ).check(dtypes=[int, float, float])
  214. dataset[:, 1] = 1 - dataset[:, 1]
  215. FuncData(
  216. _smirnovci, dataset, (0, 1), 2, rtol=_rtol
  217. ).check(dtypes=[int, float, float])
  218. class TestSmirnovp:
  219. def test_nan(self):
  220. assert_(np.isnan(_smirnovp(1, np.nan)))
  221. def test_basic(self):
  222. # Check derivative at endpoints
  223. n1_10 = np.arange(1, 10)
  224. dataset0 = np.column_stack([n1_10,
  225. np.full_like(n1_10, 0),
  226. np.full_like(n1_10, -1)])
  227. FuncData(
  228. _smirnovp, dataset0, (0, 1), 2, rtol=_rtol
  229. ).check(dtypes=[int, float, float])
  230. n2_10 = np.arange(2, 10)
  231. dataset1 = np.column_stack([n2_10,
  232. np.full_like(n2_10, 1.0),
  233. np.full_like(n2_10, 0)])
  234. FuncData(
  235. _smirnovp, dataset1, (0, 1), 2, rtol=_rtol
  236. ).check(dtypes=[int, float, float])
  237. def test_oneminusoneovern(self):
  238. # Check derivative at x=1-1/n
  239. n = np.arange(1, 20)
  240. x = 1.0/n
  241. xm1 = 1-1.0/n
  242. pp1 = -n * x**(n-1)
  243. pp1 -= (1-np.sign(n-2)**2) * 0.5 # n=2, x=0.5, 1-1/n = 0.5, need to adjust
  244. dataset1 = np.column_stack([n, xm1, pp1])
  245. FuncData(
  246. _smirnovp, dataset1, (0, 1), 2, rtol=_rtol
  247. ).check(dtypes=[int, float, float])
  248. def test_oneovertwon(self):
  249. # Check derivative at x=1/2n (Discontinuous at x=1/n, so check at x=1/2n)
  250. n = np.arange(1, 20)
  251. x = 1.0/2/n
  252. pp = -(n*x+1) * (1+x)**(n-2)
  253. dataset0 = np.column_stack([n, x, pp])
  254. FuncData(
  255. _smirnovp, dataset0, (0, 1), 2, rtol=_rtol
  256. ).check(dtypes=[int, float, float])
  257. def test_oneovern(self):
  258. # Check derivative at x=1/n
  259. # (Discontinuous at x=1/n, hard to tell if x==1/n, only use n=power of 2)
  260. n = 2**np.arange(1, 10)
  261. x = 1.0/n
  262. pp = -(n*x+1) * (1+x)**(n-2) + 0.5
  263. dataset0 = np.column_stack([n, x, pp])
  264. FuncData(
  265. _smirnovp, dataset0, (0, 1), 2, rtol=_rtol
  266. ).check(dtypes=[int, float, float])
  267. def test_oneovernclose(self):
  268. # Check derivative at x=1/n
  269. # (Discontinuous at x=1/n, test on either side: x=1/n +/- 2epsilon)
  270. n = np.arange(3, 20)
  271. x = 1.0/n - 2*np.finfo(float).eps
  272. pp = -(n*x+1) * (1+x)**(n-2)
  273. dataset0 = np.column_stack([n, x, pp])
  274. FuncData(
  275. _smirnovp, dataset0, (0, 1), 2, rtol=_rtol
  276. ).check(dtypes=[int, float, float])
  277. x = 1.0/n + 2*np.finfo(float).eps
  278. pp = -(n*x+1) * (1+x)**(n-2) + 1
  279. dataset1 = np.column_stack([n, x, pp])
  280. FuncData(
  281. _smirnovp, dataset1, (0, 1), 2, rtol=_rtol
  282. ).check(dtypes=[int, float, float])
  283. class TestKolmogorov:
  284. def test_nan(self):
  285. assert_(np.isnan(kolmogorov(np.nan)))
  286. def test_basic(self):
  287. dataset = [(0, 1.0),
  288. (0.5, 0.96394524366487511),
  289. (0.8275735551899077, 0.5000000000000000),
  290. (1, 0.26999967167735456),
  291. (2, 0.00067092525577969533)]
  292. dataset = np.asarray(dataset)
  293. FuncData(kolmogorov, dataset, (0,), 1, rtol=_rtol).check()
  294. def test_linspace(self):
  295. x = np.linspace(0, 2.0, 21)
  296. dataset = [1.0000000000000000, 1.0000000000000000, 0.9999999999994950,
  297. 0.9999906941986655, 0.9971923267772983, 0.9639452436648751,
  298. 0.8642827790506042, 0.7112351950296890, 0.5441424115741981,
  299. 0.3927307079406543, 0.2699996716773546, 0.1777181926064012,
  300. 0.1122496666707249, 0.0680922218447664, 0.0396818795381144,
  301. 0.0222179626165251, 0.0119520432391966, 0.0061774306344441,
  302. 0.0030676213475797, 0.0014636048371873, 0.0006709252557797]
  303. dataset_c = [0.0000000000000000, 6.609305242245699e-53, 5.050407338670114e-13,
  304. 9.305801334566668e-06, 0.0028076732227017, 0.0360547563351249,
  305. 0.1357172209493958, 0.2887648049703110, 0.4558575884258019,
  306. 0.6072692920593457, 0.7300003283226455, 0.8222818073935988,
  307. 0.8877503333292751, 0.9319077781552336, 0.9603181204618857,
  308. 0.9777820373834749, 0.9880479567608034, 0.9938225693655559,
  309. 0.9969323786524203, 0.9985363951628127, 0.9993290747442203]
  310. dataset = np.column_stack([x, dataset])
  311. FuncData(kolmogorov, dataset, (0,), 1, rtol=_rtol).check()
  312. dataset_c = np.column_stack([x, dataset_c])
  313. FuncData(_kolmogc, dataset_c, (0,), 1, rtol=_rtol).check()
  314. def test_linspacei(self):
  315. p = np.linspace(0, 1.0, 21, endpoint=True)
  316. dataset = [np.inf, 1.3580986393225507, 1.2238478702170823,
  317. 1.1379465424937751, 1.0727491749396481, 1.0191847202536859,
  318. 0.9730633753323726, 0.9320695842357622, 0.8947644549851197,
  319. 0.8601710725555463, 0.8275735551899077, 0.7964065373291559,
  320. 0.7661855555617682, 0.7364542888171910, 0.7067326523068980,
  321. 0.6764476915028201, 0.6448126061663567, 0.6105590999244391,
  322. 0.5711732651063401, 0.5196103791686224, 0.0000000000000000]
  323. dataset_c = [0.0000000000000000, 0.5196103791686225, 0.5711732651063401,
  324. 0.6105590999244391, 0.6448126061663567, 0.6764476915028201,
  325. 0.7067326523068980, 0.7364542888171910, 0.7661855555617682,
  326. 0.7964065373291559, 0.8275735551899077, 0.8601710725555463,
  327. 0.8947644549851196, 0.9320695842357622, 0.9730633753323727,
  328. 1.0191847202536859, 1.0727491749396481, 1.1379465424937754,
  329. 1.2238478702170825, 1.3580986393225509, np.inf]
  330. dataset = np.column_stack([p[1:], dataset[1:]])
  331. FuncData(kolmogi, dataset, (0,), 1, rtol=_rtol).check()
  332. dataset_c = np.column_stack([p[:-1], dataset_c[:-1]])
  333. FuncData(_kolmogci, dataset_c, (0,), 1, rtol=_rtol).check()
  334. def test_smallx(self):
  335. epsilon = 0.1 ** np.arange(1, 14)
  336. x = np.array([0.571173265106, 0.441027698518, 0.374219690278, 0.331392659217,
  337. 0.300820537459, 0.277539353999, 0.259023494805, 0.243829561254,
  338. 0.231063086389, 0.220135543236, 0.210641372041, 0.202290283658,
  339. 0.19487060742])
  340. dataset = np.column_stack([x, 1-epsilon])
  341. FuncData(kolmogorov, dataset, (0,), 1, rtol=_rtol).check()
  342. def test_round_trip(self):
  343. def _ki_k(_x):
  344. return kolmogi(kolmogorov(_x))
  345. def _kci_kc(_x):
  346. return _kolmogci(_kolmogc(_x))
  347. x = np.linspace(0.0, 2.0, 21, endpoint=True)
  348. # Exclude 0.1, 0.2. 0.2 almost makes succeeds, but 0.1 has no chance.
  349. x02 = x[(x == 0) | (x > 0.21)]
  350. dataset02 = np.column_stack([x02, x02])
  351. FuncData(_ki_k, dataset02, (0,), 1, rtol=_rtol).check()
  352. dataset = np.column_stack([x, x])
  353. FuncData(_kci_kc, dataset, (0,), 1, rtol=_rtol).check()
  354. class TestKolmogi:
  355. def test_nan(self):
  356. assert_(np.isnan(kolmogi(np.nan)))
  357. def test_basic(self):
  358. dataset = [(1.0, 0),
  359. (0.96394524366487511, 0.5),
  360. (0.9, 0.571173265106),
  361. (0.5000000000000000, 0.8275735551899077),
  362. (0.26999967167735456, 1),
  363. (0.00067092525577969533, 2)]
  364. dataset = np.asarray(dataset)
  365. FuncData(kolmogi, dataset, (0,), 1, rtol=_rtol).check()
  366. def test_smallpcdf(self):
  367. epsilon = 0.5 ** np.arange(1, 55, 3)
  368. # kolmogi(1-p) == _kolmogci(p) if 1-(1-p) == p, but not necessarily otherwise
  369. # Use epsilon s.t. 1-(1-epsilon)) == epsilon,
  370. # so can use same x-array for both results
  371. x = np.array([0.8275735551899077, 0.5345255069097583, 0.4320114038786941,
  372. 0.3736868442620478, 0.3345161714909591, 0.3057833329315859,
  373. 0.2835052890528936, 0.2655578150208676, 0.2506869966107999,
  374. 0.2380971058736669, 0.2272549289962079, 0.2177876361600040,
  375. 0.2094254686862041, 0.2019676748836232, 0.1952612948137504,
  376. 0.1891874239646641, 0.1836520225050326, 0.1785795904846466])
  377. dataset = np.column_stack([1-epsilon, x])
  378. FuncData(kolmogi, dataset, (0,), 1, rtol=_rtol).check()
  379. dataset = np.column_stack([epsilon, x])
  380. FuncData(_kolmogci, dataset, (0,), 1, rtol=_rtol).check()
  381. def test_smallpsf(self):
  382. epsilon = 0.5 ** np.arange(1, 55, 3)
  383. # kolmogi(p) == _kolmogci(1-p) if 1-(1-p) == p, but not necessarily otherwise
  384. # Use epsilon s.t. 1-(1-epsilon)) == epsilon,
  385. # so can use same x-array for both results
  386. x = np.array([0.8275735551899077, 1.3163786275161036, 1.6651092133663343,
  387. 1.9525136345289607, 2.2027324540033235, 2.4272929437460848,
  388. 2.6327688477341593, 2.8233300509220260, 3.0018183401530627,
  389. 3.1702735084088891, 3.3302184446307912, 3.4828258153113318,
  390. 3.6290214150152051, 3.7695513262825959, 3.9050272690877326,
  391. 4.0359582187082550, 4.1627730557884890, 4.2858371743264527])
  392. dataset = np.column_stack([epsilon, x])
  393. FuncData(kolmogi, dataset, (0,), 1, rtol=_rtol).check()
  394. dataset = np.column_stack([1-epsilon, x])
  395. FuncData(_kolmogci, dataset, (0,), 1, rtol=_rtol).check()
  396. def test_round_trip(self):
  397. def _k_ki(_p):
  398. return kolmogorov(kolmogi(_p))
  399. p = np.linspace(0.1, 1.0, 10, endpoint=True)
  400. dataset = np.column_stack([p, p])
  401. FuncData(_k_ki, dataset, (0,), 1, rtol=_rtol).check()
  402. class TestKolmogp:
  403. def test_nan(self):
  404. assert_(np.isnan(_kolmogp(np.nan)))
  405. def test_basic(self):
  406. dataset = [(0.000000, -0.0),
  407. (0.200000, -1.532420541338916e-10),
  408. (0.400000, -0.1012254419260496),
  409. (0.600000, -1.324123244249925),
  410. (0.800000, -1.627024345636592),
  411. (1.000000, -1.071948558356941),
  412. (1.200000, -0.538512430720529),
  413. (1.400000, -0.2222133182429472),
  414. (1.600000, -0.07649302775520538),
  415. (1.800000, -0.02208687346347873),
  416. (2.000000, -0.005367402045629683)]
  417. dataset = np.asarray(dataset)
  418. FuncData(_kolmogp, dataset, (0,), 1, rtol=_rtol).check()