test_survival.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. import pytest
  2. import numpy as np
  3. from numpy.testing import assert_equal, assert_allclose
  4. from scipy import stats
  5. from scipy.stats import _survival
  6. def _kaplan_meier_reference(times, censored):
  7. # This is a very straightforward implementation of the Kaplan-Meier
  8. # estimator that does almost everything differently from the implementation
  9. # in stats.ecdf.
  10. # Begin by sorting the raw data. Note that the order of death and loss
  11. # at a given time matters: death happens first. See [2] page 461:
  12. # "These conventions may be paraphrased by saying that deaths recorded as
  13. # of an age t are treated as if they occurred slightly before t, and losses
  14. # recorded as of an age t are treated as occurring slightly after t."
  15. # We implement this by sorting the data first by time, then by `censored`,
  16. # (which is 0 when there is a death and 1 when there is only a loss).
  17. dtype = [('time', float), ('censored', int)]
  18. data = np.array([(t, d) for t, d in zip(times, censored)], dtype=dtype)
  19. data = np.sort(data, order=('time', 'censored'))
  20. times = data['time']
  21. died = np.logical_not(data['censored'])
  22. m = times.size
  23. n = np.arange(m, 0, -1) # number at risk
  24. sf = np.cumprod((n - died) / n)
  25. # Find the indices of the *last* occurrence of unique times. The
  26. # corresponding entries of `times` and `sf` are what we want.
  27. _, indices = np.unique(times[::-1], return_index=True)
  28. ref_times = times[-indices - 1]
  29. ref_sf = sf[-indices - 1]
  30. return ref_times, ref_sf
  31. class TestSurvival:
  32. @staticmethod
  33. def get_random_sample(rng, n_unique):
  34. # generate random sample
  35. unique_times = rng.random(n_unique)
  36. # convert to `np.int32` to resolve `np.repeat` failure in 32-bit CI
  37. repeats = rng.integers(1, 4, n_unique).astype(np.int32)
  38. times = rng.permuted(np.repeat(unique_times, repeats))
  39. censored = rng.random(size=times.size) > rng.random()
  40. sample = stats.CensoredData.right_censored(times, censored)
  41. return sample, times, censored
  42. def test_input_validation(self):
  43. message = '`sample` must be a one-dimensional sequence.'
  44. with pytest.raises(ValueError, match=message):
  45. stats.ecdf([[1]])
  46. with pytest.raises(ValueError, match=message):
  47. stats.ecdf(1)
  48. message = '`sample` must not contain nan'
  49. with pytest.raises(ValueError, match=message):
  50. stats.ecdf([np.nan])
  51. message = 'Currently, only uncensored and right-censored data...'
  52. with pytest.raises(NotImplementedError, match=message):
  53. stats.ecdf(stats.CensoredData.left_censored([1], censored=[True]))
  54. message = 'method` must be one of...'
  55. res = stats.ecdf([1, 2, 3])
  56. with pytest.raises(ValueError, match=message):
  57. res.cdf.confidence_interval(method='ekki-ekki')
  58. with pytest.raises(ValueError, match=message):
  59. res.sf.confidence_interval(method='shrubbery')
  60. message = 'confidence_level` must be a scalar between 0 and 1'
  61. with pytest.raises(ValueError, match=message):
  62. res.cdf.confidence_interval(-1)
  63. with pytest.raises(ValueError, match=message):
  64. res.sf.confidence_interval([0.5, 0.6])
  65. message = 'The confidence interval is undefined at some observations.'
  66. with pytest.warns(RuntimeWarning, match=message):
  67. ci = res.cdf.confidence_interval()
  68. message = 'Confidence interval bounds do not implement...'
  69. with pytest.raises(NotImplementedError, match=message):
  70. ci.low.confidence_interval()
  71. with pytest.raises(NotImplementedError, match=message):
  72. ci.high.confidence_interval()
  73. def test_edge_cases(self):
  74. res = stats.ecdf([])
  75. assert_equal(res.cdf.quantiles, [])
  76. assert_equal(res.cdf.probabilities, [])
  77. res = stats.ecdf([1])
  78. assert_equal(res.cdf.quantiles, [1])
  79. assert_equal(res.cdf.probabilities, [1])
  80. def test_unique(self):
  81. # Example with unique observations; `stats.ecdf` ref. [1] page 80
  82. sample = [6.23, 5.58, 7.06, 6.42, 5.20]
  83. res = stats.ecdf(sample)
  84. ref_x = np.sort(np.unique(sample))
  85. ref_cdf = np.arange(1, 6) / 5
  86. ref_sf = 1 - ref_cdf
  87. assert_equal(res.cdf.quantiles, ref_x)
  88. assert_equal(res.cdf.probabilities, ref_cdf)
  89. assert_equal(res.sf.quantiles, ref_x)
  90. assert_equal(res.sf.probabilities, ref_sf)
  91. def test_nonunique(self):
  92. # Example with non-unique observations; `stats.ecdf` ref. [1] page 82
  93. sample = [0, 2, 1, 2, 3, 4]
  94. res = stats.ecdf(sample)
  95. ref_x = np.sort(np.unique(sample))
  96. ref_cdf = np.array([1/6, 2/6, 4/6, 5/6, 1])
  97. ref_sf = 1 - ref_cdf
  98. assert_equal(res.cdf.quantiles, ref_x)
  99. assert_equal(res.cdf.probabilities, ref_cdf)
  100. assert_equal(res.sf.quantiles, ref_x)
  101. assert_equal(res.sf.probabilities, ref_sf)
  102. def test_evaluate_methods(self):
  103. # Test CDF and SF `evaluate` methods
  104. rng = np.random.default_rng(1162729143302572461)
  105. sample, _, _ = self.get_random_sample(rng, 15)
  106. res = stats.ecdf(sample)
  107. x = res.cdf.quantiles
  108. xr = x + np.diff(x, append=x[-1]+1)/2 # right shifted points
  109. assert_equal(res.cdf.evaluate(x), res.cdf.probabilities)
  110. assert_equal(res.cdf.evaluate(xr), res.cdf.probabilities)
  111. assert_equal(res.cdf.evaluate(x[0]-1), 0) # CDF starts at 0
  112. assert_equal(res.cdf.evaluate([-np.inf, np.inf]), [0, 1])
  113. assert_equal(res.sf.evaluate(x), res.sf.probabilities)
  114. assert_equal(res.sf.evaluate(xr), res.sf.probabilities)
  115. assert_equal(res.sf.evaluate(x[0]-1), 1) # SF starts at 1
  116. assert_equal(res.sf.evaluate([-np.inf, np.inf]), [1, 0])
  117. # ref. [1] page 91
  118. t1 = [37, 43, 47, 56, 60, 62, 71, 77, 80, 81] # times
  119. d1 = [0, 0, 1, 1, 0, 0, 0, 1, 1, 1] # 1 means deaths (not censored)
  120. r1 = [1, 1, 0.875, 0.75, 0.75, 0.75, 0.75, 0.5, 0.25, 0] # reference SF
  121. # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
  122. t2 = [8, 12, 26, 14, 21, 27, 8, 32, 20, 40]
  123. d2 = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
  124. r2 = [0.9, 0.788, 0.675, 0.675, 0.54, 0.405, 0.27, 0.27, 0.27]
  125. t3 = [33, 28, 41, 48, 48, 25, 37, 48, 25, 43]
  126. d3 = [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
  127. r3 = [1, 0.875, 0.75, 0.75, 0.6, 0.6, 0.6]
  128. # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/bs704_survival4.html
  129. t4 = [24, 3, 11, 19, 24, 13, 14, 2, 18, 17,
  130. 24, 21, 12, 1, 10, 23, 6, 5, 9, 17]
  131. d4 = [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1]
  132. r4 = [0.95, 0.95, 0.897, 0.844, 0.844, 0.844, 0.844, 0.844, 0.844,
  133. 0.844, 0.76, 0.676, 0.676, 0.676, 0.676, 0.507, 0.507]
  134. # https://www.real-statistics.com/survival-analysis/kaplan-meier-procedure/confidence-interval-for-the-survival-function/
  135. t5 = [3, 5, 8, 10, 5, 5, 8, 12, 15, 14, 2, 11, 10, 9, 12, 5, 8, 11]
  136. d5 = [1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
  137. r5 = [0.944, 0.889, 0.722, 0.542, 0.542, 0.542, 0.361, 0.181, 0.181, 0.181]
  138. @pytest.mark.parametrize("case", [(t1, d1, r1), (t2, d2, r2), (t3, d3, r3),
  139. (t4, d4, r4), (t5, d5, r5)])
  140. def test_right_censored_against_examples(self, case):
  141. # test `ecdf` against other implementations on example problems
  142. times, died, ref = case
  143. sample = stats.CensoredData.right_censored(times, np.logical_not(died))
  144. res = stats.ecdf(sample)
  145. assert_allclose(res.sf.probabilities, ref, atol=1e-3)
  146. assert_equal(res.sf.quantiles, np.sort(np.unique(times)))
  147. # test reference implementation against other implementations
  148. res = _kaplan_meier_reference(times, np.logical_not(died))
  149. assert_equal(res[0], np.sort(np.unique(times)))
  150. assert_allclose(res[1], ref, atol=1e-3)
  151. @pytest.mark.parametrize('seed', [182746786639392128, 737379171436494115,
  152. 576033618403180168, 308115465002673650])
  153. def test_right_censored_against_reference_implementation(self, seed):
  154. # test `ecdf` against reference implementation on random problems
  155. rng = np.random.default_rng(seed)
  156. n_unique = rng.integers(10, 100)
  157. sample, times, censored = self.get_random_sample(rng, n_unique)
  158. res = stats.ecdf(sample)
  159. ref = _kaplan_meier_reference(times, censored)
  160. assert_allclose(res.sf.quantiles, ref[0])
  161. assert_allclose(res.sf.probabilities, ref[1])
  162. # If all observations are uncensored, the KM estimate should match
  163. # the usual estimate for uncensored data
  164. sample = stats.CensoredData(uncensored=times)
  165. res = _survival._ecdf_right_censored(sample) # force Kaplan-Meier
  166. ref = stats.ecdf(times)
  167. assert_equal(res[0], ref.sf.quantiles)
  168. assert_allclose(res[1], ref.cdf.probabilities, rtol=1e-14)
  169. assert_allclose(res[2], ref.sf.probabilities, rtol=1e-14)
  170. def test_right_censored_ci(self):
  171. # test "greenwood" confidence interval against example 4 (URL above).
  172. times, died = self.t4, self.d4
  173. sample = stats.CensoredData.right_censored(times, np.logical_not(died))
  174. res = stats.ecdf(sample)
  175. ref_allowance = [0.096, 0.096, 0.135, 0.162, 0.162, 0.162, 0.162,
  176. 0.162, 0.162, 0.162, 0.214, 0.246, 0.246, 0.246,
  177. 0.246, 0.341, 0.341]
  178. sf_ci = res.sf.confidence_interval()
  179. cdf_ci = res.cdf.confidence_interval()
  180. allowance = res.sf.probabilities - sf_ci.low.probabilities
  181. assert_allclose(allowance, ref_allowance, atol=1e-3)
  182. assert_allclose(sf_ci.low.probabilities,
  183. np.clip(res.sf.probabilities - allowance, 0, 1))
  184. assert_allclose(sf_ci.high.probabilities,
  185. np.clip(res.sf.probabilities + allowance, 0, 1))
  186. assert_allclose(cdf_ci.low.probabilities,
  187. np.clip(res.cdf.probabilities - allowance, 0, 1))
  188. assert_allclose(cdf_ci.high.probabilities,
  189. np.clip(res.cdf.probabilities + allowance, 0, 1))
  190. # test "log-log" confidence interval against Mathematica
  191. # e = {24, 3, 11, 19, 24, 13, 14, 2, 18, 17, 24, 21, 12, 1, 10, 23, 6, 5,
  192. # 9, 17}
  193. # ci = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0}
  194. # R = EventData[e, ci]
  195. # S = SurvivalModelFit[R]
  196. # S["PointwiseIntervals", ConfidenceLevel->0.95,
  197. # ConfidenceTransform->"LogLog"]
  198. ref_low = [0.694743, 0.694743, 0.647529, 0.591142, 0.591142, 0.591142,
  199. 0.591142, 0.591142, 0.591142, 0.591142, 0.464605, 0.370359,
  200. 0.370359, 0.370359, 0.370359, 0.160489, 0.160489]
  201. ref_high = [0.992802, 0.992802, 0.973299, 0.947073, 0.947073, 0.947073,
  202. 0.947073, 0.947073, 0.947073, 0.947073, 0.906422, 0.856521,
  203. 0.856521, 0.856521, 0.856521, 0.776724, 0.776724]
  204. sf_ci = res.sf.confidence_interval(method='log-log')
  205. assert_allclose(sf_ci.low.probabilities, ref_low, atol=1e-6)
  206. assert_allclose(sf_ci.high.probabilities, ref_high, atol=1e-6)
  207. def test_right_censored_ci_example_5(self):
  208. # test "exponential greenwood" confidence interval against example 5
  209. times, died = self.t5, self.d5
  210. sample = stats.CensoredData.right_censored(times, np.logical_not(died))
  211. res = stats.ecdf(sample)
  212. lower = np.array([0.66639, 0.624174, 0.456179, 0.287822, 0.287822,
  213. 0.287822, 0.128489, 0.030957, 0.030957, 0.030957])
  214. upper = np.array([0.991983, 0.970995, 0.87378, 0.739467, 0.739467,
  215. 0.739467, 0.603133, 0.430365, 0.430365, 0.430365])
  216. sf_ci = res.sf.confidence_interval(method='log-log')
  217. cdf_ci = res.cdf.confidence_interval(method='log-log')
  218. assert_allclose(sf_ci.low.probabilities, lower, atol=1e-5)
  219. assert_allclose(sf_ci.high.probabilities, upper, atol=1e-5)
  220. assert_allclose(cdf_ci.low.probabilities, 1-upper, atol=1e-5)
  221. assert_allclose(cdf_ci.high.probabilities, 1-lower, atol=1e-5)
  222. # Test against R's `survival` library `survfit` function, 90%CI
  223. # library(survival)
  224. # options(digits=16)
  225. # time = c(3, 5, 8, 10, 5, 5, 8, 12, 15, 14, 2, 11, 10, 9, 12, 5, 8, 11)
  226. # status = c(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1)
  227. # res = survfit(Surv(time, status)
  228. # ~1, conf.type = "log-log", conf.int = 0.90)
  229. # res$time; res$lower; res$upper
  230. low = [0.74366748406861172, 0.68582332289196246, 0.50596835651480121,
  231. 0.32913131413336727, 0.32913131413336727, 0.32913131413336727,
  232. 0.15986912028781664, 0.04499539918147757, 0.04499539918147757,
  233. 0.04499539918147757]
  234. high = [0.9890291867238429, 0.9638835422144144, 0.8560366823086629,
  235. 0.7130167643978450, 0.7130167643978450, 0.7130167643978450,
  236. 0.5678602982997164, 0.3887616766886558, 0.3887616766886558,
  237. 0.3887616766886558]
  238. sf_ci = res.sf.confidence_interval(method='log-log',
  239. confidence_level=0.9)
  240. assert_allclose(sf_ci.low.probabilities, low)
  241. assert_allclose(sf_ci.high.probabilities, high)
  242. # And with conf.type = "plain"
  243. low = [0.8556383113628162, 0.7670478794850761, 0.5485720663578469,
  244. 0.3441515412527123, 0.3441515412527123, 0.3441515412527123,
  245. 0.1449184105424544, 0., 0., 0.]
  246. high = [1., 1., 0.8958723780865975, 0.7391817920806210,
  247. 0.7391817920806210, 0.7391817920806210, 0.5773038116797676,
  248. 0.3642270254596720, 0.3642270254596720, 0.3642270254596720]
  249. sf_ci = res.sf.confidence_interval(confidence_level=0.9)
  250. assert_allclose(sf_ci.low.probabilities, low)
  251. assert_allclose(sf_ci.high.probabilities, high)
  252. def test_right_censored_ci_nans(self):
  253. # test `ecdf` confidence interval on a problem that results in NaNs
  254. times, died = self.t1, self.d1
  255. sample = stats.CensoredData.right_censored(times, np.logical_not(died))
  256. res = stats.ecdf(sample)
  257. # Reference values generated with Matlab
  258. # format long
  259. # t = [37 43 47 56 60 62 71 77 80 81];
  260. # d = [0 0 1 1 0 0 0 1 1 1];
  261. # censored = ~d1;
  262. # [f, x, flo, fup] = ecdf(t, 'Censoring', censored, 'Alpha', 0.05);
  263. x = [37, 47, 56, 77, 80, 81]
  264. flo = [np.nan, 0, 0, 0.052701464070711, 0.337611126231790, np.nan]
  265. fup = [np.nan, 0.35417230377, 0.5500569798, 0.9472985359, 1.0, np.nan]
  266. i = np.searchsorted(res.cdf.quantiles, x)
  267. message = "The confidence interval is undefined at some observations"
  268. with pytest.warns(RuntimeWarning, match=message):
  269. ci = res.cdf.confidence_interval()
  270. # Matlab gives NaN as the first element of the CIs. Mathematica agrees,
  271. # but R's survfit does not. It makes some sense, but it's not what the
  272. # formula gives, so skip that element.
  273. assert_allclose(ci.low.probabilities[i][1:], flo[1:])
  274. assert_allclose(ci.high.probabilities[i][1:], fup[1:])
  275. # [f, x, flo, fup] = ecdf(t, 'Censoring', censored, 'Function',
  276. # 'survivor', 'Alpha', 0.05);
  277. flo = [np.nan, 0.64582769623, 0.449943020228, 0.05270146407, 0, np.nan]
  278. fup = [np.nan, 1.0, 1.0, 0.947298535929289, 0.662388873768210, np.nan]
  279. i = np.searchsorted(res.cdf.quantiles, x)
  280. with pytest.warns(RuntimeWarning, match=message):
  281. ci = res.sf.confidence_interval()
  282. assert_allclose(ci.low.probabilities[i][1:], flo[1:])
  283. assert_allclose(ci.high.probabilities[i][1:], fup[1:])
  284. # With the same data, R's `survival` library `survfit` function
  285. # doesn't produce the leading NaN
  286. # library(survival)
  287. # options(digits=16)
  288. # time = c(37, 43, 47, 56, 60, 62, 71, 77, 80, 81)
  289. # status = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1)
  290. # res = survfit(Surv(time, status)
  291. # ~1, conf.type = "plain", conf.int = 0.95)
  292. # res$time
  293. # res$lower
  294. # res$upper
  295. low = [1., 1., 0.64582769623233816, 0.44994302022779326,
  296. 0.44994302022779326, 0.44994302022779326, 0.44994302022779326,
  297. 0.05270146407071086, 0., np.nan]
  298. high = [1., 1., 1., 1., 1., 1., 1., 0.9472985359292891,
  299. 0.6623888737682101, np.nan]
  300. assert_allclose(ci.low.probabilities, low)
  301. assert_allclose(ci.high.probabilities, high)
  302. # It does with conf.type="log-log", as do we
  303. with pytest.warns(RuntimeWarning, match=message):
  304. ci = res.sf.confidence_interval(method='log-log')
  305. low = [np.nan, np.nan, 0.38700001403202522, 0.31480711370551911,
  306. 0.31480711370551911, 0.31480711370551911, 0.31480711370551911,
  307. 0.08048821148507734, 0.01049958986680601, np.nan]
  308. high = [np.nan, np.nan, 0.9813929658789660, 0.9308983170906275,
  309. 0.9308983170906275, 0.9308983170906275, 0.9308983170906275,
  310. 0.8263946341076415, 0.6558775085110887, np.nan]
  311. assert_allclose(ci.low.probabilities, low)
  312. assert_allclose(ci.high.probabilities, high)
  313. def test_right_censored_against_uncensored(self):
  314. rng = np.random.default_rng(7463952748044886637)
  315. sample = rng.integers(10, 100, size=1000)
  316. censored = np.zeros_like(sample)
  317. censored[np.argmax(sample)] = True
  318. res = stats.ecdf(sample)
  319. ref = stats.ecdf(stats.CensoredData.right_censored(sample, censored))
  320. assert_equal(res.sf.quantiles, ref.sf.quantiles)
  321. assert_equal(res.sf._n, ref.sf._n)
  322. assert_equal(res.sf._d[:-1], ref.sf._d[:-1]) # difference @ [-1]
  323. assert_allclose(res.sf._sf[:-1], ref.sf._sf[:-1], rtol=1e-14)
  324. def test_plot_iv(self):
  325. rng = np.random.default_rng(1769658657308472721)
  326. n_unique = rng.integers(10, 100)
  327. sample, _, _ = self.get_random_sample(rng, n_unique)
  328. res = stats.ecdf(sample)
  329. try:
  330. import matplotlib.pyplot as plt # noqa: F401
  331. res.sf.plot() # no other errors occur
  332. except (ModuleNotFoundError, ImportError):
  333. message = r"matplotlib must be installed to use method `plot`."
  334. with pytest.raises(ModuleNotFoundError, match=message):
  335. res.sf.plot()
  336. class TestLogRank:
  337. @pytest.mark.parametrize(
  338. "x, y, statistic, pvalue",
  339. # Results validate with R
  340. # library(survival)
  341. # options(digits=16)
  342. #
  343. # futime_1 <- c(8, 12, 26, 14, 21, 27, 8, 32, 20, 40)
  344. # fustat_1 <- c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
  345. # rx_1 <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
  346. #
  347. # futime_2 <- c(33, 28, 41, 48, 48, 25, 37, 48, 25, 43)
  348. # fustat_2 <- c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
  349. # rx_2 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
  350. #
  351. # futime <- c(futime_1, futime_2)
  352. # fustat <- c(fustat_1, fustat_2)
  353. # rx <- c(rx_1, rx_2)
  354. #
  355. # survdiff(formula = Surv(futime, fustat) ~ rx)
  356. #
  357. # Also check against another library which handle alternatives
  358. # library(nph)
  359. # logrank.test(futime, fustat, rx, alternative = "two.sided")
  360. # res["test"]
  361. [(
  362. # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
  363. # uncensored, censored
  364. [[8, 12, 26, 14, 21, 27], [8, 32, 20, 40]],
  365. [[33, 28, 41], [48, 48, 25, 37, 48, 25, 43]],
  366. # chi2, ["two-sided", "less", "greater"]
  367. 6.91598157449,
  368. [0.008542873404, 0.9957285632979385, 0.004271436702061537]
  369. ),
  370. (
  371. # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
  372. [[19, 6, 5, 4], [20, 19, 17, 14]],
  373. [[16, 21, 7], [21, 15, 18, 18, 5]],
  374. 0.835004855038,
  375. [0.3608293039, 0.8195853480676912, 0.1804146519323088]
  376. ),
  377. (
  378. # Bland, Altman, "The logrank test", BMJ, 2004
  379. # https://www.bmj.com/content/328/7447/1073.short
  380. [[6, 13, 21, 30, 37, 38, 49, 50, 63, 79, 86, 98, 202, 219],
  381. [31, 47, 80, 82, 82, 149]],
  382. [[10, 10, 12, 13, 14, 15, 16, 17, 18, 20, 24, 24, 25, 28, 30,
  383. 33, 35, 37, 40, 40, 46, 48, 76, 81, 82, 91, 112, 181],
  384. [34, 40, 70]],
  385. 7.49659416854,
  386. [0.006181578637, 0.003090789318730882, 0.9969092106812691]
  387. )]
  388. )
  389. def test_log_rank(self, x, y, statistic, pvalue):
  390. x = stats.CensoredData(uncensored=x[0], right=x[1])
  391. y = stats.CensoredData(uncensored=y[0], right=y[1])
  392. for i, alternative in enumerate(["two-sided", "less", "greater"]):
  393. res = stats.logrank(x=x, y=y, alternative=alternative)
  394. # we return z and use the normal distribution while other framework
  395. # return z**2. The p-value are directly comparable, but we have to
  396. # square the statistic
  397. assert_allclose(res.statistic**2, statistic, atol=1e-10)
  398. assert_allclose(res.pvalue, pvalue[i], atol=1e-10)
  399. def test_raises(self):
  400. sample = stats.CensoredData([1, 2])
  401. msg = r"`y` must be"
  402. with pytest.raises(ValueError, match=msg):
  403. stats.logrank(x=sample, y=[[1, 2]])
  404. msg = r"`x` must be"
  405. with pytest.raises(ValueError, match=msg):
  406. stats.logrank(x=[[1, 2]], y=sample)