_page_trend_test.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. from dataclasses import dataclass
  2. from itertools import permutations
  3. import math
  4. import threading
  5. import numpy as np
  6. from ._continuous_distns import norm
  7. from scipy._lib._array_api import xp_capabilities
  8. import scipy.stats
  9. @dataclass
  10. class PageTrendTestResult:
  11. statistic: float
  12. pvalue: float
  13. method: str
  14. @xp_capabilities(np_only=True)
  15. def page_trend_test(data, ranked=False, predicted_ranks=None, method='auto'):
  16. r"""
  17. Perform Page's Test, a measure of trend in observations between treatments.
  18. Page's Test (also known as Page's :math:`L` test) is useful when:
  19. * there are :math:`n \geq 3` treatments,
  20. * :math:`m \geq 2` subjects are observed for each treatment, and
  21. * the observations are hypothesized to have a particular order.
  22. Specifically, the test considers the null hypothesis that
  23. .. math::
  24. m_1 = m_2 = m_3 \cdots = m_n,
  25. where :math:`m_j` is the mean of the observed quantity under treatment
  26. :math:`j`, against the alternative hypothesis that
  27. .. math::
  28. m_1 \leq m_2 \leq m_3 \leq \cdots \leq m_n,
  29. where at least one inequality is strict.
  30. As noted by [4]_, Page's :math:`L` test has greater statistical power than
  31. the Friedman test against the alternative that there is a difference in
  32. trend, as Friedman's test only considers a difference in the means of the
  33. observations without considering their order. Whereas Spearman :math:`\rho`
  34. considers the correlation between the ranked observations of two variables
  35. (e.g. the airspeed velocity of a swallow vs. the weight of the coconut it
  36. carries), Page's :math:`L` is concerned with a trend in an observation
  37. (e.g. the airspeed velocity of a swallow) across several distinct
  38. treatments (e.g. carrying each of five coconuts of different weight) even
  39. as the observation is repeated with multiple subjects (e.g. one European
  40. swallow and one African swallow).
  41. Parameters
  42. ----------
  43. data : array-like
  44. A :math:`m \times n` array; the element in row :math:`i` and
  45. column :math:`j` is the observation corresponding with subject
  46. :math:`i` and treatment :math:`j`. By default, the columns are
  47. assumed to be arranged in order of increasing predicted mean.
  48. ranked : boolean, optional
  49. By default, `data` is assumed to be observations rather than ranks;
  50. it will be ranked with `scipy.stats.rankdata` along ``axis=1``. If
  51. `data` is provided in the form of ranks, pass argument ``True``.
  52. predicted_ranks : array-like, optional
  53. The predicted ranks of the column means. If not specified,
  54. the columns are assumed to be arranged in order of increasing
  55. predicted mean, so the default `predicted_ranks` are
  56. :math:`[1, 2, \dots, n-1, n]`.
  57. method : {'auto', 'asymptotic', 'exact'}, optional
  58. Selects the method used to calculate the *p*-value. The following
  59. options are available.
  60. * 'auto': selects between 'exact' and 'asymptotic' to
  61. achieve reasonably accurate results in reasonable time (default)
  62. * 'asymptotic': compares the standardized test statistic against
  63. the normal distribution
  64. * 'exact': computes the exact *p*-value by comparing the observed
  65. :math:`L` statistic against those realized by all possible
  66. permutations of ranks (under the null hypothesis that each
  67. permutation is equally likely)
  68. Returns
  69. -------
  70. res : PageTrendTestResult
  71. An object containing attributes:
  72. statistic : float
  73. Page's :math:`L` test statistic.
  74. pvalue : float
  75. The associated *p*-value
  76. method : {'asymptotic', 'exact'}
  77. The method used to compute the *p*-value
  78. See Also
  79. --------
  80. rankdata, friedmanchisquare, spearmanr
  81. Notes
  82. -----
  83. As noted in [1]_, "the :math:`n` 'treatments' could just as well represent
  84. :math:`n` objects or events or performances or persons or trials ranked."
  85. Similarly, the :math:`m` 'subjects' could equally stand for :math:`m`
  86. "groupings by ability or some other control variable, or judges doing
  87. the ranking, or random replications of some other sort."
  88. The procedure for calculating the :math:`L` statistic, adapted from
  89. [1]_, is:
  90. 1. "Predetermine with careful logic the appropriate hypotheses
  91. concerning the predicted ordering of the experimental results.
  92. If no reasonable basis for ordering any treatments is known, the
  93. :math:`L` test is not appropriate."
  94. 2. "As in other experiments, determine at what level of confidence
  95. you will reject the null hypothesis that there is no agreement of
  96. experimental results with the monotonic hypothesis."
  97. 3. "Cast the experimental material into a two-way table of :math:`n`
  98. columns (treatments, objects ranked, conditions) and :math:`m`
  99. rows (subjects, replication groups, levels of control variables)."
  100. 4. "When experimental observations are recorded, rank them across each
  101. row", e.g. ``ranks = scipy.stats.rankdata(data, axis=1)``.
  102. 5. "Add the ranks in each column", e.g.
  103. ``colsums = np.sum(ranks, axis=0)``.
  104. 6. "Multiply each sum of ranks by the predicted rank for that same
  105. column", e.g. ``products = predicted_ranks * colsums``.
  106. 7. "Sum all such products", e.g. ``L = products.sum()``.
  107. [1]_ continues by suggesting use of the standardized statistic
  108. .. math::
  109. \chi_L^2 = \frac{\left[12L-3mn(n+1)^2\right]^2}{mn^2(n^2-1)(n+1)}
  110. "which is distributed approximately as chi-square with 1 degree of
  111. freedom. The ordinary use of :math:`\chi^2` tables would be
  112. equivalent to a two-sided test of agreement. If a one-sided test
  113. is desired, *as will almost always be the case*, the probability
  114. discovered in the chi-square table should be *halved*."
  115. However, this standardized statistic does not distinguish between the
  116. observed values being well correlated with the predicted ranks and being
  117. _anti_-correlated with the predicted ranks. Instead, we follow [2]_
  118. and calculate the standardized statistic
  119. .. math::
  120. \Lambda = \frac{L - E_0}{\sqrt{V_0}},
  121. where :math:`E_0 = \frac{1}{4} mn(n+1)^2` and
  122. :math:`V_0 = \frac{1}{144} mn^2(n+1)(n^2-1)`, "which is asymptotically
  123. normal under the null hypothesis".
  124. The *p*-value for ``method='exact'`` is generated by comparing the observed
  125. value of :math:`L` against the :math:`L` values generated for all
  126. :math:`(n!)^m` possible permutations of ranks. The calculation is performed
  127. using the recursive method of [5].
  128. The *p*-values are not adjusted for the possibility of ties. When
  129. ties are present, the reported ``'exact'`` *p*-values may be somewhat
  130. larger (i.e. more conservative) than the true *p*-value [2]_. The
  131. ``'asymptotic'``` *p*-values, however, tend to be smaller (i.e. less
  132. conservative) than the ``'exact'`` *p*-values.
  133. References
  134. ----------
  135. .. [1] Ellis Batten Page, "Ordered hypotheses for multiple treatments:
  136. a significant test for linear ranks", *Journal of the American
  137. Statistical Association* 58(301), p. 216--230, 1963.
  138. .. [2] Markus Neuhauser, *Nonparametric Statistical Test: A computational
  139. approach*, CRC Press, p. 150--152, 2012.
  140. .. [3] Statext LLC, "Page's L Trend Test - Easy Statistics", *Statext -
  141. Statistics Study*, https://www.statext.com/practice/PageTrendTest03.php,
  142. Accessed July 12, 2020.
  143. .. [4] "Page's Trend Test", *Wikipedia*, WikimediaFoundation,
  144. https://en.wikipedia.org/wiki/Page%27s_trend_test,
  145. Accessed July 12, 2020.
  146. .. [5] Robert E. Odeh, "The exact distribution of Page's L-statistic in
  147. the two-way layout", *Communications in Statistics - Simulation and
  148. Computation*, 6(1), p. 49--61, 1977.
  149. Examples
  150. --------
  151. We use the example from [3]_: 10 students are asked to rate three
  152. teaching methods - tutorial, lecture, and seminar - on a scale of 1-5,
  153. with 1 being the lowest and 5 being the highest. We have decided that
  154. a confidence level of 99% is required to reject the null hypothesis in
  155. favor of our alternative: that the seminar will have the highest ratings
  156. and the tutorial will have the lowest. Initially, the data have been
  157. tabulated with each row representing an individual student's ratings of
  158. the three methods in the following order: tutorial, lecture, seminar.
  159. >>> table = [[3, 4, 3],
  160. ... [2, 2, 4],
  161. ... [3, 3, 5],
  162. ... [1, 3, 2],
  163. ... [2, 3, 2],
  164. ... [2, 4, 5],
  165. ... [1, 2, 4],
  166. ... [3, 4, 4],
  167. ... [2, 4, 5],
  168. ... [1, 3, 4]]
  169. Because the tutorial is hypothesized to have the lowest ratings, the
  170. column corresponding with tutorial rankings should be first; the seminar
  171. is hypothesized to have the highest ratings, so its column should be last.
  172. Since the columns are already arranged in this order of increasing
  173. predicted mean, we can pass the table directly into `page_trend_test`.
  174. >>> from scipy.stats import page_trend_test
  175. >>> res = page_trend_test(table)
  176. >>> res
  177. PageTrendTestResult(statistic=133.5, pvalue=0.0018191161948127822,
  178. method='exact')
  179. This *p*-value indicates that there is a 0.1819% chance that
  180. the :math:`L` statistic would reach such an extreme value under the null
  181. hypothesis. Because 0.1819% is less than 1%, we have evidence to reject
  182. the null hypothesis in favor of our alternative at a 99% confidence level.
  183. The value of the :math:`L` statistic is 133.5. To check this manually,
  184. we rank the data such that high scores correspond with high ranks, settling
  185. ties with an average rank:
  186. >>> from scipy.stats import rankdata
  187. >>> ranks = rankdata(table, axis=1)
  188. >>> ranks
  189. array([[1.5, 3. , 1.5],
  190. [1.5, 1.5, 3. ],
  191. [1.5, 1.5, 3. ],
  192. [1. , 3. , 2. ],
  193. [1.5, 3. , 1.5],
  194. [1. , 2. , 3. ],
  195. [1. , 2. , 3. ],
  196. [1. , 2.5, 2.5],
  197. [1. , 2. , 3. ],
  198. [1. , 2. , 3. ]])
  199. We add the ranks within each column, multiply the sums by the
  200. predicted ranks, and sum the products.
  201. >>> import numpy as np
  202. >>> m, n = ranks.shape
  203. >>> predicted_ranks = np.arange(1, n+1)
  204. >>> L = (predicted_ranks * np.sum(ranks, axis=0)).sum()
  205. >>> res.statistic == L
  206. True
  207. As presented in [3]_, the asymptotic approximation of the *p*-value is the
  208. survival function of the normal distribution evaluated at the standardized
  209. test statistic:
  210. >>> from scipy.stats import norm
  211. >>> E0 = (m*n*(n+1)**2)/4
  212. >>> V0 = (m*n**2*(n+1)*(n**2-1))/144
  213. >>> Lambda = (L-E0)/np.sqrt(V0)
  214. >>> p = norm.sf(Lambda)
  215. >>> p
  216. 0.0012693433690751756
  217. This does not precisely match the *p*-value reported by `page_trend_test`
  218. above. The asymptotic distribution is not very accurate, nor conservative,
  219. for :math:`m \leq 12` and :math:`n \leq 8`, so `page_trend_test` chose to
  220. use ``method='exact'`` based on the dimensions of the table and the
  221. recommendations in Page's original paper [1]_. To override
  222. `page_trend_test`'s choice, provide the `method` argument.
  223. >>> res = page_trend_test(table, method="asymptotic")
  224. >>> res
  225. PageTrendTestResult(statistic=133.5, pvalue=0.0012693433690751756,
  226. method='asymptotic')
  227. If the data are already ranked, we can pass in the ``ranks`` instead of
  228. the ``table`` to save computation time.
  229. >>> res = page_trend_test(ranks, # ranks of data
  230. ... ranked=True, # data is already ranked
  231. ... )
  232. >>> res
  233. PageTrendTestResult(statistic=133.5, pvalue=0.0018191161948127822,
  234. method='exact')
  235. Suppose the raw data had been tabulated in an order different from the
  236. order of predicted means, say lecture, seminar, tutorial.
  237. >>> table = np.asarray(table)[:, [1, 2, 0]]
  238. Since the arrangement of this table is not consistent with the assumed
  239. ordering, we can either rearrange the table or provide the
  240. `predicted_ranks`. Remembering that the lecture is predicted
  241. to have the middle rank, the seminar the highest, and tutorial the lowest,
  242. we pass:
  243. >>> res = page_trend_test(table, # data as originally tabulated
  244. ... predicted_ranks=[2, 3, 1], # our predicted order
  245. ... )
  246. >>> res
  247. PageTrendTestResult(statistic=133.5, pvalue=0.0018191161948127822,
  248. method='exact')
  249. """
  250. if not hasattr(_pagel_state, 'state'):
  251. _pagel_state.state = _PageL()
  252. # Possible values of the method parameter and the corresponding function
  253. # used to evaluate the p value
  254. methods = {"asymptotic": _l_p_asymptotic,
  255. "exact": _l_p_exact,
  256. "auto": None}
  257. if method not in methods:
  258. raise ValueError(f"`method` must be in {set(methods)}")
  259. ranks = np.asarray(data)
  260. if ranks.ndim != 2: # TODO: relax this to accept 3d arrays?
  261. raise ValueError("`data` must be a 2d array.")
  262. m, n = ranks.shape
  263. if m < 2 or n < 3:
  264. raise ValueError("Page's L is only appropriate for data with two "
  265. "or more rows and three or more columns.")
  266. if np.any(np.isnan(data)):
  267. raise ValueError("`data` contains NaNs, which cannot be ranked "
  268. "meaningfully")
  269. # ensure NumPy array and rank the data if it's not already ranked
  270. if ranked:
  271. # Only a basic check on whether data is ranked. Checking that the data
  272. # is properly ranked could take as much time as ranking it.
  273. if not (ranks.min() >= 1 and ranks.max() <= ranks.shape[1]):
  274. raise ValueError("`data` is not properly ranked. Rank the data or "
  275. "pass `ranked=False`.")
  276. else:
  277. ranks = scipy.stats.rankdata(data, axis=-1)
  278. # generate predicted ranks if not provided, ensure valid NumPy array
  279. if predicted_ranks is None:
  280. predicted_ranks = np.arange(1, n+1)
  281. else:
  282. predicted_ranks = np.asarray(predicted_ranks)
  283. if (predicted_ranks.ndim < 1 or
  284. (set(predicted_ranks) != set(range(1, n+1)) or
  285. len(predicted_ranks) != n)):
  286. raise ValueError(f"`predicted_ranks` must include each integer "
  287. f"from 1 to {n} (the number of columns in "
  288. f"`data`) exactly once.")
  289. if not isinstance(ranked, bool):
  290. raise TypeError("`ranked` must be boolean.")
  291. # Calculate the L statistic
  292. L = _l_vectorized(ranks, predicted_ranks)
  293. # Calculate the p-value
  294. if method == "auto":
  295. method = _choose_method(ranks)
  296. p_fun = methods[method] # get the function corresponding with the method
  297. p = p_fun(L, m, n)
  298. page_result = PageTrendTestResult(statistic=L, pvalue=p, method=method)
  299. return page_result
  300. def _choose_method(ranks):
  301. '''Choose method for computing p-value automatically'''
  302. m, n = ranks.shape
  303. if n > 8 or (m > 12 and n > 3) or m > 20: # as in [1], [4]
  304. method = "asymptotic"
  305. else:
  306. method = "exact"
  307. return method
  308. def _l_vectorized(ranks, predicted_ranks):
  309. '''Calculate's Page's L statistic for each page of a 3d array'''
  310. colsums = ranks.sum(axis=-2, keepdims=True)
  311. products = predicted_ranks * colsums
  312. Ls = products.sum(axis=-1)
  313. Ls = Ls[0] if Ls.size == 1 else Ls.ravel()
  314. return Ls
  315. def _l_p_asymptotic(L, m, n):
  316. '''Calculate the p-value of Page's L from the asymptotic distribution'''
  317. # Using [1] as a reference, the asymptotic p-value would be calculated as:
  318. # chi_L = (12*L - 3*m*n*(n+1)**2)**2/(m*n**2*(n**2-1)*(n+1))
  319. # p = chi2.sf(chi_L, df=1, loc=0, scale=1)/2
  320. # but this is insensitive to the direction of the hypothesized ranking
  321. # See [2] page 151
  322. E0 = (m*n*(n+1)**2)/4
  323. V0 = (m*n**2*(n+1)*(n**2-1))/144
  324. Lambda = (L-E0)/np.sqrt(V0)
  325. # This is a one-sided "greater" test - calculate the probability that the
  326. # L statistic under H0 would be greater than the observed L statistic
  327. p = norm.sf(Lambda)
  328. return p
  329. def _l_p_exact(L, m, n):
  330. '''Calculate the p-value of Page's L exactly'''
  331. # [1] uses m, n; [5] uses n, k.
  332. # Switch convention here because exact calculation code references [5].
  333. L, n, k = int(L), int(m), int(n)
  334. _pagel_state.state.set_k(k)
  335. return _pagel_state.state.sf(L, n)
  336. class _PageL:
  337. '''Maintains state between `page_trend_test` executions'''
  338. def __init__(self):
  339. '''Lightweight initialization'''
  340. self.all_pmfs = {}
  341. def set_k(self, k):
  342. '''Calculate lower and upper limits of L for single row'''
  343. self.k = k
  344. # See [5] top of page 52
  345. self.a, self.b = (k*(k+1)*(k+2))//6, (k*(k+1)*(2*k+1))//6
  346. def sf(self, l, n):
  347. '''Survival function of Page's L statistic'''
  348. ps = [self.pmf(l, n) for l in range(l, n*self.b + 1)]
  349. return np.sum(ps)
  350. def p_l_k_1(self):
  351. '''Relative frequency of each L value over all possible single rows'''
  352. # See [5] Equation (6)
  353. ranks = range(1, self.k+1)
  354. # generate all possible rows of length k
  355. rank_perms = np.array(list(permutations(ranks)))
  356. # compute Page's L for all possible rows
  357. Ls = (ranks*rank_perms).sum(axis=1)
  358. # count occurrences of each L value
  359. counts = np.histogram(Ls, np.arange(self.a-0.5, self.b+1.5))[0]
  360. # factorial(k) is number of possible permutations
  361. return counts/math.factorial(self.k)
  362. def pmf(self, l, n):
  363. '''Recursive function to evaluate p(l, k, n); see [5] Equation 1'''
  364. if n not in self.all_pmfs:
  365. self.all_pmfs[n] = {}
  366. if self.k not in self.all_pmfs[n]:
  367. self.all_pmfs[n][self.k] = {}
  368. # Cache results to avoid repeating calculation. Initially this was
  369. # written with lru_cache, but this seems faster? Also, we could add
  370. # an option to save this for future lookup.
  371. if l in self.all_pmfs[n][self.k]:
  372. return self.all_pmfs[n][self.k][l]
  373. if n == 1:
  374. ps = self.p_l_k_1() # [5] Equation 6
  375. ls = range(self.a, self.b+1)
  376. # not fast, but we'll only be here once
  377. self.all_pmfs[n][self.k] = {l: p for l, p in zip(ls, ps)}
  378. return self.all_pmfs[n][self.k][l]
  379. p = 0
  380. low = max(l-(n-1)*self.b, self.a) # [5] Equation 2
  381. high = min(l-(n-1)*self.a, self.b)
  382. # [5] Equation 1
  383. for t in range(low, high+1):
  384. p1 = self.pmf(l-t, n-1)
  385. p2 = self.pmf(t, 1)
  386. p += p1*p2
  387. self.all_pmfs[n][self.k][l] = p
  388. return p
  389. # Maintain state for faster repeat calls to page_trend_test w/ method='exact'
  390. # _PageL() is calculated once per thread and stored as an attribute on
  391. # this thread-local variable inside page_trend_test().
  392. _pagel_state = threading.local()