_quantile.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. import math
  2. import numpy as np
  3. from scipy.special import betainc
  4. from scipy._lib._array_api import (
  5. xp_capabilities,
  6. xp_ravel,
  7. array_namespace,
  8. xp_promote,
  9. xp_device,
  10. _length_nonmasked,
  11. is_torch,
  12. )
  13. import scipy._lib.array_api_extra as xpx
  14. from scipy.stats._axis_nan_policy import _broadcast_arrays, _contains_nan
  15. def _quantile_iv(x, p, method, axis, nan_policy, keepdims, weights):
  16. xp = array_namespace(x, p, weights)
  17. if not xp.isdtype(xp.asarray(x).dtype, ('integral', 'real floating')):
  18. raise ValueError("`x` must have real dtype.")
  19. if not xp.isdtype(xp.asarray(p).dtype, 'real floating'):
  20. raise ValueError("`p` must have real floating dtype.")
  21. if not (weights is None
  22. or xp.isdtype(xp.asarray(weights).dtype, ('integral', 'real floating'))):
  23. raise ValueError("`weights` must have real dtype.")
  24. x, p, weights = xp_promote(x, p, weights, force_floating=True, xp=xp)
  25. p = xp.asarray(p, device=xp_device(x))
  26. dtype = x.dtype
  27. axis_none = axis is None
  28. ndim = max(x.ndim, p.ndim)
  29. if axis_none:
  30. x = xp_ravel(x)
  31. p = xp_ravel(p)
  32. axis = 0
  33. elif np.iterable(axis) or int(axis) != axis:
  34. message = "`axis` must be an integer or None."
  35. raise ValueError(message)
  36. elif (axis >= ndim) or (axis < -ndim):
  37. message = "`axis` is not compatible with the shapes of the inputs."
  38. raise ValueError(message)
  39. axis = int(axis)
  40. methods = {'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
  41. 'hazen', 'interpolated_inverted_cdf', 'linear',
  42. 'median_unbiased', 'normal_unbiased', 'weibull',
  43. 'harrell-davis', '_lower', '_midpoint', '_higher', '_nearest',
  44. 'round_outward', 'round_inward', 'round_nearest'}
  45. if method not in methods:
  46. message = f"`method` must be one of {methods}"
  47. raise ValueError(message)
  48. no_weights = {'_lower', '_midpoint', '_higher', '_nearest', 'harrell-davis',
  49. 'round_nearest', 'round_inward', 'round_outward'}
  50. if weights is not None and method in no_weights:
  51. message = f"`method='{method}'` does not support `weights`."
  52. raise ValueError(message)
  53. contains_nans = _contains_nan(x, nan_policy, xp_omit_okay=True, xp=xp)
  54. if keepdims not in {None, True, False}:
  55. message = "If specified, `keepdims` must be True or False."
  56. raise ValueError(message)
  57. # If data has length zero along `axis`, the result will be an array of NaNs just
  58. # as if the data had length 1 along axis and were filled with NaNs. This is treated
  59. # naturally below whether `nan_policy` is `'propagate'` or `'omit'`.
  60. if x.shape[axis] == 0:
  61. shape = list(x.shape)
  62. shape[axis] = 1
  63. x = xp.full(shape, xp.nan, dtype=dtype, device=xp_device(x))
  64. if weights is None:
  65. y = xp.sort(x, axis=axis, stable=False)
  66. y, p = _broadcast_arrays((y, p), axis=axis)
  67. n_zero_weight = None
  68. else:
  69. x, weights = xp.broadcast_arrays(x, weights)
  70. i_zero_weight = (weights == 0)
  71. n_zero_weight = xp.count_nonzero(i_zero_weight, axis=axis, keepdims=True)
  72. x = xpx.at(x)[i_zero_weight].set(xp.inf, copy=True)
  73. i_y = xp.argsort(x, axis=axis, stable=False)
  74. y = xp.take_along_axis(x, i_y, axis=axis)
  75. weights = xp.take_along_axis(weights, i_y, axis=axis)
  76. y, p, weights, i_y, n_zero_weight = _broadcast_arrays(
  77. (y, p, weights, i_y, n_zero_weight), axis=axis)
  78. if (keepdims is False) and (p.shape[axis] != 1):
  79. message = "`keepdims` may be False only if the length of `p` along `axis` is 1."
  80. raise ValueError(message)
  81. keepdims = (p.shape[axis] != 1) if keepdims is None else keepdims
  82. y = xp.moveaxis(y, axis, -1)
  83. p = xp.moveaxis(p, axis, -1)
  84. weights = weights if weights is None else xp.moveaxis(weights, axis, -1)
  85. n_zero_weight = (n_zero_weight if n_zero_weight is None
  86. else xp.moveaxis(n_zero_weight, axis, -1))
  87. n = _length_nonmasked(y, -1, xp=xp, keepdims=True)
  88. n = n if n_zero_weight is None else n - n_zero_weight
  89. if contains_nans:
  90. nans = xp.isnan(y)
  91. # Note that if length along `axis` were 0 to begin with,
  92. # it is now length 1 and filled with NaNs.
  93. if nan_policy == 'propagate':
  94. nan_out = xp.any(nans, axis=-1)
  95. else: # 'omit'
  96. n_int = n - xp.count_nonzero(nans, axis=-1, keepdims=True)
  97. n = xp.astype(n_int, dtype)
  98. # NaNs are produced only if slice is empty after removing NaNs
  99. nan_out = xp.any(n == 0, axis=-1)
  100. n = xpx.at(n, nan_out).set(y.shape[-1]) # avoids pytorch/pytorch#146211
  101. if xp.any(nan_out):
  102. y = xp.asarray(y, copy=True) # ensure writable
  103. y = xpx.at(y, nan_out).set(xp.nan)
  104. elif xp.any(nans) and method == 'harrell-davis':
  105. y = xp.asarray(y, copy=True) # ensure writable
  106. y = xpx.at(y, nans).set(0) # any non-nan will prevent NaN from propagating
  107. n = xp.asarray(n, dtype=dtype, device=xp_device(y))
  108. p_mask = (p > 1) | (p < 0) | xp.isnan(p)
  109. if xp.any(p_mask):
  110. p = xp.asarray(p, copy=True)
  111. p = xpx.at(p, p_mask).set(0.5) # these get NaN-ed out at the end
  112. return (y, p, method, axis, nan_policy, keepdims,
  113. n, axis_none, ndim, p_mask, weights, xp)
  114. @xp_capabilities(skip_backends=[("dask.array", "No take_along_axis yet.")],
  115. jax_jit=False)
  116. def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=None,
  117. weights=None):
  118. """
  119. Compute the p-th quantile of the data along the specified axis.
  120. Parameters
  121. ----------
  122. x : array_like of real numbers
  123. Data array.
  124. p : array_like of float
  125. Probability or sequence of probabilities of the quantiles to compute.
  126. Values must be between 0 and 1 (inclusive).
  127. While `numpy.quantile` can only compute quantiles according to the Cartesian
  128. product of the first two arguments, this function enables calculation of
  129. quantiles at different probabilities for each axis slice by following
  130. broadcasting rules like those of `scipy.stats` reducing functions.
  131. See `axis`, `keepdims`, and the examples.
  132. method : str, default: 'linear'
  133. The method to use for estimating the quantile.
  134. The available options, numbered as they appear in [1]_, are:
  135. 1. 'inverted_cdf'
  136. 2. 'averaged_inverted_cdf'
  137. 3. 'closest_observation'
  138. 4. 'interpolated_inverted_cdf'
  139. 5. 'hazen'
  140. 6. 'weibull'
  141. 7. 'linear' (default)
  142. 8. 'median_unbiased'
  143. 9. 'normal_unbiased'
  144. 'harrell-davis' is also available to compute the quantile estimate
  145. according to [2]_.
  146. 'round_outward', 'round_inward', and 'round_nearest' are available for use
  147. in trimming and winsorizing data.
  148. See Notes for details.
  149. axis : int or None, default: 0
  150. Axis along which the quantiles are computed.
  151. ``None`` ravels both `x` and `p` before performing the calculation,
  152. without checking whether the original shapes were compatible.
  153. As in other `scipy.stats` functions, a positive integer `axis` is resolved
  154. after prepending 1s to the shape of `x` or `p` as needed until the two arrays
  155. have the same dimensionality. When providing `x` and `p` with different
  156. dimensionality, consider using negative `axis` integers for clarity.
  157. nan_policy : str, default: 'propagate'
  158. Defines how to handle NaNs in the input data `x`.
  159. - ``propagate``: if a NaN is present in the axis slice (e.g. row) along
  160. which the statistic is computed, the corresponding slice of the output
  161. will contain NaN(s).
  162. - ``omit``: NaNs will be omitted when performing the calculation.
  163. If insufficient data remains in the axis slice along which the
  164. statistic is computed, the corresponding slice of the output will
  165. contain NaN(s).
  166. - ``raise``: if a NaN is present, a ``ValueError`` will be raised.
  167. If NaNs are present in `p`, a ``ValueError`` will be raised.
  168. keepdims : bool, optional
  169. Consider the case in which `x` is 1-D and `p` is a scalar: the quantile
  170. is a reducing statistic, and the default behavior is to return a scalar.
  171. If `keepdims` is set to True, the axis will not be reduced away, and the
  172. result will be a 1-D array with one element.
  173. The general case is more subtle, since multiple quantiles may be
  174. requested for each axis-slice of `x`. For instance, if both `x` and `p`
  175. are 1-D and ``p.size > 1``, no axis can be reduced away; there must be an
  176. axis to contain the number of quantiles given by ``p.size``. Therefore:
  177. - By default, the axis will be reduced away if possible (i.e. if there is
  178. exactly one element of `p` per axis-slice of `x`).
  179. - If `keepdims` is set to True, the axis will not be reduced away.
  180. - If `keepdims` is set to False, the axis will be reduced away
  181. if possible, and an error will be raised otherwise.
  182. weights : array_like of finite, non-negative real numbers
  183. Frequency weights; e.g., for counting number weights,
  184. ``quantile(x, p, weights=weights)`` is equivalent to
  185. ``quantile(np.repeat(x, weights), p)``. Values other than finite counting
  186. numbers are accepted, but may not have valid statistical interpretations.
  187. Not compatible with ``method='harrell-davis'`` or those that begin with
  188. ``'round_'``.
  189. Returns
  190. -------
  191. quantile : scalar or ndarray
  192. The resulting quantile(s). The dtype is the result dtype of `x` and `p`.
  193. See Also
  194. --------
  195. numpy.quantile
  196. :ref:`outliers`
  197. Notes
  198. -----
  199. Given a sample `x` from an underlying distribution, `quantile` provides a
  200. nonparametric estimate of the inverse cumulative distribution function.
  201. By default, this is done by interpolating between adjacent elements in
  202. ``y``, a sorted copy of `x`::
  203. (1-g)*y[j] + g*y[j+1]
  204. where the index ``j`` and coefficient ``g`` are the integral and
  205. fractional components of ``p * (n-1)``, and ``n`` is the number of
  206. elements in the sample.
  207. This is a special case of Equation 1 of H&F [1]_. More generally,
  208. - ``j = (p*n + m - 1) // 1``, and
  209. - ``g = (p*n + m - 1) % 1``,
  210. where ``m`` may be defined according to several different conventions.
  211. The preferred convention may be selected using the ``method`` parameter:
  212. =============================== =============== ===============
  213. ``method`` number in H&F ``m``
  214. =============================== =============== ===============
  215. ``interpolated_inverted_cdf`` 4 ``0``
  216. ``hazen`` 5 ``1/2``
  217. ``weibull`` 6 ``p``
  218. ``linear`` (default) 7 ``1 - p``
  219. ``median_unbiased`` 8 ``p/3 + 1/3``
  220. ``normal_unbiased`` 9 ``p/4 + 3/8``
  221. =============================== =============== ===============
  222. Note that indices ``j`` and ``j + 1`` are clipped to the range ``0`` to
  223. ``n - 1`` when the results of the formula would be outside the allowed
  224. range of non-negative indices. When ``j`` is clipped to zero, ``g`` is
  225. set to zero as well. The ``-1`` in the formulas for ``j`` and ``g``
  226. accounts for Python's 0-based indexing.
  227. The table above includes only the estimators from [1]_ that are continuous
  228. functions of probability `p` (estimators 4-9). SciPy also provides the
  229. three discontinuous estimators from [1]_ (estimators 1-3), where ``j`` is
  230. defined as above, ``m`` is defined as follows, and ``g`` is ``0`` when
  231. ``index = p*n + m - 1`` is less than ``0`` and otherwise is defined below.
  232. 1. ``inverted_cdf``: ``m = 0`` and ``g = int(index - j > 0)``
  233. 2. ``averaged_inverted_cdf``: ``m = 0`` and
  234. ``g = (1 + int(index - j > 0)) / 2``
  235. 3. ``closest_observation``: ``m = -1/2`` and
  236. ``g = 1 - int((index == j) & (j%2 == 1))``
  237. Note that for methods ``inverted_cdf`` and ``averaged_inverted_cdf``, only the
  238. relative proportions of tied observations (and relative weights) affect the
  239. results; for all other methods, the total number of observations (and absolute
  240. weights) matter.
  241. A different strategy for computing quantiles from [2]_, ``method='harrell-davis'``,
  242. uses a weighted combination of all elements. The weights are computed as:
  243. .. math::
  244. w_{n, i} = I_{i/n}(a, b) - I_{(i - 1)/n}(a, b)
  245. where :math:`n` is the number of elements in the sample,
  246. :math:`i` are the indices :math:`1, 2, ..., n-1, n` of the sorted elements,
  247. :math:`a = p (n + 1)`, :math:`b = (1 - p)(n + 1)`,
  248. :math:`p` is the probability of the quantile, and
  249. :math:`I` is the regularized, lower incomplete beta function
  250. (`scipy.special.betainc`).
  251. ``method='round_nearest'`` is equivalent to indexing ``y[j]``, where::
  252. j = int(np.round(p*n) if p < 0.5 else np.round(n*p - 1))
  253. This is useful when winsorizing data: replacing ``p*n`` of the most extreme
  254. observations with the next most extreme observation. ``method='round_outward'``
  255. adjusts the direction of rounding to winsorize fewer elements::
  256. j = int(np.floor(p*n) if p < 0.5 else np.ceil(n*p - 1))
  257. and ``method='round_inward'`` rounds to winsorize more elements::
  258. j = int(np.ceil(p*n) if p < 0.5 else np.floor(n*p - 1))
  259. These methods are also useful for trimming data: removing ``p*n`` of the most
  260. extreme observations. See :ref:`outliers` for example applications.
  261. Examples
  262. --------
  263. >>> import numpy as np
  264. >>> from scipy import stats
  265. >>> x = np.asarray([[10, 8, 7, 5, 4],
  266. ... [0, 1, 2, 3, 5]])
  267. Take the median of each row.
  268. >>> stats.quantile(x, 0.5, axis=-1)
  269. array([7., 2.])
  270. Take a different quantile for each row.
  271. >>> stats.quantile(x, [[0.25], [0.75]], axis=-1, keepdims=True)
  272. array([[5.],
  273. [3.]])
  274. Take multiple quantiles for each row.
  275. >>> stats.quantile(x, [0.25, 0.75], axis=-1)
  276. array([[5., 8.],
  277. [1., 3.]])
  278. Take different quantiles for each row.
  279. >>> p = np.asarray([[0.25, 0.75],
  280. ... [0.5, 1.0]])
  281. >>> stats.quantile(x, p, axis=-1)
  282. array([[5., 8.],
  283. [2., 5.]])
  284. Take different quantiles for each column.
  285. >>> stats.quantile(x.T, p.T, axis=0)
  286. array([[5., 2.],
  287. [8., 5.]])
  288. References
  289. ----------
  290. .. [1] R. J. Hyndman and Y. Fan,
  291. "Sample quantiles in statistical packages,"
  292. The American Statistician, 50(4), pp. 361-365, 1996
  293. .. [2] Harrell, Frank E., and C. E. Davis.
  294. "A new distribution-free quantile estimator."
  295. Biometrika 69.3 (1982): 635-640.
  296. """
  297. # Input validation / standardization
  298. temp = _quantile_iv(x, p, method, axis, nan_policy, keepdims, weights)
  299. (y, p, method, axis, nan_policy, keepdims,
  300. n, axis_none, ndim, p_mask, weights, xp) = temp
  301. if method in {'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
  302. 'hazen', 'interpolated_inverted_cdf', 'linear',
  303. 'median_unbiased', 'normal_unbiased', 'weibull'}:
  304. res = _quantile_hf(y, p, n, method, weights, xp)
  305. elif method in {'harrell-davis'}:
  306. res = _quantile_hd(y, p, n, xp)
  307. elif method in {'_lower', '_midpoint', '_higher', '_nearest'}:
  308. res = _quantile_bc(y, p, n, method, xp)
  309. else: # method.startswith('round'):
  310. res = _quantile_winsor(y, p, n, method, xp)
  311. res = xpx.at(res, p_mask).set(xp.nan)
  312. # Reshape per axis/keepdims
  313. if axis_none and keepdims:
  314. shape = (1,)*(ndim - 1) + res.shape
  315. res = xp.reshape(res, shape)
  316. axis = -1
  317. res = xp.moveaxis(res, -1, axis)
  318. if not keepdims:
  319. res = xp.squeeze(res, axis=axis)
  320. return res[()] if res.ndim == 0 else res
  321. def _quantile_hf(y, p, n, method, weights, xp):
  322. ms = dict(inverted_cdf=0, averaged_inverted_cdf=0, closest_observation=-0.5,
  323. interpolated_inverted_cdf=0, hazen=0.5, weibull=p, linear=1 - p,
  324. median_unbiased=p/3 + 1/3, normal_unbiased=p/4 + 3/8)
  325. m = ms[method]
  326. if weights is None:
  327. jg = p * n + m
  328. jp1 = jg // 1
  329. j = jp1 - 1
  330. else:
  331. cumulative_weights = xp.cumulative_sum(weights, axis=-1)
  332. n_int = xp.asarray(n, dtype=xp.int64)
  333. n_int = xp.broadcast_to(n_int, cumulative_weights.shape[:-1] + (1,))
  334. total_weight = xp.take_along_axis(cumulative_weights, n_int-1, axis=-1)
  335. jg = p * total_weight + m
  336. jp1 = _xp_searchsorted(cumulative_weights, jg, side='right')
  337. j = _xp_searchsorted(cumulative_weights, jg-1, side='right')
  338. j, jp1 = xp.astype(j, y.dtype), xp.astype(jp1, y.dtype)
  339. g = jg % 1
  340. if method == 'inverted_cdf':
  341. g = xp.astype((g > 0), jg.dtype)
  342. elif method == 'averaged_inverted_cdf':
  343. g = (1 + xp.astype((g > 0), jg.dtype)) / 2
  344. elif method == 'closest_observation':
  345. g = (1 - xp.astype((g == 0) & (j % 2 == 1), jg.dtype))
  346. if method in {'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation'}:
  347. g = xp.asarray(g)
  348. g = xpx.at(g, jg < 0).set(0)
  349. g = xpx.at(g)[j < 0].set(0)
  350. j = xp.clip(j, 0., n - 1)
  351. jp1 = xp.clip(jp1, 0., n - 1)
  352. return ((1 - g) * xp.take_along_axis(y, xp.astype(j, xp.int64), axis=-1)
  353. + g * xp.take_along_axis(y, xp.astype(jp1, xp.int64), axis=-1))
  354. def _quantile_hd(y, p, n, xp):
  355. # RE axis handling: We need to perform a reducing operation over rows of `y` for
  356. # each element in the corresponding row of `p` (a la Cartesian product). Strategy:
  357. # move rows of `p` to an axis at the front that is orthogonal to all the rest,
  358. # perform the reducing operating over the last axis, then move the front axis back
  359. # to the end.
  360. p = xp.moveaxis(p, -1, 0)[..., xp.newaxis]
  361. a = p * (n + 1)
  362. b = (1 - p) * (n + 1)
  363. i = xp.arange(y.shape[-1] + 1, dtype=y.dtype, device=xp_device(y))
  364. w = betainc(a, b, i / n)
  365. w = w[..., 1:] - w[..., :-1]
  366. w = xpx.at(w, xp.isnan(w)).set(0)
  367. res = xp.vecdot(w, y, axis=-1)
  368. return xp.moveaxis(res, 0, -1)
  369. def _quantile_winsor(y, p, n, method, xp):
  370. ops = dict(round_outward=(xp.floor, xp.ceil),
  371. round_inward=(xp.ceil, xp.floor),
  372. round_nearest=(xp.round, xp.round))
  373. op_left, op_right = ops[method]
  374. j = xp.where(p < 0.5, op_left(p*n), op_right(n*p - 1))
  375. return xp.take_along_axis(y, xp.astype(j, xp.int64), axis=-1)
  376. def _quantile_bc(y, p, n, method, xp):
  377. # Methods retained for backward compatibility. NumPy documentation is not
  378. # quite right about what these methods do: if `p * (n - 1)` is integral,
  379. # that is used as the index. See numpy/numpy#28910.
  380. ij = p * (n - 1)
  381. if method == '_midpoint':
  382. return (xp.take_along_axis(y, xp.astype(xp.floor(ij), xp.int64), axis=-1)
  383. + xp.take_along_axis(y, xp.astype(xp.ceil(ij), xp.int64), axis=-1)) / 2
  384. elif method == '_lower':
  385. k = xp.floor(ij)
  386. elif method == '_higher':
  387. k = xp.ceil(ij)
  388. elif method == '_nearest':
  389. k = xp.round(ij)
  390. return xp.take_along_axis(y, xp.astype(k, xp.int64), axis=-1)
  391. @xp_capabilities(skip_backends=[("dask.array", "No take_along_axis yet.")])
  392. def _xp_searchsorted(x, y, *, side='left', xp=None):
  393. # Vectorized xp.searchsorted. Search is performed along last axis. The shape of the
  394. # output is that of `y`, broadcasting the batch dimensions with those of `x` if
  395. # necessary.
  396. xp = array_namespace(x, y) if xp is None else xp
  397. xp_default_int = xp.asarray(1).dtype
  398. y_0d = xp.asarray(y).ndim == 0
  399. x, y = _broadcast_arrays((x, y), axis=-1, xp=xp)
  400. x_1d = x.ndim <= 1
  401. if x_1d or is_torch(xp):
  402. y = xp.reshape(y, ()) if (y_0d and x_1d) else y
  403. out = xp.searchsorted(x, y, side=side)
  404. out = xp.astype(out, xp_default_int, copy=False)
  405. return out
  406. a = xp.full(y.shape, 0, device=xp_device(x))
  407. if x.shape[-1] == 0:
  408. return a
  409. n = xp.count_nonzero(~xp.isnan(x), axis=-1, keepdims=True)
  410. b = xp.broadcast_to(n, y.shape)
  411. compare = xp.less_equal if side == 'left' else xp.less
  412. # while xp.any(b - a > 1):
  413. # refactored to for loop with ~log2(n) iterations for JAX JIT
  414. for i in range(int(math.log2(x.shape[-1])) + 1):
  415. c = (a + b) // 2
  416. x0 = xp.take_along_axis(x, c, axis=-1)
  417. j = compare(y, x0)
  418. b = xp.where(j, c, b)
  419. a = xp.where(j, a, c)
  420. out = xp.where(compare(y, xp.min(x, axis=-1, keepdims=True)), 0, b)
  421. out = xp.where(xp.isnan(y), x.shape[-1], out) if side == 'right' else out
  422. out = xp.astype(out, xp_default_int, copy=False)
  423. return out