_savitzky_golay.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. from scipy._lib._util import float_factorial
  2. from scipy._lib.array_api_compat import numpy as np_compat
  3. from scipy._lib._array_api import array_namespace, xp_swapaxes, xp_device
  4. import scipy._lib.array_api_extra as xpx
  5. from scipy.ndimage import convolve1d # type: ignore[attr-defined]
  6. from scipy.signal import _polyutils as _pu
  7. from ._arraytools import axis_slice
  8. def savgol_coeffs(window_length, polyorder, deriv=0, delta=1.0, pos=None,
  9. use="conv", *, xp=None, device=None):
  10. """Compute the coefficients for a 1-D Savitzky-Golay FIR filter.
  11. Parameters
  12. ----------
  13. window_length : int
  14. The length of the filter window (i.e., the number of coefficients).
  15. polyorder : int
  16. The order of the polynomial used to fit the samples.
  17. `polyorder` must be less than `window_length`.
  18. deriv : int, optional
  19. The order of the derivative to compute. This must be a
  20. nonnegative integer. The default is 0, which means to filter
  21. the data without differentiating.
  22. delta : float, optional
  23. The spacing of the samples to which the filter will be applied.
  24. This is only used if deriv > 0.
  25. pos : int or None, optional
  26. If pos is not None, it specifies evaluation position within the
  27. window. The default is the middle of the window.
  28. use : str, optional
  29. Either 'conv' or 'dot'. This argument chooses the order of the
  30. coefficients. The default is 'conv', which means that the
  31. coefficients are ordered to be used in a convolution. With
  32. use='dot', the order is reversed, so the filter is applied by
  33. dotting the coefficients with the data set.
  34. Returns
  35. -------
  36. coeffs : 1-D ndarray
  37. The filter coefficients.
  38. See Also
  39. --------
  40. savgol_filter
  41. Notes
  42. -----
  43. .. versionadded:: 0.14.0
  44. References
  45. ----------
  46. A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of Data by
  47. Simplified Least Squares Procedures. Analytical Chemistry, 1964, 36 (8),
  48. pp 1627-1639.
  49. Jianwen Luo, Kui Ying, and Jing Bai. 2005. Savitzky-Golay smoothing and
  50. differentiation filter for even number data. Signal Process.
  51. 85, 7 (July 2005), 1429-1434.
  52. Examples
  53. --------
  54. >>> import numpy as np
  55. >>> from scipy.signal import savgol_coeffs
  56. >>> savgol_coeffs(5, 2)
  57. array([-0.08571429, 0.34285714, 0.48571429, 0.34285714, -0.08571429])
  58. >>> savgol_coeffs(5, 2, deriv=1)
  59. array([ 2.00000000e-01, 1.00000000e-01, 2.07548111e-16, -1.00000000e-01,
  60. -2.00000000e-01])
  61. Note that use='dot' simply reverses the coefficients.
  62. >>> savgol_coeffs(5, 2, pos=3)
  63. array([ 0.25714286, 0.37142857, 0.34285714, 0.17142857, -0.14285714])
  64. >>> savgol_coeffs(5, 2, pos=3, use='dot')
  65. array([-0.14285714, 0.17142857, 0.34285714, 0.37142857, 0.25714286])
  66. >>> savgol_coeffs(4, 2, pos=3, deriv=1, use='dot')
  67. array([0.45, -0.85, -0.65, 1.05])
  68. `x` contains data from the parabola x = t**2, sampled at
  69. t = -1, 0, 1, 2, 3. `c` holds the coefficients that will compute the
  70. derivative at the last position. When dotted with `x` the result should
  71. be 6.
  72. >>> x = np.array([1, 0, 1, 4, 9])
  73. >>> c = savgol_coeffs(5, 2, pos=4, deriv=1, use='dot')
  74. >>> c.dot(x)
  75. 6.0
  76. """
  77. # An alternative method for finding the coefficients when deriv=0 is
  78. # t = np.arange(window_length)
  79. # unit = (t == pos).astype(int)
  80. # coeffs = np.polyval(np.polyfit(t, unit, polyorder), t)
  81. # The method implemented here is faster.
  82. # To recreate the table of sample coefficients shown in the chapter on
  83. # the Savitzy-Golay filter in the Numerical Recipes book, use
  84. # window_length = nL + nR + 1
  85. # pos = nL + 1
  86. # c = savgol_coeffs(window_length, M, pos=pos, use='dot')
  87. if polyorder >= window_length:
  88. raise ValueError("polyorder must be less than window_length.")
  89. halflen, rem = divmod(window_length, 2)
  90. if pos is None:
  91. if rem == 0:
  92. pos = halflen - 0.5
  93. else:
  94. pos = halflen
  95. if not (0 <= pos < window_length):
  96. raise ValueError("pos must be nonnegative and less than "
  97. "window_length.")
  98. if use not in ['conv', 'dot']:
  99. raise ValueError("`use` must be 'conv' or 'dot'")
  100. # cf windows/_windows.py
  101. xp = np_compat if xp is None else array_namespace(xp.empty(0))
  102. if deriv > polyorder:
  103. coeffs = xp.zeros(window_length, dtype=xp.float64, device=device)
  104. return coeffs
  105. # Form the design matrix A. The columns of A are powers of the integers
  106. # from -pos to window_length - pos - 1. The powers (i.e., rows) range
  107. # from 0 to polyorder. (That is, A is a vandermonde matrix, but not
  108. # necessarily square.)
  109. x = xp.arange(-pos, window_length - pos, dtype=xp.float64, device=device)
  110. if use == "conv":
  111. # Reverse so that result can be used in a convolution.
  112. x = xp.flip(x)
  113. order = xp.reshape(
  114. xp.arange(polyorder + 1, dtype=xp.float64, device=device), (-1, 1)
  115. )
  116. A = x ** order
  117. # y determines which order derivative is returned.
  118. y = xp.zeros(polyorder + 1, dtype=xp.float64, device=device)
  119. # The coefficient assigned to y[deriv] scales the result to take into
  120. # account the order of the derivative and the sample spacing.
  121. y = xpx.at(y, deriv).set(float_factorial(deriv) / (delta ** deriv))
  122. # Find the least-squares solution of A*c = y
  123. coeffs, _, _, _ = _pu._lstsq(A, y, xp=xp)
  124. return coeffs
  125. def _polyder(p, m, *, xp):
  126. """Differentiate polynomials represented with coefficients.
  127. p must be a 1-D or 2-D array. In the 2-D case, each column gives
  128. the coefficients of a polynomial; the first row holds the coefficients
  129. associated with the highest power. m must be a nonnegative integer.
  130. (numpy.polyder doesn't handle the 2-D case.)
  131. """
  132. if m == 0:
  133. result = p
  134. else:
  135. n = p.shape[0]
  136. if n <= m:
  137. result = xp.zeros_like(p[:1, ...])
  138. else:
  139. dp = xp.asarray(p[:-m, ...], copy=True)
  140. for k in range(m):
  141. rng = xp.arange(
  142. n - k - 1, m - k - 1, -1, dtype=p.dtype, device=xp_device(p)
  143. )
  144. dp *= xp.reshape(rng, (n - m,) + (1,) * (p.ndim - 1))
  145. result = dp
  146. return result
  147. def _fit_edge(x, window_start, window_stop, interp_start, interp_stop,
  148. axis, polyorder, deriv, delta, y):
  149. """
  150. Given an N-d array `x` and the specification of a slice of `x` from
  151. `window_start` to `window_stop` along `axis`, create an interpolating
  152. polynomial of each 1-D slice, and evaluate that polynomial in the slice
  153. from `interp_start` to `interp_stop`. Put the result into the
  154. corresponding slice of `y`.
  155. """
  156. xp = array_namespace(x)
  157. # Get the edge into a (window_length, -1) array.
  158. x_edge = axis_slice(x, start=window_start, stop=window_stop, axis=axis)
  159. if axis == 0 or axis == -x.ndim:
  160. xx_edge = x_edge
  161. swapped = False
  162. else:
  163. xx_edge = xp_swapaxes(x_edge, axis, 0, xp)
  164. swapped = True
  165. xx_edge = xp.reshape(xx_edge, (xx_edge.shape[0], -1))
  166. # Fit the edges. poly_coeffs has shape (polyorder + 1, -1),
  167. # where '-1' is the same as in xx_edge.
  168. poly_coeffs = _pu.polyfit(
  169. xp.arange(
  170. 0, window_stop - window_start, dtype=x.dtype, device=xp_device(x)
  171. ), xx_edge, polyorder, xp=xp
  172. )
  173. if deriv > 0:
  174. poly_coeffs = _polyder(poly_coeffs, deriv, xp=xp)
  175. # Compute the interpolated values for the edge.
  176. i = xp.arange(
  177. interp_start - window_start, interp_stop - window_start,
  178. dtype=poly_coeffs.dtype, device=xp_device(poly_coeffs)
  179. )
  180. values = _pu.polyval(poly_coeffs, xp.reshape(i, (-1, 1)), xp=xp) / (delta ** deriv)
  181. # Now put the values into the appropriate slice of y.
  182. # First reshape values to match y.
  183. shp = list(y.shape)
  184. shp[0], shp[axis] = shp[axis], shp[0]
  185. values = xp.reshape(values, (interp_stop - interp_start, *shp[1:]))
  186. if swapped:
  187. values = xp_swapaxes(values, 0, axis, xp)
  188. # Get a view of the data to be replaced by values.
  189. y_slice = [slice(None)] * y.ndim
  190. y_slice[axis] = slice(interp_start, interp_stop)
  191. y = xpx.at(y, tuple(y_slice)).set(values)
  192. return y
  193. def _fit_edges_polyfit(x, window_length, polyorder, deriv, delta, axis, y):
  194. """
  195. Use polynomial interpolation of x at the low and high ends of the axis
  196. to fill in the halflen values in y.
  197. This function just calls _fit_edge twice, once for each end of the axis.
  198. """
  199. halflen = window_length // 2
  200. y = _fit_edge(x, 0, window_length, 0, halflen, axis,
  201. polyorder, deriv, delta, y)
  202. n = x.shape[axis]
  203. y = _fit_edge(x, n - window_length, n, n - halflen, n, axis,
  204. polyorder, deriv, delta, y)
  205. return y
  206. def savgol_filter(x, window_length, polyorder, deriv=0, delta=1.0,
  207. axis=-1, mode='interp', cval=0.0):
  208. """ Apply a Savitzky-Golay filter to an array.
  209. This is a 1-D filter. If `x` has dimension greater than 1, `axis`
  210. determines the axis along which the filter is applied.
  211. Parameters
  212. ----------
  213. x : array_like
  214. The data to be filtered. If `x` is not a single or double precision
  215. floating point array, it will be converted to type ``numpy.float64``
  216. before filtering.
  217. window_length : int
  218. The length of the filter window (i.e., the number of coefficients).
  219. If `mode` is 'interp', `window_length` must be less than or equal
  220. to the size of `x`.
  221. polyorder : int
  222. The order of the polynomial used to fit the samples.
  223. `polyorder` must be less than `window_length`.
  224. deriv : int, optional
  225. The order of the derivative to compute. This must be a
  226. nonnegative integer. The default is 0, which means to filter
  227. the data without differentiating.
  228. delta : float, optional
  229. The spacing of the samples to which the filter will be applied.
  230. This is only used if deriv > 0. Default is 1.0.
  231. axis : int, optional
  232. The axis of the array `x` along which the filter is to be applied.
  233. Default is -1.
  234. mode : str, optional
  235. Must be 'mirror', 'constant', 'nearest', 'wrap' or 'interp'. This
  236. determines the type of extension to use for the padded signal to
  237. which the filter is applied. When `mode` is 'constant', the padding
  238. value is given by `cval`. See the Notes for more details on 'mirror',
  239. 'constant', 'wrap', and 'nearest'.
  240. When the 'interp' mode is selected (the default), no extension
  241. is used. Instead, a degree `polyorder` polynomial is fit to the
  242. last `window_length` values of the edges, and this polynomial is
  243. used to evaluate the last `window_length // 2` output values.
  244. cval : scalar, optional
  245. Value to fill past the edges of the input if `mode` is 'constant'.
  246. Default is 0.0.
  247. Returns
  248. -------
  249. y : ndarray, same shape as `x`
  250. The filtered data.
  251. See Also
  252. --------
  253. savgol_coeffs
  254. Notes
  255. -----
  256. Details on the `mode` options:
  257. 'mirror':
  258. Repeats the values at the edges in reverse order. The value
  259. closest to the edge is not included.
  260. 'nearest':
  261. The extension contains the nearest input value.
  262. 'constant':
  263. The extension contains the value given by the `cval` argument.
  264. 'wrap':
  265. The extension contains the values from the other end of the array.
  266. For example, if the input is [1, 2, 3, 4, 5, 6, 7, 8], and
  267. `window_length` is 7, the following shows the extended data for
  268. the various `mode` options (assuming `cval` is 0)::
  269. mode | Ext | Input | Ext
  270. -----------+---------+------------------------+---------
  271. 'mirror' | 4 3 2 | 1 2 3 4 5 6 7 8 | 7 6 5
  272. 'nearest' | 1 1 1 | 1 2 3 4 5 6 7 8 | 8 8 8
  273. 'constant' | 0 0 0 | 1 2 3 4 5 6 7 8 | 0 0 0
  274. 'wrap' | 6 7 8 | 1 2 3 4 5 6 7 8 | 1 2 3
  275. .. versionadded:: 0.14.0
  276. Examples
  277. --------
  278. >>> import numpy as np
  279. >>> from scipy.signal import savgol_filter
  280. >>> np.set_printoptions(precision=2) # For compact display.
  281. >>> x = np.array([2, 2, 5, 2, 1, 0, 1, 4, 9])
  282. Filter with a window length of 5 and a degree 2 polynomial. Use
  283. the defaults for all other parameters.
  284. >>> savgol_filter(x, 5, 2)
  285. array([1.66, 3.17, 3.54, 2.86, 0.66, 0.17, 1. , 4. , 9. ])
  286. Note that the last five values in x are samples of a parabola, so
  287. when mode='interp' (the default) is used with polyorder=2, the last
  288. three values are unchanged. Compare that to, for example,
  289. `mode='nearest'`:
  290. >>> savgol_filter(x, 5, 2, mode='nearest')
  291. array([1.74, 3.03, 3.54, 2.86, 0.66, 0.17, 1. , 4.6 , 7.97])
  292. """
  293. if mode not in ["mirror", "constant", "nearest", "interp", "wrap"]:
  294. raise ValueError("mode must be 'mirror', 'constant', 'nearest' "
  295. "'wrap' or 'interp'.")
  296. xp = array_namespace(x)
  297. x = xp.asarray(x)
  298. # Ensure that x is either single or double precision floating point.
  299. if x.dtype != xp.float64 and x.dtype != xp.float32:
  300. x = xp.astype(x, xp.float64)
  301. coeffs = savgol_coeffs(
  302. window_length, polyorder, deriv=deriv, delta=delta, xp=xp, device=xp_device(x)
  303. )
  304. if mode == "interp":
  305. if window_length > x.shape[axis]:
  306. raise ValueError("If mode is 'interp', window_length must be less "
  307. "than or equal to the size of x.")
  308. # Do not pad. Instead, for the elements within `window_length // 2`
  309. # of the ends of the sequence, use the polynomial that is fitted to
  310. # the last `window_length` elements.
  311. y = convolve1d(x, coeffs, axis=axis, mode="constant")
  312. y = _fit_edges_polyfit(x, window_length, polyorder, deriv, delta, axis, y)
  313. else:
  314. # Any mode other than 'interp' is passed on to ndimage.convolve1d.
  315. y = convolve1d(x, coeffs, axis=axis, mode=mode, cval=cval)
  316. return y