_numdiff.py 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963
  1. """Routines for numerical differentiation."""
  2. import functools
  3. import numpy as np
  4. from numpy.linalg import norm
  5. from scipy.sparse.linalg import LinearOperator
  6. from ..sparse import issparse, isspmatrix, find, csc_array, csr_array, csr_matrix
  7. from ._group_columns import group_dense, group_sparse
  8. from scipy._lib._array_api import array_namespace
  9. from scipy._lib._util import MapWrapper
  10. from scipy._lib import array_api_extra as xpx
  11. def _adjust_scheme_to_bounds(x0, h, num_steps, scheme, lb, ub):
  12. """Adjust final difference scheme to the presence of bounds.
  13. Parameters
  14. ----------
  15. x0 : ndarray, shape (n,)
  16. Point at which we wish to estimate derivative.
  17. h : ndarray, shape (n,)
  18. Desired absolute finite difference steps.
  19. num_steps : int
  20. Number of `h` steps in one direction required to implement finite
  21. difference scheme. For example, 2 means that we need to evaluate
  22. f(x0 + 2 * h) or f(x0 - 2 * h)
  23. scheme : {'1-sided', '2-sided'}
  24. Whether steps in one or both directions are required. In other
  25. words '1-sided' applies to forward and backward schemes, '2-sided'
  26. applies to center schemes.
  27. lb : ndarray, shape (n,)
  28. Lower bounds on independent variables.
  29. ub : ndarray, shape (n,)
  30. Upper bounds on independent variables.
  31. Returns
  32. -------
  33. h_adjusted : ndarray, shape (n,)
  34. Adjusted absolute step sizes. Step size decreases only if a sign flip
  35. or switching to one-sided scheme doesn't allow to take a full step.
  36. use_one_sided : ndarray of bool, shape (n,)
  37. Whether to switch to one-sided scheme. Informative only for
  38. ``scheme='2-sided'``.
  39. """
  40. if scheme == '1-sided':
  41. use_one_sided = np.ones_like(h, dtype=bool)
  42. elif scheme == '2-sided':
  43. h = np.abs(h)
  44. use_one_sided = np.zeros_like(h, dtype=bool)
  45. else:
  46. raise ValueError("`scheme` must be '1-sided' or '2-sided'.")
  47. if np.all((lb == -np.inf) & (ub == np.inf)):
  48. return h, use_one_sided
  49. h_total = h * num_steps
  50. h_adjusted = h.copy()
  51. lower_dist = x0 - lb
  52. upper_dist = ub - x0
  53. if scheme == '1-sided':
  54. x = x0 + h_total
  55. violated = (x < lb) | (x > ub)
  56. fitting = np.abs(h_total) <= np.maximum(lower_dist, upper_dist)
  57. h_adjusted[violated & fitting] *= -1
  58. forward = (upper_dist >= lower_dist) & ~fitting
  59. h_adjusted[forward] = upper_dist[forward] / num_steps
  60. backward = (upper_dist < lower_dist) & ~fitting
  61. h_adjusted[backward] = -lower_dist[backward] / num_steps
  62. elif scheme == '2-sided':
  63. central = (lower_dist >= h_total) & (upper_dist >= h_total)
  64. forward = (upper_dist >= lower_dist) & ~central
  65. h_adjusted[forward] = np.minimum(
  66. h[forward], 0.5 * upper_dist[forward] / num_steps)
  67. use_one_sided[forward] = True
  68. backward = (upper_dist < lower_dist) & ~central
  69. h_adjusted[backward] = -np.minimum(
  70. h[backward], 0.5 * lower_dist[backward] / num_steps)
  71. use_one_sided[backward] = True
  72. min_dist = np.minimum(upper_dist, lower_dist) / num_steps
  73. adjusted_central = (~central & (np.abs(h_adjusted) <= min_dist))
  74. h_adjusted[adjusted_central] = min_dist[adjusted_central]
  75. use_one_sided[adjusted_central] = False
  76. return h_adjusted, use_one_sided
  77. @functools.lru_cache
  78. def _eps_for_method(x0_dtype, f0_dtype, method):
  79. """
  80. Calculates relative EPS step to use for a given data type
  81. and numdiff step method.
  82. Progressively smaller steps are used for larger floating point types.
  83. Parameters
  84. ----------
  85. f0_dtype: np.dtype
  86. dtype of function evaluation
  87. x0_dtype: np.dtype
  88. dtype of parameter vector
  89. method: {'2-point', '3-point', 'cs'}
  90. Returns
  91. -------
  92. EPS: float
  93. relative step size. May be np.float16, np.float32, np.float64
  94. Notes
  95. -----
  96. The default relative step will be np.float64. However, if x0 or f0 are
  97. smaller floating point types (np.float16, np.float32), then the smallest
  98. floating point type is chosen.
  99. """
  100. # the default EPS value
  101. EPS = np.finfo(np.float64).eps
  102. x0_is_fp = False
  103. if np.issubdtype(x0_dtype, np.inexact):
  104. # if you're a floating point type then over-ride the default EPS
  105. EPS = np.finfo(x0_dtype).eps
  106. x0_itemsize = np.dtype(x0_dtype).itemsize
  107. x0_is_fp = True
  108. if np.issubdtype(f0_dtype, np.inexact):
  109. f0_itemsize = np.dtype(f0_dtype).itemsize
  110. # choose the smallest itemsize between x0 and f0
  111. if x0_is_fp and f0_itemsize < x0_itemsize:
  112. EPS = np.finfo(f0_dtype).eps
  113. if method in ["2-point", "cs"]:
  114. return EPS**0.5
  115. elif method in ["3-point"]:
  116. return EPS**(1/3)
  117. else:
  118. raise RuntimeError("Unknown step method, should be one of "
  119. "{'2-point', '3-point', 'cs'}")
  120. def _compute_absolute_step(rel_step, x0, f0, method):
  121. """
  122. Computes an absolute step from a relative step for finite difference
  123. calculation.
  124. Parameters
  125. ----------
  126. rel_step: None or array-like
  127. Relative step for the finite difference calculation
  128. x0 : np.ndarray
  129. Parameter vector
  130. f0 : np.ndarray or scalar
  131. method : {'2-point', '3-point', 'cs'}
  132. Returns
  133. -------
  134. h : float
  135. The absolute step size
  136. Notes
  137. -----
  138. `h` will always be np.float64. However, if `x0` or `f0` are
  139. smaller floating point dtypes (e.g. np.float32), then the absolute
  140. step size will be calculated from the smallest floating point size.
  141. """
  142. # this is used instead of np.sign(x0) because we need
  143. # sign_x0 to be 1 when x0 == 0.
  144. sign_x0 = (x0 >= 0).astype(float) * 2 - 1
  145. rstep = _eps_for_method(x0.dtype, f0.dtype, method)
  146. if rel_step is None:
  147. abs_step = rstep * sign_x0 * np.maximum(1.0, np.abs(x0))
  148. else:
  149. # User has requested specific relative steps.
  150. # Don't multiply by max(1, abs(x0) because if x0 < 1 then their
  151. # requested step is not used.
  152. abs_step = rel_step * sign_x0 * np.abs(x0)
  153. # however we don't want an abs_step of 0, which can happen if
  154. # rel_step is 0, or x0 is 0. Instead, substitute a realistic step
  155. dx = ((x0 + abs_step) - x0)
  156. abs_step = np.where(dx == 0,
  157. rstep * sign_x0 * np.maximum(1.0, np.abs(x0)),
  158. abs_step)
  159. return abs_step
  160. def _prepare_bounds(bounds, x0):
  161. """
  162. Prepares new-style bounds from a two-tuple specifying the lower and upper
  163. limits for values in x0. If a value is not bound then the lower/upper bound
  164. will be expected to be -np.inf/np.inf.
  165. Examples
  166. --------
  167. >>> _prepare_bounds([(0, 1, 2), (1, 2, np.inf)], [0.5, 1.5, 2.5])
  168. (array([0., 1., 2.]), array([ 1., 2., inf]))
  169. """
  170. lb, ub = (np.asarray(b, dtype=float) for b in bounds)
  171. if lb.ndim == 0:
  172. lb = np.resize(lb, x0.shape)
  173. if ub.ndim == 0:
  174. ub = np.resize(ub, x0.shape)
  175. return lb, ub
  176. def group_columns(A, order=0):
  177. """Group columns of a 2-D matrix for sparse finite differencing [1]_.
  178. Two columns are in the same group if in each row at least one of them
  179. has zero. A greedy sequential algorithm is used to construct groups.
  180. Parameters
  181. ----------
  182. A : array_like or sparse array, shape (m, n)
  183. Matrix of which to group columns.
  184. order : int, iterable of int with shape (n,) or None
  185. Permutation array which defines the order of columns enumeration.
  186. If int or None, a random permutation is used with `order` used as
  187. a random seed. Default is 0, that is use a random permutation but
  188. guarantee repeatability.
  189. Returns
  190. -------
  191. groups : ndarray of int, shape (n,)
  192. Contains values from 0 to n_groups-1, where n_groups is the number
  193. of found groups. Each value ``groups[i]`` is an index of a group to
  194. which ith column assigned. The procedure was helpful only if
  195. n_groups is significantly less than n.
  196. References
  197. ----------
  198. .. [1] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
  199. sparse Jacobian matrices", Journal of the Institute of Mathematics
  200. and its Applications, 13 (1974), pp. 117-120.
  201. """
  202. if issparse(A):
  203. A = csc_array(A)
  204. else:
  205. A = np.atleast_2d(A)
  206. A = (A != 0).astype(np.int32)
  207. if A.ndim != 2:
  208. raise ValueError("`A` must be 2-dimensional.")
  209. m, n = A.shape
  210. if order is None or np.isscalar(order):
  211. rng = np.random.RandomState(order)
  212. order = rng.permutation(n)
  213. else:
  214. order = np.asarray(order)
  215. if order.shape != (n,):
  216. raise ValueError("`order` has incorrect shape.")
  217. A = A[:, order]
  218. if issparse(A):
  219. groups = group_sparse(m, n, A.indices, A.indptr)
  220. else:
  221. groups = group_dense(m, n, A)
  222. groups[order] = groups.copy()
  223. return groups
  224. def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None,
  225. f0=None, bounds=(-np.inf, np.inf), sparsity=None,
  226. as_linear_operator=False, args=(), kwargs=None,
  227. full_output=False, workers=None):
  228. """Compute finite difference approximation of the derivatives of a
  229. vector-valued function.
  230. If a function maps from R^n to R^m, its derivatives form m-by-n matrix
  231. called the Jacobian, where an element (i, j) is a partial derivative of
  232. f[i] with respect to x[j].
  233. Parameters
  234. ----------
  235. fun : callable
  236. Function of which to estimate the derivatives. The argument x
  237. passed to this function is ndarray of shape (n,) (never a scalar
  238. even if n=1). It must return 1-D array_like of shape (m,) or a scalar.
  239. x0 : array_like of shape (n,) or float
  240. Point at which to estimate the derivatives. Float will be converted
  241. to a 1-D array.
  242. method : {'3-point', '2-point', 'cs'}, optional
  243. Finite difference method to use:
  244. - '2-point' - use the first order accuracy forward or backward
  245. difference.
  246. - '3-point' - use central difference in interior points and the
  247. second order accuracy forward or backward difference
  248. near the boundary.
  249. - 'cs' - use a complex-step finite difference scheme. This assumes
  250. that the user function is real-valued and can be
  251. analytically continued to the complex plane. Otherwise,
  252. produces bogus results.
  253. rel_step : None or array_like, optional
  254. Relative step size to use. If None (default) the absolute step size is
  255. computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``, with
  256. `rel_step` being selected automatically, see Notes. Otherwise
  257. ``h = rel_step * sign(x0) * abs(x0)``. For ``method='3-point'`` the
  258. sign of `h` is ignored. The calculated step size is possibly adjusted
  259. to fit into the bounds.
  260. abs_step : array_like, optional
  261. Absolute step size to use, possibly adjusted to fit into the bounds.
  262. For ``method='3-point'`` the sign of `abs_step` is ignored. By default
  263. relative steps are used, only if ``abs_step is not None`` are absolute
  264. steps used.
  265. f0 : None or array_like, optional
  266. If not None it is assumed to be equal to ``fun(x0)``, in this case
  267. the ``fun(x0)`` is not called. Default is None.
  268. bounds : tuple of array_like, optional
  269. Lower and upper bounds on independent variables. Defaults to no bounds.
  270. Each bound must match the size of `x0` or be a scalar, in the latter
  271. case the bound will be the same for all variables. Use it to limit the
  272. range of function evaluation. Bounds checking is not implemented
  273. when `as_linear_operator` is True.
  274. sparsity : {None, array_like, sparse array, 2-tuple}, optional
  275. Defines a sparsity structure of the Jacobian matrix. If the Jacobian
  276. matrix is known to have only few non-zero elements in each row, then
  277. it's possible to estimate its several columns by a single function
  278. evaluation [3]_. To perform such economic computations two ingredients
  279. are required:
  280. * structure : array_like or sparse array of shape (m, n). A zero
  281. element means that a corresponding element of the Jacobian
  282. identically equals to zero.
  283. * groups : array_like of shape (n,). A column grouping for a given
  284. sparsity structure, use `group_columns` to obtain it.
  285. A single array or a sparse array is interpreted as a sparsity
  286. structure, and groups are computed inside the function. A tuple is
  287. interpreted as (structure, groups). If None (default), a standard
  288. dense differencing will be used.
  289. Note, that sparse differencing makes sense only for large Jacobian
  290. matrices where each row contains few non-zero elements.
  291. as_linear_operator : bool, optional
  292. When True the function returns an `scipy.sparse.linalg.LinearOperator`.
  293. Otherwise it returns a dense array or a sparse array depending on
  294. `sparsity`. The linear operator provides an efficient way of computing
  295. ``J.dot(p)`` for any vector ``p`` of shape (n,), but does not allow
  296. direct access to individual elements of the matrix. By default
  297. `as_linear_operator` is False.
  298. args, kwargs : tuple and dict, optional
  299. Additional arguments passed to `fun`. Both empty by default.
  300. The calling signature is ``fun(x, *args, **kwargs)``.
  301. full_output : bool, optional
  302. If True then the function also returns a dictionary with extra information
  303. about the calculation.
  304. workers : int or map-like callable, optional
  305. Supply a map-like callable, such as
  306. `multiprocessing.Pool.map` for evaluating the population in parallel.
  307. This evaluation is carried out as ``workers(fun, iterable)``.
  308. Alternatively, if `workers` is an int the task is subdivided into `workers`
  309. sections and the fun evaluated in parallel
  310. (uses `multiprocessing.Pool <multiprocessing>`).
  311. Supply -1 to use all available CPU cores.
  312. It is recommended that a map-like be used instead of int, as repeated
  313. calls to `approx_derivative` will incur large overhead from setting up
  314. new processes.
  315. Returns
  316. -------
  317. J : {ndarray, sparse array, LinearOperator}
  318. Finite difference approximation of the Jacobian matrix.
  319. If `as_linear_operator` is True returns a LinearOperator
  320. with shape (m, n). Otherwise it returns a dense array or sparse
  321. array depending on how `sparsity` is defined. If `sparsity`
  322. is None then a ndarray with shape (m, n) is returned. If
  323. `sparsity` is not None returns a csr_array or csr_matrix with
  324. shape (m, n) following the array/matrix type of the incoming structure.
  325. For sparse arrays and linear operators it is always returned as
  326. a 2-D structure. For ndarrays, if m=1 it is returned
  327. as a 1-D gradient array with shape (n,).
  328. info_dict : dict
  329. Dictionary containing extra information about the calculation. The
  330. keys include:
  331. - `nfev`, number of function evaluations. If `as_linear_operator` is True
  332. then `fun` is expected to track the number of evaluations itself.
  333. This is because multiple calls may be made to the linear operator which
  334. are not trackable here.
  335. See Also
  336. --------
  337. check_derivative : Check correctness of a function computing derivatives.
  338. Notes
  339. -----
  340. If `rel_step` is not provided, it assigned as ``EPS**(1/s)``, where EPS is
  341. determined from the smallest floating point dtype of `x0` or `fun(x0)`,
  342. ``np.finfo(x0.dtype).eps``, s=2 for '2-point' method and
  343. s=3 for '3-point' method. Such relative step approximately minimizes a sum
  344. of truncation and round-off errors, see [1]_. Relative steps are used by
  345. default. However, absolute steps are used when ``abs_step is not None``.
  346. If any of the absolute or relative steps produces an indistinguishable
  347. difference from the original `x0`, ``(x0 + dx) - x0 == 0``, then a
  348. automatic step size is substituted for that particular entry.
  349. A finite difference scheme for '3-point' method is selected automatically.
  350. The well-known central difference scheme is used for points sufficiently
  351. far from the boundary, and 3-point forward or backward scheme is used for
  352. points near the boundary. Both schemes have the second-order accuracy in
  353. terms of Taylor expansion. Refer to [2]_ for the formulas of 3-point
  354. forward and backward difference schemes.
  355. For dense differencing when m=1 Jacobian is returned with a shape (n,),
  356. on the other hand when n=1 Jacobian is returned with a shape (m, 1).
  357. Our motivation is the following: a) It handles a case of gradient
  358. computation (m=1) in a conventional way. b) It clearly separates these two
  359. different cases. b) In all cases np.atleast_2d can be called to get 2-D
  360. Jacobian with correct dimensions.
  361. References
  362. ----------
  363. .. [1] W. H. Press et. al. "Numerical Recipes. The Art of Scientific
  364. Computing. 3rd edition", sec. 5.7.
  365. .. [2] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
  366. sparse Jacobian matrices", Journal of the Institute of Mathematics
  367. and its Applications, 13 (1974), pp. 117-120.
  368. .. [3] B. Fornberg, "Generation of Finite Difference Formulas on
  369. Arbitrarily Spaced Grids", Mathematics of Computation 51, 1988.
  370. Examples
  371. --------
  372. >>> import numpy as np
  373. >>> from scipy.optimize._numdiff import approx_derivative
  374. >>>
  375. >>> def f(x, c1, c2):
  376. ... return np.array([x[0] * np.sin(c1 * x[1]),
  377. ... x[0] * np.cos(c2 * x[1])])
  378. ...
  379. >>> x0 = np.array([1.0, 0.5 * np.pi])
  380. >>> approx_derivative(f, x0, args=(1, 2))
  381. array([[ 1., 0.],
  382. [-1., 0.]])
  383. Bounds can be used to limit the region of function evaluation.
  384. In the example below we compute left and right derivative at point 1.0.
  385. >>> def g(x):
  386. ... return x**2 if x >= 1 else x
  387. ...
  388. >>> x0 = 1.0
  389. >>> approx_derivative(g, x0, bounds=(-np.inf, 1.0))
  390. array([ 1.])
  391. >>> approx_derivative(g, x0, bounds=(1.0, np.inf))
  392. array([ 2.])
  393. We can also parallelize the derivative calculation using the workers
  394. keyword.
  395. >>> from multiprocessing import Pool
  396. >>> import time
  397. >>> def fun2(x): # import from an external file for use with multiprocessing
  398. ... time.sleep(0.002)
  399. ... return rosen(x)
  400. >>> rng = np.random.default_rng()
  401. >>> x0 = rng.uniform(high=10, size=(2000,))
  402. >>> f0 = rosen(x0)
  403. >>> %timeit approx_derivative(fun2, x0, f0=f0) # may vary
  404. 10.5 s ± 5.91 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  405. >>> elapsed = []
  406. >>> with Pool() as workers:
  407. ... for i in range(10):
  408. ... t = time.perf_counter()
  409. ... approx_derivative(fun2, x0, workers=workers.map, f0=f0)
  410. ... et = time.perf_counter()
  411. ... elapsed.append(et - t)
  412. >>> np.mean(elapsed) # may vary
  413. np.float64(1.442545195999901)
  414. Create a map-like vectorized version. `x` is a generator, so first of all
  415. a 2-D array, `xx`, is reconstituted. Here `xx` has shape `(Y, N)` where `Y`
  416. is the number of function evaluations to perform and `N` is the dimensionality
  417. of the objective function. The underlying objective function is `rosen`, which
  418. requires `xx` to have shape `(N, Y)`, so a transpose is required.
  419. >>> def fun(f, x, *args, **kwds):
  420. ... xx = np.r_[[xs for xs in x]]
  421. ... return f(xx.T)
  422. >>> %timeit approx_derivative(fun2, x0, workers=fun, f0=f0) # may vary
  423. 91.8 ms ± 755 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
  424. """
  425. if method not in ['2-point', '3-point', 'cs']:
  426. raise ValueError(f"Unknown method '{method}'. ")
  427. info_dict = {'nfev': None}
  428. xp = array_namespace(x0)
  429. _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
  430. _dtype = xp.float64
  431. if xp.isdtype(_x.dtype, "real floating"):
  432. _dtype = _x.dtype
  433. # promotes to floating
  434. x0 = xp.astype(_x, _dtype)
  435. if x0.ndim > 1:
  436. raise ValueError("`x0` must have at most 1 dimension.")
  437. lb, ub = _prepare_bounds(bounds, x0)
  438. if lb.shape != x0.shape or ub.shape != x0.shape:
  439. raise ValueError("Inconsistent shapes between bounds and `x0`.")
  440. if as_linear_operator and not (np.all(np.isinf(lb))
  441. and np.all(np.isinf(ub))):
  442. raise ValueError("Bounds not supported when "
  443. "`as_linear_operator` is True.")
  444. if kwargs is None:
  445. kwargs = {}
  446. fun_wrapped = _Fun_Wrapper(fun, x0, args, kwargs)
  447. # Record how function evaluations are consumed by `approx_derivative`.
  448. # Historically this was done by upstream functions wrapping `fun`.
  449. # However, with parallelization via workers it was going to be impossible to
  450. # keep that counter updated across Processes. Counter synchronisation can
  451. # be achieved via multiprocessing.Value and a Pool. However, workers can be
  452. # any map-like, not necessarily a Pool, so initialization of the Value would
  453. # be difficult.
  454. nfev = _nfev = 0
  455. if f0 is None:
  456. f0 = fun_wrapped(x0)
  457. nfev = 1
  458. else:
  459. f0 = np.atleast_1d(f0)
  460. if f0.ndim > 1:
  461. raise ValueError("`f0` passed has more than 1 dimension.")
  462. if np.any((x0 < lb) | (x0 > ub)):
  463. raise ValueError("`x0` violates bound constraints.")
  464. if as_linear_operator:
  465. if rel_step is None:
  466. rel_step = _eps_for_method(x0.dtype, f0.dtype, method)
  467. J, _ = _linear_operator_difference(fun_wrapped, x0,
  468. f0, rel_step, method)
  469. else:
  470. # by default we use rel_step
  471. if abs_step is None:
  472. h = _compute_absolute_step(rel_step, x0, f0, method)
  473. else:
  474. # user specifies an absolute step
  475. sign_x0 = (x0 >= 0).astype(float) * 2 - 1
  476. h = abs_step
  477. # cannot have a zero step. This might happen if x0 is very large
  478. # or small. In which case fall back to relative step.
  479. dx = ((x0 + h) - x0)
  480. h = np.where(dx == 0,
  481. _eps_for_method(x0.dtype, f0.dtype, method) *
  482. sign_x0 * np.maximum(1.0, np.abs(x0)),
  483. h)
  484. if method == '2-point':
  485. h, use_one_sided = _adjust_scheme_to_bounds(
  486. x0, h, 1, '1-sided', lb, ub)
  487. elif method == '3-point':
  488. h, use_one_sided = _adjust_scheme_to_bounds(
  489. x0, h, 1, '2-sided', lb, ub)
  490. elif method == 'cs':
  491. use_one_sided = False
  492. # normalize workers
  493. workers = workers or map
  494. with MapWrapper(workers) as mf:
  495. if sparsity is None:
  496. J, _nfev = _dense_difference(fun_wrapped, x0, f0, h,
  497. use_one_sided, method,
  498. mf)
  499. else:
  500. if not issparse(sparsity) and len(sparsity) == 2:
  501. structure, groups = sparsity
  502. else:
  503. structure = sparsity
  504. groups = group_columns(sparsity)
  505. if issparse(structure):
  506. structure = structure.tocsc()
  507. else:
  508. structure = np.atleast_2d(structure)
  509. groups = np.atleast_1d(groups)
  510. J, _nfev = _sparse_difference(fun_wrapped, x0, f0, h,
  511. use_one_sided, structure,
  512. groups, method, mf)
  513. if full_output:
  514. nfev += _nfev
  515. info_dict["nfev"] = nfev
  516. return J, info_dict
  517. else:
  518. return J
  519. def _linear_operator_difference(fun, x0, f0, h, method):
  520. m = f0.size
  521. n = x0.size
  522. if method == '2-point':
  523. # nfev = 1
  524. def matvec(p):
  525. if np.array_equal(p, np.zeros_like(p)):
  526. return np.zeros(m)
  527. dx = h / norm(p)
  528. x = x0 + dx*p
  529. df = fun(x) - f0
  530. return df / dx
  531. elif method == '3-point':
  532. # nfev = 2
  533. def matvec(p):
  534. if np.array_equal(p, np.zeros_like(p)):
  535. return np.zeros(m)
  536. dx = 2*h / norm(p)
  537. x1 = x0 - (dx/2)*p
  538. x2 = x0 + (dx/2)*p
  539. f1 = fun(x1)
  540. f2 = fun(x2)
  541. df = f2 - f1
  542. return df / dx
  543. elif method == 'cs':
  544. # nfev = 1
  545. def matvec(p):
  546. if np.array_equal(p, np.zeros_like(p)):
  547. return np.zeros(m)
  548. dx = h / norm(p)
  549. x = x0 + dx*p*1.j
  550. f1 = fun(x)
  551. df = f1.imag
  552. return df / dx
  553. else:
  554. raise RuntimeError("Never be here.")
  555. return LinearOperator((m, n), matvec), 0
  556. def _dense_difference(fun, x0, f0, h, use_one_sided, method, workers):
  557. m = f0.size
  558. n = x0.size
  559. J_transposed = np.empty((n, m))
  560. nfev = 0
  561. if method == '2-point':
  562. def x_generator2(x0, h):
  563. for i in range(n):
  564. # If copying isn't done then it's possible for different workers
  565. # to see the same values of x1. (At least that's what happened
  566. # when I used `multiprocessing.dummy.Pool`).
  567. # I also considered creating all the vectors at once, but that
  568. # means assembling a very large N x N array. It's therefore a
  569. # trade-off between N array copies or creating an NxN array.
  570. x1 = np.copy(x0)
  571. x1[i] = x0[i] + h[i]
  572. yield x1
  573. # only f_evals (numerator) needs parallelization, the denominator
  574. # (the step size) is fast to calculate.
  575. f_evals = workers(fun, x_generator2(x0, h))
  576. dx = [(x0[i] + h[i]) - x0[i] for i in range(n)]
  577. df = [f_eval - f0 for f_eval in f_evals]
  578. df_dx = [delf / delx for delf, delx in zip(df, dx)]
  579. nfev += len(df_dx)
  580. elif method == '3-point':
  581. def x_generator3(x0, h, use_one_sided):
  582. for i, one_sided in enumerate(use_one_sided):
  583. x1 = np.copy(x0)
  584. x2 = np.copy(x0)
  585. if one_sided:
  586. x1[i] = x0[i] + h[i]
  587. x2[i] = x0[i] + 2*h[i]
  588. else:
  589. x1[i] = x0[i] - h[i]
  590. x2[i] = x0[i] + h[i]
  591. yield x1
  592. yield x2
  593. # workers may return something like a list that needs to be turned
  594. # into an iterable (can't call `next` on a list)
  595. f_evals = iter(workers(fun, x_generator3(x0, h, use_one_sided)))
  596. gen = x_generator3(x0, h, use_one_sided)
  597. dx = list()
  598. df = list()
  599. for i, one_sided in enumerate(use_one_sided):
  600. l = next(gen)
  601. u = next(gen)
  602. f1 = next(f_evals)
  603. f2 = next(f_evals)
  604. if one_sided:
  605. dx.append(u[i] - x0[i])
  606. df.append(-3.0 * f0 + 4 * f1 - f2)
  607. else:
  608. dx.append(u[i] - l[i])
  609. df.append(f2 - f1)
  610. df_dx = [delf / delx for delf, delx in zip(df, dx)]
  611. nfev += 2 * len(df_dx)
  612. elif method == 'cs':
  613. def x_generator_cs(x0, h):
  614. for i in range(n):
  615. xc = x0.astype(complex, copy=True)
  616. xc[i] += h[i] * 1.j
  617. yield xc
  618. f_evals = iter(workers(fun, x_generator_cs(x0, h)))
  619. df_dx = [f1.imag / hi for f1, hi in zip(f_evals, h)]
  620. nfev += len(df_dx)
  621. else:
  622. raise RuntimeError("Never be here.")
  623. for i, v in enumerate(df_dx):
  624. J_transposed[i] = v
  625. if m == 1:
  626. J_transposed = np.ravel(J_transposed)
  627. return J_transposed.T, nfev
  628. def _sparse_difference(fun, x0, f0, h, use_one_sided,
  629. structure, groups, method, workers):
  630. m = f0.size
  631. n = x0.size
  632. row_indices = []
  633. col_indices = []
  634. fractions = []
  635. n_groups = np.max(groups) + 1
  636. nfev = 0
  637. def e_generator():
  638. # Perturb variables which are in the same group simultaneously.
  639. for group in range(n_groups):
  640. yield np.equal(group, groups)
  641. def x_generator2():
  642. e_gen = e_generator()
  643. for e in e_gen:
  644. h_vec = h * e
  645. x = x0 + h_vec
  646. yield x
  647. def x_generator3():
  648. e_gen = e_generator()
  649. for e in e_gen:
  650. h_vec = h * e
  651. x1 = x0.copy()
  652. x2 = x0.copy()
  653. mask_1 = use_one_sided & e
  654. x1[mask_1] += h_vec[mask_1]
  655. x2[mask_1] += 2 * h_vec[mask_1]
  656. mask_2 = ~use_one_sided & e
  657. x1[mask_2] -= h_vec[mask_2]
  658. x2[mask_2] += h_vec[mask_2]
  659. yield x1
  660. yield x2
  661. def x_generator_cs():
  662. e_gen = e_generator()
  663. for e in e_gen:
  664. h_vec = h * e
  665. yield x0 + h_vec * 1.j
  666. # evaluate the function for each of the groups
  667. if method == '2-point':
  668. f_evals = iter(workers(fun, x_generator2()))
  669. xs = x_generator2()
  670. elif method == '3-point':
  671. f_evals = iter(workers(fun, x_generator3()))
  672. xs = x_generator3()
  673. elif method == 'cs':
  674. f_evals = iter(workers(fun, x_generator_cs()))
  675. for e in e_generator():
  676. # The result is written to columns which correspond to perturbed
  677. # variables.
  678. cols, = np.nonzero(e)
  679. # Find all non-zero elements in selected columns of Jacobian.
  680. i, j, _ = find(structure[:, cols])
  681. # Restore column indices in the full array.
  682. j = cols[j]
  683. if method == '2-point':
  684. dx = next(xs) - x0
  685. df = next(f_evals) - f0
  686. nfev += 1
  687. elif method == '3-point':
  688. # Here we do conceptually the same but separate one-sided
  689. # and two-sided schemes.
  690. x1 = next(xs)
  691. x2 = next(xs)
  692. mask_1 = use_one_sided & e
  693. mask_2 = ~use_one_sided & e
  694. dx = np.zeros(n)
  695. dx[mask_1] = x2[mask_1] - x0[mask_1]
  696. dx[mask_2] = x2[mask_2] - x1[mask_2]
  697. f1 = next(f_evals)
  698. f2 = next(f_evals)
  699. nfev += 2
  700. mask = use_one_sided[j]
  701. df = np.empty(m)
  702. rows = i[mask]
  703. df[rows] = -3 * f0[rows] + 4 * f1[rows] - f2[rows]
  704. rows = i[~mask]
  705. df[rows] = f2[rows] - f1[rows]
  706. elif method == 'cs':
  707. f1 = next(f_evals)
  708. nfev += 1
  709. df = f1.imag
  710. dx = h * e
  711. else:
  712. raise ValueError("Never be here.")
  713. # All that's left is to compute the fraction. We store i, j and
  714. # fractions as separate arrays and later construct csr_array.
  715. row_indices.append(i)
  716. col_indices.append(j)
  717. fractions.append(df[i] / dx[j])
  718. row_indices = np.hstack(row_indices)
  719. col_indices = np.hstack(col_indices)
  720. fractions = np.hstack(fractions)
  721. if isspmatrix(structure):
  722. return csr_matrix((fractions, (row_indices, col_indices)), shape=(m, n)), nfev
  723. return csr_array((fractions, (row_indices, col_indices)), shape=(m, n)), nfev
  724. class _Fun_Wrapper:
  725. # Permits pickling of a wrapped function
  726. def __init__(self, fun, x0, args, kwargs):
  727. self.fun = fun
  728. self.x0 = x0
  729. self.args = args
  730. self.kwargs = kwargs
  731. def __call__(self, x):
  732. # send user function same fp type as x0. (but only if cs is not being
  733. # used
  734. xp = array_namespace(self.x0)
  735. if xp.isdtype(x.dtype, "real floating"):
  736. x = xp.astype(x, self.x0.dtype)
  737. f = np.atleast_1d(self.fun(x, *self.args, **self.kwargs))
  738. if f.ndim > 1:
  739. raise RuntimeError("`fun` return value has "
  740. "more than 1 dimension.")
  741. return f
  742. def check_derivative(fun, jac, x0, bounds=(-np.inf, np.inf), args=(),
  743. kwargs=None):
  744. """Check correctness of a function computing derivatives (Jacobian or
  745. gradient) by comparison with a finite difference approximation.
  746. Parameters
  747. ----------
  748. fun : callable
  749. Function of which to estimate the derivatives. The argument x
  750. passed to this function is ndarray of shape (n,) (never a scalar
  751. even if n=1). It must return 1-D array_like of shape (m,) or a scalar.
  752. jac : callable
  753. Function which computes Jacobian matrix of `fun`. It must work with
  754. argument x the same way as `fun`. The return value must be array_like
  755. or sparse array with an appropriate shape.
  756. x0 : array_like of shape (n,) or float
  757. Point at which to estimate the derivatives. Float will be converted
  758. to 1-D array.
  759. bounds : 2-tuple of array_like, optional
  760. Lower and upper bounds on independent variables. Defaults to no bounds.
  761. Each bound must match the size of `x0` or be a scalar, in the latter
  762. case the bound will be the same for all variables. Use it to limit the
  763. range of function evaluation.
  764. args, kwargs : tuple and dict, optional
  765. Additional arguments passed to `fun` and `jac`. Both empty by default.
  766. The calling signature is ``fun(x, *args, **kwargs)`` and the same
  767. for `jac`.
  768. Returns
  769. -------
  770. accuracy : float
  771. The maximum among all relative errors for elements with absolute values
  772. higher than 1 and absolute errors for elements with absolute values
  773. less or equal than 1. If `accuracy` is on the order of 1e-6 or lower,
  774. then it is likely that your `jac` implementation is correct.
  775. See Also
  776. --------
  777. approx_derivative : Compute finite difference approximation of derivative.
  778. Examples
  779. --------
  780. >>> import numpy as np
  781. >>> from scipy.optimize._numdiff import check_derivative
  782. >>>
  783. >>>
  784. >>> def f(x, c1, c2):
  785. ... return np.array([x[0] * np.sin(c1 * x[1]),
  786. ... x[0] * np.cos(c2 * x[1])])
  787. ...
  788. >>> def jac(x, c1, c2):
  789. ... return np.array([
  790. ... [np.sin(c1 * x[1]), c1 * x[0] * np.cos(c1 * x[1])],
  791. ... [np.cos(c2 * x[1]), -c2 * x[0] * np.sin(c2 * x[1])]
  792. ... ])
  793. ...
  794. >>>
  795. >>> x0 = np.array([1.0, 0.5 * np.pi])
  796. >>> check_derivative(f, jac, x0, args=(1, 2))
  797. 2.4492935982947064e-16
  798. """
  799. if kwargs is None:
  800. kwargs = {}
  801. J_to_test = jac(x0, *args, **kwargs)
  802. if issparse(J_to_test):
  803. J_diff = approx_derivative(fun, x0, bounds=bounds, sparsity=J_to_test,
  804. args=args, kwargs=kwargs)
  805. J_to_test = csr_array(J_to_test)
  806. abs_err = J_to_test - J_diff
  807. i, j, abs_err_data = find(abs_err)
  808. J_diff_data = np.asarray(J_diff[i, j]).ravel()
  809. return np.max(np.abs(abs_err_data) /
  810. np.maximum(1, np.abs(J_diff_data)))
  811. else:
  812. J_diff = approx_derivative(fun, x0, bounds=bounds,
  813. args=args, kwargs=kwargs)
  814. abs_err = np.abs(J_to_test - J_diff)
  815. return np.max(abs_err / np.maximum(1, np.abs(J_diff)))