_differentiable_functions.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844
  1. from collections import namedtuple
  2. import numpy as np
  3. import scipy.sparse as sps
  4. from ._numdiff import approx_derivative, group_columns
  5. from ._hessian_update_strategy import HessianUpdateStrategy
  6. from scipy.sparse.linalg import LinearOperator
  7. from scipy._lib._array_api import array_namespace, xp_copy
  8. from scipy._lib import array_api_extra as xpx
  9. from scipy._lib._util import _ScalarFunctionWrapper
  10. FD_METHODS = ('2-point', '3-point', 'cs')
  11. class _ScalarGradWrapper:
  12. """
  13. Wrapper class for gradient calculation
  14. """
  15. def __init__(
  16. self,
  17. grad,
  18. fun=None,
  19. args=None,
  20. finite_diff_options=None,
  21. ):
  22. self.fun = fun
  23. self.grad = grad
  24. self.args = [] if args is None else args
  25. self.finite_diff_options = finite_diff_options
  26. self.ngev = 0
  27. # number of function evaluations consumed by finite difference
  28. self.nfev = 0
  29. def __call__(self, x, f0=None, **kwds):
  30. # Send a copy because the user may overwrite it.
  31. # The user of this class might want `x` to remain unchanged.
  32. if callable(self.grad):
  33. g = np.atleast_1d(self.grad(np.copy(x), *self.args))
  34. elif self.grad in FD_METHODS:
  35. g, dct = approx_derivative(
  36. self.fun,
  37. x,
  38. f0=f0,
  39. **self.finite_diff_options,
  40. )
  41. self.nfev += dct['nfev']
  42. self.ngev += 1
  43. return g
  44. class _ScalarHessWrapper:
  45. """
  46. Wrapper class for hess calculation via finite differences
  47. """
  48. def __init__(
  49. self,
  50. hess,
  51. x0=None,
  52. grad=None,
  53. args=None,
  54. finite_diff_options=None,
  55. ):
  56. self.hess = hess
  57. self.grad = grad
  58. self.args = [] if args is None else args
  59. self.finite_diff_options = finite_diff_options
  60. # keep track of any finite difference function evaluations for grad
  61. self.ngev = 0
  62. self.nhev = 0
  63. self.H = None
  64. self._hess_func = None
  65. if callable(hess):
  66. self.H = hess(np.copy(x0), *args)
  67. self.nhev += 1
  68. if sps.issparse(self.H):
  69. self._hess_func = "sparse_callable"
  70. self.H = sps.csr_array(self.H)
  71. elif isinstance(self.H, LinearOperator):
  72. self._hess_func = "linearoperator_callable"
  73. else:
  74. # dense
  75. self._hess_func = "dense_callable"
  76. self.H = np.atleast_2d(np.asarray(self.H))
  77. elif hess in FD_METHODS:
  78. self._hess_func = "fd_hess"
  79. def __call__(self, x, f0=None, **kwds):
  80. match self._hess_func:
  81. case "sparse_callable":
  82. _h = self._sparse_callable
  83. case "linearoperator_callable":
  84. _h = self._linearoperator_callable
  85. case "dense_callable":
  86. _h = self._dense_callable
  87. case "fd_hess":
  88. _h = self._fd_hess
  89. return _h(np.copy(x), f0=f0)
  90. def _fd_hess(self, x, f0=None, **kwds):
  91. self.H, dct = approx_derivative(
  92. self.grad, x, f0=f0, **self.finite_diff_options
  93. )
  94. self.ngev += dct["nfev"]
  95. return self.H
  96. def _sparse_callable(self, x, **kwds):
  97. self.nhev += 1
  98. self.H = sps.csr_array(self.hess(x, *self.args))
  99. return self.H
  100. def _dense_callable(self, x, **kwds):
  101. self.nhev += 1
  102. self.H = np.atleast_2d(
  103. np.asarray(self.hess(x, *self.args))
  104. )
  105. return self.H
  106. def _linearoperator_callable(self, x, **kwds):
  107. self.nhev += 1
  108. self.H = self.hess(x, *self.args)
  109. return self.H
  110. class ScalarFunction:
  111. """Scalar function and its derivatives.
  112. This class defines a scalar function F: R^n->R and methods for
  113. computing or approximating its first and second derivatives.
  114. Parameters
  115. ----------
  116. fun : callable
  117. evaluates the scalar function. Must be of the form ``fun(x, *args)``,
  118. where ``x`` is the argument in the form of a 1-D array and ``args`` is
  119. a tuple of any additional fixed parameters needed to completely specify
  120. the function. Should return a scalar.
  121. x0 : array-like
  122. Provides an initial set of variables for evaluating fun. Array of real
  123. elements of size (n,), where 'n' is the number of independent
  124. variables.
  125. args : tuple, optional
  126. Any additional fixed parameters needed to completely specify the scalar
  127. function.
  128. grad : {callable, '2-point', '3-point', 'cs'}
  129. Method for computing the gradient vector.
  130. If it is a callable, it should be a function that returns the gradient
  131. vector:
  132. ``grad(x, *args) -> array_like, shape (n,)``
  133. where ``x`` is an array with shape (n,) and ``args`` is a tuple with
  134. the fixed parameters.
  135. Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
  136. to select a finite difference scheme for numerical estimation of the
  137. gradient with a relative step size. These finite difference schemes
  138. obey any specified `bounds`.
  139. hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
  140. Method for computing the Hessian matrix. If it is callable, it should
  141. return the Hessian matrix:
  142. ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
  143. where x is a (n,) ndarray and `args` is a tuple with the fixed
  144. parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
  145. select a finite difference scheme for numerical estimation. Or, objects
  146. implementing `HessianUpdateStrategy` interface can be used to
  147. approximate the Hessian.
  148. Whenever the gradient is estimated via finite-differences, the Hessian
  149. cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
  150. to be estimated using one of the quasi-Newton strategies.
  151. finite_diff_rel_step : None or array_like
  152. Relative step size to use. The absolute step size is computed as
  153. ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
  154. adjusted to fit into the bounds. For ``method='3-point'`` the sign
  155. of `h` is ignored. If None then finite_diff_rel_step is selected
  156. automatically,
  157. finite_diff_bounds : tuple of array_like
  158. Lower and upper bounds on independent variables. Defaults to no bounds,
  159. (-np.inf, np.inf). Each bound must match the size of `x0` or be a
  160. scalar, in the latter case the bound will be the same for all
  161. variables. Use it to limit the range of function evaluation.
  162. epsilon : None or array_like, optional
  163. Absolute step size to use, possibly adjusted to fit into the bounds.
  164. For ``method='3-point'`` the sign of `epsilon` is ignored. By default
  165. relative steps are used, only if ``epsilon is not None`` are absolute
  166. steps used.
  167. workers : map-like callable, optional
  168. A map-like callable, such as `multiprocessing.Pool.map` for evaluating
  169. any numerical differentiation in parallel.
  170. This evaluation is carried out as ``workers(fun, iterable)``, or
  171. ``workers(grad, iterable)``, depending on what is being numerically
  172. differentiated.
  173. Alternatively, if `workers` is an int the task is subdivided into `workers`
  174. sections and the function evaluated in parallel
  175. (uses `multiprocessing.Pool <multiprocessing>`).
  176. Supply -1 to use all available CPU cores.
  177. It is recommended that a map-like be used instead of int, as repeated
  178. calls to `approx_derivative` will incur large overhead from setting up
  179. new processes.
  180. .. versionadded:: 1.16.0
  181. Notes
  182. -----
  183. This class implements a memoization logic. There are methods `fun`,
  184. `grad`, hess` and corresponding attributes `f`, `g` and `H`. The following
  185. things should be considered:
  186. 1. Use only public methods `fun`, `grad` and `hess`.
  187. 2. After one of the methods is called, the corresponding attribute
  188. will be set. However, a subsequent call with a different argument
  189. of *any* of the methods may overwrite the attribute.
  190. """
  191. def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step=None,
  192. finite_diff_bounds=(-np.inf, np.inf), epsilon=None, workers=None):
  193. if not callable(grad) and grad not in FD_METHODS:
  194. raise ValueError(
  195. f"`grad` must be either callable or one of {FD_METHODS}."
  196. )
  197. if not (callable(hess) or hess in FD_METHODS
  198. or isinstance(hess, HessianUpdateStrategy)):
  199. raise ValueError(
  200. f"`hess` must be either callable, HessianUpdateStrategy"
  201. f" or one of {FD_METHODS}."
  202. )
  203. if grad in FD_METHODS and hess in FD_METHODS:
  204. raise ValueError("Whenever the gradient is estimated via "
  205. "finite-differences, we require the Hessian "
  206. "to be estimated using one of the "
  207. "quasi-Newton strategies.")
  208. self.xp = xp = array_namespace(x0)
  209. _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
  210. _dtype = xp.float64
  211. if xp.isdtype(_x.dtype, "real floating"):
  212. _dtype = _x.dtype
  213. # original arguments
  214. self._wrapped_fun = _ScalarFunctionWrapper(fun, args)
  215. self._orig_fun = fun
  216. self._orig_grad = grad
  217. self._orig_hess = hess
  218. self._args = args
  219. # promotes to floating
  220. self.x = xp.astype(_x, _dtype)
  221. self.x_dtype = _dtype
  222. self.n = self.x.size
  223. self.f_updated = False
  224. self.g_updated = False
  225. self.H_updated = False
  226. self._lowest_x = None
  227. self._lowest_f = np.inf
  228. # normalize workers
  229. workers = workers or map
  230. finite_diff_options = {}
  231. if grad in FD_METHODS:
  232. finite_diff_options["method"] = grad
  233. finite_diff_options["rel_step"] = finite_diff_rel_step
  234. finite_diff_options["abs_step"] = epsilon
  235. finite_diff_options["bounds"] = finite_diff_bounds
  236. finite_diff_options["workers"] = workers
  237. finite_diff_options["full_output"] = True
  238. if hess in FD_METHODS:
  239. finite_diff_options["method"] = hess
  240. finite_diff_options["rel_step"] = finite_diff_rel_step
  241. finite_diff_options["abs_step"] = epsilon
  242. finite_diff_options["as_linear_operator"] = True
  243. finite_diff_options["workers"] = workers
  244. finite_diff_options["full_output"] = True
  245. # Initial function evaluation
  246. self._nfev = 0
  247. self._update_fun()
  248. # Initial gradient evaluation
  249. self._wrapped_grad = _ScalarGradWrapper(
  250. grad,
  251. fun=self._wrapped_fun,
  252. args=args,
  253. finite_diff_options=finite_diff_options,
  254. )
  255. self._update_grad()
  256. # Hessian evaluation
  257. if isinstance(hess, HessianUpdateStrategy):
  258. self.H = hess
  259. self.H.initialize(self.n, 'hess')
  260. self.H_updated = True
  261. self.x_prev = None
  262. self.g_prev = None
  263. _FakeCounter = namedtuple('_FakeCounter', ['ngev', 'nhev'])
  264. self._wrapped_hess = _FakeCounter(ngev=0, nhev=0)
  265. else:
  266. if callable(hess):
  267. self._wrapped_hess = _ScalarHessWrapper(
  268. hess,
  269. x0=x0,
  270. args=args,
  271. finite_diff_options=finite_diff_options
  272. )
  273. self.H = self._wrapped_hess.H
  274. self.H_updated = True
  275. elif hess in FD_METHODS:
  276. self._wrapped_hess = _ScalarHessWrapper(
  277. hess,
  278. x0=x0,
  279. args=args,
  280. grad=self._wrapped_grad,
  281. finite_diff_options=finite_diff_options
  282. )
  283. self._update_grad()
  284. self.H = self._wrapped_hess(self.x, f0=self.g)
  285. self.H_updated = True
  286. @property
  287. def nfev(self):
  288. return self._nfev + self._wrapped_grad.nfev
  289. @property
  290. def ngev(self):
  291. return self._wrapped_grad.ngev #+ self._wrapped_hess.ngev
  292. @property
  293. def nhev(self):
  294. return self._wrapped_hess.nhev
  295. def _update_x(self, x):
  296. if isinstance(self._orig_hess, HessianUpdateStrategy):
  297. self._update_grad()
  298. self.x_prev = self.x
  299. self.g_prev = self.g
  300. # ensure that self.x is a copy of x. Don't store a reference
  301. # otherwise the memoization doesn't work properly.
  302. _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
  303. self.x = self.xp.astype(_x, self.x_dtype)
  304. self.f_updated = False
  305. self.g_updated = False
  306. self.H_updated = False
  307. self._update_hess()
  308. else:
  309. # ensure that self.x is a copy of x. Don't store a reference
  310. # otherwise the memoization doesn't work properly.
  311. _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
  312. self.x = self.xp.astype(_x, self.x_dtype)
  313. self.f_updated = False
  314. self.g_updated = False
  315. self.H_updated = False
  316. def _update_fun(self):
  317. if not self.f_updated:
  318. fx = self._wrapped_fun(self.x)
  319. self._nfev += 1
  320. if fx < self._lowest_f:
  321. self._lowest_x = self.x
  322. self._lowest_f = fx
  323. self.f = fx
  324. self.f_updated = True
  325. def _update_grad(self):
  326. if not self.g_updated:
  327. if self._orig_grad in FD_METHODS:
  328. self._update_fun()
  329. self.g = self._wrapped_grad(self.x, f0=self.f)
  330. self.g_updated = True
  331. def _update_hess(self):
  332. if not self.H_updated:
  333. if self._orig_hess in FD_METHODS:
  334. self._update_grad()
  335. self.H = self._wrapped_hess(self.x, f0=self.g)
  336. elif isinstance(self._orig_hess, HessianUpdateStrategy):
  337. self._update_grad()
  338. self.H.update(self.x - self.x_prev, self.g - self.g_prev)
  339. else: # should be callable(hess)
  340. self.H = self._wrapped_hess(self.x)
  341. self.H_updated = True
  342. def fun(self, x):
  343. if not np.array_equal(x, self.x):
  344. self._update_x(x)
  345. self._update_fun()
  346. return self.f
  347. def grad(self, x):
  348. if not np.array_equal(x, self.x):
  349. self._update_x(x)
  350. self._update_grad()
  351. return self.g
  352. def hess(self, x):
  353. if not np.array_equal(x, self.x):
  354. self._update_x(x)
  355. self._update_hess()
  356. return self.H
  357. def fun_and_grad(self, x):
  358. if not np.array_equal(x, self.x):
  359. self._update_x(x)
  360. self._update_fun()
  361. self._update_grad()
  362. return self.f, self.g
  363. class _VectorFunWrapper:
  364. def __init__(self, fun):
  365. self.fun = fun
  366. self.nfev = 0
  367. def __call__(self, x):
  368. self.nfev += 1
  369. return np.atleast_1d(self.fun(x))
  370. class _VectorJacWrapper:
  371. """
  372. Wrapper class for Jacobian calculation
  373. """
  374. def __init__(
  375. self,
  376. jac,
  377. fun=None,
  378. finite_diff_options=None,
  379. sparse_jacobian=None
  380. ):
  381. self.fun = fun
  382. self.jac = jac
  383. self.finite_diff_options = finite_diff_options
  384. self.sparse_jacobian = sparse_jacobian
  385. self.njev = 0
  386. # number of function evaluations consumed by finite difference
  387. self.nfev = 0
  388. def __call__(self, x, f0=None, **kwds):
  389. # Send a copy because the user may overwrite it.
  390. # The user of this class might want `x` to remain unchanged.
  391. if callable(self.jac):
  392. J = self.jac(x)
  393. self.njev += 1
  394. elif self.jac in FD_METHODS:
  395. J, dct = approx_derivative(
  396. self.fun,
  397. x,
  398. f0=f0,
  399. **self.finite_diff_options,
  400. )
  401. self.nfev += dct['nfev']
  402. if self.sparse_jacobian:
  403. return sps.csr_array(J)
  404. elif sps.issparse(J):
  405. return J.toarray()
  406. elif isinstance(J, LinearOperator):
  407. return J
  408. else:
  409. return np.atleast_2d(J)
  410. class _VectorHessWrapper:
  411. """
  412. Wrapper class for Jacobian calculation
  413. """
  414. def __init__(
  415. self,
  416. hess,
  417. jac=None,
  418. finite_diff_options=None,
  419. ):
  420. self.jac = jac
  421. self.hess = hess
  422. self.finite_diff_options = finite_diff_options
  423. self.nhev = 0
  424. # number of jac evaluations consumed by finite difference
  425. self.njev = 0
  426. def __call__(self, x, v, J0=None, **kwds):
  427. # Send a copy because the user may overwrite it.
  428. # The user of this class might want `x` to remain unchanged.
  429. if callable(self.hess):
  430. self.nhev += 1
  431. return self._callable_hess(x, v)
  432. elif self.hess in FD_METHODS:
  433. return self._fd_hess(x, v, J0=J0)
  434. def _fd_hess(self, x, v, J0=None):
  435. if J0 is None:
  436. J0 = self.jac(x)
  437. self.njev += 1
  438. # H will be a LinearOperator
  439. H = approx_derivative(self.jac_dot_v, x,
  440. f0=J0.T.dot(v),
  441. args=(v,),
  442. **self.finite_diff_options)
  443. return H
  444. def jac_dot_v(self, x, v):
  445. self.njev += 1
  446. return self.jac(x).T.dot(v)
  447. def _callable_hess(self, x, v):
  448. H = self.hess(x, v)
  449. if sps.issparse(H):
  450. return sps.csr_array(H)
  451. elif isinstance(H, LinearOperator):
  452. return H
  453. else:
  454. return np.atleast_2d(np.asarray(H))
  455. class VectorFunction:
  456. """Vector function and its derivatives.
  457. This class defines a vector function F: R^n->R^m and methods for
  458. computing or approximating its first and second derivatives.
  459. Notes
  460. -----
  461. This class implements a memoization logic. There are methods `fun`,
  462. `jac`, hess` and corresponding attributes `f`, `J` and `H`. The following
  463. things should be considered:
  464. 1. Use only public methods `fun`, `jac` and `hess`.
  465. 2. After one of the methods is called, the corresponding attribute
  466. will be set. However, a subsequent call with a different argument
  467. of *any* of the methods may overwrite the attribute.
  468. """
  469. def __init__(self, fun, x0, jac, hess,
  470. finite_diff_rel_step=None, finite_diff_jac_sparsity=None,
  471. finite_diff_bounds=(-np.inf, np.inf), sparse_jacobian=None,
  472. workers=None):
  473. if not callable(jac) and jac not in FD_METHODS:
  474. raise ValueError(f"`jac` must be either callable or one of {FD_METHODS}.")
  475. if not (callable(hess) or hess in FD_METHODS
  476. or isinstance(hess, HessianUpdateStrategy)):
  477. raise ValueError("`hess` must be either callable,"
  478. f"HessianUpdateStrategy or one of {FD_METHODS}.")
  479. if jac in FD_METHODS and hess in FD_METHODS:
  480. raise ValueError("Whenever the Jacobian is estimated via "
  481. "finite-differences, we require the Hessian to "
  482. "be estimated using one of the quasi-Newton "
  483. "strategies.")
  484. self.xp = xp = array_namespace(x0)
  485. _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
  486. _dtype = xp.float64
  487. if xp.isdtype(_x.dtype, "real floating"):
  488. _dtype = _x.dtype
  489. # store original functions
  490. self._orig_fun = fun
  491. self._orig_jac = jac
  492. self._orig_hess = hess
  493. # promotes to floating, ensures that it's a copy
  494. self.x = xp.astype(_x, _dtype)
  495. self.x_dtype = _dtype
  496. self.n = self.x.size
  497. self._nfev = 0
  498. self._njev = 0
  499. self._nhev = 0
  500. self.f_updated = False
  501. self.J_updated = False
  502. self.H_updated = False
  503. # normalize workers
  504. workers = workers or map
  505. finite_diff_options = {}
  506. if jac in FD_METHODS:
  507. finite_diff_options["method"] = jac
  508. finite_diff_options["rel_step"] = finite_diff_rel_step
  509. if finite_diff_jac_sparsity is not None:
  510. sparsity_groups = group_columns(finite_diff_jac_sparsity)
  511. finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
  512. sparsity_groups)
  513. finite_diff_options["bounds"] = finite_diff_bounds
  514. finite_diff_options["workers"] = workers
  515. finite_diff_options["full_output"] = True
  516. self.x_diff = np.copy(self.x)
  517. if hess in FD_METHODS:
  518. finite_diff_options["method"] = hess
  519. finite_diff_options["rel_step"] = finite_diff_rel_step
  520. finite_diff_options["as_linear_operator"] = True
  521. # workers is not useful for evaluation of the LinearOperator
  522. # produced by approx_derivative. Only two/three function
  523. # evaluations are used, and the LinearOperator may persist
  524. # outside the scope that workers is valid in.
  525. self.x_diff = np.copy(self.x)
  526. if jac in FD_METHODS and hess in FD_METHODS:
  527. raise ValueError("Whenever the Jacobian is estimated via "
  528. "finite-differences, we require the Hessian to "
  529. "be estimated using one of the quasi-Newton "
  530. "strategies.")
  531. self.fun_wrapped = _VectorFunWrapper(fun)
  532. self._update_fun()
  533. self.v = np.zeros_like(self.f)
  534. self.m = self.v.size
  535. # Initial Jacobian Evaluation
  536. if callable(jac):
  537. self.J = jac(xp_copy(self.x))
  538. self.J_updated = True
  539. self._njev += 1
  540. elif jac in FD_METHODS:
  541. self.J, dct = approx_derivative(
  542. self.fun_wrapped, self.x, f0=self.f, **finite_diff_options
  543. )
  544. self.J_updated = True
  545. self._nfev += dct['nfev']
  546. self.sparse_jacobian = False
  547. if (sparse_jacobian or
  548. sparse_jacobian is None and sps.issparse(self.J)):
  549. # something truthy was specified for sparse_jacobian,
  550. # or it turns out that the Jacobian was sparse.
  551. self.J = sps.csr_array(self.J)
  552. self.sparse_jacobian = True
  553. elif sps.issparse(self.J):
  554. self.J = self.J.toarray()
  555. elif isinstance(self.J, LinearOperator):
  556. pass
  557. else:
  558. self.J = np.atleast_2d(self.J)
  559. self.jac_wrapped = _VectorJacWrapper(
  560. jac,
  561. fun=self.fun_wrapped,
  562. finite_diff_options=finite_diff_options,
  563. sparse_jacobian=self.sparse_jacobian
  564. )
  565. self.hess_wrapped = _VectorHessWrapper(
  566. hess, jac=self.jac_wrapped, finite_diff_options=finite_diff_options
  567. )
  568. # Define Hessian
  569. if callable(hess) or hess in FD_METHODS:
  570. self.H = self.hess_wrapped(xp_copy(self.x), self.v, J0=self.J)
  571. self.H_updated = True
  572. if callable(hess):
  573. self._nhev += 1
  574. elif isinstance(hess, HessianUpdateStrategy):
  575. self.H = hess
  576. self.H.initialize(self.n, 'hess')
  577. self.H_updated = True
  578. self.x_prev = None
  579. self.J_prev = None
  580. @property
  581. def nfev(self):
  582. return self._nfev + self.jac_wrapped.nfev
  583. @property
  584. def njev(self):
  585. return self._njev + self.hess_wrapped.njev
  586. @property
  587. def nhev(self):
  588. return self._nhev
  589. def _update_v(self, v):
  590. if not np.array_equal(v, self.v):
  591. self.v = v
  592. self.H_updated = False
  593. def _update_x(self, x):
  594. if not np.array_equal(x, self.x):
  595. if isinstance(self._orig_hess, HessianUpdateStrategy):
  596. self._update_jac()
  597. self.x_prev = self.x
  598. self.J_prev = self.J
  599. _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
  600. self.x = self.xp.astype(_x, self.x_dtype)
  601. self.f_updated = False
  602. self.J_updated = False
  603. self.H_updated = False
  604. self._update_hess()
  605. else:
  606. _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
  607. self.x = self.xp.astype(_x, self.x_dtype)
  608. self.f_updated = False
  609. self.J_updated = False
  610. self.H_updated = False
  611. def _update_fun(self):
  612. if not self.f_updated:
  613. self.f = self.fun_wrapped(xp_copy(self.x))
  614. self._nfev += 1
  615. self.f_updated = True
  616. def _update_jac(self):
  617. if not self.J_updated:
  618. if self._orig_jac in FD_METHODS:
  619. # need to update fun to get f0
  620. self._update_fun()
  621. else:
  622. self._njev += 1
  623. self.J = self.jac_wrapped(xp_copy(self.x), f0=self.f)
  624. self.J_updated = True
  625. def _update_hess(self):
  626. if not self.H_updated:
  627. if callable(self._orig_hess):
  628. self.H = self.hess_wrapped(xp_copy(self.x), self.v)
  629. self._nhev += 1
  630. elif self._orig_hess in FD_METHODS:
  631. self._update_jac()
  632. self.H = self.hess_wrapped(xp_copy(self.x), self.v, J0=self.J)
  633. elif isinstance(self._orig_hess, HessianUpdateStrategy):
  634. self._update_jac()
  635. # When v is updated before x was updated, then x_prev and
  636. # J_prev are None and we need this check.
  637. if self.x_prev is not None and self.J_prev is not None:
  638. delta_x = self.x - self.x_prev
  639. delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
  640. self.H.update(delta_x, delta_g)
  641. self.H_updated = True
  642. def fun(self, x):
  643. self._update_x(x)
  644. self._update_fun()
  645. # returns a copy so that downstream can't overwrite the
  646. # internal attribute
  647. return xp_copy(self.f)
  648. def jac(self, x):
  649. self._update_x(x)
  650. self._update_jac()
  651. if hasattr(self.J, "astype"):
  652. # returns a copy so that downstream can't overwrite the
  653. # internal attribute. But one can't copy a LinearOperator
  654. return self.J.astype(self.J.dtype)
  655. return self.J
  656. def hess(self, x, v):
  657. # v should be updated before x.
  658. self._update_v(v)
  659. self._update_x(x)
  660. self._update_hess()
  661. if hasattr(self.H, "astype"):
  662. # returns a copy so that downstream can't overwrite the
  663. # internal attribute. But one can't copy non-arrays
  664. return self.H.astype(self.H.dtype)
  665. return self.H
  666. class LinearVectorFunction:
  667. """Linear vector function and its derivatives.
  668. Defines a linear function F = A x, where x is N-D vector and
  669. A is m-by-n matrix. The Jacobian is constant and equals to A. The Hessian
  670. is identically zero and it is returned as a csr matrix.
  671. """
  672. def __init__(self, A, x0, sparse_jacobian):
  673. if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
  674. self.J = sps.csr_array(A)
  675. self.sparse_jacobian = True
  676. elif sps.issparse(A):
  677. self.J = A.toarray()
  678. self.sparse_jacobian = False
  679. else:
  680. # np.asarray makes sure A is ndarray and not matrix
  681. self.J = np.atleast_2d(np.asarray(A))
  682. self.sparse_jacobian = False
  683. self.m, self.n = self.J.shape
  684. self.xp = xp = array_namespace(x0)
  685. _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
  686. _dtype = xp.float64
  687. if xp.isdtype(_x.dtype, "real floating"):
  688. _dtype = _x.dtype
  689. # promotes to floating
  690. self.x = xp.astype(_x, _dtype)
  691. self.x_dtype = _dtype
  692. self.f = self.J.dot(self.x)
  693. self.f_updated = True
  694. self.v = np.zeros(self.m, dtype=float)
  695. self.H = sps.csr_array((self.n, self.n))
  696. def _update_x(self, x):
  697. if not np.array_equal(x, self.x):
  698. _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
  699. self.x = self.xp.astype(_x, self.x_dtype)
  700. self.f_updated = False
  701. def fun(self, x):
  702. self._update_x(x)
  703. if not self.f_updated:
  704. self.f = self.J.dot(x)
  705. self.f_updated = True
  706. return self.f
  707. def jac(self, x):
  708. self._update_x(x)
  709. return self.J
  710. def hess(self, x, v):
  711. self._update_x(x)
  712. self.v = v
  713. return self.H
  714. class IdentityVectorFunction(LinearVectorFunction):
  715. """Identity vector function and its derivatives.
  716. The Jacobian is the identity matrix, returned as a dense array when
  717. `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
  718. identically zero and it is returned as a csr matrix.
  719. """
  720. def __init__(self, x0, sparse_jacobian):
  721. n = len(x0)
  722. if sparse_jacobian or sparse_jacobian is None:
  723. A = sps.eye_array(n, format='csr')
  724. sparse_jacobian = True
  725. else:
  726. A = np.eye(n)
  727. sparse_jacobian = False
  728. super().__init__(A, x0, sparse_jacobian)