_trustregion.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. """Trust-region optimization."""
  2. import math
  3. import warnings
  4. import numpy as np
  5. import scipy.linalg
  6. from ._optimize import (_check_unknown_options, _status_message,
  7. OptimizeResult, _prepare_scalar_function,
  8. _call_callback_maybe_halt)
  9. from scipy.optimize._hessian_update_strategy import HessianUpdateStrategy
  10. from scipy.optimize._differentiable_functions import FD_METHODS
  11. __all__ = []
  12. def _wrap_function(function, args):
  13. # wraps a minimizer function to count number of evaluations
  14. # and to easily provide an args kwd.
  15. ncalls = [0]
  16. if function is None:
  17. return ncalls, None
  18. def function_wrapper(x, *wrapper_args):
  19. ncalls[0] += 1
  20. # A copy of x is sent to the user function (gh13740)
  21. return function(np.copy(x), *(wrapper_args + args))
  22. return ncalls, function_wrapper
  23. class BaseQuadraticSubproblem:
  24. """
  25. Base/abstract class defining the quadratic model for trust-region
  26. minimization. Child classes must implement the ``solve`` method.
  27. Values of the objective function, Jacobian and Hessian (if provided) at
  28. the current iterate ``x`` are evaluated on demand and then stored as
  29. attributes ``fun``, ``jac``, ``hess``.
  30. """
  31. def __init__(self, x, fun, jac, hess=None, hessp=None):
  32. self._x = x
  33. self._f = None
  34. self._g = None
  35. self._h = None
  36. self._g_mag = None
  37. self._cauchy_point = None
  38. self._newton_point = None
  39. self._fun = fun
  40. self._jac = jac
  41. self._hess = hess
  42. self._hessp = hessp
  43. def __call__(self, p):
  44. return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))
  45. @property
  46. def fun(self):
  47. """Value of objective function at current iteration."""
  48. if self._f is None:
  49. self._f = self._fun(self._x)
  50. return self._f
  51. @property
  52. def jac(self):
  53. """Value of Jacobian of objective function at current iteration."""
  54. if self._g is None:
  55. self._g = self._jac(self._x)
  56. return self._g
  57. @property
  58. def hess(self):
  59. """Value of Hessian of objective function at current iteration."""
  60. if self._h is None:
  61. self._h = self._hess(self._x)
  62. return self._h
  63. def hessp(self, p):
  64. if self._hessp is not None:
  65. return self._hessp(self._x, p)
  66. else:
  67. return np.dot(self.hess, p)
  68. @property
  69. def jac_mag(self):
  70. """Magnitude of jacobian of objective function at current iteration."""
  71. if self._g_mag is None:
  72. self._g_mag = scipy.linalg.norm(self.jac)
  73. return self._g_mag
  74. def get_boundaries_intersections(self, z, d, trust_radius):
  75. """
  76. Solve the scalar quadratic equation ``||z + t d|| == trust_radius``.
  77. This is like a line-sphere intersection.
  78. Return the two values of t, sorted from low to high.
  79. """
  80. a = np.dot(d, d)
  81. b = 2 * np.dot(z, d)
  82. c = np.dot(z, z) - trust_radius**2
  83. sqrt_discriminant = math.sqrt(b*b - 4*a*c)
  84. # The following calculation is mathematically
  85. # equivalent to:
  86. # ta = (-b - sqrt_discriminant) / (2*a)
  87. # tb = (-b + sqrt_discriminant) / (2*a)
  88. # but produce smaller round off errors.
  89. # Look at Matrix Computation p.97
  90. # for a better justification.
  91. aux = b + math.copysign(sqrt_discriminant, b)
  92. ta = -aux / (2*a)
  93. tb = -2*c / aux
  94. return sorted([ta, tb])
  95. def solve(self, trust_radius):
  96. raise NotImplementedError('The solve method should be implemented by '
  97. 'the child class')
  98. def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
  99. subproblem=None, initial_trust_radius=1.0,
  100. max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
  101. maxiter=None, disp=False, return_all=False,
  102. callback=None, inexact=True, workers=None,
  103. subproblem_maxiter=None, **unknown_options):
  104. """
  105. Minimization of scalar function of one or more variables using a
  106. trust-region algorithm.
  107. Options for the trust-region algorithm are:
  108. initial_trust_radius : float
  109. Initial trust radius.
  110. max_trust_radius : float
  111. Never propose steps that are longer than this value.
  112. eta : float
  113. Trust region related acceptance stringency for proposed steps.
  114. gtol : float
  115. Gradient norm must be less than `gtol`
  116. before successful termination.
  117. maxiter : int
  118. Maximum number of iterations to perform.
  119. disp : bool
  120. If True, print convergence message.
  121. inexact : bool
  122. Accuracy to solve subproblems. If True requires less nonlinear
  123. iterations, but more vector products. Only effective for method
  124. trust-krylov.
  125. workers : int, map-like callable, optional
  126. A map-like callable, such as `multiprocessing.Pool.map` for evaluating
  127. any numerical differentiation in parallel.
  128. This evaluation is carried out as ``workers(fun, iterable)``.
  129. Only for 'trust-krylov', 'trust-ncg'.
  130. .. versionadded:: 1.16.0
  131. subproblem_maxiter : int, optional
  132. Maximum number of iterations to perform per subproblem. Only affects
  133. trust-exact. Default is 25.
  134. .. versionadded:: 1.17.0
  135. This function is called by the `minimize` function.
  136. It is not supposed to be called directly.
  137. """
  138. _check_unknown_options(unknown_options)
  139. if jac is None:
  140. raise ValueError('Jacobian is currently required for trust-region '
  141. 'methods')
  142. if hess is None and hessp is None:
  143. raise ValueError('Either the Hessian or the Hessian-vector product '
  144. 'is currently required for trust-region methods')
  145. if subproblem is None:
  146. raise ValueError('A subproblem solving strategy is required for '
  147. 'trust-region methods')
  148. if not (0 <= eta < 0.25):
  149. raise Exception('invalid acceptance stringency')
  150. if max_trust_radius <= 0:
  151. raise Exception('the max trust radius must be positive')
  152. if initial_trust_radius <= 0:
  153. raise ValueError('the initial trust radius must be positive')
  154. if initial_trust_radius >= max_trust_radius:
  155. raise ValueError('the initial trust radius must be less than the '
  156. 'max trust radius')
  157. # force the initial guess into a nice format
  158. x0 = np.asarray(x0).flatten()
  159. # A ScalarFunction representing the problem. This caches calls to fun, jac,
  160. # hess.
  161. # the workers kwd only has an effect for trust-ncg, trust-krylov when
  162. # estimating the Hessian with finite-differences. It's never used
  163. # during calculation of jacobian, because callables are required for all
  164. # methods.
  165. sf = _prepare_scalar_function(
  166. fun, x0, jac=jac, hess=hess, args=args, workers=workers
  167. )
  168. fun = sf.fun
  169. jac = sf.grad
  170. if callable(hess):
  171. hess = sf.hess
  172. elif callable(hessp):
  173. # this elif statement must come before examining whether hess
  174. # is estimated by FD methods or a HessianUpdateStrategy
  175. pass
  176. elif (hess in FD_METHODS or isinstance(hess, HessianUpdateStrategy)):
  177. # If the Hessian is being estimated by finite differences or a
  178. # Hessian update strategy then ScalarFunction.hess returns a
  179. # LinearOperator or a HessianUpdateStrategy. This enables the
  180. # calculation/creation of a hessp. BUT you only want to do this
  181. # if the user *hasn't* provided a callable(hessp) function.
  182. hess = None
  183. def hessp(x, p, *args):
  184. return sf.hess(x).dot(p)
  185. else:
  186. raise ValueError('Either the Hessian or the Hessian-vector product '
  187. 'is currently required for trust-region methods')
  188. # ScalarFunction doesn't represent hessp
  189. nhessp, hessp = _wrap_function(hessp, args)
  190. # limit the number of iterations
  191. if maxiter is None:
  192. maxiter = len(x0)*200
  193. # init the search status
  194. warnflag = 0
  195. # initialize the search
  196. trust_radius = initial_trust_radius
  197. x = x0
  198. if return_all:
  199. allvecs = [x]
  200. subproblem_init_kw = {}
  201. if hasattr(subproblem, 'MAXITER_DEFAULT'):
  202. subproblem_init_kw['maxiter'] = subproblem_maxiter
  203. m = subproblem(x, fun, jac, hess, hessp, **subproblem_init_kw)
  204. k = 0
  205. # search for the function min
  206. # do not even start if the gradient is small enough
  207. while m.jac_mag >= gtol:
  208. # Solve the sub-problem.
  209. # This gives us the proposed step relative to the current position
  210. # and it tells us whether the proposed step
  211. # has reached the trust region boundary or not.
  212. try:
  213. p, hits_boundary = m.solve(trust_radius)
  214. except np.linalg.LinAlgError:
  215. warnflag = 3
  216. break
  217. # calculate the predicted value at the proposed point
  218. predicted_value = m(p)
  219. # define the local approximation at the proposed point
  220. x_proposed = x + p
  221. m_proposed = subproblem(x_proposed, fun, jac, hess, hessp, **subproblem_init_kw)
  222. # evaluate the ratio defined in equation (4.4)
  223. actual_reduction = m.fun - m_proposed.fun
  224. predicted_reduction = m.fun - predicted_value
  225. if predicted_reduction <= 0:
  226. warnflag = 2
  227. break
  228. rho = actual_reduction / predicted_reduction
  229. # update the trust radius according to the actual/predicted ratio
  230. if rho < 0.25:
  231. trust_radius *= 0.25
  232. elif rho > 0.75 and hits_boundary:
  233. trust_radius = min(2*trust_radius, max_trust_radius)
  234. # if the ratio is high enough then accept the proposed step
  235. if rho > eta:
  236. x = x_proposed
  237. m = m_proposed
  238. # append the best guess, call back, increment the iteration count
  239. if return_all:
  240. allvecs.append(np.copy(x))
  241. k += 1
  242. intermediate_result = OptimizeResult(x=x, fun=m.fun)
  243. if _call_callback_maybe_halt(callback, intermediate_result):
  244. break
  245. # check if the gradient is small enough to stop
  246. if m.jac_mag < gtol:
  247. warnflag = 0
  248. break
  249. # check if we have looked at enough iterations
  250. if k >= maxiter:
  251. warnflag = 1
  252. break
  253. # print some stuff if requested
  254. status_messages = (
  255. _status_message['success'],
  256. _status_message['maxiter'],
  257. 'A bad approximation caused failure to predict improvement.',
  258. 'A linalg error occurred, such as a non-psd Hessian.',
  259. )
  260. if disp:
  261. if warnflag == 0:
  262. print(status_messages[warnflag])
  263. else:
  264. warnings.warn(status_messages[warnflag], RuntimeWarning, stacklevel=3)
  265. print(f" Current function value: {m.fun:f}")
  266. print(f" Iterations: {k:d}")
  267. print(f" Function evaluations: {sf.nfev:d}")
  268. print(f" Gradient evaluations: {sf.ngev:d}")
  269. print(f" Hessian evaluations: {sf.nhev + nhessp[0]:d}")
  270. result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
  271. fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
  272. nhev=sf.nhev + nhessp[0], nit=k,
  273. message=status_messages[warnflag])
  274. if hess is not None:
  275. result['hess'] = m.hess
  276. if return_all:
  277. result['allvecs'] = allvecs
  278. return result