| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634 |
- """
- Functions
- ---------
- .. autosummary::
- :toctree: generated/
- fmin_l_bfgs_b
- """
- ## License for the Python wrapper
- ## ==============================
- ## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
- ## Permission is hereby granted, free of charge, to any person obtaining a
- ## copy of this software and associated documentation files (the "Software"),
- ## to deal in the Software without restriction, including without limitation
- ## the rights to use, copy, modify, merge, publish, distribute, sublicense,
- ## and/or sell copies of the Software, and to permit persons to whom the
- ## Software is furnished to do so, subject to the following conditions:
- ## The above copyright notice and this permission notice shall be included in
- ## all copies or substantial portions of the Software.
- ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- ## DEALINGS IN THE SOFTWARE.
- ## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
- import numpy as np
- from numpy import array, asarray, float64, zeros
- from . import _lbfgsb
- from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
- _wrap_callback, _check_unknown_options,
- _prepare_scalar_function)
- from ._constraints import old_bound_to_new
- from scipy.sparse.linalg import LinearOperator
- from scipy._lib.deprecation import _NoValue
- import warnings
- __all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
- status_messages = {
- 0 : "START",
- 1 : "NEW_X",
- 2 : "RESTART",
- 3 : "FG",
- 4 : "CONVERGENCE",
- 5 : "STOP",
- 6 : "WARNING",
- 7 : "ERROR",
- 8 : "ABNORMAL"
- }
- task_messages = {
- 0 : "",
- 301 : "",
- 302 : "",
- 401 : "NORM OF PROJECTED GRADIENT <= PGTOL",
- 402 : "RELATIVE REDUCTION OF F <= FACTR*EPSMCH",
- 501 : "CPU EXCEEDING THE TIME LIMIT",
- 502 : "TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT",
- 503 : "PROJECTED GRADIENT IS SUFFICIENTLY SMALL",
- 504 : "TOTAL NO. OF ITERATIONS REACHED LIMIT",
- 505 : "CALLBACK REQUESTED HALT",
- 601 : "ROUNDING ERRORS PREVENT PROGRESS",
- 602 : "STP = STPMAX",
- 603 : "STP = STPMIN",
- 604 : "XTOL TEST SATISFIED",
- 701 : "NO FEASIBLE SOLUTION",
- 702 : "FACTR < 0",
- 703 : "FTOL < 0",
- 704 : "GTOL < 0",
- 705 : "XTOL < 0",
- 706 : "STP < STPMIN",
- 707 : "STP > STPMAX",
- 708 : "STPMIN < 0",
- 709 : "STPMAX < STPMIN",
- 710 : "INITIAL G >= 0",
- 711 : "M <= 0",
- 712 : "N <= 0",
- 713 : "INVALID NBD",
- }
- def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
- approx_grad=0,
- bounds=None, m=10, factr=1e7, pgtol=1e-5,
- epsilon=1e-8,
- iprint=_NoValue, maxfun=15000, maxiter=15000, disp=_NoValue,
- callback=None, maxls=20):
- """
- Minimize a function func using the L-BFGS-B algorithm.
- Parameters
- ----------
- func : callable f(x,*args)
- Function to minimize.
- x0 : ndarray
- Initial guess.
- fprime : callable fprime(x,*args), optional
- The gradient of `func`. If None, then `func` returns the function
- value and the gradient (``f, g = func(x, *args)``), unless
- `approx_grad` is True in which case `func` returns only ``f``.
- args : sequence, optional
- Arguments to pass to `func` and `fprime`.
- approx_grad : bool, optional
- Whether to approximate the gradient numerically (in which case
- `func` returns only the function value).
- bounds : list, optional
- ``(min, max)`` pairs for each element in ``x``, defining
- the bounds on that parameter. Use None or +-inf for one of ``min`` or
- ``max`` when there is no bound in that direction.
- m : int, optional
- The maximum number of variable metric corrections
- used to define the limited memory matrix. (The limited memory BFGS
- method does not store the full hessian but uses this many terms in an
- approximation to it.)
- factr : float, optional
- The iteration stops when
- ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
- where ``eps`` is the machine precision, which is automatically
- generated by the code. Typical values for `factr` are: 1e12 for
- low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
- high accuracy. See Notes for relationship to `ftol`, which is exposed
- (instead of `factr`) by the `scipy.optimize.minimize` interface to
- L-BFGS-B.
- pgtol : float, optional
- The iteration will stop when
- ``max{|proj g_i | i = 1, ..., n} <= pgtol``
- where ``proj g_i`` is the i-th component of the projected gradient.
- epsilon : float, optional
- Step size used when `approx_grad` is True, for numerically
- calculating the gradient
- iprint : int, optional
- Deprecated option that previously controlled the text printed on the
- screen during the problem solution. Now the code does not emit any
- output and this keyword has no function.
- .. deprecated:: 1.15.0
- This keyword is deprecated and will be removed from SciPy 1.18.0.
- disp : int, optional
- Deprecated option that previously controlled the text printed on the
- screen during the problem solution. Now the code does not emit any
- output and this keyword has no function.
- .. deprecated:: 1.15.0
- This keyword is deprecated and will be removed from SciPy 1.18.0.
- maxfun : int, optional
- Maximum number of function evaluations. Note that this function
- may violate the limit because of evaluating gradients by numerical
- differentiation.
- maxiter : int, optional
- Maximum number of iterations.
- callback : callable, optional
- Called after each iteration, as ``callback(xk)``, where ``xk`` is the
- current parameter vector.
- maxls : int, optional
- Maximum number of line search steps (per iteration). Default is 20.
- Returns
- -------
- x : array_like
- Estimated position of the minimum.
- f : float
- Value of `func` at the minimum.
- d : dict
- Information dictionary.
- * d['warnflag'] is
- - 0 if converged,
- - 1 if too many function evaluations or too many iterations,
- - 2 if stopped for another reason, given in d['task']
- * d['grad'] is the gradient at the minimum (should be 0 ish)
- * d['funcalls'] is the number of function calls made.
- * d['nit'] is the number of iterations.
- See also
- --------
- minimize: Interface to minimization algorithms for multivariate
- functions. See the 'L-BFGS-B' `method` in particular. Note that the
- `ftol` option is made available via that interface, while `factr` is
- provided via this interface, where `factr` is the factor multiplying
- the default machine floating-point precision to arrive at `ftol`:
- ``ftol = factr * numpy.finfo(float).eps``.
- Notes
- -----
- SciPy uses a C-translated and modified version of the Fortran code,
- L-BFGS-B v3.0 (released April 25, 2011, BSD-3 licensed). Original Fortran
- version was written by Ciyou Zhu, Richard Byrd, Jorge Nocedal and,
- Jose Luis Morales.
- References
- ----------
- * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
- Constrained Optimization, (1995), SIAM Journal on Scientific and
- Statistical Computing, 16, 5, pp. 1190-1208.
- * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
- FORTRAN routines for large scale bound constrained optimization (1997),
- ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
- * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
- FORTRAN routines for large scale bound constrained optimization (2011),
- ACM Transactions on Mathematical Software, 38, 1.
- Examples
- --------
- Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we
- define an objective function ``f(m, b) = (y - y_model)**2``, where `y`
- describes the observations and `y_model` the prediction of the linear model
- as ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``,
- are arbitrarily chosen as ``(0,5)`` and ``(5,10)`` for this example.
- >>> import numpy as np
- >>> from scipy.optimize import fmin_l_bfgs_b
- >>> X = np.arange(0, 10, 1)
- >>> M = 2
- >>> B = 3
- >>> Y = M * X + B
- >>> def func(parameters, *args):
- ... x = args[0]
- ... y = args[1]
- ... m, b = parameters
- ... y_model = m*x + b
- ... error = sum(np.power((y - y_model), 2))
- ... return error
- >>> initial_values = np.array([0.0, 1.0])
- >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
- ... approx_grad=True)
- >>> x_opt, f_opt
- array([1.99999999, 3.00000006]), 1.7746231151323805e-14 # may vary
- The optimized parameters in ``x_opt`` agree with the ground truth parameters
- ``m`` and ``b``. Next, let us perform a bound constrained optimization using
- the `bounds` parameter.
- >>> bounds = [(0, 5), (5, 10)]
- >>> x_opt, f_op, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
- ... approx_grad=True, bounds=bounds)
- >>> x_opt, f_opt
- array([1.65990508, 5.31649385]), 15.721334516453945 # may vary
- """
- # handle fprime/approx_grad
- if approx_grad:
- fun = func
- jac = None
- elif fprime is None:
- fun = MemoizeJac(func)
- jac = fun.derivative
- else:
- fun = func
- jac = fprime
- # build options
- callback = _wrap_callback(callback)
- opts = {'disp': disp,
- 'iprint': iprint,
- 'maxcor': m,
- 'ftol': factr * np.finfo(float).eps,
- 'gtol': pgtol,
- 'eps': epsilon,
- 'maxfun': maxfun,
- 'maxiter': maxiter,
- 'callback': callback,
- 'maxls': maxls}
- res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
- **opts)
- d = {'grad': res['jac'],
- 'task': res['message'],
- 'funcalls': res['nfev'],
- 'nit': res['nit'],
- 'warnflag': res['status']}
- f = res['fun']
- x = res['x']
- return x, f, d
- def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
- disp=_NoValue, maxcor=10, ftol=2.2204460492503131e-09,
- gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
- iprint=_NoValue, callback=None, maxls=20,
- finite_diff_rel_step=None, workers=None,
- **unknown_options):
- """
- Minimize a scalar function of one or more variables using the L-BFGS-B
- algorithm.
- Options
- -------
- disp : None or int
- Deprecated option that previously controlled the text printed on the
- screen during the problem solution. Now the code does not emit any
- output and this keyword has no function.
- .. deprecated:: 1.15.0
- This keyword is deprecated and will be removed from SciPy 1.18.0.
- maxcor : int
- The maximum number of variable metric corrections used to
- define the limited memory matrix. (The limited memory BFGS
- method does not store the full hessian but uses this many terms
- in an approximation to it.)
- ftol : float
- The iteration stops when ``(f^k -
- f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
- gtol : float
- The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
- <= gtol`` where ``proj g_i`` is the i-th component of the
- projected gradient.
- eps : float or ndarray
- If `jac is None` the absolute step size used for numerical
- approximation of the jacobian via forward differences.
- maxfun : int
- Maximum number of function evaluations before minimization terminates.
- Note that this function may violate the limit if the gradients
- are evaluated by numerical differentiation.
- maxiter : int
- Maximum number of algorithm iterations.
- iprint : int, optional
- Deprecated option that previously controlled the text printed on the
- screen during the problem solution. Now the code does not emit any
- output and this keyword has no function.
- .. deprecated:: 1.15.0
- This keyword is deprecated and will be removed from SciPy 1.18.0.
- maxls : int, optional
- Maximum number of line search steps (per iteration). Default is 20.
- finite_diff_rel_step : None or array_like, optional
- If ``jac in ['2-point', '3-point', 'cs']`` the relative step size to
- use for numerical approximation of the jacobian. The absolute step
- size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
- possibly adjusted to fit into the bounds. For ``method='3-point'``
- the sign of `h` is ignored. If None (default) then step is selected
- automatically.
- workers : int, map-like callable, optional
- A map-like callable, such as `multiprocessing.Pool.map` for evaluating
- any numerical differentiation in parallel.
- This evaluation is carried out as ``workers(fun, iterable)``.
- .. versionadded:: 1.16.0
- Notes
- -----
- The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
- but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
- relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
- I.e., `factr` multiplies the default machine floating-point precision to
- arrive at `ftol`.
- If the minimization is slow to converge the optimizer may halt if the
- total number of function evaluations exceeds `maxfun`, or the number of
- algorithm iterations has reached `maxiter` (whichever comes first). If
- this is the case then ``result.success=False``, and an appropriate
- error message is contained in ``result.message``.
- """
- _check_unknown_options(unknown_options)
- m = maxcor
- pgtol = gtol
- factr = ftol / np.finfo(float).eps
- x0 = asarray(x0).ravel()
- n, = x0.shape
- if disp is not _NoValue:
- warnings.warn("scipy.optimize: The `disp` and `iprint` options of the "
- "L-BFGS-B solver are deprecated and will be removed in "
- "SciPy 1.18.0.",
- DeprecationWarning, stacklevel=3)
- if iprint is not _NoValue:
- warnings.warn("scipy.optimize: The `disp` and `iprint` options of the "
- "L-BFGS-B solver are deprecated and will be removed in "
- "SciPy 1.18.0.",
- DeprecationWarning, stacklevel=3)
- # historically old-style bounds were/are expected by lbfgsb.
- # That's still the case but we'll deal with new-style from here on,
- # it's easier
- if bounds is None:
- pass
- elif len(bounds) != n:
- raise ValueError('length of x0 != length of bounds')
- else:
- bounds = np.array(old_bound_to_new(bounds))
- # check bounds
- if (bounds[0] > bounds[1]).any():
- raise ValueError(
- "LBFGSB - one of the lower bounds is greater than an upper bound."
- )
- # initial vector must lie within the bounds. Otherwise ScalarFunction and
- # approx_derivative will cause problems
- x0 = np.clip(x0, bounds[0], bounds[1])
- # _prepare_scalar_function can use bounds=None to represent no bounds
- sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
- bounds=bounds,
- finite_diff_rel_step=finite_diff_rel_step,
- workers=workers)
- func_and_grad = sf.fun_and_grad
- nbd = zeros(n, np.int32)
- low_bnd = zeros(n, float64)
- upper_bnd = zeros(n, float64)
- bounds_map = {(-np.inf, np.inf): 0,
- (1, np.inf): 1,
- (1, 1): 2,
- (-np.inf, 1): 3}
- if bounds is not None:
- for i in range(0, n):
- L, U = bounds[0, i], bounds[1, i]
- if not np.isinf(L):
- low_bnd[i] = L
- L = 1
- if not np.isinf(U):
- upper_bnd[i] = U
- U = 1
- nbd[i] = bounds_map[L, U]
- if not maxls > 0:
- raise ValueError('maxls must be positive.')
- x = array(x0, dtype=np.float64)
- f = array(0.0, dtype=np.float64)
- g = zeros((n,), dtype=np.float64)
- wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
- iwa = zeros(3*n, dtype=np.int32)
- task = zeros(2, dtype=np.int32)
- ln_task = zeros(2, dtype=np.int32)
- lsave = zeros(4, dtype=np.int32)
- isave = zeros(44, dtype=np.int32)
- dsave = zeros(29, dtype=float64)
- n_iterations = 0
- while True:
- # g may become float32 if a user provides a function that calculates
- # the Jacobian in float32 (see gh-18730). The underlying code expects
- # float64, so upcast it
- g = g.astype(np.float64)
- # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
- _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, pgtol, wa,
- iwa, task, lsave, isave, dsave, maxls, ln_task)
- if task[0] == 3:
- # The minimization routine wants f and g at the current x.
- # Note that interruptions due to maxfun are postponed
- # until the completion of the current minimization iteration.
- # Overwrite f and g:
- f, g = func_and_grad(x)
- elif task[0] == 1:
- # new iteration
- n_iterations += 1
- intermediate_result = OptimizeResult(x=x, fun=f)
- if _call_callback_maybe_halt(callback, intermediate_result):
- task[0] = 5
- task[1] = 505
- if n_iterations >= maxiter:
- task[0] = 5
- task[1] = 504
- elif sf.nfev > maxfun:
- task[0] = 5
- task[1] = 502
- else:
- break
- if task[0] == 4:
- warnflag = 0
- elif sf.nfev > maxfun or n_iterations >= maxiter:
- warnflag = 1
- else:
- warnflag = 2
- # These two portions of the workspace are described in the mainlb
- # function docstring in "__lbfgsb.c", ws and wy arguments.
- s = wa[0: m*n].reshape(m, n)
- y = wa[m*n: 2*m*n].reshape(m, n)
- # isave(31) = the total number of BFGS updates prior the current iteration.
- n_bfgs_updates = isave[30]
- n_corrs = min(n_bfgs_updates, maxcor)
- hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
- msg = status_messages[task[0]] + ": " + task_messages[task[1]]
- return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
- njev=sf.ngev,
- nit=n_iterations, status=warnflag, message=msg,
- x=x, success=(warnflag == 0), hess_inv=hess_inv)
- class LbfgsInvHessProduct(LinearOperator):
- """Linear operator for the L-BFGS approximate inverse Hessian.
- This operator computes the product of a vector with the approximate inverse
- of the Hessian of the objective function, using the L-BFGS limited
- memory approximation to the inverse Hessian, accumulated during the
- optimization.
- Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
- interface.
- Parameters
- ----------
- sk : array_like, shape=(n_corr, n)
- Array of `n_corr` most recent updates to the solution vector.
- (See [1]).
- yk : array_like, shape=(n_corr, n)
- Array of `n_corr` most recent updates to the gradient. (See [1]).
- References
- ----------
- .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
- storage." Mathematics of computation 35.151 (1980): 773-782.
- """
- def __init__(self, sk, yk):
- """Construct the operator."""
- if sk.shape != yk.shape or sk.ndim != 2:
- raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
- n_corrs, n = sk.shape
- super().__init__(dtype=np.float64, shape=(n, n))
- self.sk = sk
- self.yk = yk
- self.n_corrs = n_corrs
- self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
- def _matvec(self, x):
- """Efficient matrix-vector multiply with the BFGS matrices.
- This calculation is described in Section (4) of [1].
- Parameters
- ----------
- x : ndarray
- An array with shape (n,) or (n,1).
- Returns
- -------
- y : ndarray
- The matrix-vector product
- """
- s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
- q = np.array(x, dtype=self.dtype, copy=True)
- if q.ndim == 2 and q.shape[1] == 1:
- q = q.reshape(-1)
- alpha = np.empty(n_corrs)
- for i in range(n_corrs-1, -1, -1):
- alpha[i] = rho[i] * np.dot(s[i], q)
- q = q - alpha[i]*y[i]
- r = q
- for i in range(n_corrs):
- beta = rho[i] * np.dot(y[i], r)
- r = r + s[i] * (alpha[i] - beta)
- return r
- def _matmat(self, X):
- """Efficient matrix-matrix multiply with the BFGS matrices.
- This calculation is described in Section (4) of [1].
- Parameters
- ----------
- X : ndarray
- An array with shape (n,m)
- Returns
- -------
- Y : ndarray
- The matrix-matrix product
- Notes
- -----
- This implementation is written starting from _matvec and broadcasting
- all expressions along the second axis of X.
- """
- s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
- Q = np.array(X, dtype=self.dtype, copy=True)
- alpha = np.empty((n_corrs, Q.shape[1]))
- for i in range(n_corrs-1, -1, -1):
- alpha[i] = rho[i] * np.dot(s[i], Q)
- Q -= alpha[i]*y[i][:, np.newaxis]
- R = Q
- for i in range(n_corrs):
- beta = rho[i] * np.dot(y[i], R)
- R += s[i][:, np.newaxis] * (alpha[i] - beta)
- return R
- def todense(self):
- """Return a dense array representation of this operator.
- Returns
- -------
- arr : ndarray, shape=(n, n)
- An array with the same shape and containing
- the same data represented by this `LinearOperator`.
- """
- I_arr = np.eye(*self.shape, dtype=self.dtype)
- return self._matmat(I_arr)
|