| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559 |
- '''
- This module provides Powell's COBYLA algorithm.
- Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
- Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
- Python translation by Nickolai Belakovski.
- N.B.:
- 1. The modern-Fortran reference implementation in PRIMA contains bug fixes and improvements over the
- original Fortran 77 implementation by Powell. Consequently, the PRIMA implementation behaves differently
- from the original Fortran 77 implementation by Powell. Therefore, it is important to point out that
- you are using PRIMA rather than the original solvers if you want your results to be reproducible.
- 2. Compared to Powell's Fortran 77 implementation, the modern-Fortran implementation and hence any
- faithful translation like this one generally produce better solutions with fewer function evaluations,
- making them preferable for applications with expensive function evaluations. However, if function
- evaluations are not the dominant cost in your application, the Fortran 77 solvers are likely to be
- faster, as they are more efficient in terms of memory usage and flops thanks to the careful and
- ingenious (but unmaintained and unmaintainable) implementation by Powell.
- See the PRIMA documentation (www.libprima.net) for more information.
- '''
- from ..common.evaluate import evaluate, moderatex, moderatef, moderatec
- from ..common.consts import (EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, CTOL_DEFAULT,
- CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT,
- MAXFUN_DIM_DEFAULT, DEBUGGING, BOUNDMAX,
- ETA1_DEFAULT, ETA2_DEFAULT, GAMMA1_DEFAULT,
- GAMMA2_DEFAULT)
- from ..common.preproc import preproc
- from ..common.present import present
- from ..common.linalg import matprod
- from .cobylb import cobylb
- import numpy as np
- from dataclasses import dataclass
- from copy import copy
- @dataclass
- class COBYLAResult:
- x: np.ndarray
- f: float
- constr: np.ndarray
- cstrv: float
- nf: int
- xhist: np.ndarray | None
- fhist: np.ndarray | None
- chist: np.ndarray | None
- conhist: np.ndarray | None
- info: int
- def cobyla(calcfc, m_nlcon, x, Aineq=None, bineq=None, Aeq=None, beq=None,
- xl=None, xu=None, f0=None, nlconstr0=None, rhobeg=None, rhoend=None,
- ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT,
- maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None,
- gamma1=GAMMA1_DEFAULT, gamma2=GAMMA2_DEFAULT, maxhist=None, maxfilt=2000,
- callback=None):
- """
- Among all the arguments, only CALCFC, M_NLCON, and X are obligatory. The others are
- OPTIONAL and you can neglect them unless you are familiar with the algorithm. Any
- unspecified optional input will take the default value detailed below. For
- instance, we may invoke the solver as follows.
- # First define CALCFC, M_NLCON, and X, and then do the following.
- result = cobyla(calcfc, m_nlcon, x)
- or
- # First define CALCFC, M_NLCON, X, Aineq, and Bineq, and then do the following.
- result = cobyla(calcfc, m_nlcon, x, Aineq=Aineq, bineq=bineq, rhobeg=1.0e0,
- rhoend=1.0e-6)
- ####################################################################################
- # IMPORTANT NOTICE: The user must set M_NLCON correctly to the number of nonlinear
- # constraints, namely the size of NLCONSTR introduced below. Set it to 0 if there
- # is no nonlinear constraint.
- ####################################################################################
- See examples/cobyla/cobyla_example.py for a concrete example.
- A detailed introduction to the arguments is as follows.
- ####################################################################################
- # INPUTS
- ####################################################################################
- CALCFC
- Input, function.
- f, nlconstr = CALCFC(X) should evaluate the objective function and nonlinear
- constraints at the given vector X; it should return a tuple consisting of the
- objective function value and the nonlinear constraint value. It must be provided
- by the user, and its definition must conform to the following interface:
- #-------------------------------------------------------------------------#
- def calcfc(x):
- f = 0.0
- nlconstr = np.zeros(m_nlcon)
- return f, nlconstr
- #-------------------------------------------------------------------------#
- M_NLCON
- Input, scalar.
- M_NLCON must be set to the number of nonlinear constraints, namely the size of
- NLCONSTR(X).
- N.B.:
- 1. Why don't we define M_NLCON as optional and default it to 0 when it is absent?
- This is because we need to allocate memory for CONSTR_LOC using M_NLCON. To
- ensure that the size of CONSTR_LOC is correct, we require the user to specify
- M_NLCON explicitly.
- X
- Input, vector.
- As an input, X should be an N-dimensional vector that contains the starting
- point, N being the dimension of the problem.
- Aineq, Bineq
- Input, matrix of size [Mineq, N] and vector of size Mineq unless they are both
- empty, default: None and None.
- Aineq and Bineq represent the linear inequality constraints: Aineq*X <= Bineq.
- Aeq, Beq
- Input, matrix of size [Meq, N] and vector of size Meq unless they are both
- empty, default: None and None.
- Aeq and Beq represent the linear equality constraints: Aeq*X = Beq.
- XL, XU
- Input, vectors of size N unless they are both None, default: None and None.
- XL is the lower bound for X. If XL is None, X has no
- lower bound. Any entry of XL that is NaN or below -BOUNDMAX will be taken as
- -BOUNDMAX, which effectively means there is no lower bound for the corresponding
- entry of X. The value of BOUNDMAX is 0.25*HUGE(X), which is about 8.6E37 for
- single precision and 4.5E307 for double precision. XU is similar.
- F0
- Input, scalar.
- F0, if present, should be set to the objective function value of the starting X.
- NLCONSTR0
- Input, vector.
- NLCONSTR0, if present, should be set to the nonlinear constraint value at the
- starting X; in addition, SIZE(NLCONSTR0) must be M_NLCON, or the solver will
- abort.
- RHOBEG, RHOEND
- Inputs, scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be
- set to the initial and final values of a trust-region radius, both being positive
- and RHOEND <= RHOBEG. Typically RHOBEG should be about one tenth of the greatest
- expected change to a variable, and RHOEND should indicate the accuracy that is
- required in the final values of the variables.
- FTARGET
- Input, scalar, default: -Inf.
- FTARGET is the target function value. The algorithm will terminate when a
- feasible point with a function value <= FTARGET is found.
- CTOL
- Input, scalar, default: sqrt(machine epsilon).
- CTOL is the tolerance of constraint violation. X is considered feasible if
- CSTRV(X) <= CTOL.
- N.B.:
- 1. CTOL is absolute, not relative.
- 2. CTOL is used only when selecting the returned X. It does not affect the
- iterations of the algorithm.
- CWEIGHT
- Input, scalar, default: CWEIGHT_DFT defined in common/consts.py.
- CWEIGHT is the weight that the constraint violation takes in the selection of the
- returned X.
- MAXFUN
- Input, integer scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in
- common/consts.py. MAXFUN is the maximal number of calls of CALCFC.
- IPRINT
- Input, integer scalar, default: 0.
- The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls
- how much information will be printed during the computation:
- 0: there will be no printing;
- 1: a message will be printed to the screen at the return, showing the best vector
- of variables found and its objective function value;
- 2: in addition to 1, each new value of RHO is printed to the screen, with the
- best vector of variables so far and its objective function value; each new
- value of CPEN is also printed;
- 3: in addition to 2, each function evaluation with its variables will be printed
- to the screen; -1, -2, -3: the same information as 1, 2, 3 will be printed,
- not to the screen but to a file named COBYLA_output.txt; the file will be
- created if it does not exist; the new output will be appended to the end of
- this file if it already exists.
- Note that IPRINT = +/-3 can be costly in terms of time and/or space.
- ETA1, ETA2, GAMMA1, GAMMA2
- Input, scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2.
- ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the
- trust-region radius detailed in the subroutine TRRAD in trustregion.py. Roughly
- speaking, the trust-region radius is contracted by a factor of GAMMA1 when the
- reduction ratio is below ETA1, and enlarged by a factor of GAMMA2 when the
- reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2 < 1 and
- 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set
- ETA1 >= 0.5.
- MAXFILT
- Input, scalar.
- MAXFILT is a nonnegative integer indicating the maximal length of the filter used
- for selecting the returned solution; default: MAXFILT_DFT (a value lower than
- MIN_MAXFILT is not recommended);
- see common/consts.py for the definitions of MAXFILT_DFT and MIN_MAXFILT.
- CALLBACK
- Input, function to report progress and optionally request termination.
- ####################################################################################
- # OUTPUTS
- ####################################################################################
- The output is a single data structure, COBYLAResult, with the following fields:
- X
- Output, vector.
- As an output, X will be set to an approximate minimizer.
- F
- Output, scalar.
- F will be set to the objective function value of X at exit.
- CONSTR
- Output, vector.
- CONSTR will be set to the constraint value of X at exit.
- CSTRV
- Output, scalar.
- CSTRV will be set to the constraint violation of X at exit, i.e.,
- max([0, XL - X, X - XU, Aineq*X - Bineq, ABS(Aeq*X -Beq), NLCONSTR(X)]).
- NF
- Output, scalar.
- NF will be set to the number of calls of CALCFC at exit.
- XHIST, FHIST, CHIST, CONHIST, MAXHIST
- XHIST: Output, rank 2 array;
- FHIST: Output, rank 1 array;
- CHIST: Output, rank 1 array;
- CONHIST: Output, rank 2 array;
- MAXHIST: Input, scalar, default: MAXFUN
- XHIST, if present, will output the history of iterates; FHIST, if present, will
- output the history function values; CHIST, if present, will output the history of
- constraint violations; CONHIST, if present, will output the history of constraint
- values; MAXHIST should be a nonnegative integer, and XHIST/FHIST/CHIST/CONHIST
- will output only the history of the last MAXHIST iterations.
- Therefore, MAXHIST= 0 means XHIST/FHIST/CONHIST/CHIST will output
- nothing, while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to
- output all the history. If XHIST is present, its size at exit will be
- (N, min(NF, MAXHIST)); if FHIST is present, its size at exit will be
- min(NF, MAXHIST); if CHIST is present, its size at exit will be min(NF, MAXHIST);
- if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)).
- IMPORTANT NOTICE:
- Setting MAXHIST to a large value can be costly in terms of memory for large
- problems.
- MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM
- defined in common/consts.py
- Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large
- problems).
- INFO
- Output, scalar.
- INFO is the exit flag. It will be set to one of the following values defined in
- common/infos.py:
- SMALL_TR_RADIUS: the lower bound for the trust region radius is reached;
- FTARGET_ACHIEVED: the target function value is reached;
- MAXFUN_REACHED: the objective function has been evaluated MAXFUN times;
- MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN);
- NAN_INF_X: NaN or Inf occurs in X;
- DAMAGING_ROUNDING: rounding errors are becoming damaging.
- #--------------------------------------------------------------------------#
- The following case(s) should NEVER occur unless there is a bug.
- NAN_INF_F: the objective function returns NaN or +Inf;
- NAN_INF_MODEL: NaN or Inf occurs in the model;
- TRSUBP_FAILED: a trust region step failed to reduce the model
- #--------------------------------------------------------------------------#
- """
- # Local variables
- solver = "COBYLA"
- srname = "COBYLA"
- # Sizes
- mineq = len(bineq) if present(bineq) else 0
- meq = len(beq) if present(beq) else 0
- mxl = sum(xl > -BOUNDMAX) if present(xl) else 0
- mxu = sum(xu < BOUNDMAX) if present(xu) else 0
- mmm = mxu + mxl + 2*meq + mineq + m_nlcon
- num_vars = len(x)
- # Preconditions
- if DEBUGGING:
- assert m_nlcon >= 0, f'{srname} M_NLCON >= 0'
- assert num_vars >= 1, f'{srname} N >= 1'
- assert present(Aineq) == present(bineq), \
- f'{srname} Aineq and Bineq are both present or both absent'
- if (present(Aineq)):
- assert Aineq.shape == (mineq, num_vars), f'{srname} SIZE(Aineq) == [Mineq, N]'
- assert present(Aeq) == present(beq), \
- f'{srname} Aeq and Beq are both present or both absent'
- if (present(Aeq)):
- assert Aeq.shape == (meq, num_vars), f'{srname} SIZE(Aeq) == [Meq, N]'
- if (present(xl)):
- assert len(xl) == num_vars, f'{srname} SIZE(XL) == N'
- if (present(xu)):
- assert len(xu) == num_vars, f'{srname} SIZE(XU) == N'
- # N.B.: If NLCONSTR0 is present, then F0 must be present, and we assume that
- # F(X0) = F0 even if F0 is NaN; if NLCONSTR0 is absent, then F0 must be either
- # absent or NaN, both of which will be interpreted as F(X0) is not provided.
- if present(nlconstr0):
- assert present(f0), f'{srname} If NLCONSTR0 is present, then F0 is present'
- if present(f0):
- assert np.isnan(f0) or present(nlconstr0), \
- f'{srname} If F0 is present and not NaN, then NLCONSTR0 is present'
- # Exit if the size of NLCONSTR0 is inconsistent with M_NLCON.
- if present(nlconstr0):
- assert np.size(nlconstr0) == m_nlcon
- # Read the inputs.
- if xl is not None:
- xl = copy(xl)
- xl[np.isnan(xl)] = -BOUNDMAX
- xl[xl < -BOUNDMAX] = -BOUNDMAX
- if xu is not None:
- xu = copy(xu)
- xu[np.isnan(xu)] = BOUNDMAX
- xu[xu > BOUNDMAX] = BOUNDMAX
- # Wrap the linear and bound constraints into a single constraint: AMAT@X <= BVEC.
- amat, bvec = get_lincon(Aeq, Aineq, beq, bineq, xl, xu)
- # Create constraint vector
- constr = np.zeros(mmm)
- # Set [F_LOC, CONSTR_LOC] to [F(X0), CONSTR(X0)] after evaluating the latter if
- # needed. In this way, COBYLB only needs one interface.
- # N.B.: Due to the preconditions above, there are two possibilities for F0 and
- # NLCONSTR0.
- # If NLCONSTR0 is present, then F0 must be present, and we assume that F(X0) = F0
- # even if F0 is NaN.
- # If NLCONSTR0 is absent, then F0 must be either absent or NaN, both of which will
- # be interpreted as F(X0) is not provided and we have to evaluate F(X0) and
- # NLCONSTR(X0) now.
- if (present(f0) and present(nlconstr0) and all(np.isfinite(x))):
- f = moderatef(f0)
- if amat is not None:
- constr[:mmm - m_nlcon] = moderatec(matprod(amat, x) - bvec)
- constr[mmm - m_nlcon:] = moderatec(nlconstr0)
- else:
- x = moderatex(x)
- f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
- constr[:mmm - m_nlcon] = moderatec(constr[:mmm - m_nlcon])
- # N.B.: Do NOT call FMSG, SAVEHIST, or SAVEFILT for the function/constraint evaluation at X0.
- # They will be called during the initialization, which will read the function/constraint at X0.
- cstrv = max(np.append(0, constr))
- # If RHOBEG is present, use it; otherwise, RHOBEG takes the default value for
- # RHOBEG, taking the value of RHOEND into account. Note that RHOEND is considered
- # only if it is present and it is VALID (i.e., finite and positive). The other
- # inputs are read similarly.
- if present(rhobeg):
- rhobeg = rhobeg
- elif present(rhoend) and np.isfinite(rhoend) and rhoend > 0:
- rhobeg = max(10 * rhoend, RHOBEG_DEFAULT)
- else:
- rhobeg = RHOBEG_DEFAULT
- if present(rhoend):
- rhoend = rhoend
- elif rhobeg > 0:
- rhoend = max(EPS, min(RHOEND_DEFAULT/RHOBEG_DEFAULT * rhobeg, RHOEND_DEFAULT))
- else:
- rhoend = RHOEND_DEFAULT
- maxfun = maxfun if present(maxfun) else MAXFUN_DIM_DEFAULT * num_vars
- if present(eta1):
- eta1 = eta1
- elif present(eta2) and 0 < eta2 < 1:
- eta1 = max(EPS, eta2 / 7)
- else:
- eta1 = ETA1_DEFAULT
- if present(eta2):
- eta2 = eta2
- elif 0 < eta1 < 1:
- eta2 = (eta1 + 2) / 3
- else:
- eta2 = ETA2_DEFAULT
- maxhist = (
- maxhist
- if present(maxhist)
- else max(maxfun, num_vars + 2, MAXFUN_DIM_DEFAULT * num_vars)
- )
- # Preprocess the inputs in case some of them are invalid. It does nothing if all
- # inputs are valid.
- (
- iprint,
- maxfun,
- maxhist,
- ftarget,
- rhobeg,
- rhoend,
- npt, # Unused in COBYLA
- maxfilt,
- ctol,
- cweight,
- eta1,
- eta2,
- gamma1,
- gamma2,
- _x0, # Unused in COBYLA
- ) = preproc(
- solver,
- num_vars,
- iprint,
- maxfun,
- maxhist,
- ftarget,
- rhobeg,
- rhoend,
- num_constraints=mmm,
- maxfilt=maxfilt,
- ctol=ctol,
- cweight=cweight,
- eta1=eta1,
- eta2=eta2,
- gamma1=gamma1,
- gamma2=gamma2,
- is_constrained=(mmm > 0),
- )
- # Further revise MAXHIST according to MAXHISTMEM, and allocate memory for the history.
- # In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize
- # CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN)
- # if they are requested; replace MAXFUN with 0 for the history that is not requested.
- # prehist(maxhist, num_vars, present(xhist), xhist_loc, present(fhist), fhist_loc, &
- # & present(chist), chist_loc, m, present(conhist), conhist_loc)
- # call cobylb, which performs the real calculations
- x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info = cobylb(
- calcfc,
- iprint,
- maxfilt,
- maxfun,
- amat,
- bvec,
- ctol,
- cweight,
- eta1,
- eta2,
- ftarget,
- gamma1,
- gamma2,
- rhobeg,
- rhoend,
- constr,
- f,
- x,
- maxhist,
- callback
- )
- return COBYLAResult(x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info)
- def get_lincon(Aeq=None, Aineq=None, beq=None, bineq=None, xl=None, xu=None):
- """
- This subroutine wraps the linear and bound constraints into a single constraint:
- AMAT*X <= BVEC.
- N.B.:
- LINCOA normalizes the linear constraints so that each constraint has a gradient
- of norm 1. However, COBYLA does not do this.
- """
- # Sizes
- if Aeq is not None:
- num_vars = Aeq.shape[1]
- elif Aineq is not None:
- num_vars = Aineq.shape[1]
- elif xl is not None:
- num_vars = len(xl)
- elif xu is not None:
- num_vars = len(xu)
- else:
- return None, None
- # Preconditions
- if DEBUGGING:
- assert Aineq is None or Aineq.shape == (len(bineq), num_vars)
- assert Aeq is None or Aeq.shape == (len(beq), num_vars)
- assert (xl is None or xu is None) or len(xl) == len(xu) == num_vars
- #====================#
- # Calculation starts #
- #====================#
- # Define the indices of the nontrivial bound constraints.
- ixl = np.where(xl > -BOUNDMAX)[0] if xl is not None else None
- ixu = np.where(xu < BOUNDMAX)[0] if xu is not None else None
- # Wrap the linear constraints.
- # The bound constraint XL <= X <= XU is handled as two constraints:
- # -X <= -XL, X <= XU.
- # The equality constraint Aeq*X = Beq is handled as two constraints:
- # -Aeq*X <= -Beq, Aeq*X <= Beq.
- # N.B.:
- # 1. The treatment of the equality constraints is naive. One may choose to
- # eliminate them instead.
- idmat = np.eye(num_vars)
- amat = np.vstack([
- -idmat[ixl, :] if ixl is not None else np.empty((0, num_vars)),
- idmat[ixu, :] if ixu is not None else np.empty((0, num_vars)),
- -Aeq if Aeq is not None else np.empty((0, num_vars)),
- Aeq if Aeq is not None else np.empty((0, num_vars)),
- Aineq if Aineq is not None else np.empty((0, num_vars))
- ])
- bvec = np.hstack([
- -xl[ixl] if ixl is not None else np.empty(0),
- xu[ixu] if ixu is not None else np.empty(0),
- -beq if beq is not None else np.empty(0),
- beq if beq is not None else np.empty(0),
- bineq if bineq is not None else np.empty(0)
- ])
- amat = amat if amat.shape[0] > 0 else None
- bvec = bvec if bvec.shape[0] > 0 else None
- #==================#
- # Calculation ends #
- #==================#
- # Postconditions
- if DEBUGGING:
- assert (amat is None and bvec is None) or amat.shape == (len(bvec), num_vars)
- return amat, bvec
|