cobylb.py 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714
  1. '''
  2. This module performs the major calculations of COBYLA.
  3. Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
  4. Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
  5. Python translation by Nickolai Belakovski.
  6. '''
  7. import numpy as np
  8. from ..common.checkbreak import checkbreak_con
  9. from ..common.consts import REALMAX, EPS, DEBUGGING, MIN_MAXFILT
  10. from ..common.infos import INFO_DEFAULT, MAXTR_REACHED, DAMAGING_ROUNDING, \
  11. SMALL_TR_RADIUS, CALLBACK_TERMINATE
  12. from ..common.evaluate import evaluate
  13. from ..common.history import savehist
  14. from ..common.linalg import isinv, matprod, inprod, norm, primasum, primapow2
  15. from ..common.message import fmsg, retmsg, rhomsg
  16. from ..common.ratio import redrat
  17. from ..common.redrho import redrho
  18. from ..common.selectx import savefilt, selectx
  19. from .update import updatepole, findpole, updatexfc
  20. from .geometry import setdrop_tr, geostep
  21. from .trustregion import trstlp, trrad
  22. from .initialize import initxfc, initfilt
  23. def cobylb(calcfc, iprint, maxfilt, maxfun, amat, bvec, ctol, cweight, eta1, eta2,
  24. ftarget, gamma1, gamma2, rhobeg, rhoend, constr, f, x, maxhist, callback):
  25. '''
  26. This subroutine performs the actual computations of COBYLA.
  27. '''
  28. # Outputs
  29. xhist = []
  30. fhist = []
  31. chist = []
  32. conhist = []
  33. # Local variables
  34. solver = 'COBYLA'
  35. A = np.zeros((np.size(x), np.size(constr))) # A contains the approximate gradient for the constraints
  36. distsq = np.zeros(np.size(x) + 1)
  37. # CPENMIN is the minimum of the penalty parameter CPEN for the L-infinity
  38. # constraint violation in the merit function. Note that CPENMIN = 0 in Powell's
  39. # implementation, which allows CPEN to be 0. Here, we take CPENMIN > 0 so that CPEN
  40. # is always positive. This avoids the situation where PREREM becomes 0 when
  41. # PREREF = 0 = CPEN. It brings two advantages as follows.
  42. # 1. If the trust-region subproblem solver works correctly and the trust-region
  43. # center is not optimal for the subproblem, then PREREM > 0 is guaranteed. This
  44. # is because, in theory, PREREC >= 0 and MAX(PREREC, PREREF) > 0, and the
  45. # definition of CPEN in GETCPEN ensures that PREREM > 0.
  46. # 2. There is no need to revise ACTREM and PREREM when CPEN = 0 and F = FVAL(N+1)
  47. # as in lines 312--314 of Powell's cobylb.f code. Powell's code revises ACTREM
  48. # to CVAL(N + 1) - CSTRV and PREREM to PREREC in this case, which is crucial for
  49. # feasibility problems.
  50. cpenmin = EPS
  51. # Sizes
  52. m_lcon = np.size(bvec) if bvec is not None else 0
  53. num_constraints = np.size(constr)
  54. m_nlcon = num_constraints - m_lcon
  55. num_vars = np.size(x)
  56. # Preconditions
  57. if DEBUGGING:
  58. assert abs(iprint) <= 3
  59. assert num_constraints >= m_lcon and m_lcon >= 0
  60. assert num_vars >= 1
  61. assert maxfun >= num_vars + 2
  62. assert rhobeg >= rhoend and rhoend > 0
  63. assert all(np.isfinite(x))
  64. assert 0 <= eta1 <= eta2 < 1
  65. assert 0 < gamma1 < 1 < gamma2
  66. assert 0 <= ctol
  67. assert 0 <= cweight
  68. assert 0 <= maxhist <= maxfun
  69. assert amat is None or np.shape(amat) == (m_lcon, num_vars)
  70. assert min(MIN_MAXFILT, maxfun) <= maxfilt <= maxfun
  71. #====================#
  72. # Calculation starts #
  73. #====================#
  74. # Initialize SIM, FVAL, CONMAT, and CVAL, together with the history.
  75. # After the initialization, SIM[:, NUM_VARS] holds the vertex of the initial
  76. # simplex with the smallest function value (regardless of the constraint
  77. # violation), and SIM[:, :NUM_VARS] holds the displacements from the other vertices
  78. # to SIM[:, NUM_VARS]. FVAL, CONMAT, and CVAL hold the function values, constraint
  79. # values, and constraint violations on the vertices in the order corresponding to
  80. # SIM.
  81. evaluated, conmat, cval, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint,
  82. maxfun, constr, amat, bvec, ctol, f, ftarget, rhobeg, x,
  83. xhist, fhist, chist, conhist, maxhist)
  84. # Initialize the filter, including xfilt, ffilt, confilt, cfilt, and nfilt.
  85. # N.B.: The filter is used only when selecting which iterate to return. It does not
  86. # interfere with the iterations. COBYLA is NOT a filter method but a trust-region
  87. # method based on an L-infinity merit function. Powell's implementation does not
  88. # use a filter to select the iterate, possibly returning a suboptimal iterate.
  89. cfilt = np.zeros(np.minimum(np.maximum(maxfilt, 1), maxfun))
  90. confilt = np.zeros((np.size(constr), np.size(cfilt)))
  91. ffilt = np.zeros(np.size(cfilt))
  92. xfilt = np.zeros((np.size(x), np.size(cfilt)))
  93. nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt,
  94. ffilt, xfilt)
  95. # Check whether to return due to abnormal cases that may occur during the initialization.
  96. if subinfo != INFO_DEFAULT:
  97. info = subinfo
  98. # Return the best calculated values of the variables
  99. # N.B: Selectx and findpole choose X by different standards, one cannot replace the other
  100. kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], cweight, ctol)
  101. x = xfilt[:, kopt]
  102. f = ffilt[kopt]
  103. constr = confilt[:, kopt]
  104. cstrv = cfilt[kopt]
  105. # print a return message according to IPRINT.
  106. retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
  107. # Postconditions
  108. if DEBUGGING:
  109. assert nf <= maxfun
  110. assert np.size(x) == num_vars and not any(np.isnan(x))
  111. assert not (np.isnan(f) or np.isposinf(f))
  112. # assert np.size(xhist, 0) == n and np.size(xhist, 1) == maxxhist
  113. # assert not any(np.isnan(xhist(:, 1:min(nf, maxxhist))))
  114. # The last calculated X can be Inf (finite + finite can be Inf numerically).
  115. # assert np.size(fhist) == maxfhist
  116. # assert not any(np.isnan(fhist(1:min(nf, maxfhist))) or np.isposinf(fhist(1:min(nf, maxfhist))))
  117. # assert np.size(conhist, 0) == m and np.size(conhist, 1) == maxconhist
  118. # assert not any(np.isnan(conhist(:, 1:min(nf, maxconhist))) or np.isneginf(conhist(:, 1:min(nf, maxconhist))))
  119. # assert np.size(chist) == maxchist
  120. # assert not any(chist(1:min(nf, maxchist)) < 0 or np.isnan(chist(1:min(nf, maxchist))) or np.isposinf(chist(1:min(nf, maxchist))))
  121. # nhist = minval([nf, maxfhist, maxchist])
  122. # assert not any(isbetter(fhist(1:nhist), chist(1:nhist), f, cstrv, ctol))
  123. return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info
  124. # Set some more initial values.
  125. # We must initialize shortd, ratio, and jdrop_tr because these get defined on
  126. # branches that are not guaranteed to be executed, but their values are used later.
  127. # Our initialization of CPEN differs from Powell's in two ways. First, we use the
  128. # ratio defined in (13) of Powell's COBYLA paper to initialize CPEN. Second, we
  129. # impose CPEN >= CPENMIN > 0. Powell's code simply initializes CPEN to 0.
  130. rho = rhobeg
  131. delta = rhobeg
  132. cpen = np.maximum(cpenmin, np.minimum(1.0E3, fcratio(conmat, fval))) # Powell's code: CPEN = ZERO
  133. shortd = False
  134. ratio = -1
  135. jdrop_tr = 0
  136. # If DELTA <= GAMMA3*RHO after an update, we set DELTA to RHO. GAMMA3 must be less
  137. # than GAMMA2. The reason is as follows. Imagine a very successful step with
  138. # DNORM = the un-updated DELTA = RHO. The TRRAD will update DELTA to GAMMA2*RHO.
  139. # If GAMMA3 >= GAMMA2, then DELTA will be reset to RHO, which is not reasonable as
  140. # D is very successful. See paragraph two of Sec 5.2.5 in T. M. Ragonneau's thesis:
  141. # "Model-Based Derivative-Free Optimization Methods and Software." According to
  142. # test on 20230613, for COBYLA, this Powellful updating scheme of DELTA works
  143. # slightly better than setting directly DELTA = max(NEW_DELTA, RHO).
  144. gamma3 = np.maximum(1, np.minimum(0.75 * gamma2, 1.5))
  145. # MAXTR is the maximal number of trust region iterations. Each trust-region
  146. # iteration takes 1 or 2 function evaluations unless the trust-region step is short
  147. # or the trust-region subproblem solver fails but the geometry step is not invoked.
  148. # Thus the following MAXTR is unlikely to be reached.
  149. maxtr = 10 * maxfun
  150. info = MAXTR_REACHED
  151. # Begin the iterative procedure
  152. # After solving a trust-region subproblem, we use three boolean variables to
  153. # control the workflow.
  154. # SHORTD - Is the trust-region trial step too short to invoke # a function
  155. # evaluation?
  156. # IMPROVE_GEO - Will we improve the model after the trust-region iteration? If yes,
  157. # a geometry step will be taken, corresponding to the "Branch (Delta)"
  158. # in the COBYLA paper.
  159. # REDUCE_RHO - Will we reduce rho after the trust-region iteration?
  160. # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously.
  161. for tr in range(maxtr):
  162. # Increase the penalty parameter CPEN, if needed, so that
  163. # PREREM = PREREF + CPEN * PREREC > 0.
  164. # This is the first (out of two) update of CPEN, where CPEN increases or
  165. # remains the same.
  166. # N.B.: CPEN and the merit function PHI = FVAL + CPEN*CVAL are used in three
  167. # places only.
  168. # 1. In FINDPOLE/UPDATEPOLE, deciding the optimal vertex of the current simplex.
  169. # 2. After the trust-region trial step, calculating the reduction ratio.
  170. # 3. In GEOSTEP, deciding the direction of the geometry step.
  171. # They do not appear explicitly in the trust-region subproblem, though the
  172. # trust-region center (i.e. the current optimal vertex) is defined by them.
  173. cpen = getcpen(amat, bvec, conmat, cpen, cval, delta, fval, rho, sim, simi)
  174. # Switch the best vertex of the current simplex to SIM[:, NUM_VARS].
  175. conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval,
  176. sim, simi)
  177. # Check whether to exit due to damaging rounding in UPDATEPOLE.
  178. if subinfo == DAMAGING_ROUNDING:
  179. info = subinfo
  180. break # Better action to take? Geometry step, or simply continue?
  181. # Does the interpolation set have adequate geometry? It affects improve_geo and
  182. # reduce_rho.
  183. adequate_geo = all(primasum(primapow2(sim[:, :num_vars]), axis=0) <= 4 * primapow2(delta))
  184. # Calculate the linear approximations to the objective and constraint functions.
  185. # N.B.: TRSTLP accesses A mostly by columns, so it is more reasonable to save A
  186. # instead of A^T.
  187. # Zaikun 2023108: According to a test on 2023108, calculating G and
  188. # A(:, M_LCON+1:M) by solving the linear systems SIM^T*G = FVAL(1:N)-FVAL(N+1)
  189. # and SIM^T*A = CONMAT(:, 1:N)-CONMAT(:, N+1) does not seem to improve or worsen
  190. # the performance of COBYLA in terms of the number of function evaluations. The
  191. # system was solved by SOLVE in LINALG_MOD based on a QR factorization of SIM
  192. # (not necessarily a good algorithm). No preconditioning or scaling was used.
  193. g = matprod((fval[:num_vars] - fval[num_vars]), simi)
  194. A[:, :m_lcon] = amat.T if amat is not None else amat
  195. A[:, m_lcon:] = matprod((conmat[m_lcon:, :num_vars] -
  196. np.tile(conmat[m_lcon:, num_vars], (num_vars, 1)).T), simi).T
  197. # Calculate the trust-region trial step d. Note that d does NOT depend on cpen.
  198. d = trstlp(A, -conmat[:, num_vars], delta, g)
  199. dnorm = min(delta, norm(d))
  200. # Is the trust-region trial step short? N.B.: we compare DNORM with RHO, not
  201. # DELTA. Powell's code especially defines SHORTD by SHORTD = (DNORM < 0.5 *
  202. # RHO). In our tests 1/10 seems to work better than 1/2 or 1/4, especially for
  203. # linearly constrained problems. Note that LINCOA has a slightly more
  204. # sophisticated way of defining SHORTD, taking into account whether D causes a
  205. # change to the active set. Should we try the same here?
  206. shortd = (dnorm <= 0.1 * rho)
  207. # Predict the change to F (PREREF) and to the constraint violation (PREREC) due
  208. # to D. We have the following in precise arithmetic. They may fail to hold due
  209. # to rounding errors.
  210. # 1. B[:NUM_CONSTRAINTS] = -CONMAT[:, NUM_VARS] and hence
  211. # np.max(np.append(B[:NUM_CONSTRAINTS] - D@A[:, :NUM_CONSTRAINTS], 0)) is the
  212. # L-infinity violation of the linearized constraints corresponding to D. When
  213. # D=0, the violation is np.max(np.append(B[:NUM_CONSTRAINTS], 0)) =
  214. # CVAL[NUM_VARS]. PREREC is the reduction of this violation achieved by D,
  215. # which is nonnegative in theory; PREREC = 0 iff B[:NUM_CONSTRAINTS] <= 0, i.e.
  216. # the trust-region center satisfies the linearized constraints.
  217. # 2. PREREF may be negative or 0, but it is positive when PREREC = 0 and shortd
  218. # is False
  219. # 3. Due to 2, in theory, max(PREREC, PREREF) > 0 if shortd is False.
  220. preref = -inprod(d, g) # Can be negative
  221. prerec = cval[num_vars] - np.max(np.append(0, conmat[:, num_vars] + matprod(d, A)))
  222. # Evaluate PREREM, which is the predicted reduction in the merit function.
  223. # In theory, PREREM >= 0 and it is 0 iff CPEN = 0 = PREREF. This may not be true
  224. # numerically.
  225. prerem = preref + cpen * prerec
  226. trfail = not (prerem > 1.0E-6 * min(cpen, 1) * rho)
  227. if shortd or trfail:
  228. # Reduce DELTA if D is short or if D fails to render PREREM > 0. The latter
  229. # can only happen due to rounding errors. This seems quite important for
  230. # performance
  231. delta *= 0.1
  232. if delta <= gamma3 * rho:
  233. delta = rho # set delta to rho when it is close to or below
  234. else:
  235. # Calculate the next value of the objective and constraint functions.
  236. # If X is close to one of the points in the interpolation set, then we do
  237. # not evaluate the objective and constraints at X, assuming them to have
  238. # the values at the closest point.
  239. # N.B.: If this happens, do NOT include X into the filter, as F and CONSTR
  240. # are inaccurate.
  241. x = sim[:, num_vars] + d
  242. distsq[num_vars] = primasum(primapow2(x - sim[:, num_vars]))
  243. distsq[:num_vars] = primasum(primapow2(x.reshape(num_vars, 1) -
  244. (sim[:, num_vars].reshape(num_vars, 1) + sim[:, :num_vars])), axis=0)
  245. j = np.argmin(distsq)
  246. if distsq[j] <= primapow2(1e-4 * rhoend):
  247. f = fval[j]
  248. constr = conmat[:, j]
  249. cstrv = cval[j]
  250. else:
  251. # Evaluate the objective and constraints at X, taking care of possible
  252. # inf/nan values.
  253. f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
  254. cstrv = np.max(np.append(0, constr))
  255. nf += 1
  256. # Save X, F, CONSTR, CSTRV into the history.
  257. savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
  258. # Save X, F, CONSTR, CSTRV into the filter.
  259. nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f,
  260. x, nfilt, cfilt, ffilt,
  261. xfilt, constr, confilt)
  262. # Print a message about the function/constraint evaluation according to
  263. # iprint
  264. fmsg(solver, 'Trust region', iprint, nf, delta, f, x, cstrv, constr)
  265. # Evaluate ACTREM, which is the actual reduction in the merit function
  266. actrem = (fval[num_vars] + cpen * cval[num_vars]) - (f + cpen * cstrv)
  267. # Calculate the reduction ratio by redrat, which hands inf/nan carefully
  268. ratio = redrat(actrem, prerem, eta1)
  269. # Update DELTA. After this, DELTA < DNORM may hold.
  270. # N.B.:
  271. # 1. Powell's code uses RHO as the trust-region radius and updates it as
  272. # follows.
  273. # Reduce RHO to GAMMA1*RHO if ADEQUATE_GEO is TRUE and either SHORTD is
  274. # TRUE or RATIO < ETA1, and then revise RHO to RHOEND if its new value is
  275. # not more than GAMMA3*RHOEND; RHO remains unchanged in all other cases;
  276. # in particular, RHO is never increased.
  277. # 2. Our implementation uses DELTA as the trust-region radius, while using
  278. # RHO as a lower bound for DELTA. DELTA is updated in a way that is
  279. # typical for trust-region methods, and it is revised to RHO if its new
  280. # value is not more than GAMMA3*RHO. RHO reflects the current resolution
  281. # of the algorithm; its update is essentially the same as the update of
  282. # RHO in Powell's code (see the definition of REDUCE_RHO below). Our
  283. # implementation aligns with UOBYQA/NEWUOA/BOBYQA/LINCOA and improves the
  284. # performance of COBYLA.
  285. # 3. The same as Powell's code, we do not reduce RHO unless ADEQUATE_GEO is
  286. # TRUE. This is also how Powell updated RHO in
  287. # UOBYQA/NEWUOA/BOBYQA/LINCOA. What about we also use ADEQUATE_GEO ==
  288. # TRUE as a prerequisite for reducing DELTA? The argument would be that
  289. # the bad (small) value of RATIO may be because of a bad geometry (and
  290. # hence a bad model) rather than an improperly large DELTA, and it might
  291. # be good to try improving the geometry first without reducing DELTA.
  292. # However, according to a test on 20230206, it does not improve the
  293. # performance if we skip the update of DELTA when ADEQUATE_GEO is FALSE
  294. # and RATIO < 0.1. Therefore, we choose to update DELTA without checking
  295. # ADEQUATE_GEO.
  296. delta = trrad(delta, dnorm, eta1, eta2, gamma1, gamma2, ratio)
  297. if delta <= gamma3*rho:
  298. delta = rho # Set delta to rho when it is close to or below.
  299. # Is the newly generated X better than the current best point?
  300. ximproved = actrem > 0 # If ACTREM is NaN, then XIMPROVED should and will be False
  301. # Set JDROP_TR to the index of the vertex to be replaced with X. JDROP_TR = 0 means there
  302. # is no good point to replace, and X will not be included into the simplex; in this case,
  303. # the geometry of the simplex likely needs improvement, which will be handled below.
  304. jdrop_tr = setdrop_tr(ximproved, d, delta, rho, sim, simi)
  305. # Update SIM, SIMI, FVAL, CONMAT, and CVAL so that SIM[:, JDROP_TR] is replaced with D.
  306. # UPDATEXFC does nothing if JDROP_TR is None, as the algorithm decides to discard X.
  307. sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_tr, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
  308. # Check whether to break due to damaging rounding in UPDATEXFC
  309. if subinfo == DAMAGING_ROUNDING:
  310. info = subinfo
  311. break # Better action to take? Geometry step, or a RESCUE as in BOBYQA?
  312. # Check whether to break due to maxfun, ftarget, etc.
  313. subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x)
  314. if subinfo != INFO_DEFAULT:
  315. info = subinfo
  316. break
  317. # End of if SHORTD or TRFAIL. The normal trust-region calculation ends.
  318. # Before the next trust-region iteration, we possibly improve the geometry of the simplex or
  319. # reduce RHO according to IMPROVE_GEO and REDUCE_RHO. Now we decide these indicators.
  320. # N.B.: We must ensure that the algorithm does not set IMPROVE_GEO = True at infinitely many
  321. # consecutive iterations without moving SIM[:, NUM_VARS] or reducing RHO. Otherwise, the algorithm
  322. # will get stuck in repetitive invocations of GEOSTEP. This is ensured by the following facts:
  323. # 1. If an iteration sets IMPROVE_GEO to True, it must also reduce DELTA or set DELTA to RHO.
  324. # 2. If SIM[:, NUM_VARS] and RHO remain unchanged, then ADEQUATE_GEO will become True after at
  325. # most NUM_VARS invocations of GEOSTEP.
  326. # BAD_TRSTEP: Is the last trust-region step bad?
  327. bad_trstep = shortd or trfail or ratio <= 0 or jdrop_tr is None
  328. # IMPROVE_GEO: Should we take a geometry step to improve the geometry of the interpolation set?
  329. improve_geo = bad_trstep and not adequate_geo
  330. # REDUCE_RHO: Should we enhance the resolution by reducing rho?
  331. reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
  332. # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously.
  333. # assert not (IMPROVE_GEO and REDUCE_RHO), 'IMPROVE_GEO or REDUCE_RHO are not both TRUE, COBYLA'
  334. # If SHORTD or TRFAIL is True, then either IMPROVE_GEO or REDUCE_RHO is True unless ADEQUATE_GEO
  335. # is True and max(DELTA, DNORM) > RHO.
  336. # assert not (shortd or trfail) or (improve_geo or reduce_rho or (adequate_geo and max(delta, dnorm) > rho)), \
  337. # 'If SHORTD or TRFAIL is TRUE, then either IMPROVE_GEO or REDUCE_RHO is TRUE unless ADEQUATE_GEO is TRUE and MAX(DELTA, DNORM) > RHO'
  338. # Comments on BAD_TRSTEP:
  339. # 1. Powell's definition of BAD_TRSTEP is as follows. The one used above seems to work better,
  340. # especially for linearly constrained problems due to the factor TENTH (= ETA1).
  341. # !bad_trstep = (shortd .or. actrem <= 0 .or. actrem < TENTH * prerem .or. jdrop_tr == 0)
  342. # Besides, Powell did not check PREREM > 0 in BAD_TRSTEP, which is reasonable to do but has
  343. # little impact upon the performance.
  344. # 2. NEWUOA/BOBYQA/LINCOA would define BAD_TRSTEP, IMPROVE_GEO, and REDUCE_RHO as follows. Two
  345. # different thresholds are used in BAD_TRSTEP. It outperforms Powell's version.
  346. # !bad_trstep = (shortd .or. trfail .or. ratio <= eta1 .or. jdrop_tr == 0)
  347. # !improve_geo = bad_trstep .and. .not. adequate_geo
  348. # !bad_trstep = (shortd .or. trfail .or. ratio <= 0 .or. jdrop_tr == 0)
  349. # !reduce_rho = bad_trstep .and. adequate_geo .and. max(delta, dnorm) <= rho
  350. # 3. Theoretically, JDROP_TR > 0 when ACTREM > 0 (guaranteed by RATIO > 0). However, in Powell's
  351. # implementation, JDROP_TR may be 0 even RATIO > 0 due to NaN. The modernized code has rectified
  352. # this in the function SETDROP_TR. After this rectification, we can indeed simplify the
  353. # definition of BAD_TRSTEP by removing the condition JDROP_TR == 0. We retain it for robustness.
  354. # Comments on REDUCE_RHO:
  355. # When SHORTD is TRUE, UOBYQA/NEWUOA/BOBYQA/LINCOA all set REDUCE_RHO to TRUE if the recent
  356. # models are sufficiently accurate according to certain criteria. See the paragraph around (37)
  357. # in the UOBYQA paper and the discussions about Box 14 in the NEWUOA paper. This strategy is
  358. # crucial for the performance of the solvers. However, as of 20221111, we have not managed to
  359. # make it work in COBYLA. As in NEWUOA, we recorded the errors of the recent models, and set
  360. # REDUCE_RHO to true if they are small (e.g., ALL(ABS(MODERR_REC) <= 0.1 * MAXVAL(ABS(A))*RHO) or
  361. # ALL(ABS(MODERR_REC) <= RHO**2)) when SHORTD is TRUE. It made little impact on the performance.
  362. # Since COBYLA never sets IMPROVE_GEO and REDUCE_RHO to TRUE simultaneously, the following
  363. # two blocks are exchangeable: IF (IMPROVE_GEO) ... END IF and IF (REDUCE_RHO) ... END IF.
  364. # Improve the geometry of the simplex by removing a point and adding a new one.
  365. # If the current interpolation set has acceptable geometry, then we skip the geometry step.
  366. # The code has a small difference from Powell's original code here: If the current geometry
  367. # is acceptable, then we will continue with a new trust-region iteration; however, at the
  368. # beginning of the iteration, CPEN may be updated, which may alter the pole point SIM(:, N+1)
  369. # by UPDATEPOLE; the quality of the interpolation point depends on SIM(:, N + 1), meaning
  370. # that the same interpolation set may have good or bad geometry with respect to different
  371. # "poles"; if the geometry turns out bad with the new pole, the original COBYLA code will
  372. # take a geometry step, but our code here will NOT do it but continue to take a trust-region
  373. # step. The argument is this: even if the geometry step is not skipped in the first place, the
  374. # geometry may turn out bad again after the pole is altered due to an update to CPEN; should
  375. # we take another geometry step in that case? If no, why should we do it here? Indeed, this
  376. # distinction makes no practical difference for CUTEst problems with at most 100 variables
  377. # and 5000 constraints, while the algorithm framework is simplified.
  378. if improve_geo and not all(primasum(primapow2(sim[:, :num_vars]), axis=0) <= 4 * primapow2(delta)):
  379. # Before the geometry step, updatepole has been called either implicitly by UPDATEXFC or
  380. # explicitly after CPEN is updated, so that SIM[:, :NUM_VARS] is the optimal vertex.
  381. # Decide a vertex to drop from the simplex. It will be replaced with SIM[:, NUM_VARS] + D to
  382. # improve the geometry of the simplex.
  383. # N.B.:
  384. # 1. COBYLA never sets JDROP_GEO = num_vars.
  385. # 2. The following JDROP_GEO comes from UOBYQA/NEWUOA/BOBYQA/LINCOA.
  386. # 3. In Powell's original algorithm, the geometry of the simplex is considered acceptable
  387. # iff the distance between any vertex and the pole is at most 2.1*DELTA, and the distance
  388. # between any vertex and the opposite face of the simplex is at least 0.25*DELTA, as
  389. # specified in (14) of the COBYLA paper. Correspondingly, JDROP_GEO is set to the index of
  390. # the vertex with the largest distance to the pole provided that the distance is larger than
  391. # 2.1*DELTA, or the vertex with the smallest distance to the opposite face of the simplex,
  392. # in which case the distance must be less than 0.25*DELTA, as the current simplex does not
  393. # have acceptable geometry (see (15)--(16) of the COBYLA paper). Once JDROP_GEO is set, the
  394. # algorithm replaces SIM(:, JDROP_GEO) with D specified in (17) of the COBYLA paper, which
  395. # is orthogonal to the face opposite to SIM(:, JDROP_GEO) and has a length of 0.5*DELTA,
  396. # intending to improve the geometry of the simplex as per (14).
  397. # 4. Powell's geometry-improving procedure outlined above has an intrinsic flaw: it may lead
  398. # to infinite cycling, as was observed in a test on 20240320. In this test, the geometry-
  399. # improving point introduced in the previous iteration was replaced with the trust-region
  400. # trial point in the current iteration, which was then replaced with the same geometry-
  401. # improving point in the next iteration, and so on. In this process, the simplex alternated
  402. # between two configurations, neither of which had acceptable geometry. Thus RHO was never
  403. # reduced, leading to infinite cycling. (N.B.: Our implementation uses DELTA as the trust
  404. # region radius, with RHO being its lower bound. When the infinite cycling occurred in this
  405. # test, DELTA = RHO and it could not be reduced due to the requirement that DELTA >= RHO.)
  406. jdrop_geo = np.argmax(primasum(primapow2(sim[:, :num_vars]), axis=0), axis=0)
  407. # Calculate the geometry step D.
  408. delbar = delta/2
  409. d = geostep(jdrop_geo, amat, bvec, conmat, cpen, cval, delbar, fval, simi)
  410. # Calculate the next value of the objective and constraint functions.
  411. # If X is close to one of the points in the interpolation set, then we do not evaluate the
  412. # objective and constraints at X, assuming them to have the values at the closest point.
  413. # N.B.:
  414. # 1. If this happens, do NOT include X into the filter, as F and CONSTR are inaccurate.
  415. # 2. In precise arithmetic, the geometry improving step ensures that the distance between X
  416. # and any interpolation point is at least DELBAR, yet X may be close to them due to
  417. # rounding. In an experiment with single precision on 20240317, X = SIM(:, N+1) occurred.
  418. x = sim[:, num_vars] + d
  419. distsq[num_vars] = primasum(primapow2(x - sim[:, num_vars]))
  420. distsq[:num_vars] = primasum(primapow2(x.reshape(num_vars, 1) -
  421. (sim[:, num_vars].reshape(num_vars, 1) + sim[:, :num_vars])), axis=0)
  422. j = np.argmin(distsq)
  423. if distsq[j] <= primapow2(1e-4 * rhoend):
  424. f = fval[j]
  425. constr = conmat[:, j]
  426. cstrv = cval[j]
  427. else:
  428. # Evaluate the objective and constraints at X, taking care of possible
  429. # inf/nan values.
  430. f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
  431. cstrv = np.max(np.append(0, constr))
  432. nf += 1
  433. # Save X, F, CONSTR, CSTRV into the history.
  434. savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
  435. # Save X, F, CONSTR, CSTRV into the filter.
  436. nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f,
  437. x, nfilt, cfilt, ffilt,
  438. xfilt, constr, confilt)
  439. # Print a message about the function/constraint evaluation according to iprint
  440. fmsg(solver, 'Geometry', iprint, nf, delta, f, x, cstrv, constr)
  441. # Update SIM, SIMI, FVAL, CONMAT, and CVAL so that SIM(:, JDROP_GEO) is replaced with D.
  442. sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_geo, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
  443. # Check whether to break due to damaging rounding in UPDATEXFC
  444. if subinfo == DAMAGING_ROUNDING:
  445. info = subinfo
  446. break # Better action to take? Geometry step, or simply continue?
  447. # Check whether to break due to maxfun, ftarget, etc.
  448. subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x)
  449. if subinfo != INFO_DEFAULT:
  450. info = subinfo
  451. break
  452. # end of if improve_geo. The procedure of improving the geometry ends.
  453. # The calculations with the current RHO are complete. Enhance the resolution of the algorithm
  454. # by reducing RHO; update DELTA and CPEN at the same time.
  455. if reduce_rho:
  456. if rho <= rhoend:
  457. info = SMALL_TR_RADIUS
  458. break
  459. delta = max(0.5 * rho, redrho(rho, rhoend))
  460. rho = redrho(rho, rhoend)
  461. # THe second (out of two) updates of CPEN, where CPEN decreases or remains the same.
  462. # Powell's code: cpen = min(cpen, fcratio(fval, conmat)), which may set CPEN to 0.
  463. cpen = np.maximum(cpenmin, np.minimum(cpen, fcratio(conmat, fval)))
  464. # Print a message about the reduction of rho according to iprint
  465. rhomsg(solver, iprint, nf, fval[num_vars], rho, sim[:, num_vars], cval[num_vars], conmat[:, num_vars], cpen)
  466. conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
  467. # Check whether to break due to damaging rounding detected in updatepole
  468. if subinfo == DAMAGING_ROUNDING:
  469. info = subinfo
  470. break # Better action to take? Geometry step, or simply continue?
  471. # End of if reduce_rho. The procedure of reducing RHO ends.
  472. # Report the current best value, and check if user asks for early termination.
  473. if callback:
  474. terminate = callback(sim[:, num_vars], fval[num_vars], nf, tr, cval[num_vars], conmat[:, num_vars])
  475. if terminate:
  476. info = CALLBACK_TERMINATE
  477. break
  478. # End of for loop. The iterative procedure ends
  479. # Return from the calculation, after trying the last trust-region step if it has not been tried yet.
  480. # Ensure that D has not been updated after SHORTD == TRUE occurred, or the code below is incorrect.
  481. x = sim[:, num_vars] + d
  482. if (info == SMALL_TR_RADIUS and
  483. shortd and
  484. norm(x - sim[:, num_vars]) > 1.0E-3 * rhoend and
  485. nf < maxfun):
  486. # Zaikun 20230615: UPDATEXFC or UPDATEPOLE is not called since the last trust-region step. Hence
  487. # SIM[:, NUM_VARS] remains unchanged. Otherwise SIM[:, NUM_VARS] + D would not make sense.
  488. f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
  489. cstrv = np.max(np.append(0, constr))
  490. nf += 1
  491. savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
  492. nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
  493. # Zaikun 20230512: DELTA has been updated. RHO is only indicative here. TO BE IMPROVED.
  494. fmsg(solver, 'Trust region', iprint, nf, rho, f, x, cstrv, constr)
  495. # Return the best calculated values of the variables
  496. # N.B.: SELECTX and FINDPOLE choose X by different standards, one cannot replace the other.
  497. kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], max(cpen, cweight), ctol)
  498. x = xfilt[:, kopt]
  499. f = ffilt[kopt]
  500. constr = confilt[:, kopt]
  501. cstrv = cfilt[kopt]
  502. # Print a return message according to IPRINT.
  503. retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
  504. return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info
  505. def getcpen(amat, bvec, conmat, cpen, cval, delta, fval, rho, sim, simi):
  506. '''
  507. This function gets the penalty parameter CPEN so that PREREM = PREREF + CPEN * PREREC > 0.
  508. See the discussions around equation (9) of the COBYLA paper.
  509. '''
  510. # Even after nearly all of the pycutest problems were showing nearly bit for bit
  511. # identical results between Python and the Fortran bindings, HS102 was still off by
  512. # more than machine epsilon. It turned out to be due to the fact that getcpen was
  513. # modifying fval, among other. It just goes to show that even when you're nearly
  514. # perfect, you can still have non trivial bugs.
  515. conmat = conmat.copy()
  516. cval = cval.copy()
  517. fval = fval.copy()
  518. sim = sim.copy()
  519. simi = simi.copy()
  520. # Intermediate variables
  521. A = np.zeros((np.size(sim, 0), np.size(conmat, 0)))
  522. itol = 1
  523. # Sizes
  524. m_lcon = np.size(bvec) if bvec is not None else 0
  525. num_constraints = np.size(conmat, 0)
  526. num_vars = np.size(sim, 0)
  527. # Preconditions
  528. if DEBUGGING:
  529. assert num_constraints >= 0
  530. assert num_vars >= 1
  531. assert cpen > 0
  532. assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
  533. assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
  534. assert np.size(cval) == num_vars + 1 and \
  535. not any(cval < 0 | np.isnan(cval) | np.isposinf(cval))
  536. assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
  537. assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
  538. assert np.isfinite(sim).all()
  539. assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
  540. assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
  541. assert np.isfinite(simi).all()
  542. assert isinv(sim[:, :num_vars], simi, itol)
  543. assert delta >= rho and rho > 0
  544. #====================#
  545. # Calculation starts #
  546. #====================#
  547. # Initialize INFO which is needed in the postconditions
  548. info = INFO_DEFAULT
  549. # Increase CPEN if necessary to ensure PREREM > 0. Branch back for the next loop
  550. # if this change alters the optimal vertex of the current simplex.
  551. # Note the following:
  552. # 1. In each loop, CPEN is changed only if PREREC > 0 > PREREF, in which case
  553. # PREREM is guaranteed positive after the update. Note that PREREC >= 0 and
  554. # max(PREREC, PREREF) > 0 in theory. If this holds numerically as well then CPEN
  555. # is not changed only if PREREC = 0 or PREREF >= 0, in which case PREREM is
  556. # currently positive, explaining why CPEN needs no update.
  557. # 2. Even without an upper bound for the loop counter, the loop can occur at most
  558. # NUM_VARS+1 times. This is because the update of CPEN does not decrease CPEN,
  559. # and hence it can make vertex J (J <= NUM_VARS) become the new optimal vertex
  560. # only if CVAL[J] is less than CVAL[NUM_VARS], which can happen at most NUM_VARS
  561. # times. See the paragraph below (9) in the COBYLA paper. After the "correct"
  562. # optimal vertex is found, one more loop is needed to calculate CPEN, and hence
  563. # the loop can occur at most NUM_VARS+1 times.
  564. for iter in range(num_vars + 1):
  565. # Switch the best vertex of the current simplex to SIM[:, NUM_VARS]
  566. conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim,
  567. simi)
  568. # Check whether to exit due to damaging rounding in UPDATEPOLE
  569. if info == DAMAGING_ROUNDING:
  570. break
  571. # Calculate the linear approximations to the objective and constraint functions.
  572. g = matprod(fval[:num_vars] - fval[num_vars], simi)
  573. A[:, :m_lcon] = amat.T if amat is not None else amat
  574. A[:, m_lcon:] = matprod((conmat[m_lcon:, :num_vars] -
  575. np.tile(conmat[m_lcon:, num_vars], (num_vars, 1)).T), simi).T
  576. # Calculate the trust-region trial step D. Note that D does NOT depend on CPEN.
  577. d = trstlp(A, -conmat[:, num_vars], delta, g)
  578. # Predict the change to F (PREREF) and to the constraint violation (PREREC) due
  579. # to D.
  580. preref = -inprod(d, g) # Can be negative
  581. prerec = cval[num_vars] - np.max(np.append(0, conmat[:, num_vars] + matprod(d, A)))
  582. # PREREC <= 0 or PREREF >=0 or either is NaN
  583. if not (prerec > 0 and preref < 0):
  584. break
  585. # Powell's code defines BARMU = -PREREF / PREREC, and CPEN is increased to
  586. # 2*BARMU if and only if it is currently less than 1.5*BARMU, a very
  587. # "Powellful" scheme. In our implementation, however, we set CPEN directly to
  588. # the maximum between its current value and 2*BARMU while handling possible
  589. # overflow. The simplifies the scheme without worsening the performance of
  590. # COBYLA.
  591. cpen = max(cpen, min(-2 * preref / prerec, REALMAX))
  592. if findpole(cpen, cval, fval) == num_vars:
  593. break
  594. #==================#
  595. # Calculation ends #
  596. #==================#
  597. # Postconditions
  598. if DEBUGGING:
  599. assert cpen >= cpen and cpen > 0
  600. assert preref + cpen * prerec > 0 or info == DAMAGING_ROUNDING or \
  601. not (prerec >= 0 and np.maximum(prerec, preref) > 0) or not np.isfinite(preref)
  602. return cpen
  603. def fcratio(conmat, fval):
  604. '''
  605. This function calculates the ratio between the "typical change" of F and that of CONSTR.
  606. See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio.
  607. '''
  608. # Preconditions
  609. if DEBUGGING:
  610. assert np.size(fval) >= 1
  611. assert np.size(conmat, 1) == np.size(fval)
  612. #====================#
  613. # Calculation starts #
  614. #====================#
  615. cmin = np.min(-conmat, axis=1)
  616. cmax = np.max(-conmat, axis=1)
  617. fmin = min(fval)
  618. fmax = max(fval)
  619. if any(cmin < 0.5 * cmax) and fmin < fmax:
  620. denom = np.min(np.maximum(cmax, 0) - cmin, where=cmin < 0.5 * cmax, initial=np.inf)
  621. # Powell mentioned the following alternative in section 4 of his COBYLA paper. According to a test
  622. # on 20230610, it does not make much difference to the performance.
  623. # denom = np.max(max(*cmax, 0) - cmin, mask=(cmin < 0.5 * cmax))
  624. r = (fmax - fmin) / denom
  625. else:
  626. r = 0
  627. #==================#
  628. # Calculation ends #
  629. #==================#
  630. # Postconditions
  631. if DEBUGGING:
  632. assert r >= 0
  633. return r