optim.py 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203
  1. import inspect
  2. import numpy as np
  3. from scipy.linalg import qr
  4. from ..utils import get_arrays_tol
  5. TINY = np.finfo(float).tiny
  6. EPS = np.finfo(float).eps
  7. def tangential_byrd_omojokun(grad, hess_prod, xl, xu, delta, debug, **kwargs):
  8. r"""
  9. Minimize approximately a quadratic function subject to bound constraints in
  10. a trust region.
  11. This function solves approximately
  12. .. math::
  13. \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
  14. s^{\mathsf{T}} H s \quad \text{s.t.} \quad
  15. \left\{ \begin{array}{l}
  16. l \le s \le u\\
  17. \lVert s \rVert \le \Delta,
  18. \end{array} \right.
  19. using an active-set variation of the truncated conjugate gradient method.
  20. Parameters
  21. ----------
  22. grad : `numpy.ndarray`, shape (n,)
  23. Gradient :math:`g` as shown above.
  24. hess_prod : callable
  25. Product of the Hessian matrix :math:`H` with any vector.
  26. ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``
  27. returns the product :math:`H s`.
  28. xl : `numpy.ndarray`, shape (n,)
  29. Lower bounds :math:`l` as shown above.
  30. xu : `numpy.ndarray`, shape (n,)
  31. Upper bounds :math:`u` as shown above.
  32. delta : float
  33. Trust-region radius :math:`\Delta` as shown above.
  34. debug : bool
  35. Whether to make debugging tests during the execution.
  36. Returns
  37. -------
  38. `numpy.ndarray`, shape (n,)
  39. Approximate solution :math:`s`.
  40. Other Parameters
  41. ----------------
  42. improve_tcg : bool, optional
  43. If True, a solution generated by the truncated conjugate gradient
  44. method that is on the boundary of the trust region is improved by
  45. moving around the trust-region boundary on the two-dimensional space
  46. spanned by the solution and the gradient of the quadratic function at
  47. the solution (default is True).
  48. Notes
  49. -----
  50. This function implements Algorithm 6.2 of [1]_. It is assumed that the
  51. origin is feasible with respect to the bound constraints and that `delta`
  52. is finite and positive.
  53. References
  54. ----------
  55. .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
  56. and Software*. PhD thesis, Department of Applied Mathematics, The Hong
  57. Kong Polytechnic University, Hong Kong, China, 2022. URL:
  58. https://theses.lib.polyu.edu.hk/handle/200/12294.
  59. """
  60. if debug:
  61. assert isinstance(grad, np.ndarray) and grad.ndim == 1
  62. assert inspect.signature(hess_prod).bind(grad)
  63. assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
  64. assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
  65. assert isinstance(delta, float)
  66. assert isinstance(debug, bool)
  67. tol = get_arrays_tol(xl, xu)
  68. assert np.all(xl <= tol)
  69. assert np.all(xu >= -tol)
  70. assert np.isfinite(delta) and delta > 0.0
  71. xl = np.minimum(xl, 0.0)
  72. xu = np.maximum(xu, 0.0)
  73. # Copy the arrays that may be modified by the code below.
  74. n = grad.size
  75. grad = np.copy(grad)
  76. grad_orig = np.copy(grad)
  77. # Calculate the initial active set.
  78. free_bd = ((xl < 0.0) | (grad < 0.0)) & ((xu > 0.0) | (grad > 0.0))
  79. # Set the initial iterate and the initial search direction.
  80. step = np.zeros_like(grad)
  81. sd = np.zeros_like(step)
  82. sd[free_bd] = -grad[free_bd]
  83. k = 0
  84. reduct = 0.0
  85. boundary_reached = False
  86. while k < np.count_nonzero(free_bd):
  87. # Stop the computations if sd is not a descent direction.
  88. grad_sd = grad @ sd
  89. if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
  90. break
  91. # Set alpha_tr to the step size for the trust-region constraint.
  92. try:
  93. alpha_tr = _alpha_tr(step, sd, delta)
  94. except ZeroDivisionError:
  95. break
  96. # Stop the computations if a step along sd is expected to give a
  97. # relatively small reduction in the objective function.
  98. if -alpha_tr * grad_sd <= 1e-8 * reduct:
  99. break
  100. # Set alpha_quad to the step size for the minimization problem.
  101. hess_sd = hess_prod(sd)
  102. curv_sd = sd @ hess_sd
  103. if curv_sd > TINY * abs(grad_sd):
  104. alpha_quad = max(-grad_sd / curv_sd, 0.0)
  105. else:
  106. alpha_quad = np.inf
  107. # Stop the computations if the reduction in the objective function
  108. # provided by an unconstrained step is small.
  109. alpha = min(alpha_tr, alpha_quad)
  110. if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
  111. break
  112. # Set alpha_bd to the step size for the bound constraints.
  113. i_xl = (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
  114. i_xu = (xu < np.inf) & (sd > TINY * np.abs(xu - step))
  115. all_alpha_xl = np.full_like(step, np.inf)
  116. all_alpha_xu = np.full_like(step, np.inf)
  117. all_alpha_xl[i_xl] = np.maximum(
  118. (xl[i_xl] - step[i_xl]) / sd[i_xl],
  119. 0.0,
  120. )
  121. all_alpha_xu[i_xu] = np.maximum(
  122. (xu[i_xu] - step[i_xu]) / sd[i_xu],
  123. 0.0,
  124. )
  125. alpha_xl = np.min(all_alpha_xl)
  126. alpha_xu = np.min(all_alpha_xu)
  127. alpha_bd = min(alpha_xl, alpha_xu)
  128. # Update the iterate.
  129. alpha = min(alpha, alpha_bd)
  130. if alpha > 0.0:
  131. step[free_bd] = np.clip(
  132. step[free_bd] + alpha * sd[free_bd],
  133. xl[free_bd],
  134. xu[free_bd],
  135. )
  136. grad += alpha * hess_sd
  137. reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
  138. if alpha < min(alpha_tr, alpha_bd):
  139. # The current iteration is a conjugate gradient iteration. Update
  140. # the search direction so that it is conjugate (with respect to H)
  141. # to all the previous search directions.
  142. beta = (grad[free_bd] @ hess_sd[free_bd]) / curv_sd
  143. sd[free_bd] = beta * sd[free_bd] - grad[free_bd]
  144. sd[~free_bd] = 0.0
  145. k += 1
  146. elif alpha < alpha_tr:
  147. # The iterate is restricted by a bound constraint. Add this bound
  148. # constraint to the active set, and restart the calculations.
  149. if alpha_xl <= alpha:
  150. i_new = np.argmin(all_alpha_xl)
  151. step[i_new] = xl[i_new]
  152. else:
  153. i_new = np.argmin(all_alpha_xu)
  154. step[i_new] = xu[i_new]
  155. free_bd[i_new] = False
  156. sd[free_bd] = -grad[free_bd]
  157. sd[~free_bd] = 0.0
  158. k = 0
  159. else:
  160. # The current iterate is on the trust-region boundary. Add all the
  161. # active bounds to the working set to prepare for the improvement
  162. # of the solution, and stop the iterations.
  163. if alpha_xl <= alpha:
  164. i_new = _argmin(all_alpha_xl)
  165. step[i_new] = xl[i_new]
  166. free_bd[i_new] = False
  167. if alpha_xu <= alpha:
  168. i_new = _argmin(all_alpha_xu)
  169. step[i_new] = xu[i_new]
  170. free_bd[i_new] = False
  171. boundary_reached = True
  172. break
  173. # Attempt to improve the solution on the trust-region boundary.
  174. if kwargs.get("improve_tcg", True) and boundary_reached:
  175. step_base = np.copy(step)
  176. step_comparator = grad_orig @ step_base + 0.5 * step_base @ hess_prod(
  177. step_base
  178. )
  179. while np.count_nonzero(free_bd) > 0:
  180. # Check whether a substantial reduction in the objective function
  181. # is possible, and set the search direction.
  182. step_sq = step[free_bd] @ step[free_bd]
  183. grad_sq = grad[free_bd] @ grad[free_bd]
  184. grad_step = grad[free_bd] @ step[free_bd]
  185. grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
  186. sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
  187. sd[~free_bd] = 0.0
  188. if grad_sd >= -1e-8 * reduct or np.any(
  189. grad_sd >= -TINY * np.abs(sd[free_bd])
  190. ):
  191. break
  192. sd[free_bd] /= -grad_sd
  193. # Calculate an upper bound for the tangent of half the angle theta
  194. # of this alternative iteration. The step will be updated as:
  195. # step = cos(theta) * step + sin(theta) * sd.
  196. temp_xl = np.zeros(n)
  197. temp_xu = np.zeros(n)
  198. temp_xl[free_bd] = (
  199. step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
  200. )
  201. temp_xu[free_bd] = (
  202. step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
  203. )
  204. temp_xl[temp_xl > 0.0] = (
  205. np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
  206. )
  207. temp_xu[temp_xu > 0.0] = (
  208. np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
  209. )
  210. dist_xl = np.maximum(step - xl, 0.0)
  211. dist_xu = np.maximum(xu - step, 0.0)
  212. i_xl = temp_xl > TINY * dist_xl
  213. i_xu = temp_xu > TINY * dist_xu
  214. all_t_xl = np.ones(n)
  215. all_t_xu = np.ones(n)
  216. all_t_xl[i_xl] = np.minimum(
  217. all_t_xl[i_xl],
  218. dist_xl[i_xl] / temp_xl[i_xl],
  219. )
  220. all_t_xu[i_xu] = np.minimum(
  221. all_t_xu[i_xu],
  222. dist_xu[i_xu] / temp_xu[i_xu],
  223. )
  224. t_xl = np.min(all_t_xl)
  225. t_xu = np.min(all_t_xu)
  226. t_bd = min(t_xl, t_xu)
  227. # Calculate some curvature information.
  228. hess_step = hess_prod(step)
  229. hess_sd = hess_prod(sd)
  230. curv_step = step @ hess_step
  231. curv_sd = sd @ hess_sd
  232. curv_step_sd = step @ hess_sd
  233. # For a range of equally spaced values of tan(0.5 * theta),
  234. # calculate the reduction in the objective function that would be
  235. # obtained by accepting the corresponding angle.
  236. n_samples = 20
  237. n_samples = int((n_samples - 3) * t_bd + 3)
  238. t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
  239. sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
  240. all_reduct = sin_values * (
  241. grad_step * t_samples
  242. - grad_sd
  243. - t_samples * curv_step
  244. + sin_values
  245. * (t_samples * curv_step_sd - 0.5 * (curv_sd - curv_step))
  246. )
  247. if np.all(all_reduct <= 0.0):
  248. # No reduction in the objective function is obtained.
  249. break
  250. # Accept the angle that provides the largest reduction in the
  251. # objective function, and update the iterate.
  252. i_max = np.argmax(all_reduct)
  253. cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
  254. 1.0 + t_samples[i_max] ** 2.0
  255. )
  256. step[free_bd] = (
  257. cos_value * step[free_bd] + sin_values[i_max] * sd[free_bd]
  258. )
  259. grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
  260. reduct += all_reduct[i_max]
  261. # If the above angle is restricted by bound constraints, add them
  262. # to the working set, and restart the alternative iteration.
  263. # Otherwise, the calculations are terminated.
  264. if t_bd < 1.0 and i_max == n_samples - 1:
  265. if t_xl <= t_bd:
  266. i_new = _argmin(all_t_xl)
  267. step[i_new] = xl[i_new]
  268. free_bd[i_new] = False
  269. if t_xu <= t_bd:
  270. i_new = _argmin(all_t_xu)
  271. step[i_new] = xu[i_new]
  272. free_bd[i_new] = False
  273. else:
  274. break
  275. # Ensure that the alternative iteration improves the objective
  276. # function.
  277. if grad_orig @ step + 0.5 * step @ hess_prod(step) > step_comparator:
  278. step = step_base
  279. if debug:
  280. assert np.all(xl <= step)
  281. assert np.all(step <= xu)
  282. assert np.linalg.norm(step) < 1.1 * delta
  283. return step
  284. def constrained_tangential_byrd_omojokun(
  285. grad,
  286. hess_prod,
  287. xl,
  288. xu,
  289. aub,
  290. bub,
  291. aeq,
  292. delta,
  293. debug,
  294. **kwargs,
  295. ):
  296. r"""
  297. Minimize approximately a quadratic function subject to bound and linear
  298. constraints in a trust region.
  299. This function solves approximately
  300. .. math::
  301. \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
  302. s^{\mathsf{T}} H s \quad \text{s.t.} \quad
  303. \left\{ \begin{array}{l}
  304. l \le s \le u,\\
  305. A_{\scriptscriptstyle I} s \le b_{\scriptscriptstyle I},\\
  306. A_{\scriptscriptstyle E} s = 0,\\
  307. \lVert s \rVert \le \Delta,
  308. \end{array} \right.
  309. using an active-set variation of the truncated conjugate gradient method.
  310. Parameters
  311. ----------
  312. grad : `numpy.ndarray`, shape (n,)
  313. Gradient :math:`g` as shown above.
  314. hess_prod : callable
  315. Product of the Hessian matrix :math:`H` with any vector.
  316. ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``
  317. returns the product :math:`H s`.
  318. xl : `numpy.ndarray`, shape (n,)
  319. Lower bounds :math:`l` as shown above.
  320. xu : `numpy.ndarray`, shape (n,)
  321. Upper bounds :math:`u` as shown above.
  322. aub : `numpy.ndarray`, shape (m_linear_ub, n)
  323. Coefficient matrix :math:`A_{\scriptscriptstyle I}` as shown above.
  324. bub : `numpy.ndarray`, shape (m_linear_ub,)
  325. Right-hand side :math:`b_{\scriptscriptstyle I}` as shown above.
  326. aeq : `numpy.ndarray`, shape (m_linear_eq, n)
  327. Coefficient matrix :math:`A_{\scriptscriptstyle E}` as shown above.
  328. delta : float
  329. Trust-region radius :math:`\Delta` as shown above.
  330. debug : bool
  331. Whether to make debugging tests during the execution.
  332. Returns
  333. -------
  334. `numpy.ndarray`, shape (n,)
  335. Approximate solution :math:`s`.
  336. Other Parameters
  337. ----------------
  338. improve_tcg : bool, optional
  339. If True, a solution generated by the truncated conjugate gradient
  340. method that is on the boundary of the trust region is improved by
  341. moving around the trust-region boundary on the two-dimensional space
  342. spanned by the solution and the gradient of the quadratic function at
  343. the solution (default is True).
  344. Notes
  345. -----
  346. This function implements Algorithm 6.3 of [1]_. It is assumed that the
  347. origin is feasible with respect to the bound and linear constraints, and
  348. that `delta` is finite and positive.
  349. References
  350. ----------
  351. .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
  352. and Software*. PhD thesis, Department of Applied Mathematics, The Hong
  353. Kong Polytechnic University, Hong Kong, China, 2022. URL:
  354. https://theses.lib.polyu.edu.hk/handle/200/12294.
  355. """
  356. if debug:
  357. assert isinstance(grad, np.ndarray) and grad.ndim == 1
  358. assert inspect.signature(hess_prod).bind(grad)
  359. assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
  360. assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
  361. assert (
  362. isinstance(aub, np.ndarray)
  363. and aub.ndim == 2
  364. and aub.shape[1] == grad.size
  365. )
  366. assert (
  367. isinstance(bub, np.ndarray)
  368. and bub.ndim == 1
  369. and bub.size == aub.shape[0]
  370. )
  371. assert (
  372. isinstance(aeq, np.ndarray)
  373. and aeq.ndim == 2
  374. and aeq.shape[1] == grad.size
  375. )
  376. assert isinstance(delta, float)
  377. assert isinstance(debug, bool)
  378. tol = get_arrays_tol(xl, xu)
  379. assert np.all(xl <= tol)
  380. assert np.all(xu >= -tol)
  381. assert np.all(bub >= -tol)
  382. assert np.isfinite(delta) and delta > 0.0
  383. xl = np.minimum(xl, 0.0)
  384. xu = np.maximum(xu, 0.0)
  385. bub = np.maximum(bub, 0.0)
  386. # Copy the arrays that may be modified by the code below.
  387. n = grad.size
  388. grad = np.copy(grad)
  389. grad_orig = np.copy(grad)
  390. # Calculate the initial active set.
  391. free_xl = (xl < 0.0) | (grad < 0.0)
  392. free_xu = (xu > 0.0) | (grad > 0.0)
  393. free_ub = (bub > 0.0) | (aub @ grad > 0.0)
  394. n_act, q = qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub)
  395. # Set the initial iterate and the initial search direction.
  396. step = np.zeros_like(grad)
  397. sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
  398. resid = np.copy(bub)
  399. k = 0
  400. reduct = 0.0
  401. boundary_reached = False
  402. while k < n - n_act:
  403. # Stop the computations if sd is not a descent direction.
  404. grad_sd = grad @ sd
  405. if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
  406. break
  407. # Set alpha_tr to the step size for the trust-region constraint.
  408. try:
  409. alpha_tr = _alpha_tr(step, sd, delta)
  410. except ZeroDivisionError:
  411. break
  412. # Stop the computations if a step along sd is expected to give a
  413. # relatively small reduction in the objective function.
  414. if -alpha_tr * grad_sd <= 1e-8 * reduct:
  415. break
  416. # Set alpha_quad to the step size for the minimization problem.
  417. hess_sd = hess_prod(sd)
  418. curv_sd = sd @ hess_sd
  419. if curv_sd > TINY * abs(grad_sd):
  420. alpha_quad = max(-grad_sd / curv_sd, 0.0)
  421. else:
  422. alpha_quad = np.inf
  423. # Stop the computations if the reduction in the objective function
  424. # provided by an unconstrained step is small.
  425. alpha = min(alpha_tr, alpha_quad)
  426. if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
  427. break
  428. # Set alpha_bd to the step size for the bound constraints.
  429. i_xl = free_xl & (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
  430. i_xu = free_xu & (xu < np.inf) & (sd > TINY * np.abs(xu - step))
  431. all_alpha_xl = np.full_like(step, np.inf)
  432. all_alpha_xu = np.full_like(step, np.inf)
  433. all_alpha_xl[i_xl] = np.maximum(
  434. (xl[i_xl] - step[i_xl]) / sd[i_xl],
  435. 0.0,
  436. )
  437. all_alpha_xu[i_xu] = np.maximum(
  438. (xu[i_xu] - step[i_xu]) / sd[i_xu],
  439. 0.0,
  440. )
  441. alpha_xl = np.min(all_alpha_xl)
  442. alpha_xu = np.min(all_alpha_xu)
  443. alpha_bd = min(alpha_xl, alpha_xu)
  444. # Set alpha_ub to the step size for the linear constraints.
  445. aub_sd = aub @ sd
  446. i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
  447. all_alpha_ub = np.full_like(bub, np.inf)
  448. all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
  449. alpha_ub = np.min(all_alpha_ub, initial=np.inf)
  450. # Update the iterate.
  451. alpha = min(alpha, alpha_bd, alpha_ub)
  452. if alpha > 0.0:
  453. step = np.clip(step + alpha * sd, xl, xu)
  454. grad += alpha * hess_sd
  455. resid = np.maximum(0.0, resid - alpha * aub_sd)
  456. reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
  457. if alpha < min(alpha_tr, alpha_bd, alpha_ub):
  458. # The current iteration is a conjugate gradient iteration. Update
  459. # the search direction so that it is conjugate (with respect to H)
  460. # to all the previous search directions.
  461. grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
  462. beta = (grad_proj @ hess_sd) / curv_sd
  463. sd = beta * sd - grad_proj
  464. k += 1
  465. elif alpha < alpha_tr:
  466. # The iterate is restricted by a bound/linear constraint. Add this
  467. # constraint to the active set, and restart the calculations.
  468. if alpha_xl <= alpha:
  469. i_new = np.argmin(all_alpha_xl)
  470. step[i_new] = xl[i_new]
  471. free_xl[i_new] = False
  472. elif alpha_xu <= alpha:
  473. i_new = np.argmin(all_alpha_xu)
  474. step[i_new] = xu[i_new]
  475. free_xu[i_new] = False
  476. else:
  477. i_new = np.argmin(all_alpha_ub)
  478. free_ub[i_new] = False
  479. n_act, q = qr_tangential_byrd_omojokun(
  480. aub,
  481. aeq,
  482. free_xl,
  483. free_xu,
  484. free_ub,
  485. )
  486. sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
  487. k = 0
  488. else:
  489. # The current iterate is on the trust-region boundary. Add all the
  490. # active bound/linear constraints to the working set to prepare for
  491. # the improvement of the solution, and stop the iterations.
  492. if alpha_xl <= alpha:
  493. i_new = _argmin(all_alpha_xl)
  494. step[i_new] = xl[i_new]
  495. free_xl[i_new] = False
  496. if alpha_xu <= alpha:
  497. i_new = _argmin(all_alpha_xu)
  498. step[i_new] = xu[i_new]
  499. free_xu[i_new] = False
  500. if alpha_ub <= alpha:
  501. i_new = _argmin(all_alpha_ub)
  502. free_ub[i_new] = False
  503. n_act, q = qr_tangential_byrd_omojokun(
  504. aub,
  505. aeq,
  506. free_xl,
  507. free_xu,
  508. free_ub,
  509. )
  510. boundary_reached = True
  511. break
  512. # Attempt to improve the solution on the trust-region boundary.
  513. if kwargs.get("improve_tcg", True) and boundary_reached and n_act < n:
  514. step_base = np.copy(step)
  515. while n_act < n:
  516. # Check whether a substantial reduction in the objective function
  517. # is possible, and set the search direction.
  518. step_proj = q[:, n_act:] @ (q[:, n_act:].T @ step)
  519. grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
  520. step_sq = step_proj @ step_proj
  521. grad_sq = grad_proj @ grad_proj
  522. grad_step = grad_proj @ step_proj
  523. grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
  524. sd = q[:, n_act:] @ (
  525. q[:, n_act:].T @ (grad_step * step - step_sq * grad)
  526. )
  527. if grad_sd >= -1e-8 * reduct or np.any(
  528. grad_sd >= -TINY * np.abs(sd)
  529. ):
  530. break
  531. sd /= -grad_sd
  532. # Calculate an upper bound for the tangent of half the angle theta
  533. # of this alternative iteration for the bound constraints. The step
  534. # will be updated as:
  535. # step += (cos(theta) - 1) * step_proj + sin(theta) * sd.
  536. temp_xl = np.zeros(n)
  537. temp_xu = np.zeros(n)
  538. dist_xl = np.maximum(step - xl, 0.0)
  539. dist_xu = np.maximum(xu - step, 0.0)
  540. temp_xl[free_xl] = sd[free_xl] ** 2.0 - dist_xl[free_xl] * (
  541. dist_xl[free_xl] - 2.0 * step_proj[free_xl]
  542. )
  543. temp_xu[free_xu] = sd[free_xu] ** 2.0 - dist_xu[free_xu] * (
  544. dist_xu[free_xu] + 2.0 * step_proj[free_xu]
  545. )
  546. temp_xl[temp_xl > 0.0] = (
  547. np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
  548. )
  549. temp_xu[temp_xu > 0.0] = (
  550. np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
  551. )
  552. i_xl = temp_xl > TINY * dist_xl
  553. i_xu = temp_xu > TINY * dist_xu
  554. all_t_xl = np.ones(n)
  555. all_t_xu = np.ones(n)
  556. all_t_xl[i_xl] = np.minimum(
  557. all_t_xl[i_xl],
  558. dist_xl[i_xl] / temp_xl[i_xl],
  559. )
  560. all_t_xu[i_xu] = np.minimum(
  561. all_t_xu[i_xu],
  562. dist_xu[i_xu] / temp_xu[i_xu],
  563. )
  564. t_xl = np.min(all_t_xl)
  565. t_xu = np.min(all_t_xu)
  566. t_bd = min(t_xl, t_xu)
  567. # Calculate an upper bound for the tangent of half the angle theta
  568. # of this alternative iteration for the linear constraints.
  569. temp_ub = np.zeros_like(resid)
  570. aub_step = aub @ step_proj
  571. aub_sd = aub @ sd
  572. temp_ub[free_ub] = aub_sd[free_ub] ** 2.0 - resid[free_ub] * (
  573. resid[free_ub] + 2.0 * aub_step[free_ub]
  574. )
  575. temp_ub[temp_ub > 0.0] = (
  576. np.sqrt(temp_ub[temp_ub > 0.0]) + aub_sd[temp_ub > 0.0]
  577. )
  578. i_ub = temp_ub > TINY * resid
  579. all_t_ub = np.ones_like(resid)
  580. all_t_ub[i_ub] = np.minimum(
  581. all_t_ub[i_ub],
  582. resid[i_ub] / temp_ub[i_ub],
  583. )
  584. t_ub = np.min(all_t_ub, initial=1.0)
  585. t_min = min(t_bd, t_ub)
  586. # Calculate some curvature information.
  587. hess_step = hess_prod(step_proj)
  588. hess_sd = hess_prod(sd)
  589. curv_step = step_proj @ hess_step
  590. curv_sd = sd @ hess_sd
  591. curv_step_sd = step_proj @ hess_sd
  592. # For a range of equally spaced values of tan(0.5 * theta),
  593. # calculate the reduction in the objective function that would be
  594. # obtained by accepting the corresponding angle.
  595. n_samples = 20
  596. n_samples = int((n_samples - 3) * t_min + 3)
  597. t_samples = np.linspace(t_min / n_samples, t_min, n_samples)
  598. sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
  599. all_reduct = sin_values * (
  600. grad_step * t_samples
  601. - grad_sd
  602. - sin_values
  603. * (
  604. 0.5 * t_samples**2.0 * curv_step
  605. - 2.0 * t_samples * curv_step_sd
  606. + 0.5 * curv_sd
  607. )
  608. )
  609. if np.all(all_reduct <= 0.0):
  610. # No reduction in the objective function is obtained.
  611. break
  612. # Accept the angle that provides the largest reduction in the
  613. # objective function, and update the iterate.
  614. i_max = np.argmax(all_reduct)
  615. cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
  616. 1.0 + t_samples[i_max] ** 2.0
  617. )
  618. step = np.clip(
  619. step + (cos_value - 1.0) * step_proj + sin_values[i_max] * sd,
  620. xl,
  621. xu,
  622. )
  623. grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
  624. resid = np.maximum(
  625. 0.0,
  626. resid
  627. - (cos_value - 1.0) * aub_step
  628. - sin_values[i_max] * aub_sd,
  629. )
  630. reduct += all_reduct[i_max]
  631. # If the above angle is restricted by bound constraints, add them
  632. # to the working set, and restart the alternative iteration.
  633. # Otherwise, the calculations are terminated.
  634. if t_min < 1.0 and i_max == n_samples - 1:
  635. if t_xl <= t_min:
  636. i_new = _argmin(all_t_xl)
  637. step[i_new] = xl[i_new]
  638. free_xl[i_new] = False
  639. if t_xu <= t_min:
  640. i_new = _argmin(all_t_xu)
  641. step[i_new] = xu[i_new]
  642. free_xl[i_new] = False
  643. if t_ub <= t_min:
  644. i_new = _argmin(all_t_ub)
  645. free_ub[i_new] = False
  646. n_act, q = qr_tangential_byrd_omojokun(
  647. aub,
  648. aeq,
  649. free_xl,
  650. free_xu,
  651. free_ub,
  652. )
  653. else:
  654. break
  655. # Ensure that the alternative iteration improves the objective
  656. # function.
  657. if grad_orig @ step + 0.5 * step @ hess_prod(
  658. step
  659. ) > grad_orig @ step_base + 0.5 * step_base @ hess_prod(step_base):
  660. step = step_base
  661. if debug:
  662. tol = get_arrays_tol(xl, xu)
  663. assert np.all(xl <= step)
  664. assert np.all(step <= xu)
  665. assert np.all(aub @ step <= bub + tol)
  666. assert np.all(np.abs(aeq @ step) <= tol)
  667. assert np.linalg.norm(step) < 1.1 * delta
  668. return step
  669. def normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, delta, debug, **kwargs):
  670. r"""
  671. Minimize approximately a linear constraint violation subject to bound
  672. constraints in a trust region.
  673. This function solves approximately
  674. .. math::
  675. \min_{s \in \mathbb{R}^n} \quad \frac{1}{2} \big( \lVert \max \{
  676. A_{\scriptscriptstyle I} s - b_{\scriptscriptstyle I}, 0 \} \rVert^2 +
  677. \lVert A_{\scriptscriptstyle E} s - b_{\scriptscriptstyle E} \rVert^2
  678. \big) \quad \text{s.t.}
  679. \quad
  680. \left\{ \begin{array}{l}
  681. l \le s \le u,\\
  682. \lVert s \rVert \le \Delta,
  683. \end{array} \right.
  684. using a variation of the truncated conjugate gradient method.
  685. Parameters
  686. ----------
  687. aub : `numpy.ndarray`, shape (m_linear_ub, n)
  688. Matrix :math:`A_{\scriptscriptstyle I}` as shown above.
  689. bub : `numpy.ndarray`, shape (m_linear_ub,)
  690. Vector :math:`b_{\scriptscriptstyle I}` as shown above.
  691. aeq : `numpy.ndarray`, shape (m_linear_eq, n)
  692. Matrix :math:`A_{\scriptscriptstyle E}` as shown above.
  693. beq : `numpy.ndarray`, shape (m_linear_eq,)
  694. Vector :math:`b_{\scriptscriptstyle E}` as shown above.
  695. xl : `numpy.ndarray`, shape (n,)
  696. Lower bounds :math:`l` as shown above.
  697. xu : `numpy.ndarray`, shape (n,)
  698. Upper bounds :math:`u` as shown above.
  699. delta : float
  700. Trust-region radius :math:`\Delta` as shown above.
  701. debug : bool
  702. Whether to make debugging tests during the execution.
  703. Returns
  704. -------
  705. `numpy.ndarray`, shape (n,)
  706. Approximate solution :math:`s`.
  707. Other Parameters
  708. ----------------
  709. improve_tcg : bool, optional
  710. If True, a solution generated by the truncated conjugate gradient
  711. method that is on the boundary of the trust region is improved by
  712. moving around the trust-region boundary on the two-dimensional space
  713. spanned by the solution and the gradient of the quadratic function at
  714. the solution (default is True).
  715. Notes
  716. -----
  717. This function implements Algorithm 6.4 of [1]_. It is assumed that the
  718. origin is feasible with respect to the bound constraints and that `delta`
  719. is finite and positive.
  720. References
  721. ----------
  722. .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
  723. and Software*. PhD thesis, Department of Applied Mathematics, The Hong
  724. Kong Polytechnic University, Hong Kong, China, 2022. URL:
  725. https://theses.lib.polyu.edu.hk/handle/200/12294.
  726. """
  727. if debug:
  728. assert isinstance(aub, np.ndarray) and aub.ndim == 2
  729. assert (
  730. isinstance(bub, np.ndarray)
  731. and bub.ndim == 1
  732. and bub.size == aub.shape[0]
  733. )
  734. assert (
  735. isinstance(aeq, np.ndarray)
  736. and aeq.ndim == 2
  737. and aeq.shape[1] == aub.shape[1]
  738. )
  739. assert (
  740. isinstance(beq, np.ndarray)
  741. and beq.ndim == 1
  742. and beq.size == aeq.shape[0]
  743. )
  744. assert isinstance(xl, np.ndarray) and xl.shape == (aub.shape[1],)
  745. assert isinstance(xu, np.ndarray) and xu.shape == (aub.shape[1],)
  746. assert isinstance(delta, float)
  747. assert isinstance(debug, bool)
  748. tol = get_arrays_tol(xl, xu)
  749. assert np.all(xl <= tol)
  750. assert np.all(xu >= -tol)
  751. assert np.isfinite(delta) and delta > 0.0
  752. xl = np.minimum(xl, 0.0)
  753. xu = np.maximum(xu, 0.0)
  754. # Calculate the initial active set.
  755. m_linear_ub, n = aub.shape
  756. grad = np.r_[aeq.T @ -beq, np.maximum(0.0, -bub)]
  757. free_xl = (xl < 0.0) | (grad[:n] < 0.0)
  758. free_xu = (xu > 0.0) | (grad[:n] > 0.0)
  759. free_slack = bub < 0.0
  760. free_ub = (bub > 0.0) | (aub @ grad[:n] - grad[n:] > 0.0)
  761. n_act, q = qr_normal_byrd_omojokun(
  762. aub,
  763. free_xl,
  764. free_xu,
  765. free_slack,
  766. free_ub,
  767. )
  768. # Calculate an upper bound on the norm of the slack variables. It is not
  769. # used in the original algorithm, but it may prevent undesired behaviors
  770. # engendered by computer rounding errors.
  771. delta_slack = np.sqrt(beq @ beq + grad[n:] @ grad[n:])
  772. # Set the initial iterate and the initial search direction.
  773. step = np.zeros(n)
  774. sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
  775. resid = bub + grad[n:]
  776. k = 0
  777. reduct = 0.0
  778. boundary_reached = False
  779. while k < n + m_linear_ub - n_act:
  780. # Stop the computations if sd is not a descent direction.
  781. grad_sd = grad @ sd
  782. if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
  783. break
  784. # Set alpha_tr to the step size for the trust-region constraint.
  785. try:
  786. alpha_tr = _alpha_tr(step, sd[:n], delta)
  787. except ZeroDivisionError:
  788. alpha_tr = np.inf
  789. # Prevent undesired behaviors engendered by computer rounding errors by
  790. # considering the trust-region constraint on the slack variables.
  791. try:
  792. alpha_tr = min(alpha_tr, _alpha_tr(grad[n:], sd[n:], delta_slack))
  793. except ZeroDivisionError:
  794. pass
  795. # Stop the computations if a step along sd is expected to give a
  796. # relatively small reduction in the objective function.
  797. if -alpha_tr * grad_sd <= 1e-8 * reduct:
  798. break
  799. # Set alpha_quad to the step size for the minimization problem.
  800. hess_sd = np.r_[aeq.T @ (aeq @ sd[:n]), sd[n:]]
  801. curv_sd = sd @ hess_sd
  802. if curv_sd > TINY * abs(grad_sd):
  803. alpha_quad = max(-grad_sd / curv_sd, 0.0)
  804. else:
  805. alpha_quad = np.inf
  806. # Stop the computations if the reduction in the objective function
  807. # provided by an unconstrained step is small.
  808. alpha = min(alpha_tr, alpha_quad)
  809. if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
  810. break
  811. # Set alpha_bd to the step size for the bound constraints.
  812. i_xl = free_xl & (xl > -np.inf) & (sd[:n] < -TINY * np.abs(xl - step))
  813. i_xu = free_xu & (xu < np.inf) & (sd[:n] > TINY * np.abs(xu - step))
  814. i_slack = free_slack & (sd[n:] < -TINY * np.abs(grad[n:]))
  815. all_alpha_xl = np.full_like(step, np.inf)
  816. all_alpha_xu = np.full_like(step, np.inf)
  817. all_alpha_slack = np.full_like(bub, np.inf)
  818. all_alpha_xl[i_xl] = np.maximum(
  819. (xl[i_xl] - step[i_xl]) / sd[:n][i_xl],
  820. 0.0,
  821. )
  822. all_alpha_xu[i_xu] = np.maximum(
  823. (xu[i_xu] - step[i_xu]) / sd[:n][i_xu],
  824. 0.0,
  825. )
  826. all_alpha_slack[i_slack] = np.maximum(
  827. -grad[n:][i_slack] / sd[n:][i_slack],
  828. 0.0,
  829. )
  830. alpha_xl = np.min(all_alpha_xl)
  831. alpha_xu = np.min(all_alpha_xu)
  832. alpha_slack = np.min(all_alpha_slack, initial=np.inf)
  833. alpha_bd = min(alpha_xl, alpha_xu, alpha_slack)
  834. # Set alpha_ub to the step size for the linear constraints.
  835. aub_sd = aub @ sd[:n] - sd[n:]
  836. i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
  837. all_alpha_ub = np.full_like(bub, np.inf)
  838. all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
  839. alpha_ub = np.min(all_alpha_ub, initial=np.inf)
  840. # Update the iterate.
  841. alpha = min(alpha, alpha_bd, alpha_ub)
  842. if alpha > 0.0:
  843. step = np.clip(step + alpha * sd[:n], xl, xu)
  844. grad += alpha * hess_sd
  845. resid = np.maximum(0.0, resid - alpha * aub_sd)
  846. reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
  847. if alpha < min(alpha_tr, alpha_bd, alpha_ub):
  848. # The current iteration is a conjugate gradient iteration. Update
  849. # the search direction so that it is conjugate (with respect to H)
  850. # to all the previous search directions.
  851. grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
  852. beta = (grad_proj @ hess_sd) / curv_sd
  853. sd = beta * sd - grad_proj
  854. k += 1
  855. elif alpha < alpha_tr:
  856. # The iterate is restricted by a bound/linear constraint. Add this
  857. # constraint to the active set, and restart the calculations.
  858. if alpha_xl <= alpha:
  859. i_new = np.argmin(all_alpha_xl)
  860. step[i_new] = xl[i_new]
  861. free_xl[i_new] = False
  862. elif alpha_xu <= alpha:
  863. i_new = np.argmin(all_alpha_xu)
  864. step[i_new] = xu[i_new]
  865. free_xu[i_new] = False
  866. elif alpha_slack <= alpha:
  867. i_new = np.argmin(all_alpha_slack)
  868. free_slack[i_new] = False
  869. else:
  870. i_new = np.argmin(all_alpha_ub)
  871. free_ub[i_new] = False
  872. n_act, q = qr_normal_byrd_omojokun(
  873. aub, free_xl, free_xu, free_slack, free_ub
  874. )
  875. sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
  876. k = 0
  877. else:
  878. # The current iterate is on the trust-region boundary. Add all the
  879. # active bound constraints to the working set to prepare for the
  880. # improvement of the solution, and stop the iterations.
  881. if alpha_xl <= alpha:
  882. i_new = _argmin(all_alpha_xl)
  883. step[i_new] = xl[i_new]
  884. free_xl[i_new] = False
  885. if alpha_xu <= alpha:
  886. i_new = _argmin(all_alpha_xu)
  887. step[i_new] = xu[i_new]
  888. free_xu[i_new] = False
  889. boundary_reached = True
  890. break
  891. # Attempt to improve the solution on the trust-region boundary.
  892. if kwargs.get("improve_tcg", True) and boundary_reached:
  893. step_base = np.copy(step)
  894. free_bd = free_xl & free_xu
  895. grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
  896. aeq @ step - beq
  897. )
  898. sd = np.zeros(n)
  899. while np.count_nonzero(free_bd) > 0:
  900. # Check whether a substantial reduction in the objective function
  901. # is possible, and set the search direction.
  902. step_sq = step[free_bd] @ step[free_bd]
  903. grad_sq = grad[free_bd] @ grad[free_bd]
  904. grad_step = grad[free_bd] @ step[free_bd]
  905. grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
  906. sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
  907. sd[~free_bd] = 0.0
  908. if grad_sd >= -1e-8 * reduct or np.any(
  909. grad_sd >= -TINY * np.abs(sd[free_bd])
  910. ):
  911. break
  912. sd[free_bd] /= -grad_sd
  913. # Calculate an upper bound for the tangent of half the angle theta
  914. # of this alternative iteration. The step will be updated as:
  915. # step = cos(theta) * step + sin(theta) * sd.
  916. temp_xl = np.zeros(n)
  917. temp_xu = np.zeros(n)
  918. temp_xl[free_bd] = (
  919. step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
  920. )
  921. temp_xu[free_bd] = (
  922. step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
  923. )
  924. temp_xl[temp_xl > 0.0] = (
  925. np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
  926. )
  927. temp_xu[temp_xu > 0.0] = (
  928. np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
  929. )
  930. dist_xl = np.maximum(step - xl, 0.0)
  931. dist_xu = np.maximum(xu - step, 0.0)
  932. i_xl = temp_xl > TINY * dist_xl
  933. i_xu = temp_xu > TINY * dist_xu
  934. all_t_xl = np.ones(n)
  935. all_t_xu = np.ones(n)
  936. all_t_xl[i_xl] = np.minimum(
  937. all_t_xl[i_xl],
  938. dist_xl[i_xl] / temp_xl[i_xl],
  939. )
  940. all_t_xu[i_xu] = np.minimum(
  941. all_t_xu[i_xu],
  942. dist_xu[i_xu] / temp_xu[i_xu],
  943. )
  944. t_xl = np.min(all_t_xl)
  945. t_xu = np.min(all_t_xu)
  946. t_bd = min(t_xl, t_xu)
  947. # For a range of equally spaced values of tan(0.5 * theta),
  948. # calculate the reduction in the objective function that would be
  949. # obtained by accepting the corresponding angle.
  950. n_samples = 20
  951. n_samples = int((n_samples - 3) * t_bd + 3)
  952. t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
  953. resid_ub = np.maximum(aub @ step - bub, 0.0)
  954. resid_eq = aeq @ step - beq
  955. step_proj = np.copy(step)
  956. step_proj[~free_bd] = 0.0
  957. all_reduct = np.empty(n_samples)
  958. for i in range(n_samples):
  959. sin_value = 2.0 * t_samples[i] / (1.0 + t_samples[i] ** 2.0)
  960. step_alt = np.clip(
  961. step + sin_value * (sd - t_samples[i] * step_proj),
  962. xl,
  963. xu,
  964. )
  965. resid_ub_alt = np.maximum(aub @ step_alt - bub, 0.0)
  966. resid_eq_alt = aeq @ step_alt - beq
  967. all_reduct[i] = 0.5 * (
  968. resid_ub @ resid_ub
  969. + resid_eq @ resid_eq
  970. - resid_ub_alt @ resid_ub_alt
  971. - resid_eq_alt @ resid_eq_alt
  972. )
  973. if np.all(all_reduct <= 0.0):
  974. # No reduction in the objective function is obtained.
  975. break
  976. # Accept the angle that provides the largest reduction in the
  977. # objective function, and update the iterate.
  978. i_max = np.argmax(all_reduct)
  979. cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
  980. 1.0 + t_samples[i_max] ** 2.0
  981. )
  982. sin_value = (2.0 * t_samples[i_max]
  983. / (1.0 + t_samples[i_max] ** 2.0))
  984. step[free_bd] = cos_value * step[free_bd] + sin_value * sd[free_bd]
  985. grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
  986. aeq @ step - beq
  987. )
  988. reduct += all_reduct[i_max]
  989. # If the above angle is restricted by bound constraints, add them
  990. # to the working set, and restart the alternative iteration.
  991. # Otherwise, the calculations are terminated.
  992. if t_bd < 1.0 and i_max == n_samples - 1:
  993. if t_xl <= t_bd:
  994. i_new = _argmin(all_t_xl)
  995. step[i_new] = xl[i_new]
  996. free_bd[i_new] = False
  997. if t_xu <= t_bd:
  998. i_new = _argmin(all_t_xu)
  999. step[i_new] = xu[i_new]
  1000. free_bd[i_new] = False
  1001. else:
  1002. break
  1003. # Ensure that the alternative iteration improves the objective
  1004. # function.
  1005. resid_ub = np.maximum(aub @ step - bub, 0.0)
  1006. resid_ub_base = np.maximum(aub @ step_base - bub, 0.0)
  1007. resid_eq = aeq @ step - beq
  1008. resid_eq_base = aeq @ step_base - beq
  1009. if (
  1010. resid_ub @ resid_ub + resid_eq @ resid_eq
  1011. > resid_ub_base @ resid_ub_base + resid_eq_base @ resid_eq_base
  1012. ):
  1013. step = step_base
  1014. if debug:
  1015. assert np.all(xl <= step)
  1016. assert np.all(step <= xu)
  1017. assert np.linalg.norm(step) < 1.1 * delta
  1018. return step
  1019. def qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub):
  1020. n = free_xl.size
  1021. identity = np.eye(n)
  1022. q, r, _ = qr(
  1023. np.block(
  1024. [
  1025. [aeq],
  1026. [aub[~free_ub, :]],
  1027. [-identity[~free_xl, :]],
  1028. [identity[~free_xu, :]],
  1029. ]
  1030. ).T,
  1031. pivoting=True,
  1032. )
  1033. n_act = np.count_nonzero(
  1034. np.abs(np.diag(r))
  1035. >= 10.0
  1036. * EPS
  1037. * n
  1038. * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
  1039. )
  1040. return n_act, q
  1041. def qr_normal_byrd_omojokun(aub, free_xl, free_xu, free_slack, free_ub):
  1042. m_linear_ub, n = aub.shape
  1043. identity_n = np.eye(n)
  1044. identity_m = np.eye(m_linear_ub)
  1045. q, r, _ = qr(
  1046. np.block(
  1047. [
  1048. [
  1049. aub[~free_ub, :],
  1050. -identity_m[~free_ub, :],
  1051. ],
  1052. [
  1053. np.zeros((m_linear_ub - np.count_nonzero(free_slack), n)),
  1054. -identity_m[~free_slack, :],
  1055. ],
  1056. [
  1057. -identity_n[~free_xl, :],
  1058. np.zeros((n - np.count_nonzero(free_xl), m_linear_ub)),
  1059. ],
  1060. [
  1061. identity_n[~free_xu, :],
  1062. np.zeros((n - np.count_nonzero(free_xu), m_linear_ub)),
  1063. ],
  1064. ]
  1065. ).T,
  1066. pivoting=True,
  1067. )
  1068. n_act = np.count_nonzero(
  1069. np.abs(np.diag(r))
  1070. >= 10.0
  1071. * EPS
  1072. * (n + m_linear_ub)
  1073. * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
  1074. )
  1075. return n_act, q
  1076. def _alpha_tr(step, sd, delta):
  1077. step_sd = step @ sd
  1078. sd_sq = sd @ sd
  1079. dist_tr_sq = delta**2.0 - step @ step
  1080. temp = np.sqrt(max(step_sd**2.0 + sd_sq * dist_tr_sq, 0.0))
  1081. if step_sd <= 0.0 and sd_sq > TINY * abs(temp - step_sd):
  1082. alpha_tr = max((temp - step_sd) / sd_sq, 0.0)
  1083. elif abs(temp + step_sd) > TINY * dist_tr_sq:
  1084. alpha_tr = max(dist_tr_sq / (temp + step_sd), 0.0)
  1085. else:
  1086. raise ZeroDivisionError
  1087. return alpha_tr
  1088. def _argmax(x):
  1089. return np.flatnonzero(x >= np.max(x))
  1090. def _argmin(x):
  1091. return np.flatnonzero(x <= np.min(x))