| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203 |
- import inspect
- import numpy as np
- from scipy.linalg import qr
- from ..utils import get_arrays_tol
- TINY = np.finfo(float).tiny
- EPS = np.finfo(float).eps
- def tangential_byrd_omojokun(grad, hess_prod, xl, xu, delta, debug, **kwargs):
- r"""
- Minimize approximately a quadratic function subject to bound constraints in
- a trust region.
- This function solves approximately
- .. math::
- \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
- s^{\mathsf{T}} H s \quad \text{s.t.} \quad
- \left\{ \begin{array}{l}
- l \le s \le u\\
- \lVert s \rVert \le \Delta,
- \end{array} \right.
- using an active-set variation of the truncated conjugate gradient method.
- Parameters
- ----------
- grad : `numpy.ndarray`, shape (n,)
- Gradient :math:`g` as shown above.
- hess_prod : callable
- Product of the Hessian matrix :math:`H` with any vector.
- ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``
- returns the product :math:`H s`.
- xl : `numpy.ndarray`, shape (n,)
- Lower bounds :math:`l` as shown above.
- xu : `numpy.ndarray`, shape (n,)
- Upper bounds :math:`u` as shown above.
- delta : float
- Trust-region radius :math:`\Delta` as shown above.
- debug : bool
- Whether to make debugging tests during the execution.
- Returns
- -------
- `numpy.ndarray`, shape (n,)
- Approximate solution :math:`s`.
- Other Parameters
- ----------------
- improve_tcg : bool, optional
- If True, a solution generated by the truncated conjugate gradient
- method that is on the boundary of the trust region is improved by
- moving around the trust-region boundary on the two-dimensional space
- spanned by the solution and the gradient of the quadratic function at
- the solution (default is True).
- Notes
- -----
- This function implements Algorithm 6.2 of [1]_. It is assumed that the
- origin is feasible with respect to the bound constraints and that `delta`
- is finite and positive.
- References
- ----------
- .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
- and Software*. PhD thesis, Department of Applied Mathematics, The Hong
- Kong Polytechnic University, Hong Kong, China, 2022. URL:
- https://theses.lib.polyu.edu.hk/handle/200/12294.
- """
- if debug:
- assert isinstance(grad, np.ndarray) and grad.ndim == 1
- assert inspect.signature(hess_prod).bind(grad)
- assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
- assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
- assert isinstance(delta, float)
- assert isinstance(debug, bool)
- tol = get_arrays_tol(xl, xu)
- assert np.all(xl <= tol)
- assert np.all(xu >= -tol)
- assert np.isfinite(delta) and delta > 0.0
- xl = np.minimum(xl, 0.0)
- xu = np.maximum(xu, 0.0)
- # Copy the arrays that may be modified by the code below.
- n = grad.size
- grad = np.copy(grad)
- grad_orig = np.copy(grad)
- # Calculate the initial active set.
- free_bd = ((xl < 0.0) | (grad < 0.0)) & ((xu > 0.0) | (grad > 0.0))
- # Set the initial iterate and the initial search direction.
- step = np.zeros_like(grad)
- sd = np.zeros_like(step)
- sd[free_bd] = -grad[free_bd]
- k = 0
- reduct = 0.0
- boundary_reached = False
- while k < np.count_nonzero(free_bd):
- # Stop the computations if sd is not a descent direction.
- grad_sd = grad @ sd
- if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
- break
- # Set alpha_tr to the step size for the trust-region constraint.
- try:
- alpha_tr = _alpha_tr(step, sd, delta)
- except ZeroDivisionError:
- break
- # Stop the computations if a step along sd is expected to give a
- # relatively small reduction in the objective function.
- if -alpha_tr * grad_sd <= 1e-8 * reduct:
- break
- # Set alpha_quad to the step size for the minimization problem.
- hess_sd = hess_prod(sd)
- curv_sd = sd @ hess_sd
- if curv_sd > TINY * abs(grad_sd):
- alpha_quad = max(-grad_sd / curv_sd, 0.0)
- else:
- alpha_quad = np.inf
- # Stop the computations if the reduction in the objective function
- # provided by an unconstrained step is small.
- alpha = min(alpha_tr, alpha_quad)
- if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
- break
- # Set alpha_bd to the step size for the bound constraints.
- i_xl = (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
- i_xu = (xu < np.inf) & (sd > TINY * np.abs(xu - step))
- all_alpha_xl = np.full_like(step, np.inf)
- all_alpha_xu = np.full_like(step, np.inf)
- all_alpha_xl[i_xl] = np.maximum(
- (xl[i_xl] - step[i_xl]) / sd[i_xl],
- 0.0,
- )
- all_alpha_xu[i_xu] = np.maximum(
- (xu[i_xu] - step[i_xu]) / sd[i_xu],
- 0.0,
- )
- alpha_xl = np.min(all_alpha_xl)
- alpha_xu = np.min(all_alpha_xu)
- alpha_bd = min(alpha_xl, alpha_xu)
- # Update the iterate.
- alpha = min(alpha, alpha_bd)
- if alpha > 0.0:
- step[free_bd] = np.clip(
- step[free_bd] + alpha * sd[free_bd],
- xl[free_bd],
- xu[free_bd],
- )
- grad += alpha * hess_sd
- reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
- if alpha < min(alpha_tr, alpha_bd):
- # The current iteration is a conjugate gradient iteration. Update
- # the search direction so that it is conjugate (with respect to H)
- # to all the previous search directions.
- beta = (grad[free_bd] @ hess_sd[free_bd]) / curv_sd
- sd[free_bd] = beta * sd[free_bd] - grad[free_bd]
- sd[~free_bd] = 0.0
- k += 1
- elif alpha < alpha_tr:
- # The iterate is restricted by a bound constraint. Add this bound
- # constraint to the active set, and restart the calculations.
- if alpha_xl <= alpha:
- i_new = np.argmin(all_alpha_xl)
- step[i_new] = xl[i_new]
- else:
- i_new = np.argmin(all_alpha_xu)
- step[i_new] = xu[i_new]
- free_bd[i_new] = False
- sd[free_bd] = -grad[free_bd]
- sd[~free_bd] = 0.0
- k = 0
- else:
- # The current iterate is on the trust-region boundary. Add all the
- # active bounds to the working set to prepare for the improvement
- # of the solution, and stop the iterations.
- if alpha_xl <= alpha:
- i_new = _argmin(all_alpha_xl)
- step[i_new] = xl[i_new]
- free_bd[i_new] = False
- if alpha_xu <= alpha:
- i_new = _argmin(all_alpha_xu)
- step[i_new] = xu[i_new]
- free_bd[i_new] = False
- boundary_reached = True
- break
- # Attempt to improve the solution on the trust-region boundary.
- if kwargs.get("improve_tcg", True) and boundary_reached:
- step_base = np.copy(step)
- step_comparator = grad_orig @ step_base + 0.5 * step_base @ hess_prod(
- step_base
- )
- while np.count_nonzero(free_bd) > 0:
- # Check whether a substantial reduction in the objective function
- # is possible, and set the search direction.
- step_sq = step[free_bd] @ step[free_bd]
- grad_sq = grad[free_bd] @ grad[free_bd]
- grad_step = grad[free_bd] @ step[free_bd]
- grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
- sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
- sd[~free_bd] = 0.0
- if grad_sd >= -1e-8 * reduct or np.any(
- grad_sd >= -TINY * np.abs(sd[free_bd])
- ):
- break
- sd[free_bd] /= -grad_sd
- # Calculate an upper bound for the tangent of half the angle theta
- # of this alternative iteration. The step will be updated as:
- # step = cos(theta) * step + sin(theta) * sd.
- temp_xl = np.zeros(n)
- temp_xu = np.zeros(n)
- temp_xl[free_bd] = (
- step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
- )
- temp_xu[free_bd] = (
- step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
- )
- temp_xl[temp_xl > 0.0] = (
- np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
- )
- temp_xu[temp_xu > 0.0] = (
- np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
- )
- dist_xl = np.maximum(step - xl, 0.0)
- dist_xu = np.maximum(xu - step, 0.0)
- i_xl = temp_xl > TINY * dist_xl
- i_xu = temp_xu > TINY * dist_xu
- all_t_xl = np.ones(n)
- all_t_xu = np.ones(n)
- all_t_xl[i_xl] = np.minimum(
- all_t_xl[i_xl],
- dist_xl[i_xl] / temp_xl[i_xl],
- )
- all_t_xu[i_xu] = np.minimum(
- all_t_xu[i_xu],
- dist_xu[i_xu] / temp_xu[i_xu],
- )
- t_xl = np.min(all_t_xl)
- t_xu = np.min(all_t_xu)
- t_bd = min(t_xl, t_xu)
- # Calculate some curvature information.
- hess_step = hess_prod(step)
- hess_sd = hess_prod(sd)
- curv_step = step @ hess_step
- curv_sd = sd @ hess_sd
- curv_step_sd = step @ hess_sd
- # For a range of equally spaced values of tan(0.5 * theta),
- # calculate the reduction in the objective function that would be
- # obtained by accepting the corresponding angle.
- n_samples = 20
- n_samples = int((n_samples - 3) * t_bd + 3)
- t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
- sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
- all_reduct = sin_values * (
- grad_step * t_samples
- - grad_sd
- - t_samples * curv_step
- + sin_values
- * (t_samples * curv_step_sd - 0.5 * (curv_sd - curv_step))
- )
- if np.all(all_reduct <= 0.0):
- # No reduction in the objective function is obtained.
- break
- # Accept the angle that provides the largest reduction in the
- # objective function, and update the iterate.
- i_max = np.argmax(all_reduct)
- cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
- 1.0 + t_samples[i_max] ** 2.0
- )
- step[free_bd] = (
- cos_value * step[free_bd] + sin_values[i_max] * sd[free_bd]
- )
- grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
- reduct += all_reduct[i_max]
- # If the above angle is restricted by bound constraints, add them
- # to the working set, and restart the alternative iteration.
- # Otherwise, the calculations are terminated.
- if t_bd < 1.0 and i_max == n_samples - 1:
- if t_xl <= t_bd:
- i_new = _argmin(all_t_xl)
- step[i_new] = xl[i_new]
- free_bd[i_new] = False
- if t_xu <= t_bd:
- i_new = _argmin(all_t_xu)
- step[i_new] = xu[i_new]
- free_bd[i_new] = False
- else:
- break
- # Ensure that the alternative iteration improves the objective
- # function.
- if grad_orig @ step + 0.5 * step @ hess_prod(step) > step_comparator:
- step = step_base
- if debug:
- assert np.all(xl <= step)
- assert np.all(step <= xu)
- assert np.linalg.norm(step) < 1.1 * delta
- return step
- def constrained_tangential_byrd_omojokun(
- grad,
- hess_prod,
- xl,
- xu,
- aub,
- bub,
- aeq,
- delta,
- debug,
- **kwargs,
- ):
- r"""
- Minimize approximately a quadratic function subject to bound and linear
- constraints in a trust region.
- This function solves approximately
- .. math::
- \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
- s^{\mathsf{T}} H s \quad \text{s.t.} \quad
- \left\{ \begin{array}{l}
- l \le s \le u,\\
- A_{\scriptscriptstyle I} s \le b_{\scriptscriptstyle I},\\
- A_{\scriptscriptstyle E} s = 0,\\
- \lVert s \rVert \le \Delta,
- \end{array} \right.
- using an active-set variation of the truncated conjugate gradient method.
- Parameters
- ----------
- grad : `numpy.ndarray`, shape (n,)
- Gradient :math:`g` as shown above.
- hess_prod : callable
- Product of the Hessian matrix :math:`H` with any vector.
- ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``
- returns the product :math:`H s`.
- xl : `numpy.ndarray`, shape (n,)
- Lower bounds :math:`l` as shown above.
- xu : `numpy.ndarray`, shape (n,)
- Upper bounds :math:`u` as shown above.
- aub : `numpy.ndarray`, shape (m_linear_ub, n)
- Coefficient matrix :math:`A_{\scriptscriptstyle I}` as shown above.
- bub : `numpy.ndarray`, shape (m_linear_ub,)
- Right-hand side :math:`b_{\scriptscriptstyle I}` as shown above.
- aeq : `numpy.ndarray`, shape (m_linear_eq, n)
- Coefficient matrix :math:`A_{\scriptscriptstyle E}` as shown above.
- delta : float
- Trust-region radius :math:`\Delta` as shown above.
- debug : bool
- Whether to make debugging tests during the execution.
- Returns
- -------
- `numpy.ndarray`, shape (n,)
- Approximate solution :math:`s`.
- Other Parameters
- ----------------
- improve_tcg : bool, optional
- If True, a solution generated by the truncated conjugate gradient
- method that is on the boundary of the trust region is improved by
- moving around the trust-region boundary on the two-dimensional space
- spanned by the solution and the gradient of the quadratic function at
- the solution (default is True).
- Notes
- -----
- This function implements Algorithm 6.3 of [1]_. It is assumed that the
- origin is feasible with respect to the bound and linear constraints, and
- that `delta` is finite and positive.
- References
- ----------
- .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
- and Software*. PhD thesis, Department of Applied Mathematics, The Hong
- Kong Polytechnic University, Hong Kong, China, 2022. URL:
- https://theses.lib.polyu.edu.hk/handle/200/12294.
- """
- if debug:
- assert isinstance(grad, np.ndarray) and grad.ndim == 1
- assert inspect.signature(hess_prod).bind(grad)
- assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
- assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
- assert (
- isinstance(aub, np.ndarray)
- and aub.ndim == 2
- and aub.shape[1] == grad.size
- )
- assert (
- isinstance(bub, np.ndarray)
- and bub.ndim == 1
- and bub.size == aub.shape[0]
- )
- assert (
- isinstance(aeq, np.ndarray)
- and aeq.ndim == 2
- and aeq.shape[1] == grad.size
- )
- assert isinstance(delta, float)
- assert isinstance(debug, bool)
- tol = get_arrays_tol(xl, xu)
- assert np.all(xl <= tol)
- assert np.all(xu >= -tol)
- assert np.all(bub >= -tol)
- assert np.isfinite(delta) and delta > 0.0
- xl = np.minimum(xl, 0.0)
- xu = np.maximum(xu, 0.0)
- bub = np.maximum(bub, 0.0)
- # Copy the arrays that may be modified by the code below.
- n = grad.size
- grad = np.copy(grad)
- grad_orig = np.copy(grad)
- # Calculate the initial active set.
- free_xl = (xl < 0.0) | (grad < 0.0)
- free_xu = (xu > 0.0) | (grad > 0.0)
- free_ub = (bub > 0.0) | (aub @ grad > 0.0)
- n_act, q = qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub)
- # Set the initial iterate and the initial search direction.
- step = np.zeros_like(grad)
- sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
- resid = np.copy(bub)
- k = 0
- reduct = 0.0
- boundary_reached = False
- while k < n - n_act:
- # Stop the computations if sd is not a descent direction.
- grad_sd = grad @ sd
- if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
- break
- # Set alpha_tr to the step size for the trust-region constraint.
- try:
- alpha_tr = _alpha_tr(step, sd, delta)
- except ZeroDivisionError:
- break
- # Stop the computations if a step along sd is expected to give a
- # relatively small reduction in the objective function.
- if -alpha_tr * grad_sd <= 1e-8 * reduct:
- break
- # Set alpha_quad to the step size for the minimization problem.
- hess_sd = hess_prod(sd)
- curv_sd = sd @ hess_sd
- if curv_sd > TINY * abs(grad_sd):
- alpha_quad = max(-grad_sd / curv_sd, 0.0)
- else:
- alpha_quad = np.inf
- # Stop the computations if the reduction in the objective function
- # provided by an unconstrained step is small.
- alpha = min(alpha_tr, alpha_quad)
- if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
- break
- # Set alpha_bd to the step size for the bound constraints.
- i_xl = free_xl & (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
- i_xu = free_xu & (xu < np.inf) & (sd > TINY * np.abs(xu - step))
- all_alpha_xl = np.full_like(step, np.inf)
- all_alpha_xu = np.full_like(step, np.inf)
- all_alpha_xl[i_xl] = np.maximum(
- (xl[i_xl] - step[i_xl]) / sd[i_xl],
- 0.0,
- )
- all_alpha_xu[i_xu] = np.maximum(
- (xu[i_xu] - step[i_xu]) / sd[i_xu],
- 0.0,
- )
- alpha_xl = np.min(all_alpha_xl)
- alpha_xu = np.min(all_alpha_xu)
- alpha_bd = min(alpha_xl, alpha_xu)
- # Set alpha_ub to the step size for the linear constraints.
- aub_sd = aub @ sd
- i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
- all_alpha_ub = np.full_like(bub, np.inf)
- all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
- alpha_ub = np.min(all_alpha_ub, initial=np.inf)
- # Update the iterate.
- alpha = min(alpha, alpha_bd, alpha_ub)
- if alpha > 0.0:
- step = np.clip(step + alpha * sd, xl, xu)
- grad += alpha * hess_sd
- resid = np.maximum(0.0, resid - alpha * aub_sd)
- reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
- if alpha < min(alpha_tr, alpha_bd, alpha_ub):
- # The current iteration is a conjugate gradient iteration. Update
- # the search direction so that it is conjugate (with respect to H)
- # to all the previous search directions.
- grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
- beta = (grad_proj @ hess_sd) / curv_sd
- sd = beta * sd - grad_proj
- k += 1
- elif alpha < alpha_tr:
- # The iterate is restricted by a bound/linear constraint. Add this
- # constraint to the active set, and restart the calculations.
- if alpha_xl <= alpha:
- i_new = np.argmin(all_alpha_xl)
- step[i_new] = xl[i_new]
- free_xl[i_new] = False
- elif alpha_xu <= alpha:
- i_new = np.argmin(all_alpha_xu)
- step[i_new] = xu[i_new]
- free_xu[i_new] = False
- else:
- i_new = np.argmin(all_alpha_ub)
- free_ub[i_new] = False
- n_act, q = qr_tangential_byrd_omojokun(
- aub,
- aeq,
- free_xl,
- free_xu,
- free_ub,
- )
- sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
- k = 0
- else:
- # The current iterate is on the trust-region boundary. Add all the
- # active bound/linear constraints to the working set to prepare for
- # the improvement of the solution, and stop the iterations.
- if alpha_xl <= alpha:
- i_new = _argmin(all_alpha_xl)
- step[i_new] = xl[i_new]
- free_xl[i_new] = False
- if alpha_xu <= alpha:
- i_new = _argmin(all_alpha_xu)
- step[i_new] = xu[i_new]
- free_xu[i_new] = False
- if alpha_ub <= alpha:
- i_new = _argmin(all_alpha_ub)
- free_ub[i_new] = False
- n_act, q = qr_tangential_byrd_omojokun(
- aub,
- aeq,
- free_xl,
- free_xu,
- free_ub,
- )
- boundary_reached = True
- break
- # Attempt to improve the solution on the trust-region boundary.
- if kwargs.get("improve_tcg", True) and boundary_reached and n_act < n:
- step_base = np.copy(step)
- while n_act < n:
- # Check whether a substantial reduction in the objective function
- # is possible, and set the search direction.
- step_proj = q[:, n_act:] @ (q[:, n_act:].T @ step)
- grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
- step_sq = step_proj @ step_proj
- grad_sq = grad_proj @ grad_proj
- grad_step = grad_proj @ step_proj
- grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
- sd = q[:, n_act:] @ (
- q[:, n_act:].T @ (grad_step * step - step_sq * grad)
- )
- if grad_sd >= -1e-8 * reduct or np.any(
- grad_sd >= -TINY * np.abs(sd)
- ):
- break
- sd /= -grad_sd
- # Calculate an upper bound for the tangent of half the angle theta
- # of this alternative iteration for the bound constraints. The step
- # will be updated as:
- # step += (cos(theta) - 1) * step_proj + sin(theta) * sd.
- temp_xl = np.zeros(n)
- temp_xu = np.zeros(n)
- dist_xl = np.maximum(step - xl, 0.0)
- dist_xu = np.maximum(xu - step, 0.0)
- temp_xl[free_xl] = sd[free_xl] ** 2.0 - dist_xl[free_xl] * (
- dist_xl[free_xl] - 2.0 * step_proj[free_xl]
- )
- temp_xu[free_xu] = sd[free_xu] ** 2.0 - dist_xu[free_xu] * (
- dist_xu[free_xu] + 2.0 * step_proj[free_xu]
- )
- temp_xl[temp_xl > 0.0] = (
- np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
- )
- temp_xu[temp_xu > 0.0] = (
- np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
- )
- i_xl = temp_xl > TINY * dist_xl
- i_xu = temp_xu > TINY * dist_xu
- all_t_xl = np.ones(n)
- all_t_xu = np.ones(n)
- all_t_xl[i_xl] = np.minimum(
- all_t_xl[i_xl],
- dist_xl[i_xl] / temp_xl[i_xl],
- )
- all_t_xu[i_xu] = np.minimum(
- all_t_xu[i_xu],
- dist_xu[i_xu] / temp_xu[i_xu],
- )
- t_xl = np.min(all_t_xl)
- t_xu = np.min(all_t_xu)
- t_bd = min(t_xl, t_xu)
- # Calculate an upper bound for the tangent of half the angle theta
- # of this alternative iteration for the linear constraints.
- temp_ub = np.zeros_like(resid)
- aub_step = aub @ step_proj
- aub_sd = aub @ sd
- temp_ub[free_ub] = aub_sd[free_ub] ** 2.0 - resid[free_ub] * (
- resid[free_ub] + 2.0 * aub_step[free_ub]
- )
- temp_ub[temp_ub > 0.0] = (
- np.sqrt(temp_ub[temp_ub > 0.0]) + aub_sd[temp_ub > 0.0]
- )
- i_ub = temp_ub > TINY * resid
- all_t_ub = np.ones_like(resid)
- all_t_ub[i_ub] = np.minimum(
- all_t_ub[i_ub],
- resid[i_ub] / temp_ub[i_ub],
- )
- t_ub = np.min(all_t_ub, initial=1.0)
- t_min = min(t_bd, t_ub)
- # Calculate some curvature information.
- hess_step = hess_prod(step_proj)
- hess_sd = hess_prod(sd)
- curv_step = step_proj @ hess_step
- curv_sd = sd @ hess_sd
- curv_step_sd = step_proj @ hess_sd
- # For a range of equally spaced values of tan(0.5 * theta),
- # calculate the reduction in the objective function that would be
- # obtained by accepting the corresponding angle.
- n_samples = 20
- n_samples = int((n_samples - 3) * t_min + 3)
- t_samples = np.linspace(t_min / n_samples, t_min, n_samples)
- sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
- all_reduct = sin_values * (
- grad_step * t_samples
- - grad_sd
- - sin_values
- * (
- 0.5 * t_samples**2.0 * curv_step
- - 2.0 * t_samples * curv_step_sd
- + 0.5 * curv_sd
- )
- )
- if np.all(all_reduct <= 0.0):
- # No reduction in the objective function is obtained.
- break
- # Accept the angle that provides the largest reduction in the
- # objective function, and update the iterate.
- i_max = np.argmax(all_reduct)
- cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
- 1.0 + t_samples[i_max] ** 2.0
- )
- step = np.clip(
- step + (cos_value - 1.0) * step_proj + sin_values[i_max] * sd,
- xl,
- xu,
- )
- grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
- resid = np.maximum(
- 0.0,
- resid
- - (cos_value - 1.0) * aub_step
- - sin_values[i_max] * aub_sd,
- )
- reduct += all_reduct[i_max]
- # If the above angle is restricted by bound constraints, add them
- # to the working set, and restart the alternative iteration.
- # Otherwise, the calculations are terminated.
- if t_min < 1.0 and i_max == n_samples - 1:
- if t_xl <= t_min:
- i_new = _argmin(all_t_xl)
- step[i_new] = xl[i_new]
- free_xl[i_new] = False
- if t_xu <= t_min:
- i_new = _argmin(all_t_xu)
- step[i_new] = xu[i_new]
- free_xl[i_new] = False
- if t_ub <= t_min:
- i_new = _argmin(all_t_ub)
- free_ub[i_new] = False
- n_act, q = qr_tangential_byrd_omojokun(
- aub,
- aeq,
- free_xl,
- free_xu,
- free_ub,
- )
- else:
- break
- # Ensure that the alternative iteration improves the objective
- # function.
- if grad_orig @ step + 0.5 * step @ hess_prod(
- step
- ) > grad_orig @ step_base + 0.5 * step_base @ hess_prod(step_base):
- step = step_base
- if debug:
- tol = get_arrays_tol(xl, xu)
- assert np.all(xl <= step)
- assert np.all(step <= xu)
- assert np.all(aub @ step <= bub + tol)
- assert np.all(np.abs(aeq @ step) <= tol)
- assert np.linalg.norm(step) < 1.1 * delta
- return step
- def normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, delta, debug, **kwargs):
- r"""
- Minimize approximately a linear constraint violation subject to bound
- constraints in a trust region.
- This function solves approximately
- .. math::
- \min_{s \in \mathbb{R}^n} \quad \frac{1}{2} \big( \lVert \max \{
- A_{\scriptscriptstyle I} s - b_{\scriptscriptstyle I}, 0 \} \rVert^2 +
- \lVert A_{\scriptscriptstyle E} s - b_{\scriptscriptstyle E} \rVert^2
- \big) \quad \text{s.t.}
- \quad
- \left\{ \begin{array}{l}
- l \le s \le u,\\
- \lVert s \rVert \le \Delta,
- \end{array} \right.
- using a variation of the truncated conjugate gradient method.
- Parameters
- ----------
- aub : `numpy.ndarray`, shape (m_linear_ub, n)
- Matrix :math:`A_{\scriptscriptstyle I}` as shown above.
- bub : `numpy.ndarray`, shape (m_linear_ub,)
- Vector :math:`b_{\scriptscriptstyle I}` as shown above.
- aeq : `numpy.ndarray`, shape (m_linear_eq, n)
- Matrix :math:`A_{\scriptscriptstyle E}` as shown above.
- beq : `numpy.ndarray`, shape (m_linear_eq,)
- Vector :math:`b_{\scriptscriptstyle E}` as shown above.
- xl : `numpy.ndarray`, shape (n,)
- Lower bounds :math:`l` as shown above.
- xu : `numpy.ndarray`, shape (n,)
- Upper bounds :math:`u` as shown above.
- delta : float
- Trust-region radius :math:`\Delta` as shown above.
- debug : bool
- Whether to make debugging tests during the execution.
- Returns
- -------
- `numpy.ndarray`, shape (n,)
- Approximate solution :math:`s`.
- Other Parameters
- ----------------
- improve_tcg : bool, optional
- If True, a solution generated by the truncated conjugate gradient
- method that is on the boundary of the trust region is improved by
- moving around the trust-region boundary on the two-dimensional space
- spanned by the solution and the gradient of the quadratic function at
- the solution (default is True).
- Notes
- -----
- This function implements Algorithm 6.4 of [1]_. It is assumed that the
- origin is feasible with respect to the bound constraints and that `delta`
- is finite and positive.
- References
- ----------
- .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
- and Software*. PhD thesis, Department of Applied Mathematics, The Hong
- Kong Polytechnic University, Hong Kong, China, 2022. URL:
- https://theses.lib.polyu.edu.hk/handle/200/12294.
- """
- if debug:
- assert isinstance(aub, np.ndarray) and aub.ndim == 2
- assert (
- isinstance(bub, np.ndarray)
- and bub.ndim == 1
- and bub.size == aub.shape[0]
- )
- assert (
- isinstance(aeq, np.ndarray)
- and aeq.ndim == 2
- and aeq.shape[1] == aub.shape[1]
- )
- assert (
- isinstance(beq, np.ndarray)
- and beq.ndim == 1
- and beq.size == aeq.shape[0]
- )
- assert isinstance(xl, np.ndarray) and xl.shape == (aub.shape[1],)
- assert isinstance(xu, np.ndarray) and xu.shape == (aub.shape[1],)
- assert isinstance(delta, float)
- assert isinstance(debug, bool)
- tol = get_arrays_tol(xl, xu)
- assert np.all(xl <= tol)
- assert np.all(xu >= -tol)
- assert np.isfinite(delta) and delta > 0.0
- xl = np.minimum(xl, 0.0)
- xu = np.maximum(xu, 0.0)
- # Calculate the initial active set.
- m_linear_ub, n = aub.shape
- grad = np.r_[aeq.T @ -beq, np.maximum(0.0, -bub)]
- free_xl = (xl < 0.0) | (grad[:n] < 0.0)
- free_xu = (xu > 0.0) | (grad[:n] > 0.0)
- free_slack = bub < 0.0
- free_ub = (bub > 0.0) | (aub @ grad[:n] - grad[n:] > 0.0)
- n_act, q = qr_normal_byrd_omojokun(
- aub,
- free_xl,
- free_xu,
- free_slack,
- free_ub,
- )
- # Calculate an upper bound on the norm of the slack variables. It is not
- # used in the original algorithm, but it may prevent undesired behaviors
- # engendered by computer rounding errors.
- delta_slack = np.sqrt(beq @ beq + grad[n:] @ grad[n:])
- # Set the initial iterate and the initial search direction.
- step = np.zeros(n)
- sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
- resid = bub + grad[n:]
- k = 0
- reduct = 0.0
- boundary_reached = False
- while k < n + m_linear_ub - n_act:
- # Stop the computations if sd is not a descent direction.
- grad_sd = grad @ sd
- if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
- break
- # Set alpha_tr to the step size for the trust-region constraint.
- try:
- alpha_tr = _alpha_tr(step, sd[:n], delta)
- except ZeroDivisionError:
- alpha_tr = np.inf
- # Prevent undesired behaviors engendered by computer rounding errors by
- # considering the trust-region constraint on the slack variables.
- try:
- alpha_tr = min(alpha_tr, _alpha_tr(grad[n:], sd[n:], delta_slack))
- except ZeroDivisionError:
- pass
- # Stop the computations if a step along sd is expected to give a
- # relatively small reduction in the objective function.
- if -alpha_tr * grad_sd <= 1e-8 * reduct:
- break
- # Set alpha_quad to the step size for the minimization problem.
- hess_sd = np.r_[aeq.T @ (aeq @ sd[:n]), sd[n:]]
- curv_sd = sd @ hess_sd
- if curv_sd > TINY * abs(grad_sd):
- alpha_quad = max(-grad_sd / curv_sd, 0.0)
- else:
- alpha_quad = np.inf
- # Stop the computations if the reduction in the objective function
- # provided by an unconstrained step is small.
- alpha = min(alpha_tr, alpha_quad)
- if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
- break
- # Set alpha_bd to the step size for the bound constraints.
- i_xl = free_xl & (xl > -np.inf) & (sd[:n] < -TINY * np.abs(xl - step))
- i_xu = free_xu & (xu < np.inf) & (sd[:n] > TINY * np.abs(xu - step))
- i_slack = free_slack & (sd[n:] < -TINY * np.abs(grad[n:]))
- all_alpha_xl = np.full_like(step, np.inf)
- all_alpha_xu = np.full_like(step, np.inf)
- all_alpha_slack = np.full_like(bub, np.inf)
- all_alpha_xl[i_xl] = np.maximum(
- (xl[i_xl] - step[i_xl]) / sd[:n][i_xl],
- 0.0,
- )
- all_alpha_xu[i_xu] = np.maximum(
- (xu[i_xu] - step[i_xu]) / sd[:n][i_xu],
- 0.0,
- )
- all_alpha_slack[i_slack] = np.maximum(
- -grad[n:][i_slack] / sd[n:][i_slack],
- 0.0,
- )
- alpha_xl = np.min(all_alpha_xl)
- alpha_xu = np.min(all_alpha_xu)
- alpha_slack = np.min(all_alpha_slack, initial=np.inf)
- alpha_bd = min(alpha_xl, alpha_xu, alpha_slack)
- # Set alpha_ub to the step size for the linear constraints.
- aub_sd = aub @ sd[:n] - sd[n:]
- i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
- all_alpha_ub = np.full_like(bub, np.inf)
- all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
- alpha_ub = np.min(all_alpha_ub, initial=np.inf)
- # Update the iterate.
- alpha = min(alpha, alpha_bd, alpha_ub)
- if alpha > 0.0:
- step = np.clip(step + alpha * sd[:n], xl, xu)
- grad += alpha * hess_sd
- resid = np.maximum(0.0, resid - alpha * aub_sd)
- reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
- if alpha < min(alpha_tr, alpha_bd, alpha_ub):
- # The current iteration is a conjugate gradient iteration. Update
- # the search direction so that it is conjugate (with respect to H)
- # to all the previous search directions.
- grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
- beta = (grad_proj @ hess_sd) / curv_sd
- sd = beta * sd - grad_proj
- k += 1
- elif alpha < alpha_tr:
- # The iterate is restricted by a bound/linear constraint. Add this
- # constraint to the active set, and restart the calculations.
- if alpha_xl <= alpha:
- i_new = np.argmin(all_alpha_xl)
- step[i_new] = xl[i_new]
- free_xl[i_new] = False
- elif alpha_xu <= alpha:
- i_new = np.argmin(all_alpha_xu)
- step[i_new] = xu[i_new]
- free_xu[i_new] = False
- elif alpha_slack <= alpha:
- i_new = np.argmin(all_alpha_slack)
- free_slack[i_new] = False
- else:
- i_new = np.argmin(all_alpha_ub)
- free_ub[i_new] = False
- n_act, q = qr_normal_byrd_omojokun(
- aub, free_xl, free_xu, free_slack, free_ub
- )
- sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
- k = 0
- else:
- # The current iterate is on the trust-region boundary. Add all the
- # active bound constraints to the working set to prepare for the
- # improvement of the solution, and stop the iterations.
- if alpha_xl <= alpha:
- i_new = _argmin(all_alpha_xl)
- step[i_new] = xl[i_new]
- free_xl[i_new] = False
- if alpha_xu <= alpha:
- i_new = _argmin(all_alpha_xu)
- step[i_new] = xu[i_new]
- free_xu[i_new] = False
- boundary_reached = True
- break
- # Attempt to improve the solution on the trust-region boundary.
- if kwargs.get("improve_tcg", True) and boundary_reached:
- step_base = np.copy(step)
- free_bd = free_xl & free_xu
- grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
- aeq @ step - beq
- )
- sd = np.zeros(n)
- while np.count_nonzero(free_bd) > 0:
- # Check whether a substantial reduction in the objective function
- # is possible, and set the search direction.
- step_sq = step[free_bd] @ step[free_bd]
- grad_sq = grad[free_bd] @ grad[free_bd]
- grad_step = grad[free_bd] @ step[free_bd]
- grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
- sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
- sd[~free_bd] = 0.0
- if grad_sd >= -1e-8 * reduct or np.any(
- grad_sd >= -TINY * np.abs(sd[free_bd])
- ):
- break
- sd[free_bd] /= -grad_sd
- # Calculate an upper bound for the tangent of half the angle theta
- # of this alternative iteration. The step will be updated as:
- # step = cos(theta) * step + sin(theta) * sd.
- temp_xl = np.zeros(n)
- temp_xu = np.zeros(n)
- temp_xl[free_bd] = (
- step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
- )
- temp_xu[free_bd] = (
- step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
- )
- temp_xl[temp_xl > 0.0] = (
- np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
- )
- temp_xu[temp_xu > 0.0] = (
- np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
- )
- dist_xl = np.maximum(step - xl, 0.0)
- dist_xu = np.maximum(xu - step, 0.0)
- i_xl = temp_xl > TINY * dist_xl
- i_xu = temp_xu > TINY * dist_xu
- all_t_xl = np.ones(n)
- all_t_xu = np.ones(n)
- all_t_xl[i_xl] = np.minimum(
- all_t_xl[i_xl],
- dist_xl[i_xl] / temp_xl[i_xl],
- )
- all_t_xu[i_xu] = np.minimum(
- all_t_xu[i_xu],
- dist_xu[i_xu] / temp_xu[i_xu],
- )
- t_xl = np.min(all_t_xl)
- t_xu = np.min(all_t_xu)
- t_bd = min(t_xl, t_xu)
- # For a range of equally spaced values of tan(0.5 * theta),
- # calculate the reduction in the objective function that would be
- # obtained by accepting the corresponding angle.
- n_samples = 20
- n_samples = int((n_samples - 3) * t_bd + 3)
- t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
- resid_ub = np.maximum(aub @ step - bub, 0.0)
- resid_eq = aeq @ step - beq
- step_proj = np.copy(step)
- step_proj[~free_bd] = 0.0
- all_reduct = np.empty(n_samples)
- for i in range(n_samples):
- sin_value = 2.0 * t_samples[i] / (1.0 + t_samples[i] ** 2.0)
- step_alt = np.clip(
- step + sin_value * (sd - t_samples[i] * step_proj),
- xl,
- xu,
- )
- resid_ub_alt = np.maximum(aub @ step_alt - bub, 0.0)
- resid_eq_alt = aeq @ step_alt - beq
- all_reduct[i] = 0.5 * (
- resid_ub @ resid_ub
- + resid_eq @ resid_eq
- - resid_ub_alt @ resid_ub_alt
- - resid_eq_alt @ resid_eq_alt
- )
- if np.all(all_reduct <= 0.0):
- # No reduction in the objective function is obtained.
- break
- # Accept the angle that provides the largest reduction in the
- # objective function, and update the iterate.
- i_max = np.argmax(all_reduct)
- cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
- 1.0 + t_samples[i_max] ** 2.0
- )
- sin_value = (2.0 * t_samples[i_max]
- / (1.0 + t_samples[i_max] ** 2.0))
- step[free_bd] = cos_value * step[free_bd] + sin_value * sd[free_bd]
- grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
- aeq @ step - beq
- )
- reduct += all_reduct[i_max]
- # If the above angle is restricted by bound constraints, add them
- # to the working set, and restart the alternative iteration.
- # Otherwise, the calculations are terminated.
- if t_bd < 1.0 and i_max == n_samples - 1:
- if t_xl <= t_bd:
- i_new = _argmin(all_t_xl)
- step[i_new] = xl[i_new]
- free_bd[i_new] = False
- if t_xu <= t_bd:
- i_new = _argmin(all_t_xu)
- step[i_new] = xu[i_new]
- free_bd[i_new] = False
- else:
- break
- # Ensure that the alternative iteration improves the objective
- # function.
- resid_ub = np.maximum(aub @ step - bub, 0.0)
- resid_ub_base = np.maximum(aub @ step_base - bub, 0.0)
- resid_eq = aeq @ step - beq
- resid_eq_base = aeq @ step_base - beq
- if (
- resid_ub @ resid_ub + resid_eq @ resid_eq
- > resid_ub_base @ resid_ub_base + resid_eq_base @ resid_eq_base
- ):
- step = step_base
- if debug:
- assert np.all(xl <= step)
- assert np.all(step <= xu)
- assert np.linalg.norm(step) < 1.1 * delta
- return step
- def qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub):
- n = free_xl.size
- identity = np.eye(n)
- q, r, _ = qr(
- np.block(
- [
- [aeq],
- [aub[~free_ub, :]],
- [-identity[~free_xl, :]],
- [identity[~free_xu, :]],
- ]
- ).T,
- pivoting=True,
- )
- n_act = np.count_nonzero(
- np.abs(np.diag(r))
- >= 10.0
- * EPS
- * n
- * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
- )
- return n_act, q
- def qr_normal_byrd_omojokun(aub, free_xl, free_xu, free_slack, free_ub):
- m_linear_ub, n = aub.shape
- identity_n = np.eye(n)
- identity_m = np.eye(m_linear_ub)
- q, r, _ = qr(
- np.block(
- [
- [
- aub[~free_ub, :],
- -identity_m[~free_ub, :],
- ],
- [
- np.zeros((m_linear_ub - np.count_nonzero(free_slack), n)),
- -identity_m[~free_slack, :],
- ],
- [
- -identity_n[~free_xl, :],
- np.zeros((n - np.count_nonzero(free_xl), m_linear_ub)),
- ],
- [
- identity_n[~free_xu, :],
- np.zeros((n - np.count_nonzero(free_xu), m_linear_ub)),
- ],
- ]
- ).T,
- pivoting=True,
- )
- n_act = np.count_nonzero(
- np.abs(np.diag(r))
- >= 10.0
- * EPS
- * (n + m_linear_ub)
- * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
- )
- return n_act, q
- def _alpha_tr(step, sd, delta):
- step_sd = step @ sd
- sd_sq = sd @ sd
- dist_tr_sq = delta**2.0 - step @ step
- temp = np.sqrt(max(step_sd**2.0 + sd_sq * dist_tr_sq, 0.0))
- if step_sd <= 0.0 and sd_sq > TINY * abs(temp - step_sd):
- alpha_tr = max((temp - step_sd) / sd_sq, 0.0)
- elif abs(temp + step_sd) > TINY * dist_tr_sq:
- alpha_tr = max(dist_tr_sq / (temp + step_sd), 0.0)
- else:
- raise ZeroDivisionError
- return alpha_tr
- def _argmax(x):
- return np.flatnonzero(x >= np.max(x))
- def _argmin(x):
- return np.flatnonzero(x <= np.min(x))
|