geometry.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. import inspect
  2. import numpy as np
  3. from ..utils import get_arrays_tol
  4. TINY = np.finfo(float).tiny
  5. def cauchy_geometry(const, grad, curv, xl, xu, delta, debug):
  6. r"""
  7. Maximize approximately the absolute value of a quadratic function subject
  8. to bound constraints in a trust region.
  9. This function solves approximately
  10. .. math::
  11. \max_{s \in \mathbb{R}^n} \quad \bigg\lvert c + g^{\mathsf{T}} s +
  12. \frac{1}{2} s^{\mathsf{T}} H s \bigg\rvert \quad \text{s.t.} \quad
  13. \left\{ \begin{array}{l}
  14. l \le s \le u,\\
  15. \lVert s \rVert \le \Delta,
  16. \end{array} \right.
  17. by maximizing the objective function along the constrained Cauchy
  18. direction.
  19. Parameters
  20. ----------
  21. const : float
  22. Constant :math:`c` as shown above.
  23. grad : `numpy.ndarray`, shape (n,)
  24. Gradient :math:`g` as shown above.
  25. curv : callable
  26. Curvature of :math:`H` along any vector.
  27. ``curv(s) -> float``
  28. returns :math:`s^{\mathsf{T}} H s`.
  29. xl : `numpy.ndarray`, shape (n,)
  30. Lower bounds :math:`l` as shown above.
  31. xu : `numpy.ndarray`, shape (n,)
  32. Upper bounds :math:`u` as shown above.
  33. delta : float
  34. Trust-region radius :math:`\Delta` as shown above.
  35. debug : bool
  36. Whether to make debugging tests during the execution.
  37. Returns
  38. -------
  39. `numpy.ndarray`, shape (n,)
  40. Approximate solution :math:`s`.
  41. Notes
  42. -----
  43. This function is described as the first alternative in Section 6.5 of [1]_.
  44. It is assumed that the origin is feasible with respect to the bound
  45. constraints and that `delta` is finite and positive.
  46. References
  47. ----------
  48. .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
  49. and Software*. PhD thesis, Department of Applied Mathematics, The Hong
  50. Kong Polytechnic University, Hong Kong, China, 2022. URL:
  51. https://theses.lib.polyu.edu.hk/handle/200/12294.
  52. """
  53. if debug:
  54. assert isinstance(const, float)
  55. assert isinstance(grad, np.ndarray) and grad.ndim == 1
  56. assert inspect.signature(curv).bind(grad)
  57. assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
  58. assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
  59. assert isinstance(delta, float)
  60. assert isinstance(debug, bool)
  61. tol = get_arrays_tol(xl, xu)
  62. assert np.all(xl <= tol)
  63. assert np.all(xu >= -tol)
  64. assert np.isfinite(delta) and delta > 0.0
  65. xl = np.minimum(xl, 0.0)
  66. xu = np.maximum(xu, 0.0)
  67. # To maximize the absolute value of a quadratic function, we maximize the
  68. # function itself or its negative, and we choose the solution that provides
  69. # the largest function value.
  70. step1, q_val1 = _cauchy_geom(const, grad, curv, xl, xu, delta, debug)
  71. step2, q_val2 = _cauchy_geom(
  72. -const,
  73. -grad,
  74. lambda x: -curv(x),
  75. xl,
  76. xu,
  77. delta,
  78. debug,
  79. )
  80. step = step1 if abs(q_val1) >= abs(q_val2) else step2
  81. if debug:
  82. assert np.all(xl <= step)
  83. assert np.all(step <= xu)
  84. assert np.linalg.norm(step) < 1.1 * delta
  85. return step
  86. def spider_geometry(const, grad, curv, xpt, xl, xu, delta, debug):
  87. r"""
  88. Maximize approximately the absolute value of a quadratic function subject
  89. to bound constraints in a trust region.
  90. This function solves approximately
  91. .. math::
  92. \max_{s \in \mathbb{R}^n} \quad \bigg\lvert c + g^{\mathsf{T}} s +
  93. \frac{1}{2} s^{\mathsf{T}} H s \bigg\rvert \quad \text{s.t.} \quad
  94. \left\{ \begin{array}{l}
  95. l \le s \le u,\\
  96. \lVert s \rVert \le \Delta,
  97. \end{array} \right.
  98. by maximizing the objective function along given straight lines.
  99. Parameters
  100. ----------
  101. const : float
  102. Constant :math:`c` as shown above.
  103. grad : `numpy.ndarray`, shape (n,)
  104. Gradient :math:`g` as shown above.
  105. curv : callable
  106. Curvature of :math:`H` along any vector.
  107. ``curv(s) -> float``
  108. returns :math:`s^{\mathsf{T}} H s`.
  109. xpt : `numpy.ndarray`, shape (n, npt)
  110. Points defining the straight lines. The straight lines considered are
  111. the ones passing through the origin and the points in `xpt`.
  112. xl : `numpy.ndarray`, shape (n,)
  113. Lower bounds :math:`l` as shown above.
  114. xu : `numpy.ndarray`, shape (n,)
  115. Upper bounds :math:`u` as shown above.
  116. delta : float
  117. Trust-region radius :math:`\Delta` as shown above.
  118. debug : bool
  119. Whether to make debugging tests during the execution.
  120. Returns
  121. -------
  122. `numpy.ndarray`, shape (n,)
  123. Approximate solution :math:`s`.
  124. Notes
  125. -----
  126. This function is described as the second alternative in Section 6.5 of
  127. [1]_. It is assumed that the origin is feasible with respect to the bound
  128. constraints and that `delta` is finite and positive.
  129. References
  130. ----------
  131. .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
  132. and Software*. PhD thesis, Department of Applied Mathematics, The Hong
  133. Kong Polytechnic University, Hong Kong, China, 2022. URL:
  134. https://theses.lib.polyu.edu.hk/handle/200/12294.
  135. """
  136. if debug:
  137. assert isinstance(const, float)
  138. assert isinstance(grad, np.ndarray) and grad.ndim == 1
  139. assert inspect.signature(curv).bind(grad)
  140. assert (
  141. isinstance(xpt, np.ndarray)
  142. and xpt.ndim == 2
  143. and xpt.shape[0] == grad.size
  144. )
  145. assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
  146. assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
  147. assert isinstance(delta, float)
  148. assert isinstance(debug, bool)
  149. tol = get_arrays_tol(xl, xu)
  150. assert np.all(xl <= tol)
  151. assert np.all(xu >= -tol)
  152. assert np.isfinite(delta) and delta > 0.0
  153. xl = np.minimum(xl, 0.0)
  154. xu = np.maximum(xu, 0.0)
  155. # Iterate through the straight lines.
  156. step = np.zeros_like(grad)
  157. q_val = const
  158. s_norm = np.linalg.norm(xpt, axis=0)
  159. # Set alpha_xl to the step size for the lower-bound constraint and
  160. # alpha_xu to the step size for the upper-bound constraint.
  161. # xl.shape = (N,)
  162. # xpt.shape = (N, M)
  163. # i_xl_pos.shape = (M, N)
  164. i_xl_pos = (xl > -np.inf) & (xpt.T > -TINY * xl)
  165. i_xl_neg = (xl > -np.inf) & (xpt.T < TINY * xl)
  166. i_xu_pos = (xu < np.inf) & (xpt.T > TINY * xu)
  167. i_xu_neg = (xu < np.inf) & (xpt.T < -TINY * xu)
  168. # (M, N)
  169. alpha_xl_pos = np.atleast_2d(
  170. np.broadcast_to(xl, i_xl_pos.shape)[i_xl_pos] / xpt.T[i_xl_pos]
  171. )
  172. # (M,)
  173. alpha_xl_pos = np.max(alpha_xl_pos, axis=1, initial=-np.inf)
  174. # make sure it's (M,)
  175. alpha_xl_pos = np.broadcast_to(np.atleast_1d(alpha_xl_pos), xpt.shape[1])
  176. alpha_xl_neg = np.atleast_2d(
  177. np.broadcast_to(xl, i_xl_neg.shape)[i_xl_neg] / xpt.T[i_xl_neg]
  178. )
  179. alpha_xl_neg = np.max(alpha_xl_neg, axis=1, initial=np.inf)
  180. alpha_xl_neg = np.broadcast_to(np.atleast_1d(alpha_xl_neg), xpt.shape[1])
  181. alpha_xu_neg = np.atleast_2d(
  182. np.broadcast_to(xu, i_xu_neg.shape)[i_xu_neg] / xpt.T[i_xu_neg]
  183. )
  184. alpha_xu_neg = np.max(alpha_xu_neg, axis=1, initial=-np.inf)
  185. alpha_xu_neg = np.broadcast_to(np.atleast_1d(alpha_xu_neg), xpt.shape[1])
  186. alpha_xu_pos = np.atleast_2d(
  187. np.broadcast_to(xu, i_xu_pos.shape)[i_xu_pos] / xpt.T[i_xu_pos]
  188. )
  189. alpha_xu_pos = np.max(alpha_xu_pos, axis=1, initial=np.inf)
  190. alpha_xu_pos = np.broadcast_to(np.atleast_1d(alpha_xu_pos), xpt.shape[1])
  191. for k in range(xpt.shape[1]):
  192. # Set alpha_tr to the step size for the trust-region constraint.
  193. if s_norm[k] > TINY * delta:
  194. alpha_tr = max(delta / s_norm[k], 0.0)
  195. else:
  196. # The current straight line is basically zero.
  197. continue
  198. alpha_bd_pos = max(min(alpha_xu_pos[k], alpha_xl_neg[k]), 0.0)
  199. alpha_bd_neg = min(max(alpha_xl_pos[k], alpha_xu_neg[k]), 0.0)
  200. # Set alpha_quad_pos and alpha_quad_neg to the step size to the extrema
  201. # of the quadratic function along the positive and negative directions.
  202. grad_step = grad @ xpt[:, k]
  203. curv_step = curv(xpt[:, k])
  204. if (
  205. grad_step >= 0.0
  206. and curv_step < -TINY * grad_step
  207. or grad_step <= 0.0
  208. and curv_step > -TINY * grad_step
  209. ):
  210. alpha_quad_pos = max(-grad_step / curv_step, 0.0)
  211. else:
  212. alpha_quad_pos = np.inf
  213. if (
  214. grad_step >= 0.0
  215. and curv_step > TINY * grad_step
  216. or grad_step <= 0.0
  217. and curv_step < TINY * grad_step
  218. ):
  219. alpha_quad_neg = min(-grad_step / curv_step, 0.0)
  220. else:
  221. alpha_quad_neg = -np.inf
  222. # Select the step that provides the largest value of the objective
  223. # function if it improves the current best. The best positive step is
  224. # either the one that reaches the constraints or the one that reaches
  225. # the extremum of the objective function along the current direction
  226. # (only possible if the resulting step is feasible). We test both, and
  227. # we perform similar calculations along the negative step.
  228. # N.B.: we select the largest possible step among all the ones that
  229. # maximize the objective function. This is to avoid returning the zero
  230. # step in some extreme cases.
  231. alpha_pos = min(alpha_tr, alpha_bd_pos)
  232. alpha_neg = max(-alpha_tr, alpha_bd_neg)
  233. q_val_pos = (
  234. const + alpha_pos * grad_step + 0.5 * alpha_pos**2.0 * curv_step
  235. )
  236. q_val_neg = (
  237. const + alpha_neg * grad_step + 0.5 * alpha_neg**2.0 * curv_step
  238. )
  239. if alpha_quad_pos < alpha_pos:
  240. q_val_quad_pos = (
  241. const
  242. + alpha_quad_pos * grad_step
  243. + 0.5 * alpha_quad_pos**2.0 * curv_step
  244. )
  245. if abs(q_val_quad_pos) > abs(q_val_pos):
  246. alpha_pos = alpha_quad_pos
  247. q_val_pos = q_val_quad_pos
  248. if alpha_quad_neg > alpha_neg:
  249. q_val_quad_neg = (
  250. const
  251. + alpha_quad_neg * grad_step
  252. + 0.5 * alpha_quad_neg**2.0 * curv_step
  253. )
  254. if abs(q_val_quad_neg) > abs(q_val_neg):
  255. alpha_neg = alpha_quad_neg
  256. q_val_neg = q_val_quad_neg
  257. if abs(q_val_pos) >= abs(q_val_neg) and abs(q_val_pos) > abs(q_val):
  258. step = np.clip(alpha_pos * xpt[:, k], xl, xu)
  259. q_val = q_val_pos
  260. elif abs(q_val_neg) > abs(q_val_pos) and abs(q_val_neg) > abs(q_val):
  261. step = np.clip(alpha_neg * xpt[:, k], xl, xu)
  262. q_val = q_val_neg
  263. if debug:
  264. assert np.all(xl <= step)
  265. assert np.all(step <= xu)
  266. assert np.linalg.norm(step) < 1.1 * delta
  267. return step
  268. def _cauchy_geom(const, grad, curv, xl, xu, delta, debug):
  269. """
  270. Same as `bound_constrained_cauchy_step` without the absolute value.
  271. """
  272. # Calculate the initial active set.
  273. fixed_xl = (xl < 0.0) & (grad > 0.0)
  274. fixed_xu = (xu > 0.0) & (grad < 0.0)
  275. # Calculate the Cauchy step.
  276. cauchy_step = np.zeros_like(grad)
  277. cauchy_step[fixed_xl] = xl[fixed_xl]
  278. cauchy_step[fixed_xu] = xu[fixed_xu]
  279. if np.linalg.norm(cauchy_step) > delta:
  280. working = fixed_xl | fixed_xu
  281. while True:
  282. # Calculate the Cauchy step for the directions in the working set.
  283. g_norm = np.linalg.norm(grad[working])
  284. delta_reduced = np.sqrt(
  285. delta**2.0 - cauchy_step[~working] @ cauchy_step[~working]
  286. )
  287. if g_norm > TINY * abs(delta_reduced):
  288. mu = max(delta_reduced / g_norm, 0.0)
  289. else:
  290. break
  291. cauchy_step[working] = mu * grad[working]
  292. # Update the working set.
  293. fixed_xl = working & (cauchy_step < xl)
  294. fixed_xu = working & (cauchy_step > xu)
  295. if not np.any(fixed_xl) and not np.any(fixed_xu):
  296. # Stop the calculations as the Cauchy step is now feasible.
  297. break
  298. cauchy_step[fixed_xl] = xl[fixed_xl]
  299. cauchy_step[fixed_xu] = xu[fixed_xu]
  300. working = working & ~(fixed_xl | fixed_xu)
  301. # Calculate the step that maximizes the quadratic along the Cauchy step.
  302. grad_step = grad @ cauchy_step
  303. if grad_step >= 0.0:
  304. # Set alpha_tr to the step size for the trust-region constraint.
  305. s_norm = np.linalg.norm(cauchy_step)
  306. if s_norm > TINY * delta:
  307. alpha_tr = max(delta / s_norm, 0.0)
  308. else:
  309. # The Cauchy step is basically zero.
  310. alpha_tr = 0.0
  311. # Set alpha_quad to the step size for the maximization problem.
  312. curv_step = curv(cauchy_step)
  313. if curv_step < -TINY * grad_step:
  314. alpha_quad = max(-grad_step / curv_step, 0.0)
  315. else:
  316. alpha_quad = np.inf
  317. # Set alpha_bd to the step size for the bound constraints.
  318. i_xl = (xl > -np.inf) & (cauchy_step < TINY * xl)
  319. i_xu = (xu < np.inf) & (cauchy_step > TINY * xu)
  320. alpha_xl = np.min(xl[i_xl] / cauchy_step[i_xl], initial=np.inf)
  321. alpha_xu = np.min(xu[i_xu] / cauchy_step[i_xu], initial=np.inf)
  322. alpha_bd = min(alpha_xl, alpha_xu)
  323. # Calculate the solution and the corresponding function value.
  324. alpha = min(alpha_tr, alpha_quad, alpha_bd)
  325. step = np.clip(alpha * cauchy_step, xl, xu)
  326. q_val = const + alpha * grad_step + 0.5 * alpha**2.0 * curv_step
  327. else:
  328. # This case is never reached in exact arithmetic. It prevents this
  329. # function to return a step that decreases the objective function.
  330. step = np.zeros_like(grad)
  331. q_val = const
  332. if debug:
  333. assert np.all(xl <= step)
  334. assert np.all(step <= xu)
  335. assert np.linalg.norm(step) < 1.1 * delta
  336. return step, q_val