bdf.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. import numpy as np
  2. from scipy.linalg import lu_factor, lu_solve
  3. from scipy.sparse import issparse, csc_matrix, eye
  4. from scipy.sparse.linalg import splu
  5. from scipy.optimize._numdiff import group_columns
  6. from .common import (validate_max_step, validate_tol, select_initial_step,
  7. norm, EPS, num_jac, validate_first_step,
  8. warn_extraneous)
  9. from .base import OdeSolver, DenseOutput
  10. MAX_ORDER = 5
  11. NEWTON_MAXITER = 4
  12. MIN_FACTOR = 0.2
  13. MAX_FACTOR = 10
  14. def compute_R(order, factor):
  15. """Compute the matrix for changing the differences array."""
  16. I = np.arange(1, order + 1)[:, None]
  17. J = np.arange(1, order + 1)
  18. M = np.zeros((order + 1, order + 1))
  19. M[1:, 1:] = (I - 1 - factor * J) / I
  20. M[0] = 1
  21. return np.cumprod(M, axis=0)
  22. def change_D(D, order, factor):
  23. """Change differences array in-place when step size is changed."""
  24. R = compute_R(order, factor)
  25. U = compute_R(order, 1)
  26. RU = R.dot(U)
  27. D[:order + 1] = np.dot(RU.T, D[:order + 1])
  28. def solve_bdf_system(fun, t_new, y_predict, c, psi, LU, solve_lu, scale, tol):
  29. """Solve the algebraic system resulting from BDF method."""
  30. d = 0
  31. y = y_predict.copy()
  32. dy_norm_old = None
  33. converged = False
  34. for k in range(NEWTON_MAXITER):
  35. f = fun(t_new, y)
  36. if not np.all(np.isfinite(f)):
  37. break
  38. dy = solve_lu(LU, c * f - psi - d)
  39. dy_norm = norm(dy / scale)
  40. if dy_norm_old is None:
  41. rate = None
  42. else:
  43. rate = dy_norm / dy_norm_old
  44. if (rate is not None and (rate >= 1 or
  45. rate ** (NEWTON_MAXITER - k) / (1 - rate) * dy_norm > tol)):
  46. break
  47. y += dy
  48. d += dy
  49. if (dy_norm == 0 or
  50. rate is not None and rate / (1 - rate) * dy_norm < tol):
  51. converged = True
  52. break
  53. dy_norm_old = dy_norm
  54. return converged, k + 1, y, d
  55. class BDF(OdeSolver):
  56. """Implicit method based on backward-differentiation formulas.
  57. This is a variable order method with the order varying automatically from
  58. 1 to 5. The general framework of the BDF algorithm is described in [1]_.
  59. This class implements a quasi-constant step size as explained in [2]_.
  60. The error estimation strategy for the constant-step BDF is derived in [3]_.
  61. An accuracy enhancement using modified formulas (NDF) [2]_ is also implemented.
  62. Can be applied in the complex domain.
  63. Parameters
  64. ----------
  65. fun : callable
  66. Right-hand side of the system: the time derivative of the state ``y``
  67. at time ``t``. The calling signature is ``fun(t, y)``, where ``t`` is a
  68. scalar and ``y`` is an ndarray with ``len(y) = len(y0)``. ``fun`` must
  69. return an array of the same shape as ``y``. See `vectorized` for more
  70. information.
  71. t0 : float
  72. Initial time.
  73. y0 : array_like, shape (n,)
  74. Initial state.
  75. t_bound : float
  76. Boundary time - the integration won't continue beyond it. It also
  77. determines the direction of the integration.
  78. first_step : float or None, optional
  79. Initial step size. Default is ``None`` which means that the algorithm
  80. should choose.
  81. max_step : float, optional
  82. Maximum allowed step size. Default is np.inf, i.e., the step size is not
  83. bounded and determined solely by the solver.
  84. rtol, atol : float and array_like, optional
  85. Relative and absolute tolerances. The solver keeps the local error
  86. estimates less than ``atol + rtol * abs(y)``. Here `rtol` controls a
  87. relative accuracy (number of correct digits), while `atol` controls
  88. absolute accuracy (number of correct decimal places). To achieve the
  89. desired `rtol`, set `atol` to be smaller than the smallest value that
  90. can be expected from ``rtol * abs(y)`` so that `rtol` dominates the
  91. allowable error. If `atol` is larger than ``rtol * abs(y)`` the
  92. number of correct digits is not guaranteed. Conversely, to achieve the
  93. desired `atol` set `rtol` such that ``rtol * abs(y)`` is always smaller
  94. than `atol`. If components of y have different scales, it might be
  95. beneficial to set different `atol` values for different components by
  96. passing array_like with shape (n,) for `atol`. Default values are
  97. 1e-3 for `rtol` and 1e-6 for `atol`.
  98. jac : {None, array_like, sparse_matrix, callable}, optional
  99. Jacobian matrix of the right-hand side of the system with respect to y,
  100. required by this method. The Jacobian matrix has shape (n, n) and its
  101. element (i, j) is equal to ``d f_i / d y_j``.
  102. There are three ways to define the Jacobian:
  103. * If array_like or sparse_matrix, the Jacobian is assumed to
  104. be constant.
  105. * If callable, the Jacobian is assumed to depend on both
  106. t and y; it will be called as ``jac(t, y)`` as necessary.
  107. For the 'Radau' and 'BDF' methods, the return value might be a
  108. sparse matrix.
  109. * If None (default), the Jacobian will be approximated by
  110. finite differences.
  111. It is generally recommended to provide the Jacobian rather than
  112. relying on a finite-difference approximation.
  113. jac_sparsity : {None, array_like, sparse matrix}, optional
  114. Defines a sparsity structure of the Jacobian matrix for a
  115. finite-difference approximation. Its shape must be (n, n). This argument
  116. is ignored if `jac` is not `None`. If the Jacobian has only few non-zero
  117. elements in *each* row, providing the sparsity structure will greatly
  118. speed up the computations [4]_. A zero entry means that a corresponding
  119. element in the Jacobian is always zero. If None (default), the Jacobian
  120. is assumed to be dense.
  121. vectorized : bool, optional
  122. Whether `fun` can be called in a vectorized fashion. Default is False.
  123. If ``vectorized`` is False, `fun` will always be called with ``y`` of
  124. shape ``(n,)``, where ``n = len(y0)``.
  125. If ``vectorized`` is True, `fun` may be called with ``y`` of shape
  126. ``(n, k)``, where ``k`` is an integer. In this case, `fun` must behave
  127. such that ``fun(t, y)[:, i] == fun(t, y[:, i])`` (i.e. each column of
  128. the returned array is the time derivative of the state corresponding
  129. with a column of ``y``).
  130. Setting ``vectorized=True`` allows for faster finite difference
  131. approximation of the Jacobian by this method, but may result in slower
  132. execution overall in some circumstances (e.g. small ``len(y0)``).
  133. Attributes
  134. ----------
  135. n : int
  136. Number of equations.
  137. status : string
  138. Current status of the solver: 'running', 'finished' or 'failed'.
  139. t_bound : float
  140. Boundary time.
  141. direction : float
  142. Integration direction: +1 or -1.
  143. t : float
  144. Current time.
  145. y : ndarray
  146. Current state.
  147. t_old : float
  148. Previous time. None if no steps were made yet.
  149. step_size : float
  150. Size of the last successful step. None if no steps were made yet.
  151. nfev : int
  152. Number of evaluations of the right-hand side.
  153. njev : int
  154. Number of evaluations of the Jacobian.
  155. nlu : int
  156. Number of LU decompositions.
  157. References
  158. ----------
  159. .. [1] G. D. Byrne, A. C. Hindmarsh, "A Polyalgorithm for the Numerical
  160. Solution of Ordinary Differential Equations", ACM Transactions on
  161. Mathematical Software, Vol. 1, No. 1, pp. 71-96, March 1975.
  162. .. [2] L. F. Shampine, M. W. Reichelt, "THE MATLAB ODE SUITE", SIAM J. SCI.
  163. COMPUTE., Vol. 18, No. 1, pp. 1-22, January 1997.
  164. .. [3] E. Hairer, G. Wanner, "Solving Ordinary Differential Equations I:
  165. Nonstiff Problems", Sec. III.2.
  166. .. [4] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
  167. sparse Jacobian matrices", Journal of the Institute of Mathematics
  168. and its Applications, 13, pp. 117-120, 1974.
  169. """
  170. def __init__(self, fun, t0, y0, t_bound, max_step=np.inf,
  171. rtol=1e-3, atol=1e-6, jac=None, jac_sparsity=None,
  172. vectorized=False, first_step=None, **extraneous):
  173. warn_extraneous(extraneous)
  174. super().__init__(fun, t0, y0, t_bound, vectorized,
  175. support_complex=True)
  176. self.max_step = validate_max_step(max_step)
  177. self.rtol, self.atol = validate_tol(rtol, atol, self.n)
  178. f = self.fun(self.t, self.y)
  179. if first_step is None:
  180. self.h_abs = select_initial_step(self.fun, self.t, self.y,
  181. t_bound, max_step, f,
  182. self.direction, 1,
  183. self.rtol, self.atol)
  184. else:
  185. self.h_abs = validate_first_step(first_step, t0, t_bound)
  186. self.h_abs_old = None
  187. self.error_norm_old = None
  188. self.newton_tol = max(10 * EPS / rtol, min(0.03, rtol ** 0.5))
  189. self.jac_factor = None
  190. self.jac, self.J = self._validate_jac(jac, jac_sparsity)
  191. if issparse(self.J):
  192. def lu(A):
  193. self.nlu += 1
  194. return splu(A)
  195. def solve_lu(LU, b):
  196. return LU.solve(b)
  197. I = eye(self.n, format='csc', dtype=self.y.dtype)
  198. else:
  199. def lu(A):
  200. self.nlu += 1
  201. return lu_factor(A, overwrite_a=True)
  202. def solve_lu(LU, b):
  203. return lu_solve(LU, b, overwrite_b=True)
  204. I = np.identity(self.n, dtype=self.y.dtype)
  205. self.lu = lu
  206. self.solve_lu = solve_lu
  207. self.I = I
  208. kappa = np.array([0, -0.1850, -1/9, -0.0823, -0.0415, 0])
  209. self.gamma = np.hstack((0, np.cumsum(1 / np.arange(1, MAX_ORDER + 1))))
  210. self.alpha = (1 - kappa) * self.gamma
  211. self.error_const = kappa * self.gamma + 1 / np.arange(1, MAX_ORDER + 2)
  212. D = np.empty((MAX_ORDER + 3, self.n), dtype=self.y.dtype)
  213. D[0] = self.y
  214. D[1] = f * self.h_abs * self.direction
  215. self.D = D
  216. self.order = 1
  217. self.n_equal_steps = 0
  218. self.LU = None
  219. def _validate_jac(self, jac, sparsity):
  220. t0 = self.t
  221. y0 = self.y
  222. if jac is None:
  223. if sparsity is not None:
  224. if issparse(sparsity):
  225. sparsity = csc_matrix(sparsity)
  226. groups = group_columns(sparsity)
  227. sparsity = (sparsity, groups)
  228. def jac_wrapped(t, y):
  229. self.njev += 1
  230. f = self.fun_single(t, y)
  231. J, self.jac_factor = num_jac(self.fun_vectorized, t, y, f,
  232. self.atol, self.jac_factor,
  233. sparsity)
  234. return J
  235. J = jac_wrapped(t0, y0)
  236. elif callable(jac):
  237. J = jac(t0, y0)
  238. self.njev += 1
  239. if issparse(J):
  240. J = csc_matrix(J, dtype=y0.dtype)
  241. def jac_wrapped(t, y):
  242. self.njev += 1
  243. return csc_matrix(jac(t, y), dtype=y0.dtype)
  244. else:
  245. J = np.asarray(J, dtype=y0.dtype)
  246. def jac_wrapped(t, y):
  247. self.njev += 1
  248. return np.asarray(jac(t, y), dtype=y0.dtype)
  249. if J.shape != (self.n, self.n):
  250. raise ValueError(f"`jac` is expected to have shape {(self.n, self.n)},"
  251. f" but actually has {J.shape}.")
  252. else:
  253. if issparse(jac):
  254. J = csc_matrix(jac, dtype=y0.dtype)
  255. else:
  256. J = np.asarray(jac, dtype=y0.dtype)
  257. if J.shape != (self.n, self.n):
  258. raise ValueError(f"`jac` is expected to have shape {(self.n, self.n)},"
  259. f" but actually has {J.shape}.")
  260. jac_wrapped = None
  261. return jac_wrapped, J
  262. def _step_impl(self):
  263. t = self.t
  264. D = self.D
  265. max_step = self.max_step
  266. min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)
  267. if self.h_abs > max_step:
  268. h_abs = max_step
  269. change_D(D, self.order, max_step / self.h_abs)
  270. self.n_equal_steps = 0
  271. elif self.h_abs < min_step:
  272. h_abs = min_step
  273. change_D(D, self.order, min_step / self.h_abs)
  274. self.n_equal_steps = 0
  275. else:
  276. h_abs = self.h_abs
  277. atol = self.atol
  278. rtol = self.rtol
  279. order = self.order
  280. alpha = self.alpha
  281. gamma = self.gamma
  282. error_const = self.error_const
  283. J = self.J
  284. LU = self.LU
  285. current_jac = self.jac is None
  286. step_accepted = False
  287. while not step_accepted:
  288. if h_abs < min_step:
  289. return False, self.TOO_SMALL_STEP
  290. h = h_abs * self.direction
  291. t_new = t + h
  292. if self.direction * (t_new - self.t_bound) > 0:
  293. t_new = self.t_bound
  294. change_D(D, order, np.abs(t_new - t) / h_abs)
  295. self.n_equal_steps = 0
  296. LU = None
  297. h = t_new - t
  298. h_abs = np.abs(h)
  299. y_predict = np.sum(D[:order + 1], axis=0)
  300. scale = atol + rtol * np.abs(y_predict)
  301. psi = np.dot(D[1: order + 1].T, gamma[1: order + 1]) / alpha[order]
  302. converged = False
  303. c = h / alpha[order]
  304. while not converged:
  305. if LU is None:
  306. LU = self.lu(self.I - c * J)
  307. converged, n_iter, y_new, d = solve_bdf_system(
  308. self.fun, t_new, y_predict, c, psi, LU, self.solve_lu,
  309. scale, self.newton_tol)
  310. if not converged:
  311. if current_jac:
  312. break
  313. J = self.jac(t_new, y_predict)
  314. LU = None
  315. current_jac = True
  316. if not converged:
  317. factor = 0.5
  318. h_abs *= factor
  319. change_D(D, order, factor)
  320. self.n_equal_steps = 0
  321. LU = None
  322. continue
  323. safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
  324. + n_iter)
  325. scale = atol + rtol * np.abs(y_new)
  326. error = error_const[order] * d
  327. error_norm = norm(error / scale)
  328. if error_norm > 1:
  329. factor = max(MIN_FACTOR,
  330. safety * error_norm ** (-1 / (order + 1)))
  331. h_abs *= factor
  332. change_D(D, order, factor)
  333. self.n_equal_steps = 0
  334. # As we didn't have problems with convergence, we don't
  335. # reset LU here.
  336. else:
  337. step_accepted = True
  338. self.n_equal_steps += 1
  339. self.t = t_new
  340. self.y = y_new
  341. self.h_abs = h_abs
  342. self.J = J
  343. self.LU = LU
  344. # Update differences. The principal relation here is
  345. # D^{j + 1} y_n = D^{j} y_n - D^{j} y_{n - 1}. Keep in mind that D
  346. # contained difference for previous interpolating polynomial and
  347. # d = D^{k + 1} y_n. Thus this elegant code follows.
  348. D[order + 2] = d - D[order + 1]
  349. D[order + 1] = d
  350. for i in reversed(range(order + 1)):
  351. D[i] += D[i + 1]
  352. if self.n_equal_steps < order + 1:
  353. return True, None
  354. if order > 1:
  355. error_m = error_const[order - 1] * D[order]
  356. error_m_norm = norm(error_m / scale)
  357. else:
  358. error_m_norm = np.inf
  359. if order < MAX_ORDER:
  360. error_p = error_const[order + 1] * D[order + 2]
  361. error_p_norm = norm(error_p / scale)
  362. else:
  363. error_p_norm = np.inf
  364. error_norms = np.array([error_m_norm, error_norm, error_p_norm])
  365. with np.errstate(divide='ignore'):
  366. factors = error_norms ** (-1 / np.arange(order, order + 3))
  367. delta_order = np.argmax(factors) - 1
  368. order += delta_order
  369. self.order = order
  370. factor = min(MAX_FACTOR, safety * np.max(factors))
  371. self.h_abs *= factor
  372. change_D(D, order, factor)
  373. self.n_equal_steps = 0
  374. self.LU = None
  375. return True, None
  376. def _dense_output_impl(self):
  377. return BdfDenseOutput(self.t_old, self.t, self.h_abs * self.direction,
  378. self.order, self.D[:self.order + 1].copy())
  379. class BdfDenseOutput(DenseOutput):
  380. def __init__(self, t_old, t, h, order, D):
  381. super().__init__(t_old, t)
  382. self.order = order
  383. self.t_shift = self.t - h * np.arange(self.order)
  384. self.denom = h * (1 + np.arange(self.order))
  385. self.D = D
  386. def _call_impl(self, t):
  387. if t.ndim == 0:
  388. x = (t - self.t_shift) / self.denom
  389. p = np.cumprod(x)
  390. else:
  391. x = (t - self.t_shift[:, None]) / self.denom[:, None]
  392. p = np.cumprod(x, axis=0)
  393. y = np.dot(self.D[1:].T, p)
  394. if y.ndim == 1:
  395. y += self.D[0]
  396. else:
  397. y += self.D[0, :, None]
  398. return y