_svdp.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. __all__ = ['_svdp']
  2. import numpy as np
  3. from scipy.sparse.linalg import aslinearoperator
  4. from scipy.linalg import LinAlgError
  5. from . import _propack # type: ignore[attr-defined]
  6. _lansvd_dict = {
  7. 'f': _propack.slansvd,
  8. 'd': _propack.dlansvd,
  9. 'F': _propack.clansvd,
  10. 'D': _propack.zlansvd,
  11. }
  12. _lansvd_irl_dict = {
  13. 'f': _propack.slansvd_irl,
  14. 'd': _propack.dlansvd_irl,
  15. 'F': _propack.clansvd_irl,
  16. 'D': _propack.zlansvd_irl,
  17. }
  18. _which_converter = {
  19. 'LM': 1,
  20. 'SM': 0,
  21. }
  22. class _AProd:
  23. """
  24. Wrapper class for linear operator
  25. The call signature of the __call__ method matches the callback of
  26. the PROPACK routines.
  27. """
  28. def __init__(self, A):
  29. try:
  30. self.A = aslinearoperator(A)
  31. except TypeError:
  32. self.A = aslinearoperator(np.asarray(A))
  33. def __call__(self, transa, m, n, x, y):
  34. if transa == 0:
  35. y[:] = self.A.matvec(x)
  36. else:
  37. y[:] = self.A.rmatvec(x)
  38. @property
  39. def shape(self):
  40. return self.A.shape
  41. @property
  42. def dtype(self):
  43. try:
  44. return self.A.dtype
  45. except AttributeError:
  46. return self.A.matvec(np.zeros(self.A.shape[1])).dtype
  47. def _svdp(A, k, which='LM', irl_mode=True, kmax=None,
  48. compute_u=True, compute_v=True, v0=None, full_output=False, tol=0,
  49. delta=None, eta=None, anorm=0, cgs=False, elr=True,
  50. min_relgap=0.002, shifts=None, maxiter=None, rng=None):
  51. """
  52. Compute the singular value decomposition of a linear operator using PROPACK
  53. Parameters
  54. ----------
  55. A : array_like, sparse matrix, or LinearOperator
  56. Operator for which SVD will be computed. If `A` is a LinearOperator
  57. object, it must define both ``matvec`` and ``rmatvec`` methods.
  58. k : int
  59. Number of singular values/vectors to compute
  60. which : {"LM", "SM"}
  61. Which singular triplets to compute:
  62. - 'LM': compute triplets corresponding to the `k` largest singular
  63. values
  64. - 'SM': compute triplets corresponding to the `k` smallest singular
  65. values
  66. `which='SM'` requires `irl_mode=True`. Computes largest singular
  67. values by default.
  68. irl_mode : bool, optional
  69. If `True`, then compute SVD using IRL (implicitly restarted Lanczos)
  70. mode. Default is `True`.
  71. kmax : int, optional
  72. Maximal number of iterations / maximal dimension of the Krylov
  73. subspace. Default is ``10 * k``.
  74. compute_u : bool, optional
  75. If `True` (default) then compute left singular vectors, `u`.
  76. compute_v : bool, optional
  77. If `True` (default) then compute right singular vectors, `v`.
  78. tol : float, optional
  79. The desired relative accuracy for computed singular values.
  80. If not specified, it will be set based on machine precision.
  81. v0 : array_like, optional
  82. Starting vector for iterations: must be of length ``A.shape[0]``.
  83. If not specified, PROPACK will generate a starting vector.
  84. full_output : bool, optional
  85. If `True`, then return sigma_bound. Default is `False`.
  86. delta : float, optional
  87. Level of orthogonality to maintain between Lanczos vectors.
  88. Default is set based on machine precision.
  89. eta : float, optional
  90. Orthogonality cutoff. During reorthogonalization, vectors with
  91. component larger than `eta` along the Lanczos vector will be purged.
  92. Default is set based on machine precision.
  93. anorm : float, optional
  94. Estimate of ``||A||``. Default is ``0``.
  95. cgs : bool, optional
  96. If `True`, reorthogonalization is done using classical Gram-Schmidt.
  97. If `False` (default), it is done using modified Gram-Schmidt.
  98. elr : bool, optional
  99. If `True` (default), then extended local orthogonality is enforced
  100. when obtaining singular vectors.
  101. min_relgap : float, optional
  102. The smallest relative gap allowed between any shift in IRL mode.
  103. Default is ``0.001``. Accessed only if ``irl_mode=True``.
  104. shifts : int, optional
  105. Number of shifts per restart in IRL mode. Default is determined
  106. to satisfy ``k <= min(kmax-shifts, m, n)``. Must be
  107. >= 0, but choosing 0 might lead to performance degradation.
  108. Accessed only if ``irl_mode=True``.
  109. maxiter : int, optional
  110. Maximum number of restarts in IRL mode. Default is ``1000``.
  111. Accessed only if ``irl_mode=True``.
  112. rng : `numpy.random.Generator`, optional
  113. Pseudorandom number generator state. When `rng` is None, a new
  114. `numpy.random.Generator` is created using entropy from the
  115. operating system. Types other than `numpy.random.Generator` are
  116. passed to `numpy.random.default_rng` to instantiate a ``Generator``.
  117. Returns
  118. -------
  119. u : ndarray
  120. The `k` largest (``which="LM"``) or smallest (``which="SM"``) left
  121. singular vectors, ``shape == (A.shape[0], 3)``, returned only if
  122. ``compute_u=True``.
  123. sigma : ndarray
  124. The top `k` singular values, ``shape == (k,)``
  125. vt : ndarray
  126. The `k` largest (``which="LM"``) or smallest (``which="SM"``) right
  127. singular vectors, ``shape == (3, A.shape[1])``, returned only if
  128. ``compute_v=True``.
  129. sigma_bound : ndarray
  130. the error bounds on the singular values sigma, returned only if
  131. ``full_output=True``.
  132. """
  133. if rng is None:
  134. raise ValueError("`rng` must be a normalized numpy.random.Generator instance")
  135. which = which.upper()
  136. if which not in {'LM', 'SM'}:
  137. raise ValueError("`which` must be either 'LM' or 'SM'")
  138. if not irl_mode and which == 'SM':
  139. raise ValueError("`which`='SM' requires irl_mode=True")
  140. aprod = _AProd(A)
  141. typ = aprod.dtype.char
  142. try:
  143. lansvd_irl = _lansvd_irl_dict[typ]
  144. lansvd = _lansvd_dict[typ]
  145. except KeyError:
  146. # work with non-supported types using native system precision
  147. if np.iscomplexobj(np.empty(0, dtype=typ)):
  148. typ = 'D'
  149. else:
  150. typ = 'd'
  151. lansvd_irl = _lansvd_irl_dict[typ]
  152. lansvd = _lansvd_dict[typ]
  153. m, n = aprod.shape
  154. if (k < 1) or (k > min(m, n)):
  155. raise ValueError("k must be positive and not greater than m or n")
  156. if kmax is None:
  157. kmax = 10*k
  158. if maxiter is None:
  159. maxiter = 1000
  160. # guard against unnecessarily large kmax
  161. kmax = min(m + 1, n + 1, kmax)
  162. if kmax < k:
  163. raise ValueError(
  164. "kmax must be greater than or equal to k, "
  165. f"but kmax ({kmax}) < k ({k})")
  166. # convert python args to fortran args
  167. jobu = 1 if compute_u else 0
  168. jobv = 1 if compute_v else 0
  169. # these will be the output arrays
  170. u = np.zeros((m, kmax + 1), order='F', dtype=typ)
  171. v = np.zeros((n, kmax), order='F', dtype=typ)
  172. sigma = np.zeros(k, order='F', dtype=typ.lower())
  173. bnd = np.zeros(k, order='F', dtype=typ.lower())
  174. # Specify the starting vector. if v0 is all zero, PROPACK will generate
  175. # a random starting vector: the random seed cannot be controlled in that
  176. # case, so we'll instead use numpy to generate a random vector
  177. if v0 is None:
  178. u[:, 0] = rng.uniform(size=m)
  179. if np.iscomplexobj(np.empty(0, dtype=typ)): # complex type
  180. u[:, 0] += 1j * rng.uniform(size=m)
  181. else:
  182. try:
  183. u[:, 0] = v0
  184. except ValueError:
  185. raise ValueError(f"v0 must be of length {m}")
  186. # process options for the fit
  187. if delta is None:
  188. delta = np.sqrt(np.finfo(typ).eps)
  189. if eta is None:
  190. eta = np.finfo(typ).eps ** 0.75
  191. if irl_mode:
  192. doption = np.array((delta, eta, anorm, min_relgap), dtype=typ.lower())
  193. # validate or find default shifts
  194. if shifts is None:
  195. shifts = kmax - k
  196. if k > min(kmax - shifts, m, n):
  197. raise ValueError('shifts must satisfy '
  198. 'k <= min(kmax-shifts, m, n)!')
  199. elif shifts < 0:
  200. raise ValueError('shifts must be >= 0!')
  201. else:
  202. doption = np.array((delta, eta, anorm), dtype=typ.lower())
  203. ioption = np.array((int(bool(cgs)), int(bool(elr))), dtype='i')
  204. # PROPACK uses a few LAPACK functions that require sufficiently large
  205. # work arrays to utilize BLAS level 3 operations. In almost all relevant
  206. # architectures, the blocksize is 32 or 64. We use 32 here to be on
  207. # the conservative side.
  208. NB = 32
  209. # Determine lwork & liwork:
  210. # the required lengths are specified in the PROPACK documentation
  211. if compute_u or compute_v:
  212. lwork = m + n + 5*kmax**2 + 9*kmax + 4
  213. lwork += max(3*kmax**2 + 4*kmax + 4, NB*max(m, n))
  214. liwork = 8*kmax
  215. else:
  216. lwork = m + n + 9*kmax + 2*kmax**2 + 4 + max(m + n, 4*kmax + 4)
  217. liwork = 2*kmax + 2
  218. work = np.empty(lwork, dtype=typ.lower())
  219. iwork = np.empty(liwork, dtype=np.int32)
  220. # dummy arguments: these are passed to aprod, and not used in this wrapper
  221. dparm = np.empty(1, dtype=typ.lower())
  222. iparm = np.empty(1, dtype=np.int32)
  223. if typ.isupper():
  224. zwork = np.empty(m + n + kmax, dtype=typ)
  225. works = work, zwork, iwork
  226. else:
  227. works = work, iwork
  228. # Generate the seed for the PROPACK random float generator.
  229. rng_state = rng.integers(low=0, high=np.iinfo(np.int64).max,
  230. size=4, dtype=np.uint64)
  231. if irl_mode:
  232. info = lansvd_irl(_which_converter[which], jobu,
  233. jobv, m, n, shifts, k, maxiter, tol,
  234. aprod, u, sigma, bnd, v, *works, doption,
  235. ioption, dparm, iparm, rng_state)
  236. else:
  237. info = lansvd(jobu, jobv, m, n, k, kmax, tol, aprod, u, sigma, bnd, v,
  238. *works, doption, ioption, dparm, iparm, rng_state)
  239. if info > 0:
  240. raise LinAlgError(
  241. f"An invariant subspace of dimension {info} was found.")
  242. elif info < 0:
  243. raise LinAlgError(
  244. f"k={k} singular triplets did not converge within "
  245. f"kmax={kmax} iterations")
  246. # info == 0: The K largest (or smallest) singular triplets were computed
  247. # successfully!
  248. return u[:, :k], sigma, v[:, :k].conj().T, bnd