_construct.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709
  1. """Functions to construct sparse matrices and arrays
  2. """
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['spdiags', 'eye', 'identity', 'kron', 'kronsum',
  5. 'hstack', 'vstack', 'bmat', 'rand', 'random', 'diags', 'block_diag',
  6. 'diags_array', 'block_array', 'eye_array', 'random_array',
  7. 'expand_dims', 'permute_dims', 'swapaxes']
  8. import numbers
  9. import math
  10. import os
  11. import sys
  12. import warnings
  13. import numpy as np
  14. from scipy._lib._util import check_random_state, rng_integers, _transition_to_rng
  15. from scipy._lib.deprecation import _NoValue
  16. from ._sputils import upcast, get_index_dtype, isscalarlike, isintlike
  17. from ._sparsetools import csr_hstack
  18. from ._bsr import bsr_matrix, bsr_array
  19. from ._coo import coo_matrix, coo_array
  20. from ._csc import csc_matrix, csc_array
  21. from ._csr import csr_matrix, csr_array
  22. from ._dia import dia_matrix, dia_array
  23. from ._base import issparse, sparray
  24. def expand_dims(A, /, *, axis=0):
  25. """
  26. Add trivial axes to an array. Shape gets a ``1`` inserted at position `axis`.
  27. Parameters
  28. ----------
  29. A : sparse array
  30. axis : int
  31. Position in the expanded axes where the new axis (or axes) is placed.
  32. For a dimension ``N`` array, a valid axis is an integer on the
  33. closed-interval ``[-N-1, N]``. Negative values work from the end of
  34. the shape. ``0`` prepends an axis, as does ``-N-1``. ``-1`` appends
  35. an axis, as does ``N``. The new axis has shape ``1`` and indices are
  36. created with the value ``0``.
  37. Returns
  38. -------
  39. out : sparse array
  40. A expanded copy output in COO format with the same dtype as `A`.
  41. Raises
  42. ------
  43. ValueError
  44. If provided a non-integer or out of range ``[-N-1, N]`` axis,
  45. where ``N`` is ``A.ndim``.
  46. Examples
  47. --------
  48. >>> from scipy.sparse import csr_array, expand_dims
  49. >>> A = csr_array([[1, 2], [2, 0]])
  50. >>> A.shape
  51. (2, 2)
  52. >>> expand_dims(A, axis=1).shape
  53. (2, 1, 2)
  54. """
  55. if not isintlike(axis):
  56. raise ValueError(f"Invalid axis {axis}. Must be an integer.")
  57. idx = axis if axis >= 0 else axis + A.ndim + 1
  58. if idx < 0 or idx > A.ndim:
  59. raise ValueError(f"Invalid axis {axis} for N={A.ndim}. Must be in [-N-1, N].")
  60. newA = A.tocoo(copy=True)
  61. new_coord = np.zeros_like(newA.coords[0])
  62. newA.coords = newA.coords[:idx] + (new_coord,) + newA.coords[idx:]
  63. newA._shape = newA.shape[:idx] + (1,) + newA.shape[idx:]
  64. if A.format == "coo":
  65. newA.has_canonical_format = A.has_canonical_format
  66. return newA
  67. def swapaxes(A, axis1, axis2):
  68. """Interchange two axes of an array.
  69. Parameters
  70. ----------
  71. A : sparse array
  72. axis1 : int
  73. First axis.
  74. axis2 : int
  75. Second axis.
  76. Returns
  77. -------
  78. a_swapped : sparse array in COO format
  79. A copy of the input array with the two identified axes swapped.
  80. Raises
  81. ------
  82. ValueError
  83. If provided a non-integer or out of range ``[-N, N-1]`` axis,
  84. where ``N`` is ``A.ndim``.
  85. Examples
  86. --------
  87. >>> from scipy.sparse import coo_array, swapaxes
  88. >>> A = coo_array([[[1, 2, 3], [2, 0, 0]]])
  89. >>> A.shape
  90. (1, 2, 3)
  91. >>> swapaxes(A, 1, 2).shape
  92. (1, 3, 2)
  93. """
  94. axes = np.arange(A.ndim)
  95. try:
  96. axes[[axis1, axis2]] = axes[[axis2, axis1]]
  97. except IndexError as err:
  98. # original msg looks like: "index -4 is out of bounds for axis 0 with size 2"
  99. msg = str(err)
  100. msg = msg.removeprefix('index ').split(' axis ', 1)[0]
  101. # Final error is: "Invalid axis: -4 is out of bounds for ndim=2"
  102. raise ValueError(f"Invalid axis: {msg} ndim={A.ndim}")
  103. axes = tuple(axes)
  104. return permute_dims(A, axes=axes, copy=True)
  105. def permute_dims(A, axes=None, copy=False):
  106. """Permute the axes of the sparse array `A` to the order `axes`.
  107. Parameters
  108. ----------
  109. A : sparse array
  110. axes : tuple or list of ints, optional
  111. If specified, it must be a tuple or list which contains a permutation
  112. of ``[0, 1, ..., N-1]`` where ``N`` is ``A.ndim``. The ith
  113. axis of the returned array will correspond to the axis numbered ``axes[i]``
  114. of the input. If not specified, defaults to ``range(A.ndim)[::-1]``,
  115. which reverses the order of the axes.
  116. copy : bool, optional (default: False)
  117. Whether to return the permutation as a copy. If False, an in-place
  118. permutation is provided if possible depending on format.
  119. Returns
  120. -------
  121. out : sparse array in COO format
  122. A copy of `A` with permuted axes.
  123. Raises
  124. ------
  125. ValueError
  126. If provided a non-integer or out of range ``[-N, N-1]`` axis,
  127. where ``N`` is ``A.ndim``.
  128. Examples
  129. --------
  130. >>> from scipy.sparse import coo_array, permute_dims
  131. >>> A = coo_array([[[1, 2, 3], [2, 0, 0]]])
  132. >>> A.shape
  133. (1, 2, 3)
  134. >>> permute_dims(A, axes=(1, 2, 0)).shape
  135. (2, 3, 1)
  136. """
  137. ndim = A.ndim
  138. if axes is None:
  139. axes = tuple(range(ndim)[::-1])
  140. elif len(axes) != ndim:
  141. raise ValueError(f"Incorrect number of axes: {ndim} instead of {A.ndim}")
  142. # -------------This is from _sputils.validateaxis which almost does what we want
  143. # TODO stop _sputils.validateaxis from returning `None` when len(axes)==ndim
  144. if not isinstance(axes, (tuple, list)):
  145. # If not a tuple, check that the provided axes is actually
  146. # an integer and raise a TypeError similar to NumPy's
  147. if not np.issubdtype(np.dtype(type(axes)), np.integer):
  148. raise TypeError(f'axis must be an integer/tuple of ints, not {type(axes)}')
  149. axes = (axes,)
  150. canon_axes = []
  151. for ax in axes:
  152. if not isintlike(ax):
  153. raise TypeError(f"axis must be an integer. (given {ax})")
  154. if ax < 0:
  155. ax += ndim
  156. if ax < 0 or ax >= ndim:
  157. raise ValueError("axis out of range for ndim")
  158. canon_axes.append(ax)
  159. if len(canon_axes) != len(set(canon_axes)):
  160. raise ValueError("duplicate value in axis")
  161. # -------------End of code from _sputils.validateaxis
  162. axes = canon_axes
  163. if axes == list(range(ndim)):
  164. return A if not copy else A.copy()
  165. A = A.tocoo(copy=copy)
  166. A._shape = tuple(A.shape[idx] for idx in axes)
  167. A.coords = tuple(A.coords[idx] for idx in axes)
  168. A.has_canonical_format = False # data usually no longer sorted
  169. return A
  170. def spdiags(data, diags, m=None, n=None, format=None):
  171. """
  172. Return a sparse matrix from diagonals.
  173. .. warning::
  174. This function returns a sparse matrix -- not a sparse array.
  175. You are encouraged to use `dia_array` to take advantage
  176. of the sparse array functionality. (See Notes below.)
  177. Parameters
  178. ----------
  179. data : array_like
  180. Matrix diagonals stored row-wise
  181. diags : sequence of int or an int
  182. Diagonals to set:
  183. * k = 0 the main diagonal
  184. * k > 0 the kth upper diagonal
  185. * k < 0 the kth lower diagonal
  186. m, n : int, tuple, optional
  187. Shape of the result. If `n` is None and `m` is a given tuple,
  188. the shape is this tuple. If omitted, the matrix is square and
  189. its shape is ``len(data[0])``.
  190. format : str, optional
  191. Format of the result. By default (format=None) an appropriate sparse
  192. matrix format is returned. This choice is subject to change.
  193. Returns
  194. -------
  195. new_matrix : sparse matrix
  196. `dia_matrix` format with values in ``data`` on diagonals from ``diags``.
  197. Notes
  198. -----
  199. This function can be replaced by an equivalent call to `dia_matrix`
  200. as::
  201. dia_matrix((data, diags), shape=(m, n)).asformat(format)
  202. See Also
  203. --------
  204. diags_array : more convenient form of this function
  205. diags : matrix version of diags_array
  206. dia_matrix : the sparse DIAgonal format.
  207. Examples
  208. --------
  209. >>> import numpy as np
  210. >>> from scipy.sparse import spdiags
  211. >>> data = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
  212. >>> diags = np.array([0, -1, 2])
  213. >>> spdiags(data, diags, 4, 4).toarray()
  214. array([[1, 0, 3, 0],
  215. [1, 2, 0, 4],
  216. [0, 2, 3, 0],
  217. [0, 0, 3, 4]])
  218. """
  219. if m is None and n is None:
  220. m = n = len(data[0])
  221. elif n is None:
  222. m, n = m
  223. return dia_matrix((data, diags), shape=(m, n)).asformat(format)
  224. def diags_array(diagonals, /, *, offsets=0, shape=None, format=None, dtype=_NoValue):
  225. """
  226. Construct a sparse array from diagonals.
  227. Parameters
  228. ----------
  229. diagonals : sequence of array_like
  230. Sequence of arrays containing the array diagonals,
  231. corresponding to `offsets`.
  232. offsets : sequence of int or an int, optional
  233. Diagonals to set (repeated offsets are not allowed):
  234. - k = 0 the main diagonal (default)
  235. - k > 0 the kth upper diagonal
  236. - k < 0 the kth lower diagonal
  237. shape : tuple of int, optional
  238. Shape of the result. If omitted, a square array large enough
  239. to contain the diagonals is returned.
  240. format : {"dia", "csr", "csc", "lil", ...}, optional
  241. Matrix format of the result. By default (format=None) an
  242. appropriate sparse array format is returned. This choice is
  243. subject to change.
  244. dtype : dtype, optional
  245. Data type of the array. If `dtype` is None, the output
  246. data type is determined by the data type of the input diagonals.
  247. Up until SciPy 1.19, the default behavior will be to return an array
  248. with an inexact (floating point) data type. In particular, integer
  249. input will be converted to double precision floating point. This
  250. behavior is deprecated, and in SciPy 1.19, the default behavior
  251. will be changed to return an array with the same data type as the
  252. input diagonals. To adopt this behavior before version 1.19, use
  253. `dtype=None`.
  254. Returns
  255. -------
  256. new_array : dia_array
  257. `dia_array` holding the values in `diagonals` offset from the main diagonal
  258. as indicated in `offsets`.
  259. Notes
  260. -----
  261. Repeated diagonal offsets are disallowed.
  262. The result from ``diags_array`` is the sparse equivalent of::
  263. np.diag(diagonals[0], offsets[0])
  264. + ...
  265. + np.diag(diagonals[k], offsets[k])
  266. ``diags_array`` differs from `dia_array` in the way it handles off-diagonals.
  267. Specifically, `dia_array` assumes the data input includes padding
  268. (ignored values) at the start/end of the rows for positive/negative
  269. offset, while ``diags_array`` assumes the input data has no padding.
  270. Each value in the input `diagonals` is used.
  271. .. versionadded:: 1.11
  272. See Also
  273. --------
  274. dia_array : constructor for the sparse DIAgonal format.
  275. Examples
  276. --------
  277. >>> from scipy.sparse import diags_array
  278. >>> diagonals = [[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0], [1.0, 2.0]]
  279. >>> diags_array(diagonals, offsets=[0, -1, 2]).toarray()
  280. array([[1., 0., 1., 0.],
  281. [1., 2., 0., 2.],
  282. [0., 2., 3., 0.],
  283. [0., 0., 3., 4.]])
  284. Broadcasting of scalars is supported (but shape needs to be
  285. specified):
  286. >>> diags_array([1.0, -2.0, 1.0], offsets=[-1, 0, 1], shape=(4, 4)).toarray()
  287. array([[-2., 1., 0., 0.],
  288. [ 1., -2., 1., 0.],
  289. [ 0., 1., -2., 1.],
  290. [ 0., 0., 1., -2.]])
  291. If only one diagonal is wanted (as in `numpy.diag`), the following
  292. works as well:
  293. >>> diags_array([1.0, 2.0, 3.0], offsets=1).toarray()
  294. array([[ 0., 1., 0., 0.],
  295. [ 0., 0., 2., 0.],
  296. [ 0., 0., 0., 3.],
  297. [ 0., 0., 0., 0.]])
  298. """
  299. # if offsets is not a sequence, assume that there's only one diagonal
  300. if isscalarlike(offsets):
  301. # now check that there's actually only one diagonal
  302. if len(diagonals) == 0 or isscalarlike(diagonals[0]):
  303. diagonals = [np.atleast_1d(diagonals)]
  304. else:
  305. raise ValueError("Different number of diagonals and offsets.")
  306. else:
  307. diagonals = list(map(np.atleast_1d, diagonals))
  308. offsets = np.atleast_1d(offsets)
  309. # Basic check
  310. if len(diagonals) != len(offsets):
  311. raise ValueError("Different number of diagonals and offsets.")
  312. # Determine shape, if omitted
  313. if shape is None:
  314. m = len(diagonals[0]) + abs(int(offsets[0]))
  315. shape = (m, m)
  316. # Determine data type, if omitted
  317. if dtype is None:
  318. dtype = np.result_type(*diagonals)
  319. elif dtype is _NoValue:
  320. # This is the old deprecated behavior that uses np.common_type().
  321. # After the deprecation period, this elif branch can be removed,
  322. # and the default for the `dtype` parameter changed back to `None`.
  323. dtype = np.dtype(np.common_type(*diagonals))
  324. future_dtype = np.result_type(*diagonals)
  325. if sys.version_info < (3, 12):
  326. warn_kwargs = {'stacklevel': 2}
  327. extra_msg = ("\nNote: In Python 3.11, this warning can be generated by "
  328. "a call of scipy.sparse.diags(), but the code indicated in "
  329. "the warning message will refer to an internal call of "
  330. "scipy.sparse.diags_array(). If that happens, check your "
  331. "code for the use of diags().")
  332. else:
  333. warn_kwargs = {'skip_file_prefixes': (os.path.dirname(__file__),)}
  334. extra_msg = ""
  335. if (dtype != future_dtype):
  336. warnings.warn(
  337. f"Input has data type {future_dtype}, but the output has been cast "
  338. f"to {dtype}. In the future, the output data type will match the "
  339. "input. To avoid this warning, set the `dtype` parameter to `None` "
  340. "to have the output dtype match the input, or set it to the "
  341. "desired output data type."
  342. + extra_msg,
  343. FutureWarning,
  344. **warn_kwargs
  345. )
  346. # Construct data array
  347. m, n = shape
  348. M = max([min(m + offset, n - offset) + max(0, offset)
  349. for offset in offsets])
  350. M = max(0, M)
  351. data_arr = np.zeros((len(offsets), M), dtype=dtype)
  352. K = min(m, n)
  353. for j, diagonal in enumerate(diagonals):
  354. offset = offsets[j]
  355. k = max(0, offset)
  356. length = min(m + offset, n - offset, K)
  357. if length < 0:
  358. raise ValueError(f"Offset {offset} (index {j}) out of bounds")
  359. try:
  360. data_arr[j, k:k+length] = diagonal[...,:length]
  361. except ValueError as e:
  362. if len(diagonal) != length and len(diagonal) != 1:
  363. raise ValueError(
  364. f"Diagonal length (index {j}: {len(diagonal)} at"
  365. f" offset {offset}) does not agree with array size ({m}, {n})."
  366. ) from e
  367. raise
  368. return dia_array((data_arr, offsets), shape=(m, n)).asformat(format)
  369. def diags(diagonals, offsets=0, shape=None, format=None, dtype=_NoValue):
  370. """
  371. Construct a sparse matrix from diagonals.
  372. .. warning::
  373. This function returns a sparse matrix -- not a sparse array.
  374. You are encouraged to use `diags_array` to take advantage
  375. of the sparse array functionality.
  376. Parameters
  377. ----------
  378. diagonals : sequence of array_like
  379. Sequence of arrays containing the matrix diagonals,
  380. corresponding to `offsets`.
  381. offsets : sequence of int or an int, optional
  382. Diagonals to set (repeated offsets are not allowed):
  383. - k = 0 the main diagonal (default)
  384. - k > 0 the kth upper diagonal
  385. - k < 0 the kth lower diagonal
  386. shape : tuple of int, optional
  387. Shape of the result. If omitted, a square matrix large enough
  388. to contain the diagonals is returned.
  389. format : {"dia", "csr", "csc", "lil", ...}, optional
  390. Matrix format of the result. By default (format=None) an
  391. appropriate sparse matrix format is returned. This choice is
  392. subject to change.
  393. dtype : dtype, optional
  394. Data type of the matrix. If `dtype` is None, the output
  395. data type is determined by the data type of the input diagonals.
  396. Up until SciPy 1.19, the default behavior will be to return a matrix
  397. with an inexact (floating point) data type. In particular, integer
  398. input will be converted to double precision floating point. This
  399. behavior is deprecated, and in SciPy 1.19, the default behavior
  400. will be changed to return a matrix with the same data type as the
  401. input diagonals. To adopt this behavior before version 1.19, use
  402. `dtype=None`.
  403. Returns
  404. -------
  405. new_matrix : dia_matrix
  406. `dia_matrix` holding the values in `diagonals` offset from the main diagonal
  407. as indicated in `offsets`.
  408. Notes
  409. -----
  410. Repeated diagonal offsets are disallowed.
  411. The result from ``diags`` is the sparse equivalent of::
  412. np.diag(diagonals[0], offsets[0])
  413. + ...
  414. + np.diag(diagonals[k], offsets[k])
  415. ``diags`` differs from `dia_matrix` in the way it handles off-diagonals.
  416. Specifically, `dia_matrix` assumes the data input includes padding
  417. (ignored values) at the start/end of the rows for positive/negative
  418. offset, while ``diags`` assumes the input data has no padding.
  419. Each value in the input `diagonals` is used.
  420. .. versionadded:: 0.11
  421. See Also
  422. --------
  423. spdiags : construct matrix from diagonals
  424. diags_array : construct sparse array instead of sparse matrix
  425. Examples
  426. --------
  427. >>> from scipy.sparse import diags
  428. >>> diagonals = [[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0], [1.0, 2.0]]
  429. >>> diags(diagonals, [0, -1, 2]).toarray()
  430. array([[1., 0., 1., 0.],
  431. [1., 2., 0., 2.],
  432. [0., 2., 3., 0.],
  433. [0., 0., 3., 4.]])
  434. Broadcasting of scalars is supported (but shape needs to be
  435. specified):
  436. >>> diags([1.0, -2.0, 1.0], [-1, 0, 1], shape=(4, 4)).toarray()
  437. array([[-2., 1., 0., 0.],
  438. [ 1., -2., 1., 0.],
  439. [ 0., 1., -2., 1.],
  440. [ 0., 0., 1., -2.]])
  441. If only one diagonal is wanted (as in `numpy.diag`), the following
  442. works as well:
  443. >>> diags([1.0, 2.0, 3.0], 1).toarray()
  444. array([[ 0., 1., 0., 0.],
  445. [ 0., 0., 2., 0.],
  446. [ 0., 0., 0., 3.],
  447. [ 0., 0., 0., 0.]])
  448. """
  449. A = diags_array(diagonals, offsets=offsets, shape=shape, dtype=dtype)
  450. return dia_matrix(A).asformat(format)
  451. def identity(n, dtype='d', format=None):
  452. """Identity matrix in sparse format
  453. Returns an identity matrix with shape ``(n, n)`` using a given
  454. sparse format and dtype. This differs from `eye_array` in
  455. that it has a square shape with ones only on the main diagonal.
  456. It is thus the multiplicative identity. `eye_array` allows
  457. rectangular shapes and the diagonal can be offset from the main one.
  458. .. warning::
  459. This function returns a sparse matrix -- not a sparse array.
  460. You are encouraged to use `eye_array` to take advantage
  461. of the sparse array functionality.
  462. Parameters
  463. ----------
  464. n : int
  465. Shape of the identity matrix.
  466. dtype : dtype, optional
  467. Data type of the matrix
  468. format : str, optional
  469. Sparse format of the result, e.g., format="csr", etc.
  470. Returns
  471. -------
  472. new_matrix : sparse matrix
  473. A square sparse matrix with ones on the main diagonal and zeros elsewhere.
  474. See Also
  475. --------
  476. eye_array : Sparse array of chosen shape with ones on a specified diagonal.
  477. eye : Sparse matrix of chosen shape with ones on a specified diagonal.
  478. Examples
  479. --------
  480. >>> import scipy as sp
  481. >>> sp.sparse.identity(3).toarray()
  482. array([[ 1., 0., 0.],
  483. [ 0., 1., 0.],
  484. [ 0., 0., 1.]])
  485. >>> sp.sparse.identity(3, dtype='int8', format='dia')
  486. <DIAgonal sparse matrix of dtype 'int8'
  487. with 3 stored elements (1 diagonals) and shape (3, 3)>
  488. >>> sp.sparse.eye_array(3, dtype='int8', format='dia')
  489. <DIAgonal sparse array of dtype 'int8'
  490. with 3 stored elements (1 diagonals) and shape (3, 3)>
  491. """
  492. return eye(n, n, dtype=dtype, format=format)
  493. def eye_array(m, n=None, *, k=0, dtype=float, format=None):
  494. """Sparse array of chosen shape with ones on the kth diagonal and zeros elsewhere.
  495. Return a sparse array with ones on diagonal.
  496. Specifically a sparse array (m x n) where the kth diagonal
  497. is all ones and everything else is zeros.
  498. Parameters
  499. ----------
  500. m : int
  501. Number of rows requested.
  502. n : int, optional
  503. Number of columns. Default: `m`.
  504. k : int, optional
  505. Diagonal to place ones on. Default: 0 (main diagonal).
  506. dtype : dtype, optional
  507. Data type of the array
  508. format : str, optional (default: "dia")
  509. Sparse format of the result, e.g., format="csr", etc.
  510. Returns
  511. -------
  512. new_array : sparse array
  513. Sparse array of chosen shape with ones on the kth diagonal and zeros elsewhere.
  514. Examples
  515. --------
  516. >>> import numpy as np
  517. >>> import scipy as sp
  518. >>> sp.sparse.eye_array(3).toarray()
  519. array([[ 1., 0., 0.],
  520. [ 0., 1., 0.],
  521. [ 0., 0., 1.]])
  522. >>> sp.sparse.eye_array(3, dtype=np.int8)
  523. <DIAgonal sparse array of dtype 'int8'
  524. with 3 stored elements (1 diagonals) and shape (3, 3)>
  525. """
  526. # TODO: delete next 15 lines [combine with _eye()] once spmatrix removed
  527. return _eye(m, n, k, dtype, format)
  528. def _eye(m, n, k, dtype, format, as_sparray=True):
  529. if as_sparray:
  530. csr_sparse = csr_array
  531. csc_sparse = csc_array
  532. coo_sparse = coo_array
  533. diags_sparse = diags_array
  534. else:
  535. csr_sparse = csr_matrix
  536. csc_sparse = csc_matrix
  537. coo_sparse = coo_matrix
  538. diags_sparse = diags
  539. if n is None:
  540. n = m
  541. m, n = int(m), int(n)
  542. if m == n and k == 0:
  543. # fast branch for special formats
  544. if format in ['csr', 'csc']:
  545. idx_dtype = get_index_dtype(maxval=n)
  546. indptr = np.arange(n+1, dtype=idx_dtype)
  547. indices = np.arange(n, dtype=idx_dtype)
  548. data = np.ones(n, dtype=dtype)
  549. cls = {'csr': csr_sparse, 'csc': csc_sparse}[format]
  550. return cls((data, indices, indptr), (n, n))
  551. elif format == 'coo':
  552. idx_dtype = get_index_dtype(maxval=n)
  553. row = np.arange(n, dtype=idx_dtype)
  554. col = np.arange(n, dtype=idx_dtype)
  555. data = np.ones(n, dtype=dtype)
  556. return coo_sparse((data, (row, col)), (n, n))
  557. data = np.ones((1, max(0, min(m + k, n))), dtype=dtype)
  558. return diags_sparse(data, offsets=[k], shape=(m, n), dtype=dtype).asformat(format)
  559. def eye(m, n=None, k=0, dtype=float, format=None):
  560. """Sparse matrix of chosen shape with ones on the kth diagonal and zeros elsewhere.
  561. Returns a sparse matrix (m x n) where the kth diagonal
  562. is all ones and everything else is zeros.
  563. .. warning::
  564. This function returns a sparse matrix -- not a sparse array.
  565. You are encouraged to use `eye_array` to take advantage
  566. of the sparse array functionality.
  567. Parameters
  568. ----------
  569. m : int
  570. Number of rows in the matrix.
  571. n : int, optional
  572. Number of columns. Default: `m`.
  573. k : int, optional
  574. Diagonal to place ones on. Default: 0 (main diagonal).
  575. dtype : dtype, optional
  576. Data type of the matrix.
  577. format : str, optional
  578. Sparse format of the result, e.g., format="csr", etc.
  579. Returns
  580. -------
  581. new_matrix : sparse matrix
  582. Sparse matrix of chosen shape with ones on the kth diagonaland zeros elsewhere.
  583. See Also
  584. --------
  585. eye_array : Sparse array of chosen shape with ones on a specified diagonal.
  586. Examples
  587. --------
  588. >>> import numpy as np
  589. >>> import scipy as sp
  590. >>> sp.sparse.eye(3).toarray()
  591. array([[ 1., 0., 0.],
  592. [ 0., 1., 0.],
  593. [ 0., 0., 1.]])
  594. >>> sp.sparse.eye(3, dtype=np.int8)
  595. <DIAgonal sparse matrix of dtype 'int8'
  596. with 3 stored elements (1 diagonals) and shape (3, 3)>
  597. """
  598. return _eye(m, n, k, dtype, format, False)
  599. def kron(A, B, format=None):
  600. """Sparse representation of the Kronecker product of `A` and `B`
  601. Computes the Kronecker product, a composite sparse array
  602. made of blocks consisting of the second input array multiplied
  603. by each element of the first input array.
  604. Parameters
  605. ----------
  606. A : sparse or dense array
  607. first array of the product
  608. B : sparse or dense array
  609. second array of the product
  610. format : str, optional (default: 'bsr' or 'coo')
  611. format of the result (e.g. "csr")
  612. If None, choose 'bsr' for relatively dense 2D arrays and 'coo' for others
  613. Returns
  614. -------
  615. sparse matrix or array
  616. kronecker product in a sparse format. Returns a sparse matrix unless either
  617. `A` or `B` is a sparse array in which case returns a sparse array.
  618. Examples
  619. --------
  620. >>> import numpy as np
  621. >>> import scipy as sp
  622. >>> A = sp.sparse.csr_array(np.array([[0, 2], [5, 0]]))
  623. >>> B = sp.sparse.csr_array(np.array([[1, 2], [3, 4]]))
  624. >>> sp.sparse.kron(A, B).toarray()
  625. array([[ 0, 0, 2, 4],
  626. [ 0, 0, 6, 8],
  627. [ 5, 10, 0, 0],
  628. [15, 20, 0, 0]])
  629. >>> sp.sparse.kron(A, [[1, 2], [3, 4]]).toarray()
  630. array([[ 0, 0, 2, 4],
  631. [ 0, 0, 6, 8],
  632. [ 5, 10, 0, 0],
  633. [15, 20, 0, 0]])
  634. """
  635. # TODO: delete next 10 lines and replace _sparse with _array when spmatrix removed
  636. if isinstance(A, sparray) or isinstance(B, sparray):
  637. # convert to local variables
  638. bsr_sparse = bsr_array
  639. csr_sparse = csr_array
  640. coo_sparse = coo_array
  641. else: # use spmatrix
  642. bsr_sparse = bsr_matrix
  643. csr_sparse = csr_matrix
  644. coo_sparse = coo_matrix
  645. B = coo_sparse(B)
  646. # B is 2D and fairly dense, and format aligns with bsr, compute using BSR
  647. if (
  648. (format is None or format == "bsr") and
  649. B.ndim == 2 and 2*B.nnz >= math.prod(B.shape)
  650. ):
  651. if not hasattr(A, 'ndim') or A.ndim != 2:
  652. # CSR routes thru COO in constructor so can make COO to check ndim
  653. A = coo_sparse(A)
  654. if A.ndim == 2:
  655. A = csr_sparse(A, copy=True)
  656. output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1])
  657. if A.nnz == 0 or B.nnz == 0:
  658. # kronecker product is the zero matrix
  659. return coo_sparse(output_shape).asformat(format)
  660. B = B.toarray()
  661. data = A.data.repeat(B.size).reshape(-1, B.shape[0], B.shape[1])
  662. data = data * B
  663. return bsr_sparse((data, A.indices, A.indptr), shape=output_shape)
  664. else:
  665. A = coo_sparse(A) # no copy needed as we use np.repeat below
  666. # compute using COO (convert to desired format just before return)
  667. if coo_sparse is coo_matrix:
  668. output_shape = (A.shape[0] * B.shape[0], A.shape[1] * B.shape[1])
  669. ndim_diff = 0
  670. else:
  671. ndim_diff = A.ndim - B.ndim
  672. A_shape = A.shape if ndim_diff >= 0 else (1,) * (-ndim_diff) + A.shape
  673. B_shape = B.shape if ndim_diff <= 0 else (1,) * ndim_diff + B.shape
  674. output_shape = tuple(a * b for a, b in zip(A_shape, B_shape))
  675. if A.nnz == 0 or B.nnz == 0:
  676. # kronecker product is the zero matrix
  677. return coo_sparse(output_shape).asformat(format)
  678. # expand entries of a into blocks
  679. data = A.data.repeat(B.nnz)
  680. idx_dtype = get_index_dtype(A.coords, maxval=max(output_shape))
  681. coords = [np.asarray(co, dtype=idx_dtype).repeat(B.nnz) for co in A.coords]
  682. if ndim_diff < 0:
  683. new_co = np.zeros_like(coords[0])
  684. coords = [new_co] + [new_co.copy() for _ in range(-ndim_diff - 1)] + coords
  685. # The last B.ndim coords need to be updated. Any previous columns in B are from
  686. # prepending (1,)s, so coord from A is what we need (B coord axis is all 0)
  687. for co, B_shape_i in zip(coords[-B.ndim:], B.shape):
  688. co *= B_shape_i
  689. # increment block indices
  690. coords[-B.ndim:] = [(co.reshape(-1, B.nnz) + Bco).ravel()
  691. for co, Bco in zip(coords[-B.ndim:], B.coords)]
  692. # compute block entries
  693. data = (data.reshape(-1, B.nnz) * B.data).ravel()
  694. return coo_sparse((data, tuple(coords)), shape=output_shape).asformat(format)
  695. def kronsum(A, B, format=None):
  696. """Kronecker sum of square sparse matrices `A` and `B`
  697. Kronecker sum of two sparse matrices is a sum of two Kronecker
  698. products ``kron(I_n,A) + kron(B,I_m)`` where `A` has shape ``(m, m)``
  699. and `B` has shape ``(n, n)`` and ``I_m`` and ``I_n`` are identity matrices
  700. of shape ``(m, m)`` and ``(n, n)``, respectively.
  701. Parameters
  702. ----------
  703. A : sparse matrix or array
  704. Square matrix
  705. B : sparse array or array
  706. Square matrix
  707. format : str
  708. format of the result (e.g. "csr")
  709. Returns
  710. -------
  711. sparse matrix or array
  712. kronecker sum in a sparse format. Returns a sparse matrix unless either
  713. `A` or `B` is a sparse array in which case returns a sparse array.
  714. Examples
  715. --------
  716. `kronsum` can be used to construct a finite difference discretization of the 2D
  717. Laplacian from a 1D discretization.
  718. >>> from scipy.sparse import diags_array, kronsum
  719. >>> from matplotlib import pyplot as plt
  720. >>> import numpy as np
  721. >>> ex = np.ones(10)
  722. >>> D_x = diags_array([ex, -ex[1:]], offsets=[0, -1]) # 1D first derivative
  723. >>> D_xx = D_x.T @ D_x # 1D second derivative
  724. >>> L = kronsum(D_xx, D_xx) # 2D Laplacian
  725. >>> plt.spy(L.toarray())
  726. >>> plt.show()
  727. """
  728. # TODO: delete next 8 lines and replace _sparse with _array when spmatrix removed
  729. if isinstance(A, sparray) or isinstance(B, sparray):
  730. # convert to local variables
  731. coo_sparse = coo_array
  732. identity_sparse = eye_array
  733. else:
  734. coo_sparse = coo_matrix
  735. identity_sparse = identity
  736. A = coo_sparse(A)
  737. B = coo_sparse(B)
  738. if A.ndim != 2:
  739. raise ValueError(f"kronsum requires 2D inputs. `A` is {A.ndim}D.")
  740. if B.ndim != 2:
  741. raise ValueError(f"kronsum requires 2D inputs. `B` is {B.ndim}D.")
  742. if A.shape[0] != A.shape[1]:
  743. raise ValueError('A is not square')
  744. if B.shape[0] != B.shape[1]:
  745. raise ValueError('B is not square')
  746. dtype = upcast(A.dtype, B.dtype)
  747. I_n = identity_sparse(A.shape[0], dtype=dtype)
  748. I_m = identity_sparse(B.shape[0], dtype=dtype)
  749. L = kron(I_m, A, format='coo')
  750. R = kron(B, I_n, format='coo')
  751. return (L + R).asformat(format)
  752. def _compressed_sparse_stack(blocks, axis, return_spmatrix):
  753. """
  754. Stacking fast path for CSR/CSC matrices or arrays
  755. (i) vstack for CSR, (ii) hstack for CSC.
  756. """
  757. other_axis = 1 if axis == 0 else 0
  758. data = np.concatenate([b.data for b in blocks])
  759. constant_dim = blocks[0]._shape_as_2d[other_axis]
  760. idx_dtype = get_index_dtype(arrays=[b.indptr for b in blocks],
  761. maxval=max(data.size, constant_dim))
  762. indices = np.empty(data.size, dtype=idx_dtype)
  763. indptr = np.empty(sum(b._shape_as_2d[axis] for b in blocks) + 1, dtype=idx_dtype)
  764. last_indptr = idx_dtype(0)
  765. sum_dim = 0
  766. sum_indices = 0
  767. for b in blocks:
  768. if b._shape_as_2d[other_axis] != constant_dim:
  769. raise ValueError(f'incompatible dimensions for axis {other_axis}')
  770. indices[sum_indices:sum_indices+b.indices.size] = b.indices
  771. sum_indices += b.indices.size
  772. idxs = slice(sum_dim, sum_dim + b._shape_as_2d[axis])
  773. indptr[idxs] = b.indptr[:-1]
  774. indptr[idxs] += last_indptr
  775. sum_dim += b._shape_as_2d[axis]
  776. last_indptr += b.indptr[-1]
  777. indptr[-1] = last_indptr
  778. # TODO remove this if-structure when sparse matrices removed
  779. if return_spmatrix:
  780. if axis == 0:
  781. return csr_matrix((data, indices, indptr),
  782. shape=(sum_dim, constant_dim))
  783. else:
  784. return csc_matrix((data, indices, indptr),
  785. shape=(constant_dim, sum_dim))
  786. if axis == 0:
  787. return csr_array((data, indices, indptr),
  788. shape=(sum_dim, constant_dim))
  789. else:
  790. return csc_array((data, indices, indptr),
  791. shape=(constant_dim, sum_dim))
  792. def _stack_along_minor_axis(blocks, axis):
  793. """
  794. Stacking fast path for CSR/CSC matrices along the minor axis
  795. (i) hstack for CSR, (ii) vstack for CSC.
  796. """
  797. n_blocks = len(blocks)
  798. if n_blocks == 0:
  799. raise ValueError('Missing block matrices')
  800. if n_blocks == 1:
  801. return blocks[0]
  802. # check for incompatible dimensions
  803. other_axis = 1 if axis == 0 else 0
  804. other_axis_dims = {b._shape_as_2d[other_axis] for b in blocks}
  805. if len(other_axis_dims) > 1:
  806. raise ValueError(f'Mismatching dimensions along axis {other_axis}: '
  807. f'{other_axis_dims}')
  808. constant_dim, = other_axis_dims
  809. # Do the stacking
  810. indptr_list = [b.indptr for b in blocks]
  811. data_cat = np.concatenate([b.data for b in blocks])
  812. # Need to check if any indices/indptr, would be too large post-
  813. # concatenation for np.int32:
  814. # - The max value of indices is the output array's stacking-axis length - 1
  815. # - The max value in indptr is the number of non-zero entries. This is
  816. # exceedingly unlikely to require int64, but is checked out of an
  817. # abundance of caution.
  818. sum_dim = sum(b._shape_as_2d[axis] for b in blocks)
  819. nnz = sum(len(b.indices) for b in blocks)
  820. idx_dtype = get_index_dtype(indptr_list, maxval=max(sum_dim - 1, nnz))
  821. stack_dim_cat = np.array([b._shape_as_2d[axis] for b in blocks], dtype=idx_dtype)
  822. if data_cat.size > 0:
  823. indptr_cat = np.concatenate(indptr_list, dtype=idx_dtype)
  824. indices_cat = np.concatenate([b.indices for b in blocks], dtype=idx_dtype)
  825. indptr = np.empty(constant_dim + 1, dtype=idx_dtype)
  826. indices = np.empty_like(indices_cat)
  827. data = np.empty_like(data_cat)
  828. csr_hstack(n_blocks, constant_dim, stack_dim_cat,
  829. indptr_cat, indices_cat, data_cat,
  830. indptr, indices, data)
  831. else:
  832. indptr = np.zeros(constant_dim + 1, dtype=idx_dtype)
  833. indices = np.empty(0, dtype=idx_dtype)
  834. data = np.empty(0, dtype=data_cat.dtype)
  835. if axis == 0:
  836. return blocks[0]._csc_container((data, indices, indptr),
  837. shape=(sum_dim, constant_dim))
  838. else:
  839. return blocks[0]._csr_container((data, indices, indptr),
  840. shape=(constant_dim, sum_dim))
  841. def hstack(blocks, format=None, dtype=None):
  842. """
  843. Stack sparse matrices horizontally (column wise)
  844. Parameters
  845. ----------
  846. blocks
  847. sequence of sparse matrices with compatible shapes
  848. format : str
  849. sparse format of the result (e.g., "csr")
  850. by default an appropriate sparse matrix format is returned.
  851. This choice is subject to change.
  852. dtype : dtype, optional
  853. The data-type of the output matrix. If not given, the dtype is
  854. determined from that of `blocks`.
  855. Returns
  856. -------
  857. new_array : sparse matrix or array
  858. If any block in blocks is a sparse array, return a sparse array.
  859. Otherwise return a sparse matrix.
  860. If you want a sparse array built from blocks that are not sparse
  861. arrays, use ``block(hstack(blocks))`` or convert one block
  862. e.g. ``blocks[0] = csr_array(blocks[0])``.
  863. See Also
  864. --------
  865. vstack : stack sparse matrices vertically (row wise)
  866. Examples
  867. --------
  868. >>> from scipy.sparse import coo_matrix, hstack
  869. >>> A = coo_matrix([[1, 2], [3, 4]])
  870. >>> B = coo_matrix([[5], [6]])
  871. >>> hstack([A,B]).toarray()
  872. array([[1, 2, 5],
  873. [3, 4, 6]])
  874. """
  875. blocks = np.asarray(blocks, dtype='object')
  876. if any(isinstance(b, sparray) for b in blocks.flat):
  877. return _block([blocks], format, dtype)
  878. else:
  879. return _block([blocks], format, dtype, return_spmatrix=True)
  880. def vstack(blocks, format=None, dtype=None):
  881. """
  882. Stack sparse arrays vertically (row wise)
  883. Parameters
  884. ----------
  885. blocks
  886. sequence of sparse arrays with compatible shapes
  887. format : str, optional
  888. sparse format of the result (e.g., "csr")
  889. by default an appropriate sparse array format is returned.
  890. This choice is subject to change.
  891. dtype : dtype, optional
  892. The data-type of the output array. If not given, the dtype is
  893. determined from that of `blocks`.
  894. Returns
  895. -------
  896. new_array : sparse matrix or array
  897. If any block in blocks is a sparse array, return a sparse array.
  898. Otherwise return a sparse matrix.
  899. If you want a sparse array built from blocks that are not sparse
  900. arrays, use ``block(vstack(blocks))`` or convert one block
  901. e.g. ``blocks[0] = csr_array(blocks[0])``.
  902. See Also
  903. --------
  904. hstack : stack sparse matrices horizontally (column wise)
  905. Examples
  906. --------
  907. >>> from scipy.sparse import coo_array, vstack
  908. >>> A = coo_array([[1, 2], [3, 4]])
  909. >>> B = coo_array([[5, 6]])
  910. >>> vstack([A, B]).toarray()
  911. array([[1, 2],
  912. [3, 4],
  913. [5, 6]])
  914. """
  915. blocks = np.asarray(blocks, dtype='object')
  916. if any(isinstance(b, sparray) for b in blocks.flat):
  917. return _block([[b] for b in blocks], format, dtype)
  918. else:
  919. return _block([[b] for b in blocks], format, dtype, return_spmatrix=True)
  920. def bmat(blocks, format=None, dtype=None):
  921. """
  922. Build a sparse array or matrix from sparse sub-blocks
  923. Note: `block_array` is preferred over ``bmat``. They are the same function
  924. except that ``bmat`` returns a deprecated sparse matrix when none of the
  925. inputs are sparse arrays.
  926. .. warning::
  927. This function returns a sparse matrix when no inputs are sparse arrays.
  928. You are encouraged to use `block_array` to take advantage
  929. of the sparse array functionality.
  930. Parameters
  931. ----------
  932. blocks : array_like
  933. Grid of sparse matrices with compatible shapes.
  934. An entry of None implies an all-zero matrix.
  935. format : {'bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'}, optional
  936. The sparse format of the result (e.g. "csr"). By default an
  937. appropriate sparse matrix format is returned.
  938. This choice is subject to change.
  939. dtype : dtype, optional
  940. The data-type of the output matrix. If not given, the dtype is
  941. determined from that of `blocks`.
  942. Returns
  943. -------
  944. bmat : sparse matrix or array
  945. If any block in blocks is a sparse array, return a sparse array.
  946. Otherwise return a sparse matrix.
  947. If you want a sparse array built from blocks that are not sparse
  948. arrays, use ``block_array()``.
  949. See Also
  950. --------
  951. block_array
  952. Examples
  953. --------
  954. >>> from scipy.sparse import coo_array, bmat
  955. >>> A = coo_array([[1, 2], [3, 4]])
  956. >>> B = coo_array([[5], [6]])
  957. >>> C = coo_array([[7]])
  958. >>> bmat([[A, B], [None, C]]).toarray()
  959. array([[1, 2, 5],
  960. [3, 4, 6],
  961. [0, 0, 7]])
  962. >>> bmat([[A, None], [None, C]]).toarray()
  963. array([[1, 2, 0],
  964. [3, 4, 0],
  965. [0, 0, 7]])
  966. """
  967. blocks = np.asarray(blocks, dtype='object')
  968. if any(isinstance(b, sparray) for b in blocks.flat):
  969. return _block(blocks, format, dtype)
  970. else:
  971. return _block(blocks, format, dtype, return_spmatrix=True)
  972. def block_array(blocks, *, format=None, dtype=None):
  973. """
  974. Build a sparse array from sparse sub-blocks
  975. Parameters
  976. ----------
  977. blocks : array_like
  978. Grid of sparse arrays with compatible shapes.
  979. An entry of None implies an all-zero array.
  980. format : {'bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'}, optional
  981. The sparse format of the result (e.g. "csr"). By default an
  982. appropriate sparse array format is returned.
  983. This choice is subject to change.
  984. dtype : dtype, optional
  985. The data-type of the output array. If not given, the dtype is
  986. determined from that of `blocks`.
  987. Returns
  988. -------
  989. block : sparse array
  990. See Also
  991. --------
  992. block_diag : specify blocks along the main diagonals
  993. diags : specify (possibly offset) diagonals
  994. Examples
  995. --------
  996. >>> from scipy.sparse import coo_array, block_array
  997. >>> A = coo_array([[1, 2], [3, 4]])
  998. >>> B = coo_array([[5], [6]])
  999. >>> C = coo_array([[7]])
  1000. >>> block_array([[A, B], [None, C]]).toarray()
  1001. array([[1, 2, 5],
  1002. [3, 4, 6],
  1003. [0, 0, 7]])
  1004. >>> block_array([[A, None], [None, C]]).toarray()
  1005. array([[1, 2, 0],
  1006. [3, 4, 0],
  1007. [0, 0, 7]])
  1008. """
  1009. return _block(blocks, format, dtype)
  1010. def _block(blocks, format, dtype, return_spmatrix=False):
  1011. blocks = np.asarray(blocks, dtype='object')
  1012. if blocks.ndim != 2:
  1013. raise ValueError('blocks must be 2-D')
  1014. M,N = blocks.shape
  1015. # check for fast path cases
  1016. if (format in (None, 'csr') and
  1017. all(issparse(b) and b.format == 'csr' for b in blocks.flat)
  1018. ):
  1019. if N > 1:
  1020. # stack along columns (axis 1): must have shape (M, 1)
  1021. blocks = [[_stack_along_minor_axis(blocks[b, :], 1)] for b in range(M)]
  1022. blocks = np.asarray(blocks, dtype='object')
  1023. # stack along rows (axis 0):
  1024. A = _compressed_sparse_stack(blocks[:, 0], 0, return_spmatrix)
  1025. if dtype is not None:
  1026. A = A.astype(dtype, copy=False)
  1027. return A
  1028. elif (format in (None, 'csc') and
  1029. all(issparse(b) and b.format == 'csc' for b in blocks.flat)
  1030. ):
  1031. if M > 1:
  1032. # stack along rows (axis 0): must have shape (1, N)
  1033. blocks = [[_stack_along_minor_axis(blocks[:, b], 0) for b in range(N)]]
  1034. blocks = np.asarray(blocks, dtype='object')
  1035. # stack along columns (axis 1):
  1036. A = _compressed_sparse_stack(blocks[0, :], 1, return_spmatrix)
  1037. if dtype is not None:
  1038. A = A.astype(dtype, copy=False)
  1039. return A
  1040. block_mask = np.zeros(blocks.shape, dtype=bool)
  1041. brow_lengths = np.zeros(M, dtype=np.int64)
  1042. bcol_lengths = np.zeros(N, dtype=np.int64)
  1043. # convert everything to COO format
  1044. for i in range(M):
  1045. for j in range(N):
  1046. if blocks[i,j] is not None:
  1047. A = coo_array(blocks[i,j])
  1048. blocks[i,j] = A
  1049. block_mask[i,j] = True
  1050. if brow_lengths[i] == 0:
  1051. brow_lengths[i] = A._shape_as_2d[0]
  1052. elif brow_lengths[i] != A._shape_as_2d[0]:
  1053. msg = (f'blocks[{i},:] has incompatible row dimensions. '
  1054. f'Got blocks[{i},{j}].shape[0] == {A._shape_as_2d[0]}, '
  1055. f'expected {brow_lengths[i]}.')
  1056. raise ValueError(msg)
  1057. if bcol_lengths[j] == 0:
  1058. bcol_lengths[j] = A._shape_as_2d[1]
  1059. elif bcol_lengths[j] != A._shape_as_2d[1]:
  1060. msg = (f'blocks[:,{j}] has incompatible column '
  1061. f'dimensions. '
  1062. f'Got blocks[{i},{j}].shape[1] == {A._shape_as_2d[1]}, '
  1063. f'expected {bcol_lengths[j]}.')
  1064. raise ValueError(msg)
  1065. nnz = sum(block.nnz for block in blocks[block_mask])
  1066. if dtype is None:
  1067. all_dtypes = [blk.dtype for blk in blocks[block_mask]]
  1068. dtype = upcast(*all_dtypes) if all_dtypes else None
  1069. row_offsets = np.append(0, np.cumsum(brow_lengths))
  1070. col_offsets = np.append(0, np.cumsum(bcol_lengths))
  1071. shape = (row_offsets[-1], col_offsets[-1])
  1072. data = np.empty(nnz, dtype=dtype)
  1073. idx_dtype = get_index_dtype([b.coords[0] for b in blocks[block_mask]],
  1074. maxval=max(shape))
  1075. row = np.empty(nnz, dtype=idx_dtype)
  1076. col = np.empty(nnz, dtype=idx_dtype)
  1077. nnz = 0
  1078. ii, jj = np.nonzero(block_mask)
  1079. for i, j in zip(ii, jj):
  1080. B = blocks[i, j]
  1081. idx = slice(nnz, nnz + B.nnz)
  1082. data[idx] = B.data
  1083. np.add(B.row, row_offsets[i], out=row[idx], dtype=idx_dtype)
  1084. np.add(B.col, col_offsets[j], out=col[idx], dtype=idx_dtype)
  1085. nnz += B.nnz
  1086. if return_spmatrix:
  1087. return coo_matrix((data, (row, col)), shape=shape).asformat(format)
  1088. return coo_array((data, (row, col)), shape=shape).asformat(format)
  1089. def block_diag(mats, format=None, dtype=None):
  1090. """
  1091. Build a block diagonal sparse matrix or array from provided matrices.
  1092. Parameters
  1093. ----------
  1094. mats : sequence of matrices or arrays
  1095. Input matrices or arrays.
  1096. format : str, optional
  1097. The sparse format of the result (e.g., "csr"). If not given, the result
  1098. is returned in "coo" format.
  1099. dtype : dtype specifier, optional
  1100. The data-type of the output. If not given, the dtype is
  1101. determined from that of `blocks`.
  1102. Returns
  1103. -------
  1104. res : sparse matrix or array
  1105. If at least one input is a sparse array, the output is a sparse array.
  1106. Otherwise the output is a sparse matrix.
  1107. Notes
  1108. -----
  1109. .. versionadded:: 0.11.0
  1110. See Also
  1111. --------
  1112. block_array
  1113. diags_array
  1114. Examples
  1115. --------
  1116. >>> from scipy.sparse import coo_array, block_diag
  1117. >>> A = coo_array([[1, 2], [3, 4]])
  1118. >>> B = coo_array([[5], [6]])
  1119. >>> C = coo_array([[7]])
  1120. >>> block_diag((A, B, C)).toarray()
  1121. array([[1, 2, 0, 0],
  1122. [3, 4, 0, 0],
  1123. [0, 0, 5, 0],
  1124. [0, 0, 6, 0],
  1125. [0, 0, 0, 7]])
  1126. """
  1127. if any(isinstance(a, sparray) for a in mats):
  1128. container = coo_array
  1129. else:
  1130. container = coo_matrix
  1131. row = []
  1132. col = []
  1133. data = []
  1134. idx_arrays = [] # track idx_dtype of incoming sparse arrays
  1135. r_idx = 0
  1136. c_idx = 0
  1137. for a in mats:
  1138. if isinstance(a, (list | numbers.Number)):
  1139. a = coo_array(np.atleast_2d(a))
  1140. if issparse(a):
  1141. a = a.tocoo()
  1142. if not idx_arrays and a.coords[0].dtype == np.int64:
  1143. idx_arrays.append(a.coords[0])
  1144. nrows, ncols = a._shape_as_2d
  1145. row.append(a.row + r_idx)
  1146. col.append(a.col + c_idx)
  1147. data.append(a.data)
  1148. else:
  1149. nrows, ncols = a.shape
  1150. a_row, a_col = np.divmod(np.arange(nrows*ncols), ncols)
  1151. row.append(a_row + r_idx)
  1152. col.append(a_col + c_idx)
  1153. data.append(a.ravel())
  1154. r_idx += nrows
  1155. c_idx += ncols
  1156. idx_dtype = get_index_dtype(idx_arrays, maxval=max(r_idx, c_idx))
  1157. row = np.concatenate(row, dtype=idx_dtype)
  1158. col = np.concatenate(col, dtype=idx_dtype)
  1159. data = np.concatenate(data)
  1160. new_shape = (r_idx, c_idx)
  1161. return container((data, (row, col)), shape=new_shape, dtype=dtype).asformat(format)
  1162. @_transition_to_rng("random_state")
  1163. def random_array(shape, *, density=0.01, format='coo', dtype=None,
  1164. rng=None, data_sampler=None):
  1165. """Return a sparse array of uniformly random numbers in [0, 1)
  1166. Returns a sparse array with the given shape and density
  1167. where values are generated uniformly randomly in the range [0, 1).
  1168. Parameters
  1169. ----------
  1170. shape : tuple of int
  1171. shape of the array.
  1172. density : real, optional (default: 0.01)
  1173. density of the generated matrix: density equal to one means a full
  1174. matrix, density of 0 means a matrix with no non-zero items.
  1175. format : str, optional (default: 'coo')
  1176. sparse matrix format.
  1177. dtype : dtype, optional (default: np.float64)
  1178. type of the returned matrix values.
  1179. rng : `numpy.random.Generator`, optional
  1180. Pseudorandom number generator state. When `rng` is None, a new
  1181. `numpy.random.Generator` is created using entropy from the
  1182. operating system. Types other than `numpy.random.Generator` are
  1183. passed to `numpy.random.default_rng` to instantiate a ``Generator``.
  1184. This random state will be used for sampling ``indices`` (the sparsity
  1185. structure), and by default for the data values too (see `data_sampler`).
  1186. data_sampler : callable, optional (default depends on dtype)
  1187. Sampler of random data values with keyword arg ``size``.
  1188. This function should take a single keyword argument ``size`` specifying
  1189. the length of its returned ndarray. It is used to generate the nonzero
  1190. values in the matrix after the locations of those values are chosen.
  1191. By default, uniform [0, 1) random values are used unless `dtype` is
  1192. an integer (default uniform integers from that dtype) or
  1193. complex (default uniform over the unit square in the complex plane).
  1194. For these, the `rng` is used e.g. ``rng.uniform(size=size)``.
  1195. Returns
  1196. -------
  1197. res : sparse array
  1198. Examples
  1199. --------
  1200. Passing a ``np.random.Generator`` instance for better performance:
  1201. >>> import numpy as np
  1202. >>> import scipy as sp
  1203. >>> rng = np.random.default_rng()
  1204. Default sampling uniformly from [0, 1):
  1205. >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng)
  1206. Providing a sampler for the values:
  1207. >>> rvs = sp.stats.poisson(25, loc=10).rvs
  1208. >>> S = sp.sparse.random_array((3, 4), density=0.25,
  1209. ... rng=rng, data_sampler=rvs)
  1210. >>> S.toarray()
  1211. array([[ 36., 0., 33., 0.], # random
  1212. [ 0., 0., 0., 0.],
  1213. [ 0., 0., 36., 0.]])
  1214. Providing a sampler for uint values:
  1215. >>> def random_uint32_to_100(size=None):
  1216. ... return rng.integers(100, size=size, dtype=np.uint32)
  1217. >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng,
  1218. ... data_sampler=random_uint32_to_100)
  1219. Building a custom distribution.
  1220. This example builds a squared normal from np.random:
  1221. >>> def np_normal_squared(size=None, rng=rng):
  1222. ... return rng.standard_normal(size) ** 2
  1223. >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng,
  1224. ... data_sampler=np_normal_squared)
  1225. Or we can build it from sp.stats style rvs functions:
  1226. >>> def sp_stats_normal_squared(size=None, rng=rng):
  1227. ... std_normal = sp.stats.distributions.norm_gen().rvs
  1228. ... return std_normal(size=size, random_state=rng) ** 2
  1229. >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng,
  1230. ... data_sampler=sp_stats_normal_squared)
  1231. Or we can subclass sp.stats rv_continuous or rv_discrete:
  1232. >>> class NormalSquared(sp.stats.rv_continuous):
  1233. ... def _rvs(self, size=None, random_state=rng):
  1234. ... return rng.standard_normal(size) ** 2
  1235. >>> X = NormalSquared()
  1236. >>> Y = X().rvs
  1237. >>> S = sp.sparse.random_array((3, 4), density=0.25,
  1238. ... rng=rng, data_sampler=Y)
  1239. """
  1240. data, ind = _random(shape, density, format, dtype, rng, data_sampler)
  1241. # downcast, if safe, before calling coo_constructor
  1242. idx_dtype = get_index_dtype(maxval=max(shape))
  1243. ind = tuple(np.asarray(co, dtype=idx_dtype) for co in ind)
  1244. return coo_array((data, ind), shape=shape).asformat(format)
  1245. def _random(shape, density=0.01, format=None, dtype=None,
  1246. rng=None, data_sampler=None):
  1247. if density < 0 or density > 1:
  1248. raise ValueError("density expected to be 0 <= density <= 1")
  1249. tot_prod = math.prod(shape) # use `math` for when prod is >= 2**64
  1250. # Number of non zero values
  1251. size = int(round(density * tot_prod))
  1252. rng = check_random_state(rng)
  1253. if data_sampler is None:
  1254. if np.issubdtype(dtype, np.integer):
  1255. def data_sampler(size):
  1256. return rng_integers(rng,
  1257. np.iinfo(dtype).min,
  1258. np.iinfo(dtype).max,
  1259. size,
  1260. dtype=dtype)
  1261. elif np.issubdtype(dtype, np.complexfloating):
  1262. def data_sampler(size):
  1263. return (rng.uniform(size=size) +
  1264. rng.uniform(size=size) * 1j)
  1265. else:
  1266. data_sampler = rng.uniform
  1267. idx_dtype = get_index_dtype(maxval=max(shape))
  1268. # rng.choice uses int64 if first arg is an int
  1269. if tot_prod <= np.iinfo(np.int64).max:
  1270. raveled_ind = rng.choice(tot_prod, size=size, replace=False)
  1271. ind = np.unravel_index(raveled_ind, shape=shape, order='F')
  1272. ind = tuple(np.asarray(co, idx_dtype) for co in ind)
  1273. else:
  1274. # for ravel indices bigger than dtype max, use sets to remove duplicates
  1275. ndim = len(shape)
  1276. seen = set()
  1277. while len(seen) < size:
  1278. dsize = size - len(seen)
  1279. seen.update(map(tuple, rng_integers(rng, shape, size=(dsize, ndim))))
  1280. ind = tuple(np.array(list(seen), dtype=idx_dtype).T)
  1281. # size kwarg allows eg data_sampler=partial(np.random.poisson, lam=5)
  1282. vals = data_sampler(size=size).astype(dtype, copy=False)
  1283. return vals, ind
  1284. @_transition_to_rng("random_state", position_num=5)
  1285. def random(m, n, density=0.01, format='coo', dtype=None,
  1286. rng=None, data_rvs=None):
  1287. """Generate a sparse matrix of the given shape and density with randomly
  1288. distributed values.
  1289. .. warning::
  1290. This function returns a sparse matrix -- not a sparse array.
  1291. You are encouraged to use `random_array` to take advantage of the
  1292. sparse array functionality.
  1293. Parameters
  1294. ----------
  1295. m, n : int
  1296. shape of the matrix
  1297. density : real, optional
  1298. density of the generated matrix: density equal to one means a full
  1299. matrix, density of 0 means a matrix with no non-zero items.
  1300. format : str, optional
  1301. sparse matrix format.
  1302. dtype : dtype, optional
  1303. type of the returned matrix values.
  1304. rng : `numpy.random.Generator`, optional
  1305. Pseudorandom number generator state. When `rng` is None, a new
  1306. `numpy.random.Generator` is created using entropy from the
  1307. operating system. Types other than `numpy.random.Generator` are
  1308. passed to `numpy.random.default_rng` to instantiate a ``Generator``.
  1309. This random state will be used for sampling the sparsity structure, but
  1310. not necessarily for sampling the values of the structurally nonzero
  1311. entries of the matrix.
  1312. data_rvs : callable, optional
  1313. Samples a requested number of random values.
  1314. This function should take a single argument specifying the length
  1315. of the ndarray that it will return. The structurally nonzero entries
  1316. of the sparse random matrix will be taken from the array sampled
  1317. by this function. By default, uniform [0, 1) random values will be
  1318. sampled using the same random state as is used for sampling
  1319. the sparsity structure.
  1320. Returns
  1321. -------
  1322. res : sparse matrix
  1323. See Also
  1324. --------
  1325. random_array : constructs sparse arrays instead of sparse matrices
  1326. Examples
  1327. --------
  1328. Passing a ``np.random.Generator`` instance for better performance:
  1329. >>> import scipy as sp
  1330. >>> import numpy as np
  1331. >>> rng = np.random.default_rng()
  1332. >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng)
  1333. Providing a sampler for the values:
  1334. >>> rvs = sp.stats.poisson(25, loc=10).rvs
  1335. >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng, data_rvs=rvs)
  1336. >>> S.toarray()
  1337. array([[ 36., 0., 33., 0.], # random
  1338. [ 0., 0., 0., 0.],
  1339. [ 0., 0., 36., 0.]])
  1340. Building a custom distribution.
  1341. This example builds a squared normal from np.random:
  1342. >>> def np_normal_squared(size=None, rng=rng):
  1343. ... return rng.standard_normal(size) ** 2
  1344. >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng,
  1345. ... data_rvs=np_normal_squared)
  1346. Or we can build it from sp.stats style rvs functions:
  1347. >>> def sp_stats_normal_squared(size=None, rng=rng):
  1348. ... std_normal = sp.stats.distributions.norm_gen().rvs
  1349. ... return std_normal(size=size, random_state=rng) ** 2
  1350. >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng,
  1351. ... data_rvs=sp_stats_normal_squared)
  1352. Or we can subclass sp.stats rv_continuous or rv_discrete:
  1353. >>> class NormalSquared(sp.stats.rv_continuous):
  1354. ... def _rvs(self, size=None, random_state=rng):
  1355. ... return rng.standard_normal(size) ** 2
  1356. >>> X = NormalSquared()
  1357. >>> Y = X() # get a frozen version of the distribution
  1358. >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng, data_rvs=Y.rvs)
  1359. """
  1360. if n is None:
  1361. n = m
  1362. m, n = int(m), int(n)
  1363. # make keyword syntax work for data_rvs e.g. data_rvs(size=7)
  1364. if data_rvs is not None:
  1365. def data_rvs_kw(size):
  1366. return data_rvs(size)
  1367. else:
  1368. data_rvs_kw = None
  1369. vals, ind = _random((m, n), density, format, dtype, rng, data_rvs_kw)
  1370. return coo_matrix((vals, ind), shape=(m, n)).asformat(format)
  1371. @_transition_to_rng("random_state", position_num=5)
  1372. def rand(m, n, density=0.01, format="coo", dtype=None, rng=None):
  1373. """Generate a sparse matrix of the given shape and density with uniformly
  1374. distributed values.
  1375. .. warning::
  1376. This function returns a sparse matrix -- not a sparse array.
  1377. You are encouraged to use `random_array` to take advantage
  1378. of the sparse array functionality.
  1379. Parameters
  1380. ----------
  1381. m, n : int
  1382. shape of the matrix
  1383. density : real, optional
  1384. density of the generated matrix: density equal to one means a full
  1385. matrix, density of 0 means a matrix with no non-zero items.
  1386. format : str, optional
  1387. sparse matrix format.
  1388. dtype : dtype, optional
  1389. type of the returned matrix values.
  1390. rng : `numpy.random.Generator`, optional
  1391. Pseudorandom number generator state. When `rng` is None, a new
  1392. `numpy.random.Generator` is created using entropy from the
  1393. operating system. Types other than `numpy.random.Generator` are
  1394. passed to `numpy.random.default_rng` to instantiate a ``Generator``.
  1395. Returns
  1396. -------
  1397. res : sparse matrix
  1398. Notes
  1399. -----
  1400. Only float types are supported for now.
  1401. See Also
  1402. --------
  1403. random : Similar function allowing a custom random data sampler
  1404. random_array : Similar to random() but returns a sparse array
  1405. Examples
  1406. --------
  1407. >>> from scipy.sparse import rand
  1408. >>> matrix = rand(3, 4, density=0.25, format="csr", rng=42)
  1409. >>> matrix
  1410. <Compressed Sparse Row sparse matrix of dtype 'float64'
  1411. with 3 stored elements and shape (3, 4)>
  1412. >>> matrix.toarray()
  1413. array([[0.05641158, 0. , 0. , 0.65088847], # random
  1414. [0. , 0. , 0. , 0.14286682],
  1415. [0. , 0. , 0. , 0. ]])
  1416. """
  1417. return random(m, n, density, format, dtype, rng)