_base.py 57 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606
  1. """Base class for sparse matrices"""
  2. from warnings import warn
  3. import math
  4. import numpy as np
  5. import operator
  6. from ._sputils import (asmatrix, check_reshape_kwargs, check_shape,
  7. get_sum_dtype, isdense, isscalarlike, _todata,
  8. matrix, validateaxis, getdtype, is_pydata_spmatrix)
  9. from scipy._lib._sparse import SparseABC, issparse
  10. from ._matrix import spmatrix
  11. __all__ = ['isspmatrix', 'issparse', 'sparray',
  12. 'SparseWarning', 'SparseEfficiencyWarning']
  13. class SparseWarning(Warning):
  14. """General warning for :mod:`scipy.sparse`."""
  15. pass
  16. class SparseFormatWarning(SparseWarning):
  17. pass
  18. class SparseEfficiencyWarning(SparseWarning):
  19. """The warning emitted when the operation is
  20. inefficient for sparse matrices.
  21. """
  22. pass
  23. # The formats that we might potentially understand.
  24. _formats = {'csc': [0, "Compressed Sparse Column"],
  25. 'csr': [1, "Compressed Sparse Row"],
  26. 'dok': [2, "Dictionary Of Keys"],
  27. 'lil': [3, "List of Lists"],
  28. 'dod': [4, "Dictionary of Dictionaries"],
  29. 'sss': [5, "Symmetric Sparse Skyline"],
  30. 'coo': [6, "COOrdinate"],
  31. 'lba': [7, "Linpack BAnded"],
  32. 'egd': [8, "Ellpack-itpack Generalized Diagonal"],
  33. 'dia': [9, "DIAgonal"],
  34. 'bsr': [10, "Block Sparse Row"],
  35. 'msr': [11, "Modified compressed Sparse Row"],
  36. 'bsc': [12, "Block Sparse Column"],
  37. 'msc': [13, "Modified compressed Sparse Column"],
  38. 'ssk': [14, "Symmetric SKyline"],
  39. 'nsk': [15, "Nonsymmetric SKyline"],
  40. 'jad': [16, "JAgged Diagonal"],
  41. 'uss': [17, "Unsymmetric Sparse Skyline"],
  42. 'vbr': [18, "Variable Block Row"],
  43. 'und': [19, "Undefined"]
  44. }
  45. # These univariate ufuncs preserve zeros.
  46. _ufuncs_with_fixed_point_at_zero = frozenset([
  47. np.sin, np.tan, np.arcsin, np.arctan, np.sinh, np.tanh, np.arcsinh,
  48. np.arctanh, np.rint, np.sign, np.expm1, np.log1p, np.deg2rad,
  49. np.rad2deg, np.floor, np.ceil, np.trunc, np.sqrt])
  50. MAXPRINT = 50
  51. # helper dicts to manipulate comparison operators
  52. # We negate operators (with warning) when all implicit values would be True
  53. op_neg = {operator.eq: operator.ne, operator.ne: operator.eq,
  54. operator.lt: operator.ge, operator.ge: operator.lt,
  55. operator.gt: operator.le, operator.le: operator.gt}
  56. # We use symbolic version of operators in warning messages.
  57. op_sym = {operator.eq: '==', operator.ge: '>=', operator.le: '<=',
  58. operator.ne: '!=', operator.gt: '>', operator.lt: '<'}
  59. # `_spbase` is a subclass of `SparseABC`.
  60. # This allows other submodules to check for instances of sparse subclasses
  61. # via `scipy._lib._sparse.issparse`, without introducing
  62. # an import dependency on `scipy.sparse`.
  63. class _spbase(SparseABC):
  64. """ This class provides a base class for all sparse arrays. It
  65. cannot be instantiated. Most of the work is provided by subclasses.
  66. """
  67. __array_priority__ = 10.1
  68. _format = 'und' # undefined
  69. _allow_nd = (2,)
  70. @property
  71. def ndim(self) -> int:
  72. return len(self._shape)
  73. @property
  74. def _shape_as_2d(self):
  75. s = self._shape
  76. return (1, s[-1]) if len(s) == 1 else s
  77. @property
  78. def _bsr_container(self):
  79. from ._bsr import bsr_array
  80. return bsr_array
  81. @property
  82. def _coo_container(self):
  83. from ._coo import coo_array
  84. return coo_array
  85. @property
  86. def _csc_container(self):
  87. from ._csc import csc_array
  88. return csc_array
  89. @property
  90. def _csr_container(self):
  91. from ._csr import csr_array
  92. return csr_array
  93. @property
  94. def _dia_container(self):
  95. from ._dia import dia_array
  96. return dia_array
  97. @property
  98. def _dok_container(self):
  99. from ._dok import dok_array
  100. return dok_array
  101. @property
  102. def _lil_container(self):
  103. from ._lil import lil_array
  104. return lil_array
  105. def __init__(self, arg1, *, maxprint=None):
  106. self._shape = None
  107. if self.__class__.__name__ == '_spbase':
  108. raise ValueError("This class is not intended"
  109. " to be instantiated directly.")
  110. if isinstance(self, sparray) and np.isscalar(arg1):
  111. raise ValueError(
  112. "scipy sparse array classes do not support instantiation from a scalar"
  113. )
  114. self.maxprint = MAXPRINT if maxprint is None else maxprint
  115. @property
  116. def shape(self):
  117. return self._shape
  118. def reshape(self, *args, **kwargs):
  119. """reshape(self, shape, order='C', copy=False)
  120. Gives a new shape to a sparse array/matrix without changing its data.
  121. Parameters
  122. ----------
  123. shape : tuple of ints
  124. The new shape should be compatible with the original shape.
  125. order : {'C', 'F'}, optional
  126. Read the elements using this index order. 'C' means to read and
  127. write the elements using C-like index order; e.g., read entire first
  128. row, then second row, etc. 'F' means to read and write the elements
  129. using Fortran-like index order; e.g., read entire first column, then
  130. second column, etc.
  131. copy : bool, optional
  132. Indicates whether or not attributes of self should be copied
  133. whenever possible. The degree to which attributes are copied varies
  134. depending on the type of sparse array being used.
  135. Returns
  136. -------
  137. reshaped : sparse array/matrix
  138. A sparse array/matrix with the given `shape`, not necessarily of the same
  139. format as the current object.
  140. See Also
  141. --------
  142. numpy.reshape : NumPy's implementation of 'reshape' for ndarrays
  143. """
  144. # If the shape already matches, don't bother doing an actual reshape
  145. # Otherwise, the default is to convert to COO and use its reshape
  146. # Don't restrict ndim on this first call. That happens in constructor
  147. shape = check_shape(args, self.shape, allow_nd=range(1, 65))
  148. order, copy = check_reshape_kwargs(kwargs)
  149. if shape == self.shape:
  150. if copy:
  151. return self.copy()
  152. else:
  153. return self
  154. return self.tocoo(copy=copy).reshape(shape, order=order, copy=False)
  155. def resize(self, shape):
  156. """Resize the array/matrix in-place to dimensions given by ``shape``
  157. Any elements that lie within the new shape will remain at the same
  158. indices, while non-zero elements lying outside the new shape are
  159. removed.
  160. Parameters
  161. ----------
  162. shape : (int, int)
  163. number of rows and columns in the new array/matrix
  164. Notes
  165. -----
  166. The semantics are not identical to `numpy.ndarray.resize` or
  167. `numpy.resize`. Here, the same data will be maintained at each index
  168. before and after reshape, if that index is within the new bounds. In
  169. numpy, resizing maintains contiguity of the array, moving elements
  170. around in the logical array but not within a flattened representation.
  171. We give no guarantees about whether the underlying data attributes
  172. (arrays, etc.) will be modified in place or replaced with new objects.
  173. """
  174. # As an inplace operation, this requires implementation in each format.
  175. raise NotImplementedError(
  176. f'{type(self).__name__}.resize is not implemented')
  177. def astype(self, dtype, casting='unsafe', copy=True):
  178. """Cast the array/matrix elements to a specified type.
  179. Parameters
  180. ----------
  181. dtype : string or numpy dtype
  182. Typecode or data-type to which to cast the data.
  183. casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
  184. Controls what kind of data casting may occur.
  185. Defaults to 'unsafe' for backwards compatibility.
  186. 'no' means the data types should not be cast at all.
  187. 'equiv' means only byte-order changes are allowed.
  188. 'safe' means only casts which can preserve values are allowed.
  189. 'same_kind' means only safe casts or casts within a kind,
  190. like float64 to float32, are allowed.
  191. 'unsafe' means any data conversions may be done.
  192. copy : bool, optional
  193. If `copy` is `False`, the result might share some memory with this
  194. array/matrix. If `copy` is `True`, it is guaranteed that the result and
  195. this array/matrix do not share any memory.
  196. """
  197. dtype = getdtype(dtype)
  198. if self.dtype != dtype:
  199. return self.tocsr().astype(
  200. dtype, casting=casting, copy=copy).asformat(self.format)
  201. elif copy:
  202. return self.copy()
  203. else:
  204. return self
  205. @classmethod
  206. def _ascontainer(cls, X, **kwargs):
  207. if issubclass(cls, sparray):
  208. return np.asarray(X, **kwargs)
  209. else:
  210. return asmatrix(X, **kwargs)
  211. @classmethod
  212. def _container(cls, X, **kwargs):
  213. if issubclass(cls, sparray):
  214. return np.array(X, **kwargs)
  215. else:
  216. return matrix(X, **kwargs)
  217. def _asfptype(self):
  218. """Upcast array to a floating point format (if necessary)"""
  219. fp_types = ['f', 'd', 'F', 'D']
  220. if self.dtype.char in fp_types:
  221. return self
  222. else:
  223. for fp_type in fp_types:
  224. if self.dtype <= np.dtype(fp_type):
  225. return self.astype(fp_type, copy=False)
  226. raise TypeError(
  227. f'cannot upcast [{self.dtype.name}] to a floating point format'
  228. )
  229. def __iter__(self):
  230. for r in range(self.shape[0]):
  231. yield self[r]
  232. def _getmaxprint(self):
  233. """Maximum number of elements to display when printed."""
  234. return self.maxprint
  235. def count_nonzero(self, axis=None):
  236. """Number of non-zero entries, equivalent to
  237. np.count_nonzero(a.toarray(), axis=axis)
  238. Unlike the nnz property, which return the number of stored
  239. entries (the length of the data attribute), this method counts the
  240. actual number of non-zero entries in data.
  241. Duplicate entries are summed before counting.
  242. Parameters
  243. ----------
  244. axis : {-2, -1, 0, 1, None} optional
  245. Count nonzeros for the whole array, or along a specified axis.
  246. .. versionadded:: 1.15.0
  247. Returns
  248. -------
  249. numpy array
  250. A reduced array (no axis `axis`) holding the number of nonzero values
  251. for each of the indices of the nonaxis dimensions.
  252. Notes
  253. -----
  254. If you want to count nonzero and explicit zero stored values (e.g. nnz)
  255. along an axis, two fast idioms are provided by `numpy` functions for the
  256. common CSR, CSC, COO formats.
  257. For the major axis in CSR (rows) and CSC (cols) use `np.diff`:
  258. >>> import numpy as np
  259. >>> import scipy as sp
  260. >>> A = sp.sparse.csr_array([[4, 5, 0], [7, 0, 0]])
  261. >>> major_axis_stored_values = np.diff(A.indptr) # -> np.array([2, 1])
  262. For the minor axis in CSR (cols) and CSC (rows) use `numpy.bincount` with
  263. minlength ``A.shape[1]`` for CSR and ``A.shape[0]`` for CSC:
  264. >>> csr_minor_stored_values = np.bincount(A.indices, minlength=A.shape[1])
  265. For COO, use the minor axis approach for either `axis`:
  266. >>> A = A.tocoo()
  267. >>> coo_axis0_stored_values = np.bincount(A.coords[0], minlength=A.shape[1])
  268. >>> coo_axis1_stored_values = np.bincount(A.coords[1], minlength=A.shape[0])
  269. Examples
  270. --------
  271. >>> A = sp.sparse.csr_array([[4, 5, 0], [7, 0, 0]])
  272. >>> A.count_nonzero(axis=0)
  273. array([2, 1, 0])
  274. """
  275. clsname = self.__class__.__name__
  276. raise NotImplementedError(f"count_nonzero not implemented for {clsname}.")
  277. def _getnnz(self, axis=None):
  278. """Number of stored values, including explicit zeros.
  279. Parameters
  280. ----------
  281. axis : {-2, -1, 0, 1, None} optional
  282. Report stored values for the whole array, or along a specified axis.
  283. See also
  284. --------
  285. count_nonzero : Number of non-zero entries
  286. """
  287. clsname = self.__class__.__name__
  288. raise NotImplementedError(f"getnnz not implemented for {clsname}.")
  289. @property
  290. def nnz(self) -> int:
  291. """Number of stored values, including explicit zeros.
  292. See also
  293. --------
  294. count_nonzero : Number of non-zero entries
  295. """
  296. return self._getnnz()
  297. @property
  298. def size(self) -> int:
  299. """Number of stored values.
  300. See also
  301. --------
  302. count_nonzero : Number of non-zero values.
  303. """
  304. return self._getnnz()
  305. @property
  306. def format(self) -> str:
  307. """Format string for matrix."""
  308. return self._format
  309. @property
  310. def T(self):
  311. """Transpose."""
  312. return self.transpose()
  313. @property
  314. def real(self):
  315. return self._real()
  316. @property
  317. def imag(self):
  318. return self._imag()
  319. def __repr__(self):
  320. _, format_name = _formats[self.format]
  321. sparse_cls = 'array' if isinstance(self, sparray) else 'matrix'
  322. return (
  323. f"<{format_name} sparse {sparse_cls} of dtype '{self.dtype}'\n"
  324. f"\twith {self.nnz} stored elements and shape {self.shape}>"
  325. )
  326. def __str__(self):
  327. maxprint = self._getmaxprint()
  328. A = self.tocoo()
  329. # helper function, outputs "(i,j) v"
  330. def tostr(coords, data):
  331. pairs = zip(zip(*(c.tolist() for c in coords)), data)
  332. return '\n'.join(f' {idx}\t{val}' for idx, val in pairs)
  333. out = repr(self)
  334. if self.nnz == 0:
  335. return out
  336. out += '\n Coords\tValues\n'
  337. if self.nnz > maxprint:
  338. half = maxprint // 2
  339. out += tostr(tuple(c[:half] for c in A.coords), A.data[:half])
  340. out += "\n :\t:\n"
  341. half = maxprint - half
  342. out += tostr(tuple(c[-half:] for c in A.coords), A.data[-half:])
  343. else:
  344. out += tostr(A.coords, A.data)
  345. return out
  346. def __bool__(self): # Simple -- other ideas?
  347. if self.shape == (1, 1):
  348. return self.nnz != 0
  349. else:
  350. raise ValueError("The truth value of an array with more than one "
  351. "element is ambiguous. Use a.any() or a.all().")
  352. # What should len(sparse) return? For consistency with dense matrices,
  353. # perhaps it should be the number of rows? But for some uses the number of
  354. # non-zeros is more important. For now, raise an exception!
  355. def __len__(self):
  356. raise TypeError("sparse array length is ambiguous; use getnnz()"
  357. " or shape[0]")
  358. def asformat(self, format, copy=False):
  359. """Return this array/matrix in the passed format.
  360. Parameters
  361. ----------
  362. format : {str, None}
  363. The desired sparse format ("csr", "csc", "lil", "dok", "array", ...)
  364. or None for no conversion.
  365. copy : bool, optional
  366. If True, the result is guaranteed to not share data with self.
  367. Returns
  368. -------
  369. A : This array/matrix in the passed format.
  370. """
  371. if format is None or format == self.format:
  372. if copy:
  373. return self.copy()
  374. else:
  375. return self
  376. else:
  377. try:
  378. convert_method = getattr(self, 'to' + format)
  379. except AttributeError as e:
  380. raise ValueError(f'Format {format} is unknown.') from e
  381. # Forward the copy kwarg, if it's accepted.
  382. try:
  383. return convert_method(copy=copy)
  384. except TypeError:
  385. return convert_method()
  386. ###################################################################
  387. # NOTE: All arithmetic operations use csr_matrix by default.
  388. # Therefore a new sparse array format just needs to define a
  389. # .tocsr() method to provide arithmetic support. Any of these
  390. # methods can be overridden for efficiency.
  391. ####################################################################
  392. def multiply(self, other):
  393. """Element-wise multiplication by another array/matrix."""
  394. if isscalarlike(other):
  395. return self._mul_scalar(other)
  396. if self.ndim < 3:
  397. try:
  398. return self._multiply_2d_with_broadcasting(other)
  399. except AttributeError:
  400. return self.tocsr()._multiply_2d_with_broadcasting(other)
  401. if not (issparse(other) or isdense(other)):
  402. # If it's a list or whatever, treat it like an array
  403. other_a = np.asanyarray(other)
  404. if other_a.ndim == 0 and other_a.dtype == np.object_:
  405. # numpy creates a 0d object array if all else fails.
  406. # Not interpretable as an array; return NotImplemented so
  407. # other's __rmul__ can kick in if that's implemented.
  408. return NotImplemented
  409. # Allow custom sparse class indicated by attr sparse gh-6520
  410. try:
  411. other.shape
  412. except AttributeError:
  413. other = other_a
  414. if self.shape != other.shape:
  415. raise ValueError("inconsistent shapes: >2D multiply() does not yet "
  416. "support broadcasting")
  417. # self is >2D so must be COO
  418. if isdense(other):
  419. data = np.multiply(self.data, other[self.coords])
  420. result = self.copy()
  421. result.data = data.view(np.ndarray).ravel()
  422. return result
  423. elif issparse(other):
  424. csr_self = self.reshape(1, -1).tocsr()
  425. csr_other = other.reshape(1, -1).tocsr()
  426. return csr_self._binopt(csr_other, '_elmul_').reshape(self.shape)
  427. else:
  428. # Not scalar, dense or sparse. Return NotImplemented so that
  429. # other's __rmul__ can kick in if that's implemented.
  430. return NotImplemented
  431. def _maximum_minimum(self, other, np_op):
  432. if not (issparse(other) or isdense(other) or isscalarlike(other)):
  433. # If it's a list or whatever, treat it like an array
  434. other_a = np.asanyarray(other)
  435. if other_a.ndim == 0 and other_a.dtype == np.object_:
  436. # numpy creates a 0d object array if all else fails.
  437. # We don't know how to handle it either.
  438. raise NotImplementedError('maximum or minimum with an unrecognized '
  439. 'array type is not supported')
  440. # Allow custom sparse class indicated by attr sparse gh-6520
  441. try:
  442. other.shape
  443. except AttributeError:
  444. other = other_a
  445. if isscalarlike(other):
  446. if np_op(0, other):
  447. pos_neg = 'positive' if np_op == np.maximum else 'negative'
  448. warn(f"Taking {np_op.__name__} with a {pos_neg} number results in a"
  449. " dense matrix.", SparseEfficiencyWarning, stacklevel=3)
  450. return self.__class__(np_op(self.toarray(), other))
  451. else:
  452. if self.ndim < 3:
  453. cs_self = self if self.format in ('csr', 'csc') else self.tocsr()
  454. return cs_self._scalar_binopt(other, np_op)
  455. csr_self = self.reshape(1, -1).tocsr()
  456. result = csr_self._scalar_binopt(other, np_op)
  457. return result.tocoo().reshape(self.shape)
  458. elif isdense(other):
  459. return np_op(self.todense(), other)
  460. elif issparse(other):
  461. if self.shape != other.shape:
  462. raise ValueError(f"inconsistent shapes {self.shape=} {other.shape=}")
  463. if self.ndim < 3: # shape is same so other.ndim < 3
  464. cs_self = self if self.format in ('csr', 'csc') else self.tocsr()
  465. return cs_self._binopt(other, f'_{np_op.__name__}_')
  466. csr_self = self.reshape(1, -1).tocsr()
  467. csr_other = other.reshape(1, -1).tocsr()
  468. result = csr_self._binopt(csr_other, f'_{np_op.__name__}_')
  469. return result.tocoo().reshape(self.shape)
  470. else:
  471. raise ValueError("Operands not compatible.")
  472. def maximum(self, other):
  473. """Element-wise maximum between this and another array/matrix."""
  474. return self._maximum_minimum(other, np.maximum)
  475. def minimum(self, other):
  476. """Element-wise minimum between this and another array/matrix."""
  477. return self._maximum_minimum(other, np.minimum)
  478. def dot(self, other):
  479. """Ordinary dot product
  480. Examples
  481. --------
  482. >>> import numpy as np
  483. >>> from scipy.sparse import csr_array
  484. >>> A = csr_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
  485. >>> v = np.array([1, 0, -1])
  486. >>> A.dot(v)
  487. array([ 1, -3, -1], dtype=int64)
  488. """
  489. if np.isscalar(other):
  490. return self * other
  491. else:
  492. return self @ other
  493. def power(self, n, dtype=None):
  494. """Element-wise power."""
  495. return self.tocsr().power(n, dtype=dtype)
  496. def _broadcast_to(self, shape, copy=False):
  497. if self.shape == shape:
  498. return self.copy() if copy else self
  499. else:
  500. return self.tocsr()._broadcast_to(shape, copy)
  501. def _comparison(self, other, op):
  502. # We convert to CSR format and use methods _binopt or _scalar_binopt
  503. # If ndim>2 we reshape to 2D, compare and then reshape back to nD
  504. if not (issparse(other) or isdense(other) or isscalarlike(other)):
  505. if is_pydata_spmatrix(other):
  506. # cannot compare with pydata other, but it might compare with us.
  507. return NotImplemented
  508. # If it's a list or whatever, treat it like an array
  509. other_a = np.asanyarray(other)
  510. if other_a.ndim == 0 and other_a.dtype == np.object_:
  511. # numpy creates a 0d object array if all else fails.
  512. # Not interpretable as an array; return NotImplemented so
  513. # other's dunder methods can kick in if implemented.
  514. return NotImplemented
  515. # Allow custom sparse class indicated by attr sparse gh-6520
  516. try:
  517. other.shape
  518. except AttributeError:
  519. other = other_a
  520. if isscalarlike(other):
  521. if not op(0, other):
  522. if np.isnan(other): # op is not `ne`, so results are all False.
  523. return self.__class__(self.shape, dtype=np.bool_)
  524. if self.ndim < 3:
  525. cs_self = self if self.format in ('csc', 'csr') else self.tocsr()
  526. return cs_self._scalar_binopt(other, op)
  527. csr_self = self.reshape(1, -1).tocsr()
  528. result = csr_self._scalar_binopt(other, op)
  529. return result.tocoo().reshape(self.shape)
  530. else:
  531. warn(f"Comparing a sparse matrix with {other} using {op_sym[op]} "
  532. f"is inefficient. Try using {op_sym[op_neg[op]]} instead.",
  533. SparseEfficiencyWarning, stacklevel=3)
  534. if np.isnan(other):
  535. # op is `ne` cuz op(0, other) and isnan(other). Return all True.
  536. return self.__class__(np.ones(self.shape, dtype=np.bool_))
  537. # op is eq, le, or ge. Use negated op and then negate.
  538. if self.ndim < 3:
  539. cs_self = self if self.format in ('csc', 'csr') else self.tocsr()
  540. inv = cs_self._scalar_binopt(other, op_neg[op])
  541. all_true = cs_self.__class__(np.ones(cs_self.shape, dtype=np.bool_))
  542. return all_true - inv
  543. csr_self = self.reshape(1, -1).tocsr()
  544. inv = csr_self._scalar_binopt(other, op_neg[op])
  545. all_true = csr_self.__class__(np.ones(csr_self.shape, dtype=np.bool_))
  546. result = all_true - inv
  547. return result.tocoo().reshape(self.shape)
  548. elif isdense(other):
  549. return op(self.todense(), other)
  550. elif issparse(other):
  551. # TODO sparse broadcasting
  552. if self.shape != other.shape:
  553. # eq and ne return True or False instead of an array when the shapes
  554. # don't match. Numpy doesn't do this. Is this what we want?
  555. if op in (operator.eq, operator.ne):
  556. return op is operator.ne
  557. raise ValueError("inconsistent shape")
  558. if self.ndim < 3:
  559. cs_self = self if self.format in ('csc', 'csr') else self.tocsr()
  560. cs_other = other
  561. else:
  562. cs_self = self.reshape(1, -1).tocsr()
  563. cs_other = other.reshape(1, -1).tocsr()
  564. if not op(0, 0):
  565. result = cs_self._binopt(cs_other, f'_{op.__name__}_')
  566. return result if self.ndim < 3 else result.tocoo().reshape(self.shape)
  567. else:
  568. # result will not be sparse. Use negated op and then negate.
  569. warn(f"Comparing two sparse matrices using {op_sym[op]} "
  570. f"is inefficient. Try using {op_sym[op_neg[op]]} instead.",
  571. SparseEfficiencyWarning, stacklevel=3)
  572. inv = cs_self._binopt(cs_other, f'_{op_neg[op].__name__}_')
  573. all_true = cs_self.__class__(np.ones(cs_self.shape, dtype=np.bool_))
  574. result = all_true - inv
  575. return result if self.ndim < 3 else result.tocoo().reshape(self.shape)
  576. else:
  577. # cannot compare with other, but it might compare with us.
  578. return NotImplemented
  579. def __eq__(self, other):
  580. return self._comparison(other, operator.eq)
  581. def __ne__(self, other):
  582. return self._comparison(other, operator.ne)
  583. def __lt__(self, other):
  584. return self._comparison(other, operator.lt)
  585. def __gt__(self, other):
  586. return self._comparison(other, operator.gt)
  587. def __le__(self, other):
  588. return self._comparison(other, operator.le)
  589. def __ge__(self, other):
  590. return self._comparison(other, operator.ge)
  591. def __abs__(self):
  592. return abs(self.tocsr())
  593. def __round__(self, ndigits=0):
  594. return round(self.tocsr(), ndigits=ndigits)
  595. def _add_sparse(self, other):
  596. return self.tocsr()._add_sparse(other)
  597. def _add_dense(self, other):
  598. return self.tocoo()._add_dense(other)
  599. def _sub_sparse(self, other):
  600. return self.tocsr()._sub_sparse(other)
  601. def _sub_dense(self, other):
  602. return self.todense() - other
  603. def _rsub_dense(self, other):
  604. # note: this can't be replaced by other + (-self) for unsigned types
  605. return other - self.todense()
  606. def __add__(self, other): # self + other
  607. if isscalarlike(other):
  608. if other == 0:
  609. return self.copy()
  610. # Now we would add this scalar to every element.
  611. raise NotImplementedError('adding a nonzero scalar to a '
  612. 'sparse array is not supported')
  613. elif issparse(other):
  614. if other.shape != self.shape:
  615. raise ValueError("inconsistent shapes")
  616. return self._add_sparse(other)
  617. elif isdense(other):
  618. other = np.broadcast_to(other, self.shape)
  619. return self._add_dense(other)
  620. else:
  621. return NotImplemented
  622. def __radd__(self,other): # other + self
  623. return self.__add__(other)
  624. def __sub__(self, other): # self - other
  625. if isscalarlike(other):
  626. if other == 0:
  627. return self.copy()
  628. raise NotImplementedError('subtracting a nonzero scalar from a '
  629. 'sparse array is not supported')
  630. elif issparse(other):
  631. if other.shape != self.shape:
  632. raise ValueError("inconsistent shapes")
  633. return self._sub_sparse(other)
  634. elif isdense(other):
  635. other = np.broadcast_to(other, self.shape)
  636. return self._sub_dense(other)
  637. else:
  638. return NotImplemented
  639. def __rsub__(self,other): # other - self
  640. if isscalarlike(other):
  641. if other == 0:
  642. return -self.copy()
  643. raise NotImplementedError('subtracting a sparse array from a '
  644. 'nonzero scalar is not supported')
  645. elif isdense(other):
  646. other = np.broadcast_to(other, self.shape)
  647. return self._rsub_dense(other)
  648. else:
  649. return NotImplemented
  650. def _matmul_dispatch(self, other):
  651. """np.array-like matmul & `np.matrix`-like mul, i.e. `dot` or `NotImplemented`
  652. interpret other and call one of the following
  653. self._mul_scalar()
  654. self._matmul_vector()
  655. self._matmul_multivector()
  656. self._matmul_sparse()
  657. """
  658. # This method has to be different from `__matmul__` because it is also
  659. # called by sparse matrix classes.
  660. # Currently matrix multiplication is only supported
  661. # for 2D arrays. Hence we unpacked and use only the
  662. # two last axes' lengths.
  663. M, N = self._shape_as_2d
  664. if other.__class__ is np.ndarray:
  665. # Fast path for the most common case
  666. if other.shape == (N,):
  667. return self._matmul_vector(other)
  668. elif other.shape == (N, 1):
  669. result = self._matmul_vector(other.ravel())
  670. if self.ndim == 1:
  671. return result.reshape(1)
  672. return result.reshape(M, 1)
  673. elif other.ndim == 2 and other.shape[0] == N:
  674. return self._matmul_multivector(other)
  675. if isscalarlike(other):
  676. # scalar value
  677. return self._mul_scalar(other)
  678. err_prefix = "matmul: dimension mismatch with signature"
  679. if issparse(other):
  680. if N != other.shape[0]:
  681. raise ValueError(
  682. f"{err_prefix} (n,k={N}),(k={other.shape[0]},m)->(n,m)"
  683. )
  684. return self._matmul_sparse(other)
  685. # If it's a list or whatever, treat it like an array
  686. other_a = np.asanyarray(other)
  687. if other_a.ndim == 0 and other_a.dtype == np.object_:
  688. # numpy creates a 0d object array if all else fails.
  689. # Not interpretable as an array; return NotImplemented so that
  690. # other's __rmatmul__ can kick in if that's implemented.
  691. return NotImplemented
  692. # Allow custom sparse class indicated by attr sparse gh-6520
  693. try:
  694. other.shape
  695. except AttributeError:
  696. other = other_a
  697. if other.ndim == 1 or other.ndim == 2 and other.shape[1] == 1:
  698. # dense row or column vector
  699. if other.shape[0] != N:
  700. raise ValueError(
  701. f"{err_prefix} (n,k={N}),(k={other.shape[0]},1?)->(n,1?)"
  702. )
  703. result = self._matmul_vector(np.ravel(other))
  704. if isinstance(other, np.matrix):
  705. result = self._ascontainer(result)
  706. if other.ndim == 2 and other.shape[1] == 1:
  707. # If 'other' was an (nx1) column vector, reshape the result
  708. if self.ndim == 1:
  709. result = result.reshape(1)
  710. else:
  711. result = result.reshape(-1, 1)
  712. return result
  713. elif other.ndim == 2:
  714. ##
  715. # dense 2D array or matrix ("multivector")
  716. if other.shape[0] != N:
  717. raise ValueError(
  718. f"{err_prefix} (n,k={N}),(k={other.shape[0]},m)->(n,m)"
  719. )
  720. result = self._matmul_multivector(np.asarray(other))
  721. if isinstance(other, np.matrix):
  722. result = self._ascontainer(result)
  723. return result
  724. else:
  725. raise ValueError('could not interpret dimensions')
  726. def __mul__(self, other):
  727. return self.multiply(other)
  728. def __rmul__(self, other): # other * self
  729. return self.multiply(other)
  730. # by default, use CSR for __mul__ handlers
  731. def _mul_scalar(self, other):
  732. return self.tocsr()._mul_scalar(other)
  733. def _matmul_vector(self, other):
  734. return self.tocsr()._matmul_vector(other)
  735. def _matmul_multivector(self, other):
  736. return self.tocsr()._matmul_multivector(other)
  737. def _matmul_sparse(self, other):
  738. return self.tocsr()._matmul_sparse(other)
  739. def _rmatmul_dispatch(self, other):
  740. if isscalarlike(other):
  741. return self._mul_scalar(other)
  742. else:
  743. # Don't use asarray unless we have to
  744. try:
  745. tr = other.transpose()
  746. except AttributeError:
  747. tr = np.asarray(other).transpose()
  748. ret = self.transpose()._matmul_dispatch(tr)
  749. if ret is NotImplemented:
  750. return NotImplemented
  751. return ret.transpose()
  752. #######################
  753. # matmul (@) operator #
  754. #######################
  755. def __matmul__(self, other):
  756. if isscalarlike(other):
  757. raise ValueError("Scalar operands are not allowed, "
  758. "use '*' instead")
  759. return self._matmul_dispatch(other)
  760. def __rmatmul__(self, other):
  761. if isscalarlike(other):
  762. raise ValueError("Scalar operands are not allowed, "
  763. "use '*' instead")
  764. return self._rmatmul_dispatch(other)
  765. ####################
  766. # Other Arithmetic #
  767. ####################
  768. def _divide(self, other, *, rdivide=False):
  769. if not (issparse(other) or isdense(other) or isscalarlike(other)):
  770. # If it's a list or whatever, treat it like an array
  771. other_a = np.asanyarray(other)
  772. if other_a.ndim == 0 and other_a.dtype == np.object_:
  773. # numpy creates a 0d object array if all else fails.
  774. # Not interpretable as an array; return NotImplemented so that
  775. # other's __rtruediv__ can kick in if that's implemented.
  776. return NotImplemented
  777. # Allow custom sparse class indicated by attr sparse gh-6520
  778. try:
  779. other.shape
  780. except AttributeError:
  781. other = other_a
  782. if isscalarlike(other):
  783. if rdivide:
  784. return np.divide(other, self.todense())
  785. if np.can_cast(self.dtype, np.float64):
  786. return self.astype(np.float64, copy=False)._mul_scalar(1 / other)
  787. else:
  788. r = self._mul_scalar(1 / other)
  789. scalar_dtype = np.asarray(other).dtype
  790. if (np.issubdtype(self.dtype, np.integer) and
  791. np.issubdtype(scalar_dtype, np.integer)):
  792. return r.astype(self.dtype, copy=False)
  793. else:
  794. return r
  795. elif isdense(other):
  796. if rdivide:
  797. return np.divide(other, self.todense())
  798. return self.multiply(np.divide(1, other))
  799. elif issparse(other):
  800. if rdivide:
  801. return other._divide(self, rdivide=False)
  802. csr_self = (self if self.ndim < 3 else self.reshape(1, -1)).tocsr()
  803. csr_other = (other if self.ndim < 3 else other.reshape(1, -1)).tocsr()
  804. if np.can_cast(self.dtype, np.float64):
  805. csr_self = csr_self.astype(np.float64, copy=False)
  806. result = csr_self._divide_sparse(csr_other)
  807. return result if self.ndim < 3 else result.reshape(self.shape)
  808. else:
  809. # not scalar, dense or sparse. Return NotImplemented so
  810. # other's __rtruediv__ can kick in if that's implemented.
  811. return NotImplemented
  812. def __truediv__(self, other):
  813. return self._divide(other)
  814. def __rtruediv__(self, other):
  815. # Implementing this as the inverse would be too magical -- bail out
  816. return NotImplemented
  817. def __neg__(self):
  818. return -self.tocsr()
  819. def __iadd__(self, other):
  820. return NotImplemented
  821. def __isub__(self, other):
  822. return NotImplemented
  823. def __imul__(self, other):
  824. return NotImplemented
  825. def __itruediv__(self, other):
  826. return NotImplemented
  827. def __pow__(self, *args, **kwargs):
  828. return self.power(*args, **kwargs)
  829. def transpose(self, axes=None, copy=False):
  830. """
  831. Reverses the dimensions of the sparse array/matrix.
  832. Parameters
  833. ----------
  834. axes : None, optional
  835. This argument is in the signature *solely* for NumPy
  836. compatibility reasons. Do not pass in anything except
  837. for the default value.
  838. copy : bool, optional
  839. Indicates whether or not attributes of `self` should be
  840. copied whenever possible. The degree to which attributes
  841. are copied varies depending on the type of sparse array/matrix
  842. being used.
  843. Returns
  844. -------
  845. p : `self` with the dimensions reversed.
  846. Notes
  847. -----
  848. If `self` is a `csr_array` or a `csc_array`, then this will return a
  849. `csc_array` or a `csr_array`, respectively.
  850. See Also
  851. --------
  852. numpy.transpose : NumPy's implementation of 'transpose' for ndarrays
  853. """
  854. return self.tocsr(copy=copy).transpose(axes=axes, copy=False)
  855. def conjugate(self, copy=True):
  856. """Element-wise complex conjugation.
  857. If the array/matrix is of non-complex data type and `copy` is False,
  858. this method does nothing and the data is not copied.
  859. Parameters
  860. ----------
  861. copy : bool, optional
  862. If True, the result is guaranteed to not share data with self.
  863. Returns
  864. -------
  865. A : The element-wise complex conjugate.
  866. """
  867. if np.issubdtype(self.dtype, np.complexfloating):
  868. return self.tocsr(copy=copy).conjugate(copy=False)
  869. elif copy:
  870. return self.copy()
  871. else:
  872. return self
  873. def conj(self, copy=True):
  874. return self.conjugate(copy=copy)
  875. conj.__doc__ = conjugate.__doc__
  876. def _real(self):
  877. return self.tocsr()._real()
  878. def _imag(self):
  879. return self.tocsr()._imag()
  880. def nonzero(self):
  881. """Nonzero indices of the array/matrix.
  882. Returns a tuple of arrays (row,col) containing the indices
  883. of the non-zero elements of the array.
  884. Examples
  885. --------
  886. >>> from scipy.sparse import csr_array
  887. >>> A = csr_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
  888. >>> A.nonzero()
  889. (array([0, 0, 1, 2, 2], dtype=int32), array([0, 1, 2, 0, 2], dtype=int32))
  890. """
  891. # convert to COOrdinate format
  892. A = self.tocoo()
  893. nz_mask = A.data != 0
  894. return tuple(idx[nz_mask] for idx in A.coords)
  895. def _getcol(self, j):
  896. """Returns a copy of column j of the array, as an (m x 1) sparse
  897. array (column vector).
  898. """
  899. if self.ndim == 1:
  900. raise ValueError("getcol not provided for 1d arrays. Use indexing A[j]")
  901. # Subclasses should override this method for efficiency.
  902. # Post-multiply by a (n x 1) column vector 'a' containing all zeros
  903. # except for a_j = 1
  904. N = self.shape[-1]
  905. if j < 0:
  906. j += N
  907. if j < 0 or j >= N:
  908. raise IndexError("index out of bounds")
  909. col_selector = self._csc_container(([1], [[j], [0]]),
  910. shape=(N, 1), dtype=self.dtype)
  911. result = self @ col_selector
  912. return result
  913. def _getrow(self, i):
  914. """Returns a copy of row i of the array, as a (1 x n) sparse
  915. array (row vector).
  916. """
  917. if self.ndim == 1:
  918. raise ValueError("getrow not meaningful for a 1d array")
  919. # Subclasses should override this method for efficiency.
  920. # Pre-multiply by a (1 x m) row vector 'a' containing all zeros
  921. # except for a_i = 1
  922. M = self.shape[0]
  923. if i < 0:
  924. i += M
  925. if i < 0 or i >= M:
  926. raise IndexError("index out of bounds")
  927. row_selector = self._csr_container(([1], [[0], [i]]),
  928. shape=(1, M), dtype=self.dtype)
  929. return row_selector @ self
  930. # The following dunder methods cannot be implemented.
  931. #
  932. # def __array__(self):
  933. # # Sparse matrices rely on NumPy wrapping them in object arrays under
  934. # # the hood to make unary ufuncs work on them. So we cannot raise
  935. # # TypeError here - which would be handy to not give users object
  936. # # arrays they probably don't want (they're looking for `.toarray()`).
  937. # #
  938. # # Conversion with `toarray()` would also break things because of the
  939. # # behavior discussed above, plus we want to avoid densification by
  940. # # accident because that can too easily blow up memory.
  941. #
  942. # def __array_ufunc__(self):
  943. # # We cannot implement __array_ufunc__ due to mismatching semantics.
  944. # # See gh-7707 and gh-7349 for details.
  945. #
  946. # def __array_function__(self):
  947. # # We cannot implement __array_function__ due to mismatching semantics.
  948. # # See gh-10362 for details.
  949. def todense(self, order=None, out=None):
  950. """
  951. Return a dense representation of this sparse array.
  952. Parameters
  953. ----------
  954. order : {'C', 'F'}, optional
  955. Whether to store multi-dimensional data in C (row-major)
  956. or Fortran (column-major) order in memory. The default
  957. is 'None', which provides no ordering guarantees.
  958. Cannot be specified in conjunction with the `out`
  959. argument.
  960. out : ndarray, 2-D, optional
  961. If specified, uses this array as the output buffer
  962. instead of allocating a new array to return. The
  963. provided array must have the same shape and dtype as
  964. the sparse array on which you are calling the method.
  965. Returns
  966. -------
  967. arr : ndarray, 2-D
  968. An array with the same shape and containing the same
  969. data represented by the sparse array, with the requested
  970. memory order. If `out` was passed, the same object is
  971. returned after being modified in-place to contain the
  972. appropriate values.
  973. """
  974. return self._ascontainer(self.toarray(order=order, out=out))
  975. def toarray(self, order=None, out=None):
  976. """
  977. Return a dense ndarray representation of this sparse array/matrix.
  978. Parameters
  979. ----------
  980. order : {'C', 'F'}, optional
  981. Whether to store multidimensional data in C (row-major)
  982. or Fortran (column-major) order in memory. The default
  983. is 'None', which provides no ordering guarantees.
  984. Cannot be specified in conjunction with the `out`
  985. argument.
  986. out : ndarray, 2-D, optional
  987. If specified, uses this array as the output buffer
  988. instead of allocating a new array to return. The provided
  989. array must have the same shape and dtype as the sparse
  990. array/matrix on which you are calling the method. For most
  991. sparse types, `out` is required to be memory contiguous
  992. (either C or Fortran ordered).
  993. Returns
  994. -------
  995. arr : ndarray, 2-D
  996. An array with the same shape and containing the same
  997. data represented by the sparse array/matrix, with the requested
  998. memory order. If `out` was passed, the same object is
  999. returned after being modified in-place to contain the
  1000. appropriate values.
  1001. """
  1002. return self.tocoo(copy=False).toarray(order=order, out=out)
  1003. # Any sparse array format deriving from _spbase must define one of
  1004. # tocsr or tocoo. The other conversion methods may be implemented for
  1005. # efficiency, but are not required.
  1006. def tocsr(self, copy=False):
  1007. """Convert this array/matrix to Compressed Sparse Row format.
  1008. With copy=False, the data/indices may be shared between this array/matrix and
  1009. the resultant csr_array/matrix.
  1010. """
  1011. return self.tocoo(copy=copy).tocsr(copy=False)
  1012. def todok(self, copy=False):
  1013. """Convert this array/matrix to Dictionary Of Keys format.
  1014. With copy=False, the data/indices may be shared between this array/matrix and
  1015. the resultant dok_array/matrix.
  1016. """
  1017. return self.tocoo(copy=copy).todok(copy=False)
  1018. def tocoo(self, copy=False):
  1019. """Convert this array/matrix to COOrdinate format.
  1020. With copy=False, the data/indices may be shared between this array/matrix and
  1021. the resultant coo_array/matrix.
  1022. """
  1023. return self.tocsr(copy=False).tocoo(copy=copy)
  1024. def tolil(self, copy=False):
  1025. """Convert this array/matrix to List of Lists format.
  1026. With copy=False, the data/indices may be shared between this array/matrix and
  1027. the resultant lil_array/matrix.
  1028. """
  1029. return self.tocsr(copy=False).tolil(copy=copy)
  1030. def todia(self, copy=False):
  1031. """Convert this array/matrix to sparse DIAgonal format.
  1032. With copy=False, the data/indices may be shared between this array/matrix and
  1033. the resultant dia_array/matrix.
  1034. """
  1035. return self.tocoo(copy=copy).todia(copy=False)
  1036. def tobsr(self, blocksize=None, copy=False):
  1037. """Convert this array/matrix to Block Sparse Row format.
  1038. With copy=False, the data/indices may be shared between this array/matrix and
  1039. the resultant bsr_array/matrix.
  1040. When blocksize=(R, C) is provided, it will be used for construction of
  1041. the bsr_array/matrix.
  1042. """
  1043. return self.tocsr(copy=False).tobsr(blocksize=blocksize, copy=copy)
  1044. def tocsc(self, copy=False):
  1045. """Convert this array/matrix to Compressed Sparse Column format.
  1046. With copy=False, the data/indices may be shared between this array/matrix and
  1047. the resultant csc_array/matrix.
  1048. """
  1049. return self.tocsr(copy=copy).tocsc(copy=False)
  1050. def copy(self):
  1051. """Returns a copy of this array/matrix.
  1052. No data/indices will be shared between the returned value and current
  1053. array/matrix.
  1054. """
  1055. return self.__class__(self, copy=True)
  1056. def sum(self, axis=None, dtype=None, out=None):
  1057. """
  1058. Sum the array/matrix elements over a given axis.
  1059. Parameters
  1060. ----------
  1061. axis : {-2, -1, 0, 1, None} optional
  1062. Axis along which the sum is computed. The default is to
  1063. compute the sum of all the array/matrix elements, returning a scalar
  1064. (i.e., `axis` = `None`).
  1065. dtype : dtype, optional
  1066. The type of the returned array/matrix and of the accumulator in which
  1067. the elements are summed. The dtype of `a` is used by default
  1068. unless `a` has an integer dtype of less precision than the default
  1069. platform integer. In that case, if `a` is signed then the platform
  1070. integer is used while if `a` is unsigned then an unsigned integer
  1071. of the same precision as the platform integer is used.
  1072. .. versionadded:: 0.18.0
  1073. out : np.matrix, optional
  1074. Alternative output matrix in which to place the result. It must
  1075. have the same shape as the expected output, but the type of the
  1076. output values will be cast if necessary.
  1077. .. versionadded:: 0.18.0
  1078. Returns
  1079. -------
  1080. sum_along_axis : np.matrix
  1081. A matrix with the same shape as `self`, with the specified
  1082. axis removed.
  1083. See Also
  1084. --------
  1085. numpy.matrix.sum : NumPy's implementation of 'sum' for matrices
  1086. """
  1087. axis = validateaxis(axis, ndim=self.ndim)
  1088. # Mimic numpy's casting.
  1089. res_dtype = get_sum_dtype(self.dtype)
  1090. # Note: all valid 1D axis values are canonically `None`.
  1091. if axis is None:
  1092. if self.nnz == 0:
  1093. return np.sum(self._ascontainer([0]), dtype=dtype or res_dtype, out=out)
  1094. return np.sum(self._ascontainer(_todata(self)), dtype=dtype, out=out)
  1095. elif isspmatrix(self):
  1096. # Ensure spmatrix sums stay 2D
  1097. new_shape = (1, self.shape[1]) if axis == (0,) else (self.shape[0], 1)
  1098. else:
  1099. new_shape = tuple(self.shape[i] for i in range(self.ndim) if i not in axis)
  1100. if out is None:
  1101. # create out array with desired dtype
  1102. out = self._ascontainer(np.zeros(new_shape, dtype=dtype or res_dtype))
  1103. else:
  1104. if out.shape != new_shape:
  1105. raise ValueError("out dimensions do not match shape")
  1106. if self.ndim > 2:
  1107. return self._sum_nd(axis, res_dtype, out)
  1108. # We use multiplication by a matrix of ones to sum.
  1109. # For some sparse array formats more efficient methods are
  1110. # possible -- these should override this function.
  1111. if axis == (0,):
  1112. ones = self._ascontainer(np.ones((1, self.shape[0]), dtype=res_dtype))
  1113. # sets dtype while loading into out
  1114. out[...] = (ones @ self).reshape(new_shape)
  1115. else: # axis == (1,)
  1116. ones = self._ascontainer(np.ones((self.shape[1], 1), dtype=res_dtype))
  1117. # sets dtype while loading into out
  1118. out[...] = (self @ ones).reshape(new_shape)
  1119. return out
  1120. def mean(self, axis=None, dtype=None, out=None):
  1121. """
  1122. Compute the arithmetic mean along the specified axis.
  1123. Returns the average of the array/matrix elements. The average is taken
  1124. over all elements in the array/matrix by default, otherwise over the
  1125. specified axis. `float64` intermediate and return values are used
  1126. for integer inputs.
  1127. Parameters
  1128. ----------
  1129. axis : {-2, -1, 0, 1, None} optional
  1130. Axis along which the mean is computed. The default is to compute
  1131. the mean of all elements in the array/matrix (i.e., `axis` = `None`).
  1132. dtype : data-type, optional
  1133. Type to use in computing the mean. For integer inputs, the default
  1134. is `float64`; for floating point inputs, it is the same as the
  1135. input dtype.
  1136. .. versionadded:: 0.18.0
  1137. out : np.matrix, optional
  1138. Alternative output matrix in which to place the result. It must
  1139. have the same shape as the expected output, but the type of the
  1140. output values will be cast if necessary.
  1141. .. versionadded:: 0.18.0
  1142. Returns
  1143. -------
  1144. m : np.matrix
  1145. See Also
  1146. --------
  1147. numpy.matrix.mean : NumPy's implementation of 'mean' for matrices
  1148. """
  1149. axis = validateaxis(axis, ndim=self.ndim)
  1150. integral = (np.issubdtype(self.dtype, np.integer) or
  1151. np.issubdtype(self.dtype, np.bool_))
  1152. # intermediate dtype for summation
  1153. inter_dtype = np.float64 if integral else self.dtype
  1154. inter_self = self.astype(inter_dtype, copy=False)
  1155. if axis is None:
  1156. denom = math.prod(self.shape)
  1157. else:
  1158. denom = math.prod(self.shape[ax] for ax in axis)
  1159. res = (inter_self * (1.0 / denom)).sum(axis=axis, dtype=inter_dtype, out=out)
  1160. if dtype is not None and out is None:
  1161. return res.astype(dtype, copy=False)
  1162. return res
  1163. def diagonal(self, k=0):
  1164. """Returns the kth diagonal of the array/matrix.
  1165. Parameters
  1166. ----------
  1167. k : int, optional
  1168. Which diagonal to get, corresponding to elements a[i, i+k].
  1169. Default: 0 (the main diagonal).
  1170. .. versionadded:: 1.0
  1171. See also
  1172. --------
  1173. numpy.diagonal : Equivalent numpy function.
  1174. Examples
  1175. --------
  1176. >>> from scipy.sparse import csr_array
  1177. >>> A = csr_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
  1178. >>> A.diagonal()
  1179. array([1, 0, 5])
  1180. >>> A.diagonal(k=1)
  1181. array([2, 3])
  1182. """
  1183. return self.tocsr().diagonal(k=k)
  1184. def trace(self, offset=0):
  1185. """Returns the sum along diagonals of the sparse array/matrix.
  1186. Parameters
  1187. ----------
  1188. offset : int, optional
  1189. Which diagonal to get, corresponding to elements a[i, i+offset].
  1190. Default: 0 (the main diagonal).
  1191. """
  1192. return self.diagonal(k=offset).sum()
  1193. def setdiag(self, values, k=0):
  1194. """
  1195. Set diagonal or off-diagonal elements of the array/matrix.
  1196. Parameters
  1197. ----------
  1198. values : array_like
  1199. New values of the diagonal elements.
  1200. Values may have any length. If the diagonal is longer than values,
  1201. then the remaining diagonal entries will not be set. If values are
  1202. longer than the diagonal, then the remaining values are ignored.
  1203. If a scalar value is given, all of the diagonal is set to it.
  1204. k : int, optional
  1205. Which off-diagonal to set, corresponding to elements a[i,i+k].
  1206. Default: 0 (the main diagonal).
  1207. """
  1208. M, N = self.shape
  1209. if (k > 0 and k >= N) or (k < 0 and -k >= M):
  1210. raise ValueError("k exceeds array dimensions")
  1211. self._setdiag(np.asarray(values), k)
  1212. def _setdiag(self, values, k):
  1213. """This part of the implementation gets overridden by the
  1214. different formats.
  1215. """
  1216. M, N = self.shape
  1217. if k < 0:
  1218. if values.ndim == 0:
  1219. # broadcast
  1220. max_index = min(M+k, N)
  1221. for i in range(max_index):
  1222. self[i - k, i] = values
  1223. else:
  1224. max_index = min(M+k, N, len(values))
  1225. if max_index <= 0:
  1226. return
  1227. for i, v in enumerate(values[:max_index]):
  1228. self[i - k, i] = v
  1229. else:
  1230. if values.ndim == 0:
  1231. # broadcast
  1232. max_index = min(M, N-k)
  1233. for i in range(max_index):
  1234. self[i, i + k] = values
  1235. else:
  1236. max_index = min(M, N-k, len(values))
  1237. if max_index <= 0:
  1238. return
  1239. for i, v in enumerate(values[:max_index]):
  1240. self[i, i + k] = v
  1241. def _process_toarray_args(self, order, out):
  1242. if out is not None:
  1243. if order is not None:
  1244. raise ValueError('order cannot be specified if out '
  1245. 'is not None')
  1246. if out.shape != self.shape or out.dtype != self.dtype:
  1247. raise ValueError('out array must be same dtype and shape as '
  1248. 'sparse array')
  1249. out[...] = 0.
  1250. return out
  1251. else:
  1252. return np.zeros(self.shape, dtype=self.dtype, order=order)
  1253. def _get_index_dtype(self, arrays=(), maxval=None, check_contents=False):
  1254. """
  1255. Determine index dtype for array.
  1256. This wraps _sputils.get_index_dtype, providing compatibility for both
  1257. array and matrix API sparse matrices. Matrix API sparse matrices would
  1258. attempt to downcast the indices - which can be computationally
  1259. expensive and undesirable for users. The array API changes this
  1260. behaviour.
  1261. See discussion: https://github.com/scipy/scipy/issues/16774
  1262. The get_index_dtype import is due to implementation details of the test
  1263. suite. It allows the decorator ``with_64bit_maxval_limit`` to mock a
  1264. lower int32 max value for checks on the matrix API's downcasting
  1265. behaviour.
  1266. """
  1267. from ._sputils import get_index_dtype
  1268. # Don't check contents for array API
  1269. return get_index_dtype(arrays,
  1270. maxval,
  1271. (check_contents and not isinstance(self, sparray)))
  1272. class sparray:
  1273. """A namespace class to separate sparray from spmatrix"""
  1274. @classmethod
  1275. def __class_getitem__(cls, arg, /):
  1276. """
  1277. Return a parametrized wrapper around the `~scipy.sparse.sparray` type.
  1278. .. versionadded:: 1.16.0
  1279. Returns
  1280. -------
  1281. alias : types.GenericAlias
  1282. A parametrized `~scipy.sparse.sparray` type.
  1283. Examples
  1284. --------
  1285. >>> import numpy as np
  1286. >>> from scipy.sparse import coo_array
  1287. >>> coo_array[np.int8, tuple[int]]
  1288. scipy.sparse._coo.coo_array[numpy.int8, tuple[int]]
  1289. """
  1290. from types import GenericAlias
  1291. return GenericAlias(cls, arg)
  1292. sparray.__doc__ = _spbase.__doc__
  1293. def isspmatrix(x):
  1294. """Is `x` of a sparse matrix type?
  1295. Parameters
  1296. ----------
  1297. x
  1298. object to check for being a sparse matrix
  1299. Returns
  1300. -------
  1301. bool
  1302. True if `x` is a sparse matrix, False otherwise
  1303. Examples
  1304. --------
  1305. >>> import numpy as np
  1306. >>> from scipy.sparse import csr_array, csr_matrix, isspmatrix
  1307. >>> isspmatrix(csr_matrix([[5]]))
  1308. True
  1309. >>> isspmatrix(csr_array([[5]]))
  1310. False
  1311. >>> isspmatrix(np.array([[5]]))
  1312. False
  1313. >>> isspmatrix(5)
  1314. False
  1315. """
  1316. return isinstance(x, spmatrix)