_compressed.py 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337
  1. """Base class for sparse matrix formats using compressed storage."""
  2. __all__ = []
  3. from warnings import warn
  4. import itertools
  5. import numpy as np
  6. from scipy._lib._util import _prune_array, copy_if_needed
  7. from ._base import _spbase, issparse, sparray, SparseEfficiencyWarning
  8. from ._data import _data_matrix, _minmax_mixin
  9. from . import _sparsetools
  10. from ._sparsetools import (get_csr_submatrix, csr_sample_offsets, csr_todense,
  11. csr_sample_values, csr_row_index, csr_row_slice,
  12. csr_column_index1, csr_column_index2, csr_diagonal,
  13. expandptr, csr_has_canonical_format, csr_eliminate_zeros,
  14. csr_sum_duplicates, csr_has_sorted_indices, csr_sort_indices,
  15. csr_matmat_maxnnz, csr_matmat)
  16. from ._index import IndexMixin
  17. from ._sputils import (upcast, upcast_char, to_native, isshape,
  18. getdtype, isintlike, downcast_intp_index,
  19. get_sum_dtype, check_shape, get_index_dtype, broadcast_shapes)
  20. class _cs_matrix(_data_matrix, _minmax_mixin, IndexMixin):
  21. """
  22. base array/matrix class for compressed row- and column-oriented arrays/matrices
  23. """
  24. def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None):
  25. _data_matrix.__init__(self, arg1, maxprint=maxprint)
  26. if issparse(arg1):
  27. if arg1.format == self.format and copy:
  28. arg1 = arg1.copy()
  29. else:
  30. arg1 = arg1.asformat(self.format)
  31. self.indptr, self.indices, self.data, self._shape = (
  32. arg1.indptr, arg1.indices, arg1.data, arg1._shape
  33. )
  34. elif isinstance(arg1, tuple):
  35. if isshape(arg1, allow_nd=self._allow_nd):
  36. # It's a tuple of matrix dimensions (M, N)
  37. # create empty matrix
  38. self._shape = check_shape(arg1, allow_nd=self._allow_nd)
  39. M, N = self._swap(self._shape_as_2d)
  40. # Select index dtype large enough to pass array and
  41. # scalar parameters to sparsetools
  42. idx_dtype = self._get_index_dtype(maxval=max(self.shape))
  43. self.data = np.zeros(0, getdtype(dtype, default=float))
  44. self.indices = np.zeros(0, idx_dtype)
  45. self.indptr = np.zeros(M + 1, dtype=idx_dtype)
  46. else:
  47. if len(arg1) == 2:
  48. # (data, ij) format
  49. coo = self._coo_container(arg1, shape=shape, dtype=dtype)
  50. arrays = coo._coo_to_compressed(self._swap)
  51. self.indptr, self.indices, self.data, self._shape = arrays
  52. self.sum_duplicates()
  53. elif len(arg1) == 3:
  54. # (data, indices, indptr) format
  55. (data, indices, indptr) = arg1
  56. # Select index dtype large enough to pass array and
  57. # scalar parameters to sparsetools
  58. maxval = None
  59. if shape is not None and 0 not in shape:
  60. maxval = max(shape)
  61. idx_dtype = self._get_index_dtype((indices, indptr),
  62. maxval=maxval,
  63. check_contents=True)
  64. if not copy:
  65. copy = copy_if_needed
  66. self.indices = np.array(indices, copy=copy, dtype=idx_dtype)
  67. self.indptr = np.array(indptr, copy=copy, dtype=idx_dtype)
  68. self.data = np.array(data, copy=copy, dtype=dtype)
  69. else:
  70. raise ValueError(f"unrecognized {self.__class__.__name__} "
  71. f"constructor input: {arg1}")
  72. else:
  73. # must be dense
  74. try:
  75. arg1 = np.asarray(arg1)
  76. except Exception as e:
  77. raise ValueError(f"unrecognized {self.__class__.__name__} "
  78. f"constructor input: {arg1}") from e
  79. if isinstance(self, sparray) and arg1.ndim != 2 and self.format == "csc":
  80. raise ValueError(f"CSC arrays don't support {arg1.ndim}D input. Use 2D")
  81. if arg1.ndim > 2:
  82. raise ValueError(f"CSR arrays don't yet support {arg1.ndim}D.")
  83. coo = self._coo_container(arg1, dtype=dtype)
  84. arrays = coo._coo_to_compressed(self._swap)
  85. self.indptr, self.indices, self.data, self._shape = arrays
  86. # Read matrix dimensions given, if any
  87. if shape is not None:
  88. self._shape = check_shape(shape, allow_nd=self._allow_nd)
  89. elif self.shape is None:
  90. # shape not already set, try to infer dimensions
  91. try:
  92. M = len(self.indptr) - 1
  93. N = self.indices.max() + 1
  94. except Exception as e:
  95. raise ValueError('unable to infer matrix dimensions') from e
  96. self._shape = check_shape(self._swap((M, N)), allow_nd=self._allow_nd)
  97. if dtype is not None:
  98. newdtype = getdtype(dtype)
  99. self.data = self.data.astype(newdtype, copy=False)
  100. self.check_format(full_check=False)
  101. def _getnnz(self, axis=None):
  102. if axis is None:
  103. return int(self.indptr[-1])
  104. elif self.ndim == 1:
  105. if axis in (0, -1):
  106. return int(self.indptr[-1])
  107. raise ValueError('axis out of bounds')
  108. else:
  109. if axis < 0:
  110. axis += 2
  111. axis, _ = self._swap((axis, 1 - axis))
  112. _, N = self._swap(self.shape)
  113. if axis == 0:
  114. return np.bincount(downcast_intp_index(self.indices), minlength=N)
  115. elif axis == 1:
  116. return np.diff(self.indptr)
  117. raise ValueError('axis out of bounds')
  118. _getnnz.__doc__ = _spbase._getnnz.__doc__
  119. def count_nonzero(self, axis=None):
  120. self.sum_duplicates()
  121. if axis is None:
  122. return np.count_nonzero(self.data)
  123. if self.ndim == 1:
  124. if axis not in (0, -1):
  125. raise ValueError('axis out of bounds')
  126. return np.count_nonzero(self.data)
  127. if axis < 0:
  128. axis += 2
  129. axis, _ = self._swap((axis, 1 - axis))
  130. if axis == 0:
  131. _, N = self._swap(self.shape)
  132. mask = self.data != 0
  133. idx = self.indices if mask.all() else self.indices[mask]
  134. return np.bincount(downcast_intp_index(idx), minlength=N)
  135. elif axis == 1:
  136. if self.data.all():
  137. return np.diff(self.indptr)
  138. pairs = itertools.pairwise(self.indptr)
  139. return np.array([np.count_nonzero(self.data[i:j]) for i, j in pairs])
  140. else:
  141. raise ValueError('axis out of bounds')
  142. count_nonzero.__doc__ = _spbase.count_nonzero.__doc__
  143. def check_format(self, full_check=True):
  144. """Check whether the array/matrix respects the CSR or CSC format.
  145. Parameters
  146. ----------
  147. full_check : bool, optional
  148. If `True`, run rigorous check, scanning arrays for valid values.
  149. Note that activating those check might copy arrays for casting,
  150. modifying indices and index pointers' inplace.
  151. If `False`, run basic checks on attributes. O(1) operations.
  152. Default is `True`.
  153. """
  154. # index arrays should have integer data types
  155. if self.indptr.dtype.kind != 'i':
  156. warn(f"indptr array has non-integer dtype ({self.indptr.dtype.name})",
  157. stacklevel=3)
  158. if self.indices.dtype.kind != 'i':
  159. warn(f"indices array has non-integer dtype ({self.indices.dtype.name})",
  160. stacklevel=3)
  161. # check array shapes
  162. for x in [self.data.ndim, self.indices.ndim, self.indptr.ndim]:
  163. if x != 1:
  164. raise ValueError('data, indices, and indptr should be 1-D')
  165. # check index pointer. Use _swap to determine proper bounds
  166. M, N = self._swap(self._shape_as_2d)
  167. if (len(self.indptr) != M + 1):
  168. raise ValueError(f"index pointer size {len(self.indptr)} should be {M + 1}")
  169. if (self.indptr[0] != 0):
  170. raise ValueError("index pointer should start with 0")
  171. # check index and data arrays
  172. if (len(self.indices) != len(self.data)):
  173. raise ValueError("indices and data should have the same size")
  174. if (self.indptr[-1] > len(self.indices)):
  175. raise ValueError("Last value of index pointer should be less than "
  176. "the size of index and data arrays")
  177. self.prune()
  178. if full_check:
  179. # check format validity (more expensive)
  180. if self.nnz > 0:
  181. if self.indices.max() >= N:
  182. raise ValueError(f"indices must be < {N}")
  183. if self.indices.min() < 0:
  184. raise ValueError("indices must be >= 0")
  185. if np.diff(self.indptr).min() < 0:
  186. raise ValueError("indptr must be a non-decreasing sequence")
  187. idx_dtype = self._get_index_dtype((self.indptr, self.indices))
  188. self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
  189. self.indices = np.asarray(self.indices, dtype=idx_dtype)
  190. self.data = to_native(self.data)
  191. # if not self.has_sorted_indices():
  192. # warn('Indices were not in sorted order. Sorting indices.')
  193. # self.sort_indices()
  194. # assert(self.has_sorted_indices())
  195. # TODO check for duplicates?
  196. #######################
  197. # Boolean comparisons #
  198. #######################
  199. def _scalar_binopt(self, other, op):
  200. """Scalar version of self._binopt, for cases in which no new nonzeros
  201. are added. Produces a new sparse array in canonical form.
  202. """
  203. self.sum_duplicates()
  204. res = self._with_data(op(self.data, other), copy=True)
  205. res.eliminate_zeros()
  206. return res
  207. #################################
  208. # Arithmetic operator overrides #
  209. #################################
  210. def _add_dense(self, other):
  211. if other.shape != self.shape:
  212. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  213. dtype = upcast_char(self.dtype.char, other.dtype.char)
  214. order = self._swap('CF')[0]
  215. result = np.array(other, dtype=dtype, order=order, copy=True)
  216. y = result if result.flags.c_contiguous else result.T
  217. M, N = self._swap(self._shape_as_2d)
  218. csr_todense(M, N, self.indptr, self.indices, self.data, y)
  219. return self._container(result, copy=False)
  220. def _add_sparse(self, other):
  221. return self._binopt(other, '_plus_')
  222. def _sub_sparse(self, other):
  223. return self._binopt(other, '_minus_')
  224. def _multiply_2d_with_broadcasting(self, other):
  225. """Element-wise multiplication by array/matrix, vector, or scalar."""
  226. # Called after checking that other is not scalarlike and self.ndim <=2
  227. if issparse(other):
  228. if self.shape == other.shape:
  229. other = self.__class__(other)
  230. return self._binopt(other, '_elmul_')
  231. # Single element.
  232. if other.shape == (1, 1):
  233. result = self._mul_scalar(other.toarray()[0, 0])
  234. if self.ndim == 1:
  235. return result.reshape((1, self.shape[0]))
  236. return result
  237. if other.shape == (1,):
  238. return self._mul_scalar(other.toarray()[0])
  239. if self.shape in ((1,), (1, 1)):
  240. return other._mul_scalar(self.data.sum())
  241. # broadcast. treat 1d like a row
  242. sM, sN = self._shape_as_2d
  243. oM, oN = other._shape_as_2d
  244. # A row times a column.
  245. if sM == 1 and oN == 1:
  246. return other._matmul_sparse(self.reshape(sM, sN).tocsc())
  247. if sN == 1 and oM == 1:
  248. return self._matmul_sparse(other.reshape(oM, oN).tocsc())
  249. is_array = isinstance(self, sparray)
  250. # Other is a row.
  251. if oM == 1 and sN == oN:
  252. new_other = _make_diagonal_csr(other.toarray().ravel(), is_array)
  253. result = self._matmul_sparse(new_other)
  254. return result if self.ndim == 2 else result.reshape((1, oN))
  255. # self is a row.
  256. if sM == 1 and sN == oN:
  257. copy = _make_diagonal_csr(self.toarray().ravel(), is_array)
  258. return other._matmul_sparse(copy)
  259. # Other is a column.
  260. if oN == 1 and sM == oM:
  261. new_other = _make_diagonal_csr(other.toarray().ravel(), is_array)
  262. return new_other._matmul_sparse(self)
  263. # self is a column.
  264. if sN == 1 and sM == oM:
  265. new_self = _make_diagonal_csr(self.toarray().ravel(), is_array)
  266. return new_self._matmul_sparse(other)
  267. raise ValueError(f"inconsistent shapes {self.shape} and {other.shape}")
  268. # Assume other is a dense matrix/array, which produces a single-item
  269. # object array if other isn't convertible to ndarray.
  270. other = np.asanyarray(other)
  271. if other.ndim > 2:
  272. return np.multiply(self.toarray(), other)
  273. # Single element / wrapped object.
  274. if other.size == 1:
  275. if other.dtype == np.object_:
  276. # 'other' not convertible to ndarray.
  277. return NotImplemented
  278. bshape = broadcast_shapes(self.shape, other.shape)
  279. return self._mul_scalar(other.flat[0]).reshape(bshape)
  280. # Fast case for trivial sparse matrix.
  281. if self.shape in ((1,), (1, 1)):
  282. bshape = broadcast_shapes(self.shape, other.shape)
  283. return np.multiply(self.data.sum(), other).reshape(bshape)
  284. ret = self.tocoo()
  285. # Matching shapes.
  286. if self.shape == other.shape:
  287. data = np.multiply(ret.data, other[ret.coords])
  288. ret.data = data.view(np.ndarray).ravel()
  289. return ret
  290. # convert other to 2d
  291. other2d = np.atleast_2d(other)
  292. # Sparse row vector times...
  293. if self.shape[0] == 1 or self.ndim == 1:
  294. if other2d.shape[1] == 1: # Dense column vector.
  295. data = np.multiply(ret.data, other2d)
  296. elif other2d.shape[1] == self.shape[-1]: # Dense 2d matrix.
  297. data = np.multiply(ret.data, other2d[:, ret.col])
  298. else:
  299. raise ValueError(f"inconsistent shapes {self.shape} and {other.shape}")
  300. idx_dtype = self._get_index_dtype(ret.col,
  301. maxval=ret.nnz * other2d.shape[0])
  302. row = np.repeat(np.arange(other2d.shape[0], dtype=idx_dtype), ret.nnz)
  303. col = np.tile(ret.col.astype(idx_dtype, copy=False), other2d.shape[0])
  304. return self._coo_container(
  305. (data.view(np.ndarray).ravel(), (row, col)),
  306. shape=(other2d.shape[0], self.shape[-1]),
  307. copy=False
  308. )
  309. # Sparse column vector times...
  310. if self.shape[1] == 1:
  311. if other2d.shape[0] == 1: # Dense row vector.
  312. data = np.multiply(ret.data[:, None], other2d)
  313. elif other2d.shape[0] == self.shape[0]: # Dense 2d array.
  314. data = np.multiply(ret.data[:, None], other2d[ret.row])
  315. else:
  316. raise ValueError(f"inconsistent shapes {self.shape} and {other.shape}")
  317. idx_dtype = self._get_index_dtype(ret.row,
  318. maxval=ret.nnz * other2d.shape[1])
  319. row = np.repeat(ret.row.astype(idx_dtype, copy=False), other2d.shape[1])
  320. col = np.tile(np.arange(other2d.shape[1], dtype=idx_dtype), ret.nnz)
  321. return self._coo_container(
  322. (data.view(np.ndarray).ravel(), (row, col)),
  323. shape=(self.shape[0], other2d.shape[1]),
  324. copy=False
  325. )
  326. # Sparse matrix times dense row vector.
  327. if other2d.shape[0] == 1 and self.shape[1] == other2d.shape[1]:
  328. data = np.multiply(ret.data, other2d[:, ret.col].ravel())
  329. # Sparse matrix times dense column vector.
  330. elif other2d.shape[1] == 1 and self.shape[0] == other2d.shape[0]:
  331. data = np.multiply(ret.data, other2d[ret.row].ravel())
  332. else:
  333. raise ValueError(f"inconsistent shapes {self.shape} and {other.shape}")
  334. ret.data = data.view(np.ndarray).ravel()
  335. return ret
  336. ###########################
  337. # Multiplication handlers #
  338. ###########################
  339. def _matmul_vector(self, other):
  340. M, N = self._shape_as_2d
  341. # output array
  342. result = np.zeros(M, dtype=upcast_char(self.dtype.char, other.dtype.char))
  343. # csr_matvec or csc_matvec
  344. fn = getattr(_sparsetools, self.format + '_matvec')
  345. fn(M, N, self.indptr, self.indices, self.data, other, result)
  346. return result[0] if self.ndim == 1 else result
  347. def _matmul_multivector(self, other):
  348. M, N = self._shape_as_2d
  349. n_vecs = other.shape[-1] # number of column vectors
  350. result = np.zeros((M, n_vecs),
  351. dtype=upcast_char(self.dtype.char, other.dtype.char))
  352. # csr_matvecs or csc_matvecs
  353. fn = getattr(_sparsetools, self.format + '_matvecs')
  354. fn(M, N, n_vecs, self.indptr, self.indices, self.data,
  355. other.ravel(), result.ravel())
  356. if self.ndim == 1:
  357. return result.reshape((n_vecs,))
  358. return result
  359. def _matmul_sparse(self, other):
  360. M, K1 = self._shape_as_2d
  361. # if other is 1d, treat as a **column**
  362. o_ndim = other.ndim
  363. if o_ndim == 1:
  364. # convert 1d array to a 2d column when on the right of @
  365. other = other.reshape((1, other.shape[0])).T # Note: converts to CSC
  366. K2, N = other._shape if other.ndim == 2 else (other.shape[0], 1)
  367. # find new_shape: (M, N), (M,), (N,) or ()
  368. new_shape = ()
  369. if self.ndim == 2:
  370. new_shape += (M,)
  371. if o_ndim == 2:
  372. new_shape += (N,)
  373. faux_shape = (M if self.ndim == 2 else 1, N if o_ndim == 2 else 1)
  374. other = self.__class__(other) # convert to this format
  375. index_arrays = (self.indptr, self.indices, other.indptr, other.indices)
  376. M, N = self._swap((M, N))
  377. s, o = self._swap((self, other))
  378. idx_dtype = self._get_index_dtype(index_arrays)
  379. s_indptr = np.asarray(s.indptr, dtype=idx_dtype)
  380. s_indices = np.asarray(s.indices, dtype=idx_dtype)
  381. o_indptr = np.asarray(o.indptr, dtype=idx_dtype)
  382. o_indices = np.asarray(o.indices, dtype=idx_dtype)
  383. nnz = csr_matmat_maxnnz(M, N, s_indptr, s_indices, o_indptr, o_indices)
  384. if nnz == 0:
  385. if new_shape == ():
  386. return np.array(0, dtype=upcast(self.dtype, other.dtype))
  387. return self.__class__(new_shape, dtype=upcast(self.dtype, other.dtype))
  388. new_idx_dtype = self._get_index_dtype(index_arrays, maxval=nnz)
  389. if new_idx_dtype != idx_dtype:
  390. idx_dtype = new_idx_dtype
  391. s_indptr = np.asarray(s.indptr, dtype=idx_dtype)
  392. s_indices = np.asarray(s.indices, dtype=idx_dtype)
  393. o_indptr = np.asarray(o.indptr, dtype=idx_dtype)
  394. o_indices = np.asarray(o.indices, dtype=idx_dtype)
  395. indptr = np.empty(M + 1, dtype=idx_dtype)
  396. indices = np.empty(nnz, dtype=idx_dtype)
  397. data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
  398. csr_matmat(M, N,
  399. s_indptr, s_indices, s.data,
  400. o_indptr, o_indices, o.data,
  401. indptr, indices, data)
  402. if new_shape == ():
  403. return np.array(data[0])
  404. res = self.__class__((data, indices, indptr), shape=faux_shape)
  405. if faux_shape != new_shape:
  406. if res.format != 'csr':
  407. res = res.tocsr()
  408. res = res.reshape(new_shape)
  409. return res
  410. def diagonal(self, k=0):
  411. M, N = self._swap(self.shape)
  412. k, _ = self._swap((k, -k))
  413. if k <= -M or k >= N:
  414. return np.empty(0, dtype=self.data.dtype)
  415. y = np.empty(min(M + min(k, 0), N - max(k, 0)), dtype=upcast(self.dtype))
  416. csr_diagonal(k, M, N, self.indptr, self.indices, self.data, y)
  417. return y
  418. diagonal.__doc__ = _spbase.diagonal.__doc__
  419. #####################
  420. # Reduce operations #
  421. #####################
  422. def sum(self, axis=None, dtype=None, out=None):
  423. """Sum the array/matrix over the given axis. If the axis is None, sum
  424. over both rows and columns, returning a scalar.
  425. """
  426. # The _spbase base class already does axis=None and major axis efficiently
  427. # so we only do the case axis= minor axis
  428. if (self.ndim == 2 and not hasattr(self, 'blocksize') and
  429. axis in self._swap(((1, -1), (0, -2)))[0]):
  430. # faster than multiplication for large minor axis in CSC/CSR
  431. res_dtype = get_sum_dtype(self.dtype)
  432. ret = np.zeros(len(self.indptr) - 1, dtype=res_dtype)
  433. major_index, value = self._minor_reduce(np.add)
  434. ret[major_index] = value
  435. ret = self._ascontainer(ret)
  436. if axis % 2 == 1:
  437. ret = ret.T
  438. return ret.sum(axis=(), dtype=dtype, out=out)
  439. else:
  440. return _spbase.sum(self, axis=axis, dtype=dtype, out=out)
  441. sum.__doc__ = _spbase.sum.__doc__
  442. def _minor_reduce(self, ufunc, data=None):
  443. """Reduce nonzeros with a ufunc over the minor axis when non-empty
  444. Can be applied to a function of self.data by supplying data parameter.
  445. Warning: this does not call sum_duplicates()
  446. Returns
  447. -------
  448. major_index : array of ints
  449. Major indices where nonzero
  450. value : array of self.dtype
  451. Reduce result for nonzeros in each major_index
  452. """
  453. if data is None:
  454. data = self.data
  455. major_index = np.flatnonzero(np.diff(self.indptr))
  456. value = ufunc.reduceat(data,
  457. downcast_intp_index(self.indptr[major_index]))
  458. return major_index, value
  459. #######################
  460. # Getting and Setting #
  461. #######################
  462. def _get_intXint(self, row, col):
  463. M, N = self._swap(self.shape)
  464. major, minor = self._swap((row, col))
  465. indptr, indices, data = get_csr_submatrix(
  466. M, N, self.indptr, self.indices, self.data,
  467. major, major + 1, minor, minor + 1)
  468. return data.sum(dtype=self.dtype)
  469. def _get_sliceXslice(self, row, col):
  470. major, minor = self._swap((row, col))
  471. if major.step in (1, None) and minor.step in (1, None):
  472. return self._get_submatrix(major, minor, copy=True)
  473. return self._major_slice(major)._minor_slice(minor)
  474. def _get_arrayXarray(self, row, col):
  475. # inner indexing
  476. idx_dtype = self.indices.dtype
  477. M, N = self._swap(self.shape)
  478. major, minor = self._swap((row, col))
  479. major = np.asarray(major, dtype=idx_dtype)
  480. minor = np.asarray(minor, dtype=idx_dtype)
  481. val = np.empty(major.size, dtype=self.dtype)
  482. csr_sample_values(M, N, self.indptr, self.indices, self.data,
  483. major.size, major.ravel(), minor.ravel(), val)
  484. if major.ndim == 1:
  485. return self._ascontainer(val)
  486. return self.__class__(val.reshape(major.shape))
  487. def _get_columnXarray(self, row, col):
  488. # outer indexing
  489. major, minor = self._swap((row, col))
  490. return self._major_index_fancy(major)._minor_index_fancy(minor)
  491. def _major_index_fancy(self, idx):
  492. """Index along the major axis where idx is an array of ints.
  493. """
  494. idx_dtype = self._get_index_dtype((self.indptr, self.indices))
  495. indices = np.asarray(idx, dtype=idx_dtype).ravel()
  496. N = self._swap(self._shape_as_2d)[1]
  497. M = len(indices)
  498. new_shape = self._swap((M, N)) if self.ndim == 2 else (M,)
  499. if M == 0:
  500. return self.__class__(new_shape, dtype=self.dtype)
  501. self_indptr = self.indptr.astype(idx_dtype, copy=False)
  502. self_indices = self.indices.astype(idx_dtype, copy=False)
  503. row_nnz = self_indptr[indices + 1] - self_indptr[indices]
  504. res_indptr = np.zeros(M + 1, dtype=idx_dtype)
  505. np.cumsum(row_nnz, out=res_indptr[1:])
  506. nnz = res_indptr[-1]
  507. res_indices = np.empty(nnz, dtype=idx_dtype)
  508. res_data = np.empty(nnz, dtype=self.dtype)
  509. csr_row_index(
  510. M,
  511. indices,
  512. self_indptr,
  513. self_indices,
  514. self.data,
  515. res_indices,
  516. res_data
  517. )
  518. return self.__class__((res_data, res_indices, res_indptr),
  519. shape=new_shape, copy=False)
  520. def _major_slice(self, idx, copy=False):
  521. """Index along the major axis where idx is a slice object.
  522. """
  523. if idx == slice(None):
  524. return self.copy() if copy else self
  525. M, N = self._swap(self._shape_as_2d)
  526. start, stop, step = idx.indices(M)
  527. M = len(range(start, stop, step))
  528. new_shape = self._swap((M, N)) if self.ndim == 2 else (M,)
  529. if M == 0:
  530. return self.__class__(new_shape, dtype=self.dtype)
  531. # Work out what slices are needed for `row_nnz`
  532. # start,stop can be -1, only if step is negative
  533. start0, stop0 = start, stop
  534. if stop == -1 and start >= 0:
  535. stop0 = None
  536. start1, stop1 = start + 1, stop + 1
  537. row_nnz = self.indptr[start1:stop1:step] - \
  538. self.indptr[start0:stop0:step]
  539. idx_dtype = self.indices.dtype
  540. res_indptr = np.zeros(M+1, dtype=idx_dtype)
  541. np.cumsum(row_nnz, out=res_indptr[1:])
  542. if step == 1:
  543. all_idx = slice(self.indptr[start], self.indptr[stop])
  544. res_indices = np.array(self.indices[all_idx], copy=copy)
  545. res_data = np.array(self.data[all_idx], copy=copy)
  546. else:
  547. nnz = res_indptr[-1]
  548. res_indices = np.empty(nnz, dtype=idx_dtype)
  549. res_data = np.empty(nnz, dtype=self.dtype)
  550. csr_row_slice(start, stop, step, self.indptr, self.indices,
  551. self.data, res_indices, res_data)
  552. return self.__class__((res_data, res_indices, res_indptr),
  553. shape=new_shape, copy=False)
  554. def _minor_index_fancy(self, idx):
  555. """Index along the minor axis where idx is an array of ints.
  556. """
  557. idx_dtype = self._get_index_dtype((self.indices, self.indptr))
  558. indices = self.indices.astype(idx_dtype, copy=False)
  559. indptr = self.indptr.astype(idx_dtype, copy=False)
  560. idx = np.asarray(idx, dtype=idx_dtype).ravel()
  561. M, N = self._swap(self._shape_as_2d)
  562. k = len(idx)
  563. new_shape = self._swap((M, k)) if self.ndim == 2 else (k,)
  564. if k == 0:
  565. return self.__class__(new_shape, dtype=self.dtype)
  566. # pass 1: count idx entries and compute new indptr
  567. col_offsets = np.zeros(N, dtype=idx_dtype)
  568. res_indptr = np.empty_like(self.indptr, dtype=idx_dtype)
  569. csr_column_index1(
  570. k,
  571. idx,
  572. M,
  573. N,
  574. indptr,
  575. indices,
  576. col_offsets,
  577. res_indptr,
  578. )
  579. # pass 2: copy indices/data for selected idxs
  580. col_order = np.argsort(idx).astype(idx_dtype, copy=False)
  581. nnz = res_indptr[-1]
  582. res_indices = np.empty(nnz, dtype=idx_dtype)
  583. res_data = np.empty(nnz, dtype=self.dtype)
  584. csr_column_index2(col_order, col_offsets, len(self.indices),
  585. indices, self.data, res_indices, res_data)
  586. return self.__class__((res_data, res_indices, res_indptr),
  587. shape=new_shape, copy=False)
  588. def _minor_slice(self, idx, copy=False):
  589. """Index along the minor axis where idx is a slice object.
  590. """
  591. if idx == slice(None):
  592. return self.copy() if copy else self
  593. M, N = self._swap(self._shape_as_2d)
  594. start, stop, step = idx.indices(N)
  595. N = len(range(start, stop, step))
  596. if N == 0:
  597. return self.__class__(self._swap((M, N)), dtype=self.dtype)
  598. if step == 1:
  599. return self._get_submatrix(minor=idx, copy=copy)
  600. # TODO: don't fall back to fancy indexing here
  601. return self._minor_index_fancy(np.arange(start, stop, step))
  602. def _get_submatrix(self, major=None, minor=None, copy=False):
  603. """Return a submatrix of this matrix.
  604. major, minor: None, int, or slice with step 1
  605. """
  606. M, N = self._swap(self._shape_as_2d)
  607. i0, i1 = _process_slice(major, M)
  608. j0, j1 = _process_slice(minor, N)
  609. if i0 == 0 and j0 == 0 and i1 == M and j1 == N:
  610. return self.copy() if copy else self
  611. indptr, indices, data = get_csr_submatrix(
  612. M, N, self.indptr, self.indices, self.data, i0, i1, j0, j1)
  613. shape = self._swap((i1 - i0, j1 - j0))
  614. if self.ndim == 1:
  615. shape = (shape[1],)
  616. return self.__class__((data, indices, indptr), shape=shape,
  617. dtype=self.dtype, copy=False)
  618. def _set_intXint(self, row, col, x):
  619. i, j = self._swap((row, col))
  620. self._set_many(i, j, x)
  621. def _set_arrayXarray(self, row, col, x):
  622. i, j = self._swap((row, col))
  623. self._set_many(i, j, x)
  624. def _set_arrayXarray_sparse(self, row, col, x):
  625. # clear entries that will be overwritten
  626. self._zero_many(*self._swap((row, col)))
  627. M, N = row.shape # matches col.shape
  628. broadcast_row = M != 1 and x.shape[0] == 1
  629. broadcast_col = N != 1 and x.shape[1] == 1
  630. r, c = x.row, x.col
  631. x = np.asarray(x.data, dtype=self.dtype)
  632. if x.size == 0:
  633. return
  634. if broadcast_row:
  635. r = np.repeat(np.arange(M), len(r))
  636. c = np.tile(c, M)
  637. x = np.tile(x, M)
  638. if broadcast_col:
  639. r = np.repeat(r, N)
  640. c = np.tile(np.arange(N), len(c))
  641. x = np.repeat(x, N)
  642. # only assign entries in the new sparsity structure
  643. i, j = self._swap((row[r, c], col[r, c]))
  644. self._set_many(i, j, x)
  645. def _setdiag(self, values, k):
  646. if 0 in self.shape:
  647. return
  648. if self.ndim == 1:
  649. raise NotImplementedError('diagonals cant be set in 1d arrays')
  650. M, N = self.shape
  651. broadcast = (values.ndim == 0)
  652. if k < 0:
  653. if broadcast:
  654. max_index = min(M + k, N)
  655. else:
  656. max_index = min(M + k, N, len(values))
  657. i = np.arange(-k, max_index - k, dtype=self.indices.dtype)
  658. j = np.arange(max_index, dtype=self.indices.dtype)
  659. else:
  660. if broadcast:
  661. max_index = min(M, N - k)
  662. else:
  663. max_index = min(M, N - k, len(values))
  664. i = np.arange(max_index, dtype=self.indices.dtype)
  665. j = np.arange(k, k + max_index, dtype=self.indices.dtype)
  666. if not broadcast:
  667. values = values[:len(i)]
  668. x = np.atleast_1d(np.asarray(values, dtype=self.dtype)).ravel()
  669. if x.squeeze().shape != i.squeeze().shape:
  670. x = np.broadcast_to(x, i.shape)
  671. if x.size == 0:
  672. return
  673. M, N = self._swap((M, N))
  674. i, j = self._swap((i, j))
  675. n_samples = x.size
  676. offsets = np.empty(n_samples, dtype=self.indices.dtype)
  677. ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
  678. i, j, offsets)
  679. if ret == 1:
  680. # rinse and repeat
  681. self.sum_duplicates()
  682. csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
  683. i, j, offsets)
  684. if -1 not in offsets:
  685. # only affects existing non-zero cells
  686. self.data[offsets] = x
  687. return
  688. is_existing = (offsets >= 0)
  689. N_new = len(is_existing) - np.count_nonzero(is_existing)
  690. # Boundary between csc and convert to coo
  691. # The value 0.001 is justified in gh-19962#issuecomment-1920499678
  692. if N_new < self.nnz * 0.001:
  693. # replace existing entries
  694. self.data[offsets[is_existing]] = x[is_existing]
  695. # create new entries
  696. is_new = np.logical_not(is_existing, out=is_existing)
  697. del is_existing
  698. self._insert_many(i[is_new], j[is_new], x[is_new])
  699. else:
  700. # convert to coo for _set_diag
  701. do_sort = self.has_sorted_indices
  702. coo = self.tocoo()
  703. coo._setdiag(values, k)
  704. arrays = coo._coo_to_compressed(self._swap)
  705. self.indptr, self.indices, self.data, _ = arrays
  706. # Sort the indices (like in _insert_many)
  707. if do_sort:
  708. self.has_sorted_indices = False # force a sort
  709. self.sort_indices()
  710. def _prepare_indices(self, i, j):
  711. M, N = self._swap(self._shape_as_2d)
  712. def check_bounds(indices, bound):
  713. idx = indices.max()
  714. if idx >= bound:
  715. raise IndexError(f'index ({idx}) out of range (>= {bound})')
  716. idx = indices.min()
  717. if idx < -bound:
  718. raise IndexError(f'index ({idx}) out of range (< -{bound})')
  719. i = np.atleast_1d(np.asarray(i, dtype=self.indices.dtype)).ravel()
  720. j = np.atleast_1d(np.asarray(j, dtype=self.indices.dtype)).ravel()
  721. check_bounds(i, M)
  722. check_bounds(j, N)
  723. return i, j, M, N
  724. def _set_many(self, i, j, x):
  725. """Sets value at each (i, j) to x
  726. Here (i,j) index major and minor respectively, and must not contain
  727. duplicate entries.
  728. """
  729. i, j, M, N = self._prepare_indices(i, j)
  730. x = np.atleast_1d(np.asarray(x, dtype=self.dtype)).ravel()
  731. n_samples = x.size
  732. offsets = np.empty(n_samples, dtype=self.indices.dtype)
  733. ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
  734. i, j, offsets)
  735. if ret == 1:
  736. # rinse and repeat
  737. self.sum_duplicates()
  738. csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
  739. i, j, offsets)
  740. if -1 not in offsets:
  741. # only affects existing non-zero cells
  742. self.data[offsets] = x
  743. return
  744. else:
  745. warn(f"Changing the sparsity structure of a {self.__class__.__name__} is"
  746. " expensive. lil and dok are more efficient.",
  747. SparseEfficiencyWarning, stacklevel=3)
  748. # replace where possible
  749. mask = offsets > -1
  750. self.data[offsets[mask]] = x[mask]
  751. # only insertions remain
  752. mask = ~mask
  753. i = i[mask]
  754. i[i < 0] += M
  755. j = j[mask]
  756. j[j < 0] += N
  757. self._insert_many(i, j, x[mask])
  758. def _zero_many(self, i, j):
  759. """Sets value at each (i, j) to zero, preserving sparsity structure.
  760. Here (i,j) index major and minor respectively.
  761. """
  762. i, j, M, N = self._prepare_indices(i, j)
  763. n_samples = len(i)
  764. offsets = np.empty(n_samples, dtype=self.indices.dtype)
  765. ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
  766. i, j, offsets)
  767. if ret == 1:
  768. # rinse and repeat
  769. self.sum_duplicates()
  770. csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
  771. i, j, offsets)
  772. # only assign zeros to the existing sparsity structure
  773. self.data[offsets[offsets > -1]] = 0
  774. def _insert_many(self, i, j, x):
  775. """Inserts new nonzero at each (i, j) with value x
  776. Here (i,j) index major and minor respectively.
  777. i, j and x must be non-empty, 1d arrays.
  778. Inserts each major group (e.g. all entries per row) at a time.
  779. Maintains has_sorted_indices property.
  780. Modifies i, j, x in place.
  781. """
  782. order = np.argsort(i, kind='mergesort') # stable for duplicates
  783. i = i.take(order, mode='clip')
  784. j = j.take(order, mode='clip')
  785. x = x.take(order, mode='clip')
  786. do_sort = self.has_sorted_indices
  787. # Update index data type
  788. idx_dtype = self._get_index_dtype((self.indices, self.indptr),
  789. maxval=(self.indptr[-1] + x.size))
  790. self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
  791. self.indices = np.asarray(self.indices, dtype=idx_dtype)
  792. i = np.asarray(i, dtype=idx_dtype)
  793. j = np.asarray(j, dtype=idx_dtype)
  794. # Collate old and new in chunks by major index
  795. indices_parts = []
  796. data_parts = []
  797. ui, ui_indptr = np.unique(i, return_index=True)
  798. ui_indptr = np.append(ui_indptr, len(j))
  799. new_nnzs = np.diff(ui_indptr)
  800. prev = 0
  801. for c, (ii, js, je) in enumerate(zip(ui, ui_indptr, ui_indptr[1:])):
  802. # old entries
  803. start = self.indptr[prev]
  804. stop = self.indptr[ii]
  805. indices_parts.append(self.indices[start:stop])
  806. data_parts.append(self.data[start:stop])
  807. # handle duplicate j: keep last setting
  808. uj, uj_indptr = np.unique(j[js:je][::-1], return_index=True)
  809. if len(uj) == je - js:
  810. indices_parts.append(j[js:je])
  811. data_parts.append(x[js:je])
  812. else:
  813. indices_parts.append(j[js:je][::-1][uj_indptr])
  814. data_parts.append(x[js:je][::-1][uj_indptr])
  815. new_nnzs[c] = len(uj)
  816. prev = ii
  817. # remaining old entries
  818. start = self.indptr[ii]
  819. indices_parts.append(self.indices[start:])
  820. data_parts.append(self.data[start:])
  821. # update attributes
  822. self.indices = np.concatenate(indices_parts)
  823. self.data = np.concatenate(data_parts)
  824. nnzs = np.empty(self.indptr.shape, dtype=idx_dtype)
  825. nnzs[0] = idx_dtype(0)
  826. indptr_diff = np.diff(self.indptr)
  827. indptr_diff[ui] += new_nnzs
  828. nnzs[1:] = indptr_diff
  829. self.indptr = np.cumsum(nnzs, out=nnzs)
  830. if do_sort:
  831. # TODO: only sort where necessary
  832. self.has_sorted_indices = False
  833. self.sort_indices()
  834. self.check_format(full_check=False)
  835. ######################
  836. # Conversion methods #
  837. ######################
  838. def tocoo(self, copy=True):
  839. if self.ndim == 1:
  840. csr = self.tocsr()
  841. return self._coo_container((csr.data, (csr.indices,)), csr.shape, copy=copy)
  842. major_dim, minor_dim = self._swap(self.shape)
  843. minor_indices = self.indices
  844. major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
  845. expandptr(major_dim, self.indptr, major_indices)
  846. coords = self._swap((major_indices, minor_indices))
  847. return self._coo_container(
  848. (self.data, coords), self.shape, copy=copy, dtype=self.dtype
  849. )
  850. tocoo.__doc__ = _spbase.tocoo.__doc__
  851. def toarray(self, order=None, out=None):
  852. if out is None and order is None:
  853. order = self._swap('cf')[0]
  854. out = self._process_toarray_args(order, out)
  855. if not (out.flags.c_contiguous or out.flags.f_contiguous):
  856. raise ValueError('Output array must be C or F contiguous')
  857. # align ideal order with output array order
  858. if out.flags.c_contiguous:
  859. x = self.tocsr()
  860. y = out
  861. else:
  862. x = self.tocsc()
  863. y = out.T
  864. M, N = x._swap(x._shape_as_2d)
  865. csr_todense(M, N, x.indptr, x.indices, x.data, y)
  866. return out
  867. toarray.__doc__ = _spbase.toarray.__doc__
  868. ##############################################################
  869. # methods that examine or modify the internal data structure #
  870. ##############################################################
  871. def eliminate_zeros(self):
  872. """Remove zero entries from the array/matrix
  873. This is an *in place* operation.
  874. """
  875. M, N = self._swap(self._shape_as_2d)
  876. csr_eliminate_zeros(M, N, self.indptr, self.indices, self.data)
  877. self.prune() # nnz may have changed
  878. @property
  879. def has_canonical_format(self) -> bool:
  880. """Whether the array/matrix has sorted indices and no duplicates
  881. Returns
  882. - True: if the above applies
  883. - False: otherwise
  884. has_canonical_format implies has_sorted_indices, so if the latter flag
  885. is False, so will the former be; if the former is found True, the
  886. latter flag is also set.
  887. """
  888. # first check to see if result was cached
  889. if not getattr(self, '_has_sorted_indices', True):
  890. # not sorted => not canonical
  891. self._has_canonical_format = False
  892. elif not hasattr(self, '_has_canonical_format'):
  893. M = len(self.indptr) - 1
  894. self.has_canonical_format = bool(
  895. csr_has_canonical_format(M, self.indptr, self.indices)
  896. )
  897. return self._has_canonical_format
  898. @has_canonical_format.setter
  899. def has_canonical_format(self, val: bool):
  900. self._has_canonical_format = bool(val)
  901. if val:
  902. self.has_sorted_indices = True
  903. def sum_duplicates(self):
  904. """Eliminate duplicate entries by adding them together
  905. This is an *in place* operation.
  906. """
  907. if self.has_canonical_format:
  908. return
  909. self.sort_indices()
  910. M, N = self._swap(self._shape_as_2d)
  911. csr_sum_duplicates(M, N, self.indptr, self.indices, self.data)
  912. self.prune() # nnz may have changed
  913. self.has_canonical_format = True
  914. @property
  915. def has_sorted_indices(self) -> bool:
  916. """Whether the indices are sorted
  917. Returns
  918. - True: if the indices of the array/matrix are in sorted order
  919. - False: otherwise
  920. """
  921. # first check to see if result was cached
  922. if not hasattr(self, '_has_sorted_indices'):
  923. M = len(self.indptr) - 1
  924. self._has_sorted_indices = bool(
  925. csr_has_sorted_indices(M, self.indptr, self.indices)
  926. )
  927. return self._has_sorted_indices
  928. @has_sorted_indices.setter
  929. def has_sorted_indices(self, val: bool):
  930. self._has_sorted_indices = bool(val)
  931. def sorted_indices(self):
  932. """Return a copy of this array/matrix with sorted indices
  933. """
  934. A = self.copy()
  935. A.sort_indices()
  936. return A
  937. # an alternative that has linear complexity is the following
  938. # although the previous option is typically faster
  939. # return self.toother().toother()
  940. def sort_indices(self):
  941. """Sort the indices of this array/matrix *in place*
  942. """
  943. if not self.has_sorted_indices:
  944. M = len(self.indptr) - 1
  945. csr_sort_indices(M, self.indptr, self.indices, self.data)
  946. self.has_sorted_indices = True
  947. def prune(self):
  948. """Remove empty space after all non-zero elements.
  949. """
  950. major_dim = self._swap(self._shape_as_2d)[0]
  951. if len(self.indptr) != major_dim + 1:
  952. raise ValueError('index pointer has invalid length')
  953. if len(self.indices) < self.nnz:
  954. raise ValueError('indices array has fewer than nnz elements')
  955. if len(self.data) < self.nnz:
  956. raise ValueError('data array has fewer than nnz elements')
  957. self.indices = _prune_array(self.indices[:self.nnz])
  958. self.data = _prune_array(self.data[:self.nnz])
  959. def resize(self, *shape):
  960. shape = check_shape(shape, allow_nd=self._allow_nd)
  961. if hasattr(self, 'blocksize'):
  962. bm, bn = self.blocksize
  963. new_M, rm = divmod(shape[0], bm)
  964. new_N, rn = divmod(shape[1], bn)
  965. if rm or rn:
  966. raise ValueError(f"shape must be divisible into {self.blocksize}"
  967. f" blocks. Got {shape}")
  968. M, N = self.shape[0] // bm, self.shape[1] // bn
  969. else:
  970. new_M, new_N = self._swap(shape if len(shape)>1 else (1, shape[0]))
  971. M, N = self._swap(self._shape_as_2d)
  972. if new_M < M:
  973. self.indices = self.indices[:self.indptr[new_M]]
  974. self.data = self.data[:self.indptr[new_M]]
  975. self.indptr = self.indptr[:new_M + 1]
  976. elif new_M > M:
  977. self.indptr = np.resize(self.indptr, new_M + 1)
  978. self.indptr[M + 1:].fill(self.indptr[M])
  979. if new_N < N:
  980. mask = self.indices < new_N
  981. if not np.all(mask):
  982. self.indices = self.indices[mask]
  983. self.data = self.data[mask]
  984. major_index, val = self._minor_reduce(np.add, mask)
  985. self.indptr.fill(0)
  986. self.indptr[1:][major_index] = val
  987. np.cumsum(self.indptr, out=self.indptr)
  988. self._shape = shape
  989. resize.__doc__ = _spbase.resize.__doc__
  990. ###################
  991. # utility methods #
  992. ###################
  993. # needed by _data_matrix
  994. def _with_data(self, data, copy=True):
  995. """Returns a matrix with the same sparsity structure as self,
  996. but with different data. By default the structure arrays
  997. (i.e. .indptr and .indices) are copied.
  998. """
  999. if copy:
  1000. return self.__class__((data, self.indices.copy(),
  1001. self.indptr.copy()),
  1002. shape=self.shape,
  1003. dtype=data.dtype)
  1004. else:
  1005. return self.__class__((data, self.indices, self.indptr),
  1006. shape=self.shape, dtype=data.dtype)
  1007. def _binopt(self, other, op):
  1008. """apply the binary operation fn to two sparse matrices."""
  1009. other = self.__class__(other)
  1010. # e.g. csr_plus_csr, csr_minus_csr, etc.
  1011. fn = getattr(_sparsetools, "csr" + op + "csr")
  1012. maxnnz = self.nnz + other.nnz
  1013. idx_dtype = self._get_index_dtype((self.indptr, self.indices,
  1014. other.indptr, other.indices),
  1015. maxval=maxnnz)
  1016. indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
  1017. indices = np.empty(maxnnz, dtype=idx_dtype)
  1018. bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
  1019. if op in bool_ops:
  1020. data = np.empty(maxnnz, dtype=np.bool_)
  1021. else:
  1022. data = np.empty(maxnnz, dtype=upcast(self.dtype, other.dtype))
  1023. M, N = self._swap(self._shape_as_2d)
  1024. fn(M, N,
  1025. np.asarray(self.indptr, dtype=idx_dtype),
  1026. np.asarray(self.indices, dtype=idx_dtype),
  1027. self.data,
  1028. np.asarray(other.indptr, dtype=idx_dtype),
  1029. np.asarray(other.indices, dtype=idx_dtype),
  1030. other.data,
  1031. indptr, indices, data)
  1032. A = self.__class__((data, indices, indptr), shape=self.shape)
  1033. A.prune()
  1034. return A
  1035. def _divide_sparse(self, other):
  1036. """
  1037. Divide this matrix by a second sparse matrix.
  1038. """
  1039. if other.shape != self.shape:
  1040. raise ValueError(f"inconsistent shapes {self.shape} and {other.shape}")
  1041. r = self._binopt(other, '_eldiv_')
  1042. if np.issubdtype(r.dtype, np.inexact):
  1043. # Eldiv leaves entries outside the combined sparsity
  1044. # pattern empty, so they must be filled manually.
  1045. # Everything outside of other's sparsity is NaN, and everything
  1046. # inside it is either zero or defined by eldiv.
  1047. out = np.empty(self.shape, dtype=self.dtype)
  1048. out.fill(np.nan)
  1049. coords = other.nonzero()
  1050. if self.ndim == 1:
  1051. coords = (coords[-1],)
  1052. out[coords] = 0
  1053. r = r.tocoo()
  1054. out[r.coords] = r.data
  1055. return self._container(out)
  1056. else:
  1057. # integers types go with nan <-> 0
  1058. out = r
  1059. return out
  1060. def _broadcast_to(self, shape, copy=False):
  1061. if self.shape == shape:
  1062. return self.copy() if copy else self
  1063. shape = check_shape(shape, allow_nd=(self._allow_nd))
  1064. if broadcast_shapes(self.shape, shape) != shape:
  1065. raise ValueError("cannot be broadcast")
  1066. if len(self.shape) == 1 and len(shape) == 1:
  1067. self.sum_duplicates()
  1068. if self.nnz == 0: # array has no non zero elements
  1069. return self.__class__(shape, dtype=self.dtype, copy=False)
  1070. N = shape[0]
  1071. data = np.full(N, self.data[0])
  1072. indices = np.arange(0,N)
  1073. indptr = np.array([0, N])
  1074. return self._csr_container((data, indices, indptr), shape=shape, copy=False)
  1075. # treat 1D as a 2D row
  1076. old_shape = self._shape_as_2d
  1077. if len(shape) != 2:
  1078. ndim = len(shape)
  1079. raise ValueError(f'CSR/CSC broadcast_to cannot have shape >2D. Got {ndim}D')
  1080. if self.nnz == 0: # array has no non zero elements
  1081. return self.__class__(shape, dtype=self.dtype, copy=False)
  1082. self.sum_duplicates()
  1083. M, N = self._swap(shape)
  1084. oM, oN = self._swap(old_shape)
  1085. if all(s == 1 for s in old_shape):
  1086. # Broadcast a single element to the entire shape
  1087. data = np.full(M * N, self.data[0])
  1088. indices = np.tile(np.arange(N), M)
  1089. indptr = np.arange(0, len(data) + 1, N)
  1090. elif oM == 1 and oN == N:
  1091. # Broadcast row-wise (columns for CSC)
  1092. data = np.tile(self.data, M)
  1093. indices = np.tile(self.indices, M)
  1094. indptr = np.arange(0, len(data) + 1, len(self.data))
  1095. elif oN == 1 and oM == M:
  1096. # Broadcast column-wise (rows for CSC)
  1097. data = np.repeat(self.data, N)
  1098. indices = np.tile(np.arange(N), len(self.data))
  1099. indptr = self.indptr * N
  1100. return self.__class__((data, indices, indptr), shape=shape, copy=False)
  1101. def _make_diagonal_csr(data, is_array=False):
  1102. """build diagonal csc_array/csr_array => self._csr_container
  1103. Parameter `data` should be a raveled numpy array holding the
  1104. values on the diagonal of the resulting sparse matrix.
  1105. """
  1106. from ._csr import csr_array, csr_matrix
  1107. csr_array = csr_array if is_array else csr_matrix
  1108. N = len(data)
  1109. idx_dtype = get_index_dtype(maxval=N)
  1110. indptr = np.arange(N + 1, dtype=idx_dtype)
  1111. indices = indptr[:-1]
  1112. return csr_array((data, indices, indptr), shape=(N, N))
  1113. def _process_slice(sl, num):
  1114. if sl is None:
  1115. i0, i1 = 0, num
  1116. elif isinstance(sl, slice):
  1117. i0, i1, stride = sl.indices(num)
  1118. if stride != 1:
  1119. raise ValueError('slicing with step != 1 not supported')
  1120. i0 = min(i0, i1) # give an empty slice when i0 > i1
  1121. elif isintlike(sl):
  1122. if sl < 0:
  1123. sl += num
  1124. i0, i1 = sl, sl + 1
  1125. if i0 < 0 or i1 > num:
  1126. raise IndexError(f'index out of bounds: 0 <= {i0} < {i1} <= {num}')
  1127. else:
  1128. raise TypeError('expected slice or scalar')
  1129. return i0, i1