_coo.py 73 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890
  1. """ A sparse matrix in COOrdinate or 'triplet' format"""
  2. __docformat__ = "restructuredtext en"
  3. __all__ = ['coo_array', 'coo_matrix', 'isspmatrix_coo']
  4. import math
  5. from warnings import warn
  6. import numpy as np
  7. from .._lib._util import copy_if_needed
  8. from ._matrix import spmatrix
  9. from ._sparsetools import (coo_tocsr, coo_todense, coo_todense_nd,
  10. coo_matvec, coo_matvec_nd, coo_matmat_dense,
  11. coo_matmat_dense_nd)
  12. from ._base import issparse, SparseEfficiencyWarning, _spbase, sparray
  13. from ._data import _data_matrix, _minmax_mixin
  14. from ._sputils import (upcast_char, to_native, isshape, getdtype,
  15. getdata, downcast_intp_index, get_index_dtype,
  16. check_shape, check_reshape_kwargs, isscalarlike,
  17. isintlike, isdense)
  18. from ._index import _validate_indices, _broadcast_arrays
  19. import operator
  20. class _coo_base(_data_matrix, _minmax_mixin):
  21. _format = 'coo'
  22. _allow_nd = range(1, 65)
  23. def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None):
  24. _data_matrix.__init__(self, arg1, maxprint=maxprint)
  25. if not copy:
  26. copy = copy_if_needed
  27. if isinstance(arg1, tuple):
  28. if isshape(arg1, allow_nd=self._allow_nd):
  29. self._shape = check_shape(arg1, allow_nd=self._allow_nd)
  30. idx_dtype = self._get_index_dtype(maxval=max(self._shape))
  31. data_dtype = getdtype(dtype, default=float)
  32. self.coords = tuple(np.array([], dtype=idx_dtype)
  33. for _ in range(len(self._shape)))
  34. self.data = np.array([], dtype=data_dtype)
  35. self.has_canonical_format = True
  36. else:
  37. try:
  38. obj, coords = arg1
  39. except (TypeError, ValueError) as e:
  40. raise TypeError('invalid input format') from e
  41. if shape is None:
  42. if any(len(idx) == 0 for idx in coords):
  43. raise ValueError('cannot infer dimensions from zero '
  44. 'sized index arrays')
  45. shape = tuple(operator.index(np.max(idx)) + 1
  46. for idx in coords)
  47. self._shape = check_shape(shape, allow_nd=self._allow_nd)
  48. idx_dtype = self._get_index_dtype(coords,
  49. maxval=max(self.shape),
  50. check_contents=True)
  51. self.coords = tuple(np.array(idx, copy=copy, dtype=idx_dtype)
  52. for idx in coords)
  53. self.data = getdata(obj, copy=copy, dtype=dtype)
  54. self.has_canonical_format = False
  55. else:
  56. if issparse(arg1):
  57. if arg1.format == self.format and copy:
  58. self.coords = tuple(idx.copy() for idx in arg1.coords)
  59. self.data = arg1.data.astype(getdtype(dtype, arg1)) # copy=True
  60. self._shape = check_shape(arg1.shape, allow_nd=self._allow_nd)
  61. self.has_canonical_format = arg1.has_canonical_format
  62. else:
  63. coo = arg1.tocoo(copy=copy)
  64. self.coords = tuple(coo.coords)
  65. self.data = coo.data.astype(getdtype(dtype, coo), copy=False)
  66. self._shape = check_shape(coo.shape, allow_nd=self._allow_nd)
  67. self.has_canonical_format = False
  68. else:
  69. # dense argument
  70. M = np.asarray(arg1)
  71. if not isinstance(self, sparray):
  72. M = np.atleast_2d(M)
  73. if M.ndim != 2:
  74. raise TypeError(f'expected 2D array or matrix, not {M.ndim}D')
  75. self._shape = check_shape(M.shape, allow_nd=self._allow_nd)
  76. if shape is not None:
  77. if check_shape(shape, allow_nd=self._allow_nd) != self._shape:
  78. message = f'inconsistent shapes: {shape} != {self._shape}'
  79. raise ValueError(message)
  80. index_dtype = self._get_index_dtype(maxval=max(self._shape))
  81. coords = M.nonzero()
  82. self.coords = tuple(idx.astype(index_dtype, copy=False)
  83. for idx in coords)
  84. self.data = getdata(M[coords], copy=copy, dtype=dtype)
  85. self.has_canonical_format = True
  86. if len(self._shape) > 2:
  87. self.coords = tuple(idx.astype(np.int64, copy=False) for idx in self.coords)
  88. self._check()
  89. @property
  90. def row(self):
  91. if self.ndim > 1:
  92. return self.coords[-2]
  93. result = np.zeros_like(self.col)
  94. result.setflags(write=False)
  95. return result
  96. @row.setter
  97. def row(self, new_row):
  98. if self.ndim < 2:
  99. raise ValueError('cannot set row attribute of a 1-dimensional sparse array')
  100. new_row = np.asarray(new_row, dtype=self.coords[-2].dtype)
  101. self.coords = self.coords[:-2] + (new_row,) + self.coords[-1:]
  102. @property
  103. def col(self):
  104. return self.coords[-1]
  105. @col.setter
  106. def col(self, new_col):
  107. new_col = np.asarray(new_col, dtype=self.coords[-1].dtype)
  108. self.coords = self.coords[:-1] + (new_col,)
  109. def reshape(self, *args, **kwargs):
  110. shape = check_shape(args, self.shape, allow_nd=self._allow_nd)
  111. order, copy = check_reshape_kwargs(kwargs)
  112. # Return early if reshape is not required
  113. if shape == self.shape:
  114. if copy:
  115. return self.copy()
  116. else:
  117. return self
  118. # When reducing the number of dimensions, we need to be careful about
  119. # index overflow. This is why we can't simply call
  120. # `np.ravel_multi_index()` followed by `np.unravel_index()` here.
  121. flat_coords = _ravel_coords(self.coords, self.shape, order=order)
  122. if len(shape) == 2:
  123. if order == 'C':
  124. new_coords = divmod(flat_coords, shape[1])
  125. else:
  126. new_coords = divmod(flat_coords, shape[0])[::-1]
  127. else:
  128. new_coords = np.unravel_index(flat_coords, shape, order=order)
  129. idx_dtype = self._get_index_dtype(self.coords, maxval=max(shape))
  130. new_coords = tuple(np.asarray(co, dtype=idx_dtype) for co in new_coords)
  131. # Handle copy here rather than passing on to the constructor so that no
  132. # copy will be made of `new_coords` regardless.
  133. if copy:
  134. new_data = self.data.copy()
  135. else:
  136. new_data = self.data
  137. return self.__class__((new_data, new_coords), shape=shape, copy=False)
  138. reshape.__doc__ = _spbase.reshape.__doc__
  139. def _getnnz(self, axis=None):
  140. if axis is None or (axis == 0 and self.ndim == 1):
  141. nnz = len(self.data)
  142. if any(len(idx) != nnz for idx in self.coords):
  143. raise ValueError('all index and data arrays must have the '
  144. 'same length')
  145. if self.data.ndim != 1 or any(idx.ndim != 1 for idx in self.coords):
  146. raise ValueError('coordinates and data arrays must be 1-D')
  147. return int(nnz)
  148. if axis < 0:
  149. axis += self.ndim
  150. if axis >= self.ndim:
  151. raise ValueError('axis out of bounds')
  152. return np.bincount(downcast_intp_index(self.coords[1 - axis]),
  153. minlength=self.shape[1 - axis])
  154. _getnnz.__doc__ = _spbase._getnnz.__doc__
  155. def count_nonzero(self, axis=None):
  156. self.sum_duplicates()
  157. if axis is None:
  158. return np.count_nonzero(self.data)
  159. if axis < 0:
  160. axis += self.ndim
  161. if axis < 0 or axis >= self.ndim:
  162. raise ValueError('axis out of bounds')
  163. mask = self.data != 0
  164. coord = self.coords[1 - axis][mask]
  165. return np.bincount(downcast_intp_index(coord), minlength=self.shape[1 - axis])
  166. count_nonzero.__doc__ = _spbase.count_nonzero.__doc__
  167. def _check(self):
  168. """ Checks data structure for consistency """
  169. if self.ndim != len(self.coords):
  170. raise ValueError('mismatching number of index arrays for shape; '
  171. f'got {len(self.coords)}, expected {self.ndim}')
  172. # index arrays should have integer data types
  173. for i, idx in enumerate(self.coords):
  174. if idx.dtype.kind != 'i':
  175. warn(f'index array {i} has non-integer dtype ({idx.dtype.name})',
  176. stacklevel=3)
  177. idx_dtype = self._get_index_dtype(self.coords, maxval=max(self.shape))
  178. self.coords = tuple(np.asarray(idx, dtype=idx_dtype)
  179. for idx in self.coords)
  180. self.data = to_native(self.data)
  181. if self.nnz > 0:
  182. for i, idx in enumerate(self.coords):
  183. if idx.max() >= self.shape[i]:
  184. raise ValueError(f'axis {i} index {idx.max()} exceeds '
  185. f'matrix dimension {self.shape[i]}')
  186. if idx.min() < 0:
  187. raise ValueError(f'negative axis {i} index: {idx.min()}')
  188. def transpose(self, axes=None, copy=False):
  189. if axes is None:
  190. axes = range(self.ndim)[::-1]
  191. elif isinstance(self, sparray):
  192. if not hasattr(axes, "__len__") or len(axes) != self.ndim:
  193. raise ValueError("axes don't match matrix dimensions")
  194. if len(set(axes)) != self.ndim:
  195. raise ValueError("repeated axis in transpose")
  196. elif axes != (1, 0):
  197. raise ValueError("Sparse matrices do not support an 'axes' "
  198. "parameter because swapping dimensions is the "
  199. "only logical permutation.")
  200. permuted_shape = tuple(self._shape[i] for i in axes)
  201. permuted_coords = tuple(self.coords[i] for i in axes)
  202. return self.__class__((self.data, permuted_coords),
  203. shape=permuted_shape, copy=copy)
  204. transpose.__doc__ = _spbase.transpose.__doc__
  205. def resize(self, *shape) -> None:
  206. shape = check_shape(shape, allow_nd=self._allow_nd)
  207. if self.ndim > 2:
  208. raise ValueError("only 1-D or 2-D input accepted")
  209. if len(shape) > 2:
  210. raise ValueError("shape argument must be 1-D or 2-D")
  211. # Check for added dimensions.
  212. if len(shape) > self.ndim:
  213. flat_coords = _ravel_coords(self.coords, self.shape)
  214. max_size = math.prod(shape)
  215. self.coords = np.unravel_index(flat_coords[:max_size], shape)
  216. self.data = self.data[:max_size]
  217. self._shape = shape
  218. return
  219. # Check for removed dimensions.
  220. if len(shape) < self.ndim:
  221. tmp_shape = (
  222. self._shape[:len(shape) - 1] # Original shape without last axis
  223. + (-1,) # Last axis is used to flatten the array
  224. + (1,) * (self.ndim - len(shape)) # Pad with ones
  225. )
  226. tmp = self.reshape(tmp_shape)
  227. self.coords = tmp.coords[:len(shape)]
  228. self._shape = tmp.shape[:len(shape)]
  229. # Handle truncation of existing dimensions.
  230. is_truncating = any(old > new for old, new in zip(self.shape, shape))
  231. if is_truncating:
  232. mask = np.logical_and.reduce([
  233. idx < size for idx, size in zip(self.coords, shape)
  234. ])
  235. if not mask.all():
  236. self.coords = tuple(idx[mask] for idx in self.coords)
  237. self.data = self.data[mask]
  238. self._shape = shape
  239. resize.__doc__ = _spbase.resize.__doc__
  240. def toarray(self, order=None, out=None):
  241. B = self._process_toarray_args(order, out)
  242. fortran = int(B.flags.f_contiguous)
  243. if not fortran and not B.flags.c_contiguous:
  244. raise ValueError("Output array must be C or F contiguous")
  245. # This handles both 0D and 1D cases correctly regardless of the
  246. # original shape.
  247. if self.ndim == 1:
  248. coo_todense_nd(np.array([1]), self.nnz, self.ndim,
  249. self.coords[0], self.data, B.ravel('A'), fortran)
  250. elif self.ndim == 2:
  251. M, N = self.shape
  252. coo_todense(M, N, self.nnz, self.row, self.col, self.data,
  253. B.ravel('A'), fortran)
  254. else: # dim>2
  255. if fortran:
  256. strides = np.append(1, np.cumprod(self.shape[:-1]))
  257. else:
  258. strides = np.append(np.cumprod(self.shape[1:][::-1])[::-1], 1)
  259. coords = np.concatenate(self.coords)
  260. coo_todense_nd(strides, self.nnz, self.ndim,
  261. coords, self.data, B.ravel('A'), fortran)
  262. # Note: reshape() doesn't copy here, but does return a new array (view).
  263. return B.reshape(self.shape)
  264. toarray.__doc__ = _spbase.toarray.__doc__
  265. def tocsc(self, copy=False):
  266. """Convert this array/matrix to Compressed Sparse Column format
  267. Duplicate entries will be summed together.
  268. Examples
  269. --------
  270. >>> from numpy import array
  271. >>> from scipy.sparse import coo_array
  272. >>> row = array([0, 0, 1, 3, 1, 0, 0])
  273. >>> col = array([0, 2, 1, 3, 1, 0, 0])
  274. >>> data = array([1, 1, 1, 1, 1, 1, 1])
  275. >>> A = coo_array((data, (row, col)), shape=(4, 4)).tocsc()
  276. >>> A.toarray()
  277. array([[3, 0, 1, 0],
  278. [0, 2, 0, 0],
  279. [0, 0, 0, 0],
  280. [0, 0, 0, 1]])
  281. """
  282. if self.ndim != 2:
  283. raise ValueError(f'Cannot convert. CSC format must be 2D. Got {self.ndim}D')
  284. if self.nnz == 0:
  285. return self._csc_container(self.shape, dtype=self.dtype)
  286. else:
  287. from ._csc import csc_array
  288. indptr, indices, data, shape = self._coo_to_compressed(csc_array._swap)
  289. x = self._csc_container((data, indices, indptr), shape=shape)
  290. if not self.has_canonical_format:
  291. x.sum_duplicates()
  292. return x
  293. def tocsr(self, copy=False):
  294. """Convert this array/matrix to Compressed Sparse Row format
  295. Duplicate entries will be summed together.
  296. Examples
  297. --------
  298. >>> from numpy import array
  299. >>> from scipy.sparse import coo_array
  300. >>> row = array([0, 0, 1, 3, 1, 0, 0])
  301. >>> col = array([0, 2, 1, 3, 1, 0, 0])
  302. >>> data = array([1, 1, 1, 1, 1, 1, 1])
  303. >>> A = coo_array((data, (row, col)), shape=(4, 4)).tocsr()
  304. >>> A.toarray()
  305. array([[3, 0, 1, 0],
  306. [0, 2, 0, 0],
  307. [0, 0, 0, 0],
  308. [0, 0, 0, 1]])
  309. """
  310. if self.ndim > 2:
  311. raise ValueError(f'Cannot convert. CSR must be 1D or 2D. Got {self.ndim}D')
  312. if self.nnz == 0:
  313. return self._csr_container(self.shape, dtype=self.dtype)
  314. else:
  315. from ._csr import csr_array
  316. arrays = self._coo_to_compressed(csr_array._swap, copy=copy)
  317. indptr, indices, data, shape = arrays
  318. x = self._csr_container((data, indices, indptr), shape=self.shape)
  319. if not self.has_canonical_format:
  320. x.sum_duplicates()
  321. return x
  322. def _coo_to_compressed(self, swap, copy=False):
  323. """convert (shape, coords, data) to (indptr, indices, data, shape)"""
  324. M, N = swap(self._shape_as_2d)
  325. # convert idx_dtype intc to int32 for pythran.
  326. # tested in scipy/optimize/tests/test__numdiff.py::test_group_columns
  327. idx_dtype = self._get_index_dtype(self.coords, maxval=max(self.nnz, N))
  328. if self.ndim == 1:
  329. indices = self.coords[0].copy() if copy else self.coords[0]
  330. nnz = len(indices)
  331. indptr = np.array([0, nnz], dtype=idx_dtype)
  332. data = self.data.copy() if copy else self.data
  333. return indptr, indices, data, self.shape
  334. # ndim == 2
  335. major, minor = swap(self.coords)
  336. nnz = len(major)
  337. major = major.astype(idx_dtype, copy=False)
  338. minor = minor.astype(idx_dtype, copy=False)
  339. indptr = np.empty(M + 1, dtype=idx_dtype)
  340. indices = np.empty_like(minor, dtype=idx_dtype)
  341. data = np.empty_like(self.data, dtype=self.dtype)
  342. coo_tocsr(M, N, nnz, major, minor, self.data, indptr, indices, data)
  343. return indptr, indices, data, self.shape
  344. def tocoo(self, copy=False):
  345. if copy:
  346. return self.copy()
  347. else:
  348. return self
  349. tocoo.__doc__ = _spbase.tocoo.__doc__
  350. def todia(self, copy=False):
  351. if self.ndim != 2:
  352. raise ValueError(f'Cannot convert. DIA format must be 2D. Got {self.ndim}D')
  353. self.sum_duplicates()
  354. ks = self.col - self.row # the diagonal for each nonzero
  355. diags, diag_idx = np.unique(ks, return_inverse=True)
  356. if len(diags) > 100:
  357. # probably undesired, should todia() have a maxdiags parameter?
  358. warn(f"Constructing a DIA matrix with {len(diags)} diagonals "
  359. "is inefficient",
  360. SparseEfficiencyWarning, stacklevel=2)
  361. #initialize and fill in data array
  362. if self.data.size == 0:
  363. data = np.zeros((0, 0), dtype=self.dtype)
  364. else:
  365. data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
  366. data[diag_idx, self.col] = self.data
  367. return self._dia_container((data, diags), shape=self.shape)
  368. todia.__doc__ = _spbase.todia.__doc__
  369. def todok(self, copy=False):
  370. if self.ndim > 2:
  371. raise ValueError(f'Cannot convert. DOK must be 1D or 2D. Got {self.ndim}D')
  372. self.sum_duplicates()
  373. dok = self._dok_container(self.shape, dtype=self.dtype)
  374. # ensure that 1d coordinates are not tuples
  375. if self.ndim == 1:
  376. coords = self.coords[0]
  377. else:
  378. coords = zip(*self.coords)
  379. dok._dict = dict(zip(coords, self.data))
  380. return dok
  381. todok.__doc__ = _spbase.todok.__doc__
  382. def diagonal(self, k=0):
  383. if self.ndim != 2:
  384. raise ValueError("diagonal requires two dimensions")
  385. rows, cols = self.shape
  386. if k <= -rows or k >= cols:
  387. return np.empty(0, dtype=self.data.dtype)
  388. diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
  389. dtype=self.dtype)
  390. diag_mask = (self.row + k) == self.col
  391. if self.has_canonical_format:
  392. row = self.row[diag_mask]
  393. data = self.data[diag_mask]
  394. else:
  395. inds = tuple(idx[diag_mask] for idx in self.coords)
  396. (row, _), data = self._sum_duplicates(inds, self.data[diag_mask])
  397. diag[row + min(k, 0)] = data
  398. return diag
  399. diagonal.__doc__ = _data_matrix.diagonal.__doc__
  400. def _setdiag(self, values, k):
  401. if self.ndim != 2:
  402. raise ValueError("setting a diagonal requires two dimensions")
  403. M, N = self.shape
  404. if values.ndim and not len(values):
  405. return
  406. idx_dtype = self.row.dtype
  407. # Determine which triples to keep and where to put the new ones.
  408. full_keep = self.col - self.row != k
  409. if k < 0:
  410. max_index = min(M+k, N)
  411. if values.ndim:
  412. max_index = min(max_index, len(values))
  413. keep = np.logical_or(full_keep, self.col >= max_index)
  414. new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
  415. new_col = np.arange(max_index, dtype=idx_dtype)
  416. else:
  417. max_index = min(M, N-k)
  418. if values.ndim:
  419. max_index = min(max_index, len(values))
  420. keep = np.logical_or(full_keep, self.row >= max_index)
  421. new_row = np.arange(max_index, dtype=idx_dtype)
  422. new_col = np.arange(k, k + max_index, dtype=idx_dtype)
  423. # Define the array of data consisting of the entries to be added.
  424. if values.ndim:
  425. new_data = values[:max_index]
  426. else:
  427. new_data = np.empty(max_index, dtype=self.dtype)
  428. new_data[:] = values
  429. # Update the internal structure.
  430. self.coords = (np.concatenate((self.row[keep], new_row)),
  431. np.concatenate((self.col[keep], new_col)))
  432. self.data = np.concatenate((self.data[keep], new_data))
  433. self.has_canonical_format = False
  434. # needed by _data_matrix
  435. def _with_data(self, data, copy=True):
  436. """Returns a matrix with the same sparsity structure as self,
  437. but with different data. By default the index arrays are copied.
  438. """
  439. if copy:
  440. coords = tuple(idx.copy() for idx in self.coords)
  441. else:
  442. coords = self.coords
  443. return self.__class__((data, coords), shape=self.shape, dtype=data.dtype)
  444. def __getitem__(self, key):
  445. index, new_shape, arr_int_pos, none_pos = _validate_indices(
  446. key, self.shape, self.format
  447. )
  448. # handle int, slice and int-array indices
  449. index_mask = np.ones(len(self.data), dtype=np.bool_)
  450. slice_coords = []
  451. arr_coords = []
  452. arr_indices = []
  453. for i, (idx, co) in enumerate(zip(index, self.coords)):
  454. if isinstance(idx, int):
  455. index_mask &= (co == idx)
  456. elif isinstance(idx, slice):
  457. if idx == slice(None):
  458. slice_coords.append(co)
  459. else:
  460. start, stop, step = idx.indices(self.shape[i])
  461. if step != 1:
  462. if step < 0:
  463. in_range = (co <= start) & (co > stop)
  464. else:
  465. in_range = (co >= start) & (co < stop)
  466. new_ix, m = np.divmod(co - start, step)
  467. index_mask &= (m == 0) & in_range
  468. else:
  469. in_range = (co >= start) & (co < stop)
  470. new_ix = co - start
  471. index_mask &= in_range
  472. slice_coords.append(new_ix)
  473. else: # array
  474. arr_coords.append(co)
  475. arr_indices.append(idx)
  476. # shortcut for scalar output
  477. if new_shape == ():
  478. return self.data[index_mask].sum().astype(self.dtype, copy=False)
  479. new_coords = [co[index_mask] for co in slice_coords]
  480. new_data = self.data[index_mask]
  481. # handle array indices
  482. if arr_indices:
  483. arr_indices = _broadcast_arrays(*arr_indices)
  484. arr_shape = arr_indices[0].shape # already broadcast in validate_indices
  485. # There are three dimensions required to check array indices against coords
  486. # Their lengths are described as:
  487. # a) number of indices that are arrays - arr_dim
  488. # b) number of coords to check - masked_nnz (already masked by slices)
  489. # c) size of the index arrays - arr_size
  490. # Note for this, integer indices are treated like slices, not like arrays.
  491. #
  492. # Goal:
  493. # Find new_coords and index positions that match across all arr_dim axes.
  494. # Approach: Track matches using bool array. Size: masked_nnz by arr_size.
  495. # True means all arr_indices match at that coord and index position.
  496. # Equate with broadcasting and check for all equal across (arr_dim) axis 0.
  497. # 1st array is "keyarr" (arr_dim by 1 by arr_size) from arr_indices.
  498. # 2nd array is "arr_coords" (arr_dim by masked_nnz by 1) from arr_coords.
  499. keyarr = np.array(arr_indices).reshape(len(arr_indices), 1, -1)
  500. arr_coords = np.array([co[index_mask] for co in arr_coords])[:, :, None]
  501. found = (keyarr == arr_coords).all(axis=0)
  502. arr_co, arr_ix = found.nonzero()
  503. new_data = new_data[arr_co]
  504. new_coords = [co[arr_co] for co in new_coords]
  505. new_arr_coords = list(np.unravel_index(arr_ix, shape=arr_shape))
  506. # check for contiguous positions of array and int indices
  507. if len(arr_int_pos) == arr_int_pos[-1] - arr_int_pos[0] + 1:
  508. # Contiguous. Put all array index shape at pos of array indices
  509. pos = arr_int_pos[0]
  510. new_coords = new_coords[:pos] + new_arr_coords + new_coords[pos:]
  511. else:
  512. # Not contiguous. Put all array coords at front
  513. new_coords = new_arr_coords + new_coords
  514. if none_pos:
  515. if new_coords:
  516. coord_like = np.zeros_like(new_coords[0])
  517. else:
  518. coord_like = np.zeros(len(new_data), dtype=self.coords[0].dtype)
  519. new_coords.insert(none_pos[0], coord_like)
  520. for i in none_pos[1:]:
  521. new_coords.insert(i, coord_like.copy())
  522. return coo_array((new_data, new_coords), shape=new_shape, dtype=self.dtype)
  523. def __setitem__(self, key, x):
  524. # enact self[key] = x
  525. index, new_shape, arr_int_pos, none_pos = _validate_indices(
  526. key, self.shape, self.format
  527. )
  528. # remove None's at beginning of index. Should not impact indexing coords
  529. # and will mistakenly align with x_coord columns if not removed.
  530. if none_pos:
  531. new_shape = list(new_shape)
  532. for j in none_pos[::-1]:
  533. new_shape.pop(j)
  534. new_shape = tuple(new_shape)
  535. # broadcast arrays
  536. if arr_int_pos:
  537. index = list(index)
  538. arr_pos = {i: arr for i in arr_int_pos if not isintlike(arr := index[i])}
  539. arr_indices = _broadcast_arrays(*arr_pos.values())
  540. for i, arr in zip(arr_pos, arr_indices):
  541. index[i] = arr
  542. # get coords and data from x
  543. if issparse(x):
  544. if 0 in x.shape:
  545. return # Nothing to set.
  546. x_data, x_coords = _get_sparse_data_and_coords(x, new_shape, self.dtype)
  547. else:
  548. x = np.asarray(x, dtype=self.dtype)
  549. if x.size == 0:
  550. return # Nothing to set.
  551. x_data, x_coords = _get_dense_data_and_coords(x, new_shape)
  552. # Approach:
  553. # Set indexed values to zero (drop from `self.coords` and `self.data`)
  554. # create new coords and data arrays for setting nonzeros
  555. # concatenate old (undropped) values with new coords and data
  556. old_data, old_coords = self._zero_many(index)
  557. if len(x_coords) == 1 and len(x_coords[0]) == 0:
  558. self.data, self.coords = old_data, old_coords
  559. # leave self.has_canonical_format unchanged
  560. return
  561. # To process array indices, need the x_coords for those axes
  562. # and need to ravel the array part of x_coords to build new_coords
  563. # Along the way, Find pos and shape of array-index portion of key.
  564. # arr_shape is None and pos = -1 when no arrays are used as indices.
  565. arr_shape = None
  566. pos = -1
  567. if arr_int_pos:
  568. # Get arr_shape if any arrays are in the index.
  569. # Also ravel the corresponding x_coords.
  570. for idx in index:
  571. if not isinstance(idx, slice) and not isintlike(idx):
  572. arr_shape = idx.shape
  573. # Find x_coord pos of integer and array portion of index.
  574. # If contiguous put int and array axes at pos of those indices.
  575. # If not contiguous, put all int and array axes at pos=0.
  576. if len(arr_int_pos) == (arr_int_pos[-1] - arr_int_pos[0] + 1):
  577. pos = arr_int_pos[0]
  578. else:
  579. pos = 0
  580. # compute the raveled coords of the array part of x_coords.
  581. # Used to build the new coords from the index arrays.
  582. x_arr_coo = x_coords[pos:pos + len(arr_shape)]
  583. # could use np.ravel_multi_index but _ravel_coords avoids overflow
  584. x_arr_coo_ravel = _ravel_coords(x_arr_coo, arr_shape)
  585. break
  586. # find map from x_coord slice axes to index axes
  587. x_ax = 0
  588. x_axes = {}
  589. for i, idx in enumerate(index):
  590. if i == pos:
  591. x_ax += len(arr_shape)
  592. if isinstance(idx, slice):
  593. x_axes[i] = x_ax
  594. x_ax += 1
  595. # Build new_coords and new_data
  596. new_coords = [None] * self.ndim
  597. new_nnz = len(x_data)
  598. for i, idx in enumerate(index):
  599. if isintlike(idx):
  600. new_coords[i] = (np.broadcast_to(idx, (new_nnz,)))
  601. continue
  602. elif isinstance(idx, slice):
  603. start, stop, step = idx.indices(self.shape[i])
  604. new_coords[i] = (start + x_coords[x_axes[i]] * step)
  605. else: # array idx
  606. new_coords[i] = idx.ravel()[x_arr_coo_ravel]
  607. # seems like a copy is prudent when setting data in this array
  608. new_data = x_data.copy()
  609. # deduplicate entries created by multiple coords matching in the array index
  610. # NumPy does not specify which value is put into the spot (last one assigned)
  611. # so only one value should appear if we want to match NumPy.
  612. # If matching NumPy is not crucial, we could make dups a feature where
  613. # integer array indices with repeated indices creates duplicate values.
  614. # Not doing that here. We are just removing duplicates (keep 1st data found)
  615. new_coords = np.array(new_coords)
  616. _, ind = np.unique(new_coords, axis=1, return_index=True)
  617. # update values with stack of old and new data and coords.
  618. self.data = np.hstack([old_data, new_data[ind]])
  619. self.coords = tuple(np.hstack(c) for c in zip(old_coords, new_coords[:, ind]))
  620. self.has_canonical_format = False
  621. def _zero_many(self, index):
  622. # handle int, slice and integer-array indices
  623. # index_mask accumulates a bool array of nonzeros that match index
  624. index_mask=np.ones(len(self.data), dtype=np.bool_)
  625. arr_coords = []
  626. arr_indices = []
  627. for i, (idx, co) in enumerate(zip(index, self.coords)):
  628. if isinstance(idx, int):
  629. index_mask &= (co == idx)
  630. elif isinstance(idx, slice) and idx != slice(None):
  631. start, stop, step = idx.indices(self.shape[i])
  632. if step != 1:
  633. if step < 0:
  634. in_range = (co <= start) & (co > stop)
  635. else:
  636. in_range = (co >= start) & (co < stop)
  637. m = np.mod(co - start, step)
  638. index_mask &= (m == 0) & in_range
  639. else:
  640. in_range = (co >= start) & (co < stop)
  641. index_mask &= in_range
  642. elif isinstance(idx, slice) and idx == slice(None):
  643. # slice is full axis so no changes to index_mask
  644. pass
  645. else: # array
  646. arr_coords.append(co)
  647. arr_indices.append(idx)
  648. # match array indices with masked coords. See comments in __getitem__
  649. if arr_indices:
  650. keyarr = np.array(arr_indices).reshape(len(arr_indices), 1, -1)
  651. arr_coords = np.array([co[index_mask] for co in arr_coords])[:, :, None]
  652. found = (keyarr == arr_coords).all(axis=0)
  653. arr_coo, _ = found.nonzero()
  654. arr_index_mask = np.zeros_like(index_mask)
  655. arr_index_mask[index_mask.nonzero()[0][arr_coo]] = True
  656. index_mask &= arr_index_mask
  657. # remove matching coords and data to set them to zero
  658. pruned_coords = [co[~index_mask] for co in self.coords]
  659. pruned_data = self.data[~index_mask]
  660. return pruned_data, pruned_coords
  661. def sum_duplicates(self) -> None:
  662. """Eliminate duplicate entries by adding them together
  663. This is an *in place* operation
  664. """
  665. if self.has_canonical_format:
  666. return
  667. summed = self._sum_duplicates(self.coords, self.data)
  668. self.coords, self.data = summed
  669. self.has_canonical_format = True
  670. def _sum_duplicates(self, coords, data):
  671. # Assumes coords not in canonical format.
  672. if len(data) == 0:
  673. return coords, data
  674. # Sort coords w.r.t. rows, then cols. This corresponds to C-order,
  675. # which we rely on for argmin/argmax to return the first index in the
  676. # same way that numpy does (in the case of ties).
  677. order = np.lexsort(coords[::-1])
  678. coords = tuple(idx[order] for idx in coords)
  679. data = data[order]
  680. unique_mask = np.logical_or.reduce([
  681. idx[1:] != idx[:-1] for idx in coords
  682. ])
  683. unique_mask = np.append(True, unique_mask)
  684. coords = tuple(idx[unique_mask] for idx in coords)
  685. unique_inds, = np.nonzero(unique_mask)
  686. data = np.add.reduceat(data, downcast_intp_index(unique_inds), dtype=self.dtype)
  687. return coords, data
  688. def eliminate_zeros(self):
  689. """Remove zero entries from the array/matrix
  690. This is an *in place* operation
  691. """
  692. mask = self.data != 0
  693. self.data = self.data[mask]
  694. self.coords = tuple(idx[mask] for idx in self.coords)
  695. #######################
  696. # Arithmetic handlers #
  697. #######################
  698. def _add_dense(self, other):
  699. if other.shape != self.shape:
  700. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  701. dtype = upcast_char(self.dtype.char, other.dtype.char)
  702. result = np.array(other, dtype=dtype, copy=True)
  703. fortran = int(result.flags.f_contiguous)
  704. if self.ndim == 1:
  705. coo_todense_nd(np.array([1]), self.nnz, self.ndim,
  706. self.coords[0], self.data, result.ravel('A'), fortran)
  707. elif self.ndim == 2:
  708. M, N = self._shape_as_2d
  709. coo_todense(M, N, self.nnz, self.row, self.col, self.data,
  710. result.ravel('A'), fortran)
  711. else:
  712. if fortran:
  713. strides = np.append(1, np.cumprod(self.shape[:-1]))
  714. else:
  715. strides = np.append(np.cumprod(self.shape[1:][::-1])[::-1], 1)
  716. coords = np.concatenate(self.coords)
  717. coo_todense_nd(strides, self.nnz, self.ndim,
  718. coords, self.data, result.ravel('A'), fortran)
  719. return self._container(result, copy=False)
  720. def _add_sparse(self, other):
  721. if self.ndim < 3:
  722. return self.tocsr()._add_sparse(other)
  723. if other.shape != self.shape:
  724. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  725. other = self.__class__(other)
  726. new_data = np.concatenate((self.data, other.data))
  727. new_coords = tuple(np.concatenate((self.coords, other.coords), axis=1))
  728. A = self.__class__((new_data, new_coords), shape=self.shape)
  729. return A
  730. def _sub_sparse(self, other):
  731. if self.ndim < 3:
  732. return self.tocsr()._sub_sparse(other)
  733. if other.shape != self.shape:
  734. raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
  735. other = self.__class__(other)
  736. new_data = np.concatenate((self.data, -other.data))
  737. new_coords = tuple(np.concatenate((self.coords, other.coords), axis=1))
  738. A = coo_array((new_data, new_coords), shape=self.shape)
  739. return A
  740. def _matmul_vector(self, other):
  741. if self.ndim > 2:
  742. result = np.zeros(math.prod(self.shape[:-1]),
  743. dtype=upcast_char(self.dtype.char, other.dtype.char))
  744. shape = np.array(self.shape)
  745. strides = np.append(np.cumprod(shape[:-1][::-1])[::-1][1:], 1)
  746. coords = np.concatenate(self.coords)
  747. coo_matvec_nd(self.nnz, len(self.shape), strides, coords, self.data,
  748. other, result)
  749. result = result.reshape(self.shape[:-1])
  750. return result
  751. # self.ndim <= 2
  752. result_shape = self.shape[0] if self.ndim > 1 else 1
  753. result = np.zeros(result_shape,
  754. dtype=upcast_char(self.dtype.char, other.dtype.char))
  755. if self.ndim == 2:
  756. col = self.col
  757. row = self.row
  758. elif self.ndim == 1:
  759. col = self.coords[0]
  760. row = np.zeros_like(col)
  761. else:
  762. raise NotImplementedError(
  763. f"coo_matvec not implemented for ndim={self.ndim}")
  764. coo_matvec(self.nnz, row, col, self.data, other, result)
  765. # Array semantics return a scalar here, not a single-element array.
  766. if isinstance(self, sparray) and result_shape == 1:
  767. return result[0]
  768. return result
  769. def _rmatmul_dispatch(self, other):
  770. if isscalarlike(other):
  771. return self._mul_scalar(other)
  772. else:
  773. # Don't use asarray unless we have to
  774. try:
  775. o_ndim = other.ndim
  776. except AttributeError:
  777. other = np.asarray(other)
  778. o_ndim = other.ndim
  779. perm = tuple(range(o_ndim)[:-2]) + tuple(range(o_ndim)[-2:][::-1])
  780. tr = other.transpose(perm)
  781. s_ndim = self.ndim
  782. perm = tuple(range(s_ndim)[:-2]) + tuple(range(s_ndim)[-2:][::-1])
  783. ret = self.transpose(perm)._matmul_dispatch(tr)
  784. if ret is NotImplemented:
  785. return NotImplemented
  786. if s_ndim == 1 or o_ndim == 1:
  787. perm = range(ret.ndim)
  788. else:
  789. perm = tuple(range(ret.ndim)[:-2]) + tuple(range(ret.ndim)[-2:][::-1])
  790. return ret.transpose(perm)
  791. def _matmul_dispatch(self, other):
  792. if isscalarlike(other):
  793. return self.multiply(other)
  794. if not (issparse(other) or isdense(other)):
  795. # If it's a list or whatever, treat it like an array
  796. other_a = np.asanyarray(other)
  797. if other_a.ndim == 0 and other_a.dtype == np.object_:
  798. # Not interpretable as an array; return NotImplemented so that
  799. # other's __rmatmul__ can kick in if that's implemented.
  800. return NotImplemented
  801. # Allow custom sparse class indicated by attr sparse gh-6520
  802. try:
  803. other.shape
  804. except AttributeError:
  805. other = other_a
  806. if self.ndim < 3 and other.ndim < 3:
  807. return _spbase._matmul_dispatch(self, other)
  808. N = self.shape[-1]
  809. err_prefix = "matmul: dimension mismatch with signature"
  810. if other.__class__ is np.ndarray:
  811. if other.shape == (N,):
  812. return self._matmul_vector(other)
  813. if other.shape == (N, 1):
  814. result = self._matmul_vector(other.ravel())
  815. return result.reshape(*self.shape[:-1], 1)
  816. if other.ndim == 1:
  817. msg = f"{err_prefix} (n,k={N}),(k={other.shape[0]},)->(n,)"
  818. raise ValueError(msg)
  819. if other.shape[-2] == N:
  820. # check for batch dimensions compatibility
  821. batch_shape_A = self.shape[:-2]
  822. batch_shape_B = other.shape[:-2]
  823. if batch_shape_A != batch_shape_B:
  824. try:
  825. # This will raise an error if the shapes are not broadcastable
  826. np.broadcast_shapes(batch_shape_A, batch_shape_B)
  827. except ValueError:
  828. raise ValueError("Batch dimensions are not broadcastable")
  829. return self._matmul_multivector(other)
  830. else:
  831. raise ValueError(
  832. f"{err_prefix} (n,..,k={N}),(k={other.shape[-2]},..,m)->(n,..,m)"
  833. )
  834. if isscalarlike(other):
  835. # scalar value
  836. return self._mul_scalar(other)
  837. if issparse(other):
  838. self_is_1d = self.ndim == 1
  839. other_is_1d = other.ndim == 1
  840. # reshape to 2-D if self or other is 1-D
  841. if self_is_1d:
  842. self = self.reshape(self._shape_as_2d) # prepend 1 to shape
  843. if other_is_1d:
  844. other = other.reshape((other.shape[0], 1)) # append 1 to shape
  845. # Check if the inner dimensions match for matrix multiplication
  846. if N != other.shape[-2]:
  847. raise ValueError(
  848. f"{err_prefix} (n,..,k={N}),(k={other.shape[-2]},..,m)->(n,..,m)"
  849. )
  850. # If A or B has more than 2 dimensions, check for
  851. # batch dimensions compatibility
  852. if self.ndim > 2 or other.ndim > 2:
  853. batch_shape_A = self.shape[:-2]
  854. batch_shape_B = other.shape[:-2]
  855. if batch_shape_A != batch_shape_B:
  856. try:
  857. # This will raise an error if the shapes are not broadcastable
  858. np.broadcast_shapes(batch_shape_A, batch_shape_B)
  859. except ValueError:
  860. raise ValueError("Batch dimensions are not broadcastable")
  861. result = self._matmul_sparse(other)
  862. # reshape back if a or b were originally 1-D
  863. if self_is_1d:
  864. # if self was originally 1-D, reshape result accordingly
  865. result = result.reshape(tuple(result.shape[:-2]) +
  866. tuple(result.shape[-1:]))
  867. if other_is_1d:
  868. result = result.reshape(result.shape[:-1])
  869. return result
  870. def _matmul_multivector(self, other):
  871. result_dtype = upcast_char(self.dtype.char, other.dtype.char)
  872. if self.ndim >= 3 or other.ndim >= 3:
  873. # if self has shape (N,), reshape to (1,N)
  874. if self.ndim == 1:
  875. result = self.reshape(1, self.shape[0])._matmul_multivector(other)
  876. return result.reshape(tuple(other.shape[:-2]) + tuple(other.shape[-1:]))
  877. broadcast_shape = np.broadcast_shapes(self.shape[:-2], other.shape[:-2])
  878. self_shape = broadcast_shape + self.shape[-2:]
  879. other_shape = broadcast_shape + other.shape[-2:]
  880. self = self._broadcast_to(self_shape)
  881. other = np.broadcast_to(other, other_shape)
  882. result_shape = broadcast_shape + self.shape[-2:-1] + other.shape[-1:]
  883. result = np.zeros(result_shape, dtype=result_dtype)
  884. coo_matmat_dense_nd(self.nnz, len(self.shape), other.shape[-1],
  885. np.array(other_shape), np.array(result_shape),
  886. np.concatenate(self.coords),
  887. self.data, other.ravel('C'), result)
  888. return result
  889. if self.ndim == 2:
  890. result_shape = (self.shape[0], other.shape[1])
  891. col = self.col
  892. row = self.row
  893. elif self.ndim == 1:
  894. result_shape = (other.shape[1],)
  895. col = self.coords[0]
  896. row = np.zeros_like(col)
  897. result = np.zeros(result_shape, dtype=result_dtype)
  898. coo_matmat_dense(self.nnz, other.shape[-1], row, col,
  899. self.data, other.ravel('C'), result)
  900. return result.view(type=type(other))
  901. def dot(self, other):
  902. """Return the dot product of two arrays.
  903. Strictly speaking a dot product involves two vectors.
  904. But in the sense that an array with ndim >= 1 is a collection
  905. of vectors, the function computes the collection of dot products
  906. between each vector in the first array with each vector in the
  907. second array. The axis upon which the sum of products is performed
  908. is the last axis of the first array and the second to last axis of
  909. the second array. If the second array is 1-D, the last axis is used.
  910. Thus, if both arrays are 1-D, the inner product is returned.
  911. If both are 2-D, we have matrix multiplication. If `other` is 1-D,
  912. the sum product is taken along the last axis of each array. If
  913. `other` is N-D for N>=2, the sum product is over the last axis of
  914. the first array and the second-to-last axis of the second array.
  915. Parameters
  916. ----------
  917. other : array_like (dense or sparse)
  918. Second array
  919. Returns
  920. -------
  921. output : array (sparse or dense)
  922. The dot product of this array with `other`.
  923. It will be dense/sparse if `other` is dense/sparse.
  924. Examples
  925. --------
  926. >>> import numpy as np
  927. >>> from scipy.sparse import coo_array
  928. >>> A = coo_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
  929. >>> v = np.array([1, 0, -1])
  930. >>> A.dot(v)
  931. array([ 1, -3, -1], dtype=int64)
  932. For 2-D arrays it is the matrix product:
  933. >>> A = coo_array([[1, 0], [0, 1]])
  934. >>> B = coo_array([[4, 1], [2, 2]])
  935. >>> A.dot(B).toarray()
  936. array([[4, 1],
  937. [2, 2]])
  938. For 3-D arrays the shape extends unused axes by other unused axes.
  939. >>> A = coo_array(np.arange(3*4*5*6)).reshape((3,4,5,6))
  940. >>> B = coo_array(np.arange(3*4*5*6)).reshape((5,4,6,3))
  941. >>> A.dot(B).shape
  942. (3, 4, 5, 5, 4, 3)
  943. """
  944. # handle non-array input: lists, ints, etc
  945. if not (issparse(other) or isdense(other) or isscalarlike(other)):
  946. # If it's a list or whatever, treat it like an array
  947. o_array = np.asanyarray(other)
  948. if o_array.ndim == 0 and o_array.dtype == np.object_:
  949. raise TypeError(f"dot argument not supported type: '{type(other)}'")
  950. try:
  951. other.shape
  952. except AttributeError:
  953. other = o_array
  954. # Handle scalar multiplication
  955. if isscalarlike(other):
  956. return self * other
  957. # other.shape[-2:][0] gets last index of 1d, next to last index of >1d
  958. if self.shape[-1] != other.shape[-2:][0]:
  959. raise ValueError(f"shapes {self.shape} and {other.shape}"
  960. " are not aligned for n-D dot")
  961. if self.ndim < 3 and other.ndim < 3:
  962. return self @ other
  963. if isdense(other):
  964. return self._dense_dot(other)
  965. return self._sparse_dot(other.tocoo())
  966. def _sparse_dot(self, other):
  967. # already checked: at least one is >2d, neither scalar, both are coo
  968. # Ravel non-reduced axes coordinates
  969. self_2d, s_new_shape = _convert_to_2d(self, [self.ndim - 1])
  970. other_2d, o_new_shape = _convert_to_2d(other, [max(0, other.ndim - 2)])
  971. prod = self_2d @ other_2d.T # routes via 2-D CSR
  972. prod = prod.tocoo()
  973. # Combine the shapes of the non-contracted axes
  974. combined_shape = s_new_shape + o_new_shape
  975. # Unravel the 2D coordinates to get multi-dimensional coordinates
  976. coords = []
  977. new_shapes = (s_new_shape, o_new_shape) if s_new_shape else (o_new_shape,)
  978. for c, s in zip(prod.coords, new_shapes):
  979. coords.extend(np.unravel_index(c, s))
  980. # Construct the resulting COO array with coords and shape
  981. return coo_array((prod.data, coords), shape=combined_shape)
  982. def _dense_dot(self, other):
  983. # already checked: self is >0d, other is dense and >0d
  984. # Ravel non-reduced axes coordinates
  985. s_ndim = self.ndim
  986. if s_ndim <= 2:
  987. s_new_shape = () if s_ndim == 1 else (self.shape[0],)
  988. self_2d = self
  989. else:
  990. self_2d, s_new_shape = _convert_to_2d(self, [self.ndim - 1])
  991. o_ndim = other.ndim
  992. if o_ndim <= 2:
  993. o_new_shape = () if o_ndim == 1 else (other.shape[-1],)
  994. other_2d = other
  995. else:
  996. o_new_shape = other.shape[:-2] + other.shape[-1:]
  997. reorder_dims = (o_ndim - 2, *range(o_ndim - 2), o_ndim - 1)
  998. o_reorg = np.transpose(other, reorder_dims)
  999. other_2d = o_reorg.reshape((other.shape[-2], math.prod(o_new_shape)))
  1000. prod = self_2d @ other_2d # routes via 2-D CSR
  1001. # Combine the shapes of the non-contracted axes
  1002. combined_shape = s_new_shape + o_new_shape
  1003. return prod.reshape(combined_shape)
  1004. def tensordot(self, other, axes=2):
  1005. """Return the tensordot product with another array along the given axes.
  1006. The tensordot differs from dot and matmul in that any axis can be
  1007. chosen for each of the first and second array and the sum of the
  1008. products is computed just like for matrix multiplication, only not
  1009. just for the rows of the first times the columns of the second. It
  1010. takes the dot product of the collection of vectors along the specified
  1011. axes. Here we can even take the sum of the products along two or even
  1012. more axes if desired. So, tensordot is a dot product computation
  1013. applied to arrays of any dimension >= 1. It is like matmul but over
  1014. arbitrary axes for each matrix.
  1015. Given two tensors, `a` and `b`, and the desired axes specified as a
  1016. 2-tuple/list/array containing two sequences of axis numbers,
  1017. ``(a_axes, b_axes)``, sum the products of `a`'s and `b`'s elements
  1018. (components) over the axes specified by ``a_axes`` and ``b_axes``.
  1019. The `axes` input can be a single non-negative integer, ``N``;
  1020. if it is, then the last ``N`` dimensions of `a` and the first
  1021. ``N`` dimensions of `b` are summed over.
  1022. Parameters
  1023. ----------
  1024. a, b : array_like
  1025. Tensors to "dot".
  1026. axes : int or (2,) array_like
  1027. * integer_like
  1028. If an int N, sum over the last N axes of `a` and the first N axes
  1029. of `b` in order. The sizes of the corresponding axes must match.
  1030. * (2,) array_like
  1031. A 2-tuple of sequences of axes to be summed over, the first applying
  1032. to `a`, the second to `b`. The sequences must be the same length.
  1033. The shape of the corresponding axes must match between `a` and `b`.
  1034. Returns
  1035. -------
  1036. output : coo_array
  1037. The tensor dot product of this array with `other`.
  1038. It will be dense/sparse if `other` is dense/sparse.
  1039. See Also
  1040. --------
  1041. dot
  1042. Examples
  1043. --------
  1044. >>> import numpy as np
  1045. >>> import scipy.sparse
  1046. >>> A = scipy.sparse.coo_array([[[2, 3], [0, 0]], [[0, 1], [0, 5]]])
  1047. >>> A.shape
  1048. (2, 2, 2)
  1049. Integer axes N are shorthand for (range(-N, 0), range(0, N)):
  1050. >>> A.tensordot(A, axes=1).toarray()
  1051. array([[[[ 4, 9],
  1052. [ 0, 15]],
  1053. <BLANKLINE>
  1054. [[ 0, 0],
  1055. [ 0, 0]]],
  1056. <BLANKLINE>
  1057. <BLANKLINE>
  1058. [[[ 0, 1],
  1059. [ 0, 5]],
  1060. <BLANKLINE>
  1061. [[ 0, 5],
  1062. [ 0, 25]]]])
  1063. >>> A.tensordot(A, axes=2).toarray()
  1064. array([[ 4, 6],
  1065. [ 0, 25]])
  1066. >>> A.tensordot(A, axes=3)
  1067. array(39)
  1068. Using tuple for axes:
  1069. >>> a = scipy.sparse.coo_array(np.arange(60).reshape(3,4,5))
  1070. >>> b = np.arange(24).reshape(4,3,2)
  1071. >>> c = a.tensordot(b, axes=([1,0],[0,1]))
  1072. >>> c.shape
  1073. (5, 2)
  1074. >>> c
  1075. array([[4400, 4730],
  1076. [4532, 4874],
  1077. [4664, 5018],
  1078. [4796, 5162],
  1079. [4928, 5306]])
  1080. """
  1081. if not isdense(other) and not issparse(other):
  1082. # If it's a list or whatever, treat it like an array
  1083. other_array = np.asanyarray(other)
  1084. if other_array.ndim == 0 and other_array.dtype == np.object_:
  1085. raise TypeError(f"tensordot arg not supported type: '{type(other)}'")
  1086. try:
  1087. other.shape
  1088. except AttributeError:
  1089. other = other_array
  1090. axes_self, axes_other = _process_axes(self.ndim, other.ndim, axes)
  1091. # Check for shape compatibility along specified axes
  1092. if any(self.shape[ax] != other.shape[bx]
  1093. for ax, bx in zip(axes_self, axes_other)):
  1094. raise ValueError("sizes of the corresponding axes must match")
  1095. if isdense(other):
  1096. return self._dense_tensordot(other, axes_self, axes_other)
  1097. else:
  1098. return self._sparse_tensordot(other, axes_self, axes_other)
  1099. def _sparse_tensordot(self, other, s_axes, o_axes):
  1100. # Prepare the tensors for tensordot operation
  1101. # Ravel non-reduced axes coordinates
  1102. self_2d, s_new_shape = _convert_to_2d(self, s_axes)
  1103. other_2d, o_new_shape = _convert_to_2d(other, o_axes)
  1104. # Perform matrix multiplication (routed via 2-D CSR)
  1105. prod = self_2d @ other_2d.T
  1106. # handle case of scalar result (axis includes all axes for both)
  1107. if not issparse(prod):
  1108. return prod
  1109. prod = prod.tocoo()
  1110. # Combine the shapes of the non-contracted axes
  1111. combined_shape = s_new_shape + o_new_shape
  1112. # Unravel the 2D coordinates to get multi-dimensional coordinates
  1113. coords = []
  1114. new_shapes = (s_new_shape, o_new_shape) if s_new_shape else (o_new_shape,)
  1115. for c, s in zip(prod.coords, new_shapes):
  1116. if s:
  1117. coords.extend(np.unravel_index(c, s))
  1118. # Construct the resulting COO array with coords and shape
  1119. return coo_array((prod.data, coords), shape=combined_shape)
  1120. def _dense_tensordot(self, other, s_axes, o_axes):
  1121. s_ndim = len(self.shape)
  1122. o_ndim = len(other.shape)
  1123. s_non_axes = [i for i in range(s_ndim) if i not in s_axes]
  1124. s_axes_shape = [self.shape[i] for i in s_axes]
  1125. s_non_axes_shape = [self.shape[i] for i in s_non_axes]
  1126. o_non_axes = [i for i in range(o_ndim) if i not in o_axes]
  1127. o_axes_shape = [other.shape[i] for i in o_axes]
  1128. o_non_axes_shape = [other.shape[i] for i in o_non_axes]
  1129. left = self.transpose(s_non_axes + s_axes)
  1130. right = np.transpose(other, o_non_axes[:-1] + o_axes + o_non_axes[-1:])
  1131. reshape_left = (*s_non_axes_shape, math.prod(s_axes_shape))
  1132. reshape_right = (*o_non_axes_shape[:-1], math.prod(o_axes_shape),
  1133. *o_non_axes_shape[-1:])
  1134. return left.reshape(reshape_left).dot(right.reshape(reshape_right))
  1135. def _matmul_sparse(self, other):
  1136. """
  1137. Perform sparse-sparse matrix multiplication for two n-D COO arrays.
  1138. The method converts input n-D arrays to 2-D block array format,
  1139. uses csr_matmat to multiply them, and then converts the
  1140. result back to n-D COO array.
  1141. Parameters:
  1142. self (COO): The first n-D sparse array in COO format.
  1143. other (COO): The second n-D sparse array in COO format.
  1144. Returns:
  1145. prod (COO): The resulting n-D sparse array after multiplication.
  1146. """
  1147. if self.ndim < 3 and other.ndim < 3:
  1148. return _spbase._matmul_sparse(self, other)
  1149. # Get the shapes of self and other
  1150. self_shape = self.shape
  1151. other_shape = other.shape
  1152. # Determine the new shape to broadcast self and other
  1153. broadcast_shape = np.broadcast_shapes(self_shape[:-2], other_shape[:-2])
  1154. self_new_shape = tuple(broadcast_shape) + self_shape[-2:]
  1155. other_new_shape = tuple(broadcast_shape) + other_shape[-2:]
  1156. self_broadcasted = self._broadcast_to(self_new_shape)
  1157. other_broadcasted = other._broadcast_to(other_new_shape)
  1158. # Convert n-D COO arrays to 2-D block diagonal arrays
  1159. self_block_diag = _block_diag(self_broadcasted)
  1160. other_block_diag = _block_diag(other_broadcasted)
  1161. # Use csr_matmat to perform sparse matrix multiplication
  1162. prod_block_diag = (self_block_diag @ other_block_diag).tocoo()
  1163. # Convert the 2-D block diagonal array back to n-D
  1164. return _extract_block_diag(
  1165. prod_block_diag,
  1166. shape=(*broadcast_shape, self.shape[-2], other.shape[-1]),
  1167. )
  1168. def _broadcast_to(self, new_shape, copy=False):
  1169. if self.shape == new_shape:
  1170. return self.copy() if copy else self
  1171. old_shape = self.shape
  1172. # Check if the new shape is compatible for broadcasting
  1173. if len(new_shape) < len(old_shape):
  1174. raise ValueError("New shape must have at least as many dimensions"
  1175. " as the current shape")
  1176. # Add leading ones to shape to ensure same length as `new_shape`
  1177. shape = (1,) * (len(new_shape) - len(old_shape)) + tuple(old_shape)
  1178. # Ensure the old shape can be broadcast to the new shape
  1179. if any((o != 1 and o != n) for o, n in zip(shape, new_shape)):
  1180. raise ValueError(f"current shape {old_shape} cannot be "
  1181. "broadcast to new shape {new_shape}")
  1182. # Reshape the COO array to match the new dimensions
  1183. self = self.reshape(shape)
  1184. idx_dtype = get_index_dtype(self.coords, maxval=max(new_shape))
  1185. coords = self.coords
  1186. new_data = self.data
  1187. new_coords = coords[-1:] # Copy last coordinate to start
  1188. cum_repeat = 1 # Cumulative repeat factor for broadcasting
  1189. if shape[-1] != new_shape[-1]: # broadcasting the n-th (col) dimension
  1190. repeat_count = new_shape[-1]
  1191. cum_repeat *= repeat_count
  1192. new_data = np.tile(new_data, repeat_count)
  1193. new_dim = np.repeat(np.arange(0, repeat_count, dtype=idx_dtype), self.nnz)
  1194. new_coords = (new_dim,)
  1195. for i in range(-2, -(len(shape)+1), -1):
  1196. if shape[i] != new_shape[i]:
  1197. repeat_count = new_shape[i] # number of times to repeat data, coords
  1198. cum_repeat *= repeat_count # update cumulative repeat factor
  1199. nnz = len(new_data) # Number of non-zero elements so far
  1200. # Tile data and coordinates to match the new repeat count
  1201. new_data = np.tile(new_data, repeat_count)
  1202. new_coords = tuple(np.tile(new_coords[i+1:], repeat_count))
  1203. # Create new dimensions and stack them
  1204. new_dim = np.repeat(np.arange(0, repeat_count, dtype=idx_dtype), nnz)
  1205. new_coords = (new_dim,) + new_coords
  1206. else:
  1207. # If no broadcasting needed, tile the coordinates
  1208. new_dim = np.tile(coords[i], cum_repeat)
  1209. new_coords = (new_dim,) + new_coords
  1210. return coo_array((new_data, new_coords), new_shape)
  1211. def _sum_nd(self, axis, res_dtype, out):
  1212. # axis and out are preprocessed. out.shape is new_shape
  1213. A2d, new_shape = _convert_to_2d(self, axis)
  1214. ones = np.ones((A2d.shape[1], 1), dtype=res_dtype)
  1215. # sets dtype while loading into out
  1216. out[...] = (A2d @ ones).reshape(new_shape)
  1217. return out
  1218. def _min_or_max_axis_nd(self, axis, min_or_max, explicit):
  1219. A2d, new_shape = _convert_to_2d(self, axis)
  1220. res = A2d._min_or_max_axis(1, min_or_max, explicit)
  1221. unraveled_coords = np.unravel_index(res.coords[0], new_shape)
  1222. return coo_array((res.data, unraveled_coords), new_shape)
  1223. def _argminmax_axis_nd(self, axis, argminmax, compare, explicit):
  1224. A2d, new_shape = _convert_to_2d(self, axis)
  1225. res_flat = A2d._argminmax_axis(1, argminmax, compare, explicit)
  1226. return res_flat.reshape(new_shape)
  1227. def _block_diag(self):
  1228. """
  1229. Converts an N-D COO array into a 2-D COO array in block diagonal form.
  1230. Parameters:
  1231. self (coo_array): An N-Dimensional COO sparse array.
  1232. Returns:
  1233. coo_array: A 2-Dimensional COO sparse array in block diagonal form.
  1234. """
  1235. if self.ndim<2:
  1236. raise ValueError("array must have atleast dim=2")
  1237. num_blocks = math.prod(self.shape[:-2])
  1238. n_col = self.shape[-1]
  1239. n_row = self.shape[-2]
  1240. res_arr = self.reshape((num_blocks, n_row, n_col))
  1241. new_coords = (
  1242. res_arr.coords[1] + res_arr.coords[0] * res_arr.shape[1],
  1243. res_arr.coords[2] + res_arr.coords[0] * res_arr.shape[2],
  1244. )
  1245. new_shape = (num_blocks * n_row, num_blocks * n_col)
  1246. return coo_array((self.data, tuple(new_coords)), shape=new_shape)
  1247. def _extract_block_diag(self, shape):
  1248. n_row, n_col = shape[-2], shape[-1]
  1249. # Extract data and coordinates from the block diagonal COO array
  1250. data = self.data
  1251. row, col = self.row, self.col
  1252. # Initialize new coordinates array
  1253. new_coords = np.empty((len(shape), self.nnz), dtype=int)
  1254. # Calculate within-block indices
  1255. new_coords[-2] = row % n_row
  1256. new_coords[-1] = col % n_col
  1257. # Calculate coordinates for higher dimensions
  1258. temp_block_idx = row // n_row
  1259. for i in range(len(shape) - 3, -1, -1):
  1260. size = shape[i]
  1261. new_coords[i] = temp_block_idx % size
  1262. temp_block_idx = temp_block_idx // size
  1263. # Create the new COO array with the original n-D shape
  1264. return coo_array((data, tuple(new_coords)), shape=shape)
  1265. def _get_sparse_data_and_coords(x, new_shape, dtype):
  1266. x = x.tocoo()
  1267. x.sum_duplicates()
  1268. x_coords = list(x.coords)
  1269. x_data = x.data.astype(dtype, copy=False)
  1270. x_shape = x.shape
  1271. if new_shape == x_shape:
  1272. return x_data, x_coords
  1273. # broadcasting needed
  1274. len_diff = len(new_shape) - len(x_shape)
  1275. if len_diff > 0:
  1276. # prepend ones to shape of x to match ndim
  1277. x_shape = [1] * len_diff + list(x_shape)
  1278. coord_zeros = np.zeroslike(x_coords[0])
  1279. x_coords = tuple([coord_zeros] * len_diff + x_coords)
  1280. # taking away axes (squeezing) is not part of broadcasting, but long
  1281. # spmatrix history of using 2d vectors in 1d space, so we manually
  1282. # squeeze the front and back axes here to be compatible
  1283. if len_diff < 0:
  1284. for _ in range(-len_diff):
  1285. if x_shape[0] == 1:
  1286. x_shape = x_shape[1:]
  1287. x_coords = x_coords[1:]
  1288. elif x_shape[-1] == 1:
  1289. x_shape = x_shape[:-1]
  1290. x_coords = x_coords[:-1]
  1291. else:
  1292. raise ValueError("shape mismatch in assignment")
  1293. # broadcast with copy (will need to copy eventually anyway)
  1294. tot_expand = 1
  1295. for i, (nn, nx) in enumerate(zip(new_shape, x_shape)):
  1296. if nn == nx:
  1297. continue
  1298. if nx != 1:
  1299. raise ValueError("shape mismatch in assignment")
  1300. x_nnz = len(x_coords[0])
  1301. x_coords[i] = np.repeat(np.arange(nn), x_nnz)
  1302. for j, co in enumerate(x_coords):
  1303. if j == i:
  1304. continue
  1305. x_coords[j] = np.tile(co, nn)
  1306. tot_expand *= nn
  1307. x_data = np.tile(x_data.ravel(), tot_expand)
  1308. return x_data, x_coords
  1309. def _get_dense_data_and_coords(x, new_shape):
  1310. if x.shape != new_shape:
  1311. x = np.broadcast_to(x.squeeze(), new_shape)
  1312. # shift scalar input to 1d so has coords
  1313. if new_shape == ():
  1314. x_coords = tuple([np.array([0])] * len(new_shape))
  1315. x_data = x.ravel()
  1316. else:
  1317. x_coords = x.nonzero()
  1318. x_data = x[x_coords]
  1319. return x_data, x_coords
  1320. def _process_axes(ndim_a, ndim_b, axes):
  1321. if isinstance(axes, int):
  1322. if axes < 1 or axes > min(ndim_a, ndim_b):
  1323. raise ValueError("axes integer is out of bounds for input arrays")
  1324. axes_a = list(range(ndim_a - axes, ndim_a))
  1325. axes_b = list(range(axes))
  1326. elif isinstance(axes, tuple | list):
  1327. if len(axes) != 2:
  1328. raise ValueError("axes must be a tuple/list of length 2")
  1329. axes_a, axes_b = axes
  1330. if len(axes_a) != len(axes_b):
  1331. raise ValueError("axes lists/tuples must be of the same length")
  1332. if any(ax >= ndim_a or ax < -ndim_a for ax in axes_a) or \
  1333. any(bx >= ndim_b or bx < -ndim_b for bx in axes_b):
  1334. raise ValueError("axes indices are out of bounds for input arrays")
  1335. else:
  1336. raise TypeError("axes must be an integer or a tuple/list of integers")
  1337. axes_a = [axis + ndim_a if axis < 0 else axis for axis in axes_a]
  1338. axes_b = [axis + ndim_b if axis < 0 else axis for axis in axes_b]
  1339. return axes_a, axes_b
  1340. def _convert_to_2d(coo, axis):
  1341. axis_coords = tuple(coo.coords[i] for i in axis)
  1342. axis_shape = tuple(coo.shape[i] for i in axis)
  1343. axis_ravel = _ravel_coords(axis_coords, axis_shape)
  1344. ndim = len(coo.coords)
  1345. non_axis = tuple(i for i in range(ndim) if i not in axis)
  1346. if non_axis:
  1347. non_axis_coords = tuple(coo.coords[i] for i in non_axis)
  1348. non_axis_shape = tuple(coo.shape[i] for i in non_axis)
  1349. non_axis_ravel = _ravel_coords(non_axis_coords, non_axis_shape)
  1350. coords_2d = (non_axis_ravel, axis_ravel)
  1351. shape_2d = (math.prod(non_axis_shape), math.prod(axis_shape))
  1352. else: # all axes included in axis so result will have 1 element
  1353. coords_2d = (axis_ravel,)
  1354. shape_2d = (math.prod(axis_shape),)
  1355. non_axis_shape = ()
  1356. new_coo = coo_array((coo.data, coords_2d), shape=shape_2d)
  1357. return new_coo, non_axis_shape
  1358. def _ravel_coords(coords, shape, order='C'):
  1359. """Like np.ravel_multi_index, but avoids some overflow issues."""
  1360. if len(coords) == 1:
  1361. return coords[0]
  1362. # Handle overflow as in https://github.com/scipy/scipy/pull/9132
  1363. if len(coords) == 2:
  1364. nrows, ncols = shape
  1365. row, col = coords
  1366. if order == 'C':
  1367. maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1))
  1368. idx_dtype = get_index_dtype(maxval=maxval)
  1369. return np.multiply(ncols, row, dtype=idx_dtype) + col
  1370. elif order == 'F':
  1371. maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1))
  1372. idx_dtype = get_index_dtype(maxval=maxval)
  1373. return np.multiply(nrows, col, dtype=idx_dtype) + row
  1374. else:
  1375. raise ValueError("'order' must be 'C' or 'F'")
  1376. return np.ravel_multi_index(coords, shape, order=order)
  1377. def isspmatrix_coo(x):
  1378. """Is `x` of coo_matrix type?
  1379. Parameters
  1380. ----------
  1381. x
  1382. object to check for being a coo matrix
  1383. Returns
  1384. -------
  1385. bool
  1386. True if `x` is a coo matrix, False otherwise
  1387. Examples
  1388. --------
  1389. >>> from scipy.sparse import coo_array, coo_matrix, csr_matrix, isspmatrix_coo
  1390. >>> isspmatrix_coo(coo_matrix([[5]]))
  1391. True
  1392. >>> isspmatrix_coo(coo_array([[5]]))
  1393. False
  1394. >>> isspmatrix_coo(csr_matrix([[5]]))
  1395. False
  1396. """
  1397. return isinstance(x, coo_matrix)
  1398. # This namespace class separates array from matrix with isinstance
  1399. class coo_array(_coo_base, sparray):
  1400. """
  1401. A sparse array in COOrdinate format.
  1402. Also known as the 'ijv' or 'triplet' format.
  1403. This can be instantiated in several ways:
  1404. coo_array(D)
  1405. where D is an ndarray
  1406. coo_array(S)
  1407. with another sparse array or matrix S (equivalent to S.tocoo())
  1408. coo_array(shape, [dtype])
  1409. to construct an empty sparse array with shape `shape`
  1410. dtype is optional, defaulting to dtype='d'.
  1411. coo_array((data, coords), [shape])
  1412. to construct from existing data and index arrays:
  1413. 1. data[:] the entries of the sparse array, in any order
  1414. 2. coords[i][:] the axis-i coordinates of the data entries
  1415. Where ``A[coords] = data``, and coords is a tuple of index arrays.
  1416. When shape is not specified, it is inferred from the index arrays.
  1417. Attributes
  1418. ----------
  1419. dtype : dtype
  1420. Data type of the sparse array
  1421. shape : tuple of integers
  1422. Shape of the sparse array
  1423. ndim : int
  1424. Number of dimensions of the sparse array
  1425. nnz
  1426. size
  1427. data
  1428. COO format data array of the sparse array
  1429. coords
  1430. COO format tuple of index arrays
  1431. has_canonical_format : bool
  1432. Whether the matrix has sorted coordinates and no duplicates
  1433. format
  1434. T
  1435. Notes
  1436. -----
  1437. Sparse arrays can be used in arithmetic operations: they support
  1438. addition, subtraction, multiplication, division, and matrix power.
  1439. Advantages of the COO format
  1440. - facilitates fast conversion among sparse formats
  1441. - permits duplicate entries (see example)
  1442. - very fast conversion to and from CSR/CSC formats
  1443. Disadvantages of the COO format
  1444. - does not directly support:
  1445. + arithmetic operations
  1446. + slicing
  1447. Intended Usage
  1448. - COO is a fast format for constructing sparse arrays
  1449. - Once a COO array has been constructed, convert to CSR or
  1450. CSC format for fast arithmetic and matrix vector operations
  1451. - By default when converting to CSR or CSC format, duplicate (i,j)
  1452. entries will be summed together. This facilitates efficient
  1453. construction of finite element matrices and the like. (see example)
  1454. Canonical format
  1455. - Entries and coordinates sorted by row, then column.
  1456. - There are no duplicate entries (i.e. duplicate (i,j) locations)
  1457. - Data arrays MAY have explicit zeros.
  1458. Examples
  1459. --------
  1460. >>> # Constructing an empty sparse array
  1461. >>> import numpy as np
  1462. >>> from scipy.sparse import coo_array
  1463. >>> coo_array((3, 4), dtype=np.int8).toarray()
  1464. array([[0, 0, 0, 0],
  1465. [0, 0, 0, 0],
  1466. [0, 0, 0, 0]], dtype=int8)
  1467. >>> # Constructing a sparse array using ijv format
  1468. >>> row = np.array([0, 3, 1, 0])
  1469. >>> col = np.array([0, 3, 1, 2])
  1470. >>> data = np.array([4, 5, 7, 9])
  1471. >>> coo_array((data, (row, col)), shape=(4, 4)).toarray()
  1472. array([[4, 0, 9, 0],
  1473. [0, 7, 0, 0],
  1474. [0, 0, 0, 0],
  1475. [0, 0, 0, 5]])
  1476. >>> # Constructing a sparse array with duplicate coordinates
  1477. >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
  1478. >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
  1479. >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
  1480. >>> coo = coo_array((data, (row, col)), shape=(4, 4))
  1481. >>> # Duplicate coordinates are maintained until implicitly or explicitly summed
  1482. >>> np.max(coo.data)
  1483. 1
  1484. >>> coo.toarray()
  1485. array([[3, 0, 1, 0],
  1486. [0, 2, 0, 0],
  1487. [0, 0, 0, 0],
  1488. [0, 0, 0, 1]])
  1489. """
  1490. class coo_matrix(spmatrix, _coo_base):
  1491. """
  1492. A sparse matrix in COOrdinate format.
  1493. Also known as the 'ijv' or 'triplet' format.
  1494. This can be instantiated in several ways:
  1495. coo_matrix(D)
  1496. where D is a 2-D ndarray
  1497. coo_matrix(S)
  1498. with another sparse array or matrix S (equivalent to S.tocoo())
  1499. coo_matrix((M, N), [dtype])
  1500. to construct an empty matrix with shape (M, N)
  1501. dtype is optional, defaulting to dtype='d'.
  1502. coo_matrix((data, (i, j)), [shape=(M, N)])
  1503. to construct from three arrays:
  1504. 1. data[:] the entries of the matrix, in any order
  1505. 2. i[:] the row indices of the matrix entries
  1506. 3. j[:] the column indices of the matrix entries
  1507. Where ``A[i[k], j[k]] = data[k]``. When shape is not
  1508. specified, it is inferred from the index arrays
  1509. Attributes
  1510. ----------
  1511. dtype : dtype
  1512. Data type of the matrix
  1513. shape : 2-tuple
  1514. Shape of the matrix
  1515. ndim : int
  1516. Number of dimensions (this is always 2)
  1517. nnz
  1518. size
  1519. data
  1520. COO format data array of the matrix
  1521. row
  1522. COO format row index array of the matrix
  1523. col
  1524. COO format column index array of the matrix
  1525. has_canonical_format : bool
  1526. Whether the matrix has sorted indices and no duplicates
  1527. format
  1528. T
  1529. Notes
  1530. -----
  1531. Sparse matrices can be used in arithmetic operations: they support
  1532. addition, subtraction, multiplication, division, and matrix power.
  1533. Advantages of the COO format
  1534. - facilitates fast conversion among sparse formats
  1535. - permits duplicate entries (see example)
  1536. - very fast conversion to and from CSR/CSC formats
  1537. Disadvantages of the COO format
  1538. - does not directly support:
  1539. + arithmetic operations
  1540. + slicing
  1541. Intended Usage
  1542. - COO is a fast format for constructing sparse matrices
  1543. - Once a COO matrix has been constructed, convert to CSR or
  1544. CSC format for fast arithmetic and matrix vector operations
  1545. - By default when converting to CSR or CSC format, duplicate (i,j)
  1546. entries will be summed together. This facilitates efficient
  1547. construction of finite element matrices and the like. (see example)
  1548. Canonical format
  1549. - Entries and coordinates sorted by row, then column.
  1550. - There are no duplicate entries (i.e. duplicate (i,j) locations)
  1551. - Data arrays MAY have explicit zeros.
  1552. Examples
  1553. --------
  1554. >>> # Constructing an empty matrix
  1555. >>> import numpy as np
  1556. >>> from scipy.sparse import coo_matrix
  1557. >>> coo_matrix((3, 4), dtype=np.int8).toarray()
  1558. array([[0, 0, 0, 0],
  1559. [0, 0, 0, 0],
  1560. [0, 0, 0, 0]], dtype=int8)
  1561. >>> # Constructing a matrix using ijv format
  1562. >>> row = np.array([0, 3, 1, 0])
  1563. >>> col = np.array([0, 3, 1, 2])
  1564. >>> data = np.array([4, 5, 7, 9])
  1565. >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
  1566. array([[4, 0, 9, 0],
  1567. [0, 7, 0, 0],
  1568. [0, 0, 0, 0],
  1569. [0, 0, 0, 5]])
  1570. >>> # Constructing a matrix with duplicate coordinates
  1571. >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
  1572. >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
  1573. >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
  1574. >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
  1575. >>> # Duplicate coordinates are maintained until implicitly or explicitly summed
  1576. >>> np.max(coo.data)
  1577. 1
  1578. >>> coo.toarray()
  1579. array([[3, 0, 1, 0],
  1580. [0, 2, 0, 0],
  1581. [0, 0, 0, 0],
  1582. [0, 0, 0, 1]])
  1583. """
  1584. def __setstate__(self, state):
  1585. if 'coords' not in state:
  1586. # For retro-compatibility with the previous attributes
  1587. # storing nnz coordinates for 2D COO matrix.
  1588. state['coords'] = (state.pop('row'), state.pop('col'))
  1589. self.__dict__.update(state)
  1590. def __getitem__(self, key):
  1591. raise TypeError("'coo_matrix' object is not subscriptable")
  1592. def __setitem__(self, key, x):
  1593. raise TypeError("'coo_matrix' object does not support item assignment")