| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890 |
- """ A sparse matrix in COOrdinate or 'triplet' format"""
- __docformat__ = "restructuredtext en"
- __all__ = ['coo_array', 'coo_matrix', 'isspmatrix_coo']
- import math
- from warnings import warn
- import numpy as np
- from .._lib._util import copy_if_needed
- from ._matrix import spmatrix
- from ._sparsetools import (coo_tocsr, coo_todense, coo_todense_nd,
- coo_matvec, coo_matvec_nd, coo_matmat_dense,
- coo_matmat_dense_nd)
- from ._base import issparse, SparseEfficiencyWarning, _spbase, sparray
- from ._data import _data_matrix, _minmax_mixin
- from ._sputils import (upcast_char, to_native, isshape, getdtype,
- getdata, downcast_intp_index, get_index_dtype,
- check_shape, check_reshape_kwargs, isscalarlike,
- isintlike, isdense)
- from ._index import _validate_indices, _broadcast_arrays
- import operator
- class _coo_base(_data_matrix, _minmax_mixin):
- _format = 'coo'
- _allow_nd = range(1, 65)
- def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None):
- _data_matrix.__init__(self, arg1, maxprint=maxprint)
- if not copy:
- copy = copy_if_needed
- if isinstance(arg1, tuple):
- if isshape(arg1, allow_nd=self._allow_nd):
- self._shape = check_shape(arg1, allow_nd=self._allow_nd)
- idx_dtype = self._get_index_dtype(maxval=max(self._shape))
- data_dtype = getdtype(dtype, default=float)
- self.coords = tuple(np.array([], dtype=idx_dtype)
- for _ in range(len(self._shape)))
- self.data = np.array([], dtype=data_dtype)
- self.has_canonical_format = True
- else:
- try:
- obj, coords = arg1
- except (TypeError, ValueError) as e:
- raise TypeError('invalid input format') from e
- if shape is None:
- if any(len(idx) == 0 for idx in coords):
- raise ValueError('cannot infer dimensions from zero '
- 'sized index arrays')
- shape = tuple(operator.index(np.max(idx)) + 1
- for idx in coords)
- self._shape = check_shape(shape, allow_nd=self._allow_nd)
- idx_dtype = self._get_index_dtype(coords,
- maxval=max(self.shape),
- check_contents=True)
- self.coords = tuple(np.array(idx, copy=copy, dtype=idx_dtype)
- for idx in coords)
- self.data = getdata(obj, copy=copy, dtype=dtype)
- self.has_canonical_format = False
- else:
- if issparse(arg1):
- if arg1.format == self.format and copy:
- self.coords = tuple(idx.copy() for idx in arg1.coords)
- self.data = arg1.data.astype(getdtype(dtype, arg1)) # copy=True
- self._shape = check_shape(arg1.shape, allow_nd=self._allow_nd)
- self.has_canonical_format = arg1.has_canonical_format
- else:
- coo = arg1.tocoo(copy=copy)
- self.coords = tuple(coo.coords)
- self.data = coo.data.astype(getdtype(dtype, coo), copy=False)
- self._shape = check_shape(coo.shape, allow_nd=self._allow_nd)
- self.has_canonical_format = False
- else:
- # dense argument
- M = np.asarray(arg1)
- if not isinstance(self, sparray):
- M = np.atleast_2d(M)
- if M.ndim != 2:
- raise TypeError(f'expected 2D array or matrix, not {M.ndim}D')
- self._shape = check_shape(M.shape, allow_nd=self._allow_nd)
- if shape is not None:
- if check_shape(shape, allow_nd=self._allow_nd) != self._shape:
- message = f'inconsistent shapes: {shape} != {self._shape}'
- raise ValueError(message)
- index_dtype = self._get_index_dtype(maxval=max(self._shape))
- coords = M.nonzero()
- self.coords = tuple(idx.astype(index_dtype, copy=False)
- for idx in coords)
- self.data = getdata(M[coords], copy=copy, dtype=dtype)
- self.has_canonical_format = True
- if len(self._shape) > 2:
- self.coords = tuple(idx.astype(np.int64, copy=False) for idx in self.coords)
- self._check()
- @property
- def row(self):
- if self.ndim > 1:
- return self.coords[-2]
- result = np.zeros_like(self.col)
- result.setflags(write=False)
- return result
- @row.setter
- def row(self, new_row):
- if self.ndim < 2:
- raise ValueError('cannot set row attribute of a 1-dimensional sparse array')
- new_row = np.asarray(new_row, dtype=self.coords[-2].dtype)
- self.coords = self.coords[:-2] + (new_row,) + self.coords[-1:]
- @property
- def col(self):
- return self.coords[-1]
- @col.setter
- def col(self, new_col):
- new_col = np.asarray(new_col, dtype=self.coords[-1].dtype)
- self.coords = self.coords[:-1] + (new_col,)
- def reshape(self, *args, **kwargs):
- shape = check_shape(args, self.shape, allow_nd=self._allow_nd)
- order, copy = check_reshape_kwargs(kwargs)
- # Return early if reshape is not required
- if shape == self.shape:
- if copy:
- return self.copy()
- else:
- return self
- # When reducing the number of dimensions, we need to be careful about
- # index overflow. This is why we can't simply call
- # `np.ravel_multi_index()` followed by `np.unravel_index()` here.
- flat_coords = _ravel_coords(self.coords, self.shape, order=order)
- if len(shape) == 2:
- if order == 'C':
- new_coords = divmod(flat_coords, shape[1])
- else:
- new_coords = divmod(flat_coords, shape[0])[::-1]
- else:
- new_coords = np.unravel_index(flat_coords, shape, order=order)
- idx_dtype = self._get_index_dtype(self.coords, maxval=max(shape))
- new_coords = tuple(np.asarray(co, dtype=idx_dtype) for co in new_coords)
- # Handle copy here rather than passing on to the constructor so that no
- # copy will be made of `new_coords` regardless.
- if copy:
- new_data = self.data.copy()
- else:
- new_data = self.data
- return self.__class__((new_data, new_coords), shape=shape, copy=False)
- reshape.__doc__ = _spbase.reshape.__doc__
- def _getnnz(self, axis=None):
- if axis is None or (axis == 0 and self.ndim == 1):
- nnz = len(self.data)
- if any(len(idx) != nnz for idx in self.coords):
- raise ValueError('all index and data arrays must have the '
- 'same length')
- if self.data.ndim != 1 or any(idx.ndim != 1 for idx in self.coords):
- raise ValueError('coordinates and data arrays must be 1-D')
- return int(nnz)
- if axis < 0:
- axis += self.ndim
- if axis >= self.ndim:
- raise ValueError('axis out of bounds')
- return np.bincount(downcast_intp_index(self.coords[1 - axis]),
- minlength=self.shape[1 - axis])
- _getnnz.__doc__ = _spbase._getnnz.__doc__
- def count_nonzero(self, axis=None):
- self.sum_duplicates()
- if axis is None:
- return np.count_nonzero(self.data)
- if axis < 0:
- axis += self.ndim
- if axis < 0 or axis >= self.ndim:
- raise ValueError('axis out of bounds')
- mask = self.data != 0
- coord = self.coords[1 - axis][mask]
- return np.bincount(downcast_intp_index(coord), minlength=self.shape[1 - axis])
- count_nonzero.__doc__ = _spbase.count_nonzero.__doc__
- def _check(self):
- """ Checks data structure for consistency """
- if self.ndim != len(self.coords):
- raise ValueError('mismatching number of index arrays for shape; '
- f'got {len(self.coords)}, expected {self.ndim}')
- # index arrays should have integer data types
- for i, idx in enumerate(self.coords):
- if idx.dtype.kind != 'i':
- warn(f'index array {i} has non-integer dtype ({idx.dtype.name})',
- stacklevel=3)
- idx_dtype = self._get_index_dtype(self.coords, maxval=max(self.shape))
- self.coords = tuple(np.asarray(idx, dtype=idx_dtype)
- for idx in self.coords)
- self.data = to_native(self.data)
- if self.nnz > 0:
- for i, idx in enumerate(self.coords):
- if idx.max() >= self.shape[i]:
- raise ValueError(f'axis {i} index {idx.max()} exceeds '
- f'matrix dimension {self.shape[i]}')
- if idx.min() < 0:
- raise ValueError(f'negative axis {i} index: {idx.min()}')
- def transpose(self, axes=None, copy=False):
- if axes is None:
- axes = range(self.ndim)[::-1]
- elif isinstance(self, sparray):
- if not hasattr(axes, "__len__") or len(axes) != self.ndim:
- raise ValueError("axes don't match matrix dimensions")
- if len(set(axes)) != self.ndim:
- raise ValueError("repeated axis in transpose")
- elif axes != (1, 0):
- raise ValueError("Sparse matrices do not support an 'axes' "
- "parameter because swapping dimensions is the "
- "only logical permutation.")
- permuted_shape = tuple(self._shape[i] for i in axes)
- permuted_coords = tuple(self.coords[i] for i in axes)
- return self.__class__((self.data, permuted_coords),
- shape=permuted_shape, copy=copy)
- transpose.__doc__ = _spbase.transpose.__doc__
- def resize(self, *shape) -> None:
- shape = check_shape(shape, allow_nd=self._allow_nd)
- if self.ndim > 2:
- raise ValueError("only 1-D or 2-D input accepted")
- if len(shape) > 2:
- raise ValueError("shape argument must be 1-D or 2-D")
- # Check for added dimensions.
- if len(shape) > self.ndim:
- flat_coords = _ravel_coords(self.coords, self.shape)
- max_size = math.prod(shape)
- self.coords = np.unravel_index(flat_coords[:max_size], shape)
- self.data = self.data[:max_size]
- self._shape = shape
- return
- # Check for removed dimensions.
- if len(shape) < self.ndim:
- tmp_shape = (
- self._shape[:len(shape) - 1] # Original shape without last axis
- + (-1,) # Last axis is used to flatten the array
- + (1,) * (self.ndim - len(shape)) # Pad with ones
- )
- tmp = self.reshape(tmp_shape)
- self.coords = tmp.coords[:len(shape)]
- self._shape = tmp.shape[:len(shape)]
- # Handle truncation of existing dimensions.
- is_truncating = any(old > new for old, new in zip(self.shape, shape))
- if is_truncating:
- mask = np.logical_and.reduce([
- idx < size for idx, size in zip(self.coords, shape)
- ])
- if not mask.all():
- self.coords = tuple(idx[mask] for idx in self.coords)
- self.data = self.data[mask]
- self._shape = shape
- resize.__doc__ = _spbase.resize.__doc__
- def toarray(self, order=None, out=None):
- B = self._process_toarray_args(order, out)
- fortran = int(B.flags.f_contiguous)
- if not fortran and not B.flags.c_contiguous:
- raise ValueError("Output array must be C or F contiguous")
- # This handles both 0D and 1D cases correctly regardless of the
- # original shape.
- if self.ndim == 1:
- coo_todense_nd(np.array([1]), self.nnz, self.ndim,
- self.coords[0], self.data, B.ravel('A'), fortran)
- elif self.ndim == 2:
- M, N = self.shape
- coo_todense(M, N, self.nnz, self.row, self.col, self.data,
- B.ravel('A'), fortran)
- else: # dim>2
- if fortran:
- strides = np.append(1, np.cumprod(self.shape[:-1]))
- else:
- strides = np.append(np.cumprod(self.shape[1:][::-1])[::-1], 1)
- coords = np.concatenate(self.coords)
- coo_todense_nd(strides, self.nnz, self.ndim,
- coords, self.data, B.ravel('A'), fortran)
- # Note: reshape() doesn't copy here, but does return a new array (view).
- return B.reshape(self.shape)
- toarray.__doc__ = _spbase.toarray.__doc__
- def tocsc(self, copy=False):
- """Convert this array/matrix to Compressed Sparse Column format
- Duplicate entries will be summed together.
- Examples
- --------
- >>> from numpy import array
- >>> from scipy.sparse import coo_array
- >>> row = array([0, 0, 1, 3, 1, 0, 0])
- >>> col = array([0, 2, 1, 3, 1, 0, 0])
- >>> data = array([1, 1, 1, 1, 1, 1, 1])
- >>> A = coo_array((data, (row, col)), shape=(4, 4)).tocsc()
- >>> A.toarray()
- array([[3, 0, 1, 0],
- [0, 2, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 1]])
- """
- if self.ndim != 2:
- raise ValueError(f'Cannot convert. CSC format must be 2D. Got {self.ndim}D')
- if self.nnz == 0:
- return self._csc_container(self.shape, dtype=self.dtype)
- else:
- from ._csc import csc_array
- indptr, indices, data, shape = self._coo_to_compressed(csc_array._swap)
- x = self._csc_container((data, indices, indptr), shape=shape)
- if not self.has_canonical_format:
- x.sum_duplicates()
- return x
- def tocsr(self, copy=False):
- """Convert this array/matrix to Compressed Sparse Row format
- Duplicate entries will be summed together.
- Examples
- --------
- >>> from numpy import array
- >>> from scipy.sparse import coo_array
- >>> row = array([0, 0, 1, 3, 1, 0, 0])
- >>> col = array([0, 2, 1, 3, 1, 0, 0])
- >>> data = array([1, 1, 1, 1, 1, 1, 1])
- >>> A = coo_array((data, (row, col)), shape=(4, 4)).tocsr()
- >>> A.toarray()
- array([[3, 0, 1, 0],
- [0, 2, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 1]])
- """
- if self.ndim > 2:
- raise ValueError(f'Cannot convert. CSR must be 1D or 2D. Got {self.ndim}D')
- if self.nnz == 0:
- return self._csr_container(self.shape, dtype=self.dtype)
- else:
- from ._csr import csr_array
- arrays = self._coo_to_compressed(csr_array._swap, copy=copy)
- indptr, indices, data, shape = arrays
- x = self._csr_container((data, indices, indptr), shape=self.shape)
- if not self.has_canonical_format:
- x.sum_duplicates()
- return x
- def _coo_to_compressed(self, swap, copy=False):
- """convert (shape, coords, data) to (indptr, indices, data, shape)"""
- M, N = swap(self._shape_as_2d)
- # convert idx_dtype intc to int32 for pythran.
- # tested in scipy/optimize/tests/test__numdiff.py::test_group_columns
- idx_dtype = self._get_index_dtype(self.coords, maxval=max(self.nnz, N))
- if self.ndim == 1:
- indices = self.coords[0].copy() if copy else self.coords[0]
- nnz = len(indices)
- indptr = np.array([0, nnz], dtype=idx_dtype)
- data = self.data.copy() if copy else self.data
- return indptr, indices, data, self.shape
- # ndim == 2
- major, minor = swap(self.coords)
- nnz = len(major)
- major = major.astype(idx_dtype, copy=False)
- minor = minor.astype(idx_dtype, copy=False)
- indptr = np.empty(M + 1, dtype=idx_dtype)
- indices = np.empty_like(minor, dtype=idx_dtype)
- data = np.empty_like(self.data, dtype=self.dtype)
- coo_tocsr(M, N, nnz, major, minor, self.data, indptr, indices, data)
- return indptr, indices, data, self.shape
- def tocoo(self, copy=False):
- if copy:
- return self.copy()
- else:
- return self
- tocoo.__doc__ = _spbase.tocoo.__doc__
- def todia(self, copy=False):
- if self.ndim != 2:
- raise ValueError(f'Cannot convert. DIA format must be 2D. Got {self.ndim}D')
- self.sum_duplicates()
- ks = self.col - self.row # the diagonal for each nonzero
- diags, diag_idx = np.unique(ks, return_inverse=True)
- if len(diags) > 100:
- # probably undesired, should todia() have a maxdiags parameter?
- warn(f"Constructing a DIA matrix with {len(diags)} diagonals "
- "is inefficient",
- SparseEfficiencyWarning, stacklevel=2)
- #initialize and fill in data array
- if self.data.size == 0:
- data = np.zeros((0, 0), dtype=self.dtype)
- else:
- data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
- data[diag_idx, self.col] = self.data
- return self._dia_container((data, diags), shape=self.shape)
- todia.__doc__ = _spbase.todia.__doc__
- def todok(self, copy=False):
- if self.ndim > 2:
- raise ValueError(f'Cannot convert. DOK must be 1D or 2D. Got {self.ndim}D')
- self.sum_duplicates()
- dok = self._dok_container(self.shape, dtype=self.dtype)
- # ensure that 1d coordinates are not tuples
- if self.ndim == 1:
- coords = self.coords[0]
- else:
- coords = zip(*self.coords)
- dok._dict = dict(zip(coords, self.data))
- return dok
- todok.__doc__ = _spbase.todok.__doc__
- def diagonal(self, k=0):
- if self.ndim != 2:
- raise ValueError("diagonal requires two dimensions")
- rows, cols = self.shape
- if k <= -rows or k >= cols:
- return np.empty(0, dtype=self.data.dtype)
- diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
- dtype=self.dtype)
- diag_mask = (self.row + k) == self.col
- if self.has_canonical_format:
- row = self.row[diag_mask]
- data = self.data[diag_mask]
- else:
- inds = tuple(idx[diag_mask] for idx in self.coords)
- (row, _), data = self._sum_duplicates(inds, self.data[diag_mask])
- diag[row + min(k, 0)] = data
- return diag
- diagonal.__doc__ = _data_matrix.diagonal.__doc__
- def _setdiag(self, values, k):
- if self.ndim != 2:
- raise ValueError("setting a diagonal requires two dimensions")
- M, N = self.shape
- if values.ndim and not len(values):
- return
- idx_dtype = self.row.dtype
- # Determine which triples to keep and where to put the new ones.
- full_keep = self.col - self.row != k
- if k < 0:
- max_index = min(M+k, N)
- if values.ndim:
- max_index = min(max_index, len(values))
- keep = np.logical_or(full_keep, self.col >= max_index)
- new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
- new_col = np.arange(max_index, dtype=idx_dtype)
- else:
- max_index = min(M, N-k)
- if values.ndim:
- max_index = min(max_index, len(values))
- keep = np.logical_or(full_keep, self.row >= max_index)
- new_row = np.arange(max_index, dtype=idx_dtype)
- new_col = np.arange(k, k + max_index, dtype=idx_dtype)
- # Define the array of data consisting of the entries to be added.
- if values.ndim:
- new_data = values[:max_index]
- else:
- new_data = np.empty(max_index, dtype=self.dtype)
- new_data[:] = values
- # Update the internal structure.
- self.coords = (np.concatenate((self.row[keep], new_row)),
- np.concatenate((self.col[keep], new_col)))
- self.data = np.concatenate((self.data[keep], new_data))
- self.has_canonical_format = False
- # needed by _data_matrix
- def _with_data(self, data, copy=True):
- """Returns a matrix with the same sparsity structure as self,
- but with different data. By default the index arrays are copied.
- """
- if copy:
- coords = tuple(idx.copy() for idx in self.coords)
- else:
- coords = self.coords
- return self.__class__((data, coords), shape=self.shape, dtype=data.dtype)
- def __getitem__(self, key):
- index, new_shape, arr_int_pos, none_pos = _validate_indices(
- key, self.shape, self.format
- )
- # handle int, slice and int-array indices
- index_mask = np.ones(len(self.data), dtype=np.bool_)
- slice_coords = []
- arr_coords = []
- arr_indices = []
- for i, (idx, co) in enumerate(zip(index, self.coords)):
- if isinstance(idx, int):
- index_mask &= (co == idx)
- elif isinstance(idx, slice):
- if idx == slice(None):
- slice_coords.append(co)
- else:
- start, stop, step = idx.indices(self.shape[i])
- if step != 1:
- if step < 0:
- in_range = (co <= start) & (co > stop)
- else:
- in_range = (co >= start) & (co < stop)
- new_ix, m = np.divmod(co - start, step)
- index_mask &= (m == 0) & in_range
- else:
- in_range = (co >= start) & (co < stop)
- new_ix = co - start
- index_mask &= in_range
- slice_coords.append(new_ix)
- else: # array
- arr_coords.append(co)
- arr_indices.append(idx)
- # shortcut for scalar output
- if new_shape == ():
- return self.data[index_mask].sum().astype(self.dtype, copy=False)
- new_coords = [co[index_mask] for co in slice_coords]
- new_data = self.data[index_mask]
- # handle array indices
- if arr_indices:
- arr_indices = _broadcast_arrays(*arr_indices)
- arr_shape = arr_indices[0].shape # already broadcast in validate_indices
- # There are three dimensions required to check array indices against coords
- # Their lengths are described as:
- # a) number of indices that are arrays - arr_dim
- # b) number of coords to check - masked_nnz (already masked by slices)
- # c) size of the index arrays - arr_size
- # Note for this, integer indices are treated like slices, not like arrays.
- #
- # Goal:
- # Find new_coords and index positions that match across all arr_dim axes.
- # Approach: Track matches using bool array. Size: masked_nnz by arr_size.
- # True means all arr_indices match at that coord and index position.
- # Equate with broadcasting and check for all equal across (arr_dim) axis 0.
- # 1st array is "keyarr" (arr_dim by 1 by arr_size) from arr_indices.
- # 2nd array is "arr_coords" (arr_dim by masked_nnz by 1) from arr_coords.
- keyarr = np.array(arr_indices).reshape(len(arr_indices), 1, -1)
- arr_coords = np.array([co[index_mask] for co in arr_coords])[:, :, None]
- found = (keyarr == arr_coords).all(axis=0)
- arr_co, arr_ix = found.nonzero()
- new_data = new_data[arr_co]
- new_coords = [co[arr_co] for co in new_coords]
- new_arr_coords = list(np.unravel_index(arr_ix, shape=arr_shape))
- # check for contiguous positions of array and int indices
- if len(arr_int_pos) == arr_int_pos[-1] - arr_int_pos[0] + 1:
- # Contiguous. Put all array index shape at pos of array indices
- pos = arr_int_pos[0]
- new_coords = new_coords[:pos] + new_arr_coords + new_coords[pos:]
- else:
- # Not contiguous. Put all array coords at front
- new_coords = new_arr_coords + new_coords
- if none_pos:
- if new_coords:
- coord_like = np.zeros_like(new_coords[0])
- else:
- coord_like = np.zeros(len(new_data), dtype=self.coords[0].dtype)
- new_coords.insert(none_pos[0], coord_like)
- for i in none_pos[1:]:
- new_coords.insert(i, coord_like.copy())
- return coo_array((new_data, new_coords), shape=new_shape, dtype=self.dtype)
- def __setitem__(self, key, x):
- # enact self[key] = x
- index, new_shape, arr_int_pos, none_pos = _validate_indices(
- key, self.shape, self.format
- )
- # remove None's at beginning of index. Should not impact indexing coords
- # and will mistakenly align with x_coord columns if not removed.
- if none_pos:
- new_shape = list(new_shape)
- for j in none_pos[::-1]:
- new_shape.pop(j)
- new_shape = tuple(new_shape)
- # broadcast arrays
- if arr_int_pos:
- index = list(index)
- arr_pos = {i: arr for i in arr_int_pos if not isintlike(arr := index[i])}
- arr_indices = _broadcast_arrays(*arr_pos.values())
- for i, arr in zip(arr_pos, arr_indices):
- index[i] = arr
- # get coords and data from x
- if issparse(x):
- if 0 in x.shape:
- return # Nothing to set.
- x_data, x_coords = _get_sparse_data_and_coords(x, new_shape, self.dtype)
- else:
- x = np.asarray(x, dtype=self.dtype)
- if x.size == 0:
- return # Nothing to set.
- x_data, x_coords = _get_dense_data_and_coords(x, new_shape)
- # Approach:
- # Set indexed values to zero (drop from `self.coords` and `self.data`)
- # create new coords and data arrays for setting nonzeros
- # concatenate old (undropped) values with new coords and data
- old_data, old_coords = self._zero_many(index)
- if len(x_coords) == 1 and len(x_coords[0]) == 0:
- self.data, self.coords = old_data, old_coords
- # leave self.has_canonical_format unchanged
- return
- # To process array indices, need the x_coords for those axes
- # and need to ravel the array part of x_coords to build new_coords
- # Along the way, Find pos and shape of array-index portion of key.
- # arr_shape is None and pos = -1 when no arrays are used as indices.
- arr_shape = None
- pos = -1
- if arr_int_pos:
- # Get arr_shape if any arrays are in the index.
- # Also ravel the corresponding x_coords.
- for idx in index:
- if not isinstance(idx, slice) and not isintlike(idx):
- arr_shape = idx.shape
- # Find x_coord pos of integer and array portion of index.
- # If contiguous put int and array axes at pos of those indices.
- # If not contiguous, put all int and array axes at pos=0.
- if len(arr_int_pos) == (arr_int_pos[-1] - arr_int_pos[0] + 1):
- pos = arr_int_pos[0]
- else:
- pos = 0
- # compute the raveled coords of the array part of x_coords.
- # Used to build the new coords from the index arrays.
- x_arr_coo = x_coords[pos:pos + len(arr_shape)]
- # could use np.ravel_multi_index but _ravel_coords avoids overflow
- x_arr_coo_ravel = _ravel_coords(x_arr_coo, arr_shape)
- break
- # find map from x_coord slice axes to index axes
- x_ax = 0
- x_axes = {}
- for i, idx in enumerate(index):
- if i == pos:
- x_ax += len(arr_shape)
- if isinstance(idx, slice):
- x_axes[i] = x_ax
- x_ax += 1
- # Build new_coords and new_data
- new_coords = [None] * self.ndim
- new_nnz = len(x_data)
- for i, idx in enumerate(index):
- if isintlike(idx):
- new_coords[i] = (np.broadcast_to(idx, (new_nnz,)))
- continue
- elif isinstance(idx, slice):
- start, stop, step = idx.indices(self.shape[i])
- new_coords[i] = (start + x_coords[x_axes[i]] * step)
- else: # array idx
- new_coords[i] = idx.ravel()[x_arr_coo_ravel]
- # seems like a copy is prudent when setting data in this array
- new_data = x_data.copy()
- # deduplicate entries created by multiple coords matching in the array index
- # NumPy does not specify which value is put into the spot (last one assigned)
- # so only one value should appear if we want to match NumPy.
- # If matching NumPy is not crucial, we could make dups a feature where
- # integer array indices with repeated indices creates duplicate values.
- # Not doing that here. We are just removing duplicates (keep 1st data found)
- new_coords = np.array(new_coords)
- _, ind = np.unique(new_coords, axis=1, return_index=True)
- # update values with stack of old and new data and coords.
- self.data = np.hstack([old_data, new_data[ind]])
- self.coords = tuple(np.hstack(c) for c in zip(old_coords, new_coords[:, ind]))
- self.has_canonical_format = False
- def _zero_many(self, index):
- # handle int, slice and integer-array indices
- # index_mask accumulates a bool array of nonzeros that match index
- index_mask=np.ones(len(self.data), dtype=np.bool_)
- arr_coords = []
- arr_indices = []
- for i, (idx, co) in enumerate(zip(index, self.coords)):
- if isinstance(idx, int):
- index_mask &= (co == idx)
- elif isinstance(idx, slice) and idx != slice(None):
- start, stop, step = idx.indices(self.shape[i])
- if step != 1:
- if step < 0:
- in_range = (co <= start) & (co > stop)
- else:
- in_range = (co >= start) & (co < stop)
- m = np.mod(co - start, step)
- index_mask &= (m == 0) & in_range
- else:
- in_range = (co >= start) & (co < stop)
- index_mask &= in_range
- elif isinstance(idx, slice) and idx == slice(None):
- # slice is full axis so no changes to index_mask
- pass
- else: # array
- arr_coords.append(co)
- arr_indices.append(idx)
- # match array indices with masked coords. See comments in __getitem__
- if arr_indices:
- keyarr = np.array(arr_indices).reshape(len(arr_indices), 1, -1)
- arr_coords = np.array([co[index_mask] for co in arr_coords])[:, :, None]
- found = (keyarr == arr_coords).all(axis=0)
- arr_coo, _ = found.nonzero()
- arr_index_mask = np.zeros_like(index_mask)
- arr_index_mask[index_mask.nonzero()[0][arr_coo]] = True
- index_mask &= arr_index_mask
- # remove matching coords and data to set them to zero
- pruned_coords = [co[~index_mask] for co in self.coords]
- pruned_data = self.data[~index_mask]
- return pruned_data, pruned_coords
- def sum_duplicates(self) -> None:
- """Eliminate duplicate entries by adding them together
- This is an *in place* operation
- """
- if self.has_canonical_format:
- return
- summed = self._sum_duplicates(self.coords, self.data)
- self.coords, self.data = summed
- self.has_canonical_format = True
- def _sum_duplicates(self, coords, data):
- # Assumes coords not in canonical format.
- if len(data) == 0:
- return coords, data
- # Sort coords w.r.t. rows, then cols. This corresponds to C-order,
- # which we rely on for argmin/argmax to return the first index in the
- # same way that numpy does (in the case of ties).
- order = np.lexsort(coords[::-1])
- coords = tuple(idx[order] for idx in coords)
- data = data[order]
- unique_mask = np.logical_or.reduce([
- idx[1:] != idx[:-1] for idx in coords
- ])
- unique_mask = np.append(True, unique_mask)
- coords = tuple(idx[unique_mask] for idx in coords)
- unique_inds, = np.nonzero(unique_mask)
- data = np.add.reduceat(data, downcast_intp_index(unique_inds), dtype=self.dtype)
- return coords, data
- def eliminate_zeros(self):
- """Remove zero entries from the array/matrix
- This is an *in place* operation
- """
- mask = self.data != 0
- self.data = self.data[mask]
- self.coords = tuple(idx[mask] for idx in self.coords)
- #######################
- # Arithmetic handlers #
- #######################
- def _add_dense(self, other):
- if other.shape != self.shape:
- raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
- dtype = upcast_char(self.dtype.char, other.dtype.char)
- result = np.array(other, dtype=dtype, copy=True)
- fortran = int(result.flags.f_contiguous)
- if self.ndim == 1:
- coo_todense_nd(np.array([1]), self.nnz, self.ndim,
- self.coords[0], self.data, result.ravel('A'), fortran)
- elif self.ndim == 2:
- M, N = self._shape_as_2d
- coo_todense(M, N, self.nnz, self.row, self.col, self.data,
- result.ravel('A'), fortran)
- else:
- if fortran:
- strides = np.append(1, np.cumprod(self.shape[:-1]))
- else:
- strides = np.append(np.cumprod(self.shape[1:][::-1])[::-1], 1)
- coords = np.concatenate(self.coords)
- coo_todense_nd(strides, self.nnz, self.ndim,
- coords, self.data, result.ravel('A'), fortran)
- return self._container(result, copy=False)
- def _add_sparse(self, other):
- if self.ndim < 3:
- return self.tocsr()._add_sparse(other)
- if other.shape != self.shape:
- raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
- other = self.__class__(other)
- new_data = np.concatenate((self.data, other.data))
- new_coords = tuple(np.concatenate((self.coords, other.coords), axis=1))
- A = self.__class__((new_data, new_coords), shape=self.shape)
- return A
- def _sub_sparse(self, other):
- if self.ndim < 3:
- return self.tocsr()._sub_sparse(other)
- if other.shape != self.shape:
- raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
- other = self.__class__(other)
- new_data = np.concatenate((self.data, -other.data))
- new_coords = tuple(np.concatenate((self.coords, other.coords), axis=1))
- A = coo_array((new_data, new_coords), shape=self.shape)
- return A
- def _matmul_vector(self, other):
- if self.ndim > 2:
- result = np.zeros(math.prod(self.shape[:-1]),
- dtype=upcast_char(self.dtype.char, other.dtype.char))
- shape = np.array(self.shape)
- strides = np.append(np.cumprod(shape[:-1][::-1])[::-1][1:], 1)
- coords = np.concatenate(self.coords)
- coo_matvec_nd(self.nnz, len(self.shape), strides, coords, self.data,
- other, result)
- result = result.reshape(self.shape[:-1])
- return result
- # self.ndim <= 2
- result_shape = self.shape[0] if self.ndim > 1 else 1
- result = np.zeros(result_shape,
- dtype=upcast_char(self.dtype.char, other.dtype.char))
- if self.ndim == 2:
- col = self.col
- row = self.row
- elif self.ndim == 1:
- col = self.coords[0]
- row = np.zeros_like(col)
- else:
- raise NotImplementedError(
- f"coo_matvec not implemented for ndim={self.ndim}")
- coo_matvec(self.nnz, row, col, self.data, other, result)
- # Array semantics return a scalar here, not a single-element array.
- if isinstance(self, sparray) and result_shape == 1:
- return result[0]
- return result
- def _rmatmul_dispatch(self, other):
- if isscalarlike(other):
- return self._mul_scalar(other)
- else:
- # Don't use asarray unless we have to
- try:
- o_ndim = other.ndim
- except AttributeError:
- other = np.asarray(other)
- o_ndim = other.ndim
- perm = tuple(range(o_ndim)[:-2]) + tuple(range(o_ndim)[-2:][::-1])
- tr = other.transpose(perm)
- s_ndim = self.ndim
- perm = tuple(range(s_ndim)[:-2]) + tuple(range(s_ndim)[-2:][::-1])
- ret = self.transpose(perm)._matmul_dispatch(tr)
- if ret is NotImplemented:
- return NotImplemented
- if s_ndim == 1 or o_ndim == 1:
- perm = range(ret.ndim)
- else:
- perm = tuple(range(ret.ndim)[:-2]) + tuple(range(ret.ndim)[-2:][::-1])
- return ret.transpose(perm)
- def _matmul_dispatch(self, other):
- if isscalarlike(other):
- return self.multiply(other)
- if not (issparse(other) or isdense(other)):
- # If it's a list or whatever, treat it like an array
- other_a = np.asanyarray(other)
- if other_a.ndim == 0 and other_a.dtype == np.object_:
- # Not interpretable as an array; return NotImplemented so that
- # other's __rmatmul__ can kick in if that's implemented.
- return NotImplemented
- # Allow custom sparse class indicated by attr sparse gh-6520
- try:
- other.shape
- except AttributeError:
- other = other_a
- if self.ndim < 3 and other.ndim < 3:
- return _spbase._matmul_dispatch(self, other)
- N = self.shape[-1]
- err_prefix = "matmul: dimension mismatch with signature"
- if other.__class__ is np.ndarray:
- if other.shape == (N,):
- return self._matmul_vector(other)
- if other.shape == (N, 1):
- result = self._matmul_vector(other.ravel())
- return result.reshape(*self.shape[:-1], 1)
- if other.ndim == 1:
- msg = f"{err_prefix} (n,k={N}),(k={other.shape[0]},)->(n,)"
- raise ValueError(msg)
- if other.shape[-2] == N:
- # check for batch dimensions compatibility
- batch_shape_A = self.shape[:-2]
- batch_shape_B = other.shape[:-2]
- if batch_shape_A != batch_shape_B:
- try:
- # This will raise an error if the shapes are not broadcastable
- np.broadcast_shapes(batch_shape_A, batch_shape_B)
- except ValueError:
- raise ValueError("Batch dimensions are not broadcastable")
- return self._matmul_multivector(other)
- else:
- raise ValueError(
- f"{err_prefix} (n,..,k={N}),(k={other.shape[-2]},..,m)->(n,..,m)"
- )
- if isscalarlike(other):
- # scalar value
- return self._mul_scalar(other)
- if issparse(other):
- self_is_1d = self.ndim == 1
- other_is_1d = other.ndim == 1
- # reshape to 2-D if self or other is 1-D
- if self_is_1d:
- self = self.reshape(self._shape_as_2d) # prepend 1 to shape
- if other_is_1d:
- other = other.reshape((other.shape[0], 1)) # append 1 to shape
- # Check if the inner dimensions match for matrix multiplication
- if N != other.shape[-2]:
- raise ValueError(
- f"{err_prefix} (n,..,k={N}),(k={other.shape[-2]},..,m)->(n,..,m)"
- )
- # If A or B has more than 2 dimensions, check for
- # batch dimensions compatibility
- if self.ndim > 2 or other.ndim > 2:
- batch_shape_A = self.shape[:-2]
- batch_shape_B = other.shape[:-2]
- if batch_shape_A != batch_shape_B:
- try:
- # This will raise an error if the shapes are not broadcastable
- np.broadcast_shapes(batch_shape_A, batch_shape_B)
- except ValueError:
- raise ValueError("Batch dimensions are not broadcastable")
- result = self._matmul_sparse(other)
- # reshape back if a or b were originally 1-D
- if self_is_1d:
- # if self was originally 1-D, reshape result accordingly
- result = result.reshape(tuple(result.shape[:-2]) +
- tuple(result.shape[-1:]))
- if other_is_1d:
- result = result.reshape(result.shape[:-1])
- return result
- def _matmul_multivector(self, other):
- result_dtype = upcast_char(self.dtype.char, other.dtype.char)
- if self.ndim >= 3 or other.ndim >= 3:
- # if self has shape (N,), reshape to (1,N)
- if self.ndim == 1:
- result = self.reshape(1, self.shape[0])._matmul_multivector(other)
- return result.reshape(tuple(other.shape[:-2]) + tuple(other.shape[-1:]))
- broadcast_shape = np.broadcast_shapes(self.shape[:-2], other.shape[:-2])
- self_shape = broadcast_shape + self.shape[-2:]
- other_shape = broadcast_shape + other.shape[-2:]
- self = self._broadcast_to(self_shape)
- other = np.broadcast_to(other, other_shape)
- result_shape = broadcast_shape + self.shape[-2:-1] + other.shape[-1:]
- result = np.zeros(result_shape, dtype=result_dtype)
- coo_matmat_dense_nd(self.nnz, len(self.shape), other.shape[-1],
- np.array(other_shape), np.array(result_shape),
- np.concatenate(self.coords),
- self.data, other.ravel('C'), result)
- return result
- if self.ndim == 2:
- result_shape = (self.shape[0], other.shape[1])
- col = self.col
- row = self.row
- elif self.ndim == 1:
- result_shape = (other.shape[1],)
- col = self.coords[0]
- row = np.zeros_like(col)
- result = np.zeros(result_shape, dtype=result_dtype)
- coo_matmat_dense(self.nnz, other.shape[-1], row, col,
- self.data, other.ravel('C'), result)
- return result.view(type=type(other))
- def dot(self, other):
- """Return the dot product of two arrays.
- Strictly speaking a dot product involves two vectors.
- But in the sense that an array with ndim >= 1 is a collection
- of vectors, the function computes the collection of dot products
- between each vector in the first array with each vector in the
- second array. The axis upon which the sum of products is performed
- is the last axis of the first array and the second to last axis of
- the second array. If the second array is 1-D, the last axis is used.
- Thus, if both arrays are 1-D, the inner product is returned.
- If both are 2-D, we have matrix multiplication. If `other` is 1-D,
- the sum product is taken along the last axis of each array. If
- `other` is N-D for N>=2, the sum product is over the last axis of
- the first array and the second-to-last axis of the second array.
- Parameters
- ----------
- other : array_like (dense or sparse)
- Second array
- Returns
- -------
- output : array (sparse or dense)
- The dot product of this array with `other`.
- It will be dense/sparse if `other` is dense/sparse.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.sparse import coo_array
- >>> A = coo_array([[1, 2, 0], [0, 0, 3], [4, 0, 5]])
- >>> v = np.array([1, 0, -1])
- >>> A.dot(v)
- array([ 1, -3, -1], dtype=int64)
- For 2-D arrays it is the matrix product:
- >>> A = coo_array([[1, 0], [0, 1]])
- >>> B = coo_array([[4, 1], [2, 2]])
- >>> A.dot(B).toarray()
- array([[4, 1],
- [2, 2]])
- For 3-D arrays the shape extends unused axes by other unused axes.
- >>> A = coo_array(np.arange(3*4*5*6)).reshape((3,4,5,6))
- >>> B = coo_array(np.arange(3*4*5*6)).reshape((5,4,6,3))
- >>> A.dot(B).shape
- (3, 4, 5, 5, 4, 3)
- """
- # handle non-array input: lists, ints, etc
- if not (issparse(other) or isdense(other) or isscalarlike(other)):
- # If it's a list or whatever, treat it like an array
- o_array = np.asanyarray(other)
- if o_array.ndim == 0 and o_array.dtype == np.object_:
- raise TypeError(f"dot argument not supported type: '{type(other)}'")
- try:
- other.shape
- except AttributeError:
- other = o_array
- # Handle scalar multiplication
- if isscalarlike(other):
- return self * other
- # other.shape[-2:][0] gets last index of 1d, next to last index of >1d
- if self.shape[-1] != other.shape[-2:][0]:
- raise ValueError(f"shapes {self.shape} and {other.shape}"
- " are not aligned for n-D dot")
- if self.ndim < 3 and other.ndim < 3:
- return self @ other
- if isdense(other):
- return self._dense_dot(other)
- return self._sparse_dot(other.tocoo())
- def _sparse_dot(self, other):
- # already checked: at least one is >2d, neither scalar, both are coo
- # Ravel non-reduced axes coordinates
- self_2d, s_new_shape = _convert_to_2d(self, [self.ndim - 1])
- other_2d, o_new_shape = _convert_to_2d(other, [max(0, other.ndim - 2)])
- prod = self_2d @ other_2d.T # routes via 2-D CSR
- prod = prod.tocoo()
- # Combine the shapes of the non-contracted axes
- combined_shape = s_new_shape + o_new_shape
- # Unravel the 2D coordinates to get multi-dimensional coordinates
- coords = []
- new_shapes = (s_new_shape, o_new_shape) if s_new_shape else (o_new_shape,)
- for c, s in zip(prod.coords, new_shapes):
- coords.extend(np.unravel_index(c, s))
- # Construct the resulting COO array with coords and shape
- return coo_array((prod.data, coords), shape=combined_shape)
- def _dense_dot(self, other):
- # already checked: self is >0d, other is dense and >0d
- # Ravel non-reduced axes coordinates
- s_ndim = self.ndim
- if s_ndim <= 2:
- s_new_shape = () if s_ndim == 1 else (self.shape[0],)
- self_2d = self
- else:
- self_2d, s_new_shape = _convert_to_2d(self, [self.ndim - 1])
- o_ndim = other.ndim
- if o_ndim <= 2:
- o_new_shape = () if o_ndim == 1 else (other.shape[-1],)
- other_2d = other
- else:
- o_new_shape = other.shape[:-2] + other.shape[-1:]
- reorder_dims = (o_ndim - 2, *range(o_ndim - 2), o_ndim - 1)
- o_reorg = np.transpose(other, reorder_dims)
- other_2d = o_reorg.reshape((other.shape[-2], math.prod(o_new_shape)))
- prod = self_2d @ other_2d # routes via 2-D CSR
- # Combine the shapes of the non-contracted axes
- combined_shape = s_new_shape + o_new_shape
- return prod.reshape(combined_shape)
- def tensordot(self, other, axes=2):
- """Return the tensordot product with another array along the given axes.
- The tensordot differs from dot and matmul in that any axis can be
- chosen for each of the first and second array and the sum of the
- products is computed just like for matrix multiplication, only not
- just for the rows of the first times the columns of the second. It
- takes the dot product of the collection of vectors along the specified
- axes. Here we can even take the sum of the products along two or even
- more axes if desired. So, tensordot is a dot product computation
- applied to arrays of any dimension >= 1. It is like matmul but over
- arbitrary axes for each matrix.
- Given two tensors, `a` and `b`, and the desired axes specified as a
- 2-tuple/list/array containing two sequences of axis numbers,
- ``(a_axes, b_axes)``, sum the products of `a`'s and `b`'s elements
- (components) over the axes specified by ``a_axes`` and ``b_axes``.
- The `axes` input can be a single non-negative integer, ``N``;
- if it is, then the last ``N`` dimensions of `a` and the first
- ``N`` dimensions of `b` are summed over.
- Parameters
- ----------
- a, b : array_like
- Tensors to "dot".
- axes : int or (2,) array_like
- * integer_like
- If an int N, sum over the last N axes of `a` and the first N axes
- of `b` in order. The sizes of the corresponding axes must match.
- * (2,) array_like
- A 2-tuple of sequences of axes to be summed over, the first applying
- to `a`, the second to `b`. The sequences must be the same length.
- The shape of the corresponding axes must match between `a` and `b`.
- Returns
- -------
- output : coo_array
- The tensor dot product of this array with `other`.
- It will be dense/sparse if `other` is dense/sparse.
- See Also
- --------
- dot
- Examples
- --------
- >>> import numpy as np
- >>> import scipy.sparse
- >>> A = scipy.sparse.coo_array([[[2, 3], [0, 0]], [[0, 1], [0, 5]]])
- >>> A.shape
- (2, 2, 2)
- Integer axes N are shorthand for (range(-N, 0), range(0, N)):
- >>> A.tensordot(A, axes=1).toarray()
- array([[[[ 4, 9],
- [ 0, 15]],
- <BLANKLINE>
- [[ 0, 0],
- [ 0, 0]]],
- <BLANKLINE>
- <BLANKLINE>
- [[[ 0, 1],
- [ 0, 5]],
- <BLANKLINE>
- [[ 0, 5],
- [ 0, 25]]]])
- >>> A.tensordot(A, axes=2).toarray()
- array([[ 4, 6],
- [ 0, 25]])
- >>> A.tensordot(A, axes=3)
- array(39)
- Using tuple for axes:
- >>> a = scipy.sparse.coo_array(np.arange(60).reshape(3,4,5))
- >>> b = np.arange(24).reshape(4,3,2)
- >>> c = a.tensordot(b, axes=([1,0],[0,1]))
- >>> c.shape
- (5, 2)
- >>> c
- array([[4400, 4730],
- [4532, 4874],
- [4664, 5018],
- [4796, 5162],
- [4928, 5306]])
- """
- if not isdense(other) and not issparse(other):
- # If it's a list or whatever, treat it like an array
- other_array = np.asanyarray(other)
- if other_array.ndim == 0 and other_array.dtype == np.object_:
- raise TypeError(f"tensordot arg not supported type: '{type(other)}'")
- try:
- other.shape
- except AttributeError:
- other = other_array
- axes_self, axes_other = _process_axes(self.ndim, other.ndim, axes)
- # Check for shape compatibility along specified axes
- if any(self.shape[ax] != other.shape[bx]
- for ax, bx in zip(axes_self, axes_other)):
- raise ValueError("sizes of the corresponding axes must match")
- if isdense(other):
- return self._dense_tensordot(other, axes_self, axes_other)
- else:
- return self._sparse_tensordot(other, axes_self, axes_other)
- def _sparse_tensordot(self, other, s_axes, o_axes):
- # Prepare the tensors for tensordot operation
- # Ravel non-reduced axes coordinates
- self_2d, s_new_shape = _convert_to_2d(self, s_axes)
- other_2d, o_new_shape = _convert_to_2d(other, o_axes)
- # Perform matrix multiplication (routed via 2-D CSR)
- prod = self_2d @ other_2d.T
- # handle case of scalar result (axis includes all axes for both)
- if not issparse(prod):
- return prod
- prod = prod.tocoo()
- # Combine the shapes of the non-contracted axes
- combined_shape = s_new_shape + o_new_shape
- # Unravel the 2D coordinates to get multi-dimensional coordinates
- coords = []
- new_shapes = (s_new_shape, o_new_shape) if s_new_shape else (o_new_shape,)
- for c, s in zip(prod.coords, new_shapes):
- if s:
- coords.extend(np.unravel_index(c, s))
- # Construct the resulting COO array with coords and shape
- return coo_array((prod.data, coords), shape=combined_shape)
- def _dense_tensordot(self, other, s_axes, o_axes):
- s_ndim = len(self.shape)
- o_ndim = len(other.shape)
- s_non_axes = [i for i in range(s_ndim) if i not in s_axes]
- s_axes_shape = [self.shape[i] for i in s_axes]
- s_non_axes_shape = [self.shape[i] for i in s_non_axes]
- o_non_axes = [i for i in range(o_ndim) if i not in o_axes]
- o_axes_shape = [other.shape[i] for i in o_axes]
- o_non_axes_shape = [other.shape[i] for i in o_non_axes]
- left = self.transpose(s_non_axes + s_axes)
- right = np.transpose(other, o_non_axes[:-1] + o_axes + o_non_axes[-1:])
- reshape_left = (*s_non_axes_shape, math.prod(s_axes_shape))
- reshape_right = (*o_non_axes_shape[:-1], math.prod(o_axes_shape),
- *o_non_axes_shape[-1:])
- return left.reshape(reshape_left).dot(right.reshape(reshape_right))
- def _matmul_sparse(self, other):
- """
- Perform sparse-sparse matrix multiplication for two n-D COO arrays.
- The method converts input n-D arrays to 2-D block array format,
- uses csr_matmat to multiply them, and then converts the
- result back to n-D COO array.
- Parameters:
- self (COO): The first n-D sparse array in COO format.
- other (COO): The second n-D sparse array in COO format.
- Returns:
- prod (COO): The resulting n-D sparse array after multiplication.
- """
- if self.ndim < 3 and other.ndim < 3:
- return _spbase._matmul_sparse(self, other)
- # Get the shapes of self and other
- self_shape = self.shape
- other_shape = other.shape
- # Determine the new shape to broadcast self and other
- broadcast_shape = np.broadcast_shapes(self_shape[:-2], other_shape[:-2])
- self_new_shape = tuple(broadcast_shape) + self_shape[-2:]
- other_new_shape = tuple(broadcast_shape) + other_shape[-2:]
- self_broadcasted = self._broadcast_to(self_new_shape)
- other_broadcasted = other._broadcast_to(other_new_shape)
- # Convert n-D COO arrays to 2-D block diagonal arrays
- self_block_diag = _block_diag(self_broadcasted)
- other_block_diag = _block_diag(other_broadcasted)
- # Use csr_matmat to perform sparse matrix multiplication
- prod_block_diag = (self_block_diag @ other_block_diag).tocoo()
- # Convert the 2-D block diagonal array back to n-D
- return _extract_block_diag(
- prod_block_diag,
- shape=(*broadcast_shape, self.shape[-2], other.shape[-1]),
- )
- def _broadcast_to(self, new_shape, copy=False):
- if self.shape == new_shape:
- return self.copy() if copy else self
- old_shape = self.shape
- # Check if the new shape is compatible for broadcasting
- if len(new_shape) < len(old_shape):
- raise ValueError("New shape must have at least as many dimensions"
- " as the current shape")
- # Add leading ones to shape to ensure same length as `new_shape`
- shape = (1,) * (len(new_shape) - len(old_shape)) + tuple(old_shape)
- # Ensure the old shape can be broadcast to the new shape
- if any((o != 1 and o != n) for o, n in zip(shape, new_shape)):
- raise ValueError(f"current shape {old_shape} cannot be "
- "broadcast to new shape {new_shape}")
- # Reshape the COO array to match the new dimensions
- self = self.reshape(shape)
- idx_dtype = get_index_dtype(self.coords, maxval=max(new_shape))
- coords = self.coords
- new_data = self.data
- new_coords = coords[-1:] # Copy last coordinate to start
- cum_repeat = 1 # Cumulative repeat factor for broadcasting
- if shape[-1] != new_shape[-1]: # broadcasting the n-th (col) dimension
- repeat_count = new_shape[-1]
- cum_repeat *= repeat_count
- new_data = np.tile(new_data, repeat_count)
- new_dim = np.repeat(np.arange(0, repeat_count, dtype=idx_dtype), self.nnz)
- new_coords = (new_dim,)
- for i in range(-2, -(len(shape)+1), -1):
- if shape[i] != new_shape[i]:
- repeat_count = new_shape[i] # number of times to repeat data, coords
- cum_repeat *= repeat_count # update cumulative repeat factor
- nnz = len(new_data) # Number of non-zero elements so far
- # Tile data and coordinates to match the new repeat count
- new_data = np.tile(new_data, repeat_count)
- new_coords = tuple(np.tile(new_coords[i+1:], repeat_count))
- # Create new dimensions and stack them
- new_dim = np.repeat(np.arange(0, repeat_count, dtype=idx_dtype), nnz)
- new_coords = (new_dim,) + new_coords
- else:
- # If no broadcasting needed, tile the coordinates
- new_dim = np.tile(coords[i], cum_repeat)
- new_coords = (new_dim,) + new_coords
- return coo_array((new_data, new_coords), new_shape)
- def _sum_nd(self, axis, res_dtype, out):
- # axis and out are preprocessed. out.shape is new_shape
- A2d, new_shape = _convert_to_2d(self, axis)
- ones = np.ones((A2d.shape[1], 1), dtype=res_dtype)
- # sets dtype while loading into out
- out[...] = (A2d @ ones).reshape(new_shape)
- return out
- def _min_or_max_axis_nd(self, axis, min_or_max, explicit):
- A2d, new_shape = _convert_to_2d(self, axis)
- res = A2d._min_or_max_axis(1, min_or_max, explicit)
- unraveled_coords = np.unravel_index(res.coords[0], new_shape)
- return coo_array((res.data, unraveled_coords), new_shape)
- def _argminmax_axis_nd(self, axis, argminmax, compare, explicit):
- A2d, new_shape = _convert_to_2d(self, axis)
- res_flat = A2d._argminmax_axis(1, argminmax, compare, explicit)
- return res_flat.reshape(new_shape)
- def _block_diag(self):
- """
- Converts an N-D COO array into a 2-D COO array in block diagonal form.
- Parameters:
- self (coo_array): An N-Dimensional COO sparse array.
- Returns:
- coo_array: A 2-Dimensional COO sparse array in block diagonal form.
- """
- if self.ndim<2:
- raise ValueError("array must have atleast dim=2")
- num_blocks = math.prod(self.shape[:-2])
- n_col = self.shape[-1]
- n_row = self.shape[-2]
- res_arr = self.reshape((num_blocks, n_row, n_col))
- new_coords = (
- res_arr.coords[1] + res_arr.coords[0] * res_arr.shape[1],
- res_arr.coords[2] + res_arr.coords[0] * res_arr.shape[2],
- )
- new_shape = (num_blocks * n_row, num_blocks * n_col)
- return coo_array((self.data, tuple(new_coords)), shape=new_shape)
- def _extract_block_diag(self, shape):
- n_row, n_col = shape[-2], shape[-1]
- # Extract data and coordinates from the block diagonal COO array
- data = self.data
- row, col = self.row, self.col
- # Initialize new coordinates array
- new_coords = np.empty((len(shape), self.nnz), dtype=int)
- # Calculate within-block indices
- new_coords[-2] = row % n_row
- new_coords[-1] = col % n_col
- # Calculate coordinates for higher dimensions
- temp_block_idx = row // n_row
- for i in range(len(shape) - 3, -1, -1):
- size = shape[i]
- new_coords[i] = temp_block_idx % size
- temp_block_idx = temp_block_idx // size
- # Create the new COO array with the original n-D shape
- return coo_array((data, tuple(new_coords)), shape=shape)
- def _get_sparse_data_and_coords(x, new_shape, dtype):
- x = x.tocoo()
- x.sum_duplicates()
- x_coords = list(x.coords)
- x_data = x.data.astype(dtype, copy=False)
- x_shape = x.shape
- if new_shape == x_shape:
- return x_data, x_coords
- # broadcasting needed
- len_diff = len(new_shape) - len(x_shape)
- if len_diff > 0:
- # prepend ones to shape of x to match ndim
- x_shape = [1] * len_diff + list(x_shape)
- coord_zeros = np.zeroslike(x_coords[0])
- x_coords = tuple([coord_zeros] * len_diff + x_coords)
- # taking away axes (squeezing) is not part of broadcasting, but long
- # spmatrix history of using 2d vectors in 1d space, so we manually
- # squeeze the front and back axes here to be compatible
- if len_diff < 0:
- for _ in range(-len_diff):
- if x_shape[0] == 1:
- x_shape = x_shape[1:]
- x_coords = x_coords[1:]
- elif x_shape[-1] == 1:
- x_shape = x_shape[:-1]
- x_coords = x_coords[:-1]
- else:
- raise ValueError("shape mismatch in assignment")
- # broadcast with copy (will need to copy eventually anyway)
- tot_expand = 1
- for i, (nn, nx) in enumerate(zip(new_shape, x_shape)):
- if nn == nx:
- continue
- if nx != 1:
- raise ValueError("shape mismatch in assignment")
- x_nnz = len(x_coords[0])
- x_coords[i] = np.repeat(np.arange(nn), x_nnz)
- for j, co in enumerate(x_coords):
- if j == i:
- continue
- x_coords[j] = np.tile(co, nn)
- tot_expand *= nn
- x_data = np.tile(x_data.ravel(), tot_expand)
- return x_data, x_coords
- def _get_dense_data_and_coords(x, new_shape):
- if x.shape != new_shape:
- x = np.broadcast_to(x.squeeze(), new_shape)
- # shift scalar input to 1d so has coords
- if new_shape == ():
- x_coords = tuple([np.array([0])] * len(new_shape))
- x_data = x.ravel()
- else:
- x_coords = x.nonzero()
- x_data = x[x_coords]
- return x_data, x_coords
- def _process_axes(ndim_a, ndim_b, axes):
- if isinstance(axes, int):
- if axes < 1 or axes > min(ndim_a, ndim_b):
- raise ValueError("axes integer is out of bounds for input arrays")
- axes_a = list(range(ndim_a - axes, ndim_a))
- axes_b = list(range(axes))
- elif isinstance(axes, tuple | list):
- if len(axes) != 2:
- raise ValueError("axes must be a tuple/list of length 2")
- axes_a, axes_b = axes
- if len(axes_a) != len(axes_b):
- raise ValueError("axes lists/tuples must be of the same length")
- if any(ax >= ndim_a or ax < -ndim_a for ax in axes_a) or \
- any(bx >= ndim_b or bx < -ndim_b for bx in axes_b):
- raise ValueError("axes indices are out of bounds for input arrays")
- else:
- raise TypeError("axes must be an integer or a tuple/list of integers")
- axes_a = [axis + ndim_a if axis < 0 else axis for axis in axes_a]
- axes_b = [axis + ndim_b if axis < 0 else axis for axis in axes_b]
- return axes_a, axes_b
- def _convert_to_2d(coo, axis):
- axis_coords = tuple(coo.coords[i] for i in axis)
- axis_shape = tuple(coo.shape[i] for i in axis)
- axis_ravel = _ravel_coords(axis_coords, axis_shape)
- ndim = len(coo.coords)
- non_axis = tuple(i for i in range(ndim) if i not in axis)
- if non_axis:
- non_axis_coords = tuple(coo.coords[i] for i in non_axis)
- non_axis_shape = tuple(coo.shape[i] for i in non_axis)
- non_axis_ravel = _ravel_coords(non_axis_coords, non_axis_shape)
- coords_2d = (non_axis_ravel, axis_ravel)
- shape_2d = (math.prod(non_axis_shape), math.prod(axis_shape))
- else: # all axes included in axis so result will have 1 element
- coords_2d = (axis_ravel,)
- shape_2d = (math.prod(axis_shape),)
- non_axis_shape = ()
- new_coo = coo_array((coo.data, coords_2d), shape=shape_2d)
- return new_coo, non_axis_shape
- def _ravel_coords(coords, shape, order='C'):
- """Like np.ravel_multi_index, but avoids some overflow issues."""
- if len(coords) == 1:
- return coords[0]
- # Handle overflow as in https://github.com/scipy/scipy/pull/9132
- if len(coords) == 2:
- nrows, ncols = shape
- row, col = coords
- if order == 'C':
- maxval = (ncols * max(0, nrows - 1) + max(0, ncols - 1))
- idx_dtype = get_index_dtype(maxval=maxval)
- return np.multiply(ncols, row, dtype=idx_dtype) + col
- elif order == 'F':
- maxval = (nrows * max(0, ncols - 1) + max(0, nrows - 1))
- idx_dtype = get_index_dtype(maxval=maxval)
- return np.multiply(nrows, col, dtype=idx_dtype) + row
- else:
- raise ValueError("'order' must be 'C' or 'F'")
- return np.ravel_multi_index(coords, shape, order=order)
- def isspmatrix_coo(x):
- """Is `x` of coo_matrix type?
- Parameters
- ----------
- x
- object to check for being a coo matrix
- Returns
- -------
- bool
- True if `x` is a coo matrix, False otherwise
- Examples
- --------
- >>> from scipy.sparse import coo_array, coo_matrix, csr_matrix, isspmatrix_coo
- >>> isspmatrix_coo(coo_matrix([[5]]))
- True
- >>> isspmatrix_coo(coo_array([[5]]))
- False
- >>> isspmatrix_coo(csr_matrix([[5]]))
- False
- """
- return isinstance(x, coo_matrix)
- # This namespace class separates array from matrix with isinstance
- class coo_array(_coo_base, sparray):
- """
- A sparse array in COOrdinate format.
- Also known as the 'ijv' or 'triplet' format.
- This can be instantiated in several ways:
- coo_array(D)
- where D is an ndarray
- coo_array(S)
- with another sparse array or matrix S (equivalent to S.tocoo())
- coo_array(shape, [dtype])
- to construct an empty sparse array with shape `shape`
- dtype is optional, defaulting to dtype='d'.
- coo_array((data, coords), [shape])
- to construct from existing data and index arrays:
- 1. data[:] the entries of the sparse array, in any order
- 2. coords[i][:] the axis-i coordinates of the data entries
- Where ``A[coords] = data``, and coords is a tuple of index arrays.
- When shape is not specified, it is inferred from the index arrays.
- Attributes
- ----------
- dtype : dtype
- Data type of the sparse array
- shape : tuple of integers
- Shape of the sparse array
- ndim : int
- Number of dimensions of the sparse array
- nnz
- size
- data
- COO format data array of the sparse array
- coords
- COO format tuple of index arrays
- has_canonical_format : bool
- Whether the matrix has sorted coordinates and no duplicates
- format
- T
- Notes
- -----
- Sparse arrays can be used in arithmetic operations: they support
- addition, subtraction, multiplication, division, and matrix power.
- Advantages of the COO format
- - facilitates fast conversion among sparse formats
- - permits duplicate entries (see example)
- - very fast conversion to and from CSR/CSC formats
- Disadvantages of the COO format
- - does not directly support:
- + arithmetic operations
- + slicing
- Intended Usage
- - COO is a fast format for constructing sparse arrays
- - Once a COO array has been constructed, convert to CSR or
- CSC format for fast arithmetic and matrix vector operations
- - By default when converting to CSR or CSC format, duplicate (i,j)
- entries will be summed together. This facilitates efficient
- construction of finite element matrices and the like. (see example)
- Canonical format
- - Entries and coordinates sorted by row, then column.
- - There are no duplicate entries (i.e. duplicate (i,j) locations)
- - Data arrays MAY have explicit zeros.
- Examples
- --------
- >>> # Constructing an empty sparse array
- >>> import numpy as np
- >>> from scipy.sparse import coo_array
- >>> coo_array((3, 4), dtype=np.int8).toarray()
- array([[0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]], dtype=int8)
- >>> # Constructing a sparse array using ijv format
- >>> row = np.array([0, 3, 1, 0])
- >>> col = np.array([0, 3, 1, 2])
- >>> data = np.array([4, 5, 7, 9])
- >>> coo_array((data, (row, col)), shape=(4, 4)).toarray()
- array([[4, 0, 9, 0],
- [0, 7, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 5]])
- >>> # Constructing a sparse array with duplicate coordinates
- >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
- >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
- >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
- >>> coo = coo_array((data, (row, col)), shape=(4, 4))
- >>> # Duplicate coordinates are maintained until implicitly or explicitly summed
- >>> np.max(coo.data)
- 1
- >>> coo.toarray()
- array([[3, 0, 1, 0],
- [0, 2, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 1]])
- """
- class coo_matrix(spmatrix, _coo_base):
- """
- A sparse matrix in COOrdinate format.
- Also known as the 'ijv' or 'triplet' format.
- This can be instantiated in several ways:
- coo_matrix(D)
- where D is a 2-D ndarray
- coo_matrix(S)
- with another sparse array or matrix S (equivalent to S.tocoo())
- coo_matrix((M, N), [dtype])
- to construct an empty matrix with shape (M, N)
- dtype is optional, defaulting to dtype='d'.
- coo_matrix((data, (i, j)), [shape=(M, N)])
- to construct from three arrays:
- 1. data[:] the entries of the matrix, in any order
- 2. i[:] the row indices of the matrix entries
- 3. j[:] the column indices of the matrix entries
- Where ``A[i[k], j[k]] = data[k]``. When shape is not
- specified, it is inferred from the index arrays
- Attributes
- ----------
- dtype : dtype
- Data type of the matrix
- shape : 2-tuple
- Shape of the matrix
- ndim : int
- Number of dimensions (this is always 2)
- nnz
- size
- data
- COO format data array of the matrix
- row
- COO format row index array of the matrix
- col
- COO format column index array of the matrix
- has_canonical_format : bool
- Whether the matrix has sorted indices and no duplicates
- format
- T
- Notes
- -----
- Sparse matrices can be used in arithmetic operations: they support
- addition, subtraction, multiplication, division, and matrix power.
- Advantages of the COO format
- - facilitates fast conversion among sparse formats
- - permits duplicate entries (see example)
- - very fast conversion to and from CSR/CSC formats
- Disadvantages of the COO format
- - does not directly support:
- + arithmetic operations
- + slicing
- Intended Usage
- - COO is a fast format for constructing sparse matrices
- - Once a COO matrix has been constructed, convert to CSR or
- CSC format for fast arithmetic and matrix vector operations
- - By default when converting to CSR or CSC format, duplicate (i,j)
- entries will be summed together. This facilitates efficient
- construction of finite element matrices and the like. (see example)
- Canonical format
- - Entries and coordinates sorted by row, then column.
- - There are no duplicate entries (i.e. duplicate (i,j) locations)
- - Data arrays MAY have explicit zeros.
- Examples
- --------
- >>> # Constructing an empty matrix
- >>> import numpy as np
- >>> from scipy.sparse import coo_matrix
- >>> coo_matrix((3, 4), dtype=np.int8).toarray()
- array([[0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]], dtype=int8)
- >>> # Constructing a matrix using ijv format
- >>> row = np.array([0, 3, 1, 0])
- >>> col = np.array([0, 3, 1, 2])
- >>> data = np.array([4, 5, 7, 9])
- >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
- array([[4, 0, 9, 0],
- [0, 7, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 5]])
- >>> # Constructing a matrix with duplicate coordinates
- >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
- >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
- >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
- >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
- >>> # Duplicate coordinates are maintained until implicitly or explicitly summed
- >>> np.max(coo.data)
- 1
- >>> coo.toarray()
- array([[3, 0, 1, 0],
- [0, 2, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 1]])
- """
- def __setstate__(self, state):
- if 'coords' not in state:
- # For retro-compatibility with the previous attributes
- # storing nnz coordinates for 2D COO matrix.
- state['coords'] = (state.pop('row'), state.pop('col'))
- self.__dict__.update(state)
- def __getitem__(self, key):
- raise TypeError("'coo_matrix' object is not subscriptable")
- def __setitem__(self, key, x):
- raise TypeError("'coo_matrix' object does not support item assignment")
|