| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491 |
- """Indexing mixin for sparse array/matrix classes.
- """
- import numpy as np
- from ._sputils import isintlike
- from ._base import sparray, issparse
- INT_TYPES = (int, np.integer)
- def _broadcast_arrays(*arrays):
- """
- Same as np.broadcast_arrays(a, b) but old writeability rules.
- NumPy >= 1.17.0 transitions broadcast_arrays to return
- read-only arrays. Set writeability explicitly to avoid warnings.
- Retain the old writeability rules, as our Cython code assumes
- the old behavior.
- """
- broadcast_arrays = np.broadcast_arrays(*arrays)
- for x, a in zip(broadcast_arrays, arrays):
- x.flags.writeable = a.flags.writeable
- return broadcast_arrays
- class IndexMixin:
- """
- This class provides common dispatching and validation logic for indexing.
- """
- def __getitem__(self, key):
- index, new_shape, _, _ = _validate_indices(key, self.shape, self.format)
- if len(new_shape) > 2:
- raise IndexError("Indexing that leads to >2D is not supported by "
- f"{self.format} format. Try converting to COO format")
- # 1D array
- if len(index) == 1:
- idx = index[0]
- if isinstance(idx, np.ndarray):
- if idx.shape == ():
- idx = idx.item()
- if isinstance(idx, INT_TYPES):
- res = self._get_int(idx)
- elif isinstance(idx, slice):
- res = self._get_slice(idx)
- else: # assume array idx
- res = self._get_array(idx)
- # package the result and return
- if not isinstance(self, sparray):
- return res
- # handle np.newaxis in idx when result would otherwise be a scalar
- if res.shape == () and new_shape != ():
- if len(new_shape) == 1:
- return self.__class__([res], shape=new_shape, dtype=self.dtype)
- if len(new_shape) == 2:
- return self.__class__([[res]], shape=new_shape, dtype=self.dtype)
- return res.reshape(new_shape)
- # 2D array
- row, col = index
- # Dispatch to specialized methods.
- if isinstance(row, INT_TYPES):
- if isinstance(col, INT_TYPES):
- res = self._get_intXint(row, col)
- elif isinstance(col, slice):
- res = self._get_intXslice(row, col)
- elif col.ndim == 1:
- res = self._get_intXarray(row, col)
- elif col.ndim == 2:
- res = self._get_intXarray(row, col)
- else:
- raise IndexError('index results in >2 dimensions')
- elif isinstance(row, slice):
- if isinstance(col, INT_TYPES):
- res = self._get_sliceXint(row, col)
- elif isinstance(col, slice):
- if row == slice(None) and row == col:
- res = self.copy()
- else:
- res = self._get_sliceXslice(row, col)
- elif col.ndim == 1:
- res = self._get_sliceXarray(row, col)
- else:
- raise IndexError('index results in >2 dimensions')
- else:
- if isinstance(col, INT_TYPES):
- res = self._get_arrayXint(row, col)
- elif isinstance(col, slice):
- res = self._get_arrayXslice(row, col)
- # arrayXarray preprocess
- elif (row.ndim == 2 and row.shape[1] == 1
- and (col.ndim == 1 or col.shape[0] == 1)):
- # outer indexing
- res = self._get_columnXarray(row[:, 0], col.reshape(-1))
- else:
- # inner indexing
- row, col = _broadcast_arrays(row, col)
- if row.shape != col.shape:
- raise IndexError('number of row and column indices differ')
- if row.size == 0:
- res = self.__class__(np.atleast_2d(row).shape, dtype=self.dtype)
- else:
- res = self._get_arrayXarray(row, col)
- # handle spmatrix (must be 2d, dont let 1d new_shape start reshape)
- if not isinstance(self, sparray):
- if new_shape == () or (len(new_shape) == 1 and res.ndim != 0):
- # res handles cases not inflated by None
- return res
- if len(new_shape) == 1:
- # shape inflated to 1D by None in index. Make 2D
- new_shape = (1,) + new_shape
- # reshape if needed (when None changes shape, e.g. A[1,:,None])
- return res if new_shape == res.shape else res.reshape(new_shape)
- # package the result and return
- if res.shape != new_shape:
- # handle formats that support indexing but not 1D (lil for now)
- if self.format == "lil" and len(new_shape) != 2:
- if res.shape == ():
- return self._coo_container([res], shape = new_shape)
- return res.tocoo().reshape(new_shape)
- return res.reshape(new_shape)
- return res
- def __setitem__(self, key, x):
- index, new_shape, _, _ = _validate_indices(key, self.shape, self.format)
- # 1D array
- if len(index) == 1:
- idx = index[0]
- if issparse(x):
- x = x.toarray()
- else:
- x = np.asarray(x, dtype=self.dtype)
- if isinstance(idx, INT_TYPES):
- if x.size != 1:
- raise ValueError('Trying to assign a sequence to an item')
- self._set_int(idx, x.flat[0])
- return
- if isinstance(idx, slice):
- # check for simple case of slice that gives 1 item
- # Note: Python `range` does not use lots of memory
- idx_range = range(*idx.indices(self.shape[0]))
- N = len(idx_range)
- if N == 1 and x.size == 1:
- self._set_int(idx_range[0], x.flat[0])
- return
- idx = np.arange(*idx.indices(self.shape[0]))
- idx_shape = idx.shape
- else:
- idx_shape = idx.squeeze().shape
- # broadcast scalar to full 1d
- if x.squeeze().shape != idx_shape:
- x = np.broadcast_to(x, idx.shape)
- if x.size != 0:
- self._set_array(idx, x)
- return
- # 2D array
- row, col = index
- if isinstance(row, INT_TYPES) and isinstance(col, INT_TYPES):
- if issparse(x):
- x = x.toarray()
- else:
- x = np.asarray(x, dtype=self.dtype)
- if x.size != 1:
- raise ValueError('Trying to assign a sequence to an item')
- self._set_intXint(row, col, x.flat[0])
- return
- if isinstance(row, slice):
- row = np.arange(*row.indices(self.shape[0]))[:, None]
- else:
- row = np.atleast_1d(row)
- if isinstance(col, slice):
- col = np.arange(*col.indices(self.shape[1]))[None, :]
- if row.ndim == 1:
- row = row[:, None]
- else:
- col = np.atleast_1d(col)
- i, j = _broadcast_arrays(row, col)
- if i.shape != j.shape:
- raise IndexError('number of row and column indices differ')
- if issparse(x):
- if 0 in x.shape:
- return
- if i.ndim == 1:
- # Inner indexing, so treat them like row vectors.
- i = i[None]
- j = j[None]
- x = x.tocoo(copy=False).reshape(x._shape_as_2d, copy=True)
- broadcast_row = x.shape[0] == 1 and i.shape[0] != 1
- broadcast_col = x.shape[1] == 1 and i.shape[1] != 1
- if not ((broadcast_row or x.shape[0] == i.shape[0]) and
- (broadcast_col or x.shape[1] == i.shape[1])):
- raise ValueError('shape mismatch in assignment')
- x.sum_duplicates()
- self._set_arrayXarray_sparse(i, j, x)
- else:
- # Make x and i into the same shape
- x = np.asarray(x, dtype=self.dtype)
- if x.squeeze().shape != i.squeeze().shape:
- x = np.broadcast_to(x, i.shape)
- if x.size == 0:
- return
- x = x.reshape(i.shape)
- self._set_arrayXarray(i, j, x)
- def _getrow(self, i):
- """Return a copy of row i of the matrix, as a (1 x n) row vector.
- """
- M, N = self.shape
- i = int(i)
- if i < -M or i >= M:
- raise IndexError(f'index ({i}) out of range')
- if i < 0:
- i += M
- return self._get_intXslice(i, slice(None))
- def _getcol(self, i):
- """Return a copy of column i of the matrix, as a (m x 1) column vector.
- """
- M, N = self.shape
- i = int(i)
- if i < -N or i >= N:
- raise IndexError(f'index ({i}) out of range')
- if i < 0:
- i += N
- return self._get_sliceXint(slice(None), i)
- def _get_int(self, idx):
- raise NotImplementedError()
- def _get_slice(self, idx):
- raise NotImplementedError()
- def _get_array(self, idx):
- raise NotImplementedError()
- def _get_intXint(self, row, col):
- raise NotImplementedError()
- def _get_intXarray(self, row, col):
- raise NotImplementedError()
- def _get_intXslice(self, row, col):
- raise NotImplementedError()
- def _get_sliceXint(self, row, col):
- raise NotImplementedError()
- def _get_sliceXslice(self, row, col):
- raise NotImplementedError()
- def _get_sliceXarray(self, row, col):
- raise NotImplementedError()
- def _get_arrayXint(self, row, col):
- raise NotImplementedError()
- def _get_arrayXslice(self, row, col):
- raise NotImplementedError()
- def _get_columnXarray(self, row, col):
- raise NotImplementedError()
- def _get_arrayXarray(self, row, col):
- raise NotImplementedError()
- def _set_int(self, idx, x):
- raise NotImplementedError()
- def _set_array(self, idx, x):
- raise NotImplementedError()
- def _set_intXint(self, row, col, x):
- raise NotImplementedError()
- def _set_arrayXarray(self, row, col, x):
- raise NotImplementedError()
- def _set_arrayXarray_sparse(self, row, col, x):
- # Fall back to densifying x
- x = np.asarray(x.toarray(), dtype=self.dtype)
- x, _ = _broadcast_arrays(x, row)
- self._set_arrayXarray(row, col, x)
- def _validate_indices(key, self_shape, self_format):
- """Returns four sequences: (index, requested shape, arrays, nones)
- index : tuple of validated idx objects. bool arrays->nonzero(),
- arrays broadcast, ints and slices as they are, Nones removed
- requested shape : the shape of the indexed space, including Nones
- arr_pos : position within index of all arrays or ints (for array fancy indexing)
- none_pos : insert positions to put newaxis coords in indexed space.
- """
- self_ndim = len(self_shape)
- # single ellipsis
- if key is Ellipsis:
- return (slice(None),) * self_ndim, self_shape, [], []
- if not isinstance(key, tuple):
- key = [key]
- # pass 1:
- # - expand ellipsis to allow matching to self_shape
- # - preprocess boolean array index
- # - error on sparse array as an index
- # - count the ndim of the index and check if too long
- ellps_pos = None
- index_1st = []
- prelim_ndim = 0
- for i, idx in enumerate(key):
- if idx is Ellipsis:
- if ellps_pos is not None:
- raise IndexError('an index can only have a single ellipsis')
- ellps_pos = i
- elif idx is None:
- index_1st.append(idx)
- elif isinstance(idx, slice) or isintlike(idx):
- index_1st.append(idx)
- prelim_ndim += 1
- elif (ix := _compatible_boolean_index(idx, self_ndim)) is not None:
- # can't check the shape of ix until we resolve ellipsis (pass 2)
- index_1st.append(ix)
- prelim_ndim += ix.ndim
- elif issparse(idx):
- # TODO: make sparse indexing work for sparray
- raise IndexError(
- 'Indexing with sparse matrices is not supported '
- 'except boolean indexing where matrix and index '
- 'are equal shapes.')
- else: # dense array
- index_1st.append(np.asarray(idx))
- prelim_ndim += 1
- if prelim_ndim > self_ndim:
- raise IndexError(
- 'Too many indices for array or tuple index out of range. '
- f'Key {key} needs {prelim_ndim}D. Array is {self_ndim}D'
- )
- ellip_slices = (self_ndim - prelim_ndim) * [slice(None)]
- if ellip_slices:
- if ellps_pos is None:
- index_1st.extend(ellip_slices)
- else:
- index_1st = index_1st[:ellps_pos] + ellip_slices + index_1st[ellps_pos:]
- # second pass (have processed ellipsis and preprocessed arrays)
- # pass 2:
- # note: integer arrays provide info for one axis even if >1D array.
- # The shape of array affects outgoing(get)/incoming(set) shape only
- # - form `new_shape` (shape of outgo/incom-ing result of key
- # - form `index` (validated form of each slice/int/array index)
- # - validate and make canonical: slice and int
- # - turn bool arrays to int arrays via `.nonzero()`
- # - collect positions of Newaxis/None in `none_positions`
- # - collect positions of "array or int" in `arr_int_pos`
- idx_shape = []
- index_ndim = 0
- index = []
- array_indices = []
- none_positions = []
- arr_int_pos = [] # track positions of arrays and integers
- for i, idx in enumerate(index_1st):
- if idx is None:
- none_positions.append(len(idx_shape))
- idx_shape.append(1)
- elif isinstance(idx, slice):
- index.append(idx)
- Ms = self_shape[index_ndim]
- len_slice = len(range(*idx.indices(Ms)))
- idx_shape.append(len_slice)
- index_ndim += 1
- elif isintlike(idx):
- N = self_shape[index_ndim]
- if not (-N <= idx < N):
- raise IndexError(f'index ({idx}) out of range')
- idx = int(idx + N if idx < 0 else idx)
- index.append(idx)
- arr_int_pos.append(index_ndim)
- index_ndim += 1
- # bool array (checked in first pass)
- elif idx.dtype.kind == 'b':
- tmp_ndim = index_ndim + idx.ndim
- mid_shape = self_shape[index_ndim:tmp_ndim]
- if idx.shape != mid_shape:
- raise IndexError(
- f"bool index {i} has shape {mid_shape} instead of {idx.shape}"
- )
- index.extend(idx.nonzero())
- array_indices.extend(range(index_ndim, tmp_ndim))
- arr_int_pos.extend(range(index_ndim, tmp_ndim))
- index_ndim = tmp_ndim
- else: # dense array
- N = self_shape[index_ndim]
- idx = _asindices(idx, N, self_format)
- index.append(idx)
- arr_int_pos.append(index_ndim)
- array_indices.append(index_ndim)
- index_ndim += 1
- if len(array_indices) > 1:
- arr_shapes = [index[i].shape for i in array_indices]
- try:
- arr_shape = np.broadcast_shapes(*arr_shapes)
- except ValueError:
- shapes = " ".join(str(shp) for shp in arr_shapes)
- msg = (f'shape mismatch: indexing arrays could not be broadcast '
- f'together with shapes {shapes}')
- raise IndexError(msg)
- # len(array_indices) implies arr_int_pos has at least one element
- # if arrays and ints not adjacent, move to front of shape
- if len(arr_int_pos) != (arr_int_pos[-1] - arr_int_pos[0] + 1):
- idx_shape = list(arr_shape) + idx_shape
- else:
- arr_pos = arr_int_pos[0]
- idx_shape = idx_shape[:arr_pos] + list(arr_shape) + idx_shape[arr_pos:]
- elif len(array_indices) == 1:
- arr_shape = index[array_indices[0]].shape
- arr_pos = arr_int_pos[0]
- idx_shape = idx_shape[:arr_pos] + list(arr_shape) + idx_shape[arr_pos:]
- return tuple(index), tuple(idx_shape), arr_int_pos, none_positions
- def _asindices(idx, length, format):
- """Convert `idx` to a valid index for an axis with a given length.
- Subclasses that need special validation can override this method.
- """
- try:
- ix = np.asarray(idx)
- except (ValueError, TypeError, MemoryError) as e:
- raise IndexError('invalid index') from e
- if format != "coo" and ix.ndim not in (1, 2) or format == "coo" and ix.ndim == 0:
- raise IndexError(f'Index dimension must be 1 or 2. Got {ix.ndim}')
- # LIL routines handle bounds-checking for us, so don't do it here.
- if format == "lil":
- return ix
- if ix.size == 0:
- return ix
- # Check bounds
- max_indx = ix.max()
- if max_indx >= length:
- raise IndexError(f'index ({max_indx}) out of range')
- min_indx = ix.min()
- if min_indx < 0:
- if min_indx < -length:
- raise IndexError(f'index ({min_indx}) out of range')
- if ix is idx or not ix.flags.owndata:
- ix = ix.copy()
- ix[ix < 0] += length
- return ix
- def _compatible_boolean_index(idx, desired_ndim):
- """Check for boolean array or array-like. peek before asarray for array-like"""
- # use attribute ndim to indicate a compatible array and check dtype
- # if not, look at 1st element as quick rejection of bool, else slower asanyarray
- if not hasattr(idx, 'ndim'):
- # is first element boolean?
- try:
- ix = next(iter(idx), None)
- for _ in range(desired_ndim):
- if isinstance(ix, bool):
- break
- ix = next(iter(ix), None)
- else:
- return None
- except TypeError:
- return None
- # since first is boolean, construct array and check all elements
- idx = np.asanyarray(idx)
- if idx.dtype.kind == 'b':
- return idx
- return None
|