_data.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. """Base class for sparse matrice with a .data attribute
  2. subclasses must provide a _with_data() method that
  3. creates a new matrix with the same sparsity pattern
  4. as self but with a different data array
  5. """
  6. import math
  7. import numpy as np
  8. from ._base import _spbase, sparray, _ufuncs_with_fixed_point_at_zero
  9. from ._sputils import isscalarlike, validateaxis
  10. __all__ = []
  11. # TODO implement all relevant operations
  12. # use .data.__methods__() instead of /=, *=, etc.
  13. class _data_matrix(_spbase):
  14. def __init__(self, arg1, *, maxprint=None):
  15. _spbase.__init__(self, arg1, maxprint=maxprint)
  16. @property
  17. def dtype(self):
  18. return self.data.dtype
  19. @dtype.setter
  20. def dtype(self, newtype):
  21. self.data = self.data.view(newtype)
  22. def _deduped_data(self):
  23. if hasattr(self, 'sum_duplicates'):
  24. self.sum_duplicates()
  25. return self.data
  26. def __abs__(self):
  27. return self._with_data(abs(self._deduped_data()))
  28. def __round__(self, ndigits=0):
  29. return self._with_data(np.around(self._deduped_data(), decimals=ndigits))
  30. def _real(self):
  31. return self._with_data(self.data.real)
  32. def _imag(self):
  33. return self._with_data(self.data.imag)
  34. def __neg__(self):
  35. if self.dtype.kind == 'b':
  36. raise NotImplementedError('negating a boolean sparse array is not '
  37. 'supported')
  38. return self._with_data(-self.data)
  39. def __imul__(self, other): # self *= other
  40. if isscalarlike(other):
  41. self.data *= other
  42. return self
  43. return NotImplemented
  44. def __itruediv__(self, other): # self /= other
  45. if isscalarlike(other):
  46. recip = 1.0 / other
  47. self.data *= recip
  48. return self
  49. else:
  50. return NotImplemented
  51. def astype(self, dtype, casting='unsafe', copy=True):
  52. dtype = np.dtype(dtype)
  53. if self.dtype != dtype:
  54. matrix = self._with_data(
  55. self.data.astype(dtype, casting=casting, copy=True),
  56. copy=True
  57. )
  58. return matrix._with_data(matrix._deduped_data(), copy=False)
  59. elif copy:
  60. return self.copy()
  61. else:
  62. return self
  63. astype.__doc__ = _spbase.astype.__doc__
  64. def conjugate(self, copy=True):
  65. if np.issubdtype(self.dtype, np.complexfloating):
  66. return self._with_data(self.data.conjugate(), copy=copy)
  67. elif copy:
  68. return self.copy()
  69. else:
  70. return self
  71. conjugate.__doc__ = _spbase.conjugate.__doc__
  72. def copy(self):
  73. return self._with_data(self.data.copy(), copy=True)
  74. copy.__doc__ = _spbase.copy.__doc__
  75. def power(self, n, dtype=None):
  76. """
  77. This function performs element-wise power.
  78. Parameters
  79. ----------
  80. n : scalar
  81. n is a non-zero scalar (nonzero avoids dense ones creation)
  82. If zero power is desired, special case it to use `np.ones`
  83. dtype : If dtype is not specified, the current dtype will be preserved.
  84. Raises
  85. ------
  86. NotImplementedError : if n is a zero scalar
  87. If zero power is desired, special case it to use
  88. ``np.ones(A.shape, dtype=A.dtype)``
  89. """
  90. if not isscalarlike(n):
  91. raise NotImplementedError("input is not scalar")
  92. if not n:
  93. raise NotImplementedError(
  94. "zero power is not supported as it would densify the matrix.\n"
  95. "Use `np.ones(A.shape, dtype=A.dtype)` for this case."
  96. )
  97. data = self._deduped_data()
  98. if dtype is not None:
  99. data = data.astype(dtype, copy=False)
  100. return self._with_data(data ** n)
  101. ###########################
  102. # Multiplication handlers #
  103. ###########################
  104. def _mul_scalar(self, other):
  105. return self._with_data(self.data * other)
  106. # Add the numpy unary ufuncs for which func(0) = 0 to _data_matrix.
  107. for npfunc in _ufuncs_with_fixed_point_at_zero:
  108. name = npfunc.__name__
  109. def _create_method(op):
  110. def method(self):
  111. result = op(self._deduped_data())
  112. return self._with_data(result, copy=True)
  113. method.__doc__ = (f"Element-wise {name}.\n\n"
  114. f"See `numpy.{name}` for more information.")
  115. method.__name__ = name
  116. return method
  117. setattr(_data_matrix, name, _create_method(npfunc))
  118. def _find_missing_index(ind, n):
  119. for k, a in enumerate(ind):
  120. if k != a:
  121. return k
  122. k += 1
  123. if k < n:
  124. return k
  125. else:
  126. return -1
  127. class _minmax_mixin:
  128. """Mixin for min and max methods.
  129. These are not implemented for dia_matrix, hence the separate class.
  130. """
  131. def _min_or_max_axis(self, axis, min_or_max, explicit):
  132. # already checked that self.shape[axis] is not zero
  133. N = self.shape[axis]
  134. M = self.shape[1 - axis]
  135. idx_dtype = self._get_index_dtype(maxval=M)
  136. mat = self.tocsc() if axis == 0 else self.tocsr()
  137. mat.sum_duplicates()
  138. major_index, value = mat._minor_reduce(min_or_max)
  139. if not explicit:
  140. not_full = np.diff(mat.indptr)[major_index] < N
  141. value[not_full] = min_or_max(value[not_full], 0)
  142. mask = value != 0
  143. major_index = np.compress(mask, major_index).astype(idx_dtype, copy=False)
  144. value = np.compress(mask, value)
  145. if isinstance(self, sparray):
  146. coords = (major_index,)
  147. shape = (M,)
  148. return self._coo_container((value, coords), shape=shape, dtype=self.dtype)
  149. if axis == 0:
  150. return self._coo_container(
  151. (value, (np.zeros(len(value), dtype=idx_dtype), major_index)),
  152. dtype=self.dtype, shape=(1, M)
  153. )
  154. else:
  155. return self._coo_container(
  156. (value, (major_index, np.zeros(len(value), dtype=idx_dtype))),
  157. dtype=self.dtype, shape=(M, 1)
  158. )
  159. def _min_or_max(self, axis, out, min_or_max, explicit):
  160. if out is not None:
  161. raise ValueError("Sparse min/max does not support an 'out' parameter.")
  162. axis = validateaxis(axis, ndim=self.ndim)
  163. if axis is None:
  164. if 0 in self.shape:
  165. raise ValueError("zero-size array to reduction operation")
  166. zero = self.dtype.type(0)
  167. if self.nnz == 0:
  168. return zero
  169. m = min_or_max.reduce(self._deduped_data().ravel())
  170. if self.nnz != math.prod(self.shape) and not explicit:
  171. m = min_or_max(zero, m)
  172. return m
  173. if any(self.shape[d] == 0 for d in axis):
  174. raise ValueError("zero-size array to reduction operation")
  175. if self.ndim == 2:
  176. # note: 2D ensures that len(axis)==1 so we pass in the int axis[0]
  177. return self._min_or_max_axis(axis[0], min_or_max, explicit)
  178. return self._min_or_max_axis_nd(axis, min_or_max, explicit)
  179. def _argminmax_axis(self, axis, argminmax, compare, explicit):
  180. zero = self.dtype.type(0)
  181. mat = self.tocsc() if axis == 0 else self.tocsr()
  182. mat.sum_duplicates()
  183. ret_size, line_size = mat._swap(mat.shape)
  184. ret = np.zeros(ret_size, dtype=int)
  185. nz_lines, = np.nonzero(np.diff(mat.indptr))
  186. for i in nz_lines:
  187. p, q = mat.indptr[i:i + 2]
  188. data = mat.data[p:q]
  189. indices = mat.indices[p:q]
  190. extreme_index = argminmax(data)
  191. extreme_value = data[extreme_index]
  192. if explicit:
  193. if q - p > 0:
  194. ret[i] = indices[extreme_index]
  195. else:
  196. if compare(extreme_value, zero) or q - p == line_size:
  197. ret[i] = indices[extreme_index]
  198. else:
  199. zero_ind = _find_missing_index(indices, line_size)
  200. if extreme_value == zero:
  201. ret[i] = min(extreme_index, zero_ind)
  202. else:
  203. ret[i] = zero_ind
  204. if isinstance(self, sparray):
  205. return ret
  206. if axis == 1:
  207. ret = ret.reshape(-1, 1)
  208. return self._ascontainer(ret)
  209. def _argminmax(self, axis, out, argminmax, compare, explicit):
  210. if out is not None:
  211. minmax = "argmin" if argminmax == np.argmin else "argmax"
  212. raise ValueError(f"Sparse {minmax} does not support an 'out' parameter.")
  213. axis = validateaxis(axis, ndim=self.ndim)
  214. if axis is not None:
  215. if any(self.shape[i] == 0 for i in axis):
  216. minmax = "argmin" if argminmax == np.argmin else "argmax"
  217. raise ValueError(f"Cannot apply {minmax} along a zero-sized dimension.")
  218. if self.ndim == 2:
  219. # note: 2D ensures that len(axis)==1 so we pass in the int axis[0]
  220. return self._argminmax_axis(axis[0], argminmax, compare, explicit)
  221. return self._argminmax_axis_nd(axis, argminmax, compare, explicit)
  222. if 0 in self.shape:
  223. minmax = "argmin" if argminmax == np.argmin else "argmax"
  224. raise ValueError(f"Cannot apply {minmax} to an empty matrix.")
  225. if self.nnz == 0:
  226. if explicit:
  227. minmax = "argmin" if argminmax == np.argmin else "argmax"
  228. raise ValueError(f"Cannot apply {minmax} to zero matrix "
  229. "when explicit=True.")
  230. return 0
  231. zero = self.dtype.type(0)
  232. mat = self.tocoo()
  233. # Convert to canonical form: no duplicates, sorted indices.
  234. mat.sum_duplicates()
  235. extreme_index = argminmax(mat.data)
  236. if explicit:
  237. return extreme_index
  238. extreme_value = mat.data[extreme_index]
  239. if mat.ndim > 2:
  240. mat = mat.reshape(-1)
  241. # If the min value is less than zero, or max is greater than zero,
  242. # then we do not need to worry about implicit zeros.
  243. # And we use a "cheap test" for the rare case of no implicit zeros.
  244. maxnnz = math.prod(self.shape)
  245. if compare(extreme_value, zero) or mat.nnz == maxnnz:
  246. # cast to Python int to avoid overflow and RuntimeError
  247. if mat.ndim == 1: # includes nD case that was reshaped above
  248. return int(mat.col[extreme_index])
  249. # ndim == 2
  250. num_col = mat.shape[-1]
  251. return int(mat.row[extreme_index]) * num_col + int(mat.col[extreme_index])
  252. # At this stage, any implicit zero could be the min or max value.
  253. # After sum_duplicates(), the `row` and `col` arrays are guaranteed to
  254. # be sorted in C-order, which means the linearized indices are sorted.
  255. if mat.ndim == 1: # includes nD case that was reshaped above
  256. linear_indices = mat.coords[-1]
  257. else: # ndim == 2
  258. num_col = mat.shape[-1]
  259. linear_indices = mat.row * num_col + mat.col
  260. first_implicit_zero_index = _find_missing_index(linear_indices, maxnnz)
  261. if extreme_value == zero:
  262. return min(first_implicit_zero_index, extreme_index)
  263. return first_implicit_zero_index
  264. def max(self, axis=None, out=None, *, explicit=False):
  265. """Return the maximum of the array/matrix or maximum along an axis.
  266. By default, all elements are taken into account, not just the non-zero ones.
  267. But with `explicit` set, only the stored elements are considered.
  268. Parameters
  269. ----------
  270. axis : {-2, -1, 0, 1, None} optional
  271. Axis along which the sum is computed. The default is to
  272. compute the maximum over all elements, returning
  273. a scalar (i.e., `axis` = `None`).
  274. out : None, optional
  275. This argument is in the signature *solely* for NumPy
  276. compatibility reasons. Do not pass in anything except
  277. for the default value, as this argument is not used.
  278. explicit : {False, True} optional (default: False)
  279. When set to True, only the stored elements will be considered.
  280. If a row/column is empty, the sparse.coo_array returned
  281. has no stored element (i.e. an implicit zero) for that row/column.
  282. .. versionadded:: 1.15.0
  283. Returns
  284. -------
  285. amax : coo_array or scalar
  286. Maximum of `a`. If `axis` is None, the result is a scalar value.
  287. If `axis` is given, the result is a sparse.coo_array of dimension
  288. ``a.ndim - 1``.
  289. See Also
  290. --------
  291. min : The minimum value of a sparse array/matrix along a given axis.
  292. numpy.max : NumPy's implementation of 'max'
  293. """
  294. return self._min_or_max(axis, out, np.maximum, explicit)
  295. def min(self, axis=None, out=None, *, explicit=False):
  296. """Return the minimum of the array/matrix or maximum along an axis.
  297. By default, all elements are taken into account, not just the non-zero ones.
  298. But with `explicit` set, only the stored elements are considered.
  299. Parameters
  300. ----------
  301. axis : {-2, -1, 0, 1, None} optional
  302. Axis along which the sum is computed. The default is to
  303. compute the minimum over all elements, returning
  304. a scalar (i.e., `axis` = `None`).
  305. out : None, optional
  306. This argument is in the signature *solely* for NumPy
  307. compatibility reasons. Do not pass in anything except for
  308. the default value, as this argument is not used.
  309. explicit : {False, True} optional (default: False)
  310. When set to True, only the stored elements will be considered.
  311. If a row/column is empty, the sparse.coo_array returned
  312. has no stored element (i.e. an implicit zero) for that row/column.
  313. .. versionadded:: 1.15.0
  314. Returns
  315. -------
  316. amin : coo_matrix or scalar
  317. Minimum of `a`. If `axis` is None, the result is a scalar value.
  318. If `axis` is given, the result is a sparse.coo_array of dimension
  319. ``a.ndim - 1``.
  320. See Also
  321. --------
  322. max : The maximum value of a sparse array/matrix along a given axis.
  323. numpy.min : NumPy's implementation of 'min'
  324. """
  325. return self._min_or_max(axis, out, np.minimum, explicit)
  326. def nanmax(self, axis=None, out=None, *, explicit=False):
  327. """Return the maximum, ignoring any Nans, along an axis.
  328. Return the maximum, ignoring any Nans, of the array/matrix along an axis.
  329. By default this takes all elements into account, but with `explicit` set,
  330. only stored elements are considered.
  331. .. versionadded:: 1.11.0
  332. Parameters
  333. ----------
  334. axis : {-2, -1, 0, 1, None} optional
  335. Axis along which the maximum is computed. The default is to
  336. compute the maximum over all elements, returning
  337. a scalar (i.e., `axis` = `None`).
  338. out : None, optional
  339. This argument is in the signature *solely* for NumPy
  340. compatibility reasons. Do not pass in anything except
  341. for the default value, as this argument is not used.
  342. explicit : {False, True} optional (default: False)
  343. When set to True, only the stored elements will be considered.
  344. If a row/column is empty, the sparse.coo_array returned
  345. has no stored element (i.e. an implicit zero) for that row/column.
  346. .. versionadded:: 1.15.0
  347. Returns
  348. -------
  349. amax : coo_array or scalar
  350. Maximum of `a`. If `axis` is None, the result is a scalar value.
  351. If `axis` is given, the result is a sparse.coo_array of dimension
  352. ``a.ndim - 1``.
  353. See Also
  354. --------
  355. nanmin : The minimum value of a sparse array/matrix along a given axis,
  356. ignoring NaNs.
  357. max : The maximum value of a sparse array/matrix along a given axis,
  358. propagating NaNs.
  359. numpy.nanmax : NumPy's implementation of 'nanmax'.
  360. """
  361. return self._min_or_max(axis, out, np.fmax, explicit)
  362. def nanmin(self, axis=None, out=None, *, explicit=False):
  363. """Return the minimum, ignoring any Nans, along an axis.
  364. Return the minimum, ignoring any Nans, of the array/matrix along an axis.
  365. By default this takes all elements into account, but with `explicit` set,
  366. only stored elements are considered.
  367. .. versionadded:: 1.11.0
  368. Parameters
  369. ----------
  370. axis : {-2, -1, 0, 1, None} optional
  371. Axis along which the minimum is computed. The default is to
  372. compute the minimum over all elements, returning
  373. a scalar (i.e., `axis` = `None`).
  374. out : None, optional
  375. This argument is in the signature *solely* for NumPy
  376. compatibility reasons. Do not pass in anything except for
  377. the default value, as this argument is not used.
  378. explicit : {False, True} optional (default: False)
  379. When set to True, only the stored elements will be considered.
  380. If a row/column is empty, the sparse.coo_array returned
  381. has no stored element (i.e. an implicit zero) for that row/column.
  382. .. versionadded:: 1.15.0
  383. Returns
  384. -------
  385. amin : coo_array or scalar
  386. Minimum of `a`. If `axis` is None, the result is a scalar value.
  387. If `axis` is given, the result is a sparse.coo_array of dimension
  388. ``a.ndim - 1``.
  389. See Also
  390. --------
  391. nanmax : The maximum value of a sparse array/matrix along a given axis,
  392. ignoring NaNs.
  393. min : The minimum value of a sparse array/matrix along a given axis,
  394. propagating NaNs.
  395. numpy.nanmin : NumPy's implementation of 'nanmin'.
  396. """
  397. return self._min_or_max(axis, out, np.fmin, explicit)
  398. def argmax(self, axis=None, out=None, *, explicit=False):
  399. """Return indices of maximum elements along an axis.
  400. By default, implicit zero elements are taken into account. If there are
  401. several minimum values, the index of the first occurrence is returned.
  402. If `explicit` is set, only explicitly stored elements will be considered.
  403. Parameters
  404. ----------
  405. axis : {-2, -1, 0, 1, None}, optional
  406. Axis along which the argmax is computed. If None (default), index
  407. of the maximum element in the flatten data is returned.
  408. out : None, optional
  409. This argument is in the signature *solely* for NumPy
  410. compatibility reasons. Do not pass in anything except for
  411. the default value, as this argument is not used.
  412. explicit : {False, True} optional (default: False)
  413. When set to True, only explicitly stored elements will be considered.
  414. If axis is not None and an axis has no stored elements, argmax
  415. is undefined, so the index ``0`` is returned for that row/column.
  416. .. versionadded:: 1.15.0
  417. Returns
  418. -------
  419. ind : numpy.matrix or int
  420. Indices of maximum elements. If matrix, its size along `axis` is 1.
  421. """
  422. return self._argminmax(axis, out, np.argmax, np.greater, explicit)
  423. def argmin(self, axis=None, out=None, *, explicit=False):
  424. """Return indices of minimum elements along an axis.
  425. By default, implicit zero elements are taken into account. If there are
  426. several minimum values, the index of the first occurrence is returned.
  427. If `explicit` is set, only explicitly stored elements will be considered.
  428. Parameters
  429. ----------
  430. axis : {-2, -1, 0, 1, None}, optional
  431. Axis along which the argmin is computed. If None (default), index
  432. of the minimum element in the flatten data is returned.
  433. out : None, optional
  434. This argument is in the signature *solely* for NumPy
  435. compatibility reasons. Do not pass in anything except for
  436. the default value, as this argument is not used.
  437. explicit : {False, True} optional (default: False)
  438. When set to True, only explicitly stored elements will be considered.
  439. If axis is not None and an axis has no stored elements, argmin
  440. is undefined, so the index ``0`` is returned for that row/column.
  441. .. versionadded:: 1.15.0
  442. Returns
  443. -------
  444. ind : numpy.matrix or int
  445. Indices of minimum elements. If matrix, its size along `axis` is 1.
  446. """
  447. return self._argminmax(axis, out, np.argmin, np.less, explicit)