_csc.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. """Compressed Sparse Column matrix format"""
  2. __docformat__ = "restructuredtext en"
  3. __all__ = ['csc_array', 'csc_matrix', 'isspmatrix_csc']
  4. import numpy as np
  5. from ._matrix import spmatrix
  6. from ._base import _spbase, sparray
  7. from ._sparsetools import csr_tocsc, expandptr
  8. from ._sputils import upcast
  9. from ._compressed import _cs_matrix
  10. class _csc_base(_cs_matrix):
  11. _format = 'csc'
  12. def transpose(self, axes=None, copy=False):
  13. if axes is not None and axes != (1, 0):
  14. raise ValueError("Sparse arrays/matrices do not support "
  15. "an 'axes' parameter because swapping "
  16. "dimensions is the only logical permutation.")
  17. M, N = self.shape
  18. return self._csr_container((self.data, self.indices,
  19. self.indptr), (N, M), copy=copy)
  20. transpose.__doc__ = _spbase.transpose.__doc__
  21. def __iter__(self):
  22. yield from self.tocsr()
  23. def tocsc(self, copy=False):
  24. if copy:
  25. return self.copy()
  26. else:
  27. return self
  28. tocsc.__doc__ = _spbase.tocsc.__doc__
  29. def tocsr(self, copy=False):
  30. M,N = self.shape
  31. idx_dtype = self._get_index_dtype((self.indptr, self.indices),
  32. maxval=max(self.nnz, N))
  33. indptr = np.empty(M + 1, dtype=idx_dtype)
  34. indices = np.empty(self.nnz, dtype=idx_dtype)
  35. data = np.empty(self.nnz, dtype=upcast(self.dtype))
  36. csr_tocsc(N, M,
  37. self.indptr.astype(idx_dtype, copy=False),
  38. self.indices.astype(idx_dtype, copy=False),
  39. self.data,
  40. indptr,
  41. indices,
  42. data)
  43. A = self._csr_container(
  44. (data, indices, indptr),
  45. shape=self.shape, copy=False
  46. )
  47. A.has_sorted_indices = True
  48. return A
  49. tocsr.__doc__ = _spbase.tocsr.__doc__
  50. def nonzero(self):
  51. # CSC can't use _cs_matrix's .nonzero method because it
  52. # returns the indices sorted for self transposed.
  53. # Get row and col indices, from _cs_matrix.tocoo
  54. major_dim, minor_dim = self._swap(self.shape)
  55. minor_indices = self.indices
  56. major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
  57. expandptr(major_dim, self.indptr, major_indices)
  58. row, col = self._swap((major_indices, minor_indices))
  59. # Remove explicit zeros
  60. nz_mask = self.data != 0
  61. row = row[nz_mask]
  62. col = col[nz_mask]
  63. # Sort them to be in C-style order
  64. ind = np.argsort(row, kind='mergesort')
  65. row = row[ind]
  66. col = col[ind]
  67. return row, col
  68. nonzero.__doc__ = _cs_matrix.nonzero.__doc__
  69. def _getrow(self, i):
  70. """Returns a copy of row i of the matrix, as a (1 x n)
  71. CSR matrix (row vector).
  72. """
  73. M, N = self.shape
  74. i = int(i)
  75. if i < 0:
  76. i += M
  77. if i < 0 or i >= M:
  78. raise IndexError(f'index ({i}) out of range')
  79. return self._get_submatrix(minor=i).tocsr()
  80. def _getcol(self, i):
  81. """Returns a copy of column i of the matrix, as a (m x 1)
  82. CSC matrix (column vector).
  83. """
  84. M, N = self.shape
  85. i = int(i)
  86. if i < 0:
  87. i += N
  88. if i < 0 or i >= N:
  89. raise IndexError(f'index ({i}) out of range')
  90. return self._get_submatrix(major=i, copy=True)
  91. def _get_intXarray(self, row, col):
  92. return self._major_index_fancy(col)._get_submatrix(minor=row)
  93. def _get_intXslice(self, row, col):
  94. if col.step in (1, None):
  95. return self._get_submatrix(major=col, minor=row, copy=True)
  96. return self._major_slice(col)._get_submatrix(minor=row)
  97. def _get_sliceXint(self, row, col):
  98. if row.step in (1, None):
  99. return self._get_submatrix(major=col, minor=row, copy=True)
  100. return self._get_submatrix(major=col)._minor_slice(row)
  101. def _get_sliceXarray(self, row, col):
  102. return self._major_index_fancy(col)._minor_slice(row)
  103. def _get_arrayXint(self, row, col):
  104. res = self._get_submatrix(major=col)._minor_index_fancy(row)
  105. if row.ndim > 1:
  106. return res.reshape(row.shape)
  107. return res
  108. def _get_arrayXslice(self, row, col):
  109. return self._major_slice(col)._minor_index_fancy(row)
  110. # these functions are used by the parent class (_cs_matrix)
  111. # to remove redundancy between csc_array and csr_matrix
  112. @staticmethod
  113. def _swap(x):
  114. """swap the members of x if this is a column-oriented matrix
  115. """
  116. return x[1], x[0]
  117. def isspmatrix_csc(x):
  118. """Is `x` of csc_matrix type?
  119. Parameters
  120. ----------
  121. x
  122. object to check for being a csc matrix
  123. Returns
  124. -------
  125. bool
  126. True if `x` is a csc matrix, False otherwise
  127. Examples
  128. --------
  129. >>> from scipy.sparse import csc_array, csc_matrix, coo_matrix, isspmatrix_csc
  130. >>> isspmatrix_csc(csc_matrix([[5]]))
  131. True
  132. >>> isspmatrix_csc(csc_array([[5]]))
  133. False
  134. >>> isspmatrix_csc(coo_matrix([[5]]))
  135. False
  136. """
  137. return isinstance(x, csc_matrix)
  138. # This namespace class separates array from matrix with isinstance
  139. class csc_array(_csc_base, sparray):
  140. """
  141. Compressed Sparse Column array.
  142. This can be instantiated in several ways:
  143. csc_array(D)
  144. where D is a 2-D ndarray
  145. csc_array(S)
  146. with another sparse array or matrix S (equivalent to S.tocsc())
  147. csc_array((M, N), [dtype])
  148. to construct an empty array with shape (M, N)
  149. dtype is optional, defaulting to dtype='d'.
  150. csc_array((data, (row_ind, col_ind)), [shape=(M, N)])
  151. where ``data``, ``row_ind`` and ``col_ind`` satisfy the
  152. relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
  153. csc_array((data, indices, indptr), [shape=(M, N)])
  154. is the standard CSC representation where the row indices for
  155. column i are stored in ``indices[indptr[i]:indptr[i+1]]``
  156. and their corresponding values are stored in
  157. ``data[indptr[i]:indptr[i+1]]``. If the shape parameter is
  158. not supplied, the array dimensions are inferred from
  159. the index arrays.
  160. Attributes
  161. ----------
  162. dtype : dtype
  163. Data type of the array
  164. shape : 2-tuple
  165. Shape of the array
  166. ndim : int
  167. Number of dimensions (this is always 2)
  168. nnz
  169. size
  170. data
  171. CSC format data array of the array
  172. indices
  173. CSC format index array of the array
  174. indptr
  175. CSC format index pointer array of the array
  176. has_sorted_indices
  177. has_canonical_format
  178. T
  179. Notes
  180. -----
  181. Sparse arrays can be used in arithmetic operations: they support
  182. addition, subtraction, multiplication, division, and matrix power.
  183. Advantages of the CSC format
  184. - efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
  185. - efficient column slicing
  186. - fast matrix vector products (CSR, BSR may be faster)
  187. Disadvantages of the CSC format
  188. - slow row slicing operations (consider CSR)
  189. - changes to the sparsity structure are expensive (consider LIL or DOK)
  190. Canonical format
  191. - Within each column, indices are sorted by row.
  192. - There are no duplicate entries.
  193. Examples
  194. --------
  195. >>> import numpy as np
  196. >>> from scipy.sparse import csc_array
  197. >>> csc_array((3, 4), dtype=np.int8).toarray()
  198. array([[0, 0, 0, 0],
  199. [0, 0, 0, 0],
  200. [0, 0, 0, 0]], dtype=int8)
  201. >>> row = np.array([0, 2, 2, 0, 1, 2])
  202. >>> col = np.array([0, 0, 1, 2, 2, 2])
  203. >>> data = np.array([1, 2, 3, 4, 5, 6])
  204. >>> csc_array((data, (row, col)), shape=(3, 3)).toarray()
  205. array([[1, 0, 4],
  206. [0, 0, 5],
  207. [2, 3, 6]])
  208. >>> indptr = np.array([0, 2, 3, 6])
  209. >>> indices = np.array([0, 2, 2, 0, 1, 2])
  210. >>> data = np.array([1, 2, 3, 4, 5, 6])
  211. >>> csc_array((data, indices, indptr), shape=(3, 3)).toarray()
  212. array([[1, 0, 4],
  213. [0, 0, 5],
  214. [2, 3, 6]])
  215. """
  216. class csc_matrix(spmatrix, _csc_base):
  217. """
  218. Compressed Sparse Column matrix.
  219. This can be instantiated in several ways:
  220. csc_matrix(D)
  221. where D is a 2-D ndarray
  222. csc_matrix(S)
  223. with another sparse array or matrix S (equivalent to S.tocsc())
  224. csc_matrix((M, N), [dtype])
  225. to construct an empty matrix with shape (M, N)
  226. dtype is optional, defaulting to dtype='d'.
  227. csc_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
  228. where ``data``, ``row_ind`` and ``col_ind`` satisfy the
  229. relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
  230. csc_matrix((data, indices, indptr), [shape=(M, N)])
  231. is the standard CSC representation where the row indices for
  232. column i are stored in ``indices[indptr[i]:indptr[i+1]]``
  233. and their corresponding values are stored in
  234. ``data[indptr[i]:indptr[i+1]]``. If the shape parameter is
  235. not supplied, the matrix dimensions are inferred from
  236. the index arrays.
  237. Attributes
  238. ----------
  239. dtype : dtype
  240. Data type of the matrix
  241. shape : 2-tuple
  242. Shape of the matrix
  243. ndim : int
  244. Number of dimensions (this is always 2)
  245. nnz
  246. size
  247. data
  248. CSC format data array of the matrix
  249. indices
  250. CSC format index array of the matrix
  251. indptr
  252. CSC format index pointer array of the matrix
  253. has_sorted_indices
  254. has_canonical_format
  255. T
  256. Notes
  257. -----
  258. Sparse matrices can be used in arithmetic operations: they support
  259. addition, subtraction, multiplication, division, and matrix power.
  260. Advantages of the CSC format
  261. - efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
  262. - efficient column slicing
  263. - fast matrix vector products (CSR, BSR may be faster)
  264. Disadvantages of the CSC format
  265. - slow row slicing operations (consider CSR)
  266. - changes to the sparsity structure are expensive (consider LIL or DOK)
  267. Canonical format
  268. - Within each column, indices are sorted by row.
  269. - There are no duplicate entries.
  270. Examples
  271. --------
  272. >>> import numpy as np
  273. >>> from scipy.sparse import csc_matrix
  274. >>> csc_matrix((3, 4), dtype=np.int8).toarray()
  275. array([[0, 0, 0, 0],
  276. [0, 0, 0, 0],
  277. [0, 0, 0, 0]], dtype=int8)
  278. >>> row = np.array([0, 2, 2, 0, 1, 2])
  279. >>> col = np.array([0, 0, 1, 2, 2, 2])
  280. >>> data = np.array([1, 2, 3, 4, 5, 6])
  281. >>> csc_matrix((data, (row, col)), shape=(3, 3)).toarray()
  282. array([[1, 0, 4],
  283. [0, 0, 5],
  284. [2, 3, 6]])
  285. >>> indptr = np.array([0, 2, 3, 6])
  286. >>> indices = np.array([0, 2, 2, 0, 1, 2])
  287. >>> data = np.array([1, 2, 3, 4, 5, 6])
  288. >>> csc_matrix((data, indices, indptr), shape=(3, 3)).toarray()
  289. array([[1, 0, 4],
  290. [0, 0, 5],
  291. [2, 3, 6]])
  292. """