_lil.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. """List of Lists sparse matrix class
  2. """
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['lil_array', 'lil_matrix', 'isspmatrix_lil']
  5. from bisect import bisect_left
  6. import numpy as np
  7. from ._matrix import spmatrix
  8. from ._base import _spbase, sparray, issparse
  9. from ._index import IndexMixin, INT_TYPES, _broadcast_arrays
  10. from ._sputils import (getdtype, isshape, isscalarlike, upcast_scalar,
  11. check_shape, check_reshape_kwargs)
  12. from . import _csparsetools
  13. class _lil_base(_spbase, IndexMixin):
  14. _format = 'lil'
  15. def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None):
  16. _spbase.__init__(self, arg1, maxprint=maxprint)
  17. self.dtype = getdtype(dtype, arg1, default=float)
  18. # First get the shape
  19. if issparse(arg1):
  20. if arg1.format == "lil" and copy:
  21. A = arg1.copy()
  22. else:
  23. A = arg1.tolil()
  24. if dtype is not None:
  25. newdtype = getdtype(dtype)
  26. A = A.astype(newdtype, copy=False)
  27. self._shape = check_shape(A.shape)
  28. self.dtype = A.dtype
  29. self.rows = A.rows
  30. self.data = A.data
  31. elif isinstance(arg1,tuple):
  32. if isshape(arg1):
  33. if shape is not None:
  34. raise ValueError('invalid use of shape parameter')
  35. M, N = arg1
  36. self._shape = check_shape((M, N))
  37. self.rows = np.empty((M,), dtype=object)
  38. self.data = np.empty((M,), dtype=object)
  39. for i in range(M):
  40. self.rows[i] = []
  41. self.data[i] = []
  42. else:
  43. raise TypeError('unrecognized lil_array constructor usage')
  44. else:
  45. # assume A is dense
  46. try:
  47. A = self._ascontainer(arg1)
  48. except TypeError as e:
  49. raise TypeError('unsupported matrix type') from e
  50. if isinstance(self, sparray) and A.ndim != 2:
  51. raise ValueError(f"LIL arrays don't support {A.ndim}D input. Use 2D")
  52. A = self._csr_container(A, dtype=dtype).tolil()
  53. self._shape = check_shape(A.shape)
  54. self.dtype = getdtype(A.dtype)
  55. self.rows = A.rows
  56. self.data = A.data
  57. def __iadd__(self,other):
  58. self[:,:] = self + other
  59. return self
  60. def __isub__(self,other):
  61. self[:,:] = self - other
  62. return self
  63. def __imul__(self,other):
  64. if isscalarlike(other):
  65. self[:,:] = self * other
  66. return self
  67. else:
  68. return NotImplemented
  69. def __itruediv__(self,other):
  70. if isscalarlike(other):
  71. self[:,:] = self / other
  72. return self
  73. else:
  74. return NotImplemented
  75. # Whenever the dimensions change, empty lists should be created for each
  76. # row
  77. def _getnnz(self, axis=None):
  78. if axis is None:
  79. return sum([len(rowvals) for rowvals in self.data])
  80. if axis < 0:
  81. axis += 2
  82. if axis == 0:
  83. out = np.zeros(self.shape[1], dtype=np.intp)
  84. for row in self.rows:
  85. out[row] += 1
  86. return out
  87. elif axis == 1:
  88. return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp)
  89. else:
  90. raise ValueError('axis out of bounds')
  91. _getnnz.__doc__ = _spbase._getnnz.__doc__
  92. def count_nonzero(self, axis=None):
  93. if axis is None:
  94. return sum(np.count_nonzero(rowvals) for rowvals in self.data)
  95. if axis < 0:
  96. axis += 2
  97. if axis == 0:
  98. out = np.zeros(self.shape[1], dtype=np.intp)
  99. for row, data in zip(self.rows, self.data):
  100. mask = [c for c, d in zip(row, data) if d != 0]
  101. out[mask] += 1
  102. return out
  103. elif axis == 1:
  104. return np.array(
  105. [np.count_nonzero(rowvals) for rowvals in self.data], dtype=np.intp,
  106. )
  107. else:
  108. raise ValueError('axis out of bounds')
  109. count_nonzero.__doc__ = _spbase.count_nonzero.__doc__
  110. def getrowview(self, i):
  111. """Returns a view of the 'i'th row (without copying).
  112. """
  113. new = self._lil_container((1, self.shape[1]), dtype=self.dtype)
  114. new.rows[0] = self.rows[i]
  115. new.data[0] = self.data[i]
  116. return new
  117. def getrow(self, i):
  118. """Returns a copy of the 'i'th row.
  119. """
  120. M, N = self.shape
  121. if i < 0:
  122. i += M
  123. if i < 0 or i >= M:
  124. raise IndexError('row index out of bounds')
  125. new = self._lil_container((1, N), dtype=self.dtype)
  126. new.rows[0] = self.rows[i][:]
  127. new.data[0] = self.data[i][:]
  128. return new
  129. def __getitem__(self, key):
  130. # Fast path for simple (int, int) indexing.
  131. if (isinstance(key, tuple) and len(key) == 2 and
  132. isinstance(key[0], INT_TYPES) and
  133. isinstance(key[1], INT_TYPES)):
  134. # lil_get1 handles validation for us.
  135. return self._get_intXint(*key)
  136. # Everything else takes the normal path.
  137. return IndexMixin.__getitem__(self, key)
  138. def _get_intXint(self, row, col):
  139. v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows,
  140. self.data, row, col)
  141. return self.dtype.type(v)
  142. def _get_sliceXint(self, row, col):
  143. row = range(*row.indices(self.shape[0]))
  144. return self._get_row_ranges(row, slice(col, col+1))
  145. def _get_arrayXint(self, row, col):
  146. res = self._get_row_ranges(row.ravel(), slice(col, col+1))
  147. if row.ndim > 1:
  148. return res.reshape(row.shape)
  149. return res
  150. def _get_intXslice(self, row, col):
  151. return self._get_row_ranges((row,), col)
  152. def _get_sliceXslice(self, row, col):
  153. row = range(*row.indices(self.shape[0]))
  154. return self._get_row_ranges(row, col)
  155. def _get_arrayXslice(self, row, col):
  156. return self._get_row_ranges(row, col)
  157. def _get_intXarray(self, row, col):
  158. row = np.array(row, dtype=col.dtype, ndmin=1)
  159. return self._get_columnXarray(row, col)
  160. def _get_sliceXarray(self, row, col):
  161. row = np.arange(*row.indices(self.shape[0]))
  162. return self._get_columnXarray(row, col)
  163. def _get_columnXarray(self, row, col):
  164. # outer indexing
  165. row, col = _broadcast_arrays(row[:,None], col)
  166. return self._get_arrayXarray(row, col)
  167. def _get_arrayXarray(self, row, col):
  168. # inner indexing
  169. i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col))
  170. new = self._lil_container(i.shape, dtype=self.dtype)
  171. _csparsetools.lil_fancy_get(self.shape[0], self.shape[1],
  172. self.rows, self.data,
  173. new.rows, new.data,
  174. i, j)
  175. return new
  176. def _get_row_ranges(self, rows, col_slice):
  177. """
  178. Fast path for indexing in the case where column index is slice.
  179. This gains performance improvement over brute force by more
  180. efficient skipping of zeros, by accessing the elements
  181. column-wise in order.
  182. Parameters
  183. ----------
  184. rows : sequence or range
  185. Rows indexed. If range, must be within valid bounds.
  186. col_slice : slice
  187. Columns indexed
  188. """
  189. j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
  190. col_range = range(j_start, j_stop, j_stride)
  191. nj = len(col_range)
  192. new = self._lil_container((len(rows), nj), dtype=self.dtype)
  193. _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
  194. self.rows, self.data,
  195. new.rows, new.data,
  196. rows,
  197. j_start, j_stop, j_stride, nj)
  198. return new
  199. def _set_intXint(self, row, col, x):
  200. _csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows,
  201. self.data, row, col, x)
  202. def _set_arrayXarray(self, row, col, x):
  203. i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x))
  204. _csparsetools.lil_fancy_set(self.shape[0], self.shape[1],
  205. self.rows, self.data,
  206. i, j, x)
  207. def _set_arrayXarray_sparse(self, row, col, x):
  208. # Fall back to densifying x
  209. x = np.asarray(x.toarray(), dtype=self.dtype)
  210. x, _ = _broadcast_arrays(x, row)
  211. self._set_arrayXarray(row, col, x)
  212. def __setitem__(self, key, x):
  213. if isinstance(key, tuple) and len(key) == 2:
  214. row, col = key
  215. # Fast path for simple (int, int) indexing.
  216. if isinstance(row, INT_TYPES) and isinstance(col, INT_TYPES):
  217. if issparse(x):
  218. x = x.toarray()
  219. if isinstance(x, np.ndarray):
  220. x = x.item()
  221. x = self.dtype.type(x)
  222. if x.size > 1:
  223. raise ValueError("Trying to assign a sequence to an item")
  224. return self._set_intXint(row, col, x)
  225. # Fast path for full-matrix sparse assignment.
  226. if (isinstance(row, slice) and isinstance(col, slice) and
  227. row == slice(None) and col == slice(None) and
  228. issparse(x) and x.shape == self.shape):
  229. x = self._lil_container(x, dtype=self.dtype)
  230. self.rows = x.rows
  231. self.data = x.data
  232. return
  233. # Everything else takes the normal path.
  234. IndexMixin.__setitem__(self, key, x)
  235. def _mul_scalar(self, other):
  236. if other == 0:
  237. # Multiply by zero: return the zero matrix
  238. new = self._lil_container(self.shape, dtype=self.dtype)
  239. else:
  240. res_dtype = upcast_scalar(self.dtype, other)
  241. new = self.astype(res_dtype) # sure to make a copy
  242. # Multiply this scalar by every element.
  243. for j, rowvals in enumerate(new.data):
  244. new.data[j] = [val*other for val in rowvals]
  245. return new
  246. def __truediv__(self, other): # self / other
  247. if isscalarlike(other):
  248. new = self.copy()
  249. new.dtype = np.result_type(self, other)
  250. # Divide every element by this scalar
  251. for j, rowvals in enumerate(new.data):
  252. new.data[j] = [val/other for val in rowvals]
  253. return new
  254. else:
  255. return self.tocsr() / other
  256. def copy(self):
  257. M, N = self.shape
  258. new = self._lil_container(self.shape, dtype=self.dtype)
  259. # This is ~14x faster than calling deepcopy() on rows and data.
  260. _csparsetools.lil_get_row_ranges(M, N, self.rows, self.data,
  261. new.rows, new.data, range(M),
  262. 0, N, 1, N)
  263. return new
  264. copy.__doc__ = _spbase.copy.__doc__
  265. def reshape(self, *args, **kwargs):
  266. shape = check_shape(args, self.shape)
  267. order, copy = check_reshape_kwargs(kwargs)
  268. # Return early if reshape is not required
  269. if shape == self.shape:
  270. if copy:
  271. return self.copy()
  272. else:
  273. return self
  274. new = self._lil_container(shape, dtype=self.dtype)
  275. if order == 'C':
  276. ncols = self.shape[1]
  277. for i, row in enumerate(self.rows):
  278. for col, j in enumerate(row):
  279. new_r, new_c = np.unravel_index(i * ncols + j, shape)
  280. new[new_r, new_c] = self[i, j]
  281. elif order == 'F':
  282. nrows = self.shape[0]
  283. for i, row in enumerate(self.rows):
  284. for col, j in enumerate(row):
  285. new_r, new_c = np.unravel_index(i + j * nrows, shape, order)
  286. new[new_r, new_c] = self[i, j]
  287. else:
  288. raise ValueError("'order' must be 'C' or 'F'")
  289. return new
  290. reshape.__doc__ = _spbase.reshape.__doc__
  291. def resize(self, *shape):
  292. shape = check_shape(shape)
  293. new_M, new_N = shape
  294. M, N = self.shape
  295. if new_M < M:
  296. self.rows = self.rows[:new_M]
  297. self.data = self.data[:new_M]
  298. elif new_M > M:
  299. self.rows = np.resize(self.rows, new_M)
  300. self.data = np.resize(self.data, new_M)
  301. for i in range(M, new_M):
  302. self.rows[i] = []
  303. self.data[i] = []
  304. if new_N < N:
  305. for row, data in zip(self.rows, self.data):
  306. trunc = bisect_left(row, new_N)
  307. del row[trunc:]
  308. del data[trunc:]
  309. self._shape = shape
  310. resize.__doc__ = _spbase.resize.__doc__
  311. def toarray(self, order=None, out=None):
  312. d = self._process_toarray_args(order, out)
  313. for i, row in enumerate(self.rows):
  314. for pos, j in enumerate(row):
  315. d[i, j] = self.data[i][pos]
  316. return d
  317. toarray.__doc__ = _spbase.toarray.__doc__
  318. def transpose(self, axes=None, copy=False):
  319. return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False)
  320. transpose.__doc__ = _spbase.transpose.__doc__
  321. def tolil(self, copy=False):
  322. if copy:
  323. return self.copy()
  324. else:
  325. return self
  326. tolil.__doc__ = _spbase.tolil.__doc__
  327. def tocsr(self, copy=False):
  328. M, N = self.shape
  329. if M == 0 or N == 0:
  330. return self._csr_container((M, N), dtype=self.dtype)
  331. # construct indptr array
  332. if M*N <= np.iinfo(np.int32).max:
  333. # fast path: it is known that 64-bit indexing will not be needed.
  334. idx_dtype = np.int32
  335. indptr = np.empty(M + 1, dtype=idx_dtype)
  336. indptr[0] = 0
  337. _csparsetools.lil_get_lengths(self.rows, indptr[1:])
  338. np.cumsum(indptr, out=indptr)
  339. nnz = indptr[-1]
  340. else:
  341. idx_dtype = self._get_index_dtype(maxval=N)
  342. lengths = np.empty(M, dtype=idx_dtype)
  343. _csparsetools.lil_get_lengths(self.rows, lengths)
  344. nnz = lengths.sum(dtype=np.int64)
  345. idx_dtype = self._get_index_dtype(maxval=max(N, nnz))
  346. indptr = np.empty(M + 1, dtype=idx_dtype)
  347. indptr[0] = 0
  348. np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:])
  349. indices = np.empty(nnz, dtype=idx_dtype)
  350. data = np.empty(nnz, dtype=self.dtype)
  351. _csparsetools.lil_flatten_to_array(self.rows, indices)
  352. _csparsetools.lil_flatten_to_array(self.data, data)
  353. # init csr matrix
  354. return self._csr_container((data, indices, indptr), shape=self.shape)
  355. tocsr.__doc__ = _spbase.tocsr.__doc__
  356. def _prepare_index_for_memoryview(i, j, x=None):
  357. """
  358. Convert index and data arrays to form suitable for passing to the
  359. Cython fancy getset routines.
  360. The conversions are necessary since to (i) ensure the integer
  361. index arrays are in one of the accepted types, and (ii) to ensure
  362. the arrays are writable so that Cython memoryview support doesn't
  363. choke on them.
  364. Parameters
  365. ----------
  366. i, j
  367. Index arrays
  368. x : optional
  369. Data arrays
  370. Returns
  371. -------
  372. i, j, x
  373. Re-formatted arrays (x is omitted, if input was None)
  374. """
  375. if i.dtype > j.dtype:
  376. j = j.astype(i.dtype)
  377. elif i.dtype < j.dtype:
  378. i = i.astype(j.dtype)
  379. if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  380. i = i.astype(np.intp)
  381. if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  382. j = j.astype(np.intp)
  383. if x is not None:
  384. if not x.flags.writeable:
  385. x = x.copy()
  386. return i, j, x
  387. else:
  388. return i, j
  389. def isspmatrix_lil(x):
  390. """Is `x` of lil_matrix type?
  391. Parameters
  392. ----------
  393. x
  394. object to check for being a lil matrix
  395. Returns
  396. -------
  397. bool
  398. True if `x` is a lil matrix, False otherwise
  399. Examples
  400. --------
  401. >>> from scipy.sparse import lil_array, lil_matrix, coo_matrix, isspmatrix_lil
  402. >>> isspmatrix_lil(lil_matrix([[5]]))
  403. True
  404. >>> isspmatrix_lil(lil_array([[5]]))
  405. False
  406. >>> isspmatrix_lil(coo_matrix([[5]]))
  407. False
  408. """
  409. return isinstance(x, lil_matrix)
  410. # This namespace class separates array from matrix with isinstance
  411. class lil_array(_lil_base, sparray):
  412. """
  413. Row-based LIst of Lists sparse array.
  414. This is a structure for constructing sparse arrays incrementally.
  415. Note that inserting a single item can take linear time in the worst case;
  416. to construct the array efficiently, make sure the items are pre-sorted by
  417. index, per row.
  418. This can be instantiated in several ways:
  419. lil_array(D)
  420. where D is a 2-D ndarray
  421. lil_array(S)
  422. with another sparse array or matrix S (equivalent to S.tolil())
  423. lil_array((M, N), [dtype])
  424. to construct an empty array with shape (M, N)
  425. dtype is optional, defaulting to dtype='d'.
  426. Attributes
  427. ----------
  428. dtype : dtype
  429. Data type of the array
  430. shape : 2-tuple
  431. Shape of the array
  432. ndim : int
  433. Number of dimensions (this is always 2)
  434. nnz
  435. size
  436. data
  437. LIL format data array of the array
  438. rows
  439. LIL format row index array of the array
  440. T
  441. Notes
  442. -----
  443. Sparse arrays can be used in arithmetic operations: they support
  444. addition, subtraction, multiplication, division, and matrix power.
  445. Advantages of the LIL format
  446. - supports flexible slicing
  447. - changes to the array sparsity structure are efficient
  448. Disadvantages of the LIL format
  449. - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
  450. - slow column slicing (consider CSC)
  451. - slow matrix vector products (consider CSR or CSC)
  452. Intended Usage
  453. - LIL is a convenient format for constructing sparse arrays
  454. - once an array has been constructed, convert to CSR or
  455. CSC format for fast arithmetic and matrix vector operations
  456. - consider using the COO format when constructing large arrays
  457. Data Structure
  458. - An array (``self.rows``) of rows, each of which is a sorted
  459. list of column indices of non-zero elements.
  460. - The corresponding nonzero values are stored in similar
  461. fashion in ``self.data``.
  462. """
  463. class lil_matrix(spmatrix, _lil_base):
  464. """
  465. Row-based LIst of Lists sparse matrix.
  466. This is a structure for constructing sparse matrices incrementally.
  467. Note that inserting a single item can take linear time in the worst case;
  468. to construct the matrix efficiently, make sure the items are pre-sorted by
  469. index, per row.
  470. This can be instantiated in several ways:
  471. lil_matrix(D)
  472. where D is a 2-D ndarray
  473. lil_matrix(S)
  474. with another sparse array or matrix S (equivalent to S.tolil())
  475. lil_matrix((M, N), [dtype])
  476. to construct an empty matrix with shape (M, N)
  477. dtype is optional, defaulting to dtype='d'.
  478. Attributes
  479. ----------
  480. dtype : dtype
  481. Data type of the matrix
  482. shape : 2-tuple
  483. Shape of the matrix
  484. ndim : int
  485. Number of dimensions (this is always 2)
  486. nnz
  487. size
  488. data
  489. LIL format data array of the matrix
  490. rows
  491. LIL format row index array of the matrix
  492. T
  493. Notes
  494. -----
  495. Sparse matrices can be used in arithmetic operations: they support
  496. addition, subtraction, multiplication, division, and matrix power.
  497. Advantages of the LIL format
  498. - supports flexible slicing
  499. - changes to the matrix sparsity structure are efficient
  500. Disadvantages of the LIL format
  501. - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
  502. - slow column slicing (consider CSC)
  503. - slow matrix vector products (consider CSR or CSC)
  504. Intended Usage
  505. - LIL is a convenient format for constructing sparse matrices
  506. - once a matrix has been constructed, convert to CSR or
  507. CSC format for fast arithmetic and matrix vector operations
  508. - consider using the COO format when constructing large matrices
  509. Data Structure
  510. - An array (``self.rows``) of rows, each of which is a sorted
  511. list of column indices of non-zero elements.
  512. - The corresponding nonzero values are stored in similar
  513. fashion in ``self.data``.
  514. """