# test_csr.py (8.0 KB)
  1. import numpy as np
  2. from numpy.testing import assert_array_almost_equal, assert_, assert_array_equal
  3. from scipy.sparse import csr_matrix, csc_matrix, csr_array, csc_array, hstack
  4. from scipy import sparse
  5. import pytest
  6. def _check_csr_rowslice(i, sl, X, Xcsr):
  7. np_slice = X[i, sl]
  8. csr_slice = Xcsr[i, sl]
  9. assert_array_almost_equal(np_slice, csr_slice.toarray()[0])
  10. assert_(type(csr_slice) is csr_matrix)
  11. def test_csr_rowslice():
  12. N = 10
  13. np.random.seed(0)
  14. X = np.random.random((N, N))
  15. X[X > 0.7] = 0
  16. Xcsr = csr_matrix(X)
  17. slices = [slice(None, None, None),
  18. slice(None, None, -1),
  19. slice(1, -2, 2),
  20. slice(-2, 1, -2)]
  21. for i in range(N):
  22. for sl in slices:
  23. _check_csr_rowslice(i, sl, X, Xcsr)
  24. def test_csr_getrow():
  25. N = 10
  26. np.random.seed(0)
  27. X = np.random.random((N, N))
  28. X[X > 0.7] = 0
  29. Xcsr = csr_matrix(X)
  30. for i in range(N):
  31. arr_row = X[i:i + 1, :]
  32. csr_row = Xcsr.getrow(i)
  33. assert_array_almost_equal(arr_row, csr_row.toarray())
  34. assert_(type(csr_row) is csr_matrix)
  35. def test_csr_getcol():
  36. N = 10
  37. np.random.seed(0)
  38. X = np.random.random((N, N))
  39. X[X > 0.7] = 0
  40. Xcsr = csr_matrix(X)
  41. for i in range(N):
  42. arr_col = X[:, i:i + 1]
  43. csr_col = Xcsr.getcol(i)
  44. assert_array_almost_equal(arr_col, csr_col.toarray())
  45. assert_(type(csr_col) is csr_matrix)
  46. @pytest.mark.parametrize("matrix_input, axis, expected_shape",
  47. [(csr_matrix([[1, 0, 0, 0],
  48. [0, 0, 0, 0],
  49. [0, 2, 3, 0]]),
  50. 0, (0, 4)),
  51. (csr_matrix([[1, 0, 0, 0],
  52. [0, 0, 0, 0],
  53. [0, 2, 3, 0]]),
  54. 1, (3, 0)),
  55. (csr_matrix([[1, 0, 0, 0],
  56. [0, 0, 0, 0],
  57. [0, 2, 3, 0]]),
  58. 'both', (0, 0)),
  59. (csr_matrix([[0, 1, 0, 0, 0],
  60. [0, 0, 0, 0, 0],
  61. [0, 0, 2, 3, 0]]),
  62. 0, (0, 5))])
  63. def test_csr_empty_slices(matrix_input, axis, expected_shape):
  64. # see gh-11127 for related discussion
  65. slice_1 = matrix_input.toarray().shape[0] - 1
  66. slice_2 = slice_1
  67. slice_3 = slice_2 - 1
  68. if axis == 0:
  69. actual_shape_1 = matrix_input[slice_1:slice_2, :].toarray().shape
  70. actual_shape_2 = matrix_input[slice_1:slice_3, :].toarray().shape
  71. elif axis == 1:
  72. actual_shape_1 = matrix_input[:, slice_1:slice_2].toarray().shape
  73. actual_shape_2 = matrix_input[:, slice_1:slice_3].toarray().shape
  74. elif axis == 'both':
  75. actual_shape_1 = matrix_input[slice_1:slice_2, slice_1:slice_2].toarray().shape
  76. actual_shape_2 = matrix_input[slice_1:slice_3, slice_1:slice_3].toarray().shape
  77. assert actual_shape_1 == expected_shape
  78. assert actual_shape_1 == actual_shape_2
  79. def test_csr_bool_indexing():
  80. data = csr_matrix([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
  81. list_indices1 = [False, True, False]
  82. array_indices1 = np.array(list_indices1)
  83. list_indices2 = [[False, True, False], [False, True, False], [False, True, False]]
  84. array_indices2 = np.array(list_indices2)
  85. list_indices3 = ([False, True, False], [False, True, False])
  86. array_indices3 = (np.array(list_indices3[0]), np.array(list_indices3[1]))
  87. slice_list1 = data[list_indices1].toarray()
  88. slice_array1 = data[array_indices1].toarray()
  89. slice_list2 = data[list_indices2]
  90. slice_array2 = data[array_indices2]
  91. slice_list3 = data[list_indices3]
  92. slice_array3 = data[array_indices3]
  93. assert (slice_list1 == slice_array1).all()
  94. assert (slice_list2 == slice_array2).all()
  95. assert (slice_list3 == slice_array3).all()
  96. @pytest.mark.xfail_on_32bit("Can't create large array for test")
  97. @pytest.mark.timeout(2) # only slow when broken (conversion to 2d index arrays)
  98. @pytest.mark.parametrize("cls", [csr_matrix, csr_array, csc_matrix, csc_array])
  99. def test_fancy_indexing_broadcasts_without_making_dense_2d(cls):
  100. # Fixes Issue gh-24339
  101. J = np.arange(100_000)
  102. I = J.reshape((100_000, 1))
  103. S = cls((100_000, 100_000))
  104. # checking nnz, but really testing indexing.
  105. assert S[I, J].nnz == 0 # 1D row array for columns -> broadcasts to 2D
  106. assert S[I, J.reshape(1, -1)].nnz == 0 # 2D row array as index for columns
  107. def test_csr_hstack_int64():
  108. """
  109. Tests if hstack properly promotes to indices and indptr arrays to np.int64
  110. when using np.int32 during concatenation would result in either array
  111. overflowing.
  112. """
  113. max_int32 = np.iinfo(np.int32).max
  114. # First case: indices would overflow with int32
  115. data = [1.0]
  116. row = [0]
  117. max_indices_1 = max_int32 - 1
  118. max_indices_2 = 3
  119. # Individual indices arrays are representable with int32
  120. col_1 = [max_indices_1 - 1]
  121. col_2 = [max_indices_2 - 1]
  122. X_1 = csr_matrix((data, (row, col_1)))
  123. X_2 = csr_matrix((data, (row, col_2)))
  124. assert max(max_indices_1 - 1, max_indices_2 - 1) < max_int32
  125. assert X_1.indices.dtype == X_1.indptr.dtype == np.int32
  126. assert X_2.indices.dtype == X_2.indptr.dtype == np.int32
  127. # ... but when concatenating their CSR matrices, the resulting indices
  128. # array can't be represented with int32 and must be promoted to int64.
  129. X_hs = hstack([X_1, X_2], format="csr")
  130. assert X_hs.indices.max() == max_indices_1 + max_indices_2 - 1
  131. assert max_indices_1 + max_indices_2 - 1 > max_int32
  132. assert X_hs.indices.dtype == X_hs.indptr.dtype == np.int64
  133. # Even if the matrices are empty, we must account for their size
  134. # contribution so that we may safely set the final elements.
  135. X_1_empty = csr_matrix(X_1.shape)
  136. X_2_empty = csr_matrix(X_2.shape)
  137. X_hs_empty = hstack([X_1_empty, X_2_empty], format="csr")
  138. assert X_hs_empty.shape == X_hs.shape
  139. assert X_hs_empty.indices.dtype == np.int64
  140. # Should be just small enough to stay in int32 after stack. Note that
  141. # we theoretically could support indices.max() == max_int32, but due to an
  142. # edge-case in the underlying sparsetools code
  143. # (namely the `coo_tocsr` routine),
  144. # we require that max(X_hs_32.shape) < max_int32 as well.
  145. # Hence we can only support max_int32 - 1.
  146. col_3 = [max_int32 - max_indices_1 - 1]
  147. X_3 = csr_matrix((data, (row, col_3)))
  148. X_hs_32 = hstack([X_1, X_3], format="csr")
  149. assert X_hs_32.indices.dtype == np.int32
  150. assert X_hs_32.indices.max() == max_int32 - 1
  151. @pytest.mark.parametrize("cls", [csr_matrix, csr_array, csc_matrix, csc_array])
  152. def test_mixed_index_dtype_int_indexing(cls):
  153. # https://github.com/scipy/scipy/issues/20182
  154. rng = np.random.default_rng(0)
  155. base_mtx = cls(sparse.random(50, 50, random_state=rng, density=0.1))
  156. indptr_64bit = base_mtx.copy()
  157. indices_64bit = base_mtx.copy()
  158. indptr_64bit.indptr = base_mtx.indptr.astype(np.int64)
  159. indices_64bit.indices = base_mtx.indices.astype(np.int64)
  160. for mtx in [base_mtx, indptr_64bit, indices_64bit]:
  161. np.testing.assert_array_equal(
  162. mtx[[1,2], :].toarray(),
  163. base_mtx[[1, 2], :].toarray()
  164. )
  165. np.testing.assert_array_equal(
  166. mtx[:, [1, 2]].toarray(),
  167. base_mtx[:, [1, 2]].toarray()
  168. )
  169. def test_broadcast_to():
  170. a = np.array([1, 0, 2])
  171. b = np.array([3])
  172. e = np.zeros((0,))
  173. res_a = csr_array(a)._broadcast_to((2,3))
  174. res_b = csr_array(b)._broadcast_to((4,))
  175. res_c = csr_array(b)._broadcast_to((2,4))
  176. res_d = csr_array(b)._broadcast_to((1,))
  177. res_e = csr_array(e)._broadcast_to((4,0))
  178. assert_array_equal(res_a.toarray(), np.broadcast_to(a, (2,3)))
  179. assert_array_equal(res_b.toarray(), np.broadcast_to(b, (4,)))
  180. assert_array_equal(res_c.toarray(), np.broadcast_to(b, (2,4)))
  181. assert_array_equal(res_d.toarray(), np.broadcast_to(b, (1,)))
  182. assert_array_equal(res_e.toarray(), np.broadcast_to(e, (4,0)))
  183. with pytest.raises(ValueError, match="cannot be broadcast"):
  184. csr_matrix([[1, 2, 0], [3, 0, 1]])._broadcast_to(shape=(2, 1))
  185. with pytest.raises(ValueError, match="cannot be broadcast"):
  186. csr_matrix([[0, 1, 2]])._broadcast_to(shape=(3, 2))
  187. with pytest.raises(ValueError, match="cannot be broadcast"):
  188. csr_array([0, 1, 2])._broadcast_to(shape=(3, 2))