test_sparsetools.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. import sys
  2. import os
  3. import gc
  4. import threading
  5. import numpy as np
  6. from numpy.testing import assert_equal, assert_, assert_allclose
  7. from scipy.sparse import (_sparsetools, coo_matrix, csr_matrix, csc_matrix,
  8. bsr_matrix, dia_matrix)
  9. from scipy.sparse._sputils import supported_dtypes
  10. from scipy._lib._testutils import check_free_memory
  11. import pytest
  12. from pytest import raises as assert_raises
  13. def int_to_int8(n):
  14. """
  15. Wrap an integer to the interval [-128, 127].
  16. """
  17. return (n + 128) % 256 - 128
  18. def test_exception():
  19. assert_raises(MemoryError, _sparsetools.test_throw_error)
  20. def test_threads():
  21. # Smoke test for parallel threaded execution; doesn't actually
  22. # check that code runs in parallel, but just that it produces
  23. # expected results.
  24. nthreads = 10
  25. niter = 100
  26. n = 20
  27. a = csr_matrix(np.ones([n, n]))
  28. bres = []
  29. class Worker(threading.Thread):
  30. def run(self):
  31. b = a.copy()
  32. for j in range(niter):
  33. _sparsetools.csr_plus_csr(n, n,
  34. a.indptr, a.indices, a.data,
  35. a.indptr, a.indices, a.data,
  36. b.indptr, b.indices, b.data)
  37. bres.append(b)
  38. threads = [Worker() for _ in range(nthreads)]
  39. for thread in threads:
  40. thread.start()
  41. for thread in threads:
  42. thread.join()
  43. for b in bres:
  44. assert_(np.all(b.toarray() == 2))
  45. def test_regression_std_vector_dtypes():
  46. # Regression test for gh-3780, checking the std::vector typemaps
  47. # in sparsetools.cxx are complete.
  48. for dtype in supported_dtypes:
  49. ad = np.array([[1, 2], [3, 4]]).astype(dtype)
  50. a = csr_matrix(ad, dtype=dtype)
  51. # getcol is one function using std::vector typemaps, and should not fail
  52. assert_equal(a.getcol(0).toarray(), ad[:, :1])
  53. @pytest.mark.slow
  54. @pytest.mark.xfail_on_32bit("Can't create large array for test")
  55. def test_nnz_overflow():
  56. # Regression test for gh-7230 / gh-7871, checking that coo_toarray
  57. # with nnz > int32max doesn't overflow.
  58. nnz = np.iinfo(np.int32).max + 1
  59. # Ensure ~20 GB of RAM is free to run this test.
  60. check_free_memory((4 + 4 + 1) * nnz / 1e6 + 0.5)
  61. # Use nnz duplicate entries to keep the dense version small.
  62. row = np.zeros(nnz, dtype=np.int32)
  63. col = np.zeros(nnz, dtype=np.int32)
  64. data = np.zeros(nnz, dtype=np.int8)
  65. data[-1] = 4
  66. s = coo_matrix((data, (row, col)), shape=(1, 1), copy=False)
  67. # Sums nnz duplicates to produce a 1x1 array containing 4.
  68. d = s.toarray()
  69. assert_allclose(d, [[4]])
  70. @pytest.mark.skipif(
  71. not (sys.platform.startswith('linux') and np.dtype(np.intp).itemsize >= 8),
  72. reason="test requires 64-bit Linux"
  73. )
  74. class TestInt32Overflow:
  75. """
  76. Some of the sparsetools routines use dense 2D matrices whose
  77. total size is not bounded by the nnz of the sparse matrix. These
  78. routines used to suffer from int32 wraparounds; here, we try to
  79. check that the wraparounds don't occur any more.
  80. """
  81. # choose n large enough
  82. n = 50000
  83. def setup_method(self):
  84. assert self.n**2 > np.iinfo(np.int32).max
  85. # check there's enough memory even if everything is run at the
  86. # same time
  87. try:
  88. parallel_count = int(os.environ.get('PYTEST_XDIST_WORKER_COUNT', '1'))
  89. except ValueError:
  90. parallel_count = np.inf
  91. check_free_memory(3000 * parallel_count)
  92. def teardown_method(self):
  93. gc.collect()
  94. @pytest.mark.fail_slow(2) # keep in fast set, only non-slow test
  95. def test_coo_todense(self):
  96. # Check *_todense routines (cf. gh-2179)
  97. #
  98. # All of them in the end call coo_matrix.todense
  99. n = self.n
  100. i = np.array([0, n-1])
  101. j = np.array([0, n-1])
  102. data = np.array([1, 2], dtype=np.int8)
  103. m = coo_matrix((data, (i, j)))
  104. r = m.todense()
  105. assert_equal(r[0,0], 1)
  106. assert_equal(r[-1,-1], 2)
  107. del r
  108. gc.collect()
  109. @pytest.mark.slow
  110. def test_matvecs(self):
  111. # Check *_matvecs routines
  112. n = self.n
  113. i = np.array([0, n-1])
  114. j = np.array([0, n-1])
  115. data = np.array([1, 2], dtype=np.int8)
  116. m = coo_matrix((data, (i, j)))
  117. b = np.ones((n, n), dtype=np.int8)
  118. for sptype in (csr_matrix, csc_matrix, bsr_matrix):
  119. m2 = sptype(m)
  120. r = m2.dot(b)
  121. assert_equal(r[0,0], 1)
  122. assert_equal(r[-1,-1], 2)
  123. del r
  124. gc.collect()
  125. del b
  126. gc.collect()
  127. @pytest.mark.slow
  128. def test_dia_matvec(self):
  129. # Check: huge dia_matrix _matvec
  130. n = self.n
  131. data = np.ones((n, n), dtype=np.int8)
  132. offsets = np.arange(n)
  133. m = dia_matrix((data, offsets), shape=(n, n))
  134. v = np.ones(m.shape[1], dtype=np.int8)
  135. r = m.dot(v)
  136. assert_equal(r[0], int_to_int8(n))
  137. del data, offsets, m, v, r
  138. gc.collect()
  139. _bsr_ops = [pytest.param("matmat", marks=pytest.mark.xslow),
  140. pytest.param("matvecs", marks=pytest.mark.xslow),
  141. "matvec",
  142. "diagonal",
  143. "sort_indices",
  144. pytest.param("transpose", marks=pytest.mark.xslow)]
  145. @pytest.mark.slow
  146. @pytest.mark.parametrize("op", _bsr_ops)
  147. def test_bsr_1_block(self, op):
  148. # Check: huge bsr_matrix (1-block)
  149. #
  150. # The point here is that indices inside a block may overflow.
  151. def get_matrix():
  152. n = self.n
  153. data = np.ones((1, n, n), dtype=np.int8)
  154. indptr = np.array([0, 1], dtype=np.int32)
  155. indices = np.array([0], dtype=np.int32)
  156. m = bsr_matrix((data, indices, indptr), blocksize=(n, n), copy=False)
  157. del data, indptr, indices
  158. return m
  159. gc.collect()
  160. try:
  161. getattr(self, "_check_bsr_" + op)(get_matrix)
  162. finally:
  163. gc.collect()
  164. @pytest.mark.slow
  165. @pytest.mark.parametrize("op", _bsr_ops)
  166. def test_bsr_n_block(self, op):
  167. # Check: huge bsr_matrix (n-block)
  168. #
  169. # The point here is that while indices within a block don't
  170. # overflow, accumulators across many block may.
  171. def get_matrix():
  172. n = self.n
  173. data = np.ones((n, n, 1), dtype=np.int8)
  174. indptr = np.array([0, n], dtype=np.int32)
  175. indices = np.arange(n, dtype=np.int32)
  176. m = bsr_matrix((data, indices, indptr), blocksize=(n, 1), copy=False)
  177. del data, indptr, indices
  178. return m
  179. gc.collect()
  180. try:
  181. getattr(self, "_check_bsr_" + op)(get_matrix)
  182. finally:
  183. gc.collect()
  184. def _check_bsr_matvecs(self, m): # skip name check
  185. m = m()
  186. n = self.n
  187. # _matvecs
  188. r = m.dot(np.ones((n, 2), dtype=np.int8))
  189. assert_equal(r[0, 0], int_to_int8(n))
  190. def _check_bsr_matvec(self, m): # skip name check
  191. m = m()
  192. n = self.n
  193. # _matvec
  194. r = m.dot(np.ones((n,), dtype=np.int8))
  195. assert_equal(r[0], int_to_int8(n))
  196. def _check_bsr_diagonal(self, m): # skip name check
  197. m = m()
  198. n = self.n
  199. # _diagonal
  200. r = m.diagonal()
  201. assert_equal(r, np.ones(n))
  202. def _check_bsr_sort_indices(self, m): # skip name check
  203. # _sort_indices
  204. m = m()
  205. m.sort_indices()
  206. def _check_bsr_transpose(self, m): # skip name check
  207. # _transpose
  208. m = m()
  209. m.transpose()
  210. def _check_bsr_matmat(self, m): # skip name check
  211. m = m()
  212. n = self.n
  213. # _bsr_matmat
  214. m2 = bsr_matrix(np.ones((n, 2), dtype=np.int8), blocksize=(m.blocksize[1], 2))
  215. m.dot(m2) # shouldn't SIGSEGV
  216. del m2
  217. # _bsr_matmat
  218. m2 = bsr_matrix(np.ones((2, n), dtype=np.int8), blocksize=(2, m.blocksize[0]))
  219. m2.dot(m) # shouldn't SIGSEGV
  220. @pytest.mark.skip(reason="64-bit indices in sparse matrices not available")
  221. def test_csr_matmat_int64_overflow():
  222. n = 3037000500
  223. assert n**2 > np.iinfo(np.int64).max
  224. # the test would take crazy amounts of memory
  225. check_free_memory(n * (8*2 + 1) * 3 / 1e6)
  226. # int64 overflow
  227. data = np.ones((n,), dtype=np.int8)
  228. indptr = np.arange(n+1, dtype=np.int64)
  229. indices = np.zeros(n, dtype=np.int64)
  230. a = csr_matrix((data, indices, indptr))
  231. b = a.T
  232. assert_raises(RuntimeError, a.dot, b)
  233. def test_upcast():
  234. a0 = csr_matrix([[np.pi, np.pi*1j], [3, 4]], dtype=complex)
  235. b0 = np.array([256+1j, 2**32], dtype=complex)
  236. for a_dtype in supported_dtypes:
  237. for b_dtype in supported_dtypes:
  238. msg = f"({a_dtype!r}, {b_dtype!r})"
  239. if np.issubdtype(a_dtype, np.complexfloating):
  240. a = a0.copy().astype(a_dtype)
  241. else:
  242. a = a0.real.copy().astype(a_dtype)
  243. if np.issubdtype(b_dtype, np.complexfloating):
  244. b = b0.copy().astype(b_dtype)
  245. else:
  246. with np.errstate(invalid="ignore"):
  247. # Casting a large value (2**32) to int8 causes a warning in
  248. # numpy >1.23
  249. b = b0.real.copy().astype(b_dtype)
  250. if not (a_dtype == np.bool_ and b_dtype == np.bool_):
  251. c = np.zeros((2,), dtype=np.bool_)
  252. assert_raises(ValueError, _sparsetools.csr_matvec,
  253. 2, 2, a.indptr, a.indices, a.data, b, c)
  254. if ((np.issubdtype(a_dtype, np.complexfloating) and
  255. not np.issubdtype(b_dtype, np.complexfloating)) or
  256. (not np.issubdtype(a_dtype, np.complexfloating) and
  257. np.issubdtype(b_dtype, np.complexfloating))):
  258. c = np.zeros((2,), dtype=np.float64)
  259. assert_raises(ValueError, _sparsetools.csr_matvec,
  260. 2, 2, a.indptr, a.indices, a.data, b, c)
  261. c = np.zeros((2,), dtype=np.result_type(a_dtype, b_dtype))
  262. _sparsetools.csr_matvec(2, 2, a.indptr, a.indices, a.data, b, c)
  263. assert_allclose(c, np.dot(a.toarray(), b), err_msg=msg)
  264. def test_endianness():
  265. d = np.ones((3,4))
  266. offsets = [-1,0,1]
  267. a = dia_matrix((d.astype('<f8'), offsets), (4, 4))
  268. b = dia_matrix((d.astype('>f8'), offsets), (4, 4))
  269. v = np.arange(4)
  270. assert_allclose(a.dot(v), [1, 3, 6, 5])
  271. assert_allclose(b.dot(v), [1, 3, 6, 5])