test_64bit.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. """ Test functions involving 64bit or 32bit indexing """
  2. import pytest
  3. import numpy as np
  4. from scipy.sparse import (
  5. bsr_array, coo_array, csc_array, csr_array, dia_array,
  6. bsr_matrix, coo_matrix, csc_matrix, csr_matrix, dia_matrix,
  7. )
  8. # rename to avoid pytest collecting them in this module
  9. from .test_base import (
  10. TestBSR as _TestBSR,
  11. TestCOO as _TestCOO,
  12. TestCSC as _TestCSC,
  13. TestCSR as _TestCSR,
  14. TestDIA as _TestDIA,
  15. TestDOK as _TestDOK,
  16. TestLIL as _TestLIL,
  17. TestBSRMatrix as _TestBSRMatrix,
  18. TestCOOMatrix as _TestCOOMatrix,
  19. TestCSCMatrix as _TestCSCMatrix,
  20. TestCSRMatrix as _TestCSRMatrix,
  21. TestDIAMatrix as _TestDIAMatrix,
  22. TestDOKMatrix as _TestDOKMatrix,
  23. TestLILMatrix as _TestLILMatrix,
  24. with_64bit_maxval_limit,
  25. )
  26. # name : reason not tested here
  27. SKIP_TESTS = {
  28. 'test_expm': 'expm for 64-bit indices not available',
  29. 'test_inv': 'linsolve for 64-bit indices not available',
  30. 'test_solve': 'linsolve for 64-bit indices not available',
  31. 'test_scalar_idx_dtype': 'test implemented in base class',
  32. 'test_large_dimensions_reshape': 'test actually requires 64-bit to work',
  33. 'test_constructor_smallcol': 'test verifies int32 indexes',
  34. 'test_constructor_largecol': 'test verifies int64 indexes',
  35. 'test_tocoo_tocsr_tocsc_gh19245': 'test verifies int32 indexes',
  36. }
  37. def cases_64bit(sp_api):
  38. """Yield all tests for all formats
  39. This is more than testing get_index_dtype. It allows checking whether upcasting
  40. or downcasting the index dtypes affects test results. The approach used here
  41. does not try to figure out which tests might fail due to 32/64-bit issues.
  42. We just run them all.
  43. So, each test method in that uses cases_64bit reruns most of the test suite!
  44. """
  45. if sp_api == "sparray":
  46. TEST_CLASSES = [_TestBSR, _TestCOO, _TestCSC, _TestCSR, _TestDIA]
  47. elif sp_api == "sparray-extra":
  48. # lil/dok->other conversion operations use get_index_dtype
  49. # so we include lil & dok test suite even though they do not
  50. # use get_index_dtype within the class. That means many of
  51. # these tests are superfluous, but it's hard to pick which
  52. TEST_CLASSES = [_TestDOK, _TestLIL]
  53. elif sp_api == "spmatrix":
  54. TEST_CLASSES = [_TestBSRMatrix, _TestCOOMatrix, _TestCSCMatrix,
  55. _TestCSRMatrix, _TestDIAMatrix]
  56. elif sp_api == "spmatrix-extra":
  57. # lil/dok->other conversion operations use get_index_dtype
  58. TEST_CLASSES = [_TestDOKMatrix, _TestLILMatrix]
  59. else:
  60. raise ValueError(f"parameter {sp_api=} is not valid")
  61. for cls in TEST_CLASSES:
  62. for method_name in sorted(dir(cls)):
  63. method = getattr(cls, method_name)
  64. if (method_name.startswith('test_') and
  65. not getattr(method, 'slow', False)):
  66. marks = []
  67. msg = SKIP_TESTS.get(method_name)
  68. if msg:
  69. marks.append(pytest.mark.skip(reason=msg))
  70. markers = getattr(method, 'pytestmark', [])
  71. for mark in markers:
  72. if mark.name in ('skipif', 'skip', 'xfail', 'xslow'):
  73. marks.append(mark)
  74. yield pytest.param(cls, method_name, marks=marks)
  75. @pytest.mark.thread_unsafe(reason="fails in parallel")
  76. class RunAll64Bit:
  77. def _check_resiliency(self, cls, method_name, **kw):
  78. # Resiliency test, to check that sparse matrices deal reasonably
  79. # with varying index data types.
  80. @with_64bit_maxval_limit(**kw)
  81. def check(cls, method_name):
  82. instance = cls()
  83. if hasattr(instance, 'setup_method'):
  84. instance.setup_method()
  85. try:
  86. getattr(instance, method_name)()
  87. finally:
  88. if hasattr(instance, 'teardown_method'):
  89. instance.teardown_method()
  90. check(cls, method_name)
  91. class Test64BitArray(RunAll64Bit):
  92. # inheritance of pytest test classes does not separate marks for subclasses.
  93. # So we define these functions in both Array and Matrix versions.
  94. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray"))
  95. def test_resiliency_limit_10(self, cls, method_name):
  96. self._check_resiliency(cls, method_name, maxval_limit=10)
  97. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray"))
  98. def test_resiliency_all_32(self, cls, method_name):
  99. self._check_resiliency(cls, method_name, fixed_dtype=np.int32)
  100. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray"))
  101. def test_resiliency_all_64(self, cls, method_name):
  102. self._check_resiliency(cls, method_name, fixed_dtype=np.int64)
  103. @pytest.mark.fail_slow(2)
  104. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray"))
  105. def test_resiliency_random(self, cls, method_name):
  106. self._check_resiliency(cls, method_name)
  107. class Test64BitMatrix(RunAll64Bit):
  108. # assert_32bit=True only for spmatrix cuz sparray does not check index content
  109. @pytest.mark.fail_slow(5)
  110. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix"))
  111. def test_no_64(self, cls, method_name):
  112. self._check_resiliency(cls, method_name, assert_32bit=True)
  113. class Test64BitMatrixSameAsArray(RunAll64Bit):
  114. # inheritance of pytest test classes does not separate marks for subclasses.
  115. # So we define these functions in both Array and Matrix versions.
  116. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix"))
  117. def test_resiliency_limit_10(self, cls, method_name):
  118. self._check_resiliency(cls, method_name, maxval_limit=10)
  119. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix"))
  120. def test_resiliency_all_32(self, cls, method_name):
  121. self._check_resiliency(cls, method_name, fixed_dtype=np.int32)
  122. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix"))
  123. def test_resiliency_all_64(self, cls, method_name):
  124. self._check_resiliency(cls, method_name, fixed_dtype=np.int64)
  125. @pytest.mark.fail_slow(2)
  126. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix"))
  127. def test_resiliency_random(self, cls, method_name):
  128. # Resiliency check that sparse deals with varying index data types.
  129. self._check_resiliency(cls, method_name)
  130. # Extra: LIL and DOK classes. no direct get_index_dtype, but convert to classes that do
  131. @pytest.mark.xslow
  132. class Test64BitArrayExtra(RunAll64Bit):
  133. # inheritance of pytest test classes does not separate marks for subclasses.
  134. # So we define these functions in both Array and Matrix versions.
  135. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray-extra"))
  136. def test_resiliency_limit_10(self, cls, method_name):
  137. self._check_resiliency(cls, method_name, maxval_limit=10)
  138. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray-extra"))
  139. def test_resiliency_all_32(self, cls, method_name):
  140. self._check_resiliency(cls, method_name, fixed_dtype=np.int32)
  141. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray-extra"))
  142. def test_resiliency_all_64(self, cls, method_name):
  143. self._check_resiliency(cls, method_name, fixed_dtype=np.int64)
  144. @pytest.mark.fail_slow(2)
  145. @pytest.mark.parametrize('cls,method_name', cases_64bit("sparray-extra"))
  146. def test_resiliency_random(self, cls, method_name):
  147. # Resiliency check that sparse deals with varying index data types.
  148. self._check_resiliency(cls, method_name)
  149. # Extra: LIL and DOK classes. no direct get_index_dtype, but convert to classes that do
  150. @pytest.mark.xslow
  151. class Test64BitMatrixExtra(RunAll64Bit):
  152. # assert_32bit=True only for spmatrix cuz sparray does not check index content
  153. @pytest.mark.fail_slow(5)
  154. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix-extra"))
  155. def test_no_64(self, cls, method_name):
  156. self._check_resiliency(cls, method_name, assert_32bit=True)
  157. # inheritance of pytest test classes does not separate marks for subclasses.
  158. # So we define these functions in both Array and Matrix versions.
  159. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix-extra"))
  160. def test_resiliency_limit_10(self, cls, method_name):
  161. self._check_resiliency(cls, method_name, maxval_limit=10)
  162. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix-extra"))
  163. def test_resiliency_all_32(self, cls, method_name):
  164. self._check_resiliency(cls, method_name, fixed_dtype=np.int32)
  165. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix-extra"))
  166. def test_resiliency_all_64(self, cls, method_name):
  167. self._check_resiliency(cls, method_name, fixed_dtype=np.int64)
  168. @pytest.mark.fail_slow(2)
  169. @pytest.mark.parametrize('cls,method_name', cases_64bit("spmatrix-extra"))
  170. def test_resiliency_random(self, cls, method_name):
  171. # Resiliency check that sparse deals with varying index data types.
  172. self._check_resiliency(cls, method_name)
  173. @pytest.mark.thread_unsafe(reason="Fails in parallel for unknown reasons")
  174. class Test64BitTools:
  175. # classes that use get_index_dtype
  176. MAT_CLASSES = [
  177. bsr_matrix, coo_matrix, csc_matrix, csr_matrix, dia_matrix,
  178. bsr_array, coo_array, csc_array, csr_array, dia_array,
  179. ]
  180. def _compare_index_dtype(self, m, dtype):
  181. dtype = np.dtype(dtype)
  182. if m.format in ['csc', 'csr', 'bsr']:
  183. return (m.indices.dtype == dtype) and (m.indptr.dtype == dtype)
  184. elif m.format == 'coo':
  185. return (m.row.dtype == dtype) and (m.col.dtype == dtype)
  186. elif m.format == 'dia':
  187. return (m.offsets.dtype == dtype)
  188. else:
  189. raise ValueError(f"matrix {m!r} has no integer indices")
  190. def test_decorator_maxval_limit(self):
  191. # Test that the with_64bit_maxval_limit decorator works
  192. @with_64bit_maxval_limit(maxval_limit=10)
  193. def check(mat_cls):
  194. m = mat_cls(np.random.rand(10, 1))
  195. assert self._compare_index_dtype(m, np.int32)
  196. m = mat_cls(np.random.rand(11, 1))
  197. assert self._compare_index_dtype(m, np.int64)
  198. for mat_cls in self.MAT_CLASSES:
  199. check(mat_cls)
  200. def test_decorator_maxval_random(self):
  201. # Test that the with_64bit_maxval_limit decorator works (2)
  202. @with_64bit_maxval_limit(random=True)
  203. def check(mat_cls):
  204. seen_32 = False
  205. seen_64 = False
  206. for k in range(100):
  207. m = mat_cls(np.random.rand(9, 9))
  208. seen_32 = seen_32 or self._compare_index_dtype(m, np.int32)
  209. seen_64 = seen_64 or self._compare_index_dtype(m, np.int64)
  210. if seen_32 and seen_64:
  211. break
  212. else:
  213. raise AssertionError("both 32 and 64 bit indices not seen")
  214. for mat_cls in self.MAT_CLASSES:
  215. check(mat_cls)
  216. def test_downcast_intp(self):
  217. # Check that bincount and ufunc.reduceat intp downcasts are
  218. # dealt with. The point here is to trigger points in the code
  219. # that can fail on 32-bit systems when using 64-bit indices,
  220. # due to use of functions that only work with intp-size indices.
  221. @with_64bit_maxval_limit(fixed_dtype=np.int64, downcast_maxval=1)
  222. def check_limited(csc_container, csr_container, coo_container):
  223. # These involve indices larger than `downcast_maxval`
  224. a = csc_container([[1, 2], [3, 4], [5, 6]])
  225. pytest.raises(AssertionError, a.count_nonzero, axis=1)
  226. pytest.raises(AssertionError, a.sum, axis=0)
  227. a = csr_container([[1, 2, 3], [3, 4, 6]])
  228. pytest.raises(AssertionError, a.count_nonzero, axis=0)
  229. pytest.raises(AssertionError, a.sum, axis=1)
  230. a = coo_container([[1, 2, 3], [3, 4, 5]])
  231. pytest.raises(AssertionError, a.count_nonzero, axis=0)
  232. a.has_canonical_format = False
  233. pytest.raises(AssertionError, a.sum_duplicates)
  234. @with_64bit_maxval_limit(fixed_dtype=np.int64)
  235. def check_unlimited(csc_container, csr_container, coo_container):
  236. # These involve indices smaller than `downcast_maxval`
  237. a = csc_container([[1, 2], [3, 4], [5, 6]])
  238. a.count_nonzero(axis=1)
  239. a.sum(axis=0)
  240. a = csr_container([[1, 2, 3], [3, 4, 6]])
  241. a.count_nonzero(axis=0)
  242. a.sum(axis=1)
  243. a = coo_container([[1, 2, 3], [3, 4, 5]])
  244. a.count_nonzero(axis=0)
  245. a.has_canonical_format = False
  246. a.sum_duplicates()
  247. check_limited(csc_array, csr_array, coo_array)
  248. check_unlimited(csc_array, csr_array, coo_array)
  249. check_limited(csc_matrix, csr_matrix, coo_matrix)
  250. check_unlimited(csc_matrix, csr_matrix, coo_matrix)