test_sketches.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. """Tests for _sketches.py."""
  2. import numpy as np
  3. from numpy.testing import assert_, assert_equal
  4. from scipy.linalg import clarkson_woodruff_transform
  5. from scipy.linalg._sketches import cwt_matrix
  6. from scipy.sparse import issparse, rand
  7. from scipy.sparse.linalg import norm
  8. class TestClarksonWoodruffTransform:
  9. """
  10. Testing the Clarkson Woodruff Transform
  11. """
  12. # set seed for generating test matrices
  13. rng = np.random.default_rng(1179103485)
  14. # Test matrix parameters
  15. n_rows = 2000
  16. n_cols = 100
  17. density = 0.1
  18. # Sketch matrix dimensions
  19. n_sketch_rows = 200
  20. # Seeds to test with
  21. seeds = [1755490010, 934377150, 1391612830, 1752708722, 2008891431,
  22. 1302443994, 1521083269, 1501189312, 1126232505, 1533465685]
  23. A_dense = rng.random((n_rows, n_cols))
  24. A_csc = rand(
  25. n_rows, n_cols, density=density, format='csc', random_state=rng,
  26. )
  27. A_csr = rand(
  28. n_rows, n_cols, density=density, format='csr', random_state=rng,
  29. )
  30. A_coo = rand(
  31. n_rows, n_cols, density=density, format='coo', random_state=rng,
  32. )
  33. # Collect the test matrices
  34. test_matrices = [
  35. A_dense, A_csc, A_csr, A_coo,
  36. ]
  37. # Test vector with norm ~1
  38. x = rng.random((n_rows, 1)) / np.sqrt(n_rows)
  39. del rng # Not deterministic in pytest-run-parallel
  40. def test_sketch_dimensions(self):
  41. for A in self.test_matrices:
  42. for seed in self.seeds:
  43. # seed to ensure backwards compatibility post SPEC7
  44. sketch = clarkson_woodruff_transform(
  45. A, self.n_sketch_rows, seed=seed
  46. )
  47. assert_(sketch.shape == (self.n_sketch_rows, self.n_cols))
  48. def test_seed_returns_identical_transform_matrix(self):
  49. for seed in self.seeds:
  50. S1 = cwt_matrix(
  51. self.n_sketch_rows, self.n_rows, rng=seed
  52. ).toarray()
  53. S2 = cwt_matrix(
  54. self.n_sketch_rows, self.n_rows, rng=seed
  55. ).toarray()
  56. assert_equal(S1, S2)
  57. def test_seed_returns_identically(self):
  58. for A in self.test_matrices:
  59. for seed in self.seeds:
  60. sketch1 = clarkson_woodruff_transform(
  61. A, self.n_sketch_rows, rng=seed
  62. )
  63. sketch2 = clarkson_woodruff_transform(
  64. A, self.n_sketch_rows, rng=seed
  65. )
  66. if issparse(sketch1):
  67. sketch1 = sketch1.toarray()
  68. if issparse(sketch2):
  69. sketch2 = sketch2.toarray()
  70. assert_equal(sketch1, sketch2)
  71. def test_sketch_preserves_frobenius_norm(self):
  72. # Given the probabilistic nature of the sketches
  73. # we run the test multiple times and check that
  74. # we pass all/almost all the tries.
  75. n_errors = 0
  76. for A in self.test_matrices:
  77. if issparse(A):
  78. true_norm = norm(A)
  79. else:
  80. true_norm = np.linalg.norm(A)
  81. for seed in self.seeds:
  82. sketch = clarkson_woodruff_transform(
  83. A, self.n_sketch_rows, rng=seed,
  84. )
  85. if issparse(sketch):
  86. sketch_norm = norm(sketch)
  87. else:
  88. sketch_norm = np.linalg.norm(sketch)
  89. if np.abs(true_norm - sketch_norm) > 0.1 * true_norm:
  90. n_errors += 1
  91. assert_(n_errors == 0)
  92. def test_sketch_preserves_vector_norm(self):
  93. n_errors = 0
  94. n_sketch_rows = int(np.ceil(2. / (0.01 * 0.5**2)))
  95. true_norm = np.linalg.norm(self.x)
  96. for seed in self.seeds:
  97. sketch = clarkson_woodruff_transform(
  98. self.x, n_sketch_rows, rng=seed,
  99. )
  100. sketch_norm = np.linalg.norm(sketch)
  101. if np.abs(true_norm - sketch_norm) > 0.5 * true_norm:
  102. n_errors += 1
  103. assert_(n_errors == 0)