# test_file_alignment.py (4.4 KB)
  1. import pytest
  2. import h5py
  3. from .common import TestCase
  4. def is_aligned(dataset, offset=4096):
  5. # Here we check if the dataset is aligned
  6. return dataset.id.get_offset() % offset == 0
  7. def dataset_name(i):
  8. return f"data{i:03}"
  9. class TestFileAlignment(TestCase):
  10. """
  11. Ensure that setting the file alignment has the desired effect
  12. in the internal structure.
  13. """
  14. def test_no_alignment_set(self):
  15. fname = self.mktemp()
  16. # 881 is a prime number, so hopefully this help randomize the alignment
  17. # enough
  18. # A nice even number might give a pathological case where
  19. # While we don't want the data to be aligned, it ends up aligned...
  20. shape = (881,)
  21. with h5py.File(fname, 'w') as h5file:
  22. # Create up to 1000 datasets
  23. # At least one of them should be misaligned.
  24. # While this isn't perfect, it seems that there
  25. # The case where 1000 datasets get created is one where the data
  26. # is aligned. Therefore, during correct operation, this test is
  27. # expected to finish quickly
  28. for i in range(1000):
  29. dataset = h5file.create_dataset(
  30. dataset_name(i), shape, dtype='uint8')
  31. # Assign data so that the dataset is instantiated in
  32. # the file
  33. dataset[...] = i
  34. if not is_aligned(dataset):
  35. # Break early asserting that the file is not aligned
  36. break
  37. else:
  38. raise RuntimeError("Data was all found to be aligned to 4096")
  39. @pytest.mark.parallel_threads_limit(2) # Quite slow otherwise
  40. def test_alignment_set_above_threshold(self):
  41. # 2022/01/19 hmaarrfk
  42. # UnitTest (TestCase) doesn't play well with pytest parametrization.
  43. alignment_threshold = 1000
  44. alignment_interval = 4096
  45. for shape in [
  46. (1033,), # A prime number above the threshold
  47. (1000,), # Exactly equal to the threshold
  48. (1001,), # one above the threshold
  49. ]:
  50. fname = self.mktemp()
  51. with h5py.File(fname, 'w',
  52. alignment_threshold=alignment_threshold,
  53. alignment_interval=alignment_interval) as h5file:
  54. # Create up to 1000 datasets
  55. # They are all expected to be aligned
  56. for i in range(1000):
  57. dataset = h5file.create_dataset(
  58. dataset_name(i), shape, dtype='uint8')
  59. # Assign data so that the dataset is instantiated in
  60. # the file
  61. dataset[...] = (i % 256) # Truncate to uint8
  62. assert is_aligned(dataset, offset=alignment_interval)
  63. def test_alignment_set_below_threshold(self):
  64. # 2022/01/19 hmaarrfk
  65. # UnitTest (TestCase) doesn't play well with pytest parametrization.
  66. alignment_threshold = 1000
  67. alignment_interval = 1024
  68. for shape in [
  69. (881,), # A prime number below the threshold
  70. (999,), # Exactly one below the threshold
  71. ]:
  72. fname = self.mktemp()
  73. with h5py.File(fname, 'w',
  74. alignment_threshold=alignment_threshold,
  75. alignment_interval=alignment_interval) as h5file:
  76. # Create up to 1000 datasets
  77. # At least one of them should be misaligned.
  78. # While this isn't perfect, it seems that there
  79. # The case where 1000 datasets get created is one where the
  80. # data is aligned. Therefore, during correct operation, this
  81. # test is expected to finish quickly
  82. for i in range(1000):
  83. dataset = h5file.create_dataset(
  84. dataset_name(i), shape, dtype='uint8')
  85. # Assign data so that the dataset is instantiated in
  86. # the file
  87. dataset[...] = i
  88. if not is_aligned(dataset, offset=alignment_interval):
  89. # Break early asserting that the file is not aligned
  90. break
  91. else:
  92. raise RuntimeError(
  93. "Data was all found to be aligned to "
  94. f"{alignment_interval}. This is highly unlikely.")