| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import pytest
- import h5py
- from .common import TestCase
def is_aligned(dataset, offset=4096):
    """Tell whether the dataset's file offset is a whole multiple of ``offset``.

    The position is read from ``dataset.id.get_offset()``; the dataset
    counts as aligned when that position leaves no remainder after
    division by ``offset``.
    """
    remainder = dataset.id.get_offset() % offset
    return remainder == 0
def dataset_name(i):
    """Build the name of the ``i``-th dataset: ``data`` plus ``i`` zero-padded to width 3."""
    padded_index = format(i, "03")
    return "data" + padded_index
class TestFileAlignment(TestCase):
    """
    Ensure that setting the file alignment has the desired effect
    in the internal structure.
    """

    def test_no_alignment_set(self):
        """Without alignment options, some dataset should miss a 4096 boundary."""
        fname = self.mktemp()
        # 881 is a prime number, which should help randomize where the
        # datasets land in the file. A nice even size might give a
        # pathological case where the data ends up aligned even though no
        # alignment was requested.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets; at least one of them should be
            # misaligned. This is a heuristic rather than a guarantee.
            # Exhausting all 1000 iterations means every dataset was
            # aligned, so during correct operation this loop is expected
            # to finish quickly.
            for i in range(1000):
                dataset = h5file.create_dataset(
                    dataset_name(i), shape, dtype='uint8')
                # Assign data so that the dataset is instantiated in
                # the file. The value is wrapped into the uint8 range
                # because i can exceed 255.
                dataset[...] = (i % 256)
                if not is_aligned(dataset):
                    # Found a misaligned dataset: nothing more to check.
                    break
            else:
                raise RuntimeError("Data was all found to be aligned to 4096")

    @pytest.mark.parallel_threads_limit(2)  # Quite slow otherwise
    def test_alignment_set_above_threshold(self):
        """Datasets at or above the threshold must all be aligned to the interval."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization,
        # so the shapes are looped over by hand.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # One above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create 1000 datasets; every one of them is expected to
                # be aligned.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Truncate to uint8
                    assert is_aligned(dataset, offset=alignment_interval)

    def test_alignment_set_below_threshold(self):
        """Datasets below the threshold should not all end up aligned."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization,
        # so the shapes are looped over by hand.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets; at least one of them should
                # be misaligned. This is a heuristic rather than a
                # guarantee. Exhausting all 1000 iterations means every
                # dataset was aligned, so during correct operation this
                # loop is expected to finish quickly.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file. The value is wrapped into the uint8 range
                    # because i can exceed 255.
                    dataset[...] = (i % 256)
                    if not is_aligned(dataset, offset=alignment_interval):
                        # Found a misaligned dataset: move on to the
                        # next shape.
                        break
                else:
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")
|