| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652 |
- # This file is part of h5py, a Python interface to the HDF5 library.
- #
- # http://www.h5py.org
- #
- # Copyright 2008-2013 Andrew Collette and contributors
- #
- # License: Standard 3-clause BSD; see "license.txt" for full license terms
- # and contributor agreement.
- """
- Implements high-level support for HDF5 file objects.
- """
- import inspect
- import os
- import sys
- from warnings import warn
- from .compat import filename_decode, filename_encode
- from .base import phil, with_phil
- from .group import Group, set_fapl_file_locking
- from .. import h5, h5f, h5p, h5i, h5fd, _objects
- from .. import version
- mpi = h5.get_config().mpi
- ros3 = h5.get_config().ros3
- direct_vfd = h5.get_config().direct_vfd
- hdf5_version = version.hdf5_version_tuple[0:3]
- libver_dict = {'earliest': h5f.LIBVER_EARLIEST, 'latest': h5f.LIBVER_LATEST,
- 'v108': h5f.LIBVER_V18, 'v110': h5f.LIBVER_V110}
- libver_dict_r = dict((y, x) for x, y in libver_dict.items())
- if hdf5_version >= (1, 11, 4):
- libver_dict.update({'v112': h5f.LIBVER_V112})
- libver_dict_r.update({h5f.LIBVER_V112: 'v112'})
- if hdf5_version >= (1, 13, 0):
- libver_dict.update({'v114': h5f.LIBVER_V114})
- libver_dict_r.update({h5f.LIBVER_V114: 'v114'})
- if hdf5_version >= (2, 0, 0):
- libver_dict.update({'v200': h5f.LIBVER_V200})
- libver_dict_r.update({h5f.LIBVER_V200: 'v200'})
- def _set_fapl_mpio(plist, **kwargs):
- """Set file access property list for mpio driver"""
- if not mpi:
- raise ValueError("h5py was built without MPI support, can't use mpio driver")
- import mpi4py.MPI
- kwargs.setdefault('info', mpi4py.MPI.Info())
- plist.set_fapl_mpio(**kwargs)
- def _set_fapl_fileobj(plist, **kwargs):
- """Set the Python file object driver in a file access property list"""
- plist.set_fileobj_driver(h5fd.fileobj_driver, kwargs.get('fileobj'))
- _drivers = {
- 'sec2': lambda plist, **kwargs: plist.set_fapl_sec2(**kwargs),
- 'stdio': lambda plist, **kwargs: plist.set_fapl_stdio(**kwargs),
- 'core': lambda plist, **kwargs: plist.set_fapl_core(**kwargs),
- 'family': lambda plist, **kwargs: plist.set_fapl_family(
- memb_fapl=plist.copy(),
- **kwargs
- ),
- 'mpio': _set_fapl_mpio,
- 'fileobj': _set_fapl_fileobj,
- 'split': lambda plist, **kwargs: plist.set_fapl_split(**kwargs),
- }
- if ros3:
- _drivers['ros3'] = lambda plist, **kwargs: plist.set_fapl_ros3(**kwargs)
- if direct_vfd:
- _drivers['direct'] = lambda plist, **kwargs: plist.set_fapl_direct(**kwargs) # noqa
- def register_driver(name, set_fapl):
- """Register a custom driver.
- Parameters
- ----------
- name : str
- The name of the driver.
- set_fapl : callable[PropFAID, **kwargs] -> NoneType
- The function to set the fapl to use your custom driver.
- """
- _drivers[name] = set_fapl
- def unregister_driver(name):
- """Unregister a custom driver.
- Parameters
- ----------
- name : str
- The name of the driver.
- """
- del _drivers[name]
- def registered_drivers():
- """Return a frozenset of the names of all of the registered drivers.
- """
- return frozenset(_drivers)
- def make_fapl(
- driver, libver=None, rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None,
- locking=None, page_buf_size=None, min_meta_keep=0, min_raw_keep=0,
- alignment_threshold=1, alignment_interval=1, meta_block_size=None,
- **kwds
- ):
- """ Set up a file access property list """
- plist = h5p.create(h5p.FILE_ACCESS)
- if libver is not None:
- if libver in libver_dict:
- low = libver_dict[libver]
- high = h5f.LIBVER_LATEST
- else:
- low, high = (libver_dict[x] for x in libver)
- else:
- # we default to earliest
- low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST
- plist.set_libver_bounds(low, high)
- plist.set_alignment(alignment_threshold, alignment_interval)
- cache_settings = list(plist.get_cache())
- if rdcc_nslots is not None:
- cache_settings[1] = rdcc_nslots
- if rdcc_nbytes is not None:
- cache_settings[2] = rdcc_nbytes
- if rdcc_w0 is not None:
- cache_settings[3] = rdcc_w0
- plist.set_cache(*cache_settings)
- if page_buf_size:
- plist.set_page_buffer_size(int(page_buf_size), int(min_meta_keep),
- int(min_raw_keep))
- if meta_block_size is not None:
- plist.set_meta_block_size(int(meta_block_size))
- if locking is not None:
- set_fapl_file_locking(plist, locking)
- if driver is None or (driver == 'windows' and sys.platform == 'win32'):
- # Prevent swallowing unused key arguments
- if kwds:
- msg = "'{key}' is an invalid keyword argument for this function" \
- .format(key=next(iter(kwds)))
- raise TypeError(msg)
- return plist
- try:
- set_fapl = _drivers[driver]
- except KeyError as exc:
- raise ValueError(f'Unknown driver type {driver!r}') from exc
- else:
- if driver == 'ros3':
- token = kwds.pop('session_token', None)
- set_fapl(plist, **kwds)
- if token:
- if hdf5_version < (1, 14, 2):
- raise ValueError('HDF5 >= 1.14.2 required for AWS session token')
- plist.set_fapl_ros3_token(token)
- else:
- set_fapl(plist, **kwds)
- return plist
- def make_fcpl(track_order=False, track_times=False, fs_strategy=None, fs_persist=False,
- fs_threshold=1, fs_page_size=None):
- """ Set up a file creation property list """
- plist = h5p.create(h5p.FILE_CREATE)
- if track_order:
- plist.set_link_creation_order(
- h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
- plist.set_attr_creation_order(
- h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
- if track_times is None:
- track_times = False # Allow explicit None to mean h5py's default
- if track_times in (True, False):
- plist.set_obj_track_times(track_times)
- else:
- raise TypeError("track_times must be either True or False")
- if fs_strategy:
- strategies = {
- 'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR,
- 'page': h5f.FSPACE_STRATEGY_PAGE,
- 'aggregate': h5f.FSPACE_STRATEGY_AGGR,
- 'none': h5f.FSPACE_STRATEGY_NONE
- }
- fs_strat_num = strategies.get(fs_strategy, -1)
- if fs_strat_num == -1:
- raise ValueError("Invalid file space strategy type")
- plist.set_file_space_strategy(fs_strat_num, fs_persist, fs_threshold)
- if fs_page_size and fs_strategy == 'page':
- plist.set_file_space_page_size(int(fs_page_size))
- return plist
- def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False):
- """ Get a new FileID by opening or creating a file.
- Also validates mode argument."""
- if userblock_size is not None:
- if mode in ('r', 'r+'):
- raise ValueError("User block may only be specified "
- "when creating a file")
- try:
- userblock_size = int(userblock_size)
- except (TypeError, ValueError):
- raise ValueError("User block size must be an integer") from None
- if fcpl is None:
- fcpl = h5p.create(h5p.FILE_CREATE)
- fcpl.set_userblock(userblock_size)
- if mode == 'r':
- flags = h5f.ACC_RDONLY
- if swmr:
- flags |= h5f.ACC_SWMR_READ
- fid = h5f.open(name, flags, fapl=fapl)
- elif mode == 'r+':
- fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
- elif mode in ['w-', 'x']:
- fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
- elif mode == 'w':
- fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl)
- elif mode == 'a':
- # Open in append mode (read/write).
- # If that fails, create a new file only if it won't clobber an
- # existing one (ACC_EXCL)
- try:
- fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
- # Not all drivers raise FileNotFoundError (commented those that do not)
- except FileNotFoundError if fapl.get_driver() in (
- h5fd.SEC2,
- h5fd.DIRECT if direct_vfd else -1,
- # h5fd.STDIO,
- # h5fd.CORE,
- h5fd.FAMILY,
- h5fd.WINDOWS,
- # h5fd.MPIO,
- # h5fd.MPIPOSIX,
- h5fd.fileobj_driver,
- h5fd.ROS3D if ros3 else -1,
- ) else OSError:
- fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
- else:
- raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a")
- try:
- if userblock_size is not None:
- existing_fcpl = fid.get_create_plist()
- if existing_fcpl.get_userblock() != userblock_size:
- raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, existing_fcpl.get_userblock()))
- except Exception as e:
- fid.close()
- raise e
- return fid
- class File(Group):
- """
- Represents an HDF5 file.
- """
- @property
- def attrs(self):
- """ Attributes attached to this object """
- # hdf5 complains that a file identifier is an invalid location for an
- # attribute. Instead of self, pass the root group to AttributeManager:
- from . import attrs
- with phil:
- return attrs.AttributeManager(self['/'])
- @property
- @with_phil
- def filename(self):
- """File name on disk"""
- return filename_decode(h5f.get_name(self.id))
- @property
- @with_phil
- def driver(self):
- """Low-level HDF5 file driver used to open file"""
- drivers = {h5fd.SEC2: 'sec2',
- h5fd.STDIO: 'stdio',
- h5fd.CORE: 'core',
- h5fd.FAMILY: 'family',
- h5fd.WINDOWS: 'windows',
- h5fd.MPIO: 'mpio',
- h5fd.MPIPOSIX: 'mpiposix',
- h5fd.fileobj_driver: 'fileobj'}
- if ros3:
- drivers[h5fd.ROS3D] = 'ros3'
- if direct_vfd:
- drivers[h5fd.DIRECT] = 'direct'
- return drivers.get(self.id.get_access_plist().get_driver(), 'unknown')
- @property
- @with_phil
- def mode(self):
- """ Python mode used to open file """
- write_intent = h5f.ACC_RDWR | h5f.ACC_SWMR_WRITE
- return 'r+' if self.id.get_intent() & write_intent else 'r'
- @property
- @with_phil
- def libver(self):
- """File format version bounds (2-tuple: low, high)"""
- bounds = self.id.get_access_plist().get_libver_bounds()
- return tuple(libver_dict_r[x] for x in bounds)
- @property
- @with_phil
- def userblock_size(self):
- """ User block size (in bytes) """
- fcpl = self.id.get_create_plist()
- return fcpl.get_userblock()
- @property
- @with_phil
- def meta_block_size(self):
- """ Meta block size (in bytes) """
- fapl = self.id.get_access_plist()
- return fapl.get_meta_block_size()
- if mpi:
- @property
- @with_phil
- def atomic(self):
- """ Set/get MPI-IO atomic mode
- """
- return self.id.get_mpi_atomicity()
- @atomic.setter
- @with_phil
- def atomic(self, value):
- # pylint: disable=missing-docstring
- self.id.set_mpi_atomicity(value)
- @property
- @with_phil
- def swmr_mode(self):
- """ Controls single-writer multiple-reader mode """
- return bool(self.id.get_intent() & (h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE))
- @swmr_mode.setter
- @with_phil
- def swmr_mode(self, value):
- # pylint: disable=missing-docstring
- if value:
- self.id.start_swmr_write()
- else:
- raise ValueError("It is not possible to forcibly switch SWMR mode off.")
- def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False,
- rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None,
- fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None,
- page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None,
- alignment_threshold=1, alignment_interval=1, meta_block_size=None,
- *, track_times=False, **kwds):
- """Create a new file object.
- See the h5py user guide for a detailed explanation of the options.
- name
- Name of the file on disk, or file-like object. Note: for files
- created with the 'core' driver, HDF5 still requires this be
- non-empty.
- mode
- r Readonly, file must exist (default)
- r+ Read/write, file must exist
- w Create file, truncate if exists
- w- or x Create file, fail if exists
- a Read/write if exists, create otherwise
- driver
- Name of the driver to use. Legal values are None (default,
- recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3'.
- libver
- Library version bounds. Supported values: 'earliest', 'v108',
- 'v110', 'v112', 'v114', 'v200' and 'latest' depending on the
- version of libhdf5 h5py is built against.
- userblock_size
- Desired size of user block. Only allowed when creating a new
- file (mode w, w- or x).
- swmr
- Open the file in SWMR read mode. Only used when mode = 'r'.
- rdcc_nslots
- The number of chunk slots in the raw data chunk cache for this
- file. Increasing this value reduces the number of cache collisions,
- but slightly increases the memory used. Due to the hashing
- strategy, this value should ideally be a prime number. As a rule of
- thumb, this value should be at least 10 times the number of chunks
- that can fit in rdcc_nbytes bytes. For maximum performance, this
- value should be set approximately 100 times that number of
- chunks. The default value is 521. Applies to all datasets unless individually changed.
- rdcc_nbytes
- Total size of the dataset chunk cache in bytes. The default size per
- dataset is 1024**2 (1 MiB) for HDF5 before 2.0 and 8 MiB for HDF5
- 2.0 and later. Applies to all datasets unless individually changed.
- rdcc_w0
- The chunk preemption policy for all datasets. This must be
- between 0 and 1 inclusive and indicates the weighting according to
- which chunks which have been fully read or written are penalized
- when determining which chunks to flush from cache. A value of 0
- means fully read or written chunks are treated no differently than
- other chunks (the preemption is strictly LRU) while a value of 1
- means fully read or written chunks are always preempted before
- other chunks. If your application only reads or writes data once,
- this can be safely set to 1. Otherwise, this should be set lower
- depending on how often you re-read or re-write the same data. The
- default value is 0.75. Applies to all datasets unless individually changed.
- track_order
- Track dataset/group/attribute creation order under root group
- if True. If None use global default h5.get_config().track_order.
- track_times: bool or None, default: False
- If True, store timestamps for this group in the file.
- If None, fall back to the default value.
- fs_strategy
- The file space handling strategy to be used. Only allowed when
- creating a new file (mode w, w- or x). Defined as:
- "fsm" FSM, Aggregators, VFD
- "page" Paged FSM, VFD
- "aggregate" Aggregators, VFD
- "none" VFD
- If None use HDF5 defaults.
- fs_page_size
- File space page size in bytes. Only used when fs_strategy="page". If
- None use the HDF5 default (4096 bytes).
- fs_persist
- A boolean value to indicate whether free space should be persistent
- or not. Only allowed when creating a new file. The default value
- is False.
- fs_threshold
- The smallest free-space section size that the free space manager
- will track. Only allowed when creating a new file. The default
- value is 1.
- page_buf_size
- Page buffer size in bytes. Only allowed for HDF5 files created with
- fs_strategy="page". Must be a power of two value and greater or
- equal than the file space page size when creating the file. It is
- not used by default.
- min_meta_keep
- Minimum percentage of metadata to keep in the page buffer before
- allowing pages containing metadata to be evicted. Applicable only if
- page_buf_size is set. Default value is zero.
- min_raw_keep
- Minimum percentage of raw data to keep in the page buffer before
- allowing pages containing raw data to be evicted. Applicable only if
- page_buf_size is set. Default value is zero.
- locking
- The file locking behavior. Defined as:
- - False (or "false") -- Disable file locking
- - True (or "true") -- Enable file locking
- - "best-effort" -- Enable file locking but ignore some errors
- - None -- Use HDF5 defaults
- .. warning::
- The HDF5_USE_FILE_LOCKING environment variable can override
- this parameter.
- alignment_threshold
- Together with ``alignment_interval``, this property ensures that
- any file object greater than or equal in size to the alignment
- threshold (in bytes) will be aligned on an address which is a
- multiple of alignment interval.
- alignment_interval
- This property should be used in conjunction with
- ``alignment_threshold``. See the description above. For more
- details, see
- https://support.hdfgroup.org/documentation/hdf5/latest/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a
- meta_block_size
- Set the current minimum size, in bytes, of new metadata block allocations.
- See https://support.hdfgroup.org/documentation/hdf5/latest/group___f_a_p_l.html#ga8822e3dedc8e1414f20871a87d533cb1
- Additional keywords
- Passed on to the selected file driver.
- """
- if driver == 'ros3':
- if not ros3:
- raise ValueError("h5py was built without ROS3 support, can't use ros3 driver")
- if hdf5_version < (2, 0, 0):
- from urllib.parse import urlparse
- url = urlparse(name)
- if url.scheme == 's3':
- aws_region = kwds.get('aws_region', b'').decode('ascii')
- if len(aws_region) == 0:
- raise ValueError('AWS region required for s3:// location')
- name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}'
- elif url.scheme not in ('https', 'http'):
- raise ValueError(f'{name}: S3 location must begin with '
- 'either "https://", "http://", or "s3://"')
- if isinstance(name, _objects.ObjectID):
- if fs_strategy:
- raise ValueError("Unable to set file space strategy of an existing file")
- with phil:
- fid = h5i.get_file_id(name)
- else:
- if hasattr(name, 'read') and hasattr(name, 'seek'):
- if driver not in (None, 'fileobj'):
- raise ValueError("Driver must be 'fileobj' for file-like object if specified.")
- driver = 'fileobj'
- if kwds.get('fileobj', name) != name:
- raise ValueError("Invalid value of 'fileobj' argument; "
- "must equal to file-like object if specified.")
- kwds.update(fileobj=name)
- name = repr(name).encode('ASCII', 'replace')
- else:
- name = filename_encode(name)
- if track_order is None:
- track_order = h5.get_config().track_order
- if fs_strategy and mode not in ('w', 'w-', 'x'):
- raise ValueError("Unable to set file space strategy of an existing file")
- if swmr and mode != 'r':
- warn(
- "swmr=True only affects read ('r') mode. For swmr write "
- "mode, set f.swmr_mode = True after opening the file.",
- stacklevel=2,
- )
- with phil:
- fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
- locking, page_buf_size, min_meta_keep, min_raw_keep,
- alignment_threshold=alignment_threshold,
- alignment_interval=alignment_interval,
- meta_block_size=meta_block_size,
- **kwds)
- fcpl = make_fcpl(track_order=track_order, track_times=track_times,
- fs_strategy=fs_strategy, fs_persist=fs_persist,
- fs_threshold=fs_threshold, fs_page_size=fs_page_size)
- fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
- if isinstance(libver, tuple):
- self._libver = libver
- else:
- self._libver = (libver, 'latest')
- super().__init__(fid)
- _in_memory_file_counter = 0
- @classmethod
- @with_phil
- def in_memory(cls, file_image=None, **kwargs):
- """Create an HDF5 file in memory, without an underlying file
- file_image
- The initial file contents as bytes (or anything that supports the
- Python buffer interface). HDF5 takes a copy of this data.
- block_size
- Chunk size for new memory alloactions (default 64 KiB).
- Other keyword arguments are like File(), although name, mode,
- driver and locking can't be passed.
- """
- for k in ('driver', 'locking', 'backing_store'):
- if k in kwargs:
- raise TypeError(
- f"File.in_memory() got an unexpected keyword argument {k!r}"
- )
- fcpl_kwargs = {}
- for k in inspect.signature(make_fcpl).parameters:
- if k in kwargs:
- fcpl_kwargs[k] = kwargs.pop(k)
- fcpl = make_fcpl(**fcpl_kwargs)
- fapl = make_fapl(driver="core", backing_store=False, **kwargs)
- if file_image:
- if fcpl_kwargs:
- kw = ', '.join(fcpl_kwargs)
- raise TypeError(f"{kw} parameters cannot be used with file_image")
- fapl.set_file_image(file_image)
- # We have to give HDF5 a filename, but it should never use it.
- # This is a hint both in memory, and in case a bug ever creates a file.
- # The name also needs to be different from any other open file;
- # we use a simple counter (protected by the 'phil' lock) for this.
- name = b"h5py_in_memory_nonfile_%d" % cls._in_memory_file_counter
- cls._in_memory_file_counter += 1
- if file_image:
- fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
- else:
- fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
- return cls(fid)
- def close(self):
- """ Close the file. All open objects become invalid """
- with phil:
- # Check that the file is still open, otherwise skip
- if self.id.valid:
- # We have to explicitly murder all open objects related to the file
- # Close file-resident objects first, then the files.
- # Otherwise we get errors in MPI mode.
- self.id._close_open_objects(h5f.OBJ_LOCAL | ~h5f.OBJ_FILE)
- self.id._close_open_objects(h5f.OBJ_LOCAL | h5f.OBJ_FILE)
- self.id.close()
- def flush(self):
- """ Tell the HDF5 library to flush its buffers.
- """
- with phil:
- h5f.flush(self.id)
- @with_phil
- def __enter__(self):
- return self
- @with_phil
- def __exit__(self, *args):
- if self.id:
- self.close()
- @with_phil
- def __repr__(self):
- if not self.id:
- r = '<Closed HDF5 file>'
- else:
- # Filename has to be forced to Unicode if it comes back bytes
- # Mode is always a "native" string
- filename = self.filename
- if isinstance(filename, bytes): # Can't decode fname
- filename = filename.decode('utf8', 'replace')
- r = f'<HDF5 file "{os.path.basename(filename)}" (mode {self.mode})>'
- return r
|