group.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898
  1. # This file is part of h5py, a Python interface to the HDF5 library.
  2. #
  3. # http://www.h5py.org
  4. #
  5. # Copyright 2008-2013 Andrew Collette and contributors
  6. #
  7. # License: Standard 3-clause BSD; see "license.txt" for full license terms
  8. # and contributor agreement.
  9. """
  10. Implements support for high-level access to HDF5 groups.
  11. """
  12. from contextlib import contextmanager
  13. import posixpath as pp
  14. import numpy
  15. from .compat import filename_decode, filename_encode
  16. from .. import h5, h5f, h5g, h5i, h5o, h5r, h5t, h5l, h5p
  17. from . import base
  18. from .base import HLObject, MutableMappingHDF5, phil, with_phil
  19. from . import dataset
  20. from . import datatype
  21. from .vds import vds_support
  22. def set_fapl_file_locking(fapl, locking):
  23. if locking in ("false", False):
  24. fapl.set_file_locking(False, ignore_when_disabled=False)
  25. elif locking in ("true", True):
  26. fapl.set_file_locking(True, ignore_when_disabled=False)
  27. elif locking == "best-effort":
  28. fapl.set_file_locking(True, ignore_when_disabled=True)
  29. else:
  30. raise ValueError(f"Unsupported locking value: {locking}")
  31. def make_lapl(file, elink_mode=None, elink_swmr=None, elink_locking=None):
  32. """Set up a link access property list"""
  33. if elink_mode is None and elink_swmr is None and elink_locking is None:
  34. return None
  35. if file.mode == "r" and elink_mode == "r+":
  36. raise ValueError("Opening external links in write mode from a file opened in read-only mode is not supported")
  37. lapl = h5p.create(h5p.LINK_ACCESS)
  38. if elink_mode is not None or elink_swmr is not None:
  39. mode = file.mode if elink_mode is None else elink_mode
  40. swmr_mode = file.swmr_mode if elink_swmr is None else elink_swmr
  41. if file.mode == "r+" and file.swmr_mode and (mode != "r+" or not swmr_mode):
  42. raise ValueError("Changing external links access mode from a file opened in SWMR write mode is not supported")
  43. if mode == "r":
  44. flags = h5f.ACC_RDONLY
  45. if swmr_mode:
  46. flags |= h5f.ACC_SWMR_READ
  47. elif mode == "r+":
  48. flags = h5f.ACC_RDWR
  49. if swmr_mode:
  50. flags |= h5f.ACC_SWMR_WRITE
  51. else:
  52. raise RuntimeError(f"Unsupported link access mode: {mode}")
  53. lapl.set_elink_acc_flags(flags)
  54. if elink_locking is not None:
  55. fapl = file.id.get_access_plist()
  56. set_fapl_file_locking(fapl, elink_locking)
  57. lapl.set_elink_fapl(fapl)
  58. return lapl
  59. class Group(HLObject, MutableMappingHDF5):
  60. """ Represents an HDF5 group.
  61. """
  62. def __init__(self, bind):
  63. """ Create a new Group object by binding to a low-level GroupID.
  64. """
  65. with phil:
  66. if not isinstance(bind, h5g.GroupID):
  67. raise ValueError("%s is not a GroupID" % bind)
  68. super().__init__(bind)
  69. def create_group(self, name, track_order=None, *, track_times=False):
  70. """ Create and return a new subgroup.
  71. Name may be absolute or relative. Fails if the target name already
  72. exists.
  73. track_order
  74. Track dataset/group/attribute creation order under this group
  75. if True. If None use global default h5.get_config().track_order.
  76. track_times: bool or None, default: False
  77. If True, store timestamps for this group in the file.
  78. If None, fall back to the default value.
  79. """
  80. if track_order is None:
  81. track_order = h5.get_config().track_order
  82. with phil:
  83. name, lcpl = self._e(name, lcpl=True)
  84. gcpl = h5p.create(h5p.GROUP_CREATE)
  85. if track_order:
  86. order_flags = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
  87. gcpl.set_link_creation_order(order_flags)
  88. gcpl.set_attr_creation_order(order_flags)
  89. if track_times is None:
  90. track_times = False # Allow explicit None to mean h5py's default
  91. if track_times in (True, False):
  92. gcpl.set_obj_track_times(track_times)
  93. else:
  94. raise TypeError("track_times must be either True, False, or None")
  95. gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
  96. return Group(gid)
  97. def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
  98. """ Create a new HDF5 dataset
  99. name
  100. Name of the dataset (absolute or relative). Provide None to make
  101. an anonymous dataset.
  102. shape
  103. Dataset shape. Use "()" for scalar datasets. Required if "data"
  104. isn't provided.
  105. dtype
  106. Numpy dtype or string. If omitted, dtype('f') will be used.
  107. Required if "data" isn't provided; otherwise, overrides data
  108. array's dtype.
  109. data
  110. Provide data to initialize the dataset. If used, you can omit
  111. shape and dtype arguments.
  112. Keyword-only arguments:
  113. chunks
  114. (Tuple or int) Chunk shape, or True to enable auto-chunking. Integers can
  115. be used for 1D shape.
  116. maxshape
  117. (Tuple or int) Make the dataset resizable up to this shape. Use None for
  118. axes within the tuple you want to be unlimited. Integers can be used for 1D shape.
  119. For 1D datasets with unlimited maxshape, a shape tuple of length 1 must be
  120. provided, ``(None,)``. Passing ``None`` sets ``maxshape` to `shape`, making the
  121. dataset un-resizable, which is the default.
  122. compression
  123. (String or int) Compression strategy. Legal values are 'gzip',
  124. 'szip', 'lzf'. If an integer in range(10), this indicates gzip
  125. compression level. Otherwise, an integer indicates the number of a
  126. dynamically loaded compression filter.
  127. compression_opts
  128. Compression settings. This is an integer for gzip, 2-tuple for
  129. szip, etc. If specifying a dynamically loaded compression filter
  130. number, this must be a tuple of values.
  131. scaleoffset
  132. (Integer) Enable scale/offset filter for (usually) lossy
  133. compression of integer or floating-point data. For integer
  134. data, the value of scaleoffset is the number of bits to
  135. retain (pass 0 to let HDF5 determine the minimum number of
  136. bits necessary for lossless compression). For floating point
  137. data, scaleoffset is the number of digits after the decimal
  138. place to retain; stored values thus have absolute error
  139. less than 0.5*10**(-scaleoffset).
  140. shuffle
  141. (T/F) Enable shuffle filter.
  142. fletcher32
  143. (T/F) Enable fletcher32 error detection. Not permitted in
  144. conjunction with the scale/offset filter.
  145. fillvalue
  146. (Scalar) Use this value for uninitialized parts of the dataset.
  147. track_times
  148. (T/F) Enable dataset creation timestamps.
  149. track_order
  150. (T/F) Track attribute creation order if True. If omitted use
  151. global default h5.get_config().track_order.
  152. external
  153. (Iterable of tuples) Sets the external storage property, thus
  154. designating that the dataset will be stored in one or more
  155. non-HDF5 files external to the HDF5 file. Adds each tuple
  156. of (name, offset, size) to the dataset's list of external files.
  157. Each name must be a str, bytes, or os.PathLike; each offset and
  158. size, an integer. If only a name is given instead of an iterable
  159. of tuples, it is equivalent to [(name, 0, h5py.h5f.UNLIMITED)].
  160. efile_prefix
  161. (String) External dataset file prefix for dataset access property
  162. list. Does not persist in the file.
  163. virtual_prefix
  164. (String) Virtual dataset file prefix for dataset access property
  165. list. Does not persist in the file.
  166. allow_unknown_filter
  167. (T/F) Do not check that the requested filter is available for use.
  168. This should only be used with ``write_direct_chunk``, where the caller
  169. compresses the data before handing it to h5py.
  170. rdcc_nbytes
  171. Total size of the dataset's chunk cache in bytes. The default size
  172. is 1024**2 (1 MiB) for HDF5 before 2.0 and 8 MiB for HDF5 2.0 or later.
  173. rdcc_w0
  174. The chunk preemption policy for this dataset. This must be
  175. between 0 and 1 inclusive and indicates the weighting according to
  176. which chunks which have been fully read or written are penalized
  177. when determining which chunks to flush from cache. A value of 0
  178. means fully read or written chunks are treated no differently than
  179. other chunks (the preemption is strictly LRU) while a value of 1
  180. means fully read or written chunks are always preempted before
  181. other chunks. If your application only reads or writes data once,
  182. this can be safely set to 1. Otherwise, this should be set lower
  183. depending on how often you re-read or re-write the same data. The
  184. default value is 0.75.
  185. rdcc_nslots
  186. The number of chunk slots in the dataset's chunk cache. Increasing
  187. this value reduces the number of cache collisions, but slightly
  188. increases the memory used. Due to the hashing strategy, this value
  189. should ideally be a prime number. As a rule of thumb, this value
  190. should be at least 10 times the number of chunks that can fit in
  191. rdcc_nbytes bytes. For maximum performance, this value should be set
  192. approximately 100 times that number of chunks. The default value is
  193. 521.
  194. """
  195. if 'track_order' not in kwds:
  196. kwds['track_order'] = h5.get_config().track_order
  197. if 'efile_prefix' in kwds:
  198. kwds['efile_prefix'] = self._e(kwds['efile_prefix'])
  199. if 'virtual_prefix' in kwds:
  200. kwds['virtual_prefix'] = self._e(kwds['virtual_prefix'])
  201. with phil:
  202. group = self
  203. if name:
  204. name = self._e(name)
  205. if b'/' in name.lstrip(b'/'):
  206. parent_path, name = name.rsplit(b'/', 1)
  207. group = self.require_group(parent_path)
  208. dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
  209. dset = dataset.Dataset(dsid)
  210. return dset
  211. if vds_support:
  212. def create_virtual_dataset(self, name, layout, fillvalue=None):
  213. """Create a new virtual dataset in this group.
  214. See virtual datasets in the docs for more information.
  215. name
  216. (str) Name of the new dataset
  217. layout
  218. (VirtualLayout) Defines the sources for the virtual dataset
  219. fillvalue
  220. The value to use where there is no data.
  221. """
  222. with phil:
  223. group = self
  224. if name:
  225. name = self._e(name)
  226. if b'/' in name.lstrip(b'/'):
  227. parent_path, name = name.rsplit(b'/', 1)
  228. group = self.require_group(parent_path)
  229. dsid = layout.make_dataset(
  230. group, name=name, fillvalue=fillvalue,
  231. )
  232. dset = dataset.Dataset(dsid)
  233. return dset
  234. @contextmanager
  235. def build_virtual_dataset(
  236. self, name, shape, dtype, maxshape=None, fillvalue=None
  237. ):
  238. """Assemble a virtual dataset in this group.
  239. This is used as a context manager::
  240. with f.build_virtual_dataset('virt', (10, 1000), np.uint32) as layout:
  241. layout[0] = h5py.VirtualSource('foo.h5', 'data', (1000,))
  242. name
  243. (str) Name of the new dataset
  244. shape
  245. (tuple) Shape of the dataset
  246. dtype
  247. A numpy dtype for data read from the virtual dataset
  248. maxshape
  249. (tuple, optional) Maximum dimensions if the dataset can grow.
  250. Use None for unlimited dimensions.
  251. fillvalue
  252. The value used where no data is available.
  253. """
  254. from .vds import VirtualLayout
  255. layout = VirtualLayout(shape, dtype, maxshape, self.file.filename)
  256. yield layout
  257. self.create_virtual_dataset(name, layout, fillvalue)
  258. def require_dataset(self, name, shape, dtype, exact=False, **kwds):
  259. """ Open a dataset, creating it if it doesn't exist.
  260. If keyword "exact" is False (default), an existing dataset must have
  261. the same shape and a conversion-compatible dtype to be returned. If
  262. True, the shape and dtype must match exactly.
  263. If keyword "maxshape" is given, the maxshape and dtype must match
  264. instead.
  265. If any of the keywords "rdcc_nslots", "rdcc_nbytes", or "rdcc_w0" are
  266. given, they will be used to configure the dataset's chunk cache.
  267. Other dataset keywords (see create_dataset) may be provided, but are
  268. only used if a new dataset is to be created.
  269. Raises TypeError if an incompatible object already exists, or if the
  270. shape, maxshape or dtype don't match according to the above rules.
  271. """
  272. if 'efile_prefix' in kwds:
  273. kwds['efile_prefix'] = self._e(kwds['efile_prefix'])
  274. if 'virtual_prefix' in kwds:
  275. kwds['virtual_prefix'] = self._e(kwds['virtual_prefix'])
  276. with phil:
  277. if name not in self:
  278. return self.create_dataset(name, *(shape, dtype), **kwds)
  279. if isinstance(shape, int):
  280. shape = (shape,)
  281. try:
  282. dsid = dataset.open_dset(self, self._e(name), **kwds)
  283. dset = dataset.Dataset(dsid)
  284. except KeyError as exc:
  285. dset = self[name]
  286. raise TypeError(f"Incompatible object ({dset.__class__.__name__}) already exists") from exc
  287. if shape != dset.shape:
  288. if "maxshape" not in kwds:
  289. raise TypeError("Shapes do not match (existing %s vs new %s)" % (dset.shape, shape))
  290. elif kwds["maxshape"] != dset.maxshape:
  291. raise TypeError("Max shapes do not match (existing %s vs new %s)" % (dset.maxshape, kwds["maxshape"]))
  292. if exact:
  293. if dtype != dset.dtype:
  294. raise TypeError("Datatypes do not exactly match (existing %s vs new %s)" % (dset.dtype, dtype))
  295. elif not numpy.can_cast(dtype, dset.dtype):
  296. raise TypeError("Datatypes cannot be safely cast (existing %s vs new %s)" % (dset.dtype, dtype))
  297. return dset
  298. def create_dataset_like(self, name, other, **kwupdate):
  299. """ Create a dataset similar to `other`.
  300. name
  301. Name of the dataset (absolute or relative). Provide None to make
  302. an anonymous dataset.
  303. other
  304. The dataset which the new dataset should mimic. All properties, such
  305. as shape, dtype, chunking, ... will be taken from it, but no data
  306. or attributes are being copied.
  307. Any dataset keywords (see create_dataset) may be provided, including
  308. shape and dtype, in which case the provided values take precedence over
  309. those from `other`.
  310. """
  311. for k in ('shape', 'dtype', 'chunks', 'compression',
  312. 'compression_opts', 'scaleoffset', 'shuffle', 'fletcher32',
  313. 'fillvalue'):
  314. kwupdate.setdefault(k, getattr(other, k))
  315. # TODO: more elegant way to pass these (dcpl to create_dataset?)
  316. dcpl = other.id.get_create_plist()
  317. kwupdate.setdefault('track_times', dcpl.get_obj_track_times())
  318. kwupdate.setdefault('track_order', dcpl.get_attr_creation_order() > 0)
  319. # Special case: the maxshape property always exists, but if we pass it
  320. # to create_dataset, the new dataset will automatically get chunked
  321. # layout. So we copy it only if it is different from shape.
  322. if other.maxshape != other.shape:
  323. kwupdate.setdefault('maxshape', other.maxshape)
  324. return self.create_dataset(name, **kwupdate)
  325. def require_group(self, name):
  326. # TODO: support kwargs like require_dataset
  327. """Return a group, creating it if it doesn't exist.
  328. TypeError is raised if something with that name already exists that
  329. isn't a group.
  330. """
  331. with phil:
  332. if name not in self:
  333. return self.create_group(name)
  334. grp = self[name]
  335. if not isinstance(grp, Group):
  336. raise TypeError("Incompatible object (%s) already exists" % grp.__class__.__name__)
  337. return grp
  338. def __getitem__(self, name):
  339. """ Open an object in the file """
  340. return self._get(name)
  341. @with_phil
  342. def _get(self, name, lapl=None):
  343. if isinstance(name, h5r.Reference):
  344. if lapl is not None:
  345. raise TypeError(
  346. "Access with link access properties is done with bytes or str, not Reference")
  347. oid = h5r.dereference(name, self.id)
  348. if oid is None:
  349. raise ValueError("Invalid HDF5 object reference")
  350. elif isinstance(name, (bytes, str)):
  351. if lapl is None:
  352. lapl = self._lapl
  353. oid = h5o.open(self.id, self._e(name), lapl=lapl)
  354. else:
  355. raise TypeError("Accessing a group is done with bytes or str, "
  356. "not {}".format(type(name)))
  357. otype = h5i.get_type(oid)
  358. if otype == h5i.GROUP:
  359. return Group(oid)
  360. elif otype == h5i.DATASET:
  361. return dataset.Dataset(oid, readonly=(self.file.mode == 'r'))
  362. elif otype == h5i.DATATYPE:
  363. return datatype.Datatype(oid)
  364. else:
  365. raise TypeError("Unknown object type")
  366. def get(
  367. self, name, default=None, getclass=False, getlink=False,
  368. elink_mode=None, elink_locking=None, elink_swmr=None,
  369. ):
  370. """ Retrieve an item or other information.
  371. "name" given only:
  372. Return the item, or "default" if it doesn't exist
  373. "getclass" is True:
  374. Return the class of object (Group, Dataset, etc.), or "default"
  375. if nothing with that name exists
  376. "getlink" is True:
  377. Return HardLink, SoftLink or ExternalLink instances. Return
  378. "default" if nothing with that name exists.
  379. "getlink" and "getclass" are True:
  380. Return HardLink, SoftLink and ExternalLink classes. Return
  381. "default" if nothing with that name exists.
  382. "elink_mode":
  383. External links access mode:
  384. - "r": Read-only
  385. - "r+": Read/write
  386. - None (default): Use current file access mode
  387. "elink_locking":
  388. External links file locking behavior:
  389. - None (default) -- Use the current file locking
  390. - False (or "false") -- Disable file locking
  391. - True (or "true") -- Enable file locking
  392. - "best-effort" -- Enable file locking but ignore some errors
  393. Warning:
  394. The HDF5_USE_FILE_LOCKING environment variable can override
  395. this parameter.
  396. "elink_swmr":
  397. External link SWMR read mode.
  398. Set to True only when elink_mode = 'r' and
  399. current file is not opened in SWMR write mode.
  400. By default, use current file SWMR mode.
  401. Example:
  402. >>> cls = group.get('foo', getclass=True)
  403. >>> if cls == SoftLink:
  404. """
  405. # pylint: disable=arguments-differ
  406. with phil:
  407. if elink_mode is None and elink_swmr is None and elink_locking is None:
  408. lapl = self._lapl
  409. else:
  410. lapl = make_lapl(self.file, elink_mode, elink_swmr, elink_locking)
  411. if not (getclass or getlink):
  412. try:
  413. return self._get(name, lapl)
  414. except KeyError:
  415. return default
  416. if name not in self:
  417. return default
  418. elif getclass and not getlink:
  419. typecode = h5o.get_info(self.id, self._e(name), lapl=lapl).type
  420. try:
  421. return {h5o.TYPE_GROUP: Group,
  422. h5o.TYPE_DATASET: dataset.Dataset,
  423. h5o.TYPE_NAMED_DATATYPE: datatype.Datatype}[typecode]
  424. except KeyError as exc:
  425. raise TypeError("Unknown object type") from exc
  426. elif getlink:
  427. typecode = self.id.links.get_info(self._e(name), lapl=lapl).type
  428. if typecode == h5l.TYPE_SOFT:
  429. if getclass:
  430. return SoftLink
  431. linkbytes = self.id.links.get_val(self._e(name), lapl=lapl)
  432. return SoftLink(self._d(linkbytes))
  433. elif typecode == h5l.TYPE_EXTERNAL:
  434. if getclass:
  435. return ExternalLink
  436. filebytes, linkbytes = self.id.links.get_val(self._e(name), lapl=lapl)
  437. return ExternalLink(
  438. filename_decode(filebytes), self._d(linkbytes)
  439. )
  440. elif typecode == h5l.TYPE_HARD:
  441. return HardLink if getclass else HardLink()
  442. else:
  443. raise TypeError("Unknown link type")
  444. def __setitem__(self, name, obj):
  445. """ Add an object to the group. The name must not already be in use.
  446. The action taken depends on the type of object assigned:
  447. Named HDF5 object (Dataset, Group, Datatype)
  448. A hard link is created at "name" which points to the
  449. given object.
  450. SoftLink or ExternalLink
  451. Create the corresponding link.
  452. Numpy ndarray
  453. The array is converted to a dataset object, with default
  454. settings (contiguous storage, etc.).
  455. Numpy dtype
  456. Commit a copy of the datatype as a named datatype in the file.
  457. Anything else
  458. Attempt to convert it to an ndarray and store it. Scalar
  459. values are stored as scalar datasets. Raise ValueError if we
  460. can't understand the resulting array dtype.
  461. """
  462. with phil:
  463. name, lcpl = self._e(name, lcpl=True)
  464. if isinstance(obj, HLObject):
  465. h5o.link(obj.id, self.id, name, lcpl=lcpl, lapl=self._lapl)
  466. elif isinstance(obj, SoftLink):
  467. self.id.links.create_soft(name, self._e(obj.path), lcpl=lcpl, lapl=self._lapl)
  468. elif isinstance(obj, ExternalLink):
  469. fn = filename_encode(obj.filename)
  470. self.id.links.create_external(name, fn, self._e(obj.path),
  471. lcpl=lcpl, lapl=self._lapl)
  472. elif isinstance(obj, numpy.dtype):
  473. htype = h5t.py_create(obj, logical=True)
  474. htype.commit(self.id, name, lcpl=lcpl)
  475. else:
  476. ds = self.create_dataset(None, data=obj)
  477. h5o.link(ds.id, self.id, name, lcpl=lcpl)
  478. @with_phil
  479. def __delitem__(self, name):
  480. """ Delete (unlink) an item from this group. """
  481. self.id.unlink(self._e(name))
  482. @with_phil
  483. def __len__(self):
  484. """ Number of members attached to this group """
  485. return self.id.get_num_objs()
  486. @with_phil
  487. def __iter__(self):
  488. """ Iterate over member names """
  489. for x in self.id.__iter__():
  490. yield self._d(x)
  491. @with_phil
  492. def __reversed__(self):
  493. """ Iterate over member names in reverse order. """
  494. for x in self.id.__reversed__():
  495. yield self._d(x)
  496. @with_phil
  497. def __contains__(self, name):
  498. """ Test if a member name exists """
  499. if hasattr(h5g, "_path_valid"):
  500. if not self.id:
  501. return False
  502. return h5g._path_valid(self.id, self._e(name), self._lapl)
  503. return self._e(name) in self.id
  504. def copy(self, source, dest, name=None,
  505. shallow=False, expand_soft=False, expand_external=False,
  506. expand_refs=False, without_attrs=False):
  507. """Copy an object or group.
  508. The source can be a path, Group, Dataset, or Datatype object. The
  509. destination can be either a path or a Group object. The source and
  510. destinations need not be in the same file.
  511. If the source is a Group object, all objects contained in that group
  512. will be copied recursively.
  513. When the destination is a Group object, by default the target will
  514. be created in that group with its current name (basename of obj.name).
  515. You can override that by setting "name" to a string.
  516. There are various options which all default to "False":
  517. - shallow: copy only immediate members of a group.
  518. - expand_soft: expand soft links into new objects.
  519. - expand_external: expand external links into new objects.
  520. - expand_refs: copy objects that are pointed to by references.
  521. - without_attrs: copy object without copying attributes.
  522. Example:
  523. >>> f = File('myfile.hdf5', 'w')
  524. >>> f.create_group("MyGroup")
  525. >>> list(f.keys())
  526. ['MyGroup']
  527. >>> f.copy('MyGroup', 'MyCopy')
  528. >>> list(f.keys())
  529. ['MyGroup', 'MyCopy']
  530. """
  531. with phil:
  532. if isinstance(source, HLObject):
  533. source_path = '.'
  534. else:
  535. # Interpret source as a path relative to this group
  536. source_path = source
  537. source = self
  538. if isinstance(dest, Group):
  539. if name is not None:
  540. dest_path = name
  541. elif source_path == '.':
  542. dest_path = pp.basename(h5i.get_name(source.id))
  543. else:
  544. # copy source into dest group: dest_name/source_name
  545. dest_path = pp.basename(h5i.get_name(source[source_path].id))
  546. elif isinstance(dest, HLObject):
  547. raise TypeError("Destination must be path or Group object")
  548. else:
  549. # Interpret destination as a path relative to this group
  550. dest_path = dest
  551. dest = self
  552. flags = 0
  553. if shallow:
  554. flags |= h5o.COPY_SHALLOW_HIERARCHY_FLAG
  555. if expand_soft:
  556. flags |= h5o.COPY_EXPAND_SOFT_LINK_FLAG
  557. if expand_external:
  558. flags |= h5o.COPY_EXPAND_EXT_LINK_FLAG
  559. if expand_refs:
  560. flags |= h5o.COPY_EXPAND_REFERENCE_FLAG
  561. if without_attrs:
  562. flags |= h5o.COPY_WITHOUT_ATTR_FLAG
  563. if flags:
  564. copypl = h5p.create(h5p.OBJECT_COPY)
  565. copypl.set_copy_object(flags)
  566. else:
  567. copypl = None
  568. h5o.copy(source.id, self._e(source_path), dest.id, self._e(dest_path),
  569. copypl, base.dlcpl)
  570. def move(self, source, dest):
  571. """ Move a link to a new location in the file.
  572. If "source" is a hard link, this effectively renames the object. If
  573. "source" is a soft or external link, the link itself is moved, with its
  574. value unmodified.
  575. """
  576. with phil:
  577. if source == dest:
  578. return
  579. self.id.links.move(self._e(source), self.id, self._e(dest),
  580. lapl=self._lapl, lcpl=self._lcpl)
  581. def visit(self, func):
  582. """ Recursively visit all names in this group and subgroups.
  583. Note: visit ignores soft and external links. To visit those, use
  584. visit_links.
  585. You supply a callable (function, method or callable object); it
  586. will be called exactly once for each link in this group and every
  587. group below it. Your callable must conform to the signature:
  588. func(<member name>) => <None or return value>
  589. Returning None continues iteration, returning anything else stops
  590. and immediately returns that value from the visit method. The
  591. iteration order is lexicographic.
  592. Example:
  593. >>> # List the entire contents of the file
  594. >>> f = File("foo.hdf5")
  595. >>> list_of_names = []
  596. >>> f.visit(list_of_names.append)
  597. """
  598. with phil:
  599. def proxy(name):
  600. """ Call the function with the text name, not bytes """
  601. return func(self._d(name))
  602. return h5o.visit(self.id, proxy)
  603. def visititems(self, func):
  604. """ Recursively visit names and objects in this group.
  605. Note: visititems ignores soft and external links. To visit those, use
  606. visititems_links.
  607. You supply a callable (function, method or callable object); it
  608. will be called exactly once for each link in this group and every
  609. group below it. Your callable must conform to the signature:
  610. func(<member name>, <object>) => <None or return value>
  611. Returning None continues iteration, returning anything else stops
  612. and immediately returns that value from the visit method. The
  613. iteration order is lexicographic.
  614. Example:
  615. # Get a list of all datasets in the file
  616. >>> mylist = []
  617. >>> def func(name, obj):
  618. ... if isinstance(obj, Dataset):
  619. ... mylist.append(name)
  620. ...
  621. >>> f = File('foo.hdf5')
  622. >>> f.visititems(func)
  623. """
  624. with phil:
  625. def proxy(name):
  626. """ Use the text name of the object, not bytes """
  627. name = self._d(name)
  628. return func(name, self[name])
  629. return h5o.visit(self.id, proxy)
  630. def visit_links(self, func):
  631. """ Recursively visit all names in this group and subgroups.
  632. Each link will be visited exactly once, regardless of its target.
  633. You supply a callable (function, method or callable object); it
  634. will be called exactly once for each link in this group and every
  635. group below it. Your callable must conform to the signature:
  636. func(<member name>) => <None or return value>
  637. Returning None continues iteration, returning anything else stops
  638. and immediately returns that value from the visit method. The
  639. iteration order is lexicographic.
  640. Example:
  641. >>> # List the entire contents of the file
  642. >>> f = File("foo.hdf5")
  643. >>> list_of_names = []
  644. >>> f.visit_links(list_of_names.append)
  645. """
  646. with phil:
  647. def proxy(name):
  648. """ Call the function with the text name, not bytes """
  649. return func(self._d(name))
  650. return self.id.links.visit(proxy)
  651. def visititems_links(self, func):
  652. """ Recursively visit links in this group.
  653. Each link will be visited exactly once, regardless of its target.
  654. You supply a callable (function, method or callable object); it
  655. will be called exactly once for each link in this group and every
  656. group below it. Your callable must conform to the signature:
  657. func(<member name>, <link>) => <None or return value>
  658. Returning None continues iteration, returning anything else stops
  659. and immediately returns that value from the visit method. The
  660. iteration order is lexicographic.
  661. Example:
  662. # Get a list of all softlinks in the file
  663. >>> mylist = []
  664. >>> def func(name, link):
  665. ... if isinstance(link, SoftLink):
  666. ... mylist.append(name)
  667. ...
  668. >>> f = File('foo.hdf5')
  669. >>> f.visititems_links(func)
  670. """
  671. with phil:
  672. def proxy(name):
  673. """ Use the text name of the object, not bytes """
  674. name = self._d(name)
  675. return func(name, self.get(name, getlink=True))
  676. return self.id.links.visit(proxy)
  677. @with_phil
  678. def __repr__(self):
  679. if not self:
  680. r = u"<Closed HDF5 group>"
  681. else:
  682. namestr = (
  683. '"%s"' % self.name
  684. ) if self.name is not None else u"(anonymous)"
  685. r = '<HDF5 group %s (%d members)>' % (namestr, len(self))
  686. return r
  687. class HardLink:
  688. """
  689. Represents a hard link in an HDF5 file. Provided only so that
  690. Group.get works in a sensible way. Has no other function.
  691. """
  692. pass
  693. class SoftLink:
  694. """
  695. Represents a symbolic ("soft") link in an HDF5 file. The path
  696. may be absolute or relative. No checking is performed to ensure
  697. that the target actually exists.
  698. """
  699. @property
  700. def path(self):
  701. """ Soft link value. Not guaranteed to be a valid path. """
  702. return self._path
  703. def __init__(self, path):
  704. self._path = str(path)
  705. def __repr__(self):
  706. return '<SoftLink to "%s">' % self.path
  707. class ExternalLink:
  708. """
  709. Represents an HDF5 external link. Paths may be absolute or relative.
  710. No checking is performed to ensure either the target or file exists.
  711. """
  712. @property
  713. def path(self):
  714. """ Soft link path, i.e. the part inside the HDF5 file. """
  715. return self._path
  716. @property
  717. def filename(self):
  718. """ Path to the external HDF5 file in the filesystem. """
  719. return self._filename
  720. def __init__(self, filename, path):
  721. self._filename = filename_decode(filename_encode(filename))
  722. self._path = path
  723. def __repr__(self):
  724. return '<ExternalLink to "%s" in file "%s"' % (self.path,
  725. self.filename)