# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.

"""
    Implements high-level operations for attributes.

    Provides the AttributeManager class, available on high-level objects
    as <obj>.attrs.
"""

import numpy

from .. import h5, h5s, h5t, h5a, h5p
from . import base
from .base import phil, with_phil, Empty, is_empty_dataspace, product
from .datatype import Datatype


  19. class AttributeManager(base.MutableMappingHDF5, base.CommonStateObject):
  20. """
  21. Allows dictionary-style access to an HDF5 object's attributes.
  22. These are created exclusively by the library and are available as
  23. a Python attribute at <object>.attrs
  24. Like Group objects, attributes provide a minimal dictionary-
  25. style interface. Anything which can be reasonably converted to a
  26. Numpy array or Numpy scalar can be stored.
  27. Attributes are automatically created on assignment with the
  28. syntax <obj>.attrs[name] = value, with the HDF5 type automatically
  29. deduced from the value. Existing attributes are overwritten.
  30. To modify an existing attribute while preserving its type, use the
  31. method modify(). To specify an attribute of a particular type and
  32. shape, use create().
  33. """
  34. def __init__(self, parent):
  35. """ Private constructor.
  36. """
  37. self._id = parent.id
  38. @with_phil
  39. def __getitem__(self, name):
  40. """ Read the value of an attribute.
  41. """
  42. attr = h5a.open(self._id, self._e(name))
  43. shape = attr.shape
  44. # shape is None for empty dataspaces
  45. if shape is None:
  46. return Empty(attr.dtype)
  47. dtype = attr.dtype
  48. # Do this first, as we'll be fiddling with the dtype for top-level
  49. # array types
  50. htype = h5t.py_create(dtype)
  51. # NumPy doesn't support top-level array types, so we have to "fake"
  52. # the correct type and shape for the array. For example, consider
  53. # attr.shape == (5,) and attr.dtype == '(3,)f'. Then:
  54. if dtype.subdtype is not None:
  55. subdtype, subshape = dtype.subdtype
  56. shape = attr.shape + subshape # (5, 3)
  57. dtype = subdtype # 'f'
  58. arr = numpy.zeros(shape, dtype=dtype, order='C')
  59. attr.read(arr, mtype=htype)
  60. string_info = h5t.check_string_dtype(dtype)
  61. if string_info and (string_info.length is None):
  62. # Vlen strings: convert bytes to Python str
  63. arr = numpy.array([
  64. b.decode('utf-8', 'surrogateescape') for b in arr.flat
  65. ], dtype=dtype).reshape(arr.shape)
  66. if arr.ndim == 0:
  67. return arr[()]
  68. return arr
  69. def get_id(self, name):
  70. """Get a low-level AttrID object for the named attribute.
  71. """
  72. return h5a.open(self._id, self._e(name))
  73. @with_phil
  74. def __setitem__(self, name, value):
  75. """ Set a new attribute, overwriting any existing attribute.
  76. The type and shape of the attribute are determined from the data. To
  77. use a specific type or shape, or to preserve the type of an attribute,
  78. use the methods create() and modify().
  79. """
  80. self.create(name, data=value)
  81. @with_phil
  82. def __delitem__(self, name):
  83. """ Delete an attribute (which must already exist). """
  84. h5a.delete(self._id, self._e(name))
  85. def create(self, name, data, shape=None, dtype=None):
  86. """ Create a new attribute, overwriting any existing attribute.
  87. name
  88. Name of the new attribute (required)
  89. data
  90. An array to initialize the attribute (required)
  91. shape
  92. Shape of the attribute. Overrides data.shape if both are
  93. given, in which case the total number of points must be unchanged.
  94. dtype
  95. Data type of the attribute. Overrides data.dtype if both
  96. are given.
  97. """
  98. name = self._e(name)
  99. with phil:
  100. # First, make sure we have a NumPy array. We leave the data type
  101. # conversion for HDF5 to perform.
  102. if not isinstance(data, Empty):
  103. data = base.array_for_new_object(data, specified_dtype=dtype)
  104. if shape is None:
  105. shape = data.shape
  106. elif isinstance(shape, int):
  107. shape = (shape,)
  108. use_htype = None # If a committed type is given, we must use it
  109. # in the call to h5a.create.
  110. if isinstance(dtype, Datatype):
  111. use_htype = dtype.id
  112. dtype = dtype.dtype
  113. elif dtype is None:
  114. dtype = data.dtype
  115. else:
  116. dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed
  117. original_dtype = dtype # We'll need this for top-level array types
  118. # Where a top-level array type is requested, we have to do some
  119. # fiddling around to present the data as a smaller array of
  120. # subarrays.
  121. if dtype.subdtype is not None:
  122. subdtype, subshape = dtype.subdtype
  123. # Make sure the subshape matches the last N axes' sizes.
  124. if shape[-len(subshape):] != subshape:
  125. raise ValueError("Array dtype shape %s is incompatible with data shape %s" % (subshape, shape))
  126. # New "advertised" shape and dtype
  127. shape = shape[0:len(shape)-len(subshape)]
  128. dtype = subdtype
  129. # Not an array type; make sure to check the number of elements
  130. # is compatible, and reshape if needed.
  131. else:
  132. if shape is not None and product(shape) != product(data.shape):
  133. raise ValueError("Shape of new attribute conflicts with shape of data")
  134. if shape != data.shape:
  135. data = data.reshape(shape)
  136. # We need this to handle special string types.
  137. if not isinstance(data, Empty):
  138. data = numpy.asarray(data, dtype=dtype)
  139. # Make HDF5 datatype and dataspace for the H5A calls
  140. if use_htype is None:
  141. htype = h5t.py_create(original_dtype, logical=True)
  142. htype2 = h5t.py_create(original_dtype) # Must be bit-for-bit representation rather than logical
  143. else:
  144. htype = use_htype
  145. htype2 = None
  146. if isinstance(data, Empty):
  147. space = h5s.create(h5s.NULL)
  148. else:
  149. space = h5s.create_simple(shape)
  150. # For a long time, h5py would create attributes with a random name
  151. # and then rename them, imitating how you can atomically replace
  152. # a file in a filesystem. But HDF5 does not offer atomic replacement
  153. # (you have to delete the existing attribute first), and renaming
  154. # exposes some bugs - see https://github.com/h5py/h5py/issues/1385
  155. # So we've gone back to the simpler delete & recreate model.
  156. if h5a.exists(self._id, name):
  157. h5a.delete(self._id, name)
  158. attr = h5a.create(self._id, name, htype, space)
  159. try:
  160. if not isinstance(data, Empty):
  161. attr.write(data, mtype=htype2)
  162. except:
  163. attr.close()
  164. h5a.delete(self._id, name)
  165. raise
  166. attr.close()
  167. def modify(self, name, value):
  168. """ Change the value of an attribute while preserving its type.
  169. Differs from __setitem__ in that if the attribute already exists, its
  170. type is preserved. This can be very useful for interacting with
  171. externally generated files.
  172. If the attribute doesn't exist, it will be automatically created.
  173. """
  174. with phil:
  175. if name not in self:
  176. self[name] = value
  177. else:
  178. attr = h5a.open(self._id, self._e(name))
  179. if is_empty_dataspace(attr):
  180. raise OSError("Empty attributes can't be modified")
  181. # If the input data is already an array, let HDF5 do the conversion.
  182. # If it's a list or similar, don't make numpy guess a dtype for it.
  183. dt = None if isinstance(value, numpy.ndarray) else attr.dtype
  184. value = numpy.asarray(value, order='C', dtype=dt)
  185. # Allow the case of () <-> (1,)
  186. if (value.shape != attr.shape) and not \
  187. (value.size == 1 and product(attr.shape) == 1):
  188. raise TypeError("Shape of data is incompatible with existing attribute")
  189. attr.write(value)
  190. @with_phil
  191. def __len__(self):
  192. """ Number of attributes attached to the object. """
  193. # I expect we will not have more than 2**32 attributes
  194. return h5a.get_num_attrs(self._id)
  195. def __iter__(self):
  196. """ Iterate over the names of attributes. """
  197. with phil:
  198. attrlist = []
  199. def iter_cb(name, *args):
  200. """ Callback to gather attribute names """
  201. attrlist.append(self._d(name))
  202. cpl = self._id.get_create_plist()
  203. crt_order = cpl.get_attr_creation_order()
  204. cpl.close()
  205. if crt_order & h5p.CRT_ORDER_TRACKED:
  206. idx_type = h5.INDEX_CRT_ORDER
  207. else:
  208. idx_type = h5.INDEX_NAME
  209. h5a.iterate(self._id, iter_cb, index_type=idx_type)
  210. for name in attrlist:
  211. yield name
  212. @with_phil
  213. def __contains__(self, name):
  214. """ Determine if an attribute exists, by name. """
  215. return h5a.exists(self._id, self._e(name))
  216. @with_phil
  217. def __repr__(self):
  218. if not self._id:
  219. return "<Attributes of closed HDF5 object>"
  220. return "<Attributes of HDF5 object at %s>" % id(self._id)