_mio.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. """
  2. Module for reading and writing matlab (TM) .mat files
  3. """
  4. # Authors: Travis Oliphant, Matthew Brett
  5. from contextlib import contextmanager
  6. from ._miobase import _get_matfile_version, docfiller
  7. from ._mio4 import MatFile4Reader, MatFile4Writer
  8. from ._mio5 import MatFile5Reader, MatFile5Writer
  9. __all__ = ['loadmat', 'savemat', 'whosmat']
  10. @contextmanager
  11. def _open_file_context(file_like, appendmat, mode='rb'):
  12. f, opened = _open_file(file_like, appendmat, mode)
  13. try:
  14. yield f
  15. finally:
  16. if opened:
  17. f.close()
  18. def _open_file(file_like, appendmat, mode='rb'):
  19. """
  20. Open `file_like` and return as file-like object. First, check if object is
  21. already file-like; if so, return it as-is. Otherwise, try to pass it
  22. to open(). If that fails, and `file_like` is a string, and `appendmat` is true,
  23. append '.mat' and try again.
  24. """
  25. reqs = {'read'} if set(mode) & set('r+') else set()
  26. if set(mode) & set('wax+'):
  27. reqs.add('write')
  28. if reqs.issubset(dir(file_like)):
  29. return file_like, False
  30. try:
  31. return open(file_like, mode), True
  32. except OSError as e:
  33. # Probably "not found"
  34. if isinstance(file_like, str):
  35. if appendmat and not file_like.endswith('.mat'):
  36. file_like += '.mat'
  37. return open(file_like, mode), True
  38. else:
  39. raise OSError(
  40. 'Reader needs file name or open file-like object'
  41. ) from e
  42. @docfiller
  43. def mat_reader_factory(file_name, appendmat=True, **kwargs):
  44. """
  45. Create reader for matlab .mat format files.
  46. Parameters
  47. ----------
  48. %(file_arg)s
  49. %(append_arg)s
  50. %(load_args)s
  51. %(struct_arg)s
  52. Returns
  53. -------
  54. matreader : MatFileReader object
  55. Initialized instance of MatFileReader class matching the mat file
  56. type detected in `filename`.
  57. file_opened : bool
  58. Whether the file was opened by this routine.
  59. """
  60. byte_stream, file_opened = _open_file(file_name, appendmat)
  61. mjv, mnv = _get_matfile_version(byte_stream)
  62. if mjv == 0:
  63. return MatFile4Reader(byte_stream, **kwargs), file_opened
  64. elif mjv == 1:
  65. return MatFile5Reader(byte_stream, **kwargs), file_opened
  66. elif mjv == 2:
  67. raise NotImplementedError('Please use HDF reader for matlab v7.3 '
  68. 'files, e.g. h5py')
  69. else:
  70. raise TypeError(f'Did not recognize version {mjv}')
  71. @docfiller
  72. def loadmat(file_name, mdict=None, appendmat=True, *, spmatrix=True, **kwargs):
  73. """
  74. Load MATLAB file.
  75. Parameters
  76. ----------
  77. file_name : str
  78. Name of the mat file (do not need .mat extension if
  79. appendmat==True). Can also pass open file-like object.
  80. mdict : dict, optional
  81. Dictionary in which to insert matfile variables.
  82. appendmat : bool, optional
  83. True to append the .mat extension to the end of the given
  84. filename, if not already present. Default is True.
  85. spmatrix : bool, optional (default: True)
  86. If ``True``, return sparse matrix. Otherwise return sparse array.
  87. Format is `COO` for MatFile 4 and `CSC` for MatFile 5.
  88. Only relevant for sparse variables.
  89. byte_order : str or None, optional
  90. None by default, implying byte order guessed from mat
  91. file. Otherwise can be one of ('native', '=', 'little', '<',
  92. 'BIG', '>').
  93. mat_dtype : bool, optional
  94. If True, return arrays in same dtype as would be loaded into
  95. MATLAB (instead of the dtype with which they are saved).
  96. squeeze_me : bool, optional
  97. Whether to squeeze unit matrix dimensions or not.
  98. chars_as_strings : bool, optional
  99. Whether to convert char arrays to string arrays.
  100. matlab_compatible : bool, optional
  101. Returns matrices as would be loaded by MATLAB (implies
  102. squeeze_me=False, chars_as_strings=False, mat_dtype=True,
  103. struct_as_record=True).
  104. struct_as_record : bool, optional
  105. Whether to load MATLAB structs as NumPy record arrays, or as
  106. old-style NumPy arrays with dtype=object. Setting this flag to
  107. False replicates the behavior of scipy version 0.7.x (returning
  108. NumPy object arrays). The default setting is True, because it
  109. allows easier round-trip load and save of MATLAB files.
  110. verify_compressed_data_integrity : bool, optional
  111. Whether the length of compressed sequences in the MATLAB file
  112. should be checked, to ensure that they are not longer than we expect.
  113. It is advisable to enable this (the default) because overlong
  114. compressed sequences in MATLAB files generally indicate that the
  115. files have experienced some sort of corruption.
  116. variable_names : None or sequence
  117. If None (the default) - read all variables in file. Otherwise,
  118. `variable_names` should be a sequence of strings, giving names of the
  119. MATLAB variables to read from the file. The reader will skip any
  120. variable with a name not in this sequence, possibly saving some read
  121. processing.
  122. simplify_cells : False, optional
  123. If True, return a simplified dict structure (which is useful if the mat
  124. file contains cell arrays). Note that this only affects the structure
  125. of the result and not its contents (which is identical for both output
  126. structures). If True, this automatically sets `struct_as_record` to
  127. False and `squeeze_me` to True, which is required to simplify cells.
  128. uint16_codec : str, optional
  129. The codec to use for decoding characters, which are stored as uint16
  130. values. The default uses the system encoding, but this can be manually
  131. set to other values such as 'ascii', 'latin1', and 'utf-8'. This
  132. parameter is relevant only for files stored as v6 and above, and not
  133. for files stored as v4.
  134. Returns
  135. -------
  136. mat_dict : dict
  137. dictionary with variable names as keys, and loaded matrices as values.
  138. Notes
  139. -----
  140. v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.
  141. You will need an HDF5 Python library to read MATLAB 7.3 format mat
  142. files. Because SciPy does not supply one, we do not implement the
  143. HDF5 / 7.3 interface here.
  144. Examples
  145. --------
  146. >>> from os.path import dirname, join as pjoin
  147. >>> import scipy.io as sio
  148. Get the filename for an example .mat file from the tests/data directory.
  149. >>> data_dir = pjoin(dirname(sio.__file__), 'matlab', 'tests', 'data')
  150. >>> mat_fname = pjoin(data_dir, 'testdouble_7.4_GLNX86.mat')
  151. Load the .mat file contents.
  152. >>> mat_contents = sio.loadmat(mat_fname, spmatrix=False)
  153. The result is a dictionary, one key/value pair for each variable:
  154. >>> sorted(mat_contents.keys())
  155. ['__globals__', '__header__', '__version__', 'testdouble']
  156. >>> mat_contents['testdouble']
  157. array([[0. , 0.78539816, 1.57079633, 2.35619449, 3.14159265,
  158. 3.92699082, 4.71238898, 5.49778714, 6.28318531]])
  159. By default SciPy reads MATLAB structs as structured NumPy arrays where the
  160. dtype fields are of type `object` and the names correspond to the MATLAB
  161. struct field names. This can be disabled by setting the optional argument
  162. `struct_as_record=False`.
  163. Get the filename for an example .mat file that contains a MATLAB struct
  164. called `teststruct` and load the contents.
  165. >>> matstruct_fname = pjoin(data_dir, 'teststruct_7.4_GLNX86.mat')
  166. >>> matstruct_contents = sio.loadmat(matstruct_fname)
  167. >>> teststruct = matstruct_contents['teststruct']
  168. >>> teststruct.dtype
  169. dtype([('stringfield', 'O'), ('doublefield', 'O'), ('complexfield', 'O')])
  170. The size of the structured array is the size of the MATLAB struct, not the
  171. number of elements in any particular field. The shape defaults to 2-D
  172. unless the optional argument `squeeze_me=True`, in which case all length 1
  173. dimensions are removed.
  174. >>> teststruct.size
  175. 1
  176. >>> teststruct.shape
  177. (1, 1)
  178. Get the 'stringfield' of the first element in the MATLAB struct.
  179. >>> teststruct[0, 0]['stringfield']
  180. array(['Rats live on no evil star.'],
  181. dtype='<U26')
  182. Get the first element of the 'doublefield'.
  183. >>> teststruct['doublefield'][0, 0]
  184. array([[ 1.41421356, 2.71828183, 3.14159265]])
  185. Load the MATLAB struct, squeezing out length 1 dimensions, and get the item
  186. from the 'complexfield'.
  187. >>> matstruct_squeezed = sio.loadmat(matstruct_fname, squeeze_me=True)
  188. >>> matstruct_squeezed['teststruct'].shape
  189. ()
  190. >>> matstruct_squeezed['teststruct']['complexfield'].shape
  191. ()
  192. >>> matstruct_squeezed['teststruct']['complexfield'].item()
  193. array([ 1.41421356+1.41421356j, 2.71828183+2.71828183j,
  194. 3.14159265+3.14159265j])
  195. """
  196. variable_names = kwargs.pop('variable_names', None)
  197. with _open_file_context(file_name, appendmat) as f:
  198. MR, _ = mat_reader_factory(f, **kwargs)
  199. matfile_dict = MR.get_variables(variable_names)
  200. if spmatrix:
  201. from scipy.sparse import issparse, coo_matrix, csc_matrix
  202. for name, var in list(matfile_dict.items()):
  203. if issparse(var):
  204. fmt_matrix = coo_matrix if var.format == "coo" else csc_matrix
  205. matfile_dict[name] = fmt_matrix(var)
  206. if mdict is not None:
  207. mdict.update(matfile_dict)
  208. else:
  209. mdict = matfile_dict
  210. return mdict
  211. @docfiller
  212. def savemat(file_name, mdict,
  213. appendmat=True,
  214. format='5',
  215. long_field_names=False,
  216. do_compression=False,
  217. oned_as='row'):
  218. """
  219. Save a dictionary of names and arrays into a MATLAB-style .mat file.
  220. This saves the array objects in the given dictionary to a MATLAB-
  221. style .mat file.
  222. Parameters
  223. ----------
  224. file_name : str or file-like object
  225. Name of the .mat file (.mat extension not needed if ``appendmat ==
  226. True``).
  227. Can also pass open file_like object.
  228. mdict : dict
  229. Dictionary from which to save matfile variables. Note that if this dict
  230. has a key starting with ``_`` or a sub-dict has a key starting with ``_``
  231. or a digit, these key's items will not be saved in the mat file and
  232. `MatWriteWarning` will be issued.
  233. appendmat : bool, optional
  234. True (the default) to append the .mat extension to the end of the
  235. given filename, if not already present.
  236. format : {'5', '4'}, string, optional
  237. '5' (the default) for MATLAB 5 and up (to 7.2),
  238. '4' for MATLAB 4 .mat files.
  239. long_field_names : bool, optional
  240. False (the default) - maximum field name length in a structure is
  241. 31 characters which is the documented maximum length.
  242. True - maximum field name length in a structure is 63 characters
  243. which works for MATLAB 7.6+.
  244. do_compression : bool, optional
  245. Whether or not to compress matrices on write. Default is False.
  246. oned_as : {'row', 'column'}, optional
  247. If 'column', write 1-D NumPy arrays as column vectors.
  248. If 'row', write 1-D NumPy arrays as row vectors.
  249. Examples
  250. --------
  251. >>> from scipy.io import savemat
  252. >>> import numpy as np
  253. >>> a = np.arange(20)
  254. >>> mdic = {"a": a, "label": "experiment"}
  255. >>> mdic
  256. {'a': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
  257. 17, 18, 19]),
  258. 'label': 'experiment'}
  259. >>> savemat("matlab_matrix.mat", mdic)
  260. """
  261. with _open_file_context(file_name, appendmat, 'wb') as file_stream:
  262. if format == '4':
  263. if long_field_names:
  264. message = "Long field names are not available for version 4 files"
  265. raise ValueError(message)
  266. MW = MatFile4Writer(file_stream, oned_as)
  267. elif format == '5':
  268. MW = MatFile5Writer(file_stream,
  269. do_compression=do_compression,
  270. unicode_strings=True,
  271. long_field_names=long_field_names,
  272. oned_as=oned_as)
  273. else:
  274. raise ValueError("Format should be '4' or '5'")
  275. MW.put_variables(mdict)
  276. @docfiller
  277. def whosmat(file_name, appendmat=True, **kwargs):
  278. """
  279. List variables inside a MATLAB file.
  280. Parameters
  281. ----------
  282. %(file_arg)s
  283. %(append_arg)s
  284. %(load_args)s
  285. %(struct_arg)s
  286. Returns
  287. -------
  288. variables : list of tuples
  289. A list of tuples, where each tuple holds the matrix name (a string),
  290. its shape (tuple of ints), and its data class (a string).
  291. Possible data classes are: int8, uint8, int16, uint16, int32, uint32,
  292. int64, uint64, single, double, cell, struct, object, char, sparse,
  293. function, opaque, logical, unknown.
  294. Notes
  295. -----
  296. v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.
  297. You will need an HDF5 python library to read matlab 7.3 format mat
  298. files (e.g. h5py). Because SciPy does not supply one, we do not implement the
  299. HDF5 / 7.3 interface here.
  300. .. versionadded:: 0.12.0
  301. Examples
  302. --------
  303. >>> from io import BytesIO
  304. >>> import numpy as np
  305. >>> from scipy.io import savemat, whosmat
  306. Create some arrays, and use `savemat` to write them to a ``BytesIO``
  307. instance.
  308. >>> a = np.array([[10, 20, 30], [11, 21, 31]], dtype=np.int32)
  309. >>> b = np.geomspace(1, 10, 5)
  310. >>> f = BytesIO()
  311. >>> savemat(f, {'a': a, 'b': b})
  312. Use `whosmat` to inspect ``f``. Each tuple in the output list gives
  313. the name, shape and data type of the array in ``f``.
  314. >>> whosmat(f)
  315. [('a', (2, 3), 'int32'), ('b', (1, 5), 'double')]
  316. """
  317. with _open_file_context(file_name, appendmat) as f:
  318. ML, file_opened = mat_reader_factory(f, **kwargs)
  319. variables = ML.list_variables()
  320. return variables