# _idl.py
# IDLSave - a python module to read IDL 'save' files
# Copyright (c) 2010 Thomas P. Robitaille

# Many thanks to Craig Markwardt for publishing the Unofficial Format
# Specification for IDL .sav files, without which this Python module would not
# exist (http://cow.physics.wisc.edu/~craigm/idl/savefmt).

# This code was developed with permission from ITT Visual Information
# Systems. IDL(r) is a registered trademark of ITT Visual Information Systems,
# Inc. for their Interactive Data Language software.

# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

__all__ = ['readsav']

import struct
import tempfile
import warnings
import zlib

import numpy as np
  30. # Define the different data types that can be found in an IDL save file
  31. DTYPE_DICT = {1: '>u1',
  32. 2: '>i2',
  33. 3: '>i4',
  34. 4: '>f4',
  35. 5: '>f8',
  36. 6: '>c8',
  37. 7: '|O',
  38. 8: '|O',
  39. 9: '>c16',
  40. 10: '|O',
  41. 11: '|O',
  42. 12: '>u2',
  43. 13: '>u4',
  44. 14: '>i8',
  45. 15: '>u8'}
  46. # Define the different record types that can be found in an IDL save file
  47. RECTYPE_DICT = {0: "START_MARKER",
  48. 1: "COMMON_VARIABLE",
  49. 2: "VARIABLE",
  50. 3: "SYSTEM_VARIABLE",
  51. 6: "END_MARKER",
  52. 10: "TIMESTAMP",
  53. 12: "COMPILED",
  54. 13: "IDENTIFICATION",
  55. 14: "VERSION",
  56. 15: "HEAP_HEADER",
  57. 16: "HEAP_DATA",
  58. 17: "PROMOTE64",
  59. 19: "NOTICE",
  60. 20: "DESCRIPTION"}
  61. # Define a dictionary to contain structure definitions
  62. STRUCT_DICT = {}
  63. def _align_32(f):
  64. '''Align to the next 32-bit position in a file'''
  65. pos = f.tell()
  66. if pos % 4 != 0:
  67. f.seek(pos + 4 - pos % 4)
  68. return
  69. def _skip_bytes(f, n):
  70. '''Skip `n` bytes'''
  71. f.read(n)
  72. return
  73. def _read_bytes(f, n):
  74. '''Read the next `n` bytes'''
  75. return f.read(n)
  76. def _read_byte(f):
  77. '''Read a single byte'''
  78. return np.uint8(struct.unpack('>B', f.read(4)[:1])[0])
  79. def _read_long(f):
  80. '''Read a signed 32-bit integer'''
  81. return np.int32(struct.unpack('>l', f.read(4))[0])
  82. def _read_int16(f):
  83. '''Read a signed 16-bit integer'''
  84. return np.int16(struct.unpack('>h', f.read(4)[2:4])[0])
  85. def _read_int32(f):
  86. '''Read a signed 32-bit integer'''
  87. return np.int32(struct.unpack('>i', f.read(4))[0])
  88. def _read_int64(f):
  89. '''Read a signed 64-bit integer'''
  90. return np.int64(struct.unpack('>q', f.read(8))[0])
  91. def _read_uint16(f):
  92. '''Read an unsigned 16-bit integer'''
  93. return np.uint16(struct.unpack('>H', f.read(4)[2:4])[0])
  94. def _read_uint32(f):
  95. '''Read an unsigned 32-bit integer'''
  96. return np.uint32(struct.unpack('>I', f.read(4))[0])
  97. def _read_uint64(f):
  98. '''Read an unsigned 64-bit integer'''
  99. return np.uint64(struct.unpack('>Q', f.read(8))[0])
  100. def _read_float32(f):
  101. '''Read a 32-bit float'''
  102. return np.float32(struct.unpack('>f', f.read(4))[0])
  103. def _read_float64(f):
  104. '''Read a 64-bit float'''
  105. return np.float64(struct.unpack('>d', f.read(8))[0])
  106. class Pointer:
  107. '''Class used to define pointers'''
  108. def __init__(self, index):
  109. self.index = index
  110. return
  111. class ObjectPointer(Pointer):
  112. '''Class used to define object pointers'''
  113. pass
  114. def _read_string(f):
  115. '''Read a string'''
  116. length = _read_long(f)
  117. if length > 0:
  118. chars = _read_bytes(f, length).decode('latin1')
  119. _align_32(f)
  120. else:
  121. chars = ''
  122. return chars
  123. def _read_string_data(f):
  124. '''Read a data string (length is specified twice)'''
  125. length = _read_long(f)
  126. if length > 0:
  127. length = _read_long(f)
  128. string_data = _read_bytes(f, length)
  129. _align_32(f)
  130. else:
  131. string_data = ''
  132. return string_data
  133. def _read_data(f, dtype):
  134. '''Read a variable with a specified data type'''
  135. if dtype == 1:
  136. if _read_int32(f) != 1:
  137. raise Exception("Error occurred while reading byte variable")
  138. return _read_byte(f)
  139. elif dtype == 2:
  140. return _read_int16(f)
  141. elif dtype == 3:
  142. return _read_int32(f)
  143. elif dtype == 4:
  144. return _read_float32(f)
  145. elif dtype == 5:
  146. return _read_float64(f)
  147. elif dtype == 6:
  148. real = _read_float32(f)
  149. imag = _read_float32(f)
  150. return np.complex64(real + imag * 1j)
  151. elif dtype == 7:
  152. return _read_string_data(f)
  153. elif dtype == 8:
  154. raise Exception("Should not be here - please report this")
  155. elif dtype == 9:
  156. real = _read_float64(f)
  157. imag = _read_float64(f)
  158. return np.complex128(real + imag * 1j)
  159. elif dtype == 10:
  160. return Pointer(_read_int32(f))
  161. elif dtype == 11:
  162. return ObjectPointer(_read_int32(f))
  163. elif dtype == 12:
  164. return _read_uint16(f)
  165. elif dtype == 13:
  166. return _read_uint32(f)
  167. elif dtype == 14:
  168. return _read_int64(f)
  169. elif dtype == 15:
  170. return _read_uint64(f)
  171. else:
  172. raise Exception(f"Unknown IDL type: {dtype} - please report this")
  173. def _read_structure(f, array_desc, struct_desc):
  174. '''
  175. Read a structure, with the array and structure descriptors given as
  176. `array_desc` and `structure_desc` respectively.
  177. '''
  178. nrows = array_desc['nelements']
  179. columns = struct_desc['tagtable']
  180. dtype = []
  181. for col in columns:
  182. if col['structure'] or col['array']:
  183. dtype.append(((col['name'].lower(), col['name']), np.object_))
  184. else:
  185. if col['typecode'] in DTYPE_DICT:
  186. dtype.append(((col['name'].lower(), col['name']),
  187. DTYPE_DICT[col['typecode']]))
  188. else:
  189. raise Exception(f"Variable type {col['typecode']} not implemented")
  190. structure = np.rec.recarray((nrows, ), dtype=dtype)
  191. for i in range(nrows):
  192. for col in columns:
  193. dtype = col['typecode']
  194. if col['structure']:
  195. structure[col['name']][i] = _read_structure(f,
  196. struct_desc['arrtable'][col['name']],
  197. struct_desc['structtable'][col['name']])
  198. elif col['array']:
  199. structure[col['name']][i] = _read_array(f, dtype,
  200. struct_desc['arrtable'][col['name']])
  201. else:
  202. structure[col['name']][i] = _read_data(f, dtype)
  203. # Reshape structure if needed
  204. if array_desc['ndims'] > 1:
  205. dims = array_desc['dims'][:int(array_desc['ndims'])]
  206. dims.reverse()
  207. structure = structure.reshape(dims)
  208. return structure
  209. def _read_array(f, typecode, array_desc):
  210. '''
  211. Read an array of type `typecode`, with the array descriptor given as
  212. `array_desc`.
  213. '''
  214. if typecode in [1, 3, 4, 5, 6, 9, 13, 14, 15]:
  215. if typecode == 1:
  216. nbytes = _read_int32(f)
  217. if nbytes != array_desc['nbytes']:
  218. warnings.warn("Not able to verify number of bytes from header",
  219. stacklevel=3)
  220. # Read bytes as numpy array
  221. array = np.frombuffer(f.read(array_desc['nbytes']),
  222. dtype=DTYPE_DICT[typecode])
  223. elif typecode in [2, 12]:
  224. # These are 2 byte types, need to skip every two as they are not packed
  225. array = np.frombuffer(f.read(array_desc['nbytes']*2),
  226. dtype=DTYPE_DICT[typecode])[1::2]
  227. else:
  228. # Read bytes into list
  229. array = []
  230. for i in range(array_desc['nelements']):
  231. dtype = typecode
  232. data = _read_data(f, dtype)
  233. array.append(data)
  234. array = np.array(array, dtype=np.object_)
  235. # Reshape array if needed
  236. if array_desc['ndims'] > 1:
  237. dims = array_desc['dims'][:int(array_desc['ndims'])]
  238. dims.reverse()
  239. array = array.reshape(dims)
  240. # Go to next alignment position
  241. _align_32(f)
  242. return array
  243. def _read_record(f):
  244. '''Function to read in a full record'''
  245. record = {'rectype': _read_long(f)}
  246. nextrec = _read_uint32(f)
  247. nextrec += _read_uint32(f).astype(np.int64) * 2**32
  248. _skip_bytes(f, 4)
  249. if record['rectype'] not in RECTYPE_DICT:
  250. raise Exception(f"Unknown RECTYPE: {record['rectype']}")
  251. record['rectype'] = RECTYPE_DICT[record['rectype']]
  252. if record['rectype'] in ["VARIABLE", "HEAP_DATA"]:
  253. if record['rectype'] == "VARIABLE":
  254. record['varname'] = _read_string(f)
  255. else:
  256. record['heap_index'] = _read_long(f)
  257. _skip_bytes(f, 4)
  258. rectypedesc = _read_typedesc(f)
  259. if rectypedesc['typecode'] == 0:
  260. if nextrec == f.tell():
  261. record['data'] = None # Indicates NULL value
  262. else:
  263. raise ValueError("Unexpected type code: 0")
  264. else:
  265. varstart = _read_long(f)
  266. if varstart != 7:
  267. raise Exception("VARSTART is not 7")
  268. if rectypedesc['structure']:
  269. record['data'] = _read_structure(f, rectypedesc['array_desc'],
  270. rectypedesc['struct_desc'])
  271. elif rectypedesc['array']:
  272. record['data'] = _read_array(f, rectypedesc['typecode'],
  273. rectypedesc['array_desc'])
  274. else:
  275. dtype = rectypedesc['typecode']
  276. record['data'] = _read_data(f, dtype)
  277. elif record['rectype'] == "TIMESTAMP":
  278. _skip_bytes(f, 4*256)
  279. record['date'] = _read_string(f)
  280. record['user'] = _read_string(f)
  281. record['host'] = _read_string(f)
  282. elif record['rectype'] == "VERSION":
  283. record['format'] = _read_long(f)
  284. record['arch'] = _read_string(f)
  285. record['os'] = _read_string(f)
  286. record['release'] = _read_string(f)
  287. elif record['rectype'] == "IDENTIFICATON":
  288. record['author'] = _read_string(f)
  289. record['title'] = _read_string(f)
  290. record['idcode'] = _read_string(f)
  291. elif record['rectype'] == "NOTICE":
  292. record['notice'] = _read_string(f)
  293. elif record['rectype'] == "DESCRIPTION":
  294. record['description'] = _read_string_data(f)
  295. elif record['rectype'] == "HEAP_HEADER":
  296. record['nvalues'] = _read_long(f)
  297. record['indices'] = [_read_long(f) for _ in range(record['nvalues'])]
  298. elif record['rectype'] == "COMMONBLOCK":
  299. record['nvars'] = _read_long(f)
  300. record['name'] = _read_string(f)
  301. record['varnames'] = [_read_string(f) for _ in range(record['nvars'])]
  302. elif record['rectype'] == "END_MARKER":
  303. record['end'] = True
  304. elif record['rectype'] == "UNKNOWN":
  305. warnings.warn("Skipping UNKNOWN record", stacklevel=3)
  306. elif record['rectype'] == "SYSTEM_VARIABLE":
  307. warnings.warn("Skipping SYSTEM_VARIABLE record", stacklevel=3)
  308. else:
  309. raise Exception(f"record['rectype']={record['rectype']} not implemented")
  310. f.seek(nextrec)
  311. return record
  312. def _read_typedesc(f):
  313. '''Function to read in a type descriptor'''
  314. typedesc = {'typecode': _read_long(f), 'varflags': _read_long(f)}
  315. if typedesc['varflags'] & 2 == 2:
  316. raise Exception("System variables not implemented")
  317. typedesc['array'] = typedesc['varflags'] & 4 == 4
  318. typedesc['structure'] = typedesc['varflags'] & 32 == 32
  319. if typedesc['structure']:
  320. typedesc['array_desc'] = _read_arraydesc(f)
  321. typedesc['struct_desc'] = _read_structdesc(f)
  322. elif typedesc['array']:
  323. typedesc['array_desc'] = _read_arraydesc(f)
  324. return typedesc
  325. def _read_arraydesc(f):
  326. '''Function to read in an array descriptor'''
  327. arraydesc = {'arrstart': _read_long(f)}
  328. if arraydesc['arrstart'] == 8:
  329. _skip_bytes(f, 4)
  330. arraydesc['nbytes'] = _read_long(f)
  331. arraydesc['nelements'] = _read_long(f)
  332. arraydesc['ndims'] = _read_long(f)
  333. _skip_bytes(f, 8)
  334. arraydesc['nmax'] = _read_long(f)
  335. arraydesc['dims'] = [_read_long(f) for _ in range(arraydesc['nmax'])]
  336. elif arraydesc['arrstart'] == 18:
  337. warnings.warn("Using experimental 64-bit array read", stacklevel=3)
  338. _skip_bytes(f, 8)
  339. arraydesc['nbytes'] = _read_uint64(f)
  340. arraydesc['nelements'] = _read_uint64(f)
  341. arraydesc['ndims'] = _read_long(f)
  342. _skip_bytes(f, 8)
  343. arraydesc['nmax'] = 8
  344. arraydesc['dims'] = []
  345. for d in range(arraydesc['nmax']):
  346. v = _read_long(f)
  347. if v != 0:
  348. raise Exception("Expected a zero in ARRAY_DESC")
  349. arraydesc['dims'].append(_read_long(f))
  350. else:
  351. raise Exception(f"Unknown ARRSTART: {arraydesc['arrstart']}")
  352. return arraydesc
  353. def _read_structdesc(f):
  354. '''Function to read in a structure descriptor'''
  355. structdesc = {}
  356. structstart = _read_long(f)
  357. if structstart != 9:
  358. raise Exception("STRUCTSTART should be 9")
  359. structdesc['name'] = _read_string(f)
  360. predef = _read_long(f)
  361. structdesc['ntags'] = _read_long(f)
  362. structdesc['nbytes'] = _read_long(f)
  363. structdesc['predef'] = predef & 1
  364. structdesc['inherits'] = predef & 2
  365. structdesc['is_super'] = predef & 4
  366. if not structdesc['predef']:
  367. structdesc['tagtable'] = [_read_tagdesc(f)
  368. for _ in range(structdesc['ntags'])]
  369. for tag in structdesc['tagtable']:
  370. tag['name'] = _read_string(f)
  371. structdesc['arrtable'] = {tag['name']: _read_arraydesc(f)
  372. for tag in structdesc['tagtable']
  373. if tag['array']}
  374. structdesc['structtable'] = {tag['name']: _read_structdesc(f)
  375. for tag in structdesc['tagtable']
  376. if tag['structure']}
  377. if structdesc['inherits'] or structdesc['is_super']:
  378. structdesc['classname'] = _read_string(f)
  379. structdesc['nsupclasses'] = _read_long(f)
  380. structdesc['supclassnames'] = [
  381. _read_string(f) for _ in range(structdesc['nsupclasses'])]
  382. structdesc['supclasstable'] = [
  383. _read_structdesc(f) for _ in range(structdesc['nsupclasses'])]
  384. STRUCT_DICT[structdesc['name']] = structdesc
  385. else:
  386. if structdesc['name'] not in STRUCT_DICT:
  387. raise Exception("PREDEF=1 but can't find definition")
  388. structdesc = STRUCT_DICT[structdesc['name']]
  389. return structdesc
  390. def _read_tagdesc(f):
  391. '''Function to read in a tag descriptor'''
  392. tagdesc = {'offset': _read_long(f)}
  393. if tagdesc['offset'] == -1:
  394. tagdesc['offset'] = _read_uint64(f)
  395. tagdesc['typecode'] = _read_long(f)
  396. tagflags = _read_long(f)
  397. tagdesc['array'] = tagflags & 4 == 4
  398. tagdesc['structure'] = tagflags & 32 == 32
  399. tagdesc['scalar'] = tagdesc['typecode'] in DTYPE_DICT
  400. # Assume '10'x is scalar
  401. return tagdesc
  402. def _replace_heap(variable, heap):
  403. if isinstance(variable, Pointer):
  404. while isinstance(variable, Pointer):
  405. if variable.index == 0:
  406. variable = None
  407. else:
  408. if variable.index in heap:
  409. variable = heap[variable.index]
  410. else:
  411. warnings.warn("Variable referenced by pointer not found "
  412. "in heap: variable will be set to None",
  413. stacklevel=3)
  414. variable = None
  415. replace, new = _replace_heap(variable, heap)
  416. if replace:
  417. variable = new
  418. return True, variable
  419. elif isinstance(variable, np.rec.recarray):
  420. # Loop over records
  421. for ir, record in enumerate(variable):
  422. replace, new = _replace_heap(record, heap)
  423. if replace:
  424. variable[ir] = new
  425. return False, variable
  426. elif isinstance(variable, np.record):
  427. # Loop over values
  428. for iv, value in enumerate(variable):
  429. replace, new = _replace_heap(value, heap)
  430. if replace:
  431. variable[iv] = new
  432. return False, variable
  433. elif isinstance(variable, np.ndarray):
  434. # Loop over values if type is np.object_
  435. if variable.dtype.type is np.object_:
  436. for iv in range(variable.size):
  437. replace, new = _replace_heap(variable.item(iv), heap)
  438. if replace:
  439. variable.reshape(-1)[iv] = new
  440. return False, variable
  441. else:
  442. return False, variable
  443. class AttrDict(dict):
  444. '''
  445. A case-insensitive dictionary with access via item, attribute, and call
  446. notations:
  447. >>> from scipy.io._idl import AttrDict
  448. >>> d = AttrDict()
  449. >>> d['Variable'] = 123
  450. >>> d['Variable']
  451. 123
  452. >>> d.Variable
  453. 123
  454. >>> d.variable
  455. 123
  456. >>> d('VARIABLE')
  457. 123
  458. >>> d['missing']
  459. Traceback (most recent error last):
  460. ...
  461. KeyError: 'missing'
  462. >>> d.missing
  463. Traceback (most recent error last):
  464. ...
  465. AttributeError: 'AttrDict' object has no attribute 'missing'
  466. '''
  467. def __init__(self, init=None):
  468. if init is None:
  469. init = {}
  470. dict.__init__(self, init)
  471. def __getitem__(self, name):
  472. return super().__getitem__(name.lower())
  473. def __setitem__(self, key, value):
  474. return super().__setitem__(key.lower(), value)
  475. def __getattr__(self, name):
  476. try:
  477. return self.__getitem__(name)
  478. except KeyError:
  479. raise AttributeError(
  480. f"'{type(self)}' object has no attribute '{name}'") from None
  481. __setattr__ = __setitem__
  482. __call__ = __getitem__
  483. def readsav(file_name, idict=None, python_dict=False,
  484. uncompressed_file_name=None, verbose=False):
  485. """
  486. Read an IDL .sav file.
  487. Parameters
  488. ----------
  489. file_name : str
  490. Name of the IDL save file.
  491. idict : dict, optional
  492. Dictionary in which to insert .sav file variables.
  493. python_dict : bool, optional
  494. By default, the object return is not a Python dictionary, but a
  495. case-insensitive dictionary with item, attribute, and call access
  496. to variables. To get a standard Python dictionary, set this option
  497. to True.
  498. uncompressed_file_name : str, optional
  499. This option only has an effect for .sav files written with the
  500. /compress option. If a file name is specified, compressed .sav
  501. files are uncompressed to this file. Otherwise, readsav will use
  502. the `tempfile` module to determine a temporary filename
  503. automatically, and will remove the temporary file upon successfully
  504. reading it in.
  505. verbose : bool, optional
  506. Whether to print out information about the save file, including
  507. the records read, and available variables.
  508. Returns
  509. -------
  510. idl_dict : AttrDict or dict
  511. If `python_dict` is set to False (default), this function returns a
  512. case-insensitive dictionary with item, attribute, and call access
  513. to variables. If `python_dict` is set to True, this function
  514. returns a Python dictionary with all variable names in lowercase.
  515. If `idict` was specified, then variables are written to the
  516. dictionary specified, and the updated dictionary is returned.
  517. Examples
  518. --------
  519. >>> from os.path import dirname, join as pjoin
  520. >>> import scipy.io as sio
  521. >>> from scipy.io import readsav
  522. Get the filename for an example .sav file from the tests/data directory.
  523. >>> data_dir = pjoin(dirname(sio.__file__), 'tests', 'data')
  524. >>> sav_fname = pjoin(data_dir, 'array_float32_1d.sav')
  525. Load the .sav file contents.
  526. >>> sav_data = readsav(sav_fname)
  527. Get keys of the .sav file contents.
  528. >>> print(sav_data.keys())
  529. dict_keys(['array1d'])
  530. Access a content with a key.
  531. >>> print(sav_data['array1d'])
  532. [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  533. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  534. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  535. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  536. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  537. 0. 0. 0.]
  538. """
  539. # Initialize record and variable holders
  540. records = []
  541. if python_dict or idict:
  542. variables = {}
  543. else:
  544. variables = AttrDict()
  545. # Open the IDL file
  546. f = open(file_name, 'rb')
  547. # Read the signature, which should be 'SR'
  548. signature = _read_bytes(f, 2)
  549. if signature != b'SR':
  550. raise Exception(f"Invalid SIGNATURE: {signature}")
  551. # Next, the record format, which is '\x00\x04' for normal .sav
  552. # files, and '\x00\x06' for compressed .sav files.
  553. recfmt = _read_bytes(f, 2)
  554. if recfmt == b'\x00\x04':
  555. pass
  556. elif recfmt == b'\x00\x06':
  557. if verbose:
  558. print("IDL Save file is compressed")
  559. if uncompressed_file_name:
  560. fout = open(uncompressed_file_name, 'w+b')
  561. else:
  562. fout = tempfile.NamedTemporaryFile(suffix='.sav')
  563. if verbose:
  564. print(f" -> expanding to {fout.name}")
  565. # Write header
  566. fout.write(b'SR\x00\x04')
  567. # Cycle through records
  568. while True:
  569. # Read record type
  570. rectype = _read_long(f)
  571. fout.write(struct.pack('>l', int(rectype)))
  572. # Read position of next record and return as int
  573. nextrec = _read_uint32(f)
  574. nextrec += _read_uint32(f).astype(np.int64) * 2**32
  575. # Read the unknown 4 bytes
  576. unknown = f.read(4)
  577. # Check if the end of the file has been reached
  578. if RECTYPE_DICT[rectype] == 'END_MARKER':
  579. modval = np.int64(2**32)
  580. fout.write(struct.pack('>I', int(nextrec) % modval))
  581. fout.write(
  582. struct.pack('>I', int((nextrec - (nextrec % modval)) / modval))
  583. )
  584. fout.write(unknown)
  585. break
  586. # Find current position
  587. pos = f.tell()
  588. # Decompress record
  589. rec_string = zlib.decompress(f.read(nextrec-pos))
  590. # Find new position of next record
  591. nextrec = fout.tell() + len(rec_string) + 12
  592. # Write out record
  593. fout.write(struct.pack('>I', int(nextrec % 2**32)))
  594. fout.write(struct.pack('>I', int((nextrec - (nextrec % 2**32)) / 2**32)))
  595. fout.write(unknown)
  596. fout.write(rec_string)
  597. # Close the original compressed file
  598. f.close()
  599. # Set f to be the decompressed file, and skip the first four bytes
  600. f = fout
  601. f.seek(4)
  602. else:
  603. raise Exception(f"Invalid RECFMT: {recfmt}")
  604. # Loop through records, and add them to the list
  605. while True:
  606. r = _read_record(f)
  607. records.append(r)
  608. if 'end' in r:
  609. if r['end']:
  610. break
  611. # Close the file
  612. f.close()
  613. # Find heap data variables
  614. heap = {}
  615. for r in records:
  616. if r['rectype'] == "HEAP_DATA":
  617. heap[r['heap_index']] = r['data']
  618. # Find all variables
  619. for r in records:
  620. if r['rectype'] == "VARIABLE":
  621. replace, new = _replace_heap(r['data'], heap)
  622. if replace:
  623. r['data'] = new
  624. variables[r['varname'].lower()] = r['data']
  625. if verbose:
  626. # Print out timestamp info about the file
  627. for record in records:
  628. if record['rectype'] == "TIMESTAMP":
  629. print("-"*50)
  630. print(f"Date: {record['date']}")
  631. print(f"User: {record['user']}")
  632. print(f"Host: {record['host']}")
  633. break
  634. # Print out version info about the file
  635. for record in records:
  636. if record['rectype'] == "VERSION":
  637. print("-"*50)
  638. print(f"Format: {record['format']}")
  639. print(f"Architecture: {record['arch']}")
  640. print(f"Operating System: {record['os']}")
  641. print(f"IDL Version: {record['release']}")
  642. break
  643. # Print out identification info about the file
  644. for record in records:
  645. if record['rectype'] == "IDENTIFICATON":
  646. print("-"*50)
  647. print(f"Author: {record['author']}")
  648. print(f"Title: {record['title']}")
  649. print(f"ID Code: {record['idcode']}")
  650. break
  651. # Print out descriptions saved with the file
  652. for record in records:
  653. if record['rectype'] == "DESCRIPTION":
  654. print("-"*50)
  655. print(f"Description: {record['description']}")
  656. break
  657. print("-"*50)
  658. print(f"Successfully read {len(records)} records of which:")
  659. # Create convenience list of record types
  660. rectypes = [r['rectype'] for r in records]
  661. for rt in set(rectypes):
  662. if rt != 'END_MARKER':
  663. print(f" - {rectypes.count(rt)} are of type {rt}")
  664. print("-"*50)
  665. if 'VARIABLE' in rectypes:
  666. print("Available variables:")
  667. for var in variables:
  668. print(f" - {var} [{type(variables[var])}]")
  669. print("-"*50)
  670. if idict:
  671. for var in variables:
  672. idict[var] = variables[var]
  673. return idict
  674. else:
  675. return variables