test_idl.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. from os import path
  2. import warnings
  3. import numpy as np
  4. from numpy.testing import (assert_equal, assert_array_equal,
  5. assert_)
  6. import pytest
  7. from scipy.io import readsav
  8. from scipy.io import _idl
  9. DATA_PATH = path.join(path.dirname(__file__), 'data')
  10. def assert_identical(a, b):
  11. """Assert whether value AND type are the same"""
  12. assert_equal(a, b)
  13. if isinstance(b, str):
  14. assert_equal(type(a), type(b))
  15. else:
  16. assert_equal(np.asarray(a).dtype.type, np.asarray(b).dtype.type)
  17. def assert_array_identical(a, b):
  18. """Assert whether values AND type are the same"""
  19. assert_array_equal(a, b)
  20. assert_equal(a.dtype.type, b.dtype.type)
  21. # Define vectorized ID function for pointer arrays
  22. vect_id = np.vectorize(id)
  23. class TestIdict:
  24. def test_idict(self):
  25. custom_dict = {'a': np.int16(999)}
  26. original_id = id(custom_dict)
  27. s = readsav(path.join(DATA_PATH, 'scalar_byte.sav'),
  28. idict=custom_dict, verbose=False)
  29. assert_equal(original_id, id(s))
  30. assert_('a' in s)
  31. assert_identical(s['a'], np.int16(999))
  32. assert_identical(s['i8u'], np.uint8(234))
  33. class TestScalars:
  34. # Test that scalar values are read in with the correct value and type
  35. def test_byte(self):
  36. s = readsav(path.join(DATA_PATH, 'scalar_byte.sav'), verbose=False)
  37. assert_identical(s.i8u, np.uint8(234))
  38. def test_int16(self):
  39. s = readsav(path.join(DATA_PATH, 'scalar_int16.sav'), verbose=False)
  40. assert_identical(s.i16s, np.int16(-23456))
  41. def test_int32(self):
  42. s = readsav(path.join(DATA_PATH, 'scalar_int32.sav'), verbose=False)
  43. assert_identical(s.i32s, np.int32(-1234567890))
  44. def test_float32(self):
  45. s = readsav(path.join(DATA_PATH, 'scalar_float32.sav'), verbose=False)
  46. assert_identical(s.f32, np.float32(-3.1234567e+37))
  47. def test_float64(self):
  48. s = readsav(path.join(DATA_PATH, 'scalar_float64.sav'), verbose=False)
  49. assert_identical(s.f64, np.float64(-1.1976931348623157e+307))
  50. def test_complex32(self):
  51. s = readsav(path.join(DATA_PATH, 'scalar_complex32.sav'), verbose=False)
  52. assert_identical(s.c32, np.complex64(3.124442e13-2.312442e31j))
  53. def test_bytes(self):
  54. s = readsav(path.join(DATA_PATH, 'scalar_string.sav'), verbose=False)
  55. msg = "The quick brown fox jumps over the lazy python"
  56. assert_identical(s.s, np.bytes_(msg))
  57. def test_structure(self):
  58. pass
  59. def test_complex64(self):
  60. s = readsav(path.join(DATA_PATH, 'scalar_complex64.sav'), verbose=False)
  61. assert_identical(
  62. s.c64,
  63. np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j)
  64. )
  65. def test_heap_pointer(self):
  66. pass
  67. def test_object_reference(self):
  68. pass
  69. def test_uint16(self):
  70. s = readsav(path.join(DATA_PATH, 'scalar_uint16.sav'), verbose=False)
  71. assert_identical(s.i16u, np.uint16(65511))
  72. def test_uint32(self):
  73. s = readsav(path.join(DATA_PATH, 'scalar_uint32.sav'), verbose=False)
  74. assert_identical(s.i32u, np.uint32(4294967233))
  75. def test_int64(self):
  76. s = readsav(path.join(DATA_PATH, 'scalar_int64.sav'), verbose=False)
  77. assert_identical(s.i64s, np.int64(-9223372036854774567))
  78. def test_uint64(self):
  79. s = readsav(path.join(DATA_PATH, 'scalar_uint64.sav'), verbose=False)
  80. assert_identical(s.i64u, np.uint64(18446744073709529285))
  81. class TestCompressed(TestScalars):
  82. # Test that compressed .sav files can be read in
  83. def test_compressed(self):
  84. s = readsav(path.join(DATA_PATH, 'various_compressed.sav'), verbose=False)
  85. assert_identical(s.i8u, np.uint8(234))
  86. assert_identical(s.f32, np.float32(-3.1234567e+37))
  87. assert_identical(
  88. s.c64,
  89. np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j)
  90. )
  91. assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
  92. assert_identical(s.arrays.a[0], np.array([1, 2, 3], dtype=np.int16))
  93. assert_identical(s.arrays.b[0], np.array([4., 5., 6., 7.], dtype=np.float32))
  94. assert_identical(s.arrays.c[0],
  95. np.array([np.complex64(1+2j), np.complex64(7+8j)]))
  96. assert_identical(s.arrays.d[0],
  97. np.array([b"cheese", b"bacon", b"spam"], dtype=object))
  98. class TestArrayDimensions:
  99. # Test that multi-dimensional arrays are read in with the correct dimensions
  100. def test_1d(self):
  101. s = readsav(path.join(DATA_PATH, 'array_float32_1d.sav'), verbose=False)
  102. assert_equal(s.array1d.shape, (123, ))
  103. def test_2d(self):
  104. s = readsav(path.join(DATA_PATH, 'array_float32_2d.sav'), verbose=False)
  105. assert_equal(s.array2d.shape, (22, 12))
  106. def test_3d(self):
  107. s = readsav(path.join(DATA_PATH, 'array_float32_3d.sav'), verbose=False)
  108. assert_equal(s.array3d.shape, (11, 22, 12))
  109. def test_4d(self):
  110. s = readsav(path.join(DATA_PATH, 'array_float32_4d.sav'), verbose=False)
  111. assert_equal(s.array4d.shape, (4, 5, 8, 7))
  112. def test_5d(self):
  113. s = readsav(path.join(DATA_PATH, 'array_float32_5d.sav'), verbose=False)
  114. assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
  115. def test_6d(self):
  116. s = readsav(path.join(DATA_PATH, 'array_float32_6d.sav'), verbose=False)
  117. assert_equal(s.array6d.shape, (3, 6, 4, 5, 3, 4))
  118. def test_7d(self):
  119. s = readsav(path.join(DATA_PATH, 'array_float32_7d.sav'), verbose=False)
  120. assert_equal(s.array7d.shape, (2, 1, 2, 3, 4, 3, 2))
  121. def test_8d(self):
  122. s = readsav(path.join(DATA_PATH, 'array_float32_8d.sav'), verbose=False)
  123. assert_equal(s.array8d.shape, (4, 3, 2, 1, 2, 3, 5, 4))
  124. class TestStructures:
  125. def test_scalars(self):
  126. s = readsav(path.join(DATA_PATH, 'struct_scalars.sav'), verbose=False)
  127. assert_identical(s.scalars.a, np.array(np.int16(1)))
  128. assert_identical(s.scalars.b, np.array(np.int32(2)))
  129. assert_identical(s.scalars.c, np.array(np.float32(3.)))
  130. assert_identical(s.scalars.d, np.array(np.float64(4.)))
  131. assert_identical(s.scalars.e, np.array([b"spam"], dtype=object))
  132. assert_identical(s.scalars.f, np.array(np.complex64(-1.+3j)))
  133. def test_scalars_replicated(self):
  134. s = readsav(path.join(DATA_PATH, 'struct_scalars_replicated.sav'),
  135. verbose=False)
  136. assert_identical(s.scalars_rep.a, np.repeat(np.int16(1), 5))
  137. assert_identical(s.scalars_rep.b, np.repeat(np.int32(2), 5))
  138. assert_identical(s.scalars_rep.c, np.repeat(np.float32(3.), 5))
  139. assert_identical(s.scalars_rep.d, np.repeat(np.float64(4.), 5))
  140. assert_identical(s.scalars_rep.e, np.repeat(b"spam", 5).astype(object))
  141. assert_identical(s.scalars_rep.f, np.repeat(np.complex64(-1.+3j), 5))
  142. def test_scalars_replicated_3d(self):
  143. s = readsav(path.join(DATA_PATH, 'struct_scalars_replicated_3d.sav'),
  144. verbose=False)
  145. assert_identical(s.scalars_rep.a, np.repeat(np.int16(1), 24).reshape(4, 3, 2))
  146. assert_identical(s.scalars_rep.b, np.repeat(np.int32(2), 24).reshape(4, 3, 2))
  147. assert_identical(s.scalars_rep.c,
  148. np.repeat(np.float32(3.), 24).reshape(4, 3, 2))
  149. assert_identical(s.scalars_rep.d,
  150. np.repeat(np.float64(4.), 24).reshape(4, 3, 2))
  151. assert_identical(s.scalars_rep.e,
  152. np.repeat(b"spam", 24).reshape(4, 3, 2).astype(object))
  153. assert_identical(s.scalars_rep.f,
  154. np.repeat(np.complex64(-1.+3j), 24).reshape(4, 3, 2))
  155. def test_arrays(self):
  156. s = readsav(path.join(DATA_PATH, 'struct_arrays.sav'), verbose=False)
  157. assert_array_identical(s.arrays.a[0], np.array([1, 2, 3], dtype=np.int16))
  158. assert_array_identical(s.arrays.b[0],
  159. np.array([4., 5., 6., 7.], dtype=np.float32))
  160. assert_array_identical(s.arrays.c[0],
  161. np.array([np.complex64(1+2j), np.complex64(7+8j)]))
  162. assert_array_identical(s.arrays.d[0],
  163. np.array([b"cheese", b"bacon", b"spam"], dtype=object))
  164. def test_arrays_replicated(self):
  165. s = readsav(path.join(DATA_PATH, 'struct_arrays_replicated.sav'), verbose=False)
  166. # Check column types
  167. assert_(s.arrays_rep.a.dtype.type is np.object_)
  168. assert_(s.arrays_rep.b.dtype.type is np.object_)
  169. assert_(s.arrays_rep.c.dtype.type is np.object_)
  170. assert_(s.arrays_rep.d.dtype.type is np.object_)
  171. # Check column shapes
  172. assert_equal(s.arrays_rep.a.shape, (5, ))
  173. assert_equal(s.arrays_rep.b.shape, (5, ))
  174. assert_equal(s.arrays_rep.c.shape, (5, ))
  175. assert_equal(s.arrays_rep.d.shape, (5, ))
  176. # Check values
  177. for i in range(5):
  178. assert_array_identical(s.arrays_rep.a[i],
  179. np.array([1, 2, 3], dtype=np.int16))
  180. assert_array_identical(s.arrays_rep.b[i],
  181. np.array([4., 5., 6., 7.], dtype=np.float32))
  182. assert_array_identical(s.arrays_rep.c[i],
  183. np.array([np.complex64(1+2j),
  184. np.complex64(7+8j)]))
  185. assert_array_identical(s.arrays_rep.d[i],
  186. np.array([b"cheese", b"bacon", b"spam"],
  187. dtype=object))
  188. def test_arrays_replicated_3d(self):
  189. s = readsav(path.join(DATA_PATH, 'struct_arrays_replicated_3d.sav'),
  190. verbose=False)
  191. # Check column types
  192. assert_(s.arrays_rep.a.dtype.type is np.object_)
  193. assert_(s.arrays_rep.b.dtype.type is np.object_)
  194. assert_(s.arrays_rep.c.dtype.type is np.object_)
  195. assert_(s.arrays_rep.d.dtype.type is np.object_)
  196. # Check column shapes
  197. assert_equal(s.arrays_rep.a.shape, (4, 3, 2))
  198. assert_equal(s.arrays_rep.b.shape, (4, 3, 2))
  199. assert_equal(s.arrays_rep.c.shape, (4, 3, 2))
  200. assert_equal(s.arrays_rep.d.shape, (4, 3, 2))
  201. # Check values
  202. for i in range(4):
  203. for j in range(3):
  204. for k in range(2):
  205. assert_array_identical(s.arrays_rep.a[i, j, k],
  206. np.array([1, 2, 3], dtype=np.int16))
  207. assert_array_identical(s.arrays_rep.b[i, j, k],
  208. np.array([4., 5., 6., 7.],
  209. dtype=np.float32))
  210. assert_array_identical(s.arrays_rep.c[i, j, k],
  211. np.array([np.complex64(1+2j),
  212. np.complex64(7+8j)]))
  213. assert_array_identical(s.arrays_rep.d[i, j, k],
  214. np.array([b"cheese", b"bacon", b"spam"],
  215. dtype=object))
  216. def test_inheritance(self):
  217. s = readsav(path.join(DATA_PATH, 'struct_inherit.sav'), verbose=False)
  218. assert_identical(s.fc.x, np.array([0], dtype=np.int16))
  219. assert_identical(s.fc.y, np.array([0], dtype=np.int16))
  220. assert_identical(s.fc.r, np.array([0], dtype=np.int16))
  221. assert_identical(s.fc.c, np.array([4], dtype=np.int16))
  222. def test_arrays_corrupt_idl80(self):
  223. # test byte arrays with missing nbyte information from IDL 8.0 .sav file
  224. with warnings.catch_warnings():
  225. warnings.filterwarnings(
  226. "ignore", "Not able to verify number of bytes from header", UserWarning)
  227. s = readsav(path.join(DATA_PATH,'struct_arrays_byte_idl80.sav'),
  228. verbose=False)
  229. assert_identical(s.y.x[0], np.array([55,66], dtype=np.uint8))
  230. class TestPointers:
  231. # Check that pointers in .sav files produce references to the same object in Python
  232. def test_pointers(self):
  233. s = readsav(path.join(DATA_PATH, 'scalar_heap_pointer.sav'), verbose=False)
  234. assert_identical(
  235. s.c64_pointer1,
  236. np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j)
  237. )
  238. assert_identical(
  239. s.c64_pointer2,
  240. np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j)
  241. )
  242. assert_(s.c64_pointer1 is s.c64_pointer2)
  243. class TestPointerArray:
  244. # Test that pointers in arrays are correctly read in
  245. def test_1d(self):
  246. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_1d.sav'), verbose=False)
  247. assert_equal(s.array1d.shape, (123, ))
  248. assert_(np.all(s.array1d == np.float32(4.)))
  249. assert_(np.all(vect_id(s.array1d) == id(s.array1d[0])))
  250. def test_2d(self):
  251. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_2d.sav'), verbose=False)
  252. assert_equal(s.array2d.shape, (22, 12))
  253. assert_(np.all(s.array2d == np.float32(4.)))
  254. assert_(np.all(vect_id(s.array2d) == id(s.array2d[0,0])))
  255. def test_3d(self):
  256. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_3d.sav'), verbose=False)
  257. assert_equal(s.array3d.shape, (11, 22, 12))
  258. assert_(np.all(s.array3d == np.float32(4.)))
  259. assert_(np.all(vect_id(s.array3d) == id(s.array3d[0,0,0])))
  260. def test_4d(self):
  261. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_4d.sav'), verbose=False)
  262. assert_equal(s.array4d.shape, (4, 5, 8, 7))
  263. assert_(np.all(s.array4d == np.float32(4.)))
  264. assert_(np.all(vect_id(s.array4d) == id(s.array4d[0,0,0,0])))
  265. def test_5d(self):
  266. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_5d.sav'), verbose=False)
  267. assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
  268. assert_(np.all(s.array5d == np.float32(4.)))
  269. assert_(np.all(vect_id(s.array5d) == id(s.array5d[0,0,0,0,0])))
  270. def test_6d(self):
  271. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_6d.sav'), verbose=False)
  272. assert_equal(s.array6d.shape, (3, 6, 4, 5, 3, 4))
  273. assert_(np.all(s.array6d == np.float32(4.)))
  274. assert_(np.all(vect_id(s.array6d) == id(s.array6d[0,0,0,0,0,0])))
  275. def test_7d(self):
  276. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_7d.sav'), verbose=False)
  277. assert_equal(s.array7d.shape, (2, 1, 2, 3, 4, 3, 2))
  278. assert_(np.all(s.array7d == np.float32(4.)))
  279. assert_(np.all(vect_id(s.array7d) == id(s.array7d[0,0,0,0,0,0,0])))
  280. def test_8d(self):
  281. s = readsav(path.join(DATA_PATH, 'array_float32_pointer_8d.sav'), verbose=False)
  282. assert_equal(s.array8d.shape, (4, 3, 2, 1, 2, 3, 5, 4))
  283. assert_(np.all(s.array8d == np.float32(4.)))
  284. assert_(np.all(vect_id(s.array8d) == id(s.array8d[0,0,0,0,0,0,0,0])))
  285. class TestPointerStructures:
  286. # Test that structures are correctly read in
  287. def test_scalars(self):
  288. s = readsav(path.join(DATA_PATH, 'struct_pointers.sav'), verbose=False)
  289. assert_identical(s.pointers.g, np.array(np.float32(4.), dtype=np.object_))
  290. assert_identical(s.pointers.h, np.array(np.float32(4.), dtype=np.object_))
  291. assert_(id(s.pointers.g[0]) == id(s.pointers.h[0]))
  292. def test_pointers_replicated(self):
  293. s = readsav(path.join(DATA_PATH, 'struct_pointers_replicated.sav'),
  294. verbose=False)
  295. assert_identical(s.pointers_rep.g,
  296. np.repeat(np.float32(4.), 5).astype(np.object_))
  297. assert_identical(s.pointers_rep.h,
  298. np.repeat(np.float32(4.), 5).astype(np.object_))
  299. assert_(np.all(vect_id(s.pointers_rep.g) == vect_id(s.pointers_rep.h)))
  300. def test_pointers_replicated_3d(self):
  301. s = readsav(path.join(DATA_PATH, 'struct_pointers_replicated_3d.sav'),
  302. verbose=False)
  303. s_expect = np.repeat(np.float32(4.), 24).reshape(4, 3, 2).astype(np.object_)
  304. assert_identical(s.pointers_rep.g, s_expect)
  305. assert_identical(s.pointers_rep.h, s_expect)
  306. assert_(np.all(vect_id(s.pointers_rep.g) == vect_id(s.pointers_rep.h)))
  307. def test_arrays(self):
  308. s = readsav(path.join(DATA_PATH, 'struct_pointer_arrays.sav'), verbose=False)
  309. assert_array_identical(s.arrays.g[0],
  310. np.repeat(np.float32(4.), 2).astype(np.object_))
  311. assert_array_identical(s.arrays.h[0],
  312. np.repeat(np.float32(4.), 3).astype(np.object_))
  313. assert_(np.all(vect_id(s.arrays.g[0]) == id(s.arrays.g[0][0])))
  314. assert_(np.all(vect_id(s.arrays.h[0]) == id(s.arrays.h[0][0])))
  315. assert_(id(s.arrays.g[0][0]) == id(s.arrays.h[0][0]))
  316. def test_arrays_replicated(self):
  317. s = readsav(path.join(DATA_PATH, 'struct_pointer_arrays_replicated.sav'),
  318. verbose=False)
  319. # Check column types
  320. assert_(s.arrays_rep.g.dtype.type is np.object_)
  321. assert_(s.arrays_rep.h.dtype.type is np.object_)
  322. # Check column shapes
  323. assert_equal(s.arrays_rep.g.shape, (5, ))
  324. assert_equal(s.arrays_rep.h.shape, (5, ))
  325. # Check values
  326. for i in range(5):
  327. assert_array_identical(s.arrays_rep.g[i],
  328. np.repeat(np.float32(4.), 2).astype(np.object_))
  329. assert_array_identical(s.arrays_rep.h[i],
  330. np.repeat(np.float32(4.), 3).astype(np.object_))
  331. assert_(np.all(vect_id(s.arrays_rep.g[i]) == id(s.arrays_rep.g[0][0])))
  332. assert_(np.all(vect_id(s.arrays_rep.h[i]) == id(s.arrays_rep.h[0][0])))
  333. def test_arrays_replicated_3d(self):
  334. pth = path.join(DATA_PATH, 'struct_pointer_arrays_replicated_3d.sav')
  335. s = readsav(pth, verbose=False)
  336. # Check column types
  337. assert_(s.arrays_rep.g.dtype.type is np.object_)
  338. assert_(s.arrays_rep.h.dtype.type is np.object_)
  339. # Check column shapes
  340. assert_equal(s.arrays_rep.g.shape, (4, 3, 2))
  341. assert_equal(s.arrays_rep.h.shape, (4, 3, 2))
  342. # Check values
  343. for i in range(4):
  344. for j in range(3):
  345. for k in range(2):
  346. assert_array_identical(s.arrays_rep.g[i, j, k],
  347. np.repeat(np.float32(4.), 2).astype(np.object_))
  348. assert_array_identical(s.arrays_rep.h[i, j, k],
  349. np.repeat(np.float32(4.), 3).astype(np.object_))
  350. g0 = vect_id(s.arrays_rep.g[i, j, k])
  351. g1 = id(s.arrays_rep.g[0, 0, 0][0])
  352. assert np.all(g0 == g1)
  353. h0 = vect_id(s.arrays_rep.h[i, j, k])
  354. h1 = id(s.arrays_rep.h[0, 0, 0][0])
  355. assert np.all(h0 == h1)
  356. class TestTags:
  357. '''Test that sav files with description tag read at all'''
  358. def test_description(self):
  359. s = readsav(path.join(DATA_PATH, 'scalar_byte_descr.sav'), verbose=False)
  360. assert_identical(s.i8u, np.uint8(234))
  361. def test_null_pointer():
  362. # Regression test for null pointers.
  363. s = readsav(path.join(DATA_PATH, 'null_pointer.sav'), verbose=False)
  364. assert_identical(s.point, None)
  365. assert_identical(s.check, np.int16(5))
  366. def test_invalid_pointer():
  367. # Regression test for invalid pointers (gh-4613).
  368. # In some files in the wild, pointers can sometimes refer to a heap
  369. # variable that does not exist. In that case, we now gracefully fail for
  370. # that variable and replace the variable with None and emit a warning.
  371. # Since it's difficult to artificially produce such files, the file used
  372. # here has been edited to force the pointer reference to be invalid.
  373. with warnings.catch_warnings(record=True) as w:
  374. warnings.simplefilter("always")
  375. s = readsav(path.join(DATA_PATH, 'invalid_pointer.sav'), verbose=False)
  376. assert_(len(w) == 1)
  377. assert_(str(w[0].message) == ("Variable referenced by pointer not found in "
  378. "heap: variable will be set to None"))
  379. assert_identical(s['a'], np.array([None, None]))
  380. def test_attrdict():
  381. d = _idl.AttrDict({'one': 1})
  382. assert d['one'] == 1
  383. assert d.one == 1
  384. with pytest.raises(KeyError):
  385. d['two']
  386. with pytest.raises(AttributeError, match='has no attribute'):
  387. d.two