test_format.py 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. # doctest
  2. r''' Test the .npy file format.
  3. Set up:
  4. >>> import sys
  5. >>> from io import BytesIO
  6. >>> from numpy.lib import format
  7. >>>
  8. >>> scalars = [
  9. ... np.uint8,
  10. ... np.int8,
  11. ... np.uint16,
  12. ... np.int16,
  13. ... np.uint32,
  14. ... np.int32,
  15. ... np.uint64,
  16. ... np.int64,
  17. ... np.float32,
  18. ... np.float64,
  19. ... np.complex64,
  20. ... np.complex128,
  21. ... object,
  22. ... ]
  23. >>>
  24. >>> basic_arrays = []
  25. >>>
  26. >>> for scalar in scalars:
  27. ... for endian in '<>':
  28. ... dtype = np.dtype(scalar).newbyteorder(endian)
  29. ... basic = np.arange(15).astype(dtype)
  30. ... basic_arrays.extend([
  31. ... np.array([], dtype=dtype),
  32. ... np.array(10, dtype=dtype),
  33. ... basic,
  34. ... basic.reshape((3,5)),
  35. ... basic.reshape((3,5)).T,
  36. ... basic.reshape((3,5))[::-1,::2],
  37. ... ])
  38. ...
  39. >>>
  40. >>> Pdescr = [
  41. ... ('x', 'i4', (2,)),
  42. ... ('y', 'f8', (2, 2)),
  43. ... ('z', 'u1')]
  44. >>>
  45. >>>
  46. >>> PbufferT = [
  47. ... ([3,2], [[6.,4.],[6.,4.]], 8),
  48. ... ([4,3], [[7.,5.],[7.,5.]], 9),
  49. ... ]
  50. >>>
  51. >>>
  52. >>> Ndescr = [
  53. ... ('x', 'i4', (2,)),
  54. ... ('Info', [
  55. ... ('value', 'c16'),
  56. ... ('y2', 'f8'),
  57. ... ('Info2', [
  58. ... ('name', 'S2'),
  59. ... ('value', 'c16', (2,)),
  60. ... ('y3', 'f8', (2,)),
  61. ... ('z3', 'u4', (2,))]),
  62. ... ('name', 'S2'),
  63. ... ('z2', 'b1')]),
  64. ... ('color', 'S2'),
  65. ... ('info', [
  66. ... ('Name', 'U8'),
  67. ... ('Value', 'c16')]),
  68. ... ('y', 'f8', (2, 2)),
  69. ... ('z', 'u1')]
  70. >>>
  71. >>>
  72. >>> NbufferT = [
  73. ... ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8),
  74. ... ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9),
  75. ... ]
  76. >>>
  77. >>>
  78. >>> record_arrays = [
  79. ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')),
  80. ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')),
  81. ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')),
  82. ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')),
  83. ... ]
  84. Test the magic string writing.
  85. >>> format.magic(1, 0)
  86. '\x93NUMPY\x01\x00'
  87. >>> format.magic(0, 0)
  88. '\x93NUMPY\x00\x00'
  89. >>> format.magic(255, 255)
  90. '\x93NUMPY\xff\xff'
  91. >>> format.magic(2, 5)
  92. '\x93NUMPY\x02\x05'
  93. Test the magic string reading.
  94. >>> format.read_magic(BytesIO(format.magic(1, 0)))
  95. (1, 0)
  96. >>> format.read_magic(BytesIO(format.magic(0, 0)))
  97. (0, 0)
  98. >>> format.read_magic(BytesIO(format.magic(255, 255)))
  99. (255, 255)
  100. >>> format.read_magic(BytesIO(format.magic(2, 5)))
  101. (2, 5)
  102. Test the header writing.
  103. >>> for arr in basic_arrays + record_arrays:
  104. ... f = BytesIO()
  105. ... format.write_array_header_1_0(f, arr) # XXX: arr is not a dict, items gets called on it
  106. ... print(repr(f.getvalue()))
  107. ...
  108. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n"
  109. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n"
  110. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n"
  111. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n"
  112. "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n"
  113. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n"
  114. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n"
  115. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n"
  116. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n"
  117. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n"
  118. "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n"
  119. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n"
  120. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n"
  121. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n"
  122. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n"
  123. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n"
  124. "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n"
  125. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n"
  126. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n"
  127. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n"
  128. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n"
  129. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n"
  130. "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n"
  131. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n"
  132. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (0,)} \n"
  133. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': ()} \n"
  134. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (15,)} \n"
  135. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 5)} \n"
  136. "F\x00{'descr': '<u2', 'fortran_order': True, 'shape': (5, 3)} \n"
  137. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 3)} \n"
  138. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (0,)} \n"
  139. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': ()} \n"
  140. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (15,)} \n"
  141. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 5)} \n"
  142. "F\x00{'descr': '>u2', 'fortran_order': True, 'shape': (5, 3)} \n"
  143. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 3)} \n"
  144. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (0,)} \n"
  145. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': ()} \n"
  146. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (15,)} \n"
  147. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 5)} \n"
  148. "F\x00{'descr': '<i2', 'fortran_order': True, 'shape': (5, 3)} \n"
  149. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 3)} \n"
  150. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (0,)} \n"
  151. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': ()} \n"
  152. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (15,)} \n"
  153. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 5)} \n"
  154. "F\x00{'descr': '>i2', 'fortran_order': True, 'shape': (5, 3)} \n"
  155. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 3)} \n"
  156. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (0,)} \n"
  157. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': ()} \n"
  158. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (15,)} \n"
  159. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 5)} \n"
  160. "F\x00{'descr': '<u4', 'fortran_order': True, 'shape': (5, 3)} \n"
  161. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 3)} \n"
  162. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (0,)} \n"
  163. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': ()} \n"
  164. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (15,)} \n"
  165. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 5)} \n"
  166. "F\x00{'descr': '>u4', 'fortran_order': True, 'shape': (5, 3)} \n"
  167. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 3)} \n"
  168. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (0,)} \n"
  169. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': ()} \n"
  170. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (15,)} \n"
  171. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 5)} \n"
  172. "F\x00{'descr': '<i4', 'fortran_order': True, 'shape': (5, 3)} \n"
  173. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 3)} \n"
  174. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (0,)} \n"
  175. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': ()} \n"
  176. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (15,)} \n"
  177. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 5)} \n"
  178. "F\x00{'descr': '>i4', 'fortran_order': True, 'shape': (5, 3)} \n"
  179. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 3)} \n"
  180. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (0,)} \n"
  181. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': ()} \n"
  182. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (15,)} \n"
  183. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 5)} \n"
  184. "F\x00{'descr': '<u8', 'fortran_order': True, 'shape': (5, 3)} \n"
  185. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 3)} \n"
  186. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (0,)} \n"
  187. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': ()} \n"
  188. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (15,)} \n"
  189. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 5)} \n"
  190. "F\x00{'descr': '>u8', 'fortran_order': True, 'shape': (5, 3)} \n"
  191. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 3)} \n"
  192. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (0,)} \n"
  193. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': ()} \n"
  194. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (15,)} \n"
  195. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 5)} \n"
  196. "F\x00{'descr': '<i8', 'fortran_order': True, 'shape': (5, 3)} \n"
  197. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 3)} \n"
  198. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (0,)} \n"
  199. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': ()} \n"
  200. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (15,)} \n"
  201. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 5)} \n"
  202. "F\x00{'descr': '>i8', 'fortran_order': True, 'shape': (5, 3)} \n"
  203. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 3)} \n"
  204. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (0,)} \n"
  205. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': ()} \n"
  206. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (15,)} \n"
  207. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 5)} \n"
  208. "F\x00{'descr': '<f4', 'fortran_order': True, 'shape': (5, 3)} \n"
  209. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 3)} \n"
  210. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (0,)} \n"
  211. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': ()} \n"
  212. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (15,)} \n"
  213. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 5)} \n"
  214. "F\x00{'descr': '>f4', 'fortran_order': True, 'shape': (5, 3)} \n"
  215. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 3)} \n"
  216. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (0,)} \n"
  217. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': ()} \n"
  218. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (15,)} \n"
  219. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 5)} \n"
  220. "F\x00{'descr': '<f8', 'fortran_order': True, 'shape': (5, 3)} \n"
  221. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3)} \n"
  222. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (0,)} \n"
  223. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': ()} \n"
  224. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (15,)} \n"
  225. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 5)} \n"
  226. "F\x00{'descr': '>f8', 'fortran_order': True, 'shape': (5, 3)} \n"
  227. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 3)} \n"
  228. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (0,)} \n"
  229. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': ()} \n"
  230. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (15,)} \n"
  231. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 5)} \n"
  232. "F\x00{'descr': '<c8', 'fortran_order': True, 'shape': (5, 3)} \n"
  233. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 3)} \n"
  234. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (0,)} \n"
  235. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': ()} \n"
  236. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (15,)} \n"
  237. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 5)} \n"
  238. "F\x00{'descr': '>c8', 'fortran_order': True, 'shape': (5, 3)} \n"
  239. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 3)} \n"
  240. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (0,)} \n"
  241. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': ()} \n"
  242. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (15,)} \n"
  243. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 5)} \n"
  244. "F\x00{'descr': '<c16', 'fortran_order': True, 'shape': (5, 3)} \n"
  245. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 3)} \n"
  246. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (0,)} \n"
  247. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': ()} \n"
  248. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (15,)} \n"
  249. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 5)} \n"
  250. "F\x00{'descr': '>c16', 'fortran_order': True, 'shape': (5, 3)} \n"
  251. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 3)} \n"
  252. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n"
  253. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n"
  254. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n"
  255. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n"
  256. "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n"
  257. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n"
  258. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n"
  259. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n"
  260. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n"
  261. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n"
  262. "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n"
  263. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n"
  264. "v\x00{'descr': [('x', '<i4', (2,)), ('y', '<f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  265. "\x16\x02{'descr': [('x', '<i4', (2,)),\n ('Info',\n [('value', '<c16'),\n ('y2', '<f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '<c16', (2,)),\n ('y3', '<f8', (2,)),\n ('z3', '<u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '<U8'), ('Value', '<c16')]),\n ('y', '<f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  266. "v\x00{'descr': [('x', '>i4', (2,)), ('y', '>f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  267. "\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', '>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  268. '''
  269. import os
  270. import sys
  271. import warnings
  272. from io import BytesIO
  273. import pytest
  274. import numpy as np
  275. from numpy.lib import format
  276. from numpy.testing import (
  277. IS_64BIT,
  278. IS_PYPY,
  279. IS_WASM,
  280. assert_,
  281. assert_array_equal,
  282. assert_raises,
  283. assert_raises_regex,
  284. )
  285. from numpy.testing._private.utils import requires_memory
  286. # Generate some basic arrays to test with.
  287. scalars = [
  288. np.uint8,
  289. np.int8,
  290. np.uint16,
  291. np.int16,
  292. np.uint32,
  293. np.int32,
  294. np.uint64,
  295. np.int64,
  296. np.float32,
  297. np.float64,
  298. np.complex64,
  299. np.complex128,
  300. object,
  301. ]
  302. basic_arrays = []
  303. for scalar in scalars:
  304. for endian in '<>':
  305. dtype = np.dtype(scalar).newbyteorder(endian)
  306. basic = np.arange(1500).astype(dtype)
  307. basic_arrays.extend([
  308. # Empty
  309. np.array([], dtype=dtype),
  310. # Rank-0
  311. np.array(10, dtype=dtype),
  312. # 1-D
  313. basic,
  314. # 2-D C-contiguous
  315. basic.reshape((30, 50)),
  316. # 2-D F-contiguous
  317. basic.reshape((30, 50)).T,
  318. # 2-D non-contiguous
  319. basic.reshape((30, 50))[::-1, ::2],
  320. ])
  321. # More complicated record arrays.
  322. # This is the structure of the table used for plain objects:
  323. #
  324. # +-+-+-+
  325. # |x|y|z|
  326. # +-+-+-+
  327. # Structure of a plain array description:
  328. Pdescr = [
  329. ('x', 'i4', (2,)),
  330. ('y', 'f8', (2, 2)),
  331. ('z', 'u1')]
  332. # A plain list of tuples with values for testing:
  333. PbufferT = [
  334. # x y z
  335. ([3, 2], [[6., 4.], [6., 4.]], 8),
  336. ([4, 3], [[7., 5.], [7., 5.]], 9),
  337. ]
  338. # This is the structure of the table used for nested objects (DON'T PANIC!):
  339. #
  340. # +-+---------------------------------+-----+----------+-+-+
  341. # |x|Info |color|info |y|z|
  342. # | +-----+--+----------------+----+--+ +----+-----+ | |
  343. # | |value|y2|Info2 |name|z2| |Name|Value| | |
  344. # | | | +----+-----+--+--+ | | | | | | |
  345. # | | | |name|value|y3|z3| | | | | | | |
  346. # +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+
  347. #
  348. # The corresponding nested array description:
  349. Ndescr = [
  350. ('x', 'i4', (2,)),
  351. ('Info', [
  352. ('value', 'c16'),
  353. ('y2', 'f8'),
  354. ('Info2', [
  355. ('name', 'S2'),
  356. ('value', 'c16', (2,)),
  357. ('y3', 'f8', (2,)),
  358. ('z3', 'u4', (2,))]),
  359. ('name', 'S2'),
  360. ('z2', 'b1')]),
  361. ('color', 'S2'),
  362. ('info', [
  363. ('Name', 'U8'),
  364. ('Value', 'c16')]),
  365. ('y', 'f8', (2, 2)),
  366. ('z', 'u1')]
  367. NbufferT = [
  368. ([3, 2], (6j, 6., ('nn', [6j, 4j], [6., 4.], [1, 2]), 'NN', True),
  369. 'cc', ('NN', 6j), [[6., 4.], [6., 4.]], 8),
  370. ([4, 3], (7j, 7., ('oo', [7j, 5j], [7., 5.], [2, 1]), 'OO', False),
  371. 'dd', ('OO', 7j), [[7., 5.], [7., 5.]], 9),
  372. ]
  373. record_arrays = [
  374. np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')),
  375. np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')),
  376. np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')),
  377. np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')),
  378. np.zeros(1, dtype=[('c', ('<f8', (5,)), (2,))])
  379. ]
  380. # BytesIO that reads a random number of bytes at a time
  381. class BytesIOSRandomSize(BytesIO):
  382. def read(self, size=None):
  383. import random
  384. size = random.randint(1, size)
  385. return super().read(size)
  386. def roundtrip(arr):
  387. f = BytesIO()
  388. format.write_array(f, arr)
  389. f2 = BytesIO(f.getvalue())
  390. arr2 = format.read_array(f2, allow_pickle=True)
  391. return arr2
  392. def roundtrip_randsize(arr):
  393. f = BytesIO()
  394. format.write_array(f, arr)
  395. f2 = BytesIOSRandomSize(f.getvalue())
  396. arr2 = format.read_array(f2)
  397. return arr2
  398. def roundtrip_truncated(arr):
  399. f = BytesIO()
  400. format.write_array(f, arr)
  401. # BytesIO is one byte short
  402. f2 = BytesIO(f.getvalue()[0:-1])
  403. arr2 = format.read_array(f2)
  404. return arr2
  405. def assert_equal_(o1, o2):
  406. assert_(o1 == o2)
  407. def test_roundtrip():
  408. for arr in basic_arrays + record_arrays:
  409. arr2 = roundtrip(arr)
  410. assert_array_equal(arr, arr2)
  411. def test_roundtrip_randsize():
  412. for arr in basic_arrays + record_arrays:
  413. if arr.dtype != object:
  414. arr2 = roundtrip_randsize(arr)
  415. assert_array_equal(arr, arr2)
  416. def test_roundtrip_truncated():
  417. for arr in basic_arrays:
  418. if arr.dtype != object:
  419. assert_raises(ValueError, roundtrip_truncated, arr)
  420. def test_file_truncated(tmp_path):
  421. path = tmp_path / "a.npy"
  422. for arr in basic_arrays:
  423. if arr.dtype != object:
  424. with open(path, 'wb') as f:
  425. format.write_array(f, arr)
  426. # truncate the file by one byte
  427. with open(path, 'rb+') as f:
  428. f.seek(-1, os.SEEK_END)
  429. f.truncate()
  430. with open(path, 'rb') as f:
  431. with pytest.raises(
  432. ValueError,
  433. match=(
  434. r"EOF: reading array header, "
  435. r"expected (\d+) bytes got (\d+)"
  436. ) if arr.size == 0 else (
  437. r"Failed to read all data for array\. "
  438. r"Expected \(.*?\) = (\d+) elements, "
  439. r"could only read (\d+) elements\. "
  440. r"\(file seems not fully written\?\)"
  441. )
  442. ):
  443. _ = format.read_array(f)
  444. def test_long_str():
  445. # check items larger than internal buffer size, gh-4027
  446. long_str_arr = np.ones(1, dtype=np.dtype((str, format.BUFFER_SIZE + 1)))
  447. long_str_arr2 = roundtrip(long_str_arr)
  448. assert_array_equal(long_str_arr, long_str_arr2)
  449. @pytest.mark.skipif(IS_WASM, reason="memmap doesn't work correctly")
  450. @pytest.mark.slow
  451. def test_memmap_roundtrip(tmpdir):
  452. for i, arr in enumerate(basic_arrays + record_arrays):
  453. if arr.dtype.hasobject:
  454. # Skip these since they can't be mmap'ed.
  455. continue
  456. # Write it out normally and through mmap.
  457. nfn = os.path.join(tmpdir, f'normal{i}.npy')
  458. mfn = os.path.join(tmpdir, f'memmap{i}.npy')
  459. with open(nfn, 'wb') as fp:
  460. format.write_array(fp, arr)
  461. fortran_order = (
  462. arr.flags.f_contiguous and not arr.flags.c_contiguous)
  463. ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype,
  464. shape=arr.shape, fortran_order=fortran_order)
  465. ma[...] = arr
  466. ma.flush()
  467. # Check that both of these files' contents are the same.
  468. with open(nfn, 'rb') as fp:
  469. normal_bytes = fp.read()
  470. with open(mfn, 'rb') as fp:
  471. memmap_bytes = fp.read()
  472. assert_equal_(normal_bytes, memmap_bytes)
  473. # Check that reading the file using memmap works.
  474. ma = format.open_memmap(nfn, mode='r')
  475. ma.flush()
  476. def test_compressed_roundtrip(tmpdir):
  477. arr = np.random.rand(200, 200)
  478. npz_file = os.path.join(tmpdir, 'compressed.npz')
  479. np.savez_compressed(npz_file, arr=arr)
  480. with np.load(npz_file) as npz:
  481. arr1 = npz['arr']
  482. assert_array_equal(arr, arr1)
  483. # aligned
  484. dt1 = np.dtype('i1, i4, i1', align=True)
  485. # non-aligned, explicit offsets
  486. dt2 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
  487. 'offsets': [1, 6]})
  488. # nested struct-in-struct
  489. dt3 = np.dtype({'names': ['c', 'd'], 'formats': ['i4', dt2]})
  490. # field with '' name
  491. dt4 = np.dtype({'names': ['a', '', 'b'], 'formats': ['i4'] * 3})
  492. # titles
  493. dt5 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
  494. 'offsets': [1, 6], 'titles': ['aa', 'bb']})
  495. # empty
  496. dt6 = np.dtype({'names': [], 'formats': [], 'itemsize': 8})
  497. @pytest.mark.parametrize("dt", [dt1, dt2, dt3, dt4, dt5, dt6])
  498. def test_load_padded_dtype(tmpdir, dt):
  499. arr = np.zeros(3, dt)
  500. for i in range(3):
  501. arr[i] = i + 5
  502. npz_file = os.path.join(tmpdir, 'aligned.npz')
  503. np.savez(npz_file, arr=arr)
  504. with np.load(npz_file) as npz:
  505. arr1 = npz['arr']
  506. assert_array_equal(arr, arr1)
  507. @pytest.mark.skipif(sys.version_info >= (3, 12), reason="see gh-23988")
  508. @pytest.mark.xfail(IS_WASM, reason="Emscripten NODEFS has a buggy dup")
  509. def test_python2_python3_interoperability():
  510. fname = 'win64python2.npy'
  511. path = os.path.join(os.path.dirname(__file__), 'data', fname)
  512. with pytest.warns(UserWarning, match="Reading.*this warning\\."):
  513. data = np.load(path)
  514. assert_array_equal(data, np.ones(2))
  515. @pytest.mark.filterwarnings(
  516. "ignore:.*align should be passed:numpy.exceptions.VisibleDeprecationWarning")
  517. def test_pickle_python2_python3():
  518. # Test that loading object arrays saved on Python 2 works both on
  519. # Python 2 and Python 3 and vice versa
  520. data_dir = os.path.join(os.path.dirname(__file__), 'data')
  521. expected = np.array([None, range, '\u512a\u826f',
  522. b'\xe4\xb8\x8d\xe8\x89\xaf'],
  523. dtype=object)
  524. for fname in ['py2-np0-objarr.npy', 'py2-objarr.npy', 'py2-objarr.npz',
  525. 'py3-objarr.npy', 'py3-objarr.npz']:
  526. path = os.path.join(data_dir, fname)
  527. for encoding in ['bytes', 'latin1']:
  528. data_f = np.load(path, allow_pickle=True, encoding=encoding)
  529. if fname.endswith('.npz'):
  530. data = data_f['x']
  531. data_f.close()
  532. else:
  533. data = data_f
  534. if encoding == 'latin1' and fname.startswith('py2'):
  535. assert_(isinstance(data[3], str))
  536. assert_array_equal(data[:-1], expected[:-1])
  537. # mojibake occurs
  538. assert_array_equal(data[-1].encode(encoding), expected[-1])
  539. else:
  540. assert_(isinstance(data[3], bytes))
  541. assert_array_equal(data, expected)
  542. if fname.startswith('py2'):
  543. if fname.endswith('.npz'):
  544. data = np.load(path, allow_pickle=True)
  545. assert_raises(UnicodeError, data.__getitem__, 'x')
  546. data.close()
  547. data = np.load(path, allow_pickle=True, fix_imports=False,
  548. encoding='latin1')
  549. assert_raises(ImportError, data.__getitem__, 'x')
  550. data.close()
  551. else:
  552. assert_raises(UnicodeError, np.load, path,
  553. allow_pickle=True)
  554. assert_raises(ImportError, np.load, path,
  555. allow_pickle=True, fix_imports=False,
  556. encoding='latin1')
  557. def test_pickle_disallow(tmpdir):
  558. data_dir = os.path.join(os.path.dirname(__file__), 'data')
  559. path = os.path.join(data_dir, 'py2-objarr.npy')
  560. assert_raises(ValueError, np.load, path,
  561. allow_pickle=False, encoding='latin1')
  562. path = os.path.join(data_dir, 'py2-objarr.npz')
  563. with np.load(path, allow_pickle=False, encoding='latin1') as f:
  564. assert_raises(ValueError, f.__getitem__, 'x')
  565. path = os.path.join(tmpdir, 'pickle-disabled.npy')
  566. assert_raises(ValueError, np.save, path, np.array([None], dtype=object),
  567. allow_pickle=False)
  568. @pytest.mark.parametrize('dt', [
  569. # Not testing a subarray only dtype, because it cannot be attached to an array
  570. # (and would fail the test as of writing this.)
  571. np.dtype([('a', np.int8),
  572. ('b', np.int16),
  573. ('c', np.int32),
  574. ], align=True),
  575. np.dtype([('x', np.dtype(({'names': ['a', 'b'],
  576. 'formats': ['i1', 'i1'],
  577. 'offsets': [0, 4],
  578. 'itemsize': 8,
  579. },
  580. (3,))),
  581. (4,),
  582. )]),
  583. np.dtype([('x',
  584. ('<f8', (5,)),
  585. (2,),
  586. )]),
  587. np.dtype([('x', np.dtype((
  588. np.dtype((
  589. np.dtype({'names': ['a', 'b'],
  590. 'formats': ['i1', 'i1'],
  591. 'offsets': [0, 4],
  592. 'itemsize': 8}),
  593. (3,)
  594. )),
  595. (4,)
  596. )))
  597. ]),
  598. np.dtype([
  599. ('a', np.dtype((
  600. np.dtype((
  601. np.dtype((
  602. np.dtype([
  603. ('a', int),
  604. ('b', np.dtype({'names': ['a', 'b'],
  605. 'formats': ['i1', 'i1'],
  606. 'offsets': [0, 4],
  607. 'itemsize': 8})),
  608. ]),
  609. (3,),
  610. )),
  611. (4,),
  612. )),
  613. (5,),
  614. )))
  615. ]),
  616. ])
  617. def test_descr_to_dtype(dt):
  618. dt1 = format.descr_to_dtype(dt.descr)
  619. assert_equal_(dt1, dt)
  620. arr1 = np.zeros(3, dt)
  621. arr2 = roundtrip(arr1)
  622. assert_array_equal(arr1, arr2)
  623. def test_version_2_0():
  624. f = BytesIO()
  625. # requires more than 2 byte for header
  626. dt = [(("%d" % i) * 100, float) for i in range(500)]
  627. d = np.ones(1000, dtype=dt)
  628. format.write_array(f, d, version=(2, 0))
  629. with warnings.catch_warnings(record=True) as w:
  630. warnings.filterwarnings('always', '', UserWarning)
  631. format.write_array(f, d)
  632. assert_(w[0].category is UserWarning)
  633. # check alignment of data portion
  634. f.seek(0)
  635. header = f.readline()
  636. assert_(len(header) % format.ARRAY_ALIGN == 0)
  637. f.seek(0)
  638. n = format.read_array(f, max_header_size=200000)
  639. assert_array_equal(d, n)
  640. # 1.0 requested but data cannot be saved this way
  641. assert_raises(ValueError, format.write_array, f, d, (1, 0))
  642. @pytest.mark.skipif(IS_WASM, reason="memmap doesn't work correctly")
  643. def test_version_2_0_memmap(tmpdir):
  644. # requires more than 2 byte for header
  645. dt = [(("%d" % i) * 100, float) for i in range(500)]
  646. d = np.ones(1000, dtype=dt)
  647. tf1 = os.path.join(tmpdir, 'version2_01.npy')
  648. tf2 = os.path.join(tmpdir, 'version2_02.npy')
  649. # 1.0 requested but data cannot be saved this way
  650. assert_raises(ValueError, format.open_memmap, tf1, mode='w+', dtype=d.dtype,
  651. shape=d.shape, version=(1, 0))
  652. ma = format.open_memmap(tf1, mode='w+', dtype=d.dtype,
  653. shape=d.shape, version=(2, 0))
  654. ma[...] = d
  655. ma.flush()
  656. ma = format.open_memmap(tf1, mode='r', max_header_size=200000)
  657. assert_array_equal(ma, d)
  658. with warnings.catch_warnings(record=True) as w:
  659. warnings.filterwarnings('always', '', UserWarning)
  660. ma = format.open_memmap(tf2, mode='w+', dtype=d.dtype,
  661. shape=d.shape, version=None)
  662. assert_(w[0].category is UserWarning)
  663. ma[...] = d
  664. ma.flush()
  665. ma = format.open_memmap(tf2, mode='r', max_header_size=200000)
  666. assert_array_equal(ma, d)
  667. @pytest.mark.parametrize("mmap_mode", ["r", None])
  668. def test_huge_header(tmpdir, mmap_mode):
  669. f = os.path.join(tmpdir, 'large_header.npy')
  670. arr = np.array(1, dtype="i," * 10000 + "i")
  671. with pytest.warns(UserWarning, match=".*format 2.0"):
  672. np.save(f, arr)
  673. with pytest.raises(ValueError, match="Header.*large"):
  674. np.load(f, mmap_mode=mmap_mode)
  675. with pytest.raises(ValueError, match="Header.*large"):
  676. np.load(f, mmap_mode=mmap_mode, max_header_size=20000)
  677. res = np.load(f, mmap_mode=mmap_mode, allow_pickle=True)
  678. assert_array_equal(res, arr)
  679. res = np.load(f, mmap_mode=mmap_mode, max_header_size=180000)
  680. assert_array_equal(res, arr)
  681. def test_huge_header_npz(tmpdir):
  682. f = os.path.join(tmpdir, 'large_header.npz')
  683. arr = np.array(1, dtype="i," * 10000 + "i")
  684. with pytest.warns(UserWarning, match=".*format 2.0"):
  685. np.savez(f, arr=arr)
  686. # Only getting the array from the file actually reads it
  687. with pytest.raises(ValueError, match="Header.*large"):
  688. np.load(f)["arr"]
  689. with pytest.raises(ValueError, match="Header.*large"):
  690. np.load(f, max_header_size=20000)["arr"]
  691. res = np.load(f, allow_pickle=True)["arr"]
  692. assert_array_equal(res, arr)
  693. res = np.load(f, max_header_size=180000)["arr"]
  694. assert_array_equal(res, arr)
  695. def test_write_version():
  696. f = BytesIO()
  697. arr = np.arange(1)
  698. # These should pass.
  699. format.write_array(f, arr, version=(1, 0))
  700. format.write_array(f, arr)
  701. format.write_array(f, arr, version=None)
  702. format.write_array(f, arr)
  703. format.write_array(f, arr, version=(2, 0))
  704. format.write_array(f, arr)
  705. # These should all fail.
  706. bad_versions = [
  707. (1, 1),
  708. (0, 0),
  709. (0, 1),
  710. (2, 2),
  711. (255, 255),
  712. ]
  713. for version in bad_versions:
  714. with assert_raises_regex(ValueError,
  715. 'we only support format version.*'):
  716. format.write_array(f, arr, version=version)
  717. bad_version_magic = [
  718. b'\x93NUMPY\x01\x01',
  719. b'\x93NUMPY\x00\x00',
  720. b'\x93NUMPY\x00\x01',
  721. b'\x93NUMPY\x02\x00',
  722. b'\x93NUMPY\x02\x02',
  723. b'\x93NUMPY\xff\xff',
  724. ]
  725. malformed_magic = [
  726. b'\x92NUMPY\x01\x00',
  727. b'\x00NUMPY\x01\x00',
  728. b'\x93numpy\x01\x00',
  729. b'\x93MATLB\x01\x00',
  730. b'\x93NUMPY\x01',
  731. b'\x93NUMPY',
  732. b'',
  733. ]
  734. def test_read_magic():
  735. s1 = BytesIO()
  736. s2 = BytesIO()
  737. arr = np.ones((3, 6), dtype=float)
  738. format.write_array(s1, arr, version=(1, 0))
  739. format.write_array(s2, arr, version=(2, 0))
  740. s1.seek(0)
  741. s2.seek(0)
  742. version1 = format.read_magic(s1)
  743. version2 = format.read_magic(s2)
  744. assert_(version1 == (1, 0))
  745. assert_(version2 == (2, 0))
  746. assert_(s1.tell() == format.MAGIC_LEN)
  747. assert_(s2.tell() == format.MAGIC_LEN)
  748. def test_read_magic_bad_magic():
  749. for magic in malformed_magic:
  750. f = BytesIO(magic)
  751. assert_raises(ValueError, format.read_array, f)
  752. def test_read_version_1_0_bad_magic():
  753. for magic in bad_version_magic + malformed_magic:
  754. f = BytesIO(magic)
  755. assert_raises(ValueError, format.read_array, f)
  756. def test_bad_magic_args():
  757. assert_raises(ValueError, format.magic, -1, 1)
  758. assert_raises(ValueError, format.magic, 256, 1)
  759. assert_raises(ValueError, format.magic, 1, -1)
  760. assert_raises(ValueError, format.magic, 1, 256)
  761. def test_large_header():
  762. s = BytesIO()
  763. d = {'shape': (), 'fortran_order': False, 'descr': '<i8'}
  764. format.write_array_header_1_0(s, d)
  765. s = BytesIO()
  766. d['descr'] = [('x' * 256 * 256, '<i8')]
  767. assert_raises(ValueError, format.write_array_header_1_0, s, d)
  768. def test_read_array_header_1_0():
  769. s = BytesIO()
  770. arr = np.ones((3, 6), dtype=float)
  771. format.write_array(s, arr, version=(1, 0))
  772. s.seek(format.MAGIC_LEN)
  773. shape, fortran, dtype = format.read_array_header_1_0(s)
  774. assert_(s.tell() % format.ARRAY_ALIGN == 0)
  775. assert_((shape, fortran, dtype) == ((3, 6), False, float))
  776. def test_read_array_header_2_0():
  777. s = BytesIO()
  778. arr = np.ones((3, 6), dtype=float)
  779. format.write_array(s, arr, version=(2, 0))
  780. s.seek(format.MAGIC_LEN)
  781. shape, fortran, dtype = format.read_array_header_2_0(s)
  782. assert_(s.tell() % format.ARRAY_ALIGN == 0)
  783. assert_((shape, fortran, dtype) == ((3, 6), False, float))
  784. def test_bad_header():
  785. # header of length less than 2 should fail
  786. s = BytesIO()
  787. assert_raises(ValueError, format.read_array_header_1_0, s)
  788. s = BytesIO(b'1')
  789. assert_raises(ValueError, format.read_array_header_1_0, s)
  790. # header shorter than indicated size should fail
  791. s = BytesIO(b'\x01\x00')
  792. assert_raises(ValueError, format.read_array_header_1_0, s)
  793. # headers without the exact keys required should fail
  794. # d = {"shape": (1, 2),
  795. # "descr": "x"}
  796. s = BytesIO(
  797. b"\x93NUMPY\x01\x006\x00{'descr': 'x', 'shape': (1, 2), }"
  798. b" \n"
  799. )
  800. assert_raises(ValueError, format.read_array_header_1_0, s)
  801. d = {"shape": (1, 2),
  802. "fortran_order": False,
  803. "descr": "x",
  804. "extrakey": -1}
  805. s = BytesIO()
  806. format.write_array_header_1_0(s, d)
  807. assert_raises(ValueError, format.read_array_header_1_0, s)
  808. def test_large_file_support(tmpdir):
  809. if (sys.platform == 'win32' or sys.platform == 'cygwin'):
  810. pytest.skip("Unknown if Windows has sparse filesystems")
  811. # try creating a large sparse file
  812. tf_name = os.path.join(tmpdir, 'sparse_file')
  813. try:
  814. # seek past end would work too, but linux truncate somewhat
  815. # increases the chances that we have a sparse filesystem and can
  816. # avoid actually writing 5GB
  817. import subprocess as sp
  818. sp.check_call(["truncate", "-s", "5368709120", tf_name])
  819. except Exception:
  820. pytest.skip("Could not create 5GB large file")
  821. # write a small array to the end
  822. with open(tf_name, "wb") as f:
  823. f.seek(5368709120)
  824. d = np.arange(5)
  825. np.save(f, d)
  826. # read it back
  827. with open(tf_name, "rb") as f:
  828. f.seek(5368709120)
  829. r = np.load(f)
  830. assert_array_equal(r, d)
  831. @pytest.mark.skipif(IS_PYPY, reason="flaky on PyPy")
  832. @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system")
  833. @pytest.mark.slow
  834. @requires_memory(free_bytes=2 * 2**30)
  835. @pytest.mark.thread_unsafe(reason="crashes with low memory")
  836. def test_large_archive(tmpdir):
  837. # Regression test for product of saving arrays with dimensions of array
  838. # having a product that doesn't fit in int32. See gh-7598 for details.
  839. shape = (2**30, 2)
  840. try:
  841. a = np.empty(shape, dtype=np.uint8)
  842. except MemoryError:
  843. pytest.skip("Could not create large file")
  844. fname = os.path.join(tmpdir, "large_archive")
  845. with open(fname, "wb") as f:
  846. np.savez(f, arr=a)
  847. del a
  848. with open(fname, "rb") as f:
  849. new_a = np.load(f)["arr"]
  850. assert new_a.shape == shape
  851. def test_empty_npz(tmpdir):
  852. # Test for gh-9989
  853. fname = os.path.join(tmpdir, "nothing.npz")
  854. np.savez(fname)
  855. with np.load(fname) as nps:
  856. pass
  857. def test_unicode_field_names(tmpdir):
  858. # gh-7391
  859. arr = np.array([
  860. (1, 3),
  861. (1, 2),
  862. (1, 3),
  863. (1, 2)
  864. ], dtype=[
  865. ('int', int),
  866. ('\N{CJK UNIFIED IDEOGRAPH-6574}\N{CJK UNIFIED IDEOGRAPH-5F62}', int)
  867. ])
  868. fname = os.path.join(tmpdir, "unicode.npy")
  869. with open(fname, 'wb') as f:
  870. format.write_array(f, arr, version=(3, 0))
  871. with open(fname, 'rb') as f:
  872. arr2 = format.read_array(f)
  873. assert_array_equal(arr, arr2)
  874. # notifies the user that 3.0 is selected
  875. with open(fname, 'wb') as f:
  876. with pytest.warns(UserWarning):
  877. format.write_array(f, arr, version=None)
  878. def test_header_growth_axis():
  879. for is_fortran_array, dtype_space, expected_header_length in [
  880. [False, 22, 128], [False, 23, 192], [True, 23, 128], [True, 24, 192]
  881. ]:
  882. for size in [10**i for i in range(format.GROWTH_AXIS_MAX_DIGITS)]:
  883. fp = BytesIO()
  884. format.write_array_header_1_0(fp, {
  885. 'shape': (2, size) if is_fortran_array else (size, 2),
  886. 'fortran_order': is_fortran_array,
  887. 'descr': np.dtype([(' ' * dtype_space, int)])
  888. })
  889. assert len(fp.getvalue()) == expected_header_length
  890. @pytest.mark.parametrize('dt', [
  891. np.dtype({'names': ['a', 'b'], 'formats': [float, np.dtype('S3',
  892. metadata={'some': 'stuff'})]}),
  893. np.dtype(int, metadata={'some': 'stuff'}),
  894. np.dtype([('subarray', (int, (2,)))], metadata={'some': 'stuff'}),
  895. # recursive: metadata on the field of a dtype
  896. np.dtype({'names': ['a', 'b'], 'formats': [
  897. float, np.dtype({'names': ['c'], 'formats': [np.dtype(int, metadata={})]})
  898. ]}),
  899. ])
  900. @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8),
  901. reason="PyPy bug in error formatting")
  902. def test_metadata_dtype(dt):
  903. # gh-14142
  904. arr = np.ones(10, dtype=dt)
  905. buf = BytesIO()
  906. with pytest.warns(UserWarning):
  907. np.save(buf, arr)
  908. buf.seek(0)
  909. # Loading should work (metadata was stripped):
  910. arr2 = np.load(buf)
  911. # BUG: assert_array_equal does not check metadata
  912. from numpy.lib._utils_impl import drop_metadata
  913. assert_array_equal(arr, arr2)
  914. assert drop_metadata(arr.dtype) is not arr.dtype
  915. assert drop_metadata(arr2.dtype) is arr2.dtype