test_wavfile.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. import os
  2. import sys
  3. from io import (BytesIO, UnsupportedOperation)
  4. import threading
  5. import warnings
  6. import numpy as np
  7. from numpy.testing import (assert_equal, assert_, assert_array_equal,
  8. break_cycles, IS_PYPY)
  9. import pytest
  10. from pytest import raises, warns
  11. from scipy.io import wavfile
  12. def datafile(fn):
  13. return os.path.join(os.path.dirname(__file__), 'data', fn)
  14. def test_read_1():
  15. # 32-bit PCM (which uses extensible format)
  16. for mmap in [False, True]:
  17. filename = 'test-44100Hz-le-1ch-4bytes.wav'
  18. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  19. assert_equal(rate, 44100)
  20. assert_(np.issubdtype(data.dtype, np.int32))
  21. assert_equal(data.shape, (4410,))
  22. del data
  23. def test_read_2():
  24. # 8-bit unsigned PCM
  25. for mmap in [False, True]:
  26. filename = 'test-8000Hz-le-2ch-1byteu.wav'
  27. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  28. assert_equal(rate, 8000)
  29. assert_(np.issubdtype(data.dtype, np.uint8))
  30. assert_equal(data.shape, (800, 2))
  31. del data
  32. def test_read_3():
  33. # Little-endian float
  34. for mmap in [False, True]:
  35. filename = 'test-44100Hz-2ch-32bit-float-le.wav'
  36. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  37. assert_equal(rate, 44100)
  38. assert_(np.issubdtype(data.dtype, np.float32))
  39. assert_equal(data.shape, (441, 2))
  40. del data
  41. def test_read_4():
  42. # Contains unsupported 'PEAK' chunk
  43. for mmap in [False, True]:
  44. with warnings.catch_warnings():
  45. warnings.filterwarnings(
  46. "ignore",
  47. "Chunk .non-data. not understood, skipping it",
  48. wavfile.WavFileWarning
  49. )
  50. filename = 'test-48000Hz-2ch-64bit-float-le-wavex.wav'
  51. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  52. assert_equal(rate, 48000)
  53. assert_(np.issubdtype(data.dtype, np.float64))
  54. assert_equal(data.shape, (480, 2))
  55. del data
  56. def test_read_5():
  57. # Big-endian float
  58. for mmap in [False, True]:
  59. filename = 'test-44100Hz-2ch-32bit-float-be.wav'
  60. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  61. assert_equal(rate, 44100)
  62. assert_(np.issubdtype(data.dtype, np.float32))
  63. assert_(data.dtype.byteorder == '>' or (sys.byteorder == 'big' and
  64. data.dtype.byteorder == '='))
  65. assert_equal(data.shape, (441, 2))
  66. del data
  67. def test_5_bit_odd_size_no_pad():
  68. # 5-bit, 1 B container, 5 channels, 9 samples, 45 B data chunk
  69. # Generated by LTspice, which incorrectly omits pad byte, but should be
  70. # readable anyway
  71. for mmap in [False, True]:
  72. filename = 'test-8000Hz-le-5ch-9S-5bit.wav'
  73. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  74. assert_equal(rate, 8000)
  75. assert_(np.issubdtype(data.dtype, np.uint8))
  76. assert_equal(data.shape, (9, 5))
  77. # 8-5 = 3 LSBits should be 0
  78. assert_equal(data & 0b00000111, 0)
  79. # Unsigned
  80. assert_equal(data.max(), 0b11111000) # Highest possible
  81. assert_equal(data[0, 0], 128) # Midpoint is 128 for <= 8-bit
  82. assert_equal(data.min(), 0) # Lowest possible
  83. del data
  84. def test_12_bit_even_size():
  85. # 12-bit, 2 B container, 4 channels, 9 samples, 72 B data chunk
  86. # Generated by LTspice from 1 Vpk sine waves
  87. for mmap in [False, True]:
  88. filename = 'test-8000Hz-le-4ch-9S-12bit.wav'
  89. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  90. assert_equal(rate, 8000)
  91. assert_(np.issubdtype(data.dtype, np.int16))
  92. assert_equal(data.shape, (9, 4))
  93. # 16-12 = 4 LSBits should be 0
  94. assert_equal(data & 0b00000000_00001111, 0)
  95. # Signed
  96. assert_equal(data.max(), 0b01111111_11110000) # Highest possible
  97. assert_equal(data[0, 0], 0) # Midpoint is 0 for >= 9-bit
  98. assert_equal(data.min(), -0b10000000_00000000) # Lowest possible
  99. del data
  100. def test_24_bit_odd_size_with_pad():
  101. # 24-bit, 3 B container, 3 channels, 5 samples, 45 B data chunk
  102. # Should not raise any warnings about the data chunk pad byte
  103. filename = 'test-8000Hz-le-3ch-5S-24bit.wav'
  104. rate, data = wavfile.read(datafile(filename), mmap=False)
  105. assert_equal(rate, 8000)
  106. assert_(np.issubdtype(data.dtype, np.int32))
  107. assert_equal(data.shape, (5, 3))
  108. # All LSBytes should be 0
  109. assert_equal(data & 0xff, 0)
  110. # Hand-made max/min samples under different conventions:
  111. # 2**(N-1) 2**(N-1)-1 LSB
  112. assert_equal(data, [[-0x8000_0000, -0x7fff_ff00, -0x200],
  113. [-0x4000_0000, -0x3fff_ff00, -0x100],
  114. [+0x0000_0000, +0x0000_0000, +0x000],
  115. [+0x4000_0000, +0x3fff_ff00, +0x100],
  116. [+0x7fff_ff00, +0x7fff_ff00, +0x200]])
  117. # ^ clipped
  118. def test_20_bit_extra_data():
  119. # 20-bit, 3 B container, 1 channel, 10 samples, 30 B data chunk
  120. # with extra data filling container beyond the bit depth
  121. filename = 'test-1234Hz-le-1ch-10S-20bit-extra.wav'
  122. rate, data = wavfile.read(datafile(filename), mmap=False)
  123. assert_equal(rate, 1234)
  124. assert_(np.issubdtype(data.dtype, np.int32))
  125. assert_equal(data.shape, (10,))
  126. # All LSBytes should still be 0, because 3 B container in 4 B dtype
  127. assert_equal(data & 0xff, 0)
  128. # But it should load the data beyond 20 bits
  129. assert_((data & 0xf00).any())
  130. # Full-scale positive/negative samples, then being halved each time
  131. assert_equal(data, [+0x7ffff000, # +full-scale 20-bit
  132. -0x7ffff000, # -full-scale 20-bit
  133. +0x7ffff000 >> 1, # +1/2
  134. -0x7ffff000 >> 1, # -1/2
  135. +0x7ffff000 >> 2, # +1/4
  136. -0x7ffff000 >> 2, # -1/4
  137. +0x7ffff000 >> 3, # +1/8
  138. -0x7ffff000 >> 3, # -1/8
  139. +0x7ffff000 >> 4, # +1/16
  140. -0x7ffff000 >> 4, # -1/16
  141. ])
  142. def test_36_bit_odd_size():
  143. # 36-bit, 5 B container, 3 channels, 5 samples, 75 B data chunk + pad
  144. filename = 'test-8000Hz-le-3ch-5S-36bit.wav'
  145. rate, data = wavfile.read(datafile(filename), mmap=False)
  146. assert_equal(rate, 8000)
  147. assert_(np.issubdtype(data.dtype, np.int64))
  148. assert_equal(data.shape, (5, 3))
  149. # 28 LSBits should be 0
  150. assert_equal(data & 0xfffffff, 0)
  151. # Hand-made max/min samples under different conventions:
  152. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  153. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_f000_0000, -0x2000_0000],
  154. [-0x4000_0000_0000_0000, -0x3fff_ffff_f000_0000, -0x1000_0000],
  155. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x0000_0000],
  156. [+0x4000_0000_0000_0000, +0x3fff_ffff_f000_0000, +0x1000_0000],
  157. [+0x7fff_ffff_f000_0000, +0x7fff_ffff_f000_0000, +0x2000_0000]]
  158. # ^ clipped
  159. assert_equal(data, correct)
  160. def test_45_bit_even_size():
  161. # 45-bit, 6 B container, 3 channels, 5 samples, 90 B data chunk
  162. filename = 'test-8000Hz-le-3ch-5S-45bit.wav'
  163. rate, data = wavfile.read(datafile(filename), mmap=False)
  164. assert_equal(rate, 8000)
  165. assert_(np.issubdtype(data.dtype, np.int64))
  166. assert_equal(data.shape, (5, 3))
  167. # 19 LSBits should be 0
  168. assert_equal(data & 0x7ffff, 0)
  169. # Hand-made max/min samples under different conventions:
  170. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  171. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_fff8_0000, -0x10_0000],
  172. [-0x4000_0000_0000_0000, -0x3fff_ffff_fff8_0000, -0x08_0000],
  173. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x00_0000],
  174. [+0x4000_0000_0000_0000, +0x3fff_ffff_fff8_0000, +0x08_0000],
  175. [+0x7fff_ffff_fff8_0000, +0x7fff_ffff_fff8_0000, +0x10_0000]]
  176. # ^ clipped
  177. assert_equal(data, correct)
  178. def test_53_bit_odd_size():
  179. # 53-bit, 7 B container, 3 channels, 5 samples, 105 B data chunk + pad
  180. filename = 'test-8000Hz-le-3ch-5S-53bit.wav'
  181. rate, data = wavfile.read(datafile(filename), mmap=False)
  182. assert_equal(rate, 8000)
  183. assert_(np.issubdtype(data.dtype, np.int64))
  184. assert_equal(data.shape, (5, 3))
  185. # 11 LSBits should be 0
  186. assert_equal(data & 0x7ff, 0)
  187. # Hand-made max/min samples under different conventions:
  188. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  189. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_ffff_f800, -0x1000],
  190. [-0x4000_0000_0000_0000, -0x3fff_ffff_ffff_f800, -0x0800],
  191. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x0000],
  192. [+0x4000_0000_0000_0000, +0x3fff_ffff_ffff_f800, +0x0800],
  193. [+0x7fff_ffff_ffff_f800, +0x7fff_ffff_ffff_f800, +0x1000]]
  194. # ^ clipped
  195. assert_equal(data, correct)
  196. def test_64_bit_even_size():
  197. # 64-bit, 8 B container, 3 channels, 5 samples, 120 B data chunk
  198. for mmap in [False, True]:
  199. filename = 'test-8000Hz-le-3ch-5S-64bit.wav'
  200. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  201. assert_equal(rate, 8000)
  202. assert_(np.issubdtype(data.dtype, np.int64))
  203. assert_equal(data.shape, (5, 3))
  204. # Hand-made max/min samples under different conventions:
  205. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  206. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_ffff_ffff, -0x2],
  207. [-0x4000_0000_0000_0000, -0x3fff_ffff_ffff_ffff, -0x1],
  208. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x0],
  209. [+0x4000_0000_0000_0000, +0x3fff_ffff_ffff_ffff, +0x1],
  210. [+0x7fff_ffff_ffff_ffff, +0x7fff_ffff_ffff_ffff, +0x2]]
  211. # ^ clipped
  212. assert_equal(data, correct)
  213. del data
  214. def test_unsupported_mmap():
  215. # Test containers that cannot be mapped to numpy types
  216. for filename in {'test-8000Hz-le-3ch-5S-24bit.wav',
  217. 'test-8000Hz-le-3ch-5S-36bit.wav',
  218. 'test-8000Hz-le-3ch-5S-45bit.wav',
  219. 'test-8000Hz-le-3ch-5S-53bit.wav',
  220. 'test-1234Hz-le-1ch-10S-20bit-extra.wav'}:
  221. with raises(ValueError, match="mmap.*not compatible"):
  222. rate, data = wavfile.read(datafile(filename), mmap=True)
  223. def test_rifx():
  224. # Compare equivalent RIFX and RIFF files
  225. for rifx, riff in {('test-44100Hz-be-1ch-4bytes.wav',
  226. 'test-44100Hz-le-1ch-4bytes.wav'),
  227. ('test-8000Hz-be-3ch-5S-24bit.wav',
  228. 'test-8000Hz-le-3ch-5S-24bit.wav')}:
  229. rate1, data1 = wavfile.read(datafile(rifx), mmap=False)
  230. rate2, data2 = wavfile.read(datafile(riff), mmap=False)
  231. assert_equal(rate1, rate2)
  232. assert_equal(data1, data2)
  233. def test_rf64():
  234. # Compare equivalent RF64 and RIFF files
  235. for rf64, riff in {('test-44100Hz-le-1ch-4bytes-rf64.wav',
  236. 'test-44100Hz-le-1ch-4bytes.wav'),
  237. ('test-8000Hz-le-3ch-5S-24bit-rf64.wav',
  238. 'test-8000Hz-le-3ch-5S-24bit.wav')}:
  239. rate1, data1 = wavfile.read(datafile(rf64), mmap=False)
  240. rate2, data2 = wavfile.read(datafile(riff), mmap=False)
  241. assert_array_equal(rate1, rate2)
  242. assert_array_equal(data1, data2)
  243. @pytest.mark.xslow
  244. def test_write_roundtrip_rf64(tmpdir):
  245. dtype = np.dtype("<i8")
  246. tmpfile = str(tmpdir.join('temp.wav'))
  247. rate = 44100
  248. data = np.random.randint(0, 127, (2**29,)).astype(dtype)
  249. wavfile.write(tmpfile, rate, data)
  250. rate2, data2 = wavfile.read(tmpfile, mmap=True)
  251. assert_equal(rate, rate2)
  252. msg = f"{data2.dtype} byteorder not in ('<', '=', '|')"
  253. assert data2.dtype.byteorder in ('<', '=', '|'), msg
  254. assert_array_equal(data, data2)
  255. # also test writing (gh-12176)
  256. data2[0] = 0
  257. # Fake a non-seekable file-like object without resorting to subprocesses.
  258. class Nonseekable:
  259. def __init__(self, fp):
  260. self.fp = fp
  261. def seekable(self):
  262. return False
  263. def read(self, size=-1, /):
  264. return self.fp.read(size)
  265. def close(self):
  266. self.fp.close()
  267. def test_streams():
  268. for filename in ['test-44100Hz-le-1ch-4bytes.wav',
  269. 'test-8000Hz-le-2ch-1byteu.wav',
  270. 'test-44100Hz-2ch-32bit-float-le.wav',
  271. 'test-44100Hz-2ch-32bit-float-be.wav',
  272. 'test-8000Hz-le-5ch-9S-5bit.wav',
  273. 'test-8000Hz-le-4ch-9S-12bit.wav',
  274. 'test-8000Hz-le-3ch-5S-24bit.wav',
  275. 'test-1234Hz-le-1ch-10S-20bit-extra.wav',
  276. 'test-8000Hz-le-3ch-5S-36bit.wav',
  277. 'test-8000Hz-le-3ch-5S-45bit.wav',
  278. 'test-8000Hz-le-3ch-5S-53bit.wav',
  279. 'test-8000Hz-le-3ch-5S-64bit.wav',
  280. 'test-44100Hz-be-1ch-4bytes.wav', # RIFX
  281. 'test-44100Hz-le-1ch-4bytes-rf64.wav']:
  282. dfname = datafile(filename)
  283. with open(dfname, 'rb') as fp1, open(dfname, 'rb') as fp2:
  284. rate1, data1 = wavfile.read(fp1)
  285. rate2, data2 = wavfile.read(Nonseekable(fp2))
  286. rate3, data3 = wavfile.read(dfname, mmap=False)
  287. assert_equal(rate1, rate3)
  288. assert_equal(rate2, rate3)
  289. assert_equal(data1, data3)
  290. assert_equal(data2, data3)
  291. def test_read_unknown_filetype_fail():
  292. # Not an RIFF
  293. for mmap in [False, True]:
  294. filename = 'example_1.nc'
  295. with open(datafile(filename), 'rb') as fp:
  296. with raises(ValueError, match="CDF.*'RIFF', 'RIFX', and 'RF64' supported"):
  297. wavfile.read(fp, mmap=mmap)
  298. def test_read_unknown_riff_form_type():
  299. # RIFF, but not WAVE form
  300. for mmap in [False, True]:
  301. filename = 'Transparent Busy.ani'
  302. with open(datafile(filename), 'rb') as fp:
  303. with raises(ValueError, match='Not a WAV file.*ACON'):
  304. wavfile.read(fp, mmap=mmap)
  305. def test_read_unknown_wave_format():
  306. # RIFF and WAVE, but not supported format
  307. for mmap in [False, True]:
  308. filename = 'test-8000Hz-le-1ch-1byte-ulaw.wav'
  309. with open(datafile(filename), 'rb') as fp:
  310. with raises(ValueError, match='Unknown wave file format.*MULAW.*'
  311. 'Supported formats'):
  312. wavfile.read(fp, mmap=mmap)
  313. def test_read_early_eof_with_data():
  314. # File ends inside 'data' chunk, but we keep incomplete data
  315. for mmap in [False, True]:
  316. filename = 'test-44100Hz-le-1ch-4bytes-early-eof.wav'
  317. with open(datafile(filename), 'rb') as fp:
  318. with warns(wavfile.WavFileWarning, match='Reached EOF'):
  319. rate, data = wavfile.read(fp, mmap=mmap)
  320. assert data.size > 0
  321. assert rate == 44100
  322. # also test writing (gh-12176)
  323. data[0] = 0
  324. def test_read_early_eof():
  325. # File ends after 'fact' chunk at boundary, no data read
  326. for mmap in [False, True]:
  327. filename = 'test-44100Hz-le-1ch-4bytes-early-eof-no-data.wav'
  328. with open(datafile(filename), 'rb') as fp:
  329. with raises(ValueError, match="Unexpected end of file."):
  330. wavfile.read(fp, mmap=mmap)
  331. def test_read_incomplete_chunk():
  332. # File ends inside 'fmt ' chunk ID, no data read
  333. for mmap in [False, True]:
  334. filename = 'test-44100Hz-le-1ch-4bytes-incomplete-chunk.wav'
  335. with open(datafile(filename), 'rb') as fp:
  336. with raises(ValueError, match="Incomplete chunk ID.*b'f'"):
  337. wavfile.read(fp, mmap=mmap)
  338. def test_read_inconsistent_header():
  339. # File header's size fields contradict each other
  340. for mmap in [False, True]:
  341. filename = 'test-8000Hz-le-3ch-5S-24bit-inconsistent.wav'
  342. with open(datafile(filename), 'rb') as fp:
  343. with raises(ValueError, match="header is invalid"):
  344. wavfile.read(fp, mmap=mmap)
  345. # signed 8-bit integer PCM is not allowed
  346. # unsigned > 8-bit integer PCM is not allowed
  347. # 8- or 16-bit float PCM is not expected
  348. # g and q are platform-dependent, so not included
  349. @pytest.mark.parametrize("dt_str", ["<i2", "<i4", "<i8", "<f4", "<f8",
  350. ">i2", ">i4", ">i8", ">f4", ">f8", '|u1'])
  351. @pytest.mark.parametrize("channels", [1, 2, 5])
  352. @pytest.mark.parametrize("rate", [8000, 32000])
  353. @pytest.mark.parametrize("mmap", [False, True])
  354. @pytest.mark.parametrize("realfile", [False, True])
  355. def test_write_roundtrip(realfile, mmap, rate, channels, dt_str, tmpdir):
  356. dtype = np.dtype(dt_str)
  357. if realfile:
  358. tmpfile = str(tmpdir.join(str(threading.get_native_id()), 'temp.wav'))
  359. os.makedirs(os.path.dirname(tmpfile), exist_ok=True)
  360. else:
  361. tmpfile = BytesIO()
  362. data = np.random.rand(100, channels)
  363. if channels == 1:
  364. data = data[:, 0]
  365. if dtype.kind == 'f':
  366. # The range of the float type should be in [-1, 1]
  367. data = data.astype(dtype)
  368. else:
  369. data = (data*128).astype(dtype)
  370. wavfile.write(tmpfile, rate, data)
  371. rate2, data2 = wavfile.read(tmpfile, mmap=mmap)
  372. assert_equal(rate, rate2)
  373. assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
  374. assert_array_equal(data, data2)
  375. # also test writing (gh-12176)
  376. if realfile:
  377. data2[0] = 0
  378. else:
  379. with pytest.raises(ValueError, match='read-only'):
  380. data2[0] = 0
  381. if realfile and mmap and IS_PYPY and sys.platform == 'win32':
  382. # windows cannot remove a dead file held by a mmap but not collected
  383. # in PyPy; since the filename gets reused in this test, clean this up
  384. break_cycles()
  385. break_cycles()
  386. @pytest.mark.parametrize("dtype", [np.float16])
  387. def test_wavfile_dtype_unsupported(tmpdir, dtype):
  388. tmpfile = str(tmpdir.join('temp.wav'))
  389. rng = np.random.default_rng(1234)
  390. data = rng.random((100, 5)).astype(dtype)
  391. rate = 8000
  392. with pytest.raises(ValueError, match="Unsupported"):
  393. wavfile.write(tmpfile, rate, data)
  394. def test_seek_emulating_reader_invalid_seek():
  395. # Dummy data for the reader
  396. reader = wavfile.SeekEmulatingReader(BytesIO(b'\x00\x00'))
  397. # Test SEEK_END with an invalid whence value
  398. with pytest.raises(UnsupportedOperation):
  399. reader.seek(0, 5) # Invalid whence value
  400. # Test with negative seek value
  401. with pytest.raises(UnsupportedOperation):
  402. reader.seek(-1, 0) # Negative position with SEEK_SET
  403. # Test SEEK_END with valid parameters (should not raise)
  404. pos = reader.seek(0, os.SEEK_END) # Valid usage
  405. assert pos == 2, f"Failed to seek to end, got position {pos}"