wavfile.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944
  1. """
  2. Module to read / write wav files using NumPy arrays
  3. Functions
  4. ---------
  5. `read`: Return the sample rate (in samples/sec) and data from a WAV file.
  6. `write`: Write a NumPy array as a WAV file.
  7. """
  8. import io
  9. import os
  10. import sys
  11. import numpy as np
  12. import struct
  13. import warnings
  14. from enum import IntEnum
  15. __all__ = [
  16. 'WavFileWarning',
  17. 'read',
  18. 'write'
  19. ]
  20. class WavFileWarning(UserWarning):
  21. """
  22. Warning for WAV files with format issues that can still be read.
  23. Raised when a WAV file has problems like missing metadata or
  24. non-standard formatting, but can still be processed successfully.
  25. """
  26. pass
  27. class SeekEmulatingReader:
  28. """
  29. Tracks stream position, provides tell(), and emulates only those
  30. seeks that can be supported by reading forward. Other seeks raise
  31. io.UnsupportedOperation. Note that this class implements only the
  32. minimum necessary to keep wavfile.read() happy.
  33. """
  34. def __init__(self, reader):
  35. self.reader = reader
  36. self.pos = 0
  37. def read(self, size=-1, /):
  38. data = self.reader.read(size)
  39. self.pos += len(data)
  40. return data
  41. def seek(self, offset, whence=os.SEEK_SET, /):
  42. match whence:
  43. case os.SEEK_SET if offset >= self.pos:
  44. self.read(offset - self.pos) # convert to relative
  45. case os.SEEK_CUR if offset >= 0:
  46. self.read(offset) # advance by offset
  47. case os.SEEK_END if offset == 0:
  48. self.read() # advance to end of stream
  49. case _:
  50. raise io.UnsupportedOperation("SeekEmulatingReader was asked to emulate"
  51. " a seek operation it does not support.")
  52. return self.pos
  53. def tell(self):
  54. return self.pos
  55. def close(self):
  56. self.reader.close()
  57. # np.fromfile expects to be able to call flush(), and _read_data_chunk
  58. # expects to catch io.UnsupportedOperation if np.fromfile fails.
  59. def flush(self):
  60. raise io.UnsupportedOperation("SeekEmulatingReader can't flush.")
  61. class WAVE_FORMAT(IntEnum):
  62. """
  63. WAVE form wFormatTag IDs
  64. Complete list is in mmreg.h in Windows 10 SDK. ALAC and OPUS are the
  65. newest additions, in v10.0.14393 2016-07
  66. """
  67. UNKNOWN = 0x0000
  68. PCM = 0x0001
  69. ADPCM = 0x0002
  70. IEEE_FLOAT = 0x0003
  71. VSELP = 0x0004
  72. IBM_CVSD = 0x0005
  73. ALAW = 0x0006
  74. MULAW = 0x0007
  75. DTS = 0x0008
  76. DRM = 0x0009
  77. WMAVOICE9 = 0x000A
  78. WMAVOICE10 = 0x000B
  79. OKI_ADPCM = 0x0010
  80. DVI_ADPCM = 0x0011
  81. IMA_ADPCM = 0x0011 # Duplicate
  82. MEDIASPACE_ADPCM = 0x0012
  83. SIERRA_ADPCM = 0x0013
  84. G723_ADPCM = 0x0014
  85. DIGISTD = 0x0015
  86. DIGIFIX = 0x0016
  87. DIALOGIC_OKI_ADPCM = 0x0017
  88. MEDIAVISION_ADPCM = 0x0018
  89. CU_CODEC = 0x0019
  90. HP_DYN_VOICE = 0x001A
  91. YAMAHA_ADPCM = 0x0020
  92. SONARC = 0x0021
  93. DSPGROUP_TRUESPEECH = 0x0022
  94. ECHOSC1 = 0x0023
  95. AUDIOFILE_AF36 = 0x0024
  96. APTX = 0x0025
  97. AUDIOFILE_AF10 = 0x0026
  98. PROSODY_1612 = 0x0027
  99. LRC = 0x0028
  100. DOLBY_AC2 = 0x0030
  101. GSM610 = 0x0031
  102. MSNAUDIO = 0x0032
  103. ANTEX_ADPCME = 0x0033
  104. CONTROL_RES_VQLPC = 0x0034
  105. DIGIREAL = 0x0035
  106. DIGIADPCM = 0x0036
  107. CONTROL_RES_CR10 = 0x0037
  108. NMS_VBXADPCM = 0x0038
  109. CS_IMAADPCM = 0x0039
  110. ECHOSC3 = 0x003A
  111. ROCKWELL_ADPCM = 0x003B
  112. ROCKWELL_DIGITALK = 0x003C
  113. XEBEC = 0x003D
  114. G721_ADPCM = 0x0040
  115. G728_CELP = 0x0041
  116. MSG723 = 0x0042
  117. INTEL_G723_1 = 0x0043
  118. INTEL_G729 = 0x0044
  119. SHARP_G726 = 0x0045
  120. MPEG = 0x0050
  121. RT24 = 0x0052
  122. PAC = 0x0053
  123. MPEGLAYER3 = 0x0055
  124. LUCENT_G723 = 0x0059
  125. CIRRUS = 0x0060
  126. ESPCM = 0x0061
  127. VOXWARE = 0x0062
  128. CANOPUS_ATRAC = 0x0063
  129. G726_ADPCM = 0x0064
  130. G722_ADPCM = 0x0065
  131. DSAT = 0x0066
  132. DSAT_DISPLAY = 0x0067
  133. VOXWARE_BYTE_ALIGNED = 0x0069
  134. VOXWARE_AC8 = 0x0070
  135. VOXWARE_AC10 = 0x0071
  136. VOXWARE_AC16 = 0x0072
  137. VOXWARE_AC20 = 0x0073
  138. VOXWARE_RT24 = 0x0074
  139. VOXWARE_RT29 = 0x0075
  140. VOXWARE_RT29HW = 0x0076
  141. VOXWARE_VR12 = 0x0077
  142. VOXWARE_VR18 = 0x0078
  143. VOXWARE_TQ40 = 0x0079
  144. VOXWARE_SC3 = 0x007A
  145. VOXWARE_SC3_1 = 0x007B
  146. SOFTSOUND = 0x0080
  147. VOXWARE_TQ60 = 0x0081
  148. MSRT24 = 0x0082
  149. G729A = 0x0083
  150. MVI_MVI2 = 0x0084
  151. DF_G726 = 0x0085
  152. DF_GSM610 = 0x0086
  153. ISIAUDIO = 0x0088
  154. ONLIVE = 0x0089
  155. MULTITUDE_FT_SX20 = 0x008A
  156. INFOCOM_ITS_G721_ADPCM = 0x008B
  157. CONVEDIA_G729 = 0x008C
  158. CONGRUENCY = 0x008D
  159. SBC24 = 0x0091
  160. DOLBY_AC3_SPDIF = 0x0092
  161. MEDIASONIC_G723 = 0x0093
  162. PROSODY_8KBPS = 0x0094
  163. ZYXEL_ADPCM = 0x0097
  164. PHILIPS_LPCBB = 0x0098
  165. PACKED = 0x0099
  166. MALDEN_PHONYTALK = 0x00A0
  167. RACAL_RECORDER_GSM = 0x00A1
  168. RACAL_RECORDER_G720_A = 0x00A2
  169. RACAL_RECORDER_G723_1 = 0x00A3
  170. RACAL_RECORDER_TETRA_ACELP = 0x00A4
  171. NEC_AAC = 0x00B0
  172. RAW_AAC1 = 0x00FF
  173. RHETOREX_ADPCM = 0x0100
  174. IRAT = 0x0101
  175. VIVO_G723 = 0x0111
  176. VIVO_SIREN = 0x0112
  177. PHILIPS_CELP = 0x0120
  178. PHILIPS_GRUNDIG = 0x0121
  179. DIGITAL_G723 = 0x0123
  180. SANYO_LD_ADPCM = 0x0125
  181. SIPROLAB_ACEPLNET = 0x0130
  182. SIPROLAB_ACELP4800 = 0x0131
  183. SIPROLAB_ACELP8V3 = 0x0132
  184. SIPROLAB_G729 = 0x0133
  185. SIPROLAB_G729A = 0x0134
  186. SIPROLAB_KELVIN = 0x0135
  187. VOICEAGE_AMR = 0x0136
  188. G726ADPCM = 0x0140
  189. DICTAPHONE_CELP68 = 0x0141
  190. DICTAPHONE_CELP54 = 0x0142
  191. QUALCOMM_PUREVOICE = 0x0150
  192. QUALCOMM_HALFRATE = 0x0151
  193. TUBGSM = 0x0155
  194. MSAUDIO1 = 0x0160
  195. WMAUDIO2 = 0x0161
  196. WMAUDIO3 = 0x0162
  197. WMAUDIO_LOSSLESS = 0x0163
  198. WMASPDIF = 0x0164
  199. UNISYS_NAP_ADPCM = 0x0170
  200. UNISYS_NAP_ULAW = 0x0171
  201. UNISYS_NAP_ALAW = 0x0172
  202. UNISYS_NAP_16K = 0x0173
  203. SYCOM_ACM_SYC008 = 0x0174
  204. SYCOM_ACM_SYC701_G726L = 0x0175
  205. SYCOM_ACM_SYC701_CELP54 = 0x0176
  206. SYCOM_ACM_SYC701_CELP68 = 0x0177
  207. KNOWLEDGE_ADVENTURE_ADPCM = 0x0178
  208. FRAUNHOFER_IIS_MPEG2_AAC = 0x0180
  209. DTS_DS = 0x0190
  210. CREATIVE_ADPCM = 0x0200
  211. CREATIVE_FASTSPEECH8 = 0x0202
  212. CREATIVE_FASTSPEECH10 = 0x0203
  213. UHER_ADPCM = 0x0210
  214. ULEAD_DV_AUDIO = 0x0215
  215. ULEAD_DV_AUDIO_1 = 0x0216
  216. QUARTERDECK = 0x0220
  217. ILINK_VC = 0x0230
  218. RAW_SPORT = 0x0240
  219. ESST_AC3 = 0x0241
  220. GENERIC_PASSTHRU = 0x0249
  221. IPI_HSX = 0x0250
  222. IPI_RPELP = 0x0251
  223. CS2 = 0x0260
  224. SONY_SCX = 0x0270
  225. SONY_SCY = 0x0271
  226. SONY_ATRAC3 = 0x0272
  227. SONY_SPC = 0x0273
  228. TELUM_AUDIO = 0x0280
  229. TELUM_IA_AUDIO = 0x0281
  230. NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285
  231. FM_TOWNS_SND = 0x0300
  232. MICRONAS = 0x0350
  233. MICRONAS_CELP833 = 0x0351
  234. BTV_DIGITAL = 0x0400
  235. INTEL_MUSIC_CODER = 0x0401
  236. INDEO_AUDIO = 0x0402
  237. QDESIGN_MUSIC = 0x0450
  238. ON2_VP7_AUDIO = 0x0500
  239. ON2_VP6_AUDIO = 0x0501
  240. VME_VMPCM = 0x0680
  241. TPC = 0x0681
  242. LIGHTWAVE_LOSSLESS = 0x08AE
  243. OLIGSM = 0x1000
  244. OLIADPCM = 0x1001
  245. OLICELP = 0x1002
  246. OLISBC = 0x1003
  247. OLIOPR = 0x1004
  248. LH_CODEC = 0x1100
  249. LH_CODEC_CELP = 0x1101
  250. LH_CODEC_SBC8 = 0x1102
  251. LH_CODEC_SBC12 = 0x1103
  252. LH_CODEC_SBC16 = 0x1104
  253. NORRIS = 0x1400
  254. ISIAUDIO_2 = 0x1401
  255. SOUNDSPACE_MUSICOMPRESS = 0x1500
  256. MPEG_ADTS_AAC = 0x1600
  257. MPEG_RAW_AAC = 0x1601
  258. MPEG_LOAS = 0x1602
  259. NOKIA_MPEG_ADTS_AAC = 0x1608
  260. NOKIA_MPEG_RAW_AAC = 0x1609
  261. VODAFONE_MPEG_ADTS_AAC = 0x160A
  262. VODAFONE_MPEG_RAW_AAC = 0x160B
  263. MPEG_HEAAC = 0x1610
  264. VOXWARE_RT24_SPEECH = 0x181C
  265. SONICFOUNDRY_LOSSLESS = 0x1971
  266. INNINGS_TELECOM_ADPCM = 0x1979
  267. LUCENT_SX8300P = 0x1C07
  268. LUCENT_SX5363S = 0x1C0C
  269. CUSEEME = 0x1F03
  270. NTCSOFT_ALF2CM_ACM = 0x1FC4
  271. DVM = 0x2000
  272. DTS2 = 0x2001
  273. MAKEAVIS = 0x3313
  274. DIVIO_MPEG4_AAC = 0x4143
  275. NOKIA_ADAPTIVE_MULTIRATE = 0x4201
  276. DIVIO_G726 = 0x4243
  277. LEAD_SPEECH = 0x434C
  278. LEAD_VORBIS = 0x564C
  279. WAVPACK_AUDIO = 0x5756
  280. OGG_VORBIS_MODE_1 = 0x674F
  281. OGG_VORBIS_MODE_2 = 0x6750
  282. OGG_VORBIS_MODE_3 = 0x6751
  283. OGG_VORBIS_MODE_1_PLUS = 0x676F
  284. OGG_VORBIS_MODE_2_PLUS = 0x6770
  285. OGG_VORBIS_MODE_3_PLUS = 0x6771
  286. ALAC = 0x6C61
  287. _3COM_NBX = 0x7000 # Can't have leading digit
  288. OPUS = 0x704F
  289. FAAD_AAC = 0x706D
  290. AMR_NB = 0x7361
  291. AMR_WB = 0x7362
  292. AMR_WP = 0x7363
  293. GSM_AMR_CBR = 0x7A21
  294. GSM_AMR_VBR_SID = 0x7A22
  295. COMVERSE_INFOSYS_G723_1 = 0xA100
  296. COMVERSE_INFOSYS_AVQSBC = 0xA101
  297. COMVERSE_INFOSYS_SBC = 0xA102
  298. SYMBOL_G729_A = 0xA103
  299. VOICEAGE_AMR_WB = 0xA104
  300. INGENIENT_G726 = 0xA105
  301. MPEG4_AAC = 0xA106
  302. ENCORE_G726 = 0xA107
  303. ZOLL_ASAO = 0xA108
  304. SPEEX_VOICE = 0xA109
  305. VIANIX_MASC = 0xA10A
  306. WM9_SPECTRUM_ANALYZER = 0xA10B
  307. WMF_SPECTRUM_ANAYZER = 0xA10C
  308. GSM_610 = 0xA10D
  309. GSM_620 = 0xA10E
  310. GSM_660 = 0xA10F
  311. GSM_690 = 0xA110
  312. GSM_ADAPTIVE_MULTIRATE_WB = 0xA111
  313. POLYCOM_G722 = 0xA112
  314. POLYCOM_G728 = 0xA113
  315. POLYCOM_G729_A = 0xA114
  316. POLYCOM_SIREN = 0xA115
  317. GLOBAL_IP_ILBC = 0xA116
  318. RADIOTIME_TIME_SHIFT_RADIO = 0xA117
  319. NICE_ACA = 0xA118
  320. NICE_ADPCM = 0xA119
  321. VOCORD_G721 = 0xA11A
  322. VOCORD_G726 = 0xA11B
  323. VOCORD_G722_1 = 0xA11C
  324. VOCORD_G728 = 0xA11D
  325. VOCORD_G729 = 0xA11E
  326. VOCORD_G729_A = 0xA11F
  327. VOCORD_G723_1 = 0xA120
  328. VOCORD_LBC = 0xA121
  329. NICE_G728 = 0xA122
  330. FRACE_TELECOM_G729 = 0xA123
  331. CODIAN = 0xA124
  332. FLAC = 0xF1AC
  333. EXTENSIBLE = 0xFFFE
  334. DEVELOPMENT = 0xFFFF
  335. KNOWN_WAVE_FORMATS = {WAVE_FORMAT.PCM, WAVE_FORMAT.IEEE_FLOAT}
  336. def _raise_bad_format(format_tag):
  337. try:
  338. format_name = WAVE_FORMAT(format_tag).name
  339. except ValueError:
  340. format_name = f'{format_tag:#06x}'
  341. raise ValueError(f"Unknown wave file format: {format_name}. Supported "
  342. "formats: " +
  343. ', '.join(x.name for x in KNOWN_WAVE_FORMATS))
  344. def _read_fmt_chunk(fid, is_big_endian):
  345. """
  346. Returns
  347. -------
  348. size : int
  349. size of format subchunk in bytes (minus 8 for "fmt " and itself)
  350. format_tag : int
  351. PCM, float, or compressed format
  352. channels : int
  353. number of channels
  354. fs : int
  355. sampling frequency in samples per second
  356. bytes_per_second : int
  357. overall byte rate for the file
  358. block_align : int
  359. bytes per sample, including all channels
  360. bit_depth : int
  361. bits per sample
  362. Notes
  363. -----
  364. Assumes file pointer is immediately after the 'fmt ' id
  365. """
  366. if is_big_endian:
  367. fmt = '>'
  368. else:
  369. fmt = '<'
  370. size = struct.unpack(fmt+'I', fid.read(4))[0]
  371. if size < 16:
  372. raise ValueError("Binary structure of wave file is not compliant")
  373. res = struct.unpack(fmt+'HHIIHH', fid.read(16))
  374. bytes_read = 16
  375. format_tag, channels, fs, bytes_per_second, block_align, bit_depth = res
  376. if format_tag == WAVE_FORMAT.EXTENSIBLE and size >= (16+2):
  377. ext_chunk_size = struct.unpack(fmt+'H', fid.read(2))[0]
  378. bytes_read += 2
  379. if ext_chunk_size >= 22:
  380. extensible_chunk_data = fid.read(22)
  381. bytes_read += 22
  382. raw_guid = extensible_chunk_data[2+4:2+4+16]
  383. # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
  384. # MS GUID byte order: first three groups are native byte order,
  385. # rest is Big Endian
  386. if is_big_endian:
  387. tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
  388. else:
  389. tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
  390. if raw_guid.endswith(tail):
  391. format_tag = struct.unpack(fmt+'I', raw_guid[:4])[0]
  392. else:
  393. raise ValueError("Binary structure of wave file is not compliant")
  394. if format_tag not in KNOWN_WAVE_FORMATS:
  395. _raise_bad_format(format_tag)
  396. # move file pointer to next chunk
  397. if size > bytes_read:
  398. fid.read(size - bytes_read)
  399. # fmt should always be 16, 18 or 40, but handle it just in case
  400. _handle_pad_byte(fid, size)
  401. if format_tag == WAVE_FORMAT.PCM:
  402. if bytes_per_second != fs * block_align:
  403. raise ValueError("WAV header is invalid: nAvgBytesPerSec must"
  404. " equal product of nSamplesPerSec and"
  405. " nBlockAlign, but file has nSamplesPerSec ="
  406. f" {fs}, nBlockAlign = {block_align}, and"
  407. f" nAvgBytesPerSec = {bytes_per_second}")
  408. return (size, format_tag, channels, fs, bytes_per_second, block_align,
  409. bit_depth)
  410. def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian, is_rf64,
  411. block_align, mmap=False, rf64_chunk_size=None):
  412. """
  413. Notes
  414. -----
  415. Assumes file pointer is immediately after the 'data' id
  416. It's possible to not use all available bits in a container, or to store
  417. samples in a container bigger than necessary, so bytes_per_sample uses
  418. the actual reported container size (nBlockAlign / nChannels). Real-world
  419. examples:
  420. Adobe Audition's "24-bit packed int (type 1, 20-bit)"
  421. nChannels = 2, nBlockAlign = 6, wBitsPerSample = 20
  422. http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/AFsp/M1F1-int12-AFsp.wav
  423. is:
  424. nChannels = 2, nBlockAlign = 4, wBitsPerSample = 12
  425. http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/multichaudP.pdf
  426. gives an example of:
  427. nChannels = 2, nBlockAlign = 8, wBitsPerSample = 20
  428. """
  429. if is_big_endian:
  430. fmt = '>'
  431. else:
  432. fmt = '<'
  433. # Size of the data subchunk in bytes
  434. if not is_rf64:
  435. size = struct.unpack(fmt+'I', fid.read(4))[0]
  436. else:
  437. # chunk size is stored in global file header for RF64
  438. size = rf64_chunk_size
  439. # skip data chunk size as it is 0xFFFFFFF
  440. fid.read(4)
  441. # Number of bytes per sample (sample container size)
  442. bytes_per_sample = block_align // channels
  443. n_samples = size // bytes_per_sample
  444. if format_tag == WAVE_FORMAT.PCM:
  445. if 1 <= bit_depth <= 8:
  446. dtype = 'u1' # WAV of 8-bit integer or less are unsigned
  447. elif bytes_per_sample in {3, 5, 6, 7}:
  448. # No compatible dtype. Load as raw bytes for reshaping later.
  449. dtype = 'V1'
  450. elif bit_depth <= 64:
  451. # Remaining bit depths can map directly to signed numpy dtypes
  452. dtype = f'{fmt}i{bytes_per_sample}'
  453. else:
  454. raise ValueError("Unsupported bit depth: the WAV file "
  455. f"has {bit_depth}-bit integer data.")
  456. elif format_tag == WAVE_FORMAT.IEEE_FLOAT:
  457. if bit_depth in {32, 64}:
  458. dtype = f'{fmt}f{bytes_per_sample}'
  459. else:
  460. raise ValueError("Unsupported bit depth: the WAV file "
  461. f"has {bit_depth}-bit floating-point data.")
  462. else:
  463. _raise_bad_format(format_tag)
  464. start = fid.tell()
  465. if not mmap:
  466. try:
  467. count = size if dtype == 'V1' else n_samples
  468. data = np.fromfile(fid, dtype=dtype, count=count)
  469. except io.UnsupportedOperation: # not a C-like file
  470. fid.seek(start, 0) # just in case it seeked, though it shouldn't
  471. data = np.frombuffer(fid.read(size), dtype=dtype)
  472. if dtype == 'V1':
  473. # Rearrange raw bytes into smallest compatible numpy dtype
  474. dt = f'{fmt}i4' if bytes_per_sample == 3 else f'{fmt}i8'
  475. a = np.zeros((len(data) // bytes_per_sample, np.dtype(dt).itemsize),
  476. dtype='V1')
  477. if is_big_endian:
  478. a[:, :bytes_per_sample] = data.reshape((-1, bytes_per_sample))
  479. else:
  480. a[:, -bytes_per_sample:] = data.reshape((-1, bytes_per_sample))
  481. data = a.view(dt).reshape(a.shape[:-1])
  482. else:
  483. if bytes_per_sample in {1, 2, 4, 8}:
  484. start = fid.tell()
  485. data = np.memmap(fid, dtype=dtype, mode='c', offset=start,
  486. shape=(n_samples,))
  487. fid.seek(start + size)
  488. else:
  489. raise ValueError("mmap=True not compatible with "
  490. f"{bytes_per_sample}-byte container size.")
  491. _handle_pad_byte(fid, size)
  492. if channels > 1:
  493. data = data.reshape(-1, channels)
  494. return data
  495. def _skip_unknown_chunk(fid, is_big_endian):
  496. if is_big_endian:
  497. fmt = '>I'
  498. else:
  499. fmt = '<I'
  500. data = fid.read(4)
  501. # call unpack() and seek() only if we have really read data from file
  502. # otherwise empty read at the end of the file would trigger
  503. # unnecessary exception at unpack() call
  504. # in case data equals somehow to 0, there is no need for seek() anyway
  505. if data:
  506. size = struct.unpack(fmt, data)[0]
  507. fid.seek(size, 1)
  508. _handle_pad_byte(fid, size)
  509. def _read_riff_chunk(fid):
  510. str1 = fid.read(4) # File signature
  511. if str1 == b'RIFF':
  512. is_rf64 = False
  513. is_big_endian = False
  514. fmt = '<I'
  515. elif str1 == b'RIFX':
  516. is_rf64 = False
  517. is_big_endian = True
  518. fmt = '>I'
  519. elif str1 == b'RF64':
  520. is_rf64 = True
  521. is_big_endian = False
  522. fmt = '<Q'
  523. else:
  524. # There are also .wav files with "FFIR" or "XFIR" signatures?
  525. raise ValueError(f"File format {repr(str1)} not understood. Only "
  526. "'RIFF', 'RIFX', and 'RF64' supported.")
  527. # Size of entire file
  528. if not is_rf64:
  529. file_size = struct.unpack(fmt, fid.read(4))[0] + 8
  530. rf64_chunk_size = None
  531. str2 = fid.read(4)
  532. else:
  533. # Skip 0xFFFFFFFF (-1) bytes
  534. fid.read(4)
  535. str2 = fid.read(4)
  536. str3 = fid.read(4)
  537. if str3 != b'ds64':
  538. raise ValueError("Invalid RF64 file: ds64 chunk not found.")
  539. ds64_size = struct.unpack("<I", fid.read(4))[0]
  540. file_size = struct.unpack(fmt, fid.read(8))[0] + 8
  541. rf64_chunk_size = struct.unpack('<Q', fid.read(8))[0]
  542. # Ignore additional attributes of ds64 chunk like sample count, tables, etc.
  543. # and just skip to the next chunk
  544. fid.seek(ds64_size - 16, 1)
  545. if str2 != b'WAVE':
  546. raise ValueError(f"Not a WAV file. RIFF form type is {repr(str2)}.")
  547. return file_size, is_big_endian, is_rf64, rf64_chunk_size
  548. def _handle_pad_byte(fid, size):
  549. # "If the chunk size is an odd number of bytes, a pad byte with value zero
  550. # is written after ckData." So we need to seek past this after each chunk.
  551. if size % 2:
  552. fid.seek(1, 1)
  553. def read(filename, mmap=False):
  554. """
  555. Open a WAV file.
  556. Return the sample rate (in samples/sec) and data from an LPCM WAV file.
  557. Parameters
  558. ----------
  559. filename : string or open file handle
  560. Input WAV file.
  561. mmap : bool, optional
  562. Whether to read data as memory-mapped (default: False). Not compatible
  563. with some bit depths; see Notes. Only to be used on real files.
  564. .. versionadded:: 0.12.0
  565. Returns
  566. -------
  567. rate : int
  568. Sample rate of WAV file.
  569. data : numpy array
  570. Data read from WAV file. Data-type is determined from the file;
  571. see Notes. Data is 1-D for 1-channel WAV, or 2-D of shape
  572. (Nsamples, Nchannels) otherwise. If a file-like input without a
  573. C-like file descriptor (e.g., :class:`python:io.BytesIO`) is
  574. passed, this will not be writeable.
  575. Notes
  576. -----
  577. Common data types: [1]_
  578. ===================== =========== =========== =============
  579. WAV format Min Max NumPy dtype
  580. ===================== =========== =========== =============
  581. 32-bit floating-point -1.0 +1.0 float32
  582. 32-bit integer PCM -2147483648 +2147483647 int32
  583. 24-bit integer PCM -2147483648 +2147483392 int32
  584. 16-bit integer PCM -32768 +32767 int16
  585. 8-bit integer PCM 0 255 uint8
  586. ===================== =========== =========== =============
  587. WAV files can specify arbitrary bit depth, and this function supports
  588. reading any integer PCM depth from 1 to 64 bits. Data is returned in the
  589. smallest compatible numpy int type, in left-justified format. 8-bit and
  590. lower is unsigned, while 9-bit and higher is signed.
  591. For example, 24-bit data will be stored as int32, with the MSB of the
  592. 24-bit data stored at the MSB of the int32, and typically the least
  593. significant byte is 0x00. (However, if a file actually contains data past
  594. its specified bit depth, those bits will be read and output, too. [2]_)
  595. This bit justification and sign matches WAV's native internal format, which
  596. allows memory mapping of WAV files that use 1, 2, 4, or 8 bytes per sample
  597. (so 24-bit files cannot be memory-mapped, but 32-bit can).
  598. IEEE float PCM in 32- or 64-bit format is supported, with or without mmap.
  599. Values exceeding [-1, +1] are not clipped.
  600. Non-linear PCM (mu-law, A-law) is not supported.
  601. References
  602. ----------
  603. .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
  604. Interface and Data Specifications 1.0", section "Data Format of the
  605. Samples", August 1991
  606. http://www.tactilemedia.com/info/MCI_Control_Info.html
  607. .. [2] Adobe Systems Incorporated, "Adobe Audition 3 User Guide", section
  608. "Audio file formats: 24-bit Packed Int (type 1, 20-bit)", 2007
  609. Examples
  610. --------
  611. >>> from os.path import dirname, join as pjoin
  612. >>> from scipy.io import wavfile
  613. >>> import scipy.io
  614. Get the filename for an example .wav file from the tests/data directory.
  615. >>> data_dir = pjoin(dirname(scipy.io.__file__), 'tests', 'data')
  616. >>> wav_fname = pjoin(data_dir, 'test-44100Hz-2ch-32bit-float-be.wav')
  617. Load the .wav file contents.
  618. >>> samplerate, data = wavfile.read(wav_fname)
  619. >>> print(f"number of channels = {data.shape[1]}")
  620. number of channels = 2
  621. >>> length = data.shape[0] / samplerate
  622. >>> print(f"length = {length}s")
  623. length = 0.01s
  624. Plot the waveform.
  625. >>> import matplotlib.pyplot as plt
  626. >>> import numpy as np
  627. >>> time = np.linspace(0., length, data.shape[0])
  628. >>> plt.plot(time, data[:, 0], label="Left channel")
  629. >>> plt.plot(time, data[:, 1], label="Right channel")
  630. >>> plt.legend()
  631. >>> plt.xlabel("Time [s]")
  632. >>> plt.ylabel("Amplitude")
  633. >>> plt.show()
  634. """
  635. if hasattr(filename, 'read'):
  636. fid = filename
  637. mmap = False
  638. else:
  639. fid = open(filename, 'rb')
  640. if not (was_seekable := fid.seekable()):
  641. fid = SeekEmulatingReader(fid)
  642. try:
  643. file_size, is_big_endian, is_rf64, rf64_chunk_size = _read_riff_chunk(fid)
  644. fmt_chunk_received = False
  645. data_chunk_received = False
  646. while fid.tell() < file_size:
  647. # read the next chunk
  648. chunk_id = fid.read(4)
  649. if not chunk_id:
  650. if data_chunk_received:
  651. # End of file but data successfully read
  652. warnings.warn(
  653. f"Reached EOF prematurely; finished at {fid.tell():d} bytes, "
  654. f"expected {file_size:d} bytes from header.",
  655. WavFileWarning, stacklevel=2)
  656. break
  657. else:
  658. raise ValueError("Unexpected end of file.")
  659. elif len(chunk_id) < 4:
  660. msg = f"Incomplete chunk ID: {repr(chunk_id)}"
  661. # If we have the data, ignore the broken chunk
  662. if fmt_chunk_received and data_chunk_received:
  663. warnings.warn(msg + ", ignoring it.", WavFileWarning,
  664. stacklevel=2)
  665. else:
  666. raise ValueError(msg)
  667. if chunk_id == b'fmt ':
  668. fmt_chunk_received = True
  669. fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
  670. format_tag, channels, fs = fmt_chunk[1:4]
  671. bit_depth = fmt_chunk[6]
  672. block_align = fmt_chunk[5]
  673. elif chunk_id == b'fact':
  674. _skip_unknown_chunk(fid, is_big_endian)
  675. elif chunk_id == b'data':
  676. data_chunk_received = True
  677. if not fmt_chunk_received:
  678. raise ValueError("No fmt chunk before data")
  679. data = _read_data_chunk(fid, format_tag, channels, bit_depth,
  680. is_big_endian, is_rf64, block_align,
  681. mmap, rf64_chunk_size)
  682. elif chunk_id == b'LIST':
  683. # Someday this could be handled properly but for now skip it
  684. _skip_unknown_chunk(fid, is_big_endian)
  685. elif chunk_id in {b'JUNK', b'Fake'}:
  686. # Skip alignment chunks without warning
  687. _skip_unknown_chunk(fid, is_big_endian)
  688. else:
  689. warnings.warn("Chunk (non-data) not understood, skipping it.",
  690. WavFileWarning, stacklevel=2)
  691. _skip_unknown_chunk(fid, is_big_endian)
  692. finally:
  693. if not hasattr(filename, 'read'):
  694. fid.close()
  695. elif was_seekable:
  696. # Rewind, if we are able, so that caller can do something
  697. # else with the raw WAV stream.
  698. fid.seek(0)
  699. return fs, data
  700. def write(filename, rate, data):
  701. """
  702. Write a NumPy array as a WAV file.
  703. Parameters
  704. ----------
  705. filename : string or open file handle
  706. Output wav file.
  707. rate : int
  708. The sample rate (in samples/sec).
  709. data : ndarray
  710. A 1-D or 2-D NumPy array of either integer or float data-type.
  711. Notes
  712. -----
  713. * Writes a simple uncompressed WAV file.
  714. * To write multiple-channels, use a 2-D array of shape
  715. (Nsamples, Nchannels).
  716. * The bits-per-sample and PCM/float will be determined by the data-type.
  717. Common data types: [1]_
  718. ===================== =========== =========== =============
  719. WAV format Min Max NumPy dtype
  720. ===================== =========== =========== =============
  721. 32-bit floating-point -1.0 +1.0 float32
  722. 32-bit PCM -2147483648 +2147483647 int32
  723. 16-bit PCM -32768 +32767 int16
  724. 8-bit PCM 0 255 uint8
  725. ===================== =========== =========== =============
  726. Note that 8-bit PCM is unsigned.
  727. References
  728. ----------
  729. .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
  730. Interface and Data Specifications 1.0", section "Data Format of the
  731. Samples", August 1991
  732. http://www.tactilemedia.com/info/MCI_Control_Info.html
  733. Examples
  734. --------
  735. Create a 100Hz sine wave, sampled at 44100Hz.
  736. Write to 16-bit PCM, Mono.
  737. >>> from scipy.io.wavfile import write
  738. >>> import numpy as np
  739. >>> samplerate = 44100; fs = 100
  740. >>> t = np.linspace(0., 1., samplerate)
  741. >>> amplitude = np.iinfo(np.int16).max
  742. >>> data = amplitude * np.sin(2. * np.pi * fs * t)
  743. >>> write("example.wav", samplerate, data.astype(np.int16))
  744. """
  745. if hasattr(filename, 'write'):
  746. fid = filename
  747. else:
  748. fid = open(filename, 'wb')
  749. fs = rate
  750. try:
  751. dkind = data.dtype.kind
  752. allowed_dtypes = ['float32', 'float64',
  753. 'uint8', 'int16', 'int32', 'int64']
  754. if data.dtype.name not in allowed_dtypes:
  755. raise ValueError(f"Unsupported data type '{data.dtype}'")
  756. header_data = b''
  757. header_data += b'RIFF'
  758. header_data += b'\x00\x00\x00\x00'
  759. header_data += b'WAVE'
  760. # fmt chunk
  761. header_data += b'fmt '
  762. if dkind == 'f':
  763. format_tag = WAVE_FORMAT.IEEE_FLOAT
  764. else:
  765. format_tag = WAVE_FORMAT.PCM
  766. if data.ndim == 1:
  767. channels = 1
  768. else:
  769. channels = data.shape[1]
  770. bit_depth = data.dtype.itemsize * 8
  771. bytes_per_second = fs*(bit_depth // 8)*channels
  772. block_align = channels * (bit_depth // 8)
  773. fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
  774. bytes_per_second, block_align, bit_depth)
  775. if not (dkind == 'i' or dkind == 'u'):
  776. # add cbSize field for non-PCM files
  777. fmt_chunk_data += b'\x00\x00'
  778. header_data += struct.pack('<I', len(fmt_chunk_data))
  779. header_data += fmt_chunk_data
  780. # check data size (needs to be immediately before the data chunk)
  781. # if too large for standard RIFF, use RF64 instead
  782. resulting_file_size = len(header_data) + 4 + 4 + data.nbytes
  783. is_rf64 = (resulting_file_size - 8) > 0xFFFFFFFF
  784. if is_rf64:
  785. header_data = b''
  786. header_data += b'RF64'
  787. header_data += b'\xFF\xFF\xFF\xFF'
  788. header_data += b'WAVE'
  789. header_data += b'ds64'
  790. # size of ds64 chunk
  791. header_data += struct.pack('<I', 28)
  792. # will be filled later with real file size
  793. header_data += struct.pack('<Q', 0)
  794. header_data += struct.pack('<Q', data.nbytes)
  795. header_data += struct.pack('<Q', data.shape[0])
  796. # ignore 'table' field for now
  797. header_data += struct.pack('<I', 0)
  798. header_data += b'fmt '
  799. header_data += struct.pack('<I', len(fmt_chunk_data))
  800. header_data += fmt_chunk_data
  801. # fact chunk (non-PCM files)
  802. if not (dkind == 'i' or dkind == 'u'):
  803. header_data += b'fact'
  804. header_data += struct.pack('<II', 4, data.shape[0])
  805. fid.write(header_data)
  806. # data chunk
  807. fid.write(b'data')
  808. # write data chunk size, unless its too big in which case 0xFFFFFFFF is written
  809. fid.write(struct.pack('<I', min(data.nbytes, 4294967295)))
  810. if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
  811. sys.byteorder == 'big'):
  812. data = data.byteswap()
  813. _array_tofile(fid, data)
  814. # Determine file size and place it in correct
  815. # position at start of the file or the data chunk.
  816. size = fid.tell()
  817. if not is_rf64:
  818. fid.seek(4)
  819. fid.write(struct.pack('<I', size-8))
  820. else:
  821. fid.seek(20)
  822. fid.write(struct.pack('<Q', size-8))
  823. finally:
  824. if not hasattr(filename, 'write'):
  825. fid.close()
  826. else:
  827. fid.seek(0)
  828. def _array_tofile(fid, data):
  829. # ravel gives a c-contiguous buffer
  830. fid.write(data.ravel().view('b').data)