test_audio.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. // This file is part of OpenCV project.
  2. // It is subject to the license terms in the LICENSE file found in the top-level directory
  3. // of this distribution and at http://opencv.org/license.html.
  4. #include "test_precomp.hpp"
  5. namespace opencv_test { namespace {
  6. //file name, number of audio channels, epsilon, video type, weight, height, number of frame, number of audio samples, fps, psnr Threshold, backend
  7. typedef std::tuple<std::string, int, double, int, int, int, int, int, int, double, VideoCaptureAPIs> paramCombination;
  8. //file name, number of audio channels, number of audio samples, epsilon, backend
  9. typedef std::tuple<std::string, int, int, double, VideoCaptureAPIs> param;
  10. class AudioBaseTest
  11. {
  12. protected:
  13. AudioBaseTest(){}
  14. void getValidAudioData()
  15. {
  16. const double step = 3.14/22050;
  17. double value = 0;
  18. validAudioData.resize(expectedNumAudioCh);
  19. for (int nCh = 0; nCh < expectedNumAudioCh; nCh++)
  20. {
  21. value = 0;
  22. for(unsigned int i = 0; i < numberOfSamples; i++)
  23. {
  24. if (i != 0 && i % 44100 == 0)
  25. value = 0;
  26. validAudioData[nCh].push_back(sin(value));
  27. value += step;
  28. }
  29. }
  30. }
  31. void checkAudio()
  32. {
  33. getValidAudioData();
  34. ASSERT_EQ(expectedNumAudioCh, (int)audioData.size());
  35. for (unsigned int nCh = 0; nCh < audioData.size(); nCh++)
  36. {
  37. #ifdef _WIN32
  38. if (audioData[nCh].size() == 132924 && numberOfSamples == 131819 && fileName == "test_audio.mp4")
  39. throw SkipTestException("Detected failure observed on legacy Windows versions. SKIP");
  40. #endif
  41. ASSERT_EQ(numberOfSamples, audioData[nCh].size()) << "nCh=" << nCh;
  42. for (unsigned int i = 0; i < numberOfSamples; i++)
  43. {
  44. EXPECT_NEAR(validAudioData[nCh][i], audioData[nCh][i], epsilon) << "sample index=" << i << " nCh=" << nCh;
  45. }
  46. }
  47. }
  48. protected:
  49. int expectedNumAudioCh;
  50. unsigned int numberOfSamples;
  51. double epsilon;
  52. VideoCaptureAPIs backend;
  53. std::string root;
  54. std::string fileName;
  55. std::vector<std::vector<double>> validAudioData;
  56. std::vector<std::vector<double>> audioData;
  57. std::vector<int> params;
  58. Mat audioFrame;
  59. VideoCapture cap;
  60. };
  61. class AudioTestFixture : public AudioBaseTest, public testing::TestWithParam <param>
  62. {
  63. public:
  64. AudioTestFixture()
  65. {
  66. fileName = get<0>(GetParam());
  67. expectedNumAudioCh = get<1>(GetParam());
  68. numberOfSamples = get<2>(GetParam());
  69. epsilon = get<3>(GetParam());
  70. backend = get<4>(GetParam());
  71. root = "audio/";
  72. params = { CAP_PROP_AUDIO_STREAM, 0,
  73. CAP_PROP_VIDEO_STREAM, -1,
  74. CAP_PROP_AUDIO_DATA_DEPTH, CV_16S };
  75. }
  76. void doTest()
  77. {
  78. ASSERT_TRUE(cap.open(findDataFile(root + fileName), backend, params));
  79. const int audioBaseIndex = static_cast<int>(cap.get(cv::CAP_PROP_AUDIO_BASE_INDEX));
  80. const int numberOfChannels = (int)cap.get(CAP_PROP_AUDIO_TOTAL_CHANNELS);
  81. ASSERT_EQ(expectedNumAudioCh, numberOfChannels);
  82. double f = 0;
  83. audioData.resize(numberOfChannels);
  84. for (;;)
  85. {
  86. if (cap.grab())
  87. {
  88. for (int nCh = 0; nCh < numberOfChannels; nCh++)
  89. {
  90. ASSERT_TRUE(cap.retrieve(audioFrame, audioBaseIndex + nCh));
  91. ASSERT_EQ(CV_16SC1, audioFrame.type()) << audioData[nCh].size();
  92. for (int i = 0; i < audioFrame.cols; i++)
  93. {
  94. f = ((double) audioFrame.at<signed short>(0,i)) / (double) 32768;
  95. audioData[nCh].push_back(f);
  96. }
  97. }
  98. }
  99. else { break; }
  100. }
  101. ASSERT_FALSE(audioData.empty());
  102. checkAudio();
  103. }
  104. };
  105. const param audioParams[] =
  106. {
  107. #ifdef _WIN32
  108. param("test_audio.wav", 1, 132300, 0.0001, cv::CAP_MSMF),
  109. param("test_mono_audio.mp3", 1, 133104, 0.12, cv::CAP_MSMF),
  110. param("test_stereo_audio.mp3", 2, 133104, 0.12, cv::CAP_MSMF),
  111. param("test_audio.mp4", 1, 133104, 0.15, cv::CAP_MSMF),
  112. #endif
  113. param("test_audio.wav", 1, 132300, 0.0001, cv::CAP_GSTREAMER),
  114. param("test_audio.mp4", 1, 132522, 0.15, cv::CAP_GSTREAMER),
  115. };
  116. class Audio : public AudioTestFixture{};
  117. TEST_P(Audio, audio)
  118. {
  119. if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(backend)))
  120. throw SkipTestException(cv::videoio_registry::getBackendName(backend) + " backend was not found");
  121. doTest();
  122. }
  123. inline static std::string Audio_name_printer(const testing::TestParamInfo<Audio::ParamType>& info)
  124. {
  125. std::ostringstream out;
  126. out << getExtensionSafe(get<0>(info.param)) << "_"
  127. << get<1>(info.param) << "CN" << "_"
  128. << getBackendNameSafe(get<4>(info.param));
  129. return out.str();
  130. }
  131. INSTANTIATE_TEST_CASE_P(/**/, Audio, testing::ValuesIn(audioParams), Audio_name_printer);
  132. class MediaTestFixture : public AudioBaseTest, public testing::TestWithParam <paramCombination>
  133. {
  134. public:
  135. MediaTestFixture():
  136. videoType(get<3>(GetParam())),
  137. height(get<4>(GetParam())),
  138. width(get<5>(GetParam())),
  139. numberOfFrames(get<6>(GetParam())),
  140. fps(get<8>(GetParam())),
  141. psnrThreshold(get<9>(GetParam()))
  142. {
  143. fileName = get<0>(GetParam());
  144. expectedNumAudioCh = get<1>(GetParam());
  145. numberOfSamples = get<7>(GetParam());
  146. epsilon = get<2>(GetParam());
  147. backend = get<10>(GetParam());
  148. root = "audio/";
  149. params = { CAP_PROP_AUDIO_STREAM, 0,
  150. CAP_PROP_VIDEO_STREAM, 0,
  151. CAP_PROP_AUDIO_DATA_DEPTH, CV_16S };
  152. }
  153. void doTest()
  154. {
  155. ASSERT_TRUE(cap.open(findDataFile(root + fileName), backend, params));
  156. const int audioBaseIndex = static_cast<int>(cap.get(cv::CAP_PROP_AUDIO_BASE_INDEX));
  157. const int numberOfChannels = (int)cap.get(CAP_PROP_AUDIO_TOTAL_CHANNELS);
  158. ASSERT_EQ(expectedNumAudioCh, numberOfChannels);
  159. const int samplePerSecond = (int)cap.get(CAP_PROP_AUDIO_SAMPLES_PER_SECOND);
  160. ASSERT_EQ(44100, samplePerSecond);
  161. int samplesPerFrame = (int)(1./fps*samplePerSecond);
  162. double audio0_timestamp = 0;
  163. Mat videoFrame;
  164. Mat img(height, width, videoType);
  165. audioData.resize(numberOfChannels);
  166. for (int frame = 0; frame < numberOfFrames; frame++)
  167. {
  168. SCOPED_TRACE(cv::format("frame=%d", frame));
  169. ASSERT_TRUE(cap.grab());
  170. if (frame == 0)
  171. {
  172. double audio_shift = cap.get(CAP_PROP_AUDIO_SHIFT_NSEC);
  173. double video0_timestamp = cap.get(CAP_PROP_POS_MSEC) * 1e-3;
  174. audio0_timestamp = video0_timestamp + audio_shift * 1e-9;
  175. std::cout << "video0 timestamp: " << video0_timestamp << " audio0 timestamp: " << audio0_timestamp << " (audio shift nanoseconds: " << audio_shift << " , seconds: " << audio_shift * 1e-9 << ")" << std::endl;
  176. }
  177. ASSERT_TRUE(cap.retrieve(videoFrame));
  178. if (epsilon >= 0)
  179. {
  180. generateFrame(frame, numberOfFrames, img);
  181. ASSERT_EQ(img.size, videoFrame.size);
  182. double psnr = cvtest::PSNR(img, videoFrame);
  183. EXPECT_GE(psnr, psnrThreshold);
  184. }
  185. int audioFrameCols = 0;
  186. for (int nCh = 0; nCh < numberOfChannels; nCh++)
  187. {
  188. ASSERT_TRUE(cap.retrieve(audioFrame, audioBaseIndex+nCh));
  189. if (audioFrame.empty())
  190. continue;
  191. ASSERT_EQ(CV_16SC1, audioFrame.type());
  192. if (nCh == 0)
  193. audioFrameCols = audioFrame.cols;
  194. else
  195. ASSERT_EQ(audioFrameCols, audioFrame.cols) << "channel "<< nCh;
  196. for (int i = 0; i < audioFrame.cols; i++)
  197. {
  198. double f = audioFrame.at<signed short>(0,i) / 32768.0;
  199. audioData[nCh].push_back(f);
  200. }
  201. }
  202. if (frame < 5 || frame >= numberOfFrames-5)
  203. std::cout << "frame=" << frame << ": audioFrameSize=" << audioFrameCols << " videoTimestamp=" << cap.get(CAP_PROP_POS_MSEC) << " ms" << std::endl;
  204. else if (frame == 6)
  205. std::cout << "frame..." << std::endl;
  206. if (audioFrameCols == 0)
  207. continue;
  208. if (frame != 0 && frame != numberOfFrames-1)
  209. {
  210. // validate audio position
  211. EXPECT_NEAR(
  212. cap.get(CAP_PROP_AUDIO_POS) / samplePerSecond + audio0_timestamp,
  213. cap.get(CAP_PROP_POS_MSEC) * 1e-3,
  214. (1.0 / fps) * 0.6)
  215. << "CAP_PROP_AUDIO_POS=" << cap.get(CAP_PROP_AUDIO_POS) << " CAP_PROP_POS_MSEC=" << cap.get(CAP_PROP_POS_MSEC);
  216. }
  217. if (frame != 0 && frame != numberOfFrames-1 && audioData[0].size() != (size_t)numberOfSamples)
  218. {
  219. if (backend == cv::CAP_MSMF)
  220. {
  221. int audioSamplesTolerance = samplesPerFrame / 2;
  222. // validate audio frame size
  223. EXPECT_NEAR(audioFrame.cols, samplesPerFrame, audioSamplesTolerance);
  224. }
  225. }
  226. }
  227. ASSERT_FALSE(cap.grab());
  228. ASSERT_FALSE(audioData.empty());
  229. std::cout << "Total audio samples=" << audioData[0].size() << std::endl;
  230. if (epsilon >= 0)
  231. checkAudio();
  232. }
  233. protected:
  234. const int videoType;
  235. const int height;
  236. const int width;
  237. const int numberOfFrames;
  238. const int fps;
  239. const double psnrThreshold;
  240. };
  241. class Media : public MediaTestFixture{};
  242. TEST_P(Media, audio)
  243. {
  244. if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(backend)))
  245. throw SkipTestException(cv::videoio_registry::getBackendName(backend) + " backend was not found");
  246. if (cvtest::skipUnstableTests && backend == CAP_GSTREAMER)
  247. throw SkipTestException("Unstable GStreamer test");
  248. doTest();
  249. }
  250. const paramCombination mediaParams[] =
  251. {
  252. paramCombination("test_audio.mp4", 1, 0.15, CV_8UC3, 240, 320, 90, 132299, 30, 30., cv::CAP_GSTREAMER)
  253. #ifdef _WIN32
  254. , paramCombination("test_audio.mp4", 1, 0.15, CV_8UC3, 240, 320, 90, 131819, 30, 30., cv::CAP_MSMF)
  255. #if 0
  256. // https://filesamples.com/samples/video/mp4/sample_960x400_ocean_with_audio.mp4
  257. , paramCombination("sample_960x400_ocean_with_audio.mp4", 2, -1/*eplsilon*/, CV_8UC3, 400, 960, 1116, 2056588, 30, 30., cv::CAP_MSMF)
  258. #endif
  259. #endif // _WIN32
  260. };
  261. inline static std::string Media_name_printer(const testing::TestParamInfo<Media::ParamType>& info)
  262. {
  263. std::ostringstream out;
  264. out << getExtensionSafe(get<0>(info.param)) << "_"
  265. << get<1>(info.param) << "CN" << "_"
  266. << getBackendNameSafe(get<10>(info.param));
  267. return out.str();
  268. }
  269. INSTANTIATE_TEST_CASE_P(/**/, Media, testing::ValuesIn(mediaParams), Media_name_printer);
  270. TEST(AudioOpenCheck, bad_arg_invalid_audio_stream)
  271. {
  272. if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(cv::CAP_MSMF)))
  273. throw SkipTestException("CAP_MSMF backend was not found");
  274. std::string fileName = "audio/test_audio.wav";
  275. std::vector<int> params {
  276. CAP_PROP_AUDIO_STREAM, 1,
  277. CAP_PROP_VIDEO_STREAM, -1, // disabled
  278. CAP_PROP_AUDIO_DATA_DEPTH, CV_16S
  279. };
  280. VideoCapture cap;
  281. cap.open(findDataFile(fileName), cv::CAP_MSMF, params);
  282. ASSERT_FALSE(cap.isOpened());
  283. }
  284. TEST(AudioOpenCheck, bad_arg_invalid_audio_stream_video)
  285. {
  286. if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(cv::CAP_MSMF)))
  287. throw SkipTestException("CAP_MSMF backend was not found");
  288. std::string fileName = "audio/test_audio.mp4";
  289. std::vector<int> params {
  290. CAP_PROP_AUDIO_STREAM, 1,
  291. CAP_PROP_VIDEO_STREAM, 0,
  292. CAP_PROP_AUDIO_DATA_DEPTH, CV_16S
  293. };
  294. VideoCapture cap;
  295. cap.open(findDataFile(fileName), cv::CAP_MSMF, params);
  296. ASSERT_FALSE(cap.isOpened());
  297. }
  298. TEST(AudioOpenCheck, MSMF_bad_arg_invalid_audio_sample_per_second)
  299. {
  300. if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(cv::CAP_MSMF)))
  301. throw SkipTestException("CAP_MSMF backend was not found");
  302. std::string fileName = "audio/test_audio.mp4";
  303. std::vector<int> params {
  304. CAP_PROP_AUDIO_STREAM, 0,
  305. CAP_PROP_VIDEO_STREAM, -1, // disabled
  306. CAP_PROP_AUDIO_SAMPLES_PER_SECOND, (int)1e9
  307. };
  308. VideoCapture cap;
  309. cap.open(findDataFile(fileName), cv::CAP_MSMF, params);
  310. ASSERT_FALSE(cap.isOpened());
  311. }
  312. TEST(AudioOpenCheck, bad_arg_invalid_audio_sample_per_second)
  313. {
  314. std::string fileName = "audio/test_audio.mp4";
  315. std::vector<int> params {
  316. CAP_PROP_AUDIO_STREAM, 0,
  317. CAP_PROP_VIDEO_STREAM, -1, // disabled
  318. CAP_PROP_AUDIO_SAMPLES_PER_SECOND, -1000
  319. };
  320. VideoCapture cap;
  321. cap.open(findDataFile(fileName), cv::CAP_ANY, params);
  322. ASSERT_FALSE(cap.isOpened());
  323. }
  324. }} //namespace