test_model.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838
  1. // This file is part of OpenCV project.
  2. // It is subject to the license terms in the LICENSE file found in the top-level directory
  3. // of this distribution and at http://opencv.org/license.html.
  4. #include "test_precomp.hpp"
  5. #include <opencv2/dnn/shape_utils.hpp>
  6. #include "npy_blob.hpp"
  7. namespace opencv_test { namespace {
  8. template<typename TString>
  9. static std::string _tf(TString filename, bool required = true)
  10. {
  11. String rootFolder = "dnn/";
  12. return findDataFile(rootFolder + filename, required);
  13. }
  14. class Test_Model : public DNNTestLayer
  15. {
  16. public:
  17. void testDetectModel(const std::string& weights, const std::string& cfg,
  18. const std::string& imgPath, const std::vector<int>& refClassIds,
  19. const std::vector<float>& refConfidences,
  20. const std::vector<Rect2d>& refBoxes,
  21. double scoreDiff, double iouDiff,
  22. double confThreshold = 0.24, double nmsThreshold = 0.0,
  23. const Size& size = {-1, -1}, Scalar mean = Scalar(),
  24. double scale = 1.0, bool swapRB = false, bool crop = false,
  25. bool nmsAcrossClasses = false)
  26. {
  27. checkBackend();
  28. Mat frame = imread(imgPath);
  29. DetectionModel model(weights, cfg);
  30. model.setInputSize(size).setInputMean(mean).setInputScale(scale)
  31. .setInputSwapRB(swapRB).setInputCrop(crop);
  32. model.setPreferableBackend(backend);
  33. model.setPreferableTarget(target);
  34. model.setNmsAcrossClasses(nmsAcrossClasses);
  35. if (target == DNN_TARGET_CPU_FP16)
  36. model.enableWinograd(false);
  37. std::vector<int> classIds;
  38. std::vector<float> confidences;
  39. std::vector<Rect> boxes;
  40. model.detect(frame, classIds, confidences, boxes, confThreshold, nmsThreshold);
  41. std::vector<Rect2d> boxesDouble(boxes.size());
  42. for (int i = 0; i < boxes.size(); i++) {
  43. boxesDouble[i] = boxes[i];
  44. }
  45. normAssertDetections(refClassIds, refConfidences, refBoxes, classIds,
  46. confidences, boxesDouble, "",
  47. confThreshold, scoreDiff, iouDiff);
  48. }
  49. void testClassifyModel(const std::string& weights, const std::string& cfg,
  50. const std::string& imgPath, std::pair<int, float> ref, float norm,
  51. const Size& size = {-1, -1}, Scalar mean = Scalar(),
  52. double scale = 1.0, bool swapRB = false, bool crop = false)
  53. {
  54. checkBackend();
  55. Mat frame = imread(imgPath);
  56. ClassificationModel model(weights, cfg);
  57. model.setInputSize(size).setInputMean(mean).setInputScale(scale)
  58. .setInputSwapRB(swapRB).setInputCrop(crop);
  59. std::pair<int, float> prediction = model.classify(frame);
  60. EXPECT_EQ(prediction.first, ref.first);
  61. ASSERT_NEAR(prediction.second, ref.second, norm);
  62. }
  63. void testKeypointsModel(const std::string& weights, const std::string& cfg,
  64. const Mat& frame, const Mat& exp, float norm,
  65. const Size& size = {-1, -1}, Scalar mean = Scalar(),
  66. double scale = 1.0, bool swapRB = false, bool crop = false)
  67. {
  68. checkBackend();
  69. std::vector<Point2f> points;
  70. KeypointsModel model(weights, cfg);
  71. model.setInputSize(size).setInputMean(mean).setInputScale(scale)
  72. .setInputSwapRB(swapRB).setInputCrop(crop);
  73. model.setPreferableBackend(backend);
  74. model.setPreferableTarget(target);
  75. points = model.estimate(frame, 0.5);
  76. Mat out = Mat(points).reshape(1);
  77. normAssert(exp, out, "", norm, norm);
  78. }
  79. void testSegmentationModel(const std::string& weights_file, const std::string& config_file,
  80. const std::string& inImgPath, const std::string& outImgPath,
  81. float norm, const Size& size = {-1, -1}, Scalar mean = Scalar(),
  82. double scale = 1.0, bool swapRB = false, bool crop = false, const std::string outname = "")
  83. {
  84. checkBackend();
  85. Mat frame = imread(inImgPath);
  86. Mat mask;
  87. Mat exp = imread(outImgPath, 0);
  88. SegmentationModel model(weights_file, config_file);
  89. model.setInputSize(size).setInputMean(mean).setInputScale(scale)
  90. .setInputSwapRB(swapRB).setInputCrop(crop);
  91. model.setPreferableBackend(backend);
  92. model.setPreferableTarget(target);
  93. if(!outname.empty())
  94. model.setOutputNames({outname});
  95. model.segment(frame, mask);
  96. normAssert(mask, exp, "", norm, norm);
  97. }
  98. void testTextRecognitionModel(const std::string& weights, const std::string& cfg,
  99. const std::string& imgPath, const std::string& seq,
  100. const std::string& decodeType, const std::vector<std::string>& vocabulary,
  101. const Size& size = {-1, -1}, Scalar mean = Scalar(),
  102. double scale = 1.0, bool swapRB = false, bool crop = false)
  103. {
  104. checkBackend();
  105. Mat frame = imread(imgPath, IMREAD_GRAYSCALE);
  106. TextRecognitionModel model(weights, cfg);
  107. model.setDecodeType(decodeType)
  108. .setVocabulary(vocabulary)
  109. .setInputSize(size).setInputMean(mean).setInputScale(scale)
  110. .setInputSwapRB(swapRB).setInputCrop(crop);
  111. model.setPreferableBackend(backend);
  112. model.setPreferableTarget(target);
  113. std::string result = model.recognize(frame);
  114. EXPECT_EQ(result, seq) << "Full frame: " << imgPath;
  115. std::vector<Rect> rois;
  116. rois.push_back(Rect(0, 0, frame.cols, frame.rows));
  117. rois.push_back(Rect(0, 0, frame.cols, frame.rows)); // twice
  118. std::vector<std::string> results;
  119. model.recognize(frame, rois, results);
  120. EXPECT_EQ((size_t)2u, results.size()) << "ROI: " << imgPath;
  121. EXPECT_EQ(results[0], seq) << "ROI[0]: " << imgPath;
  122. EXPECT_EQ(results[1], seq) << "ROI[1]: " << imgPath;
  123. }
  124. void testTextDetectionModelByDB(const std::string& weights, const std::string& cfg,
  125. const std::string& imgPath, const std::vector<std::vector<Point>>& gt,
  126. float binThresh, float polyThresh,
  127. uint maxCandidates, double unclipRatio,
  128. const Size& size = {-1, -1}, Scalar mean = Scalar(), Scalar scale = Scalar::all(1.0),
  129. double boxes_iou_diff = 0.05, bool swapRB = false, bool crop = false)
  130. {
  131. checkBackend();
  132. Mat frame = imread(imgPath);
  133. TextDetectionModel_DB model(weights, cfg);
  134. model.setBinaryThreshold(binThresh)
  135. .setPolygonThreshold(polyThresh)
  136. .setUnclipRatio(unclipRatio)
  137. .setMaxCandidates(maxCandidates)
  138. .setInputSize(size).setInputMean(mean).setInputScale(scale)
  139. .setInputSwapRB(swapRB).setInputCrop(crop);
  140. model.setPreferableBackend(backend);
  141. model.setPreferableTarget(target);
  142. // 1. Check common TextDetectionModel API through RotatedRect
  143. std::vector<cv::RotatedRect> results;
  144. model.detectTextRectangles(frame, results);
  145. EXPECT_GT(results.size(), (size_t)0);
  146. std::vector< std::vector<Point> > contours;
  147. for (size_t i = 0; i < results.size(); i++)
  148. {
  149. const RotatedRect& box = results[i];
  150. Mat contour;
  151. boxPoints(box, contour);
  152. std::vector<Point> contour2i(4);
  153. for (int i = 0; i < 4; i++)
  154. {
  155. contour2i[i].x = cvRound(contour.at<float>(i, 0));
  156. contour2i[i].y = cvRound(contour.at<float>(i, 1));
  157. }
  158. contours.push_back(contour2i);
  159. }
  160. #if 0 // test debug
  161. Mat result = frame.clone();
  162. drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
  163. imshow("result", result); // imwrite("result.png", result);
  164. waitKey(0);
  165. #endif
  166. normAssertTextDetections(gt, contours, "", boxes_iou_diff);
  167. // 2. Check quadrangle-based API
  168. // std::vector< std::vector<Point> > contours;
  169. model.detect(frame, contours);
  170. #if 0 // test debug
  171. Mat result = frame.clone();
  172. drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
  173. imshow("result_contours", result); // imwrite("result_contours.png", result);
  174. waitKey(0);
  175. #endif
  176. normAssertTextDetections(gt, contours, "", boxes_iou_diff);
  177. }
  178. void testTextDetectionModelByEAST(
  179. const std::string& weights, const std::string& cfg,
  180. const std::string& imgPath, const std::vector<RotatedRect>& gt,
  181. float confThresh, float nmsThresh,
  182. const Size& size = {-1, -1}, Scalar mean = Scalar(),
  183. double scale = 1.0, bool swapRB = false, bool crop = false,
  184. double eps_center = 5/*pixels*/, double eps_size = 5/*pixels*/, double eps_angle = 1
  185. )
  186. {
  187. checkBackend();
  188. Mat frame = imread(imgPath);
  189. TextDetectionModel_EAST model(weights, cfg);
  190. model.setConfidenceThreshold(confThresh)
  191. .setNMSThreshold(nmsThresh)
  192. .setInputSize(size).setInputMean(mean).setInputScale(scale)
  193. .setInputSwapRB(swapRB).setInputCrop(crop);
  194. model.setPreferableBackend(backend);
  195. model.setPreferableTarget(target);
  196. std::vector<cv::RotatedRect> results;
  197. model.detectTextRectangles(frame, results);
  198. EXPECT_EQ(results.size(), (size_t)1);
  199. for (size_t i = 0; i < results.size(); i++)
  200. {
  201. const RotatedRect& box = results[i];
  202. #if 0 // test debug
  203. Mat contour;
  204. boxPoints(box, contour);
  205. std::vector<Point> contour2i(4);
  206. for (int i = 0; i < 4; i++)
  207. {
  208. contour2i[i].x = cvRound(contour.at<float>(i, 0));
  209. contour2i[i].y = cvRound(contour.at<float>(i, 1));
  210. }
  211. std::vector< std::vector<Point> > contours;
  212. contours.push_back(contour2i);
  213. Mat result = frame.clone();
  214. drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
  215. imshow("result", result); //imwrite("result.png", result);
  216. waitKey(0);
  217. #endif
  218. const RotatedRect& gtBox = gt[i];
  219. EXPECT_NEAR(box.center.x, gtBox.center.x, eps_center);
  220. EXPECT_NEAR(box.center.y, gtBox.center.y, eps_center);
  221. EXPECT_NEAR(box.size.width, gtBox.size.width, eps_size);
  222. EXPECT_NEAR(box.size.height, gtBox.size.height, eps_size);
  223. EXPECT_NEAR(box.angle, gtBox.angle, eps_angle);
  224. }
  225. }
  226. };
  227. TEST_P(Test_Model, Classify)
  228. {
  229. std::pair<int, float> ref(652, 0.641789);
  230. std::string img_path = _tf("grace_hopper_227.png");
  231. std::string config_file = _tf("bvlc_alexnet.prototxt");
  232. std::string weights_file = _tf("bvlc_alexnet.caffemodel", false);
  233. Size size{227, 227};
  234. float norm = 1e-4;
  235. testClassifyModel(weights_file, config_file, img_path, ref, norm, size);
  236. }
  237. TEST_P(Test_Model, DetectRegion)
  238. {
  239. applyTestTag(
  240. CV_TEST_TAG_MEMORY_2GB,
  241. CV_TEST_TAG_LONG,
  242. CV_TEST_TAG_DEBUG_VERYLONG
  243. );
  244. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
  245. // accuracy
  246. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  247. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  248. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
  249. // accuracy
  250. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  251. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  252. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
  253. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
  254. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  255. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  256. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  257. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
  258. // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused
  259. if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
  260. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
  261. #endif
  262. #if defined(INF_ENGINE_RELEASE)
  263. if (target == DNN_TARGET_MYRIAD
  264. && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
  265. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
  266. #endif
  267. std::vector<int> refClassIds = {6, 1, 11};
  268. std::vector<float> refConfidences = {0.750469f, 0.780879f, 0.901615f};
  269. std::vector<Rect2d> refBoxes = {Rect2d(240, 53, 135, 72),
  270. Rect2d(112, 109, 192, 200),
  271. Rect2d(58, 141, 117, 249)};
  272. std::string img_path = _tf("dog416.png");
  273. std::string weights_file = _tf("yolo-voc.weights", false);
  274. std::string config_file = _tf("yolo-voc.cfg");
  275. double scale = 1.0 / 255.0;
  276. Size size{416, 416};
  277. bool swapRB = true;
  278. double confThreshold = 0.24;
  279. double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.397 : 0.4;
  280. double scoreDiff = 8e-5, iouDiff = 1e-5;
  281. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
  282. {
  283. scoreDiff = 1e-2;
  284. iouDiff = 1.6e-2;
  285. }
  286. testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
  287. refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
  288. Scalar(), scale, swapRB);
  289. }
  290. TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
  291. {
  292. applyTestTag(
  293. CV_TEST_TAG_MEMORY_2GB,
  294. CV_TEST_TAG_LONG,
  295. CV_TEST_TAG_DEBUG_VERYLONG
  296. );
  297. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
  298. // accuracy
  299. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  300. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  301. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
  302. // accuracy
  303. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  304. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  305. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
  306. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
  307. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  308. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  309. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  310. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
  311. if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
  312. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
  313. #endif
  314. #if defined(INF_ENGINE_RELEASE)
  315. if (target == DNN_TARGET_MYRIAD
  316. && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
  317. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
  318. #endif
  319. std::vector<int> refClassIds = { 6, 11 };
  320. std::vector<float> refConfidences = { 0.750469f, 0.901615f };
  321. std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
  322. Rect2d(58, 141, 117, 249) };
  323. std::string img_path = _tf("dog416.png");
  324. std::string weights_file = _tf("yolo-voc.weights", false);
  325. std::string config_file = _tf("yolo-voc.cfg");
  326. double scale = 1.0 / 255.0;
  327. Size size{ 416, 416 };
  328. bool swapRB = true;
  329. bool crop = false;
  330. bool nmsAcrossClasses = true;
  331. double confThreshold = 0.24;
  332. double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.15: 0.15;
  333. double scoreDiff = 8e-5, iouDiff = 1e-5;
  334. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
  335. {
  336. scoreDiff = 1e-2;
  337. iouDiff = 1.6e-2;
  338. }
  339. testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
  340. refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
  341. Scalar(), scale, swapRB, crop,
  342. nmsAcrossClasses);
  343. }
  344. TEST_P(Test_Model, DetectionOutput)
  345. {
  346. applyTestTag(CV_TEST_TAG_DEBUG_VERYLONG);
  347. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
  348. // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
  349. // While validating node 'v1::Reshape bbox_pred_reshape (ave_bbox_pred_rois[0]:f32{1,8,1,1}, Constant_388[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
  350. // Requested output shape {1,300,8,1} is incompatible with input shape {1, 8, 1, 1}
  351. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
  352. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  353. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
  354. // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?)
  355. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU)
  356. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  357. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
  358. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  359. #elif defined(INF_ENGINE_RELEASE)
  360. // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused
  361. if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
  362. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
  363. if (target == DNN_TARGET_MYRIAD)
  364. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
  365. #endif
  366. std::vector<int> refClassIds = {7, 12};
  367. std::vector<float> refConfidences = {0.991359f, 0.94786f};
  368. std::vector<Rect2d> refBoxes = {Rect2d(491, 81, 212, 98),
  369. Rect2d(132, 223, 207, 344)};
  370. std::string img_path = _tf("dog416.png");
  371. std::string weights_file = _tf("resnet50_rfcn_final.caffemodel", false);
  372. std::string config_file = _tf("rfcn_pascal_voc_resnet50.prototxt");
  373. Scalar mean = Scalar(102.9801, 115.9465, 122.7717);
  374. Size size{800, 600};
  375. double scoreDiff = default_l1, iouDiff = 1e-5;
  376. float confThreshold = 0.8;
  377. double nmsThreshold = 0.0;
  378. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
  379. {
  380. if (backend == DNN_BACKEND_OPENCV)
  381. scoreDiff = 4e-3;
  382. else
  383. scoreDiff = 2e-2;
  384. iouDiff = 1.8e-1;
  385. }
  386. #if defined(INF_ENGINE_RELEASE)
  387. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
  388. {
  389. scoreDiff = 0.05;
  390. iouDiff = 0.08;
  391. }
  392. #endif
  393. testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
  394. scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
  395. }
  396. TEST_P(Test_Model, DetectionMobilenetSSD)
  397. {
  398. Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));
  399. ref = ref.reshape(1, ref.size[2]);
  400. std::string img_path = _tf("street.png");
  401. Mat frame = imread(img_path);
  402. int frameWidth = frame.cols;
  403. int frameHeight = frame.rows;
  404. std::vector<int> refClassIds;
  405. std::vector<float> refConfidences;
  406. std::vector<Rect2d> refBoxes;
  407. for (int i = 0; i < ref.rows; i++)
  408. {
  409. refClassIds.emplace_back(ref.at<float>(i, 1));
  410. refConfidences.emplace_back(ref.at<float>(i, 2));
  411. int left = ref.at<float>(i, 3) * frameWidth;
  412. int top = ref.at<float>(i, 4) * frameHeight;
  413. int right = ref.at<float>(i, 5) * frameWidth;
  414. int bottom = ref.at<float>(i, 6) * frameHeight;
  415. int width = right - left + 1;
  416. int height = bottom - top + 1;
  417. refBoxes.emplace_back(left, top, width, height);
  418. }
  419. std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false);
  420. std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt");
  421. Scalar mean = Scalar(127.5, 127.5, 127.5);
  422. double scale = 1.0 / 127.5;
  423. Size size{300, 300};
  424. double scoreDiff = 1e-5, iouDiff = 1e-5;
  425. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16)
  426. {
  427. scoreDiff = 1.7e-2;
  428. iouDiff = 6.91e-2;
  429. }
  430. else if (target == DNN_TARGET_MYRIAD)
  431. {
  432. scoreDiff = 0.017;
  433. if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
  434. iouDiff = 0.1;
  435. }
  436. else if (target == DNN_TARGET_CUDA_FP16)
  437. {
  438. scoreDiff = 0.0028;
  439. iouDiff = 1e-2;
  440. }
  441. float confThreshold = FLT_MIN;
  442. double nmsThreshold = 0.0;
  443. testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
  444. scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
  445. }
  446. TEST_P(Test_Model, Keypoints_pose)
  447. {
  448. if (target == DNN_TARGET_OPENCL_FP16)
  449. applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
  450. if (target == DNN_TARGET_CPU_FP16)
  451. applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
  452. #ifdef HAVE_INF_ENGINE
  453. if (target == DNN_TARGET_MYRIAD)
  454. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  455. #endif
  456. Mat inp = imread(_tf("pose.png"));
  457. std::string weights = _tf("onnx/models/lightweight_pose_estimation_201912.onnx", false);
  458. float kpdata[] = {
  459. 237.65625f, 78.25f, 237.65625f, 136.9375f,
  460. 190.125f, 136.9375f, 142.59375f, 195.625f, 79.21875f, 176.0625f, 285.1875f, 117.375f,
  461. 348.5625f, 195.625f, 396.09375f, 176.0625f, 205.96875f, 313.0f, 205.96875f, 430.375f,
  462. 205.96875f, 528.1875f, 269.34375f, 293.4375f, 253.5f, 430.375f, 237.65625f, 528.1875f,
  463. 221.8125f, 58.6875f, 253.5f, 58.6875f, 205.96875f, 78.25f, 253.5f, 58.6875f
  464. };
  465. Mat exp(18, 2, CV_32FC1, kpdata);
  466. Size size{256, 256};
  467. float norm = 1e-4;
  468. double scale = 1.0/255;
  469. Scalar mean = Scalar(128, 128, 128);
  470. bool swapRB = false;
  471. // Ref. Range: [58.6875, 508.625]
  472. if (target == DNN_TARGET_CUDA_FP16)
  473. norm = 20; // l1 = 1.5, lInf = 20
  474. testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
  475. }
  476. TEST_P(Test_Model, Keypoints_face)
  477. {
  478. #if defined(INF_ENGINE_RELEASE)
  479. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
  480. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  481. #endif
  482. Mat inp = imread(_tf("gray_face.png"), 0);
  483. std::string weights = _tf("onnx/models/facial_keypoints.onnx", false);
  484. Mat exp = blobFromNPY(_tf("facial_keypoints_exp.npy"));
  485. Size size{224, 224};
  486. double scale = 1.0/255;
  487. Scalar mean = Scalar();
  488. bool swapRB = false;
  489. // Ref. Range: [-1.1784188, 1.7758257]
  490. float norm = 1e-4;
  491. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16)
  492. norm = 5e-3;
  493. if (target == DNN_TARGET_MYRIAD)
  494. {
  495. // Myriad2: l1 = 0.0004, lInf = 0.002
  496. // MyriadX: l1 = 0.003, lInf = 0.009
  497. norm = 0.009;
  498. }
  499. if (target == DNN_TARGET_CUDA_FP16)
  500. norm = 0.004; // l1 = 0.0006, lInf = 0.004
  501. testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
  502. }
  503. TEST_P(Test_Model, Detection_normalized)
  504. {
  505. std::string img_path = _tf("grace_hopper_227.png");
  506. std::vector<int> refClassIds = {15};
  507. std::vector<float> refConfidences = {0.999222f};
  508. std::vector<Rect2d> refBoxes = {Rect2d(0, 4, 227, 222)};
  509. std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false);
  510. std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt");
  511. Scalar mean = Scalar(127.5, 127.5, 127.5);
  512. double scale = 1.0 / 127.5;
  513. Size size{300, 300};
  514. double scoreDiff = 1e-5, iouDiff = 1e-5;
  515. float confThreshold = FLT_MIN;
  516. double nmsThreshold = 0.0;
  517. if (target == DNN_TARGET_CUDA)
  518. {
  519. scoreDiff = 3e-4;
  520. iouDiff = 0.018;
  521. }
  522. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_CPU_FP16)
  523. {
  524. scoreDiff = 5e-3;
  525. iouDiff = 0.09;
  526. }
  527. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020040000)
  528. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
  529. {
  530. scoreDiff = 0.02;
  531. iouDiff = 0.1f;
  532. }
  533. #endif
  534. testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
  535. scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
  536. }
  537. TEST_P(Test_Model, Segmentation)
  538. {
  539. applyTestTag(
  540. CV_TEST_TAG_MEMORY_2GB,
  541. CV_TEST_TAG_DEBUG_VERYLONG
  542. );
  543. float norm = 0;
  544. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
  545. // Failed to allocate graph: NC_ERROR
  546. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
  547. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  548. // accuracy
  549. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
  550. {
  551. norm = 25.0f; // depends on OS/OpenCL version
  552. }
  553. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
  554. // Failed to allocate graph: NC_ERROR
  555. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
  556. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  557. // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'!
  558. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
  559. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  560. // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'!
  561. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
  562. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  563. #elif defined(INF_ENGINE_RELEASE)
  564. // Failed to allocate graph: NC_ERROR
  565. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
  566. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  567. #endif
  568. if ((backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16))
  569. || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16))
  570. {
  571. norm = 7.0f; // l1 = 0.01 lInf = 7
  572. }
  573. std::string inp = _tf("dog416.png");
  574. std::string weights_file = _tf("onnx/models/fcn-resnet50-12.onnx", false);
  575. std::string exp = _tf("segmentation_exp.png");
  576. Size size{128, 128};
  577. double scale = 0.019;
  578. Scalar mean = Scalar(0.485*255, 0.456*255, 0.406*255);
  579. bool swapRB = true;
  580. testSegmentationModel(weights_file, "", inp, exp, norm, size, mean, scale, swapRB, false, "out");
  581. }
  582. TEST_P(Test_Model, TextRecognition)
  583. {
  584. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
  585. // FIXIT: dnn/src/ie_ngraph.cpp:494: error: (-215:Assertion failed) !inps.empty() in function 'createNet'
  586. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU)
  587. applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
  588. // Node Transpose_79 was not assigned on any pointed device
  589. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
  590. applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
  591. CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
  592. );
  593. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
  594. // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
  595. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
  596. applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
  597. CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
  598. );
  599. #endif
  600. std::string imgPath = _tf("text_rec_test.png");
  601. std::string weightPath = _tf("onnx/models/crnn.onnx", false);
  602. std::string seq = "welcome";
  603. Size size{100, 32};
  604. double scale = 1.0 / 127.5;
  605. Scalar mean = Scalar(127.5);
  606. std::string decodeType = "CTC-greedy";
  607. std::vector<std::string> vocabulary = {"0","1","2","3","4","5","6","7","8","9",
  608. "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};
  609. testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale);
  610. }
  611. TEST_P(Test_Model, TextRecognitionWithCTCPrefixBeamSearch)
  612. {
  613. #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
  614. // Node Transpose_79 was not assigned on any pointed device
  615. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
  616. applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
  617. CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
  618. );
  619. #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
  620. // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
  621. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
  622. applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
  623. CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
  624. );
  625. #endif
  626. std::string imgPath = _tf("text_rec_test.png");
  627. std::string weightPath = _tf("onnx/models/crnn.onnx", false);
  628. std::string seq = "welcome";
  629. Size size{100, 32};
  630. double scale = 1.0 / 127.5;
  631. Scalar mean = Scalar(127.5);
  632. std::string decodeType = "CTC-prefix-beam-search";
  633. std::vector<std::string> vocabulary = {"0","1","2","3","4","5","6","7","8","9",
  634. "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};
  635. testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale);
  636. }
  637. TEST_P(Test_Model, TextDetectionByDB)
  638. {
  639. applyTestTag(CV_TEST_TAG_DEBUG_VERYLONG);
  640. if (target == DNN_TARGET_OPENCL_FP16)
  641. applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
  642. if (target == DNN_TARGET_CPU_FP16)
  643. applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
  644. std::string imgPath = _tf("text_det_test1.png");
  645. std::string weightPathDB = _tf("onnx/models/DB_TD500_resnet50.onnx", false);
  646. std::string weightPathPPDB = _tf("onnx/models/PP_OCRv3_DB_text_det.onnx", false);
  647. // GroundTruth
  648. std::vector<std::vector<Point>> gt = {
  649. { Point(142, 193), Point(136, 164), Point(213, 150), Point(219, 178) },
  650. { Point(136, 165), Point(122, 114), Point(319, 71), Point(330, 122) }
  651. };
  652. Size size{736, 736};
  653. Scalar scaleDB = Scalar::all(1.0 / 255.0);
  654. Scalar meanDB = Scalar(122.67891434, 116.66876762, 104.00698793);
  655. // new mean and stddev
  656. Scalar meanPPDB = Scalar(123.675, 116.28, 103.53);
  657. Scalar stddevPPDB = Scalar(0.229, 0.224, 0.225);
  658. Scalar scalePPDB = scaleDB / stddevPPDB;
  659. float binThresh = 0.3;
  660. float polyThresh = 0.5;
  661. uint maxCandidates = 200;
  662. double unclipRatio = 2.0;
  663. {
  664. SCOPED_TRACE("Original DB");
  665. testTextDetectionModelByDB(weightPathDB, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, meanDB, scaleDB, 0.05f);
  666. }
  667. {
  668. SCOPED_TRACE("PP-OCRDBv3");
  669. testTextDetectionModelByDB(weightPathPPDB, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, meanPPDB, scalePPDB, 0.21f);
  670. }
  671. }
  672. TEST_P(Test_Model, TextDetectionByEAST)
  673. {
  674. applyTestTag(CV_TEST_TAG_DEBUG_VERYLONG);
  675. std::string imgPath = _tf("text_det_test2.jpg");
  676. std::string weightPath = _tf("frozen_east_text_detection.pb", false);
  677. // GroundTruth
  678. std::vector<RotatedRect> gt = {
  679. RotatedRect(Point2f(657.55f, 409.5f), Size2f(316.84f, 62.45f), -4.79)
  680. };
  681. // Model parameters
  682. Size size{320, 320};
  683. double scale = 1.0;
  684. Scalar mean = Scalar(123.68, 116.78, 103.94);
  685. bool swapRB = true;
  686. // Detection algorithm parameters
  687. float confThresh = 0.5;
  688. float nmsThresh = 0.4;
  689. double eps_center = 5/*pixels*/;
  690. double eps_size = 5/*pixels*/;
  691. double eps_angle = 1;
  692. if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16)
  693. {
  694. eps_center = 10;
  695. eps_size = 25;
  696. eps_angle = 3;
  697. }
  698. testTextDetectionModelByEAST(weightPath, "", imgPath, gt, confThresh, nmsThresh, size, mean, scale, swapRB, false/*crop*/,
  699. eps_center, eps_size, eps_angle
  700. );
  701. }
// Instantiate the value-parameterized Test_Model suite over the parameter set produced by
// dnnBackendsAndTargets(); the instantiation name prefix is intentionally left empty (/**/).
702. INSTANTIATE_TEST_CASE_P(/**/, Test_Model, dnnBackendsAndTargets());
  703. }} // namespace