CLprocessor.cpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. #ifdef OPENCL_FOUND
  2. #define __CL_ENABLE_EXCEPTIONS
  3. #define CL_USE_DEPRECATED_OPENCL_1_1_APIS /*let's give a chance for OpenCL 1.1 devices*/
  4. #include <CL/opencl.hpp>
  5. #endif
  6. #include <GLES2/gl2.h>
  7. #include <EGL/egl.h>
  8. #include <opencv2/core.hpp>
  9. #include <opencv2/imgproc.hpp>
  10. #include <opencv2/core/ocl.hpp>
  11. #include "common.hpp"
  12. #include "CLprocessor.hpp"
  13. #ifdef OPENCL_FOUND
  14. const char oclProgB2B[] = "// clBuffer to clBuffer";
  15. const char oclProgI2B[] = "// clImage to clBuffer";
  16. const char oclProgI2I[] = \
  17. "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; \n" \
  18. "\n" \
  19. "__kernel void Laplacian( \n" \
  20. " __read_only image2d_t imgIn, \n" \
  21. " __write_only image2d_t imgOut \n" \
  22. " ) { \n" \
  23. " \n" \
  24. " const int2 pos = {get_global_id(0), get_global_id(1)}; \n" \
  25. " \n" \
  26. " float4 sum = (float4) 0.0f; \n" \
  27. " sum += read_imagef(imgIn, sampler, pos + (int2)(-1,0)); \n" \
  28. " sum += read_imagef(imgIn, sampler, pos + (int2)(+1,0)); \n" \
  29. " sum += read_imagef(imgIn, sampler, pos + (int2)(0,-1)); \n" \
  30. " sum += read_imagef(imgIn, sampler, pos + (int2)(0,+1)); \n" \
  31. " sum -= read_imagef(imgIn, sampler, pos) * 4; \n" \
  32. " \n" \
  33. " write_imagef(imgOut, pos, sum*10); \n" \
  34. "} \n";
  35. static void dumpCLinfo()
  36. {
  37. LOGD("*** OpenCL info ***");
  38. try
  39. {
  40. std::vector<cl::Platform> platforms;
  41. cl::Platform::get(&platforms);
  42. LOGD("OpenCL info: Found %d OpenCL platforms", platforms.size());
  43. for (int i = 0; i < platforms.size(); ++i)
  44. {
  45. std::string name = platforms[i].getInfo<CL_PLATFORM_NAME>();
  46. std::string version = platforms[i].getInfo<CL_PLATFORM_VERSION>();
  47. std::string profile = platforms[i].getInfo<CL_PLATFORM_PROFILE>();
  48. std::string extensions = platforms[i].getInfo<CL_PLATFORM_EXTENSIONS>();
  49. LOGD( "OpenCL info: Platform[%d] = %s, ver = %s, prof = %s, ext = %s",
  50. i, name.c_str(), version.c_str(), profile.c_str(), extensions.c_str() );
  51. }
  52. std::vector<cl::Device> devices;
  53. platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
  54. for (int i = 0; i < devices.size(); ++i)
  55. {
  56. std::string name = devices[i].getInfo<CL_DEVICE_NAME>();
  57. std::string extensions = devices[i].getInfo<CL_DEVICE_EXTENSIONS>();
  58. cl_ulong type = devices[i].getInfo<CL_DEVICE_TYPE>();
  59. LOGD( "OpenCL info: Device[%d] = %s (%s), ext = %s",
  60. i, name.c_str(), (type==CL_DEVICE_TYPE_GPU ? "GPU" : "CPU"), extensions.c_str() );
  61. }
  62. }
  63. catch(const cl::Error& e)
  64. {
  65. LOGE( "OpenCL info: error while gathering OpenCL info: %s (%d)", e.what(), e.err() );
  66. }
  67. catch(const std::exception& e)
  68. {
  69. LOGE( "OpenCL info: error while gathering OpenCL info: %s", e.what() );
  70. }
  71. catch(...)
  72. {
  73. LOGE( "OpenCL info: unknown error while gathering OpenCL info" );
  74. }
  75. LOGD("*******************");
  76. }
  77. cl::Context theContext;
  78. cl::CommandQueue theQueue;
  79. cl::Program theProgB2B, theProgI2B, theProgI2I;
  80. bool haveOpenCL = false;
  81. //![init_opencl]
  82. int initCL()
  83. {
  84. dumpCLinfo();
  85. LOGE("initCL: start initCL");
  86. EGLDisplay mEglDisplay = eglGetCurrentDisplay();
  87. if (mEglDisplay == EGL_NO_DISPLAY)
  88. LOGE("initCL: eglGetCurrentDisplay() returned 'EGL_NO_DISPLAY', error = %x", eglGetError());
  89. EGLContext mEglContext = eglGetCurrentContext();
  90. if (mEglContext == EGL_NO_CONTEXT)
  91. LOGE("initCL: eglGetCurrentContext() returned 'EGL_NO_CONTEXT', error = %x", eglGetError());
  92. cl_context_properties props[] =
  93. { CL_GL_CONTEXT_KHR, (cl_context_properties) mEglContext,
  94. CL_EGL_DISPLAY_KHR, (cl_context_properties) mEglDisplay,
  95. CL_CONTEXT_PLATFORM, 0,
  96. 0 };
  97. try
  98. {
  99. haveOpenCL = false;
  100. cl::Platform p = cl::Platform::getDefault();
  101. std::string ext = p.getInfo<CL_PLATFORM_EXTENSIONS>();
  102. if(ext.find("cl_khr_gl_sharing") == std::string::npos)
  103. LOGE("Warning: CL-GL sharing isn't supported by PLATFORM");
  104. props[5] = (cl_context_properties) p();
  105. theContext = cl::Context(CL_DEVICE_TYPE_GPU, props);
  106. std::vector<cl::Device> devs = theContext.getInfo<CL_CONTEXT_DEVICES>();
  107. LOGD("Context returned %d devices, taking the 1st one", devs.size());
  108. ext = devs[0].getInfo<CL_DEVICE_EXTENSIONS>();
  109. if(ext.find("cl_khr_gl_sharing") == std::string::npos)
  110. LOGE("Warning: CL-GL sharing isn't supported by DEVICE");
  111. theQueue = cl::CommandQueue(theContext, devs[0]);
  112. cl::Program::Sources src(1, std::make_pair(oclProgI2I, sizeof(oclProgI2I)));
  113. theProgI2I = cl::Program(theContext, src);
  114. theProgI2I.build(devs);
  115. cv::ocl::attachContext(p.getInfo<CL_PLATFORM_NAME>(), p(), theContext(), devs[0]());
  116. if( cv::ocl::useOpenCL() )
  117. LOGD("OpenCV+OpenCL works OK!");
  118. else
  119. LOGE("Can't init OpenCV with OpenCL TAPI");
  120. haveOpenCL = true;
  121. }
  122. catch(const cl::Error& e)
  123. {
  124. LOGE("cl::Error: %s (%d)", e.what(), e.err());
  125. return 1;
  126. }
  127. catch(const std::exception& e)
  128. {
  129. LOGE("std::exception: %s", e.what());
  130. return 2;
  131. }
  132. catch(...)
  133. {
  134. LOGE( "OpenCL info: unknown error while initializing OpenCL stuff" );
  135. return 3;
  136. }
  137. LOGD("initCL completed");
  138. if (haveOpenCL)
  139. return 0;
  140. else
  141. return 4;
  142. }
  143. //![init_opencl]
  144. #define GL_TEXTURE_2D 0x0DE1
  145. void procOCL_I2I(int texIn, int texOut, int w, int h)
  146. {
  147. LOGD("Processing OpenCL Direct (image2d)");
  148. if(!haveOpenCL)
  149. {
  150. LOGE("OpenCL isn't initialized");
  151. return;
  152. }
  153. LOGD("procOCL_I2I(%d, %d, %d, %d)", texIn, texOut, w, h);
  154. //![process_pure_opencl]
  155. cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, texIn);
  156. cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
  157. std::vector < cl::Memory > images;
  158. images.push_back(imgIn);
  159. images.push_back(imgOut);
  160. int64_t t = getTimeMs();
  161. theQueue.enqueueAcquireGLObjects(&images);
  162. theQueue.finish();
  163. LOGD("enqueueAcquireGLObjects() costs %d ms", getTimeInterval(t));
  164. t = getTimeMs();
  165. cl::Kernel Laplacian(theProgI2I, "Laplacian"); //TODO: may be done once
  166. Laplacian.setArg(0, imgIn);
  167. Laplacian.setArg(1, imgOut);
  168. theQueue.finish();
  169. LOGD("Kernel() costs %d ms", getTimeInterval(t));
  170. t = getTimeMs();
  171. theQueue.enqueueNDRangeKernel(Laplacian, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
  172. theQueue.finish();
  173. LOGD("enqueueNDRangeKernel() costs %d ms", getTimeInterval(t));
  174. t = getTimeMs();
  175. theQueue.enqueueReleaseGLObjects(&images);
  176. theQueue.finish();
  177. LOGD("enqueueReleaseGLObjects() costs %d ms", getTimeInterval(t));
  178. //![process_pure_opencl]
  179. }
  180. void procOCL_OCV(int texIn, int texOut, int w, int h)
  181. {
  182. LOGD("Processing OpenCL via OpenCV");
  183. if(!haveOpenCL)
  184. {
  185. LOGE("OpenCL isn't initialized");
  186. return;
  187. }
  188. //![process_tapi]
  189. int64_t t = getTimeMs();
  190. cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, texIn);
  191. std::vector < cl::Memory > images(1, imgIn);
  192. theQueue.enqueueAcquireGLObjects(&images);
  193. theQueue.finish();
  194. cv::UMat uIn, uOut, uTmp;
  195. cv::ocl::convertFromImage(imgIn(), uIn);
  196. LOGD("loading texture data to OpenCV UMat costs %d ms", getTimeInterval(t));
  197. theQueue.enqueueReleaseGLObjects(&images);
  198. t = getTimeMs();
  199. //cv::blur(uIn, uOut, cv::Size(5, 5));
  200. cv::Laplacian(uIn, uTmp, CV_8U);
  201. cv:multiply(uTmp, 10, uOut);
  202. cv::ocl::finish();
  203. LOGD("OpenCV processing costs %d ms", getTimeInterval(t));
  204. t = getTimeMs();
  205. cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
  206. images.clear();
  207. images.push_back(imgOut);
  208. theQueue.enqueueAcquireGLObjects(&images);
  209. cl_mem clBuffer = (cl_mem)uOut.handle(cv::ACCESS_READ);
  210. cl_command_queue q = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();
  211. size_t offset = 0;
  212. size_t origin[3] = { 0, 0, 0 };
  213. size_t region[3] = { (size_t)w, (size_t)h, 1 };
  214. CV_Assert(clEnqueueCopyBufferToImage (q, clBuffer, imgOut(), offset, origin, region, 0, NULL, NULL) == CL_SUCCESS);
  215. theQueue.enqueueReleaseGLObjects(&images);
  216. cv::ocl::finish();
  217. LOGD("uploading results to texture costs %d ms", getTimeInterval(t));
  218. //![process_tapi]
  219. }
  220. #else
  221. int initCL()
  222. {
  223. return 5;
  224. }
  225. #endif
  226. void closeCL()
  227. {
  228. }
  229. void drawFrameProcCPU(int w, int h, int texOut)
  230. {
  231. LOGD("Processing on CPU");
  232. int64_t t;
  233. // let's modify pixels in FBO texture in C++ code (on CPU)
  234. static cv::Mat m;
  235. m.create(h, w, CV_8UC4);
  236. // read
  237. t = getTimeMs();
  238. // expecting FBO to be bound
  239. glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, m.data);
  240. LOGD("glReadPixels() costs %d ms", getTimeInterval(t));
  241. // modify
  242. t = getTimeMs();
  243. cv::Laplacian(m, m, CV_8U);
  244. m *= 10;
  245. LOGD("Laplacian() costs %d ms", getTimeInterval(t));
  246. // write back
  247. glActiveTexture(GL_TEXTURE0);
  248. glBindTexture(GL_TEXTURE_2D, texOut);
  249. t = getTimeMs();
  250. glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, m.data);
  251. LOGD("glTexSubImage2D() costs %d ms", getTimeInterval(t));
  252. }
  253. enum ProcMode {PROC_MODE_NO_PROC=0, PROC_MODE_CPU=1, PROC_MODE_OCL_DIRECT=2, PROC_MODE_OCL_OCV=3};
  254. void processFrame(int tex1, int tex2, int w, int h, int mode)
  255. {
  256. switch(mode)
  257. {
  258. //case PROC_MODE_NO_PROC:
  259. case PROC_MODE_CPU:
  260. drawFrameProcCPU(w, h, tex2);
  261. break;
  262. #ifdef OPENCL_FOUND
  263. case PROC_MODE_OCL_DIRECT:
  264. procOCL_I2I(tex1, tex2, w, h);
  265. break;
  266. case PROC_MODE_OCL_OCV:
  267. procOCL_OCV(tex1, tex2, w, h);
  268. break;
  269. #endif
  270. default:
  271. LOGE("Unexpected processing mode: %d", mode);
  272. }
  273. }