OpenCVCompilerOptimizations.cmake 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996
  1. # x86/x86-64 arch:
  2. # SSE / SSE2 (always available on 64-bit CPUs)
  3. # SSE3 / SSSE3
  4. # SSE4_1 / SSE4_2 / POPCNT
  5. # AVX / AVX2 / AVX_512F
  6. # FMA3
  7. #
  8. # AVX512 details: https://en.wikipedia.org/wiki/AVX-512#CPUs_with_AVX-512
  9. #
  10. # CPU features groups:
  11. # AVX512_COMMON (Common instructions AVX-512F/CD for all CPUs that support AVX-512)
  12. # AVX512_KNL (Knights Landing with AVX-512F/CD/ER/PF)
  13. # AVX512_KNM (Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ)
  14. # AVX512_SKX (Skylake-X with AVX-512F/CD/BW/DQ/VL)
  15. # AVX512_CNL (Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI)
  16. # AVX512_CLX (Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI)
  17. # AVX512_ICL (Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ/VPCLMULQDQ*/GFNI*/VAES*)
  18. # ppc64le arch:
  19. # VSX (always available on Power8)
  20. # VSX3 (always available on Power9)
  21. # RISC-V arch:
  22. # RVV
  23. # CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
  24. # CPU_{opt}_IMPLIES=<list>
  25. # CPU_{opt}_FORCE=<list> - subset of "implies" list
  26. # CPU_{opt}_GROUP=<list> - similar to "implies" list, but additionally merges compiler flags
  27. # CPU_{opt}_FLAGS_ON=""
  28. # CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum
  29. # Input variables:
  30. # CPU_BASELINE=<list> - preferred list of baseline optimizations
  31. # CPU_DISPATCH=<list> - preferred list of dispatched optimizations
  32. # Advanced input variables:
  33. # CPU_BASELINE_REQUIRE=<list> - list of required baseline optimizations
  34. # CPU_DISPATCH_REQUIRE=<list> - list of required dispatched optimizations
  35. # CPU_BASELINE_DISABLE=<list> - list of disabled baseline optimizations
  36. # Output variables:
  37. # CPU_BASELINE_FINAL=<list> - final list of enabled compiler optimizations
  38. # CPU_DISPATCH_FINAL=<list> - final list of dispatched optimizations
  39. #
  40. # CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (<name>.avx2.cpp)
  41. #
  42. # CPU_{opt}_ENABLED_DEFAULT=ON/OFF - has compiler support without additional flag (CPU_BASELINE_DETECT=ON only)
  # Master list of every optimization name this module knows about, across all
  # architectures. FP16 is shared by x86 and ARM, hence the de-duplication below.
  set(CPU_ALL_OPTIMIZATIONS
    # x86 / x86-64
    SSE SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 POPCNT AVX FP16 AVX2 FMA3 AVX_512F
    AVX512_COMMON AVX512_KNL AVX512_KNM AVX512_SKX AVX512_CNL AVX512_CLX AVX512_ICL
    # ARM / AArch64
    SVE NEON VFPV3 FP16 NEON_DOTPROD NEON_FP16 NEON_BF16
    # MIPS
    MSA
    # ppc64le
    VSX VSX3
    # RISC-V
    RVV
    # LoongArch
    LSX LASX
  )
  list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)

  # VFPV3 gets an empty feature alias (see CPU_{opt}_FEATURE_ALIAS in the header).
  ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
  # Help strings attached to the user-facing CPU_* cache entries.
  set(HELP_CPU_BASELINE         "Specify list of enabled baseline CPU optimizations")
  set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations")
  set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations")
  set(HELP_CPU_DISPATCH         "Specify list of dispatched CPU optimizations")
  set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations")

  # Users may pass comma-separated values (e.g. -DCPU_BASELINE=SSE3,AVX).
  # Rewrite every option that is already defined as a regular ';'-separated
  # CMake list and store it back into the cache.
  set(__cpu_list_options
    CPU_BASELINE
    CPU_BASELINE_REQUIRE
    CPU_BASELINE_DISABLE
    CPU_DISPATCH
    CPU_DISPATCH_REQUIRE
  )
  foreach(__cpu_list_option ${__cpu_list_options})
    if(DEFINED ${__cpu_list_option})
      string(REPLACE "," ";" __cpu_list_value "${${__cpu_list_option}}")
      set(${__cpu_list_option} "${__cpu_list_value}" CACHE STRING "${HELP_${__cpu_list_option}}" FORCE)
    endif()
  endforeach()
  # Process legacy flags.
  #
  # ocv_optimization_process_obsolete_option(<legacy_flag> <OPT> <legacy_warn>)
  #   legacy_flag - name of the obsolete ENABLE_* option to look at
  #   OPT         - optimization name it maps to
  #   legacy_warn - when true, print a deprecation notice if the flag is defined
  #
  # Does nothing when <legacy_flag> is not defined. Otherwise a true value adds
  # <OPT> to the CPU_BASELINE_REQUIRE cache entry and a false value adds it to
  # CPU_BASELINE_DISABLE (each only if <OPT> is not already listed).
  macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn)
    if(DEFINED "${legacy_flag}")
      if("${legacy_warn}")
        message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore")
        message(STATUS " Behaviour of this option is not backward compatible")
        message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation")
      endif()

      # Pick the cache list the legacy value maps to.
      if("${${legacy_flag}}")
        set(__legacy_list CPU_BASELINE_REQUIRE)
      else()
        set(__legacy_list CPU_BASELINE_DISABLE)
      endif()

      if(NOT ";${${__legacy_list}};" MATCHES ";${OPT};")
        set(${__legacy_list} "${${__legacy_list}};${OPT}" CACHE STRING "${HELP_${__legacy_list}}" FORCE)
      endif()
    endif()
  endmacro()
  # Translate the obsolete ENABLE_* switches, in this order.
  # Entry format: <legacy flag>:<optimization>:<emit deprecation warning>
  foreach(__legacy_entry
      "ENABLE_SSE:SSE:ON"
      "ENABLE_SSE2:SSE2:ON"
      "ENABLE_SSE3:SSE3:ON"
      "ENABLE_SSSE3:SSSE3:ON"
      "ENABLE_SSE41:SSE4_1:ON"
      "ENABLE_SSE42:SSE4_2:ON"
      "ENABLE_POPCNT:POPCNT:ON"
      "ENABLE_AVX:AVX:ON"
      "ENABLE_AVX2:AVX2:ON"
      "ENABLE_FMA3:FMA3:ON"
      "ENABLE_VFPV3:VFPV3:OFF"
      "ENABLE_SVE:SVE:ON"
      "ENABLE_NEON:NEON:ON"
      "ENABLE_VSX:VSX:ON")
    string(REPLACE ":" ";" __legacy_entry "${__legacy_entry}")
    list(GET __legacy_entry 0 __legacy_flag)
    list(GET __legacy_entry 1 __legacy_opt)
    list(GET __legacy_entry 2 __legacy_warn)
    ocv_optimization_process_obsolete_option(${__legacy_flag} ${__legacy_opt} ${__legacy_warn})
  endforeach()
  # ocv_is_optimization_in_list(<resultvar> <check_opt> [<opt>...])
  #
  # Sets <resultvar> to 1 when <check_opt> is one of the listed optimizations
  # or is reachable from them through the CPU_<opt>_IMPLIES lists, otherwise
  # to 0. The walk is breadth-first; already expanded names are remembered in
  # __checked so cyclic IMPLIES definitions still terminate.
  macro(ocv_is_optimization_in_list resultvar check_opt)
    set(${resultvar} 0)
    set(__checked "")
    set(__queue ${ARGN})
    while(__queue AND NOT ${resultvar})
      # Take the current frontier and start collecting the next one.
      list(REMOVE_DUPLICATES __queue)
      set(__queue_current ${__queue})
      set(__queue "")
      foreach(__candidate ${__queue_current})
        if("x${__candidate}" STREQUAL "x${check_opt}")
          set(${resultvar} 1)
          break()
        endif()
        if(NOT ";${__checked};" MATCHES ";${__candidate};")
          list(APPEND __queue ${CPU_${__candidate}_IMPLIES})
        endif()
        list(APPEND __checked ${__candidate})
      endforeach()
    endwhile()
  endmacro()
  # ocv_is_optimization_in_force_list(<resultvar> <check_opt> [<opt>...])
  #
  # Same breadth-first search as ocv_is_optimization_in_list(), but follows the
  # CPU_<opt>_FORCE lists instead of CPU_<opt>_IMPLIES. Sets <resultvar> to 1
  # when <check_opt> is listed or reachable, otherwise to 0.
  macro(ocv_is_optimization_in_force_list resultvar check_opt)
    set(__checked "")
    set(__queue ${ARGN})
    set(${resultvar} 0)
    while(__queue AND NOT ${resultvar})
      list(REMOVE_DUPLICATES __queue)
      set(__queue_current ${__queue})
      set(__queue "")
      foreach(OPT ${__queue_current})
        # Compare explicit, prefixed strings (same form as
        # ocv_is_optimization_in_list) instead of relying on if() implicitly
        # dereferencing a bare variable name.
        if("x${OPT}" STREQUAL "x${check_opt}")
          set(${resultvar} 1)
          break()
        elseif(NOT ";${__checked};" MATCHES ";${OPT};")
          # First visit: schedule everything this optimization forces.
          list(APPEND __queue ${CPU_${OPT}_FORCE})
        endif()
        list(APPEND __checked ${OPT})
      endforeach()
    endwhile()
  endmacro()
  # ocv_append_optimization_flag(<var> <OPT>)
  #
  # Appends CPU_<OPT>_FLAGS_ON to the flag string stored in <var>. When
  # CPU_<OPT>_FLAGS_CONFLICT (a regular expression) is set, every
  # space-separated flag in <var> that starts with a match is removed first.
  macro(ocv_append_optimization_flag var OPT)
    if(CPU_${OPT}_FLAGS_CONFLICT)
      # The conflict pattern is usually an alternation ("a|b|c"). Group it so
      # the leading space anchors every alternative to the start of a flag;
      # without the group only the first alternative is anchored and the others
      # can match in the middle of an unrelated flag.
      string(REGEX REPLACE " (${CPU_${OPT}_FLAGS_CONFLICT})" "" ${var} " ${${var}} ")
      string(REGEX REPLACE "^ +" "" ${var} "${${var}}")
    endif()
    set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}")
  endmacro()
  # Support GCC -march=native or Intel Compiler -xHost flags.
  # CPU_BASELINE_DETECT is switched on when the baseline is NATIVE/HOST/DETECT,
  # or when the user already put a "native"/"host" flag into CMAKE_CXX_FLAGS.
  # _add_native_flag is raised only for NATIVE/HOST.
  if(";${CPU_BASELINE};" MATCHES ";(NATIVE|HOST);")
    set(_add_native_flag ON)
    set(CPU_BASELINE_DETECT ON)
  elseif(";${CPU_BASELINE};" MATCHES ";DETECT;")
    set(CPU_BASELINE_DETECT ON)
  elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " (-march=native|-xHost|/QxHost) ")
    if(DEFINED CPU_BASELINE)
      message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.")
    endif()
    set(CPU_BASELINE_DETECT ON)
    set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
  endif()
  # For platforms which don't allow enabling of extra instruction sets with
  # separate compiler options (e.g. GCC/Clang for RISC-V/AArch64 use suffixes
  # of the -march option). There the regular CPU features mechanism is avoided
  # in favour of cmake-toolchain files or flags provided via command-line.
  #
  # ocv_default_baseline_detect_and_check_dispatch()
  #   * defaults the CPU_BASELINE cache entry to DETECT;
  #   * warns if CPU_BASELINE is anything other than DETECT, NATIVE or empty;
  #   * warns about every CPU_DISPATCH entry that has no CPU_<opt>_FLAGS_ON.
  macro(ocv_default_baseline_detect_and_check_dispatch)
    set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")

    if(NOT CPU_BASELINE MATCHES "^(DETECT|NATIVE|)$")
      message(WARNING
        "CPU_BASELINE is set to '${CPU_BASELINE}', but '${CMAKE_SYSTEM_PROCESSOR}' "
        "platform is designed to work with DETECT|NATIVE|<empty>, "
        "otherwise target CPU architecture may be changed unexpectedly. "
        "Please check your resulting compiler flags in the CMake output.")
    endif()

    foreach(__dispatch_opt ${CPU_DISPATCH})
      if(NOT DEFINED CPU_${__dispatch_opt}_FLAGS_ON)
        message(WARNING
          "${__dispatch_opt} is in the CPU_DISPATCH list, but 'CPU_${__dispatch_opt}_FLAGS_ON' is not set. "
          "Please provide feature-specific compiler options explicitly.")
      endif()
    endforeach()
  endmacro()
  173. #===================================================================================================
  174. if(X86 OR X86_64)
  # Optimizations handled on x86 / x86-64, in processing order.
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")

  # AVX-512 feature groups (see CPU_{opt}_GROUP in the file header).
  ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD")
  ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA")
  ocv_update(CPU_AVX512_KNM_GROUP "AVX512_KNL;AVX512_KNM_EXTRA;AVX_512VPOPCNTDQ")
  ocv_update(CPU_AVX512_SKX_GROUP "AVX512_COMMON;AVX_512VL;AVX_512BW;AVX_512DQ")
  ocv_update(CPU_AVX512_CNL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI")
  ocv_update(CPU_AVX512_CLX_GROUP "AVX512_SKX;AVX_512VNNI")
  ocv_update(CPU_AVX512_ICL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI;AVX_512VNNI;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? VPCLMULQDQ, GFNI, VAES

  # Compile-check source for each optimization.
  # Entry format: <optimization>:<file name without extension>
  foreach(__cpu_check
      "SSE:cpu_sse"
      "SSE2:cpu_sse2"
      "SSE3:cpu_sse3"
      "SSSE3:cpu_ssse3"
      "SSE4_1:cpu_sse41"
      "SSE4_2:cpu_sse42"
      "POPCNT:cpu_popcnt"
      "AVX:cpu_avx"
      "AVX2:cpu_avx2"
      "FP16:cpu_fp16"
      "AVX_512F:cpu_avx512"
      "AVX512_COMMON:cpu_avx512common"
      "AVX512_KNL:cpu_avx512knl"
      "AVX512_KNM:cpu_avx512knm"
      "AVX512_SKX:cpu_avx512skx"
      "AVX512_CNL:cpu_avx512cnl"
      "AVX512_CLX:cpu_avx512clx"
      "AVX512_ICL:cpu_avx512icl")
    string(REPLACE ":" ";" __cpu_check "${__cpu_check}")
    list(GET __cpu_check 0 __cpu_check_opt)
    list(GET __cpu_check 1 __cpu_check_src)
    ocv_update(CPU_${__cpu_check_opt}_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/${__cpu_check_src}.cpp")
  endforeach()
  # Dependency chain between x86 optimizations, listed from the oldest
  # instruction set upwards. An empty CPU_<opt>_FORCE keeps that optimization
  # from forcing the ones it implies. Skipped entirely when
  # OPENCV_CPU_OPT_IMPLIES_IGNORE is set.
  if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)
    # SSE family
    ocv_update(CPU_SSE2_IMPLIES "SSE")
    ocv_update(CPU_SSE3_IMPLIES "SSE2")
    ocv_update(CPU_SSSE3_IMPLIES "SSE3")
    ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3")
    ocv_update(CPU_POPCNT_IMPLIES "SSE4_1")
    ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations
    ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT")

    # AVX family
    ocv_update(CPU_AVX_IMPLIES "SSE4_2")
    ocv_update(CPU_FP16_IMPLIES "AVX")
    ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations
    ocv_update(CPU_FMA3_IMPLIES "AVX2")
    ocv_update(CPU_FMA3_FORCE "") # Don't force other optimizations
    ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16")
    ocv_update(CPU_AVX_512F_IMPLIES "AVX2")
    ocv_update(CPU_AVX_512F_FORCE "") # Don't force other optimizations

    # AVX-512 groups
    ocv_update(CPU_AVX512_COMMON_IMPLIES "AVX_512F")
    ocv_update(CPU_AVX512_KNL_IMPLIES "AVX512_COMMON")
    ocv_update(CPU_AVX512_KNM_IMPLIES "AVX512_KNL")
    ocv_update(CPU_AVX512_SKX_IMPLIES "AVX512_COMMON")
    ocv_update(CPU_AVX512_CNL_IMPLIES "AVX512_SKX")
    ocv_update(CPU_AVX512_CLX_IMPLIES "AVX512_SKX")
    ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_SKX")
  endif()
  # Per-compiler flags that switch each x86 optimization on.
  # ocv_update() is defined outside this file; presumably it assigns only when
  # the variable is not defined yet, so earlier user/toolchain values win
  # (TODO confirm against its definition).
  if(CV_ICC OR CV_ICX)
    # Registers the flags of one optimization for the Intel compilers:
    #   name       - optimization name (also stored as CPU_<name>_FLAGS_NAME)
    #   unix_flags - flags used when not building with an MSVC-style driver
    #   msvc_flags - flags used when MSVC is set
    # Also records which already present flags conflict with these and have to
    # be stripped before the new ones are appended (CPU_<name>_FLAGS_CONFLICT).
    macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags)
      ocv_update(CPU_${name}_FLAGS_NAME "${name}")
      if(MSVC)
        set(enable_flags "${msvc_flags}")
        set(flags_conflict "/arch:[^ ]*|/Qx:[^ ]+")
      else()
        set(enable_flags "${unix_flags}")
        set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]*|-x[^ ]+")
      endif()
      ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}")
      if(flags_conflict)
        ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}")
      endif()
    endmacro()
    ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2")
    ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX")
    ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX")
    ocv_intel_compiler_optimization_option(FMA3 "" "")
    ocv_intel_compiler_optimization_option(POPCNT "-mpopcnt" "") # -mpopcnt is available since ICC 19.0.0
    ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2")
    ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1")
    ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3")
    ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3")
    ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2")
    if(NOT X86_64) # x64 compiler doesn't support /arch:sse
      ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE")
    endif()
    ocv_intel_compiler_optimization_option(AVX_512F "-xCOMMON-AVX512" "/Qx:COMMON-AVX512")
    ocv_intel_compiler_optimization_option(AVX512_COMMON "-xCOMMON-AVX512" "/Qx:COMMON-AVX512")
    ocv_intel_compiler_optimization_option(AVX512_KNL "-xKNL" "/Qx:KNL")
    ocv_intel_compiler_optimization_option(AVX512_KNM "-xKNM" "/Qx:KNM")
    ocv_intel_compiler_optimization_option(AVX512_SKX "-xSKYLAKE-AVX512" "/Qx:SKYLAKE-AVX512")
    ocv_intel_compiler_optimization_option(AVX512_CNL "-xCANNONLAKE" "/Qx:CANNONLAKE")
    ocv_intel_compiler_optimization_option(AVX512_CLX "-xCASCADELAKE" "/Qx:CASCADELAKE")
    ocv_intel_compiler_optimization_option(AVX512_ICL "-xICELAKE-CLIENT" "/Qx:ICELAKE-CLIENT")
  # NOTE(review): CV_ICX is already consumed by the branch above, so the
  # "OR CV_ICX" below can never be the reason this branch is taken. Confirm
  # which of the two branches ICX is meant to use.
  elseif(CV_GCC OR CV_CLANG OR CV_ICX)
    ocv_update(CPU_AVX2_FLAGS_ON "-mavx2")
    ocv_update(CPU_FP16_FLAGS_ON "-mf16c")
    ocv_update(CPU_AVX_FLAGS_ON "-mavx")
    ocv_update(CPU_FMA3_FLAGS_ON "-mfma")
    ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt")
    ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2")
    ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1")
    ocv_update(CPU_SSE3_FLAGS_ON "-msse3")
    ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3")
    ocv_update(CPU_SSE2_FLAGS_ON "-msse2")
    ocv_update(CPU_SSE_FLAGS_ON "-msse")
    # AVX-512 flags are only registered for Clang/ICX and for GCC 5.0 or newer;
    # for older GCC AVX_512F is marked as unsupported instead.
    if(NOT (CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")) # GCC >= 5.0
      ocv_update(CPU_AVX_512F_FLAGS_ON "-mavx512f")
      ocv_update(CPU_AVX_512CD_FLAGS_ON "-mavx512cd")
      ocv_update(CPU_AVX512_KNL_EXTRA_FLAGS_ON "-mavx512er -mavx512pf")
      ocv_update(CPU_AVX512_KNM_EXTRA_FLAGS_ON "-mavx5124fmaps -mavx5124vnniw")
      ocv_update(CPU_AVX_512BW_FLAGS_ON "-mavx512bw")
      ocv_update(CPU_AVX_512DQ_FLAGS_ON "-mavx512dq")
      ocv_update(CPU_AVX_512VL_FLAGS_ON "-mavx512vl")
      ocv_update(CPU_AVX_512IFMA_FLAGS_ON "-mavx512ifma")
      ocv_update(CPU_AVX_512VBMI_FLAGS_ON "-mavx512vbmi")
      ocv_update(CPU_AVX_512VNNI_FLAGS_ON "-mavx512vnni")
      ocv_update(CPU_AVX_512VBMI2_FLAGS_ON "-mavx512vbmi2")
      ocv_update(CPU_AVX_512BITALG_FLAGS_ON "-mavx512bitalg")
      ocv_update(CPU_AVX_512VPOPCNTDQ_FLAGS_ON "-mavx512vpopcntdq")
    else()
      ocv_update(CPU_AVX_512F_SUPPORTED OFF)
    endif()
  elseif(MSVC)
    ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2")
    ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX")
    ocv_update(CPU_FP16_FLAGS_ON "/arch:AVX")
    if(NOT X86_64)
      # 64-bit MSVC compiler uses SSE/SSE2 by default
      ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE")
      ocv_update(CPU_SSE_SUPPORTED ON)
      ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2")
      ocv_update(CPU_SSE2_SUPPORTED ON)
    else()
      # 64-bit target: SSE/SSE2 need no flag; /arch:AVX512 is registered here only.
      ocv_update(CPU_SSE_SUPPORTED ON)
      ocv_update(CPU_SSE2_SUPPORTED ON)
      ocv_update(CPU_AVX_512F_FLAGS_ON "/arch:AVX512")
    endif()
    # Other instruction sets are supported by default since MSVC 2008 at least
  else()
    message(WARNING "TODO: Unsupported compiler")
  endif()
  # Default dispatch list: the 64-bit build additionally dispatches AVX2 and
  # AVX512_SKX code paths.
  if(NOT DEFINED CPU_DISPATCH)
    set(__cpu_default_dispatch "SSE4_1;SSE4_2;AVX;FP16")
    if(X86_64)
      list(APPEND __cpu_default_dispatch AVX2 AVX512_SKX)
    endif()
    set(CPU_DISPATCH "${__cpu_default_dispatch}" CACHE STRING "${HELP_CPU_DISPATCH}")
  endif()

  # Default baseline: SSE2 on 32-bit, SSE3 on 64-bit, compiler defaults on Apple.
  if(NOT DEFINED CPU_BASELINE)
    set(__cpu_default_baseline "SSE2")
    if(APPLE)
      # MacOS X has limited set of possible supported H/W, so compiler is configured well
      set(__cpu_default_baseline "DETECT")
    elseif(X86_64)
      set(__cpu_default_baseline "SSE3")
    endif()
    set(CPU_BASELINE "${__cpu_default_baseline}" CACHE STRING "${HELP_CPU_BASELINE}")
  endif()
  326. elseif(ARM OR AARCH64)
  # Compile-check sources: cmake/checks/cpu_<lower-case optimization name>.cpp
  foreach(__arm_opt SVE NEON FP16 NEON_FP16 NEON_BF16 NEON_DOTPROD)
    string(TOLOWER "${__arm_opt}" __arm_opt_lower)
    ocv_update(CPU_${__arm_opt}_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_${__arm_opt_lower}.cpp")
  endforeach()

  # FP16 builds on NEON for both 32-bit ARM and AArch64.
  ocv_update(CPU_FP16_IMPLIES "NEON")

  if(AARCH64)
    # Apple silicon (e.g. M4) does not support SVE, although some Xcode
    # versions report that it does, so SVE is only listed on non-Apple UNIX.
    set(__arm_known "NEON;FP16;NEON_DOTPROD;NEON_FP16;NEON_BF16")
    if(UNIX AND NOT APPLE)
      set(__arm_known "SVE;${__arm_known}")
    endif()
    ocv_update(CPU_KNOWN_OPTIMIZATIONS "${__arm_known}")

    foreach(__arm_opt NEON_DOTPROD NEON_FP16 NEON_BF16)
      ocv_update(CPU_${__arm_opt}_IMPLIES "NEON")
    endforeach()

    if(NOT MSVC)
      ocv_update(CPU_SVE_FLAGS_ON "-march=armv8.2-a+sve")
      ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "-march=armv8.2-a+dotprod")
      ocv_update(CPU_NEON_FP16_FLAGS_ON "-march=armv8.2-a+fp16")
      ocv_update(CPU_NEON_BF16_FLAGS_ON "-march=armv8.2-a+bf16")
    else()
      # No extra flags are registered for MSVC.
      ocv_update(CPU_SVE_FLAGS_ON "")
      ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "")
      ocv_update(CPU_NEON_FP16_FLAGS_ON "")
      ocv_update(CPU_NEON_BF16_FLAGS_ON "")
    endif()

    set(CPU_DISPATCH "NEON_FP16;NEON_BF16;NEON_DOTPROD" CACHE STRING "${HELP_CPU_DISPATCH}")
    ocv_default_baseline_detect_and_check_dispatch()
  else()
    # 32-bit ARM
    ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16")
    if(NOT MSVC)
      ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3")
      ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon")
      ocv_update(CPU_NEON_FLAGS_CONFLICT "-mfpu=[^ ]*")
      ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16 -mfp16-format=ieee")
      ocv_update(CPU_FP16_FLAGS_CONFLICT "-mfpu=[^ ]*")
    endif()
  endif()
  369. elseif(MIPS)
  # MIPS: MSA is the only optimization; the baseline defaults to DETECT.
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "MSA")
  ocv_update(CPU_MSA_FLAGS_ON "-mmsa")
  ocv_update(CPU_MSA_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_msa.cpp")
  set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
  374. elseif(PPC64LE)
  # ppc64le: VSX is the default baseline, VSX3 the default dispatched optimization.
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3")
  ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
  ocv_update(CPU_VSX3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx3.cpp")
  if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)
    ocv_update(CPU_VSX3_IMPLIES "VSX")
  endif()
  # Clang gets feature flags unless the compiler path contains "xlc"; every
  # other compiler gets -mcpu based flags.
  # The compiler path is quoted so an empty value or a path containing spaces
  # or semicolons cannot break or change the meaning of the condition.
  if(CV_CLANG AND NOT "${CMAKE_CXX_COMPILER}" MATCHES "xlc")
    ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")
    ocv_update(CPU_VSX3_FLAGS_ON "-mpower9-vector")
  else()
    ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")
    ocv_update(CPU_VSX3_FLAGS_ON "-mcpu=power9 -mtune=power9")
  endif()
  set(CPU_DISPATCH "VSX3" CACHE STRING "${HELP_CPU_DISPATCH}")
  set(CPU_BASELINE "VSX" CACHE STRING "${HELP_CPU_BASELINE}")
  390. elseif(RISCV)
  # RISC-V: no per-feature flags are registered here; the baseline is taken
  # from the toolchain (DETECT) and the dispatch list is only sanity-checked.
  ocv_update(CPU_RVV_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_rvv.cpp")
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "RVV")
  ocv_default_baseline_detect_and_check_dispatch()
  394. elseif(LOONGARCH64)
  # LoongArch: LSX is the default baseline, LASX the default dispatched optimization.
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "LSX;LASX")
  ocv_update(CPU_LSX_FLAGS_ON "-mlsx")
  ocv_update(CPU_LSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_lsx.cpp")
  ocv_update(CPU_LASX_FLAGS_ON "-mlasx")
  ocv_update(CPU_LASX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_lasx.cpp")
  set(CPU_BASELINE "LSX" CACHE STRING "${HELP_CPU_BASELINE}")
  set(CPU_DISPATCH "LASX" CACHE STRING "${HELP_CPU_DISPATCH}")
  402. endif()
  # Helper values for cmake-gui: make sure both cache entries exist and offer
  # the known optimizations as selectable values.
  set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
  set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}")
  foreach(__cpu_cache_entry CPU_BASELINE CPU_DISPATCH)
    set_property(CACHE ${__cpu_cache_entry} PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS})
  endforeach()

  # Results computed by the rest of this file start out empty.
  set(CPU_BASELINE_FLAGS "")
  set(CPU_BASELINE_FINAL "")
  set(CPU_DISPATCH_FINAL "")

  # Update CPU_BASELINE_DETECT flag
  if(";${CPU_BASELINE};" MATCHES ";DETECT;")
    set(CPU_BASELINE_DETECT ON)
  endif()

  # With optimizations disabled nothing is dispatched.
  if(CV_DISABLE_OPTIMIZATION)
    set(CPU_DISPATCH "")
    set(CPU_DISPATCH_REQUIRE "")
  endif()

  # "ALL" is shorthand for every optimization known on this platform.
  if("x${CPU_DISPATCH}" STREQUAL "xALL")
    set(CPU_DISPATCH "${CPU_KNOWN_OPTIMIZATIONS}")
  endif()
  # ocv_check_compiler_optimization(<OPT>)
  #
  # Decides whether the compiler supports optimization <OPT> and stores the
  # answer in CPU_<OPT>_SUPPORTED. Nothing is done when that variable is
  # already defined.
  #
  # Reads from the caller's scope (this is a macro): __is_disabled,
  # __is_from_baseline, CPU_BASELINE_DETECT and CPU_BASELINE_FLAGS.
  # May also append <OPT> to CPU_BASELINE_FINAL and set
  # CPU_<OPT>_ENABLED_DEFAULT when the test file builds with the baseline
  # flags alone.
  #
  # ocv_check_compiler_flag() and ocv_check_flag_support() are defined outside
  # this file; presumably they try to compile with the given flags and store
  # the outcome in the named variable (the latter choosing the variable name
  # itself and returning it through its third argument) - TODO confirm.
  macro(ocv_check_compiler_optimization OPT)
    if(NOT DEFINED CPU_${OPT}_SUPPORTED)
      # There is something to verify only if the optimization has non-empty
      # flags or a test source file.
      if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE)
        set(_varname "")
        if(CPU_${OPT}_TEST_FILE)
          set(__available 0)
          # Step 1: for baseline candidates, try the test file with the current
          # baseline flags only (no flag specific to this optimization).
          if(NOT __is_disabled AND (__is_from_baseline OR CPU_BASELINE_DETECT))
            set(_varname "HAVE_CPU_${OPT}_SUPPORT")
            ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
            if(${_varname})
              list(APPEND CPU_BASELINE_FINAL ${OPT})
              set(CPU_${OPT}_ENABLED_DEFAULT ON)
              set(__available 1)
            endif()
          endif()
          # Step 2: otherwise try again with the optimization's own flags.
          if(NOT __available)
            if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x")
              # An explicit flags name is registered: use it for the result variable.
              set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}")
              set(_compile_flags "${CPU_BASELINE_FLAGS}")
              ocv_append_optimization_flag(_compile_flags ${OPT})
              ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
            elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x")
              ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}")
            else()
              # No flags at all: check the test file on top of the baseline flags.
              set(_varname "HAVE_CPU_${OPT}_SUPPORT")
              set(_compile_flags "${CPU_BASELINE_FLAGS}")
              ocv_append_optimization_flag(_compile_flags ${OPT})
              ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
            endif()
          endif()
        else()
          # No test file: only check that the flags themselves are accepted.
          ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "")
        endif()
        # _varname holds the NAME of the result variable; dereference it once more.
        if(_varname AND ${_varname})
          set(CPU_${OPT}_SUPPORTED ON)
        elseif(NOT CPU_${OPT}_SUPPORTED)
          message(STATUS "${OPT} is not supported by C++ compiler")
        endif()
      else()
        # Neither flags nor a test file: treated as supported without a check.
        set(CPU_${OPT}_SUPPORTED ON)
      endif()
    endif()
  endmacro()
  # Prepare per-optimization state for every known optimization:
  #   * reset the cached usage counter;
  #   * mark optimizations whose flags are the literal "disabled" as unsupported;
  #   * resolve groups (CPU_<opt>_GROUP): default IMPLIES to the group list,
  #     merge the members' flags into CPU_<opt>_FLAGS_ON, and disable the whole
  #     group if any member is disabled or explicitly marked unsupported;
  #   * default CPU_<opt>_FORCE to CPU_<opt>_IMPLIES.
  foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
    set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "")
    if("${CPU_${OPT}_FLAGS_ON}" STREQUAL "disabled")
      set(CPU_${OPT}_SUPPORTED OFF)
    elseif(DEFINED CPU_${OPT}_GROUP)
      if(NOT DEFINED CPU_${OPT}_IMPLIES)
        set(CPU_${OPT}_IMPLIES "${CPU_${OPT}_GROUP}")
      endif()
      set(__disabled 0)
      set(__flags "")
      foreach(OPT2 ${CPU_${OPT}_GROUP})
        # A member disables the group when its flags are "disabled" or when the
        # MEMBER itself has been marked unsupported. Testing the group's own
        # CPU_<OPT>_SUPPORTED here (usually still undefined at this point)
        # would disable the group merely because a member's support state is
        # defined, even when that state is ON.
        if("${CPU_${OPT2}_FLAGS_ON}" STREQUAL "disabled" OR (DEFINED CPU_${OPT2}_SUPPORTED AND NOT CPU_${OPT2}_SUPPORTED))
          set(__disabled 1)
        endif()
        set(__flags "${__flags} ${CPU_${OPT2}_FLAGS_ON}")
        string(STRIP "${__flags}" __flags)
      endforeach()
      if(__disabled)
        set(CPU_${OPT}_SUPPORTED OFF)
      else()
        # Explicitly provided group flags take precedence over the merged ones.
        if(NOT DEFINED CPU_${OPT}_FLAGS_ON)
          set(CPU_${OPT}_FLAGS_ON "${__flags}")
        endif()
      endif()
    endif()
    if(NOT DEFINED CPU_${OPT}_FORCE)
      set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")
    endif()
    #message("${OPT}: CPU_${OPT}_FLAGS_ON=${CPU_${OPT}_FLAGS_ON}")
  endforeach()
  # CPU_BASELINE=NATIVE/HOST: add the compiler's "build for this machine" flag
  # to the baseline flags. -march=native is tried first; if it is rejected the
  # Intel spelling (/QxHost with MSVC-style drivers, -xHost otherwise) is tried.
  # Nothing is added when neither is accepted.
  if(_add_native_flag)
    set(_flag "-march=native")
    set(_varname "HAVE_CPU_NATIVE_SUPPORT")
    ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
    if(NOT ${_varname})
      if(MSVC)
        set(_flag "/QxHost")
      else()
        set(_flag "-xHost")
      endif()
      set(_varname "HAVE_CPU_HOST_SUPPORT")
      ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
    endif()
    if(${_varname})
      set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}")
    endif()
  endif()
  # Main pass: classify every known optimization as baseline, dispatched or
  # unused, filling CPU_BASELINE_FINAL, CPU_DISPATCH_FINAL and CPU_BASELINE_FLAGS.
  # The order of CPU_KNOWN_OPTIMIZATIONS matters because CPU_BASELINE_FLAGS
  # grows while iterating and is used by the later compiler checks.
  foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
    # Disabled if OPT is, or implies, any entry of CPU_BASELINE_DISABLE.
    set(__is_disabled 0)
    foreach(OPT2 ${CPU_BASELINE_DISABLE})
      ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT})
      if(__is_disabled)
        break()
      endif()
    endforeach()
    if(__is_disabled)
      set(__is_from_baseline 0)
    else()
      # Already known to be supported (e.g. preset by the platform section)
      # while detecting: it belongs to the baseline.
      if(CPU_${OPT}_SUPPORTED AND CPU_BASELINE_DETECT)
        list(APPEND CPU_BASELINE_FINAL ${OPT})
      endif()
      # Requested for the baseline directly or through an IMPLIES chain,
      # first via CPU_BASELINE_REQUIRE, then via CPU_BASELINE.
      ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
      if(NOT __is_from_baseline)
        ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
      endif()
    endif()
    # Same question for the dispatch lists.
    ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE})
    if(NOT __is_from_dispatch)
      ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH})
    endif()
    # Only probe the compiler for optimizations that can actually be used.
    # The macro reads __is_disabled / __is_from_baseline set above.
    if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT)
      ocv_check_compiler_optimization(${OPT})
    endif()
    # In detection mode the probe may have added entries to CPU_BASELINE_FINAL;
    # re-evaluate baseline membership against that list.
    if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled)
      ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL})
    endif()
    if(CPU_${OPT}_SUPPORTED)
      # Dispatch only what is listed literally in CPU_DISPATCH and is not
      # already covered by the baseline.
      if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline)
        list(APPEND CPU_DISPATCH_FINAL ${OPT})
      elseif(__is_from_baseline AND NOT __is_disabled)
        if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
          list(APPEND CPU_BASELINE_FINAL ${OPT})
        endif()
        if(NOT CPU_${OPT}_ENABLED_DEFAULT) # Don't change compiler flags in 'detection' mode
          ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
        endif()
      endif()
    endif()
  endforeach()
  # Report how the requested lists compare with the final ones.

  # Required baseline optimizations must all have made it into the baseline.
  foreach(OPT ${CPU_BASELINE_REQUIRE})
    if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
      message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})")
    endif()
  endforeach()

  # Preferred baseline optimizations are skipped with a notice; the
  # DETECT/HOST/NATIVE keywords are not optimizations and are ignored here.
  foreach(OPT ${CPU_BASELINE})
    if(NOT OPT MATCHES "^(DETECT|HOST|NATIVE)$")
      if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
        message(STATUS "Optimization ${OPT} is not available, skipped")
      endif()
    endif()
  endforeach()

  # Required dispatched optimizations must be dispatched; ending up in the
  # baseline instead is reported as a separate error.
  foreach(OPT ${CPU_DISPATCH_REQUIRE})
    if(NOT ";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
      if(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
        message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
      else()
        message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
      endif()
    endif()
  endforeach()

  # Preferred dispatched optimizations are fine if dispatched or already part
  # of the baseline; otherwise they are skipped with a notice.
  foreach(OPT ${CPU_DISPATCH})
    if(NOT ";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};" AND NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
      message(STATUS "Dispatch optimization ${OPT} is not available, skipped")
    endif()
  endforeach()
  585. #message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}")
  586. #message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}")
  587. #if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE)
  588. # message(FATAL_ERROR "Python is required for CPU dispatched optimization support")
  589. #endif()
  590. macro(ocv_compiler_optimization_options)
  591. set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}")
  592. if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS)
  593. set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
  594. ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS)
  595. endif()
  596. ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS)
  597. if(NOT HAVE_CPU_BASELINE_FLAGS)
  598. message(FATAL_ERROR "Compiler doesn't support baseline optimization flags: ${CPU_BASELINE_FLAGS}")
  599. endif()
  600. add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}")
  601. foreach(OPT ${CPU_DISPATCH_FINAL})
  602. set(__dispatch_flags "")
  603. set(__dispatch_definitions "")
  604. set(__dispatch_opts "")
  605. set(__dispatch_opts_force "")
  606. foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS})
  607. if(NOT CPU_${OPT2}_SUPPORTED)
  608. #continue()
  609. else()
  610. ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL})
  611. if(NOT __is_from_baseline)
  612. ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT})
  613. if(__is_active)
  614. ocv_append_optimization_flag(__dispatch_flags ${OPT2})
  615. list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1")
  616. list(APPEND __dispatch_opts "${OPT2}")
  617. endif()
  618. ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT})
  619. if(__is_force)
  620. list(APPEND __dispatch_opts_force "${OPT2}")
  621. endif()
  622. endif()
  623. endif()
  624. endforeach()
  625. set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}")
  626. if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS)
  627. set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
  628. ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT})
  629. endif()
  630. ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT})
  631. if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT})
  632. message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}")
  633. endif()
  634. set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}")
  635. set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}")
  636. set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}")
  637. set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}")
  638. endforeach()
  639. if(ENABLE_POWERPC)
  640. add_extra_compiler_option("-mcpu=G3 -mtune=G5")
  641. endif()
  642. endmacro()
  643. macro(ocv_compiler_optimization_options_finalize)
  644. if((CV_GCC OR CV_CLANG OR CV_ICX) AND (X86 OR X86_64))
  645. if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
  646. if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
  647. add_extra_compiler_option(-mfpmath=sse) # !! important - be on the same wave with x64 compilers
  648. else()
  649. add_extra_compiler_option(-mfpmath=387)
  650. endif()
  651. endif()
  652. endif()
  653. if(MSVC)
  654. # Generate Intrinsic Functions
  655. set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
  656. endif(MSVC)
  657. endmacro()
  658. macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME)
  659. set(__result "")
  660. set(__result_libs "")
  661. foreach(OPT ${CPU_DISPATCH_FINAL})
  662. set(__result_${OPT} "")
  663. endforeach()
  664. foreach(fname ${${SOURCES_VAR_NAME}})
  665. string(TOLOWER "${fname}" fname_LOWER)
  666. get_filename_component(fname_LOWER "${fname_LOWER}" NAME)
  667. if(fname_LOWER MATCHES ".+\\.([^\\.]*)\\.cpp$")
  668. string(TOUPPER "${CMAKE_MATCH_1}" OPT_)
  669. if(OPT_ MATCHES "(CUDA.*|DISPATCH.*|OCL)") # don't touch files like filename.cuda.cpp
  670. list(APPEND __result "${fname}")
  671. #continue()
  672. elseif(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
  673. ocv_get_smart_file_name(fname_ "${fname}")
  674. message(STATUS "Excluding from source files list (optimization is disabled): ${fname_}")
  675. #continue()
  676. else()
  677. get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS)
  678. if(__definitions)
  679. list(APPEND __definitions "CV_CPU_DISPATCH_MODE=${OPT_}")
  680. else()
  681. set(__definitions "CV_CPU_DISPATCH_MODE=${OPT_}")
  682. endif()
  683. set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}")
  684. set(__opt_found 0)
  685. foreach(OPT ${CPU_BASELINE_FINAL})
  686. string(TOLOWER "${OPT}" OPT_LOWER)
  687. if(fname_LOWER MATCHES "\\.${OPT_LOWER}\\.cpp$")
  688. #message("${fname} BASELINE-${OPT}")
  689. set(__opt_found 1)
  690. list(APPEND __result_${OPT} "${fname}")
  691. break()
  692. endif()
  693. endforeach()
  694. foreach(OPT ${CPU_DISPATCH_FINAL})
  695. foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED})
  696. string(TOLOWER "${OPT2}" OPT2_LOWER)
  697. if(fname_LOWER MATCHES "\\.${OPT2_LOWER}\\.cpp$")
  698. list(APPEND __result_${OPT} "${fname}")
  699. math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1")
  700. set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE)
  701. #message("(${CPU_${OPT}_USAGE_COUNT})${fname} ${OPT}")
  702. #message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
  703. #message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  704. #message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
  705. set(__opt_found 1)
  706. break()
  707. endif()
  708. endforeach()
  709. if(__opt_found)
  710. set(__opt_found 1)
  711. break()
  712. endif()
  713. endforeach()
  714. if(NOT __opt_found)
  715. ocv_get_smart_file_name(fname_ "${fname}")
  716. message(STATUS "Excluding from source files list: ${fname_}")
  717. endif()
  718. endif()
  719. else()
  720. list(APPEND __result "${fname}")
  721. endif()
  722. endforeach()
  723. foreach(OPT ${CPU_BASELINE_FINAL} ${CPU_DISPATCH_FINAL})
  724. if(__result_${OPT})
  725. #message("${OPT}: ${__result_${OPT}}")
  726. if(CMAKE_GENERATOR MATCHES "^Visual"
  727. OR OPENCV_CMAKE_CPU_OPTIMIZATIONS_FORCE_TARGETS
  728. )
  729. # MSVS generator is not able to properly order compilation flags:
  730. # extra flags are added before common flags, so switching between optimizations doesn't work correctly
  731. # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
  732. add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}})
  733. ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT})
  734. set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  735. set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
  736. target_include_directories(${TARGET_BASE_NAME}_${OPT} PRIVATE $<TARGET_PROPERTY:${TARGET_BASE_NAME},INCLUDE_DIRECTORIES>)
  737. #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT})
  738. list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")
  739. if(ENABLE_SOLUTION_FOLDERS)
  740. set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES FOLDER "dispatched")
  741. endif()
  742. else()
  743. foreach(fname ${__result_${OPT}})
  744. get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS)
  745. if(__definitions)
  746. list(APPEND __definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  747. else()
  748. set(__definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
  749. endif()
  750. set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}")
  751. set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
  752. endforeach()
  753. list(APPEND __result ${__result_${OPT}})
  754. endif()
  755. endif()
  756. endforeach()
  757. set(${SOURCES_VAR_NAME} "${__result}")
  758. list(APPEND ${LIBS_VAR_NAME} ${__result_libs})
  759. endmacro()
  760. macro(ocv_compiler_optimization_fill_cpu_config)
  761. set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "")
  762. foreach(OPT ${CPU_BASELINE_FINAL})
  763. set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
  764. #define CV_CPU_COMPILE_${OPT} 1
  765. #define CV_CPU_BASELINE_COMPILE_${OPT} 1
  766. ")
  767. endforeach()
  768. set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
  769. #define CV_CPU_BASELINE_FEATURES 0 \\")
  770. foreach(OPT ${CPU_BASELINE_FINAL})
  771. if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
  772. set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
  773. , CV_CPU_${OPT} \\")
  774. endif()
  775. endforeach()
  776. set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n")
  777. set(__dispatch_modes "")
  778. foreach(OPT ${CPU_DISPATCH_FINAL})
  779. list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT})
  780. endforeach()
  781. list(REMOVE_DUPLICATES __dispatch_modes)
  782. foreach(OPT ${__dispatch_modes})
  783. set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
  784. #define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
  785. endforeach()
  786. set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
  787. \n\n#define CV_CPU_DISPATCH_FEATURES 0 \\")
  788. foreach(OPT ${__dispatch_modes})
  789. if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
  790. set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
  791. , CV_CPU_${OPT} \\")
  792. endif()
  793. endforeach()
  794. set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}\n")
  795. set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
  796. foreach(OPT ${CPU_ALL_OPTIMIZATIONS})
  797. if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
  798. set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
  799. #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
  800. # define CV_TRY_${OPT} 1
  801. # define CV_CPU_FORCE_${OPT} 1
  802. # define CV_CPU_HAS_SUPPORT_${OPT} 1
  803. # define CV_CPU_CALL_${OPT}(fn, args) return (cpu_baseline::fn args)
  804. # define CV_CPU_CALL_${OPT}_(fn, args) return (opt_${OPT}::fn args)
  805. #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
  806. # define CV_TRY_${OPT} 1
  807. # define CV_CPU_FORCE_${OPT} 0
  808. # define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
  809. # define CV_CPU_CALL_${OPT}(fn, args) if (CV_CPU_HAS_SUPPORT_${OPT}) return (opt_${OPT}::fn args)
  810. # define CV_CPU_CALL_${OPT}_(fn, args) if (CV_CPU_HAS_SUPPORT_${OPT}) return (opt_${OPT}::fn args)
  811. #else
  812. # define CV_TRY_${OPT} 0
  813. # define CV_CPU_FORCE_${OPT} 0
  814. # define CV_CPU_HAS_SUPPORT_${OPT} 0
  815. # define CV_CPU_CALL_${OPT}(fn, args)
  816. # define CV_CPU_CALL_${OPT}_(fn, args)
  817. #endif
  818. #define __CV_CPU_DISPATCH_CHAIN_${OPT}(fn, args, mode, ...) CV_CPU_CALL_${OPT}(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
  819. ")
  820. endif()
  821. endforeach()
  822. set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
  823. #define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
  824. #define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
  825. ")
  826. set(__file "${OpenCV_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
  827. if(EXISTS "${__file}")
  828. file(READ "${__file}" __content)
  829. endif()
  830. if(__content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE)
  831. #message(STATUS "${__file} contains same content")
  832. else()
  833. file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}")
  834. message(WARNING "${__file} is updated")
  835. endif()
  836. endmacro()
  837. macro(__ocv_add_dispatched_file filename target_src_var src_directory dst_directory precomp_hpp optimizations_var)
  838. if(NOT OPENCV_INITIAL_PASS)
  839. set(__codestr "
  840. #include \"${src_directory}/${precomp_hpp}\"
  841. #include \"${src_directory}/${filename}.simd.hpp\"
  842. ")
  843. set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${src_directory}/${filename}.simd.hpp\"")
  844. set(__dispatch_modes "BASELINE")
  845. set(__optimizations "${${optimizations_var}}")
  846. if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
  847. set(__optimizations "")
  848. endif()
  849. foreach(OPT ${__optimizations})
  850. string(TOLOWER "${OPT}" OPT_LOWER)
  851. set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.${OPT_LOWER}.cpp")
  852. if(EXISTS "${__file}")
  853. file(READ "${__file}" __content)
  854. else()
  855. set(__content "")
  856. endif()
  857. if(__content STREQUAL __codestr)
  858. #message(STATUS "${__file} contains up-to-date content")
  859. else()
  860. file(WRITE "${__file}" "${__codestr}")
  861. endif()
  862. if(";${CPU_DISPATCH_FINAL};" MATCHES "${OPT}" OR __CPU_DISPATCH_INCLUDE_ALL)
  863. if(EXISTS "${src_directory}/${filename}.${OPT_LOWER}.cpp")
  864. message(STATUS "Using overridden ${OPT} source: ${src_directory}/${filename}.${OPT_LOWER}.cpp")
  865. else()
  866. list(APPEND ${target_src_var} "${__file}")
  867. endif()
  868. set(__declarations_str "${__declarations_str}
  869. #define CV_CPU_DISPATCH_MODE ${OPT}
  870. #include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\"
  871. ")
  872. set(__dispatch_modes "${OPT}, ${__dispatch_modes}")
  873. endif()
  874. endforeach()
  875. set(__declarations_str "${__declarations_str}
  876. #define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes}
  877. #undef CV_CPU_SIMD_FILENAME
  878. ")
  879. set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.simd_declarations.hpp")
  880. if(EXISTS "${__file}")
  881. file(READ "${__file}" __content)
  882. endif()
  883. if(__content STREQUAL __declarations_str)
  884. #message(STATUS "${__file} contains up-to-date content")
  885. else()
  886. file(WRITE "${__file}" "${__declarations_str}")
  887. endif()
  888. endif()
  889. endmacro()
  890. macro(ocv_add_dispatched_file filename)
  891. set(__optimizations "${ARGN}")
  892. if(" ${ARGV1}" STREQUAL " TEST")
  893. list(REMOVE_AT __optimizations 0)
  894. __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/test" "test/" "test_precomp.hpp" __optimizations)
  895. else()
  896. __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/src" "" "precomp.hpp" __optimizations)
  897. endif()
  898. endmacro()
  899. # Workaround to support code which always require all code paths
  900. macro(ocv_add_dispatched_file_force_all)
  901. set(__CPU_DISPATCH_INCLUDE_ALL 1)
  902. ocv_add_dispatched_file(${ARGN})
  903. unset(__CPU_DISPATCH_INCLUDE_ALL)
  904. endmacro()
  905. if(CV_DISABLE_OPTIMIZATION OR CV_ICC OR CX_ICX)
  906. ocv_update(CV_ENABLE_UNROLLED 0)
  907. else()
  908. ocv_update(CV_ENABLE_UNROLLED 1)
  909. endif()