detect-intrinsics.cmake 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. # detect-intrinsics.cmake -- Detect compiler intrinsics support
  2. # Licensed under the Zlib license, see LICENSE.md for details
  # check_acle_compiler_flag()
  #
  # Probe for the ARMv8 CRC32 intrinsic (__crc32w).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID
  #   Sets:  ACLEFLAG (cache) when one of the -march options is accepted,
  #          and the probe result HAVE_ACLE_FLAG.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_acle_compiler_flag)
      # An explicit -march option is only chosen when no native flag is in use.
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC)
          if(HAVE_MARCH_ARMV8_CRC)
              set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support")
          else()
              # Second attempt: the same option with +simd appended.
              check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD)
              if(HAVE_MARCH_ARMV8_CRC_SIMD)
                  set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support")
              endif()
          endif()
      endif()

      # Compile a small function that calls the CRC32 intrinsic.
      set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#if defined(_MSC_VER)
          #include <intrin.h>
          #else
          #include <arm_acle.h>
          #endif
          unsigned int f(unsigned int a, unsigned int b) {
              return __crc32w(a, b);
          }
          int main(void) { return 0; }"
          HAVE_ACLE_FLAG
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_armv6_compiler_flag()
  #
  # Probe for ARMv6 support in two forms: inline assembly and intrinsics.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID
  #   Sets:  ARMV6FLAG (cache) when -march=armv6 is accepted, plus the probe
  #          results HAVE_ARMV6_INLINE_ASM and HAVE_ARMV6_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_armv6_compiler_flag)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6)
          if(HAVE_MARCH_ARMV6)
              set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
          endif()
      endif()

      # Both probes below share the same flag set.
      set(CMAKE_REQUIRED_FLAGS "${ARMV6FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")

      # Probe 1: uqsub16 emitted through inline assembly.
      check_c_source_compiles(
          "unsigned int f(unsigned int a, unsigned int b) {
              unsigned int c;
              __asm__ __volatile__ ( \"uqsub16 %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b) );
              return (int)c;
          }
          int main(void) { return f(1,2); }"
          HAVE_ARMV6_INLINE_ASM
      )

      # Probe 2: uqsub16 through the compiler's intrinsic.
      check_c_source_compiles(
          "#if defined(_MSC_VER)
          #include <intrin.h>
          #else
          #include <arm_acle.h>
          #endif
          unsigned int f(unsigned int a, unsigned int b) {
          #if defined(_MSC_VER)
              return _arm_uqsub16(a, b);
          #else
              return __uqsub16(a, b);
          #endif
          }
          int main(void) { return f(1,2); }"
          HAVE_ARMV6_INTRIN
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_avx512_intrinsics()
  #
  # Probe for AVX512 (F/DQ/BW/VL) intrinsic support.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID, MSVC
  #   Sets:  AVX512FLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_AVX512_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared before returning so the AVX512 options
  # do not leak into checks that run after this macro.
  macro(check_avx512_intrinsics)
      if(NOT NATIVEFLAG)
          if(CMAKE_C_COMPILER_ID MATCHES "Intel")
              if(CMAKE_HOST_UNIX OR APPLE)
                  set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
              else()
                  set(AVX512FLAG "/arch:AVX512")
              endif()
          elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
              # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
              # instruction scheduling unless you specify a reasonable -mtune= target
              set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
              if(NOT MSVC)
                  check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
                  if(HAVE_CASCADE_LAKE)
                      set(AVX512FLAG "${AVX512FLAG} -mtune=cascadelake")
                  else()
                      set(AVX512FLAG "${AVX512FLAG} -mtune=skylake-avx512")
                  endif()
                  unset(HAVE_CASCADE_LAKE)
              endif()
          elseif(MSVC)
              set(AVX512FLAG "/arch:AVX512")
          endif()
      endif()
      # Check whether compiler supports AVX512 intrinsics
      set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <immintrin.h>
          __m512i f(__m512i y) {
              __m512i x = _mm512_set1_epi8(2);
              return _mm512_sub_epi8(x, y);
          }
          int main(void) { return 0; }"
          HAVE_AVX512_INTRIN
      )
      # Reset the probe flags, as every other check macro in this file does.
      set(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_avx512vnni_intrinsics()
  #
  # Probe for AVX512-VNNI intrinsic support (_mm512_dpbusd_epi32).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID, MSVC
  #   Sets:  AVX512VNNIFLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_AVX512VNNI_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_avx512vnni_intrinsics)
      if(NOT NATIVEFLAG)
          if(CMAKE_C_COMPILER_ID MATCHES "Intel")
              # GCC-style options on Unix/Apple hosts and for IntelLLVM,
              # MSVC-style /arch otherwise.
              if(CMAKE_HOST_UNIX OR APPLE OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
                  set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
              else()
                  set(AVX512VNNIFLAG "/arch:AVX512")
              endif()
          elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
              set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
              if(NOT MSVC)
                  # Append a tuning target: cascadelake if accepted, else skylake-avx512.
                  check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
                  if(HAVE_CASCADE_LAKE)
                      string(APPEND AVX512VNNIFLAG " -mtune=cascadelake")
                  else()
                      string(APPEND AVX512VNNIFLAG " -mtune=skylake-avx512")
                  endif()
                  unset(HAVE_CASCADE_LAKE)
              endif()
          elseif(MSVC)
              set(AVX512VNNIFLAG "/arch:AVX512")
          endif()
      endif()

      # Compile a function that uses a VNNI dot-product intrinsic.
      set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <immintrin.h>
          __m512i f(__m512i x, __m512i y) {
              __m512i z = _mm512_setzero_epi32();
              return _mm512_dpbusd_epi32(z, x, y);
          }
          int main(void) { return 0; }"
          HAVE_AVX512VNNI_INTRIN
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_avx2_intrinsics()
  #
  # Probe for AVX2 intrinsic support.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID, MSVC
  #   Sets:  AVX2FLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_AVX2_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_avx2_intrinsics)
      if(NOT NATIVEFLAG)
          if(CMAKE_C_COMPILER_ID MATCHES "Intel")
              # MSVC-style option unless the host is Unix or Apple.
              if(NOT (CMAKE_HOST_UNIX OR APPLE))
                  set(AVX2FLAG "/arch:AVX2")
              else()
                  set(AVX2FLAG "-mavx2")
              endif()
          elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
              set(AVX2FLAG "-mavx2")
          elseif(MSVC)
              set(AVX2FLAG "/arch:AVX2")
          endif()
      endif()

      # Check whether the compiler supports AVX2 intrinsics.
      set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <immintrin.h>
          __m256i f(__m256i x) {
              const __m256i y = _mm256_set1_epi16(1);
              return _mm256_subs_epu16(x, y);
          }
          int main(void) { return 0; }"
          HAVE_AVX2_INTRIN
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_neon_compiler_flag()
  #
  # Probe for NEON header availability and, when a native flag is in use on a
  # non-aarch64 target, for whether that flag alone is enough to compile NEON code.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, ARCH, CMAKE_C_COMPILER_ID
  #   Sets:  NEONFLAG, NEON_AVAILABLE, and (native, non-aarch64 only)
  #          ARM_NEON_SUPPORT_NATIVE / ARM_NEON_SUPPORT_NATIVE_MFPU.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_neon_compiler_flag)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          if("${ARCH}" MATCHES "aarch64")
              set(NEONFLAG "-march=armv8-a+simd")
          else()
              set(NEONFLAG "-mfpu=neon")
          endif()
      endif()

      # Step 1: can the NEON header be included with the chosen flags?
      set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#if defined(_M_ARM64) || defined(_M_ARM64EC)
          # include <arm64_neon.h>
          #else
          # include <arm_neon.h>
          #endif
          int main() { return 0; }"
          NEON_AVAILABLE FAIL_REGEX "not supported")

      # Step 2: check whether the native flag alone is enough for NEON support.
      # Some GCC versions don't enable the FPU (vector unit) with -march=native.
      if(NEON_AVAILABLE AND NATIVEFLAG AND (NOT "${ARCH}" MATCHES "aarch64"))
          check_c_source_compiles(
              "#include <arm_neon.h>
              uint8x16_t f(uint8x16_t x, uint8x16_t y) {
                  return vaddq_u8(x, y);
              }
              int main(int argc, char* argv[]) {
                  uint8x16_t a = vdupq_n_u8(argc);
                  uint8x16_t b = vdupq_n_u8(argc);
                  uint8x16_t result = f(a, b);
                  return result[0];
              }"
              ARM_NEON_SUPPORT_NATIVE
          )
          if(NOT ARM_NEON_SUPPORT_NATIVE)
              # Step 3: retry the same program with -mfpu=neon next to the native flag.
              set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} -mfpu=neon ${ZNOLTOFLAG}")
              check_c_source_compiles(
                  "#include <arm_neon.h>
                  uint8x16_t f(uint8x16_t x, uint8x16_t y) {
                      return vaddq_u8(x, y);
                  }
                  int main(int argc, char* argv[]) {
                      uint8x16_t a = vdupq_n_u8(argc);
                      uint8x16_t b = vdupq_n_u8(argc);
                      uint8x16_t result = f(a, b);
                      return result[0];
                  }"
                  ARM_NEON_SUPPORT_NATIVE_MFPU
              )
              if(NOT ARM_NEON_SUPPORT_NATIVE_MFPU)
                  # Neither variant compiles: drop the local NEON_AVAILABLE
                  # variable and overwrite the cached result with an empty value.
                  unset(NEON_AVAILABLE)
                  set(NEON_AVAILABLE "" CACHE INTERNAL "NEON support available" FORCE)
              else()
                  set(NEONFLAG "-mfpu=neon")
              endif()
          endif()
      endif()
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_neon_ld4_intrinsics()
  #
  # Probe for the vld1q_s32_x4 NEON intrinsic.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, ARCH, CMAKE_C_COMPILER_ID
  #   Sets:  NEONFLAG (only when NATIVEFLAG is not set) and the probe
  #          result NEON_HAS_LD4.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_neon_ld4_intrinsics)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          if("${ARCH}" MATCHES "aarch64")
              set(NEONFLAG "-march=armv8-a+simd")
          else()
              set(NEONFLAG "-mfpu=neon")
          endif()
      endif()

      # Check whether the compiler supports loading 4 NEON vectors into a register range.
      set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
          # include <arm64_neon.h>
          #else
          # include <arm_neon.h>
          #endif
          int32x4x4_t f(int var[16]) { return vld1q_s32_x4(var); }
          int main(void) { return 0; }"
          NEON_HAS_LD4)
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_pclmulqdq_intrinsics()
  #
  # Probe for the PCLMULQDQ intrinsic (_mm_clmulepi64_si128).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, ARCH, APPLE, CMAKE_C_COMPILER_ID
  #   Sets:  PCLMULFLAG (only when NATIVEFLAG is not set) and
  #          HAVE_PCLMULQDQ_INTRIN (forced OFF on Apple i386 without probing).
  macro(check_pclmulqdq_intrinsics)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|IntelLLVM")
          set(PCLMULFLAG "-mpclmul")
      endif()

      if(APPLE AND "${ARCH}" MATCHES "i386")
          # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
          set(HAVE_PCLMULQDQ_INTRIN OFF)
      else()
          # Check whether the compiler supports PCLMULQDQ intrinsics.
          set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
          check_c_source_compiles(
              "#include <immintrin.h>
              #include <wmmintrin.h>
              __m128i f(__m128i a, __m128i b) { return _mm_clmulepi64_si128(a, b, 0x10); }
              int main(void) { return 0; }"
              HAVE_PCLMULQDQ_INTRIN
          )
          unset(CMAKE_REQUIRED_FLAGS)
      endif()
  endmacro()
  # check_vpclmulqdq_intrinsics()
  #
  # Probe for the 512-bit VPCLMULQDQ intrinsic (_mm512_clmulepi64_epi128).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, ARCH, APPLE, CMAKE_C_COMPILER_ID
  #   Sets:  VPCLMULFLAG (only when NATIVEFLAG is not set) and
  #          HAVE_VPCLMULQDQ_INTRIN (forced OFF on Apple i386 without probing).
  macro(check_vpclmulqdq_intrinsics)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|IntelLLVM")
          set(VPCLMULFLAG "-mvpclmulqdq -mavx512f")
      endif()

      if(APPLE AND "${ARCH}" MATCHES "i386")
          # Apple i386 targets are skipped outright.
          set(HAVE_VPCLMULQDQ_INTRIN OFF)
      else()
          # Check whether the compiler supports VPCLMULQDQ intrinsics.
          set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
          check_c_source_compiles(
              "#include <immintrin.h>
              #include <wmmintrin.h>
              __m512i f(__m512i a) {
                  __m512i b = _mm512_setzero_si512();
                  return _mm512_clmulepi64_epi128(a, b, 0x10);
              }
              int main(void) { return 0; }"
              HAVE_VPCLMULQDQ_INTRIN
          )
          unset(CMAKE_REQUIRED_FLAGS)
      endif()
  endmacro()
  # check_ppc_intrinsics()
  #
  # Probe PowerPC AltiVec support in three steps.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG
  #   Sets:  HAVE_ALTIVEC, HAVE_NOVSX, HAVE_VMX and PPCFLAGS (built up from
  #          whichever of -maltivec / -mno-vsx were accepted).
  # CMAKE_REQUIRED_FLAGS is cleared after every step.
  macro(check_ppc_intrinsics)
      # Step 1: does an AltiVec program compile with -maltivec?
      set(CMAKE_REQUIRED_FLAGS "-maltivec ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <altivec.h>
          int main(void)
          {
              vector int a = vec_splats(0);
              vector int b = vec_splats(0);
              a = vec_add(a, b);
              return 0;
          }"
          HAVE_ALTIVEC
      )
      unset(CMAKE_REQUIRED_FLAGS)
      if(HAVE_ALTIVEC)
          set(PPCFLAGS "-maltivec")
      endif()

      # Step 2: does the same program still compile with -mno-vsx added?
      set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <altivec.h>
          int main(void)
          {
              vector int a = vec_splats(0);
              vector int b = vec_splats(0);
              a = vec_add(a, b);
              return 0;
          }"
          HAVE_NOVSX
      )
      unset(CMAKE_REQUIRED_FLAGS)
      if(HAVE_NOVSX)
          string(APPEND PPCFLAGS " -mno-vsx")
      endif()

      # Step 3: check if we have what we need for AltiVec optimizations,
      # i.e. the auxiliary-vector query for PPC_FEATURE_HAS_ALTIVEC compiles.
      set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <sys/auxv.h>
          #ifdef __FreeBSD__
          #include <machine/cpu.h>
          #endif
          int main() {
          #ifdef __FreeBSD__
              unsigned long hwcap;
              elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
              return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
          #else
              return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
          #endif
          }"
          HAVE_VMX
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_power8_intrinsics()
  #
  # Probe whether the runtime query for PPC_FEATURE2_ARCH_2_07 compiles.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, HAVE_LINUX_AUXVEC_H, CMAKE_C_COMPILER_ID
  #   Sets:  POWER8FLAG (only when NATIVEFLAG is not set), HAVE_POWER8_INTRIN,
  #          and POWER8_NEED_AUXVEC_H when only the <linux/auxvec.h> variant compiles.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_power8_intrinsics)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          set(POWER8FLAG "-mcpu=power8")
      endif()

      # Check if we have what we need for POWER8 optimizations.
      set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <sys/auxv.h>
          #ifdef __FreeBSD__
          #include <machine/cpu.h>
          #endif
          int main() {
          #ifdef __FreeBSD__
              unsigned long hwcap;
              elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
              return (hwcap & PPC_FEATURE2_ARCH_2_07);
          #else
              return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
          #endif
          }"
          HAVE_POWER8_INTRIN
      )

      # Second attempt, with <linux/auxvec.h> included as well.
      if(HAVE_LINUX_AUXVEC_H AND NOT HAVE_POWER8_INTRIN)
          check_c_source_compiles(
              "#include <sys/auxv.h>
              #include <linux/auxvec.h>
              int main() {
                  return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
              }"
              HAVE_POWER8_INTRIN2
          )
          if(HAVE_POWER8_INTRIN2)
              set(POWER8_NEED_AUXVEC_H 1)
              # Fold the fallback result into the primary cache entry and drop the temporary one.
              set(HAVE_POWER8_INTRIN ${HAVE_POWER8_INTRIN2} CACHE INTERNAL "Have POWER8 intrinsics" FORCE)
              unset(HAVE_POWER8_INTRIN2 CACHE)
          endif()
      endif()
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_rvv_intrinsics()
  #
  # Probe whether <riscv_vector.h> can be included.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID
  #   Sets:  RISCVFLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_RVV_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_rvv_intrinsics)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          set(RISCVFLAG "-march=rv64gcv")
      endif()

      # Check whether the compiler supports RVV.
      set(CMAKE_REQUIRED_FLAGS "${RISCVFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <riscv_vector.h>
          int main() {
              return 0;
          }"
          HAVE_RVV_INTRIN
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_s390_intrinsics()
  #
  # Probe whether a getauxval(AT_HWCAP) query for the s390 vector capability
  # bit compiles. The program defines HWCAP_S390_VXRS as HWCAP_S390_VX when
  # the former is missing. No extra compiler flags are set here.
  #   Sets: HAVE_S390_INTRIN
  macro(check_s390_intrinsics)
      check_c_source_compiles(
          "#include <sys/auxv.h>
          #ifndef HWCAP_S390_VXRS
          #define HWCAP_S390_VXRS HWCAP_S390_VX
          #endif
          int main() {
              return (getauxval(AT_HWCAP) & HWCAP_S390_VXRS);
          }"
          HAVE_S390_INTRIN
      )
  endmacro()
  # check_power9_intrinsics()
  #
  # Probe whether the runtime query for PPC_FEATURE2_ARCH_3_00 compiles.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, HAVE_LINUX_AUXVEC_H, CMAKE_C_COMPILER_ID
  #   Sets:  POWER9FLAG (only when NATIVEFLAG is not set), HAVE_POWER9_INTRIN,
  #          and POWER9_NEED_AUXVEC_H when only the <linux/auxvec.h> variant compiles.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_power9_intrinsics)
      if(NOT NATIVEFLAG AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
          set(POWER9FLAG "-mcpu=power9")
      endif()

      # Check if we have what we need for POWER9 optimizations.
      set(CMAKE_REQUIRED_FLAGS "${POWER9FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <sys/auxv.h>
          #ifdef __FreeBSD__
          #include <machine/cpu.h>
          #endif
          int main() {
          #ifdef __FreeBSD__
              unsigned long hwcap;
              elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
              return (hwcap & PPC_FEATURE2_ARCH_3_00);
          #else
              return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
          #endif
          }"
          HAVE_POWER9_INTRIN
      )

      # Second attempt, with <linux/auxvec.h> included as well.
      if(HAVE_LINUX_AUXVEC_H AND NOT HAVE_POWER9_INTRIN)
          check_c_source_compiles(
              "#include <sys/auxv.h>
              #include <linux/auxvec.h>
              int main() {
                  return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
              }"
              HAVE_POWER9_INTRIN2
          )
          if(HAVE_POWER9_INTRIN2)
              set(POWER9_NEED_AUXVEC_H 1)
              # Fold the fallback result into the primary cache entry and drop the temporary one.
              set(HAVE_POWER9_INTRIN ${HAVE_POWER9_INTRIN2} CACHE INTERNAL "Have POWER9 intrinsics" FORCE)
              unset(HAVE_POWER9_INTRIN2 CACHE)
          endif()
      endif()
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_sse2_intrinsics()
  #
  # Probe for SSE2 intrinsic support (_mm_sad_epu8).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, ARCH, CMAKE_C_COMPILER_ID, MSVC
  #   Sets:  SSE2FLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_SSE2_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_sse2_intrinsics)
      if(NOT NATIVEFLAG)
          if(CMAKE_C_COMPILER_ID MATCHES "Intel")
              # MSVC-style option unless the host is Unix or Apple.
              if(NOT (CMAKE_HOST_UNIX OR APPLE))
                  set(SSE2FLAG "/arch:SSE2")
              else()
                  set(SSE2FLAG "-msse2")
              endif()
          elseif(MSVC)
              # No option is set for MSVC when ARCH matches x86_64.
              if(NOT "${ARCH}" MATCHES "x86_64")
                  set(SSE2FLAG "/arch:SSE2")
              endif()
          elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
              set(SSE2FLAG "-msse2")
          endif()
      endif()

      # Check whether the compiler supports SSE2 intrinsics.
      set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <immintrin.h>
          __m128i f(__m128i x, __m128i y) { return _mm_sad_epu8(x, y); }
          int main(void) { return 0; }"
          HAVE_SSE2_INTRIN
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_ssse3_intrinsics()
  #
  # Probe for SSSE3 intrinsic support (_mm_hadd_epi32).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID
  #   Sets:  SSSE3FLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_SSSE3_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared before returning so the SSSE3 options
  # do not leak into checks that run after this macro.
  macro(check_ssse3_intrinsics)
      if(NOT NATIVEFLAG)
          if(CMAKE_C_COMPILER_ID MATCHES "Intel")
              if(CMAKE_HOST_UNIX OR APPLE)
                  set(SSSE3FLAG "-mssse3")
              else()
                  set(SSSE3FLAG "/arch:SSSE3")
              endif()
          elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
              set(SSSE3FLAG "-mssse3")
          endif()
      endif()
      # Check whether compiler supports SSSE3 intrinsics
      set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <immintrin.h>
          __m128i f(__m128i u) {
              __m128i v = _mm_set1_epi32(1);
              return _mm_hadd_epi32(u, v);
          }
          int main(void) { return 0; }"
          HAVE_SSSE3_INTRIN
      )
      # Reset the probe flags, as every other check macro in this file does.
      set(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_sse42_intrinsics()
  #
  # Probe for SSE4.2 intrinsic support (_mm_crc32_u32).
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID
  #   Sets:  SSE42FLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_SSE42_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_sse42_intrinsics)
      if(NOT NATIVEFLAG)
          if(CMAKE_C_COMPILER_ID MATCHES "Intel")
              # MSVC-style option unless the host is Unix or Apple.
              if(NOT (CMAKE_HOST_UNIX OR APPLE))
                  set(SSE42FLAG "/arch:SSE4.2")
              else()
                  set(SSE42FLAG "-msse4.2")
              endif()
          elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
              set(SSE42FLAG "-msse4.2")
          endif()
      endif()

      # Check whether the compiler supports SSE4.2 intrinsics.
      set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <nmmintrin.h>
          unsigned int f(unsigned int a, unsigned int b) { return _mm_crc32_u32(a, b); }
          int main(void) { return 0; }"
          HAVE_SSE42_INTRIN
      )
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_vgfma_intrinsics()
  #
  # Probe for the vec_gfmsum_accum_128 intrinsic from <vecintrin.h>.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, CMAKE_C_COMPILER_ID
  #   Sets:  VGFMAFLAG (only when NATIVEFLAG is not set) and the probe
  #          result HAVE_VGFMA_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_vgfma_intrinsics)
      if(NOT NATIVEFLAG)
          # Start from -march=z13 and append the compiler-specific option(s).
          set(VGFMAFLAG "-march=z13")
          if(CMAKE_C_COMPILER_ID MATCHES "GNU")
              string(APPEND VGFMAFLAG " -mzarch")
          endif()
          if(CMAKE_C_COMPILER_ID MATCHES "Clang")
              string(APPEND VGFMAFLAG " -fzvector")
          endif()
      endif()

      # Check whether the compiler supports the
      # "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic.
      set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#include <vecintrin.h>
          int main(void) {
              unsigned long long a __attribute__((vector_size(16))) = { 0 };
              unsigned long long b __attribute__((vector_size(16))) = { 0 };
              unsigned char c __attribute__((vector_size(16))) = { 0 };
              c = vec_gfmsum_accum_128(a, b, c);
              return c[0];
          }"
          HAVE_VGFMA_INTRIN FAIL_REGEX "not supported")
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()
  # check_xsave_intrinsics()
  #
  # Probe for the _xgetbv intrinsic.
  #   Reads: NATIVEFLAG, ZNOLTOFLAG, MSVC, CMAKE_C_COMPILER_ID
  #   Sets:  XSAVEFLAG (-mxsave, only without a native flag and for compilers
  #          that are neither MSVC nor Intel) and the probe result HAVE_XSAVE_INTRIN.
  # CMAKE_REQUIRED_FLAGS is cleared again before returning.
  macro(check_xsave_intrinsics)
      if(NOT (NATIVEFLAG OR MSVC OR CMAKE_C_COMPILER_ID MATCHES "Intel"))
          set(XSAVEFLAG "-mxsave")
      endif()

      # The probe picks its header by compiler: <intrin.h> for MSVC,
      # <xsaveintrin.h> for GCC 8 with a minor version above 1, and
      # <immintrin.h> otherwise.
      set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
      check_c_source_compiles(
          "#ifdef _MSC_VER
          # include <intrin.h>
          #elif __GNUC__ == 8 && __GNUC_MINOR__ > 1
          # include <xsaveintrin.h>
          #else
          # include <immintrin.h>
          #endif
          unsigned int f(unsigned int a) { return (int) _xgetbv(a); }
          int main(void) { return 0; }"
          HAVE_XSAVE_INTRIN FAIL_REGEX "not supported")
      unset(CMAKE_REQUIRED_FLAGS)
  endmacro()