| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604 |
- # detect-intrinsics.cmake -- Detect compiler intrinsics support
- # Licensed under the Zlib license, see LICENSE.md for details
- macro(check_acle_compiler_flag)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC)
- if(HAVE_MARCH_ARMV8_CRC)
- set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support")
- else()
- check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD)
- if(HAVE_MARCH_ARMV8_CRC_SIMD)
- set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support")
- endif()
- endif()
- endif()
- endif()
- # Check whether compiler supports ARMv8 CRC intrinsics
- set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#if defined(_MSC_VER)
- #include <intrin.h>
- #else
- #include <arm_acle.h>
- #endif
- unsigned int f(unsigned int a, unsigned int b) {
- return __crc32w(a, b);
- }
- int main(void) { return 0; }"
- HAVE_ACLE_FLAG
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_armv6_compiler_flag)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6)
- if(HAVE_MARCH_ARMV6)
- set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
- endif()
- endif()
- endif()
- # Check whether compiler supports ARMv6 inline asm
- set(CMAKE_REQUIRED_FLAGS "${ARMV6FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "unsigned int f(unsigned int a, unsigned int b) {
- unsigned int c;
- __asm__ __volatile__ ( \"uqsub16 %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b) );
- return (int)c;
- }
- int main(void) { return f(1,2); }"
- HAVE_ARMV6_INLINE_ASM
- )
- # Check whether compiler supports ARMv6 intrinsics
- check_c_source_compiles(
- "#if defined(_MSC_VER)
- #include <intrin.h>
- #else
- #include <arm_acle.h>
- #endif
- unsigned int f(unsigned int a, unsigned int b) {
- #if defined(_MSC_VER)
- return _arm_uqsub16(a, b);
- #else
- return __uqsub16(a, b);
- #endif
- }
- int main(void) { return f(1,2); }"
- HAVE_ARMV6_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_avx512_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "Intel")
- if(CMAKE_HOST_UNIX OR APPLE)
- set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
- else()
- set(AVX512FLAG "/arch:AVX512")
- endif()
- elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
- # instruction scheduling unless you specify a reasonable -mtune= target
- set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
- if(NOT MSVC)
- check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
- if(HAVE_CASCADE_LAKE)
- set(AVX512FLAG "${AVX512FLAG} -mtune=cascadelake")
- else()
- set(AVX512FLAG "${AVX512FLAG} -mtune=skylake-avx512")
- endif()
- unset(HAVE_CASCADE_LAKE)
- endif()
- elseif(MSVC)
- set(AVX512FLAG "/arch:AVX512")
- endif()
- endif()
- # Check whether compiler supports AVX512 intrinsics
- set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- __m512i f(__m512i y) {
- __m512i x = _mm512_set1_epi8(2);
- return _mm512_sub_epi8(x, y);
- }
- int main(void) { return 0; }"
- HAVE_AVX512_INTRIN
- )
- endmacro()
- macro(check_avx512vnni_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "Intel")
- if(CMAKE_HOST_UNIX OR APPLE OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
- set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
- else()
- set(AVX512VNNIFLAG "/arch:AVX512")
- endif()
- elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
- if(NOT MSVC)
- check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
- if(HAVE_CASCADE_LAKE)
- set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
- else()
- set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=skylake-avx512")
- endif()
- unset(HAVE_CASCADE_LAKE)
- endif()
- elseif(MSVC)
- set(AVX512VNNIFLAG "/arch:AVX512")
- endif()
- endif()
- # Check whether compiler supports AVX512vnni intrinsics
- set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- __m512i f(__m512i x, __m512i y) {
- __m512i z = _mm512_setzero_epi32();
- return _mm512_dpbusd_epi32(z, x, y);
- }
- int main(void) { return 0; }"
- HAVE_AVX512VNNI_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_avx2_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "Intel")
- if(CMAKE_HOST_UNIX OR APPLE)
- set(AVX2FLAG "-mavx2")
- else()
- set(AVX2FLAG "/arch:AVX2")
- endif()
- elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(AVX2FLAG "-mavx2")
- elseif(MSVC)
- set(AVX2FLAG "/arch:AVX2")
- endif()
- endif()
- # Check whether compiler supports AVX2 intrinics
- set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- __m256i f(__m256i x) {
- const __m256i y = _mm256_set1_epi16(1);
- return _mm256_subs_epu16(x, y);
- }
- int main(void) { return 0; }"
- HAVE_AVX2_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_neon_compiler_flag)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- if("${ARCH}" MATCHES "aarch64")
- set(NEONFLAG "-march=armv8-a+simd")
- else()
- set(NEONFLAG "-mfpu=neon")
- endif()
- endif()
- endif()
- # Check whether compiler supports NEON flag
- set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#if defined(_M_ARM64) || defined(_M_ARM64EC)
- # include <arm64_neon.h>
- #else
- # include <arm_neon.h>
- #endif
- int main() { return 0; }"
- NEON_AVAILABLE FAIL_REGEX "not supported")
- # Check whether compiler native flag is enough for NEON support
- # Some GCC versions don't enable FPU (vector unit) when using -march=native
- if(NEON_AVAILABLE AND NATIVEFLAG AND (NOT "${ARCH}" MATCHES "aarch64"))
- check_c_source_compiles(
- "#include <arm_neon.h>
- uint8x16_t f(uint8x16_t x, uint8x16_t y) {
- return vaddq_u8(x, y);
- }
- int main(int argc, char* argv[]) {
- uint8x16_t a = vdupq_n_u8(argc);
- uint8x16_t b = vdupq_n_u8(argc);
- uint8x16_t result = f(a, b);
- return result[0];
- }"
- ARM_NEON_SUPPORT_NATIVE
- )
- if(NOT ARM_NEON_SUPPORT_NATIVE)
- set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} -mfpu=neon ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <arm_neon.h>
- uint8x16_t f(uint8x16_t x, uint8x16_t y) {
- return vaddq_u8(x, y);
- }
- int main(int argc, char* argv[]) {
- uint8x16_t a = vdupq_n_u8(argc);
- uint8x16_t b = vdupq_n_u8(argc);
- uint8x16_t result = f(a, b);
- return result[0];
- }"
- ARM_NEON_SUPPORT_NATIVE_MFPU
- )
- if(ARM_NEON_SUPPORT_NATIVE_MFPU)
- set(NEONFLAG "-mfpu=neon")
- else()
- # Remove local NEON_AVAILABLE variable and overwrite the cache
- unset(NEON_AVAILABLE)
- set(NEON_AVAILABLE "" CACHE INTERNAL "NEON support available" FORCE)
- endif()
- endif()
- endif()
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_neon_ld4_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- if("${ARCH}" MATCHES "aarch64")
- set(NEONFLAG "-march=armv8-a+simd")
- else()
- set(NEONFLAG "-mfpu=neon")
- endif()
- endif()
- endif()
- # Check whether compiler supports loading 4 neon vecs into a register range
- set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
- # include <arm64_neon.h>
- #else
- # include <arm_neon.h>
- #endif
- int32x4x4_t f(int var[16]) { return vld1q_s32_x4(var); }
- int main(void) { return 0; }"
- NEON_HAS_LD4)
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_pclmulqdq_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
- set(PCLMULFLAG "-mpclmul")
- endif()
- endif()
- # Check whether compiler supports PCLMULQDQ intrinsics
- if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
- # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
- set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- #include <wmmintrin.h>
- __m128i f(__m128i a, __m128i b) { return _mm_clmulepi64_si128(a, b, 0x10); }
- int main(void) { return 0; }"
- HAVE_PCLMULQDQ_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- else()
- set(HAVE_PCLMULQDQ_INTRIN OFF)
- endif()
- endmacro()
- macro(check_vpclmulqdq_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
- set(VPCLMULFLAG "-mvpclmulqdq -mavx512f")
- endif()
- endif()
- # Check whether compiler supports VPCLMULQDQ intrinsics
- if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
- set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- #include <wmmintrin.h>
- __m512i f(__m512i a) {
- __m512i b = _mm512_setzero_si512();
- return _mm512_clmulepi64_epi128(a, b, 0x10);
- }
- int main(void) { return 0; }"
- HAVE_VPCLMULQDQ_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- else()
- set(HAVE_VPCLMULQDQ_INTRIN OFF)
- endif()
- endmacro()
- macro(check_ppc_intrinsics)
- # Check if compiler supports AltiVec
- set(CMAKE_REQUIRED_FLAGS "-maltivec ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <altivec.h>
- int main(void)
- {
- vector int a = vec_splats(0);
- vector int b = vec_splats(0);
- a = vec_add(a, b);
- return 0;
- }"
- HAVE_ALTIVEC
- )
- set(CMAKE_REQUIRED_FLAGS)
- if(HAVE_ALTIVEC)
- set(PPCFLAGS "-maltivec")
- endif()
- set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <altivec.h>
- int main(void)
- {
- vector int a = vec_splats(0);
- vector int b = vec_splats(0);
- a = vec_add(a, b);
- return 0;
- }"
- HAVE_NOVSX
- )
- set(CMAKE_REQUIRED_FLAGS)
- if(HAVE_NOVSX)
- set(PPCFLAGS "${PPCFLAGS} -mno-vsx")
- endif()
- # Check if we have what we need for AltiVec optimizations
- set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <sys/auxv.h>
- #ifdef __FreeBSD__
- #include <machine/cpu.h>
- #endif
- int main() {
- #ifdef __FreeBSD__
- unsigned long hwcap;
- elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
- return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
- #else
- return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
- #endif
- }"
- HAVE_VMX
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_power8_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(POWER8FLAG "-mcpu=power8")
- endif()
- endif()
- # Check if we have what we need for POWER8 optimizations
- set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <sys/auxv.h>
- #ifdef __FreeBSD__
- #include <machine/cpu.h>
- #endif
- int main() {
- #ifdef __FreeBSD__
- unsigned long hwcap;
- elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
- return (hwcap & PPC_FEATURE2_ARCH_2_07);
- #else
- return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
- #endif
- }"
- HAVE_POWER8_INTRIN
- )
- if(NOT HAVE_POWER8_INTRIN AND HAVE_LINUX_AUXVEC_H)
- check_c_source_compiles(
- "#include <sys/auxv.h>
- #include <linux/auxvec.h>
- int main() {
- return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
- }"
- HAVE_POWER8_INTRIN2
- )
- if(HAVE_POWER8_INTRIN2)
- set(POWER8_NEED_AUXVEC_H 1)
- set(HAVE_POWER8_INTRIN ${HAVE_POWER8_INTRIN2} CACHE INTERNAL "Have POWER8 intrinsics" FORCE)
- unset(HAVE_POWER8_INTRIN2 CACHE)
- endif()
- endif()
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_rvv_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(RISCVFLAG "-march=rv64gcv")
- endif()
- endif()
- # Check whether compiler supports RVV
- set(CMAKE_REQUIRED_FLAGS "${RISCVFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <riscv_vector.h>
- int main() {
- return 0;
- }"
- HAVE_RVV_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_s390_intrinsics)
- check_c_source_compiles(
- "#include <sys/auxv.h>
- #ifndef HWCAP_S390_VXRS
- #define HWCAP_S390_VXRS HWCAP_S390_VX
- #endif
- int main() {
- return (getauxval(AT_HWCAP) & HWCAP_S390_VXRS);
- }"
- HAVE_S390_INTRIN
- )
- endmacro()
- macro(check_power9_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(POWER9FLAG "-mcpu=power9")
- endif()
- endif()
- # Check if we have what we need for POWER9 optimizations
- set(CMAKE_REQUIRED_FLAGS "${POWER9FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <sys/auxv.h>
- #ifdef __FreeBSD__
- #include <machine/cpu.h>
- #endif
- int main() {
- #ifdef __FreeBSD__
- unsigned long hwcap;
- elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
- return (hwcap & PPC_FEATURE2_ARCH_3_00);
- #else
- return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
- #endif
- }"
- HAVE_POWER9_INTRIN
- )
- if(NOT HAVE_POWER9_INTRIN AND HAVE_LINUX_AUXVEC_H)
- check_c_source_compiles(
- "#include <sys/auxv.h>
- #include <linux/auxvec.h>
- int main() {
- return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
- }"
- HAVE_POWER9_INTRIN2
- )
- if(HAVE_POWER9_INTRIN2)
- set(POWER9_NEED_AUXVEC_H 1)
- set(HAVE_POWER9_INTRIN ${HAVE_POWER9_INTRIN2} CACHE INTERNAL "Have POWER9 intrinsics" FORCE)
- unset(HAVE_POWER9_INTRIN2 CACHE)
- endif()
- endif()
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_sse2_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "Intel")
- if(CMAKE_HOST_UNIX OR APPLE)
- set(SSE2FLAG "-msse2")
- else()
- set(SSE2FLAG "/arch:SSE2")
- endif()
- elseif(MSVC)
- if(NOT "${ARCH}" MATCHES "x86_64")
- set(SSE2FLAG "/arch:SSE2")
- endif()
- elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(SSE2FLAG "-msse2")
- endif()
- endif()
- # Check whether compiler supports SSE2 intrinsics
- set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- __m128i f(__m128i x, __m128i y) { return _mm_sad_epu8(x, y); }
- int main(void) { return 0; }"
- HAVE_SSE2_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_ssse3_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "Intel")
- if(CMAKE_HOST_UNIX OR APPLE)
- set(SSSE3FLAG "-mssse3")
- else()
- set(SSSE3FLAG "/arch:SSSE3")
- endif()
- elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(SSSE3FLAG "-mssse3")
- endif()
- endif()
- # Check whether compiler supports SSSE3 intrinsics
- set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <immintrin.h>
- __m128i f(__m128i u) {
- __m128i v = _mm_set1_epi32(1);
- return _mm_hadd_epi32(u, v);
- }
- int main(void) { return 0; }"
- HAVE_SSSE3_INTRIN
- )
- endmacro()
- macro(check_sse42_intrinsics)
- if(NOT NATIVEFLAG)
- if(CMAKE_C_COMPILER_ID MATCHES "Intel")
- if(CMAKE_HOST_UNIX OR APPLE)
- set(SSE42FLAG "-msse4.2")
- else()
- set(SSE42FLAG "/arch:SSE4.2")
- endif()
- elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(SSE42FLAG "-msse4.2")
- endif()
- endif()
- # Check whether compiler supports SSE4.2 intrinsics
- set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <nmmintrin.h>
- unsigned int f(unsigned int a, unsigned int b) { return _mm_crc32_u32(a, b); }
- int main(void) { return 0; }"
- HAVE_SSE42_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_vgfma_intrinsics)
- if(NOT NATIVEFLAG)
- set(VGFMAFLAG "-march=z13")
- if(CMAKE_C_COMPILER_ID MATCHES "GNU")
- set(VGFMAFLAG "${VGFMAFLAG} -mzarch")
- endif()
- if(CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(VGFMAFLAG "${VGFMAFLAG} -fzvector")
- endif()
- endif()
- # Check whether compiler supports "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic
- set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#include <vecintrin.h>
- int main(void) {
- unsigned long long a __attribute__((vector_size(16))) = { 0 };
- unsigned long long b __attribute__((vector_size(16))) = { 0 };
- unsigned char c __attribute__((vector_size(16))) = { 0 };
- c = vec_gfmsum_accum_128(a, b, c);
- return c[0];
- }"
- HAVE_VGFMA_INTRIN FAIL_REGEX "not supported")
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
- macro(check_xsave_intrinsics)
- if(NOT NATIVEFLAG AND NOT MSVC AND NOT CMAKE_C_COMPILER_ID MATCHES "Intel")
- set(XSAVEFLAG "-mxsave")
- endif()
- set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
- check_c_source_compiles(
- "#ifdef _MSC_VER
- # include <intrin.h>
- #elif __GNUC__ == 8 && __GNUC_MINOR__ > 1
- # include <xsaveintrin.h>
- #else
- # include <immintrin.h>
- #endif
- unsigned int f(unsigned int a) { return (int) _xgetbv(a); }
- int main(void) { return 0; }"
- HAVE_XSAVE_INTRIN FAIL_REGEX "not supported")
- set(CMAKE_REQUIRED_FLAGS)
- endmacro()
|