| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- /* x86_functions.h -- x86 implementations for arch-specific functions.
- * Copyright (C) 2013 Intel Corporation Jim Kukunas
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
- #ifndef X86_FUNCTIONS_H_
- #define X86_FUNCTIONS_H_
#ifdef X86_SSE2
/* Prototypes for the SSE2 variants. This header only declares them. */
uint32_t chunksize_sse2(void);
uint8_t *chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
/* Declared independently of HAVE_BUILTIN_CTZ: the static-dispatch section of
 * this header maps native_slide_hash to slide_hash_sse2 without checking
 * HAVE_BUILTIN_CTZ, so the prototype must be visible in that configuration
 * as well. */
void slide_hash_sse2(deflate_state *s);

#  ifdef HAVE_BUILTIN_CTZ
/* The match-finding variants are only declared when HAVE_BUILTIN_CTZ is set,
 * mirroring the guard around their native_* mappings. */
uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
#  endif
#endif
#if defined(X86_SSSE3)
/* Prototypes for the SSSE3 variants. This header only declares them. */
void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
uint8_t *chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
#endif /* X86_SSSE3 */
#if defined(X86_SSE42)
/* Prototype for the SSE4.2 Adler-32 fold-and-copy variant (declaration only). */
uint32_t adler32_fold_copy_sse42(uint32_t adler,
                                 uint8_t *dst,
                                 const uint8_t *src,
                                 size_t len);
#endif /* X86_SSE42 */
#ifdef X86_AVX2
/* Prototypes for the AVX2 variants. This header only declares them. */
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t chunksize_avx2(void);
uint8_t *chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start);
/* Declared independently of HAVE_BUILTIN_CTZ: the static-dispatch section of
 * this header maps native_slide_hash to slide_hash_avx2 without checking
 * HAVE_BUILTIN_CTZ, so the prototype must be visible in that configuration
 * as well. */
void slide_hash_avx2(deflate_state *s);

#  ifdef HAVE_BUILTIN_CTZ
/* The match-finding variants are only declared when HAVE_BUILTIN_CTZ is set,
 * mirroring the guard around their native_* mappings. */
uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
#  endif
#endif
#if defined(X86_AVX512)
/* Prototypes for the AVX-512 Adler-32 variants (declarations only). */
uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
#endif /* X86_AVX512 */
#if defined(X86_AVX512VNNI)
/* Prototypes for the AVX-512 VNNI Adler-32 variants (declarations only). */
uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
#endif /* X86_AVX512VNNI */
#if defined(X86_PCLMULQDQ_CRC)
/* Prototypes for the PCLMULQDQ CRC-32 variants (declarations only). */

/* One-shot entry point. */
uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);

/* Fold interface operating on a caller-provided crc32_fold object. */
uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
void     crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
void     crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
#endif /* X86_PCLMULQDQ_CRC */
#if defined(X86_VPCLMULQDQ_CRC)
/* Prototypes for the VPCLMULQDQ CRC-32 variants (declarations only). */

/* One-shot entry point. */
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);

/* Fold interface operating on a caller-provided crc32_fold object. */
uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
void     crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
void     crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
#endif /* X86_VPCLMULQDQ_CRC */
#ifdef DISABLE_RUNTIME_CPU_DETECTION
/* Compile-time dispatch: each section below rebinds the native_* macros to
 * the variant for one instruction-set extension when both the project
 * feature macro (X86_*) and the compiler's target macro (__*__) are defined.
 * Every rebinding is an #undef followed by a #define, so when several
 * sections are active the one appearing last in this file wins. */

// X86 - SSE2
/* NOTE(review): this condition is also true on x86-64 (__x86_64__ / _M_X64)
 * or with X86_NOCHECK_SSE2 even if X86_SSE2 is undefined, in which case the
 * *_sse2 prototypes in this header are not declared -- confirm that those
 * configurations always define X86_SSE2. */
#  if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
#    undef native_chunkmemset_safe
#    define native_chunkmemset_safe chunkmemset_safe_sse2
#    undef native_chunksize
#    define native_chunksize chunksize_sse2
#    undef native_inflate_fast
#    define native_inflate_fast inflate_fast_sse2
#    undef native_slide_hash
#    define native_slide_hash slide_hash_sse2
#    ifdef HAVE_BUILTIN_CTZ
#      undef native_compare256
#      define native_compare256 compare256_sse2
#      undef native_longest_match
#      define native_longest_match longest_match_sse2
#      undef native_longest_match_slow
#      define native_longest_match_slow longest_match_slow_sse2
#    endif
#  endif

// X86 - SSSE3
#  if defined(X86_SSSE3) && defined(__SSSE3__)
#    undef native_adler32
#    define native_adler32 adler32_ssse3
#    undef native_chunkmemset_safe
#    define native_chunkmemset_safe chunkmemset_safe_ssse3
#    undef native_inflate_fast
#    define native_inflate_fast inflate_fast_ssse3
#  endif

// X86 - SSE4.2
#  if defined(X86_SSE42) && defined(__SSE4_2__)
#    undef native_adler32_fold_copy
#    define native_adler32_fold_copy adler32_fold_copy_sse42
#  endif

// X86 - PCLMUL
#  if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
#    undef native_crc32
#    define native_crc32 crc32_pclmulqdq
#    undef native_crc32_fold
#    define native_crc32_fold crc32_fold_pclmulqdq
#    undef native_crc32_fold_copy
#    define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
#    undef native_crc32_fold_final
#    define native_crc32_fold_final crc32_fold_pclmulqdq_final
#    undef native_crc32_fold_reset
#    define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
#  endif

// X86 - AVX2
#  if defined(X86_AVX2) && defined(__AVX2__)
#    undef native_adler32
#    define native_adler32 adler32_avx2
#    undef native_adler32_fold_copy
#    define native_adler32_fold_copy adler32_fold_copy_avx2
#    undef native_chunkmemset_safe
#    define native_chunkmemset_safe chunkmemset_safe_avx2
#    undef native_chunksize
#    define native_chunksize chunksize_avx2
#    undef native_inflate_fast
#    define native_inflate_fast inflate_fast_avx2
#    undef native_slide_hash
#    define native_slide_hash slide_hash_avx2
#    ifdef HAVE_BUILTIN_CTZ
#      undef native_compare256
#      define native_compare256 compare256_avx2
#      undef native_longest_match
#      define native_longest_match longest_match_avx2
#      undef native_longest_match_slow
#      define native_longest_match_slow longest_match_slow_avx2
#    endif
#  endif

// X86 - AVX512 (F,DQ,BW,VL)
#  if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
#    undef native_adler32
#    define native_adler32 adler32_avx512
#    undef native_adler32_fold_copy
#    define native_adler32_fold_copy adler32_fold_copy_avx512

/* The VNNI and VPCLMULQDQ sections are nested: they are only considered when
 * the AVX512 section above is active. */

// X86 - AVX512 (VNNI)
#    if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
#      undef native_adler32
#      define native_adler32 adler32_avx512_vnni
#      undef native_adler32_fold_copy
#      define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
#    endif

// X86 - VPCLMULQDQ
/* Require X86_VPCLMULQDQ_CRC like every other section requires its X86_*
 * macro: the crc32_*vpclmulqdq* prototypes are only declared under it, so
 * the compiler target macros alone must not select these symbols. */
#    if defined(X86_VPCLMULQDQ_CRC) && defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
#      undef native_crc32
#      define native_crc32 crc32_vpclmulqdq
#      undef native_crc32_fold
#      define native_crc32_fold crc32_fold_vpclmulqdq
#      undef native_crc32_fold_copy
#      define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
#      undef native_crc32_fold_final
#      define native_crc32_fold_final crc32_fold_vpclmulqdq_final
#      undef native_crc32_fold_reset
#      define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
#    endif
#  endif
#endif
- #endif /* X86_FUNCTIONS_H_ */
|