chunkset_sse2.c 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  /* chunkset_sse2.c -- SSE2 inline functions to copy small data chunks.
   * For conditions of distribution and use, see copyright notice in zlib.h
   */
  4. #include "zbuild.h"
  5. #ifdef X86_SSE2
  6. #include <immintrin.h>
  /* Chunk type used by the helpers in this file: one 128-bit integer vector. */
  typedef __m128i chunk_t;

  /* Size of chunk_t in bytes (a __m128i is 16 bytes wide). */
  #define CHUNK_SIZE 16

  /* Announce that chunkmemset_2/_4/_8 are defined in this file.
   * NOTE(review): presumably tested by chunkset_tpl.h to enable the matching
   * fast paths -- confirm against that header.
   */
  #define HAVE_CHUNKMEMSET_2
  #define HAVE_CHUNKMEMSET_4
  #define HAVE_CHUNKMEMSET_8
  12. static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
  13. int16_t tmp;
  14. memcpy(&tmp, from, sizeof(tmp));
  15. *chunk = _mm_set1_epi16(tmp);
  16. }
  17. static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
  18. int32_t tmp;
  19. memcpy(&tmp, from, sizeof(tmp));
  20. *chunk = _mm_set1_epi32(tmp);
  21. }
  22. static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
  23. int64_t tmp;
  24. memcpy(&tmp, from, sizeof(tmp));
  25. *chunk = _mm_set1_epi64x(tmp);
  26. }
  27. static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
  28. *chunk = _mm_loadu_si128((__m128i *)s);
  29. }
  30. static inline void storechunk(uint8_t *out, chunk_t *chunk) {
  31. _mm_storeu_si128((__m128i *)out, *chunk);
  32. }
  33. #define CHUNKSIZE chunksize_sse2
  34. #define CHUNKCOPY chunkcopy_sse2
  35. #define CHUNKUNROLL chunkunroll_sse2
  36. #define CHUNKMEMSET chunkmemset_sse2
  37. #define CHUNKMEMSET_SAFE chunkmemset_safe_sse2
  38. #include "chunkset_tpl.h"
  39. #define INFLATE_FAST inflate_fast_sse2
  40. #include "inffast_tpl.h"
  41. #endif