chunkset_ssse3.c 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. /* chunkset_ssse3.c -- SSSE3 inline functions to copy small data chunks.
  2. * For conditions of distribution and use, see copyright notice in zlib.h
  3. */
  4. #include "zbuild.h"
  5. #if defined(X86_SSSE3)
  6. #include <immintrin.h>
  7. #include "../generic/chunk_permute_table.h"
  8. typedef __m128i chunk_t;
  9. #define CHUNK_SIZE 16
  10. #define HAVE_CHUNKMEMSET_2
  11. #define HAVE_CHUNKMEMSET_4
  12. #define HAVE_CHUNKMEMSET_8
  13. #define HAVE_CHUNK_MAG
  14. static const lut_rem_pair perm_idx_lut[13] = {
  15. {0, 1}, /* 3 */
  16. {0, 0}, /* don't care */
  17. {1 * 32, 1}, /* 5 */
  18. {2 * 32, 4}, /* 6 */
  19. {3 * 32, 2}, /* 7 */
  20. {0 * 32, 0}, /* don't care */
  21. {4 * 32, 7}, /* 9 */
  22. {5 * 32, 6}, /* 10 */
  23. {6 * 32, 5}, /* 11 */
  24. {7 * 32, 4}, /* 12 */
  25. {8 * 32, 3}, /* 13 */
  26. {9 * 32, 2}, /* 14 */
  27. {10 * 32, 1},/* 15 */
  28. };
  29. static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
  30. int16_t tmp;
  31. memcpy(&tmp, from, sizeof(tmp));
  32. *chunk = _mm_set1_epi16(tmp);
  33. }
  34. static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
  35. int32_t tmp;
  36. memcpy(&tmp, from, sizeof(tmp));
  37. *chunk = _mm_set1_epi32(tmp);
  38. }
  39. static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
  40. int64_t tmp;
  41. memcpy(&tmp, from, sizeof(tmp));
  42. *chunk = _mm_set1_epi64x(tmp);
  43. }
  44. static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
  45. *chunk = _mm_loadu_si128((__m128i *)s);
  46. }
  47. static inline void storechunk(uint8_t *out, chunk_t *chunk) {
  48. _mm_storeu_si128((__m128i *)out, *chunk);
  49. }
  50. static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
  51. lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
  52. __m128i perm_vec, ret_vec;
  53. /* Important to note:
  54. * This is _not_ to subvert the memory sanitizer but to instead unpoison some
  55. * bytes we willingly and purposefully load uninitialized that we swizzle over
  56. * in a vector register, anyway. If what we assume is wrong about what is used,
  57. * the memory sanitizer will still usefully flag it */
  58. __msan_unpoison(buf + dist, 16 - dist);
  59. ret_vec = _mm_loadu_si128((__m128i*)buf);
  60. *chunk_rem = lut_rem.remval;
  61. perm_vec = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx));
  62. ret_vec = _mm_shuffle_epi8(ret_vec, perm_vec);
  63. return ret_vec;
  64. }
  65. #define CHUNKSIZE chunksize_ssse3
  66. #define CHUNKMEMSET chunkmemset_ssse3
  67. #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3
  68. #define CHUNKCOPY chunkcopy_ssse3
  69. #define CHUNKUNROLL chunkunroll_ssse3
  70. #include "chunkset_tpl.h"
  71. #define INFLATE_FAST inflate_fast_ssse3
  72. #include "inffast_tpl.h"
  73. #endif