slide_hash_avx2.c 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. /*
  2. * AVX2 optimized hash slide, based on Intel's slide_sse implementation
  3. *
  4. * Copyright (C) 2017 Intel Corporation
  5. * Authors:
  6. * Arjan van de Ven <arjan@linux.intel.com>
  7. * Jim Kukunas <james.t.kukunas@linux.intel.com>
  8. * Mika T. Lindqvist <postmaster@raasu.org>
  9. *
  10. * For conditions of distribution and use, see copyright notice in zlib.h
  11. */
  12. #include "zbuild.h"
  13. #include "deflate.h"
  14. #include <immintrin.h>
  15. static inline void slide_hash_chain(Pos *table, uint32_t entries, const __m256i wsize) {
  16. table += entries;
  17. table -= 16;
  18. do {
  19. __m256i value, result;
  20. value = _mm256_loadu_si256((__m256i *)table);
  21. result = _mm256_subs_epu16(value, wsize);
  22. _mm256_storeu_si256((__m256i *)table, result);
  23. table -= 16;
  24. entries -= 16;
  25. } while (entries > 0);
  26. }
  27. Z_INTERNAL void slide_hash_avx2(deflate_state *s) {
  28. uint16_t wsize = (uint16_t)s->w_size;
  29. const __m256i ymm_wsize = _mm256_set1_epi16((short)wsize);
  30. slide_hash_chain(s->head, HASH_SIZE, ymm_wsize);
  31. slide_hash_chain(s->prev, wsize, ymm_wsize);
  32. }