/* slide_hash_sse2.c (1.7 KB) */
  /*
   * SSE optimized hash slide
   *
   * Copyright (C) 2017 Intel Corporation
   * Authors:
   * Arjan van de Ven <arjan@linux.intel.com>
   * Jim Kukunas <james.t.kukunas@linux.intel.com>
   *
   * For conditions of distribution and use, see copyright notice in zlib.h
   */
  11. #include "zbuild.h"
  12. #include "deflate.h"
  13. #include <immintrin.h>
  14. #include <assert.h>
  15. static inline void slide_hash_chain(Pos *table0, Pos *table1, uint32_t entries0,
  16. uint32_t entries1, const __m128i wsize) {
  17. uint32_t entries;
  18. Pos *table;
  19. __m128i value0, value1, result0, result1;
  20. int on_chain = 0;
  21. next_chain:
  22. table = (on_chain) ? table1 : table0;
  23. entries = (on_chain) ? entries1 : entries0;
  24. table += entries;
  25. table -= 16;
  26. /* ZALLOC allocates this pointer unless the user chose a custom allocator.
  27. * Our alloc function is aligned to 64 byte boundaries */
  28. do {
  29. value0 = _mm_load_si128((__m128i *)table);
  30. value1 = _mm_load_si128((__m128i *)(table + 8));
  31. result0 = _mm_subs_epu16(value0, wsize);
  32. result1 = _mm_subs_epu16(value1, wsize);
  33. _mm_store_si128((__m128i *)table, result0);
  34. _mm_store_si128((__m128i *)(table + 8), result1);
  35. table -= 16;
  36. entries -= 16;
  37. } while (entries > 0);
  38. ++on_chain;
  39. if (on_chain > 1) {
  40. return;
  41. } else {
  42. goto next_chain;
  43. }
  44. }
  45. Z_INTERNAL void slide_hash_sse2(deflate_state *s) {
  46. uint16_t wsize = (uint16_t)s->w_size;
  47. const __m128i xmm_wsize = _mm_set1_epi16((short)wsize);
  48. assert(((uintptr_t)s->head & 15) == 0);
  49. assert(((uintptr_t)s->prev & 15) == 0);
  50. slide_hash_chain(s->head, s->prev, HASH_SIZE, wsize, xmm_wsize);
  51. }