compare256_neon.c 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. /* compare256_neon.c - NEON version of compare256
  2. * Copyright (C) 2022 Nathan Moinvaziri
  3. * For conditions of distribution and use, see copyright notice in zlib.h
  4. */
  5. #include "zbuild.h"
  6. #include "zutil_p.h"
  7. #include "deflate.h"
  8. #include "fallback_builtins.h"
  9. #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
  10. #include "neon_intrins.h"
  11. static inline uint32_t compare256_neon_static(const uint8_t *src0, const uint8_t *src1) {
  12. uint32_t len = 0;
  13. do {
  14. uint8x16_t a, b, cmp;
  15. uint64_t lane;
  16. a = vld1q_u8(src0);
  17. b = vld1q_u8(src1);
  18. cmp = veorq_u8(a, b);
  19. lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 0);
  20. if (lane) {
  21. uint32_t match_byte = (uint32_t)__builtin_ctzll(lane) / 8;
  22. return len + match_byte;
  23. }
  24. len += 8;
  25. lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 1);
  26. if (lane) {
  27. uint32_t match_byte = (uint32_t)__builtin_ctzll(lane) / 8;
  28. return len + match_byte;
  29. }
  30. len += 8;
  31. src0 += 16, src1 += 16;
  32. } while (len < 256);
  33. return 256;
  34. }
  35. Z_INTERNAL uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1) {
  36. return compare256_neon_static(src0, src1);
  37. }
  38. #define LONGEST_MATCH longest_match_neon
  39. #define COMPARE256 compare256_neon_static
  40. #include "match_tpl.h"
  41. #define LONGEST_MATCH_SLOW
  42. #define LONGEST_MATCH longest_match_slow_neon
  43. #define COMPARE256 compare256_neon_static
  44. #include "match_tpl.h"
  45. #endif