compare256_power9.c 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. /* compare256_power9.c - Power9 version of compare256
  2. * Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
  3. * For conditions of distribution and use, see copyright notice in zlib.h
  4. */
  5. #ifdef POWER9
  6. #include <altivec.h>
  7. #include "zbuild.h"
  8. #include "zutil_p.h"
  9. #include "deflate.h"
  10. #include "zendian.h"
/* zng_vec_vctzlsbb(vc, len): store into `len` the count produced by the
 * selected "count zero least-significant-bit bytes" builtin applied to `vc`.
 * The macro expands to a bare assignment (`len = ...`), so it must be used
 * as a full statement with an assignable `len`.
 *
 * Older versions of GCC misimplemented semantics for these bit counting builtins.
 * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac
 *
 * For GCC older than 12 (and not clang) the low-level builtin is therefore
 * chosen by hand according to byte order; every other compiler uses
 * vec_cntlz_lsbb directly. */
#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 12)
#if BYTE_ORDER == LITTLE_ENDIAN
/* Little-endian with old GCC: use the trailing-zero-LSB byte count builtin. */
# define zng_vec_vctzlsbb(vc, len) len = __builtin_vec_vctzlsbb(vc)
#else
/* Big-endian with old GCC: use the leading-zero-LSB byte count builtin. */
# define zng_vec_vctzlsbb(vc, len) len = __builtin_vec_vclzlsbb(vc)
#endif
#else
/* GCC >= 12, clang and other compilers: use the generic intrinsic. */
# define zng_vec_vctzlsbb(vc, len) len = vec_cntlz_lsbb(vc)
#endif
  22. static inline uint32_t compare256_power9_static(const uint8_t *src0, const uint8_t *src1) {
  23. uint32_t len = 0, cmplen;
  24. do {
  25. vector unsigned char vsrc0, vsrc1, vc;
  26. vsrc0 = *((vector unsigned char *)src0);
  27. vsrc1 = *((vector unsigned char *)src1);
  28. /* Compare 16 bytes at a time. Each byte of vc will be either
  29. * all ones or all zeroes, depending on the result of the comparison. */
  30. vc = (vector unsigned char)vec_cmpne(vsrc0, vsrc1);
  31. /* Since the index of matching bytes will contain only zeroes
  32. * on vc (since we used cmpne), counting the number of consecutive
  33. * bytes where LSB == 0 is the same as counting the length of the match. */
  34. zng_vec_vctzlsbb(vc, cmplen);
  35. if (cmplen != 16)
  36. return len + cmplen;
  37. src0 += 16, src1 += 16, len += 16;
  38. } while (len < 256);
  39. return 256;
  40. }
  41. Z_INTERNAL uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1) {
  42. return compare256_power9_static(src0, src1);
  43. }
/* First inclusion of match_tpl.h: LONGEST_MATCH names the generated function
 * (longest_match_power9) and COMPARE256 selects the comparison routine above.
 * NOTE(review): match_tpl.h is not visible here; it presumably expands to a
 * function definition using these macros - confirm against that header. */
#define LONGEST_MATCH longest_match_power9
#define COMPARE256 compare256_power9_static
#include "match_tpl.h"
/* Second inclusion of match_tpl.h, this time with LONGEST_MATCH_SLOW defined
 * and the function named longest_match_slow_power9.
 * NOTE(review): LONGEST_MATCH and COMPARE256 are defined again without an
 * #undef in this file, so match_tpl.h presumably undefines them at its end -
 * verify. */
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_power9
#define COMPARE256 compare256_power9_static
#include "match_tpl.h"
  51. #endif