match_tpl.h 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /* match_tpl.h -- find longest match template for compare256 variants
  2. *
  3. * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
  4. * For conditions of distribution and use, see copyright notice in zlib.h
  5. *
  6. * Portions copyright (C) 2014-2021 Konstantin Nosov
  7. * Fast-zlib optimized longest_match
  8. * https://github.com/gildor2/fast_zlib
  9. */
  10. #ifndef MATCH_TPL_H
  11. #define MATCH_TPL_H
  12. #define EARLY_EXIT_TRIGGER_LEVEL 5
  13. #endif
  14. /* Set match_start to the longest match starting at the given string and
  15. * return its length. Matches shorter or equal to prev_length are discarded,
  16. * in which case the result is equal to prev_length and match_start is garbage.
  17. *
  18. * IN assertions: cur_match is the head of the hash chain for the current
  19. * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
  20. * OUT assertion: the match length is not greater than s->lookahead
  21. */
  22. Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
  23. unsigned int strstart = s->strstart;
  24. const unsigned wmask = s->w_mask;
  25. unsigned char *window = s->window;
  26. unsigned char *scan = window + strstart;
  27. Z_REGISTER unsigned char *mbase_start = window;
  28. Z_REGISTER unsigned char *mbase_end;
  29. const Pos *prev = s->prev;
  30. Pos limit;
  31. #ifdef LONGEST_MATCH_SLOW
  32. Pos limit_base;
  33. #else
  34. int32_t early_exit;
  35. #endif
  36. uint32_t chain_length, nice_match, best_len, offset;
  37. uint32_t lookahead = s->lookahead;
  38. Pos match_offset = 0;
  39. #ifdef UNALIGNED_OK
  40. uint8_t scan_start[8];
  41. #endif
  42. uint8_t scan_end[8];
  43. #define GOTO_NEXT_CHAIN \
  44. if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
  45. continue; \
  46. return best_len;
  47. /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
  48. Assert(STD_MAX_MATCH == 258, "Code too clever");
  49. best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
  50. /* Calculate read offset which should only extend an extra byte
  51. * to find the next best match length.
  52. */
  53. offset = best_len-1;
  54. #ifdef UNALIGNED_OK
  55. if (best_len >= sizeof(uint32_t)) {
  56. offset -= 2;
  57. #ifdef UNALIGNED64_OK
  58. if (best_len >= sizeof(uint64_t))
  59. offset -= 4;
  60. #endif
  61. }
  62. #endif
  63. #ifdef UNALIGNED64_OK
  64. memcpy(scan_start, scan, sizeof(uint64_t));
  65. memcpy(scan_end, scan+offset, sizeof(uint64_t));
  66. #elif defined(UNALIGNED_OK)
  67. memcpy(scan_start, scan, sizeof(uint32_t));
  68. memcpy(scan_end, scan+offset, sizeof(uint32_t));
  69. #else
  70. scan_end[0] = *(scan+offset);
  71. scan_end[1] = *(scan+offset+1);
  72. #endif
  73. mbase_end = (mbase_start+offset);
  74. /* Do not waste too much time if we already have a good match */
  75. chain_length = s->max_chain_length;
  76. if (best_len >= s->good_match)
  77. chain_length >>= 2;
  78. nice_match = (uint32_t)s->nice_match;
  79. /* Stop when cur_match becomes <= limit. To simplify the code,
  80. * we prevent matches with the string of window index 0
  81. */
  82. limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
  83. #ifdef LONGEST_MATCH_SLOW
  84. limit_base = limit;
  85. if (best_len >= STD_MIN_MATCH) {
  86. /* We're continuing search (lazy evaluation). */
  87. uint32_t i, hash;
  88. Pos pos;
  89. /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
  90. * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
  91. * these strings are not yet inserted into the hash table.
  92. */
  93. hash = s->update_hash(0, scan[1]);
  94. hash = s->update_hash(hash, scan[2]);
  95. for (i = 3; i <= best_len; i++) {
  96. hash = s->update_hash(hash, scan[i]);
  97. /* If we're starting with best_len >= 3, we can use offset search. */
  98. pos = s->head[hash];
  99. if (pos < cur_match) {
  100. match_offset = (Pos)(i - 2);
  101. cur_match = pos;
  102. }
  103. }
  104. /* Update offset-dependent variables */
  105. limit = limit_base+match_offset;
  106. if (cur_match <= limit)
  107. goto break_matching;
  108. mbase_start -= match_offset;
  109. mbase_end -= match_offset;
  110. }
  111. #else
  112. early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
  113. #endif
  114. Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
  115. for (;;) {
  116. if (cur_match >= strstart)
  117. break;
  118. /* Skip to next match if the match length cannot increase or if the match length is
  119. * less than 2. Note that the checks below for insufficient lookahead only occur
  120. * occasionally for performance reasons.
  121. * Therefore uninitialized memory will be accessed and conditional jumps will be made
  122. * that depend on those values. However the length of the match is limited to the
  123. * lookahead, so the output of deflate is not affected by the uninitialized values.
  124. */
  125. #ifdef UNALIGNED_OK
  126. if (best_len < sizeof(uint32_t)) {
  127. for (;;) {
  128. if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
  129. zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
  130. break;
  131. GOTO_NEXT_CHAIN;
  132. }
  133. # ifdef UNALIGNED64_OK
  134. } else if (best_len >= sizeof(uint64_t)) {
  135. for (;;) {
  136. if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
  137. zng_memcmp_8(mbase_start+cur_match, scan_start) == 0)
  138. break;
  139. GOTO_NEXT_CHAIN;
  140. }
  141. # endif
  142. } else {
  143. for (;;) {
  144. if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 &&
  145. zng_memcmp_4(mbase_start+cur_match, scan_start) == 0)
  146. break;
  147. GOTO_NEXT_CHAIN;
  148. }
  149. }
  150. #else
  151. for (;;) {
  152. if (mbase_end[cur_match] == scan_end[0] && mbase_end[cur_match+1] == scan_end[1] &&
  153. mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1])
  154. break;
  155. GOTO_NEXT_CHAIN;
  156. }
  157. #endif
  158. uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
  159. Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
  160. if (len > best_len) {
  161. uint32_t match_start = cur_match - match_offset;
  162. s->match_start = match_start;
  163. /* Do not look for matches beyond the end of the input. */
  164. if (len > lookahead)
  165. return lookahead;
  166. best_len = len;
  167. if (best_len >= nice_match)
  168. return best_len;
  169. offset = best_len-1;
  170. #ifdef UNALIGNED_OK
  171. if (best_len >= sizeof(uint32_t)) {
  172. offset -= 2;
  173. #ifdef UNALIGNED64_OK
  174. if (best_len >= sizeof(uint64_t))
  175. offset -= 4;
  176. #endif
  177. }
  178. #endif
  179. #ifdef UNALIGNED64_OK
  180. memcpy(scan_end, scan+offset, sizeof(uint64_t));
  181. #elif defined(UNALIGNED_OK)
  182. memcpy(scan_end, scan+offset, sizeof(uint32_t));
  183. #else
  184. scan_end[0] = *(scan+offset);
  185. scan_end[1] = *(scan+offset+1);
  186. #endif
  187. #ifdef LONGEST_MATCH_SLOW
  188. /* Look for a better string offset */
  189. if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
  190. Pos pos, next_pos;
  191. uint32_t i, hash;
  192. unsigned char *scan_endstr;
  193. /* Go back to offset 0 */
  194. cur_match -= match_offset;
  195. match_offset = 0;
  196. next_pos = cur_match;
  197. for (i = 0; i <= len - STD_MIN_MATCH; i++) {
  198. pos = prev[(cur_match + i) & wmask];
  199. if (pos < next_pos) {
  200. /* Hash chain is more distant, use it */
  201. if (pos <= limit_base + i)
  202. goto break_matching;
  203. next_pos = pos;
  204. match_offset = (Pos)i;
  205. }
  206. }
  207. /* Switch cur_match to next_pos chain */
  208. cur_match = next_pos;
  209. /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
  210. * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
  211. * us include one more byte into hash - the byte which will be checked
  212. * in main loop now, and which allows to grow match by 1.
  213. */
  214. scan_endstr = scan + len - (STD_MIN_MATCH+1);
  215. hash = s->update_hash(0, scan_endstr[0]);
  216. hash = s->update_hash(hash, scan_endstr[1]);
  217. hash = s->update_hash(hash, scan_endstr[2]);
  218. pos = s->head[hash];
  219. if (pos < cur_match) {
  220. match_offset = (Pos)(len - (STD_MIN_MATCH+1));
  221. if (pos <= limit_base + match_offset)
  222. goto break_matching;
  223. cur_match = pos;
  224. }
  225. /* Update offset-dependent variables */
  226. limit = limit_base+match_offset;
  227. mbase_start = window-match_offset;
  228. mbase_end = (mbase_start+offset);
  229. continue;
  230. }
  231. #endif
  232. mbase_end = (mbase_start+offset);
  233. }
  234. #ifndef LONGEST_MATCH_SLOW
  235. else if (UNLIKELY(early_exit)) {
  236. /* The probability of finding a match later if we here is pretty low, so for
  237. * performance it's best to outright stop here for the lower compression levels
  238. */
  239. break;
  240. }
  241. #endif
  242. GOTO_NEXT_CHAIN;
  243. }
  244. return best_len;
  245. #ifdef LONGEST_MATCH_SLOW
  246. break_matching:
  247. if (best_len < s->lookahead)
  248. return best_len;
  249. return s->lookahead;
  250. #endif
  251. }
  252. #undef LONGEST_MATCH_SLOW
  253. #undef LONGEST_MATCH
  254. #undef COMPARE256