comparison mupdf-source/thirdparty/brotli/c/enc/hash.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /* Copyright 2010 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* A (forgetful) hash table to the data seen by the compressor, to
8 help create backward references to previous data. */
9
10 #ifndef BROTLI_ENC_HASH_H_
11 #define BROTLI_ENC_HASH_H_
12
13 #include <stdlib.h> /* exit */
14 #include <string.h> /* memcmp, memset */
15
16 #include <brotli/types.h>
17
18 #include "../common/constants.h"
19 #include "../common/dictionary.h"
20 #include "../common/platform.h"
21 #include "compound_dictionary.h"
22 #include "encoder_dict.h"
23 #include "fast_log.h"
24 #include "find_match_length.h"
25 #include "matching_tag_mask.h"
26 #include "memory.h"
27 #include "quality.h"
28 #include "static_dict.h"
29
30 #if defined(__cplusplus) || defined(c_plusplus)
31 extern "C" {
32 #endif
33
34 typedef struct {
35 /**
36 * Dynamically allocated areas; regular hasher uses one or two allocations;
37 * "composite" hasher uses up to 4 allocations.
38 */
39 void* extra[4];
40
41 /**
42 * False before the fisrt invocation of HasherSetup (where "extra" memory)
43 * is allocated.
44 */
45 BROTLI_BOOL is_setup_;
46
47 size_t dict_num_lookups;
48 size_t dict_num_matches;
49
50 BrotliHasherParams params;
51
52 /**
53 * False if hasher needs to be "prepared" before use (before the first
54 * invocation of HasherSetup or after HasherReset). "preparation" is hasher
55 * data initialization (using input ringbuffer).
56 */
57 BROTLI_BOOL is_prepared_;
58 } HasherCommon;
59
60 #define score_t size_t
61
62 static const uint32_t kCutoffTransformsCount = 10;
63 /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
64 /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
65 static const uint64_t kCutoffTransforms =
66 BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
67
68 typedef struct HasherSearchResult {
69 size_t len;
70 size_t distance;
71 score_t score;
72 int len_code_delta; /* == len_code - len */
73 } HasherSearchResult;
74
75 /* kHashMul32 multiplier has these properties:
76 * The multiplier must be odd. Otherwise we may lose the highest bit.
77 * No long streaks of ones or zeros.
78 * There is no effort to ensure that it is a prime, the oddity is enough
79 for this use.
80 * The number has been tuned heuristically against compression benchmarks. */
81 static const uint32_t kHashMul32 = 0x1E35A7BD;
82 static const uint64_t kHashMul64 =
83 BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
84
85 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
86 uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
87 /* The higher bits contain more mixture from the multiplication,
88 so we take our results from there. */
89 return h >> (32 - 14);
90 }
91
92 static BROTLI_INLINE void PrepareDistanceCache(
93 int* BROTLI_RESTRICT distance_cache, const int num_distances) {
94 if (num_distances > 4) {
95 int last_distance = distance_cache[0];
96 distance_cache[4] = last_distance - 1;
97 distance_cache[5] = last_distance + 1;
98 distance_cache[6] = last_distance - 2;
99 distance_cache[7] = last_distance + 2;
100 distance_cache[8] = last_distance - 3;
101 distance_cache[9] = last_distance + 3;
102 if (num_distances > 10) {
103 int next_last_distance = distance_cache[1];
104 distance_cache[10] = next_last_distance - 1;
105 distance_cache[11] = next_last_distance + 1;
106 distance_cache[12] = next_last_distance - 2;
107 distance_cache[13] = next_last_distance + 2;
108 distance_cache[14] = next_last_distance - 3;
109 distance_cache[15] = next_last_distance + 3;
110 }
111 }
112 }
113
114 #define BROTLI_LITERAL_BYTE_SCORE 135
115 #define BROTLI_DISTANCE_BIT_PENALTY 30
116 /* Score must be positive after applying maximal penalty. */
117 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
118
119 /* Usually, we always choose the longest backward reference. This function
120 allows for the exception of that rule.
121
122 If we choose a backward reference that is further away, it will
123 usually be coded with more bits. We approximate this by assuming
124 log2(distance). If the distance can be expressed in terms of the
125 last four distances, we use some heuristic constants to estimate
126 the bits cost. For the first up to four literals we use the bit
127 cost of the literals from the literal cost model, after that we
128 use the average bit cost of the cost model.
129
130 This function is used to sometimes discard a longer backward reference
131 when it is not much longer and the bit cost for encoding it is more
132 than the saved literals.
133
134 backward_reference_offset MUST be positive. */
135 static BROTLI_INLINE score_t BackwardReferenceScore(
136 size_t copy_length, size_t backward_reference_offset) {
137 return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
138 BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
139 }
140
141 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
142 size_t copy_length) {
143 return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
144 BROTLI_SCORE_BASE + 15;
145 }
146
147 static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
148 size_t distance_short_code) {
149 return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
150 }
151
152 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
153 const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
154 const uint8_t* data, size_t max_length, size_t max_backward,
155 size_t max_distance, HasherSearchResult* out) {
156 size_t offset;
157 size_t matchlen;
158 size_t backward;
159 score_t score;
160 offset = dictionary->words->offsets_by_length[len] + len * word_idx;
161 if (len > max_length) {
162 return BROTLI_FALSE;
163 }
164
165 matchlen =
166 FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
167 if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
168 return BROTLI_FALSE;
169 }
170 {
171 size_t cut = len - matchlen;
172 size_t transform_id = (cut << 2) +
173 (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
174 backward = max_backward + 1 + word_idx +
175 (transform_id << dictionary->words->size_bits_by_length[len]);
176 }
177 if (backward > max_distance) {
178 return BROTLI_FALSE;
179 }
180 score = BackwardReferenceScore(matchlen, backward);
181 if (score < out->score) {
182 return BROTLI_FALSE;
183 }
184 out->len = matchlen;
185 out->len_code_delta = (int)len - (int)matchlen;
186 out->distance = backward;
187 out->score = score;
188 return BROTLI_TRUE;
189 }
190
191 static BROTLI_INLINE void SearchInStaticDictionary(
192 const BrotliEncoderDictionary* dictionary,
193 HasherCommon* common, const uint8_t* data, size_t max_length,
194 size_t max_backward, size_t max_distance,
195 HasherSearchResult* out, BROTLI_BOOL shallow) {
196 size_t key;
197 size_t i;
198 if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {
199 return;
200 }
201 key = Hash14(data) << 1;
202 for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
203 common->dict_num_lookups++;
204 if (dictionary->hash_table_lengths[key] != 0) {
205 BROTLI_BOOL item_matches = TestStaticDictionaryItem(
206 dictionary, dictionary->hash_table_lengths[key],
207 dictionary->hash_table_words[key], data,
208 max_length, max_backward, max_distance, out);
209 if (item_matches) {
210 common->dict_num_matches++;
211 }
212 }
213 }
214 }
215
216 typedef struct BackwardMatch {
217 uint32_t distance;
218 uint32_t length_and_code;
219 } BackwardMatch;
220
221 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
222 size_t dist, size_t len) {
223 self->distance = (uint32_t)dist;
224 self->length_and_code = (uint32_t)(len << 5);
225 }
226
227 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
228 size_t dist, size_t len, size_t len_code) {
229 self->distance = (uint32_t)dist;
230 self->length_and_code =
231 (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
232 }
233
234 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
235 return self->length_and_code >> 5;
236 }
237
238 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
239 size_t code = self->length_and_code & 31;
240 return code ? code : BackwardMatchLength(self);
241 }
242
243 #define EXPAND_CAT(a, b) CAT(a, b)
244 #define CAT(a, b) a ## b
245 #define FN(X) EXPAND_CAT(X, HASHER())
246
247 #define HASHER() H10
248 #define BUCKET_BITS 17
249 #define MAX_TREE_SEARCH_DEPTH 64
250 #define MAX_TREE_COMP_LENGTH 128
251 #include "hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
252 #undef MAX_TREE_SEARCH_DEPTH
253 #undef MAX_TREE_COMP_LENGTH
254 #undef BUCKET_BITS
255 #undef HASHER
256 /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
257 #define MAX_NUM_MATCHES_H10 128
258
259 /* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
260 a little faster (0.5% - 1%) and it compresses 0.15% better on small text
261 and HTML inputs. */
262
263 #define HASHER() H2
264 #define BUCKET_BITS 16
265 #define BUCKET_SWEEP_BITS 0
266 #define HASH_LEN 5
267 #define USE_DICTIONARY 1
268 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
269 #undef BUCKET_SWEEP_BITS
270 #undef USE_DICTIONARY
271 #undef HASHER
272
273 #define HASHER() H3
274 #define BUCKET_SWEEP_BITS 1
275 #define USE_DICTIONARY 0
276 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
277 #undef USE_DICTIONARY
278 #undef BUCKET_SWEEP_BITS
279 #undef BUCKET_BITS
280 #undef HASHER
281
282 #define HASHER() H4
283 #define BUCKET_BITS 17
284 #define BUCKET_SWEEP_BITS 2
285 #define USE_DICTIONARY 1
286 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
287 #undef USE_DICTIONARY
288 #undef HASH_LEN
289 #undef BUCKET_SWEEP_BITS
290 #undef BUCKET_BITS
291 #undef HASHER
292
293 #define HASHER() H5
294 #include "hash_longest_match_inc.h" /* NOLINT(build/include) */
295 #undef HASHER
296
297 #define HASHER() H6
298 #include "hash_longest_match64_inc.h" /* NOLINT(build/include) */
299 #undef HASHER
300
301 #if defined(BROTLI_MAX_SIMD_QUALITY)
302 #define HASHER() H58
303 #include "hash_longest_match_simd_inc.h" /* NOLINT(build/include) */
304 #undef HASHER
305
306 #define HASHER() H68
307 #include "hash_longest_match64_simd_inc.h" /* NOLINT(build/include) */
308 #undef HASHER
309 #endif
310
311 #define BUCKET_BITS 15
312
313 #define NUM_LAST_DISTANCES_TO_CHECK 4
314 #define NUM_BANKS 1
315 #define BANK_BITS 16
316 #define HASHER() H40
317 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
318 #undef HASHER
319 #undef NUM_LAST_DISTANCES_TO_CHECK
320
321 #define NUM_LAST_DISTANCES_TO_CHECK 10
322 #define HASHER() H41
323 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
324 #undef HASHER
325 #undef NUM_LAST_DISTANCES_TO_CHECK
326 #undef NUM_BANKS
327 #undef BANK_BITS
328
329 #define NUM_LAST_DISTANCES_TO_CHECK 16
330 #define NUM_BANKS 512
331 #define BANK_BITS 9
332 #define HASHER() H42
333 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
334 #undef HASHER
335 #undef NUM_LAST_DISTANCES_TO_CHECK
336 #undef NUM_BANKS
337 #undef BANK_BITS
338
339 #undef BUCKET_BITS
340
341 #define HASHER() H54
342 #define BUCKET_BITS 20
343 #define BUCKET_SWEEP_BITS 2
344 #define HASH_LEN 7
345 #define USE_DICTIONARY 0
346 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
347 #undef USE_DICTIONARY
348 #undef HASH_LEN
349 #undef BUCKET_SWEEP_BITS
350 #undef BUCKET_BITS
351 #undef HASHER
352
353 /* fast large window hashers */
354
355 #define HASHER() HROLLING_FAST
356 #define CHUNKLEN 32
357 #define JUMP 4
358 #define NUMBUCKETS 16777216
359 #define MASK ((NUMBUCKETS * 64) - 1)
360 #include "hash_rolling_inc.h" /* NOLINT(build/include) */
361 #undef JUMP
362 #undef HASHER
363
364
365 #define HASHER() HROLLING
366 #define JUMP 1
367 #include "hash_rolling_inc.h" /* NOLINT(build/include) */
368 #undef MASK
369 #undef NUMBUCKETS
370 #undef JUMP
371 #undef CHUNKLEN
372 #undef HASHER
373
374 #define HASHER() H35
375 #define HASHER_A H3
376 #define HASHER_B HROLLING_FAST
377 #include "hash_composite_inc.h" /* NOLINT(build/include) */
378 #undef HASHER_A
379 #undef HASHER_B
380 #undef HASHER
381
382 #define HASHER() H55
383 #define HASHER_A H54
384 #define HASHER_B HROLLING_FAST
385 #include "hash_composite_inc.h" /* NOLINT(build/include) */
386 #undef HASHER_A
387 #undef HASHER_B
388 #undef HASHER
389
390 #define HASHER() H65
391 #define HASHER_A H6
392 #define HASHER_B HROLLING
393 #include "hash_composite_inc.h" /* NOLINT(build/include) */
394 #undef HASHER_A
395 #undef HASHER_B
396 #undef HASHER
397
398 #undef FN
399 #undef CAT
400 #undef EXPAND_CAT
401
402 #if defined(BROTLI_MAX_SIMD_QUALITY)
403 #define FOR_SIMPLE_HASHERS(H) \
404 H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) H(58) H(68)
405 #else
406 #define FOR_SIMPLE_HASHERS(H) \
407 H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
408 #endif
409 #define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
410 #define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
411 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
412
413 typedef struct {
414 HasherCommon common;
415
416 union {
417 #define MEMBER_(N) \
418 H ## N _H ## N;
419 FOR_ALL_HASHERS(MEMBER_)
420 #undef MEMBER_
421 } privat;
422 } Hasher;
423
424 /* MUST be invoked before any other method. */
425 static BROTLI_INLINE void HasherInit(Hasher* hasher) {
426 hasher->common.is_setup_ = BROTLI_FALSE;
427 hasher->common.extra[0] = NULL;
428 hasher->common.extra[1] = NULL;
429 hasher->common.extra[2] = NULL;
430 hasher->common.extra[3] = NULL;
431 }
432
433 static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
434 if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
435 if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
436 if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
437 if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
438 }
439
440 static BROTLI_INLINE void HasherReset(Hasher* hasher) {
441 hasher->common.is_prepared_ = BROTLI_FALSE;
442 }
443
444 static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
445 BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {
446 switch (params->hasher.type) {
447 #define SIZE_(N) \
448 case N: \
449 HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
450 break;
451 FOR_ALL_HASHERS(SIZE_)
452 #undef SIZE_
453 default:
454 break;
455 }
456 }
457
458 static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
459 BrotliEncoderParams* params, const uint8_t* data, size_t position,
460 size_t input_size, BROTLI_BOOL is_last) {
461 BROTLI_BOOL one_shot = (position == 0 && is_last);
462 if (!hasher->common.is_setup_) {
463 size_t alloc_size[4] = {0};
464 size_t i;
465 ChooseHasher(params, &params->hasher);
466 hasher->common.params = params->hasher;
467 hasher->common.dict_num_lookups = 0;
468 hasher->common.dict_num_matches = 0;
469 HasherSize(params, one_shot, input_size, alloc_size);
470 for (i = 0; i < 4; ++i) {
471 if (alloc_size[i] == 0) continue;
472 hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
473 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
474 }
475 switch (hasher->common.params.type) {
476 #define INITIALIZE_(N) \
477 case N: \
478 InitializeH ## N(&hasher->common, \
479 &hasher->privat._H ## N, params); \
480 break;
481 FOR_ALL_HASHERS(INITIALIZE_);
482 #undef INITIALIZE_
483 default:
484 break;
485 }
486 HasherReset(hasher);
487 hasher->common.is_setup_ = BROTLI_TRUE;
488 }
489
490 if (!hasher->common.is_prepared_) {
491 switch (hasher->common.params.type) {
492 #define PREPARE_(N) \
493 case N: \
494 PrepareH ## N( \
495 &hasher->privat._H ## N, \
496 one_shot, input_size, data); \
497 break;
498 FOR_ALL_HASHERS(PREPARE_)
499 #undef PREPARE_
500 default: break;
501 }
502 hasher->common.is_prepared_ = BROTLI_TRUE;
503 }
504 }
505
506 static BROTLI_INLINE void InitOrStitchToPreviousBlock(
507 MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
508 BrotliEncoderParams* params, size_t position, size_t input_size,
509 BROTLI_BOOL is_last) {
510 HasherSetup(m, hasher, params, data, position, input_size, is_last);
511 if (BROTLI_IS_OOM(m)) return;
512 switch (hasher->common.params.type) {
513 #define INIT_(N) \
514 case N: \
515 StitchToPreviousBlockH ## N( \
516 &hasher->privat._H ## N, \
517 input_size, position, data, mask); \
518 break;
519 FOR_ALL_HASHERS(INIT_)
520 #undef INIT_
521 default: break;
522 }
523 }
524
525 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
526 to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
527 static BROTLI_INLINE void FindCompoundDictionaryMatch(
528 const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
529 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
530 const size_t cur_ix, const size_t max_length, const size_t distance_offset,
531 const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
532 const uint32_t source_size = self->source_size;
533 const size_t boundary = distance_offset - source_size;
534 const uint32_t hash_bits = self->hash_bits;
535 const uint32_t bucket_bits = self->bucket_bits;
536 const uint32_t slot_bits = self->slot_bits;
537
538 const uint32_t hash_shift = 64u - bucket_bits;
539 const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
540 const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
541
542 const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
543 const uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << slot_bits]);
544 const uint32_t* items = (uint32_t*)(&heads[(size_t)1u << bucket_bits]);
545 const uint8_t* source = NULL;
546
547 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
548 score_t best_score = out->score;
549 size_t best_len = out->len;
550 size_t i;
551 const uint64_t h =
552 (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
553 kPreparedDictionaryHashMul64Long;
554 const uint32_t key = (uint32_t)(h >> hash_shift);
555 const uint32_t slot = key & slot_mask;
556 const uint32_t head = heads[key];
557 const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
558 uint32_t item = (head == 0xFFFF) ? 1 : 0;
559
560 const void* tail = (void*)&items[self->num_items];
561 if (self->magic == kPreparedDictionaryMagic) {
562 source = (const uint8_t*)tail;
563 } else {
564 /* kLeanPreparedDictionaryMagic */
565 source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
566 }
567
568 BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
569
570 for (i = 0; i < 4; ++i) {
571 const size_t distance = (size_t)distance_cache[i];
572 size_t offset;
573 size_t limit;
574 size_t len;
575 if (distance <= boundary || distance > distance_offset) continue;
576 offset = distance_offset - distance;
577 limit = source_size - offset;
578 limit = limit > max_length ? max_length : limit;
579 len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
580 limit);
581 if (len >= 2) {
582 score_t score = BackwardReferenceScoreUsingLastDistance(len);
583 if (best_score < score) {
584 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
585 if (best_score < score) {
586 best_score = score;
587 if (len > best_len) best_len = len;
588 out->len = len;
589 out->len_code_delta = 0;
590 out->distance = distance;
591 out->score = best_score;
592 }
593 }
594 }
595 }
596 /* we require matches of len >4, so increase best_len to 3, so we can compare
597 * 4 bytes all the time. */
598 if (best_len < 3) {
599 best_len = 3;
600 }
601 while (item == 0) {
602 size_t offset;
603 size_t distance;
604 size_t limit;
605 item = *chain;
606 chain++;
607 offset = item & 0x7FFFFFFF;
608 item &= 0x80000000;
609 distance = distance_offset - offset;
610 limit = source_size - offset;
611 limit = (limit > max_length) ? max_length : limit;
612 if (distance > max_distance) continue;
613 if (cur_ix_masked + best_len > ring_buffer_mask || best_len >= limit ||
614 /* compare 4 bytes ending at best_len + 1 */
615 BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
616 BrotliUnalignedRead32(&source[offset + best_len - 3])) {
617 continue;
618 }
619 {
620 const size_t len = FindMatchLengthWithLimit(&source[offset],
621 &data[cur_ix_masked],
622 limit);
623 if (len >= 4) {
624 score_t score = BackwardReferenceScore(len, distance);
625 if (best_score < score) {
626 best_score = score;
627 best_len = len;
628 out->len = best_len;
629 out->len_code_delta = 0;
630 out->distance = distance;
631 out->score = best_score;
632 }
633 }
634 }
635 }
636 }
637
638 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
639 to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
640 static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
641 const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
642 const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
643 const size_t max_length, const size_t distance_offset,
644 const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
645 const uint32_t source_size = self->source_size;
646 const uint32_t hash_bits = self->hash_bits;
647 const uint32_t bucket_bits = self->bucket_bits;
648 const uint32_t slot_bits = self->slot_bits;
649
650 const uint32_t hash_shift = 64u - bucket_bits;
651 const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
652 const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
653
654 const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
655 const uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << slot_bits]);
656 const uint32_t* items = (uint32_t*)(&heads[(size_t)1u << bucket_bits]);
657 const uint8_t* source = NULL;
658
659 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
660 size_t best_len = min_length;
661 const uint64_t h =
662 (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
663 kPreparedDictionaryHashMul64Long;
664 const uint32_t key = (uint32_t)(h >> hash_shift);
665 const uint32_t slot = key & slot_mask;
666 const uint32_t head = heads[key];
667 const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
668 uint32_t item = (head == 0xFFFF) ? 1 : 0;
669 size_t found = 0;
670
671 const void* tail = (void*)&items[self->num_items];
672 if (self->magic == kPreparedDictionaryMagic) {
673 source = (const uint8_t*)tail;
674 } else {
675 /* kLeanPreparedDictionaryMagic */
676 source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
677 }
678
679 BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
680
681 while (item == 0) {
682 size_t offset;
683 size_t distance;
684 size_t limit;
685 size_t len;
686 item = *chain;
687 chain++;
688 offset = item & 0x7FFFFFFF;
689 item &= 0x80000000;
690 distance = distance_offset - offset;
691 limit = source_size - offset;
692 limit = (limit > max_length) ? max_length : limit;
693 if (distance > max_distance) continue;
694 if (cur_ix_masked + best_len > ring_buffer_mask ||
695 best_len >= limit ||
696 data[cur_ix_masked + best_len] != source[offset + best_len]) {
697 continue;
698 }
699 len = FindMatchLengthWithLimit(
700 &source[offset], &data[cur_ix_masked], limit);
701 if (len > best_len) {
702 best_len = len;
703 InitBackwardMatch(matches++, distance, len);
704 found++;
705 if (found == match_limit) break;
706 }
707 }
708 return found;
709 }
710
711 static BROTLI_INLINE void LookupCompoundDictionaryMatch(
712 const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
713 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
714 const size_t cur_ix, const size_t max_length,
715 const size_t max_ring_buffer_distance, const size_t max_distance,
716 HasherSearchResult* sr) {
717 size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
718 size_t d;
719 for (d = 0; d < addon->num_chunks; ++d) {
720 /* Only one prepared dictionary type is currently supported. */
721 FindCompoundDictionaryMatch(
722 (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
723 distance_cache, cur_ix, max_length,
724 base_offset - addon->chunk_offsets[d], max_distance, sr);
725 }
726 }
727
728 static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
729 const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
730 const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
731 const size_t max_length, const size_t max_ring_buffer_distance,
732 const size_t max_distance, BackwardMatch* matches,
733 size_t match_limit) {
734 size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
735 size_t d;
736 size_t total_found = 0;
737 for (d = 0; d < addon->num_chunks; ++d) {
738 /* Only one prepared dictionary type is currently supported. */
739 total_found += FindAllCompoundDictionaryMatches(
740 (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
741 cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
742 max_distance, matches + total_found, match_limit - total_found);
743 if (total_found == match_limit) break;
744 if (total_found > 0) {
745 min_length = BackwardMatchLength(&matches[total_found - 1]);
746 }
747 }
748 return total_found;
749 }
750
751 #if defined(__cplusplus) || defined(c_plusplus)
752 } /* extern "C" */
753 #endif
754
755 #endif /* BROTLI_ENC_HASH_H_ */