Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/brotli/c/enc/encoder_dict.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* Copyright 2017 Google Inc. All Rights Reserved. | |
| 2 | |
| 3 Distributed under MIT license. | |
| 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
| 5 */ | |
| 6 | |
| 7 #include "encoder_dict.h" | |
| 8 | |
| 9 #include <stdlib.h> /* malloc, free */ | |
| 10 | |
| 11 #include "../common/dictionary.h" | |
| 12 #include "../common/platform.h" | |
| 13 #include "../common/shared_dictionary_internal.h" | |
| 14 #include "../common/transform.h" | |
| 15 #include "compound_dictionary.h" | |
| 16 #include "dictionary_hash.h" | |
| 17 #include "memory.h" | |
| 18 #include "quality.h" | |
| 19 #include "hash.h" | |
| 20 | |
| 21 #if defined(__cplusplus) || defined(c_plusplus) | |
| 22 extern "C" { | |
| 23 #endif | |
| 24 | |
| 25 #define NUM_HASH_BITS 15u | |
| 26 #define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS) | |
| 27 | |
| 28 static void BrotliTrieInit(BrotliTrie* trie) { | |
| 29 trie->pool_capacity = 0; | |
| 30 trie->pool_size = 0; | |
| 31 trie->pool = 0; | |
| 32 | |
| 33 /* Set up the root node */ | |
| 34 trie->root.single = 0; | |
| 35 trie->root.len_ = 0; | |
| 36 trie->root.idx_ = 0; | |
| 37 trie->root.sub = 0; | |
| 38 } | |
| 39 | |
| 40 static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) { | |
| 41 BrotliFree(m, trie->pool); | |
| 42 } | |
| 43 | |
| 44 /* Initializes to RFC 7932 static dictionary / transforms. */ | |
| 45 static void InitEncoderDictionary(BrotliEncoderDictionary* dict) { | |
| 46 dict->words = BrotliGetDictionary(); | |
| 47 dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms; | |
| 48 | |
| 49 dict->hash_table_words = kStaticDictionaryHashWords; | |
| 50 dict->hash_table_lengths = kStaticDictionaryHashLengths; | |
| 51 dict->buckets = kStaticDictionaryBuckets; | |
| 52 dict->dict_words = kStaticDictionaryWords; | |
| 53 | |
| 54 dict->cutoffTransformsCount = kCutoffTransformsCount; | |
| 55 dict->cutoffTransforms = kCutoffTransforms; | |
| 56 | |
| 57 dict->parent = 0; | |
| 58 | |
| 59 dict->hash_table_data_words_ = 0; | |
| 60 dict->hash_table_data_lengths_ = 0; | |
| 61 dict->buckets_alloc_size_ = 0; | |
| 62 dict->buckets_data_ = 0; | |
| 63 dict->dict_words_alloc_size_ = 0; | |
| 64 dict->dict_words_data_ = 0; | |
| 65 dict->words_instance_ = 0; | |
| 66 dict->has_words_heavy = BROTLI_FALSE; | |
| 67 BrotliTrieInit(&dict->trie); | |
| 68 } | |
| 69 | |
| 70 static void BrotliDestroyEncoderDictionary(MemoryManager* m, | |
| 71 BrotliEncoderDictionary* dict) { | |
| 72 BrotliFree(m, dict->hash_table_data_words_); | |
| 73 BrotliFree(m, dict->hash_table_data_lengths_); | |
| 74 BrotliFree(m, dict->buckets_data_); | |
| 75 BrotliFree(m, dict->dict_words_data_); | |
| 76 BrotliFree(m, dict->words_instance_); | |
| 77 BrotliTrieFree(m, &dict->trie); | |
| 78 } | |
| 79 | |
| 80 #if defined(BROTLI_EXPERIMENTAL) | |
| 81 /* Word length must be at least 4 bytes */ | |
| 82 static uint32_t Hash(const uint8_t* data, int bits) { | |
| 83 uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; | |
| 84 /* The higher bits contain more mixture from the multiplication, | |
| 85 so we take our results from there. */ | |
| 86 return h >> (32 - bits); | |
| 87 } | |
| 88 | |
| 89 /* Theoretical max possible word size after transform */ | |
| 90 #define kTransformedBufferSize \ | |
| 91 (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) | |
| 92 | |
| 93 /* To be safe buffer must have at least kTransformedBufferSize */ | |
| 94 static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform, | |
| 95 const BrotliTransforms* transforms, | |
| 96 const BrotliEncoderDictionary* dict, | |
| 97 uint8_t* buffer, size_t* size) { | |
| 98 const uint8_t* dict_word = &dict->words->data[ | |
| 99 dict->words->offsets_by_length[len] + (uint32_t)len * word_idx]; | |
| 100 *size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len, | |
| 101 transforms, transform); | |
| 102 } | |
| 103 | |
| 104 static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) { | |
| 105 DictWord result; | |
| 106 result.len = len; | |
| 107 result.transform = transform; | |
| 108 result.idx = idx; | |
| 109 return result; | |
| 110 } | |
| 111 | |
| 112 static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie, | |
| 113 BrotliTrieNode** keep) { | |
| 114 uint32_t result; | |
| 115 uint32_t keep_index = 0; | |
| 116 if (keep && *keep != &trie->root) { | |
| 117 /* Optional node to keep, since address may change after re-allocating */ | |
| 118 keep_index = (uint32_t)(*keep - trie->pool); | |
| 119 } | |
| 120 if (trie->pool_size == 0) { | |
| 121 /* Have a placeholder node in the front. We do not want the result to be 0, | |
| 122 it must be at least 1, 0 represents "null pointer" */ | |
| 123 trie->pool_size = 1; | |
| 124 } | |
| 125 BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity, | |
| 126 trie->pool_size + num); | |
| 127 if (BROTLI_IS_OOM(m)) return 0; | |
| 128 /* Init the new nodes to empty */ | |
| 129 memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num); | |
| 130 result = (uint32_t)trie->pool_size; | |
| 131 trie->pool_size += num; | |
| 132 if (keep && *keep != &trie->root) { | |
| 133 *keep = trie->pool + keep_index; | |
| 134 } | |
| 135 return result; | |
| 136 } | |
| 137 | |
| 138 /** | |
| 139 * len and idx: payload for last node | |
| 140 * word, size: the string | |
| 141 * index: position in the string | |
| 142 */ | |
| 143 static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len, | |
| 144 uint32_t idx, const uint8_t* word, size_t size, int index, | |
| 145 BrotliTrieNode* node, BrotliTrie* trie) { | |
| 146 BrotliTrieNode* child = 0; | |
| 147 uint8_t c; | |
| 148 if ((size_t)index == size) { | |
| 149 if (!node->len_ || idx < node->idx_) { | |
| 150 node->len_ = len; | |
| 151 node->idx_ = idx; | |
| 152 } | |
| 153 return BROTLI_TRUE; | |
| 154 } | |
| 155 c = word[index]; | |
| 156 if (node->single && c != node->c) { | |
| 157 BrotliTrieNode old = trie->pool[node->sub]; | |
| 158 uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node); | |
| 159 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 160 node->single = 0; | |
| 161 node->sub = new_nodes; | |
| 162 trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16; | |
| 163 trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] = | |
| 164 old; | |
| 165 } | |
| 166 if (!node->sub) { | |
| 167 uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node); | |
| 168 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 169 node->single = 1; | |
| 170 node->c = c; | |
| 171 node->sub = new_node; | |
| 172 } | |
| 173 if (node->single) { | |
| 174 child = &trie->pool[node->sub]; | |
| 175 } else { | |
| 176 if (!trie->pool[node->sub + (c >> 4)].sub) { | |
| 177 uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node); | |
| 178 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 179 trie->pool[node->sub + (c >> 4)].sub = new_nodes; | |
| 180 } | |
| 181 child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)]; | |
| 182 } | |
| 183 return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie); | |
| 184 } | |
| 185 | |
| 186 static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx, | |
| 187 const uint8_t* word, size_t size, BrotliTrie* trie) { | |
| 188 return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie); | |
| 189 } | |
| 190 | |
| 191 const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie, | |
| 192 const BrotliTrieNode* node, uint8_t c) { | |
| 193 BrotliTrieNode* temp_node; | |
| 194 if (node->single) { | |
| 195 if (node->c == c) return &trie->pool[node->sub]; | |
| 196 return 0; | |
| 197 } | |
| 198 if (!node->sub) return 0; | |
| 199 temp_node = &trie->pool[node->sub + (c >> 4)]; | |
| 200 if (!temp_node->sub) return 0; | |
| 201 return &trie->pool[temp_node->sub + (c & 15)]; | |
| 202 } | |
| 203 | |
| 204 static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie, | |
| 205 const uint8_t* word, size_t size) { | |
| 206 const BrotliTrieNode* node = &trie->root; | |
| 207 size_t i; | |
| 208 for (i = 0; i < size; i++) { | |
| 209 node = BrotliTrieSub(trie, node, word[i]); | |
| 210 if (!node) return 0; | |
| 211 } | |
| 212 return node; | |
| 213 } | |
| 214 | |
| 215 static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m, | |
| 216 const BrotliTransforms* transforms, | |
| 217 BrotliEncoderDictionary* dict) { | |
| 218 uint32_t i; | |
| 219 DictWord* dict_words; | |
| 220 uint16_t* buckets; | |
| 221 DictWord** words_by_hash; | |
| 222 size_t* words_by_hash_size; | |
| 223 size_t* words_by_hash_capacity; | |
| 224 BrotliTrie dedup; | |
| 225 uint8_t word[kTransformedBufferSize]; | |
| 226 size_t word_size; | |
| 227 size_t total = 0; | |
| 228 uint8_t l; | |
| 229 uint16_t idx; | |
| 230 | |
| 231 BrotliTrieInit(&dedup); | |
| 232 | |
| 233 words_by_hash = (DictWord**)BrotliAllocate(m, | |
| 234 sizeof(*words_by_hash) * NUM_HASH_BUCKETS); | |
| 235 words_by_hash_size = (size_t*)BrotliAllocate(m, | |
| 236 sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); | |
| 237 words_by_hash_capacity = (size_t*)BrotliAllocate(m, | |
| 238 sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); | |
| 239 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 240 memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS); | |
| 241 memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS); | |
| 242 memset(words_by_hash_capacity, 0, | |
| 243 sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS); | |
| 244 | |
| 245 if (transforms->num_transforms > 0) { | |
| 246 for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; | |
| 247 l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { | |
| 248 uint16_t n = dict->words->size_bits_by_length[l] ? | |
| 249 (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; | |
| 250 for (idx = 0; idx < n; ++idx) { | |
| 251 uint32_t key; | |
| 252 /* First transform (usually identity) */ | |
| 253 TransformedDictionaryWord(idx, l, 0, transforms, dict, word, | |
| 254 &word_size); | |
| 255 /* Cannot hash words smaller than 4 bytes */ | |
| 256 if (word_size < 4) { | |
| 257 /* Break instead of continue, all next words of this length will have | |
| 258 same length after transform */ | |
| 259 break; | |
| 260 } | |
| 261 if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) { | |
| 262 return BROTLI_FALSE; | |
| 263 } | |
| 264 key = Hash(word, NUM_HASH_BITS); | |
| 265 BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], | |
| 266 words_by_hash_capacity[key], words_by_hash_size[key], | |
| 267 MakeDictWord(l, 0, idx)); | |
| 268 ++total; | |
| 269 } | |
| 270 } | |
| 271 } | |
| 272 | |
| 273 /* These LUT transforms only supported if no custom transforms. This is | |
| 274 ok, we will use the heavy trie instead. */ | |
| 275 if (transforms == BrotliGetTransforms()) { | |
| 276 for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; | |
| 277 l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) { | |
| 278 uint16_t n = dict->words->size_bits_by_length[l] ? | |
| 279 (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u; | |
| 280 for (idx = 0; idx < n; ++idx) { | |
| 281 int k; | |
| 282 BROTLI_BOOL is_ascii = BROTLI_TRUE; | |
| 283 size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx; | |
| 284 const uint8_t* data = &dict->words->data[offset]; | |
| 285 for (k = 0; k < l; ++k) { | |
| 286 if (data[k] >= 128) is_ascii = BROTLI_FALSE; | |
| 287 } | |
| 288 if (data[0] < 128) { | |
| 289 int transform = 9; /* {empty, uppercase first, empty} */ | |
| 290 uint32_t ix = idx + (uint32_t)transform * n; | |
| 291 const BrotliTrieNode* it; | |
| 292 TransformedDictionaryWord(idx, l, transform, transforms, | |
| 293 dict, word, &word_size); | |
| 294 it = BrotliTrieFind(&dedup, word, word_size); | |
| 295 if (!it || it->idx_ > ix) { | |
| 296 uint32_t key = Hash(word, NUM_HASH_BITS); | |
| 297 if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { | |
| 298 return BROTLI_FALSE; | |
| 299 } | |
| 300 BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], | |
| 301 words_by_hash_capacity[key], words_by_hash_size[key], | |
| 302 MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx)); | |
| 303 ++total; | |
| 304 } | |
| 305 } | |
| 306 if (is_ascii) { | |
| 307 int transform = 44; /* {empty, uppercase all, empty} */ | |
| 308 uint32_t ix = idx + (uint32_t)transform * n; | |
| 309 const BrotliTrieNode* it; | |
| 310 TransformedDictionaryWord(idx, l, transform, transforms, | |
| 311 dict, word, &word_size); | |
| 312 it = BrotliTrieFind(&dedup, word, word_size); | |
| 313 if (!it || it->idx_ > ix) { | |
| 314 uint32_t key = Hash(word, NUM_HASH_BITS); | |
| 315 if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) { | |
| 316 return BROTLI_FALSE; | |
| 317 } | |
| 318 BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key], | |
| 319 words_by_hash_capacity[key], words_by_hash_size[key], | |
| 320 MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx)); | |
| 321 ++total; | |
| 322 } | |
| 323 } | |
| 324 } | |
| 325 } | |
| 326 } | |
| 327 | |
| 328 dict_words = (DictWord*)BrotliAllocate(m, | |
| 329 sizeof(*dict->dict_words) * (total + 1)); | |
| 330 buckets = (uint16_t*)BrotliAllocate(m, | |
| 331 sizeof(*dict->buckets) * NUM_HASH_BUCKETS); | |
| 332 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 333 dict->dict_words_alloc_size_ = total + 1; | |
| 334 dict->dict_words = dict->dict_words_data_ = dict_words; | |
| 335 dict->buckets_alloc_size_ = NUM_HASH_BUCKETS; | |
| 336 dict->buckets = dict->buckets_data_ = buckets; | |
| 337 | |
| 338 /* Unused; makes offsets start from 1. */ | |
| 339 dict_words[0] = MakeDictWord(0, 0, 0); | |
| 340 total = 1; | |
| 341 for (i = 0; i < NUM_HASH_BUCKETS; ++i) { | |
| 342 size_t num_words = words_by_hash_size[i]; | |
| 343 if (num_words > 0) { | |
| 344 buckets[i] = (uint16_t)(total); | |
| 345 memcpy(&dict_words[total], &words_by_hash[i][0], | |
| 346 sizeof(dict_words[0]) * num_words); | |
| 347 total += num_words; | |
| 348 dict_words[total - 1].len |= 0x80; | |
| 349 } else { | |
| 350 buckets[i] = 0; | |
| 351 } | |
| 352 } | |
| 353 | |
| 354 for (i = 0; i < NUM_HASH_BUCKETS; ++i) { | |
| 355 BrotliFree(m, words_by_hash[i]); | |
| 356 } | |
| 357 BrotliFree(m, words_by_hash); | |
| 358 BrotliFree(m, words_by_hash_size); | |
| 359 BrotliFree(m, words_by_hash_capacity); | |
| 360 BrotliTrieFree(m, &dedup); | |
| 361 | |
| 362 return BROTLI_TRUE; | |
| 363 } | |
| 364 | |
| 365 static void BuildDictionaryHashTable(uint16_t* hash_table_words, | |
| 366 uint8_t* hash_table_lengths, const BrotliDictionary* dict) { | |
| 367 int j, len; | |
| 368 /* The order of the loops is such that in case of collision, words with | |
| 369 shorter length are preferred, and in case of same length, words with | |
| 370 smaller index. There is only a single word per bucket. */ | |
| 371 /* TODO(lode): consider adding optional user-supplied frequency_map to use | |
| 372 for preferred words instead, this can make the encoder better for | |
| 373 quality 9 and below without affecting the decoder */ | |
| 374 memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords)); | |
| 375 memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths)); | |
| 376 for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; | |
| 377 len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) { | |
| 378 const size_t num_words = dict->size_bits_by_length[len] ? | |
| 379 (1u << dict->size_bits_by_length[len]) : 0; | |
| 380 for (j = (int)num_words - 1; j >= 0; --j) { | |
| 381 size_t offset = dict->offsets_by_length[len] + | |
| 382 (size_t)len * (size_t)j; | |
| 383 const uint8_t* word = &dict->data[offset]; | |
| 384 const uint32_t key = Hash(word, 14); | |
| 385 int idx = (int)(key << 1) + (len < 8 ? 1 : 0); | |
| 386 BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS); | |
| 387 hash_table_words[idx] = (uint16_t)j; | |
| 388 hash_table_lengths[idx] = (uint8_t)len; | |
| 389 } | |
| 390 } | |
| 391 } | |
| 392 | |
| 393 static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m, | |
| 394 const BrotliTransforms* transforms, | |
| 395 BrotliEncoderDictionary* dict) { | |
| 396 int i, j, l; | |
| 397 for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) { | |
| 398 for (l = 0; l < 32; l++) { | |
| 399 int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u); | |
| 400 for (i = 0; i < num; i++) { | |
| 401 uint8_t transformed[kTransformedBufferSize]; | |
| 402 size_t size; | |
| 403 TransformedDictionaryWord( | |
| 404 (uint32_t)i, l, j, transforms, dict, transformed, &size); | |
| 405 if (size < 4) continue; | |
| 406 if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j), | |
| 407 transformed, size, &dict->trie)) { | |
| 408 return BROTLI_FALSE; | |
| 409 } | |
| 410 } | |
| 411 } | |
| 412 } | |
| 413 return BROTLI_TRUE; | |
| 414 } | |
| 415 | |
| 416 /* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for | |
| 417 the custom transforms, where possible within the limits of the | |
| 418 cutoffTransforms encoding. The fast encoder uses this to do fast lookup for | |
| 419 transforms that remove the N last characters (OmitLast). */ | |
| 420 static void ComputeCutoffTransforms( | |
| 421 const BrotliTransforms* transforms, | |
| 422 uint32_t* count, uint64_t* data) { | |
| 423 int i; | |
| 424 /* The encoding in a 64-bit integer of transform N in the data is: (N << 2) + | |
| 425 ((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity | |
| 426 transform code must be 0-63, for N=1 the transform code must be 4-67, ..., | |
| 427 for N=9 it must be 36-99. | |
| 428 TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t | |
| 429 for the cutoff transforms, so that shared dictionaries can have the | |
| 430 OmitLast transforms anywhere without loss. */ | |
| 431 *count = 0; | |
| 432 *data = 0; | |
| 433 for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) { | |
| 434 int idx = transforms->cutOffTransforms[i]; | |
| 435 if (idx == -1) break; /* Not found */ | |
| 436 if (idx < (i << 2)) break; /* Too small for the encoding */ | |
| 437 if (idx >= (i << 2) + 64) break; /* Too large for the encoding */ | |
| 438 (*count)++; | |
| 439 *data |= (uint64_t)(((uint64_t)idx - | |
| 440 ((uint64_t)i << 2u)) << ((uint64_t)i * 6u)); | |
| 441 } | |
| 442 } | |
| 443 | |
| 444 static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality, | |
| 445 const BrotliTransforms* transforms, | |
| 446 BrotliEncoderDictionary* current) { | |
| 447 int default_words = current->words == BrotliGetDictionary(); | |
| 448 int default_transforms = transforms == BrotliGetTransforms(); | |
| 449 | |
| 450 if (default_words && default_transforms) { | |
| 451 /* hashes are already set to Brotli defaults */ | |
| 452 return BROTLI_TRUE; | |
| 453 } | |
| 454 | |
| 455 current->hash_table_data_words_ = (uint16_t*)BrotliAllocate( | |
| 456 m, sizeof(kStaticDictionaryHashWords)); | |
| 457 current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate( | |
| 458 m, sizeof(kStaticDictionaryHashLengths)); | |
| 459 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 460 current->hash_table_words = current->hash_table_data_words_; | |
| 461 current->hash_table_lengths = current->hash_table_data_lengths_; | |
| 462 | |
| 463 BuildDictionaryHashTable(current->hash_table_data_words_, | |
| 464 current->hash_table_data_lengths_, current->words); | |
| 465 | |
| 466 ComputeCutoffTransforms(transforms, | |
| 467 ¤t->cutoffTransformsCount, ¤t->cutoffTransforms); | |
| 468 | |
| 469 /* Only compute the data for slow encoder if the requested quality is high | |
| 470 enough to need it */ | |
| 471 if (quality >= ZOPFLIFICATION_QUALITY) { | |
| 472 if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE; | |
| 473 | |
| 474 /* For the built-in Brotli transforms, there is a hard-coded function to | |
| 475 handle all transforms, but for custom transforms, we use the following | |
| 476 large hammer instead */ | |
| 477 current->has_words_heavy = !default_transforms; | |
| 478 if (current->has_words_heavy) { | |
| 479 if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE; | |
| 480 } | |
| 481 } | |
| 482 | |
| 483 return BROTLI_TRUE; | |
| 484 } | |
| 485 #endif /* BROTLI_EXPERIMENTAL */ | |
| 486 | |
| 487 void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) { | |
| 488 dict->magic = kSharedDictionaryMagic; | |
| 489 | |
| 490 dict->compound.num_chunks = 0; | |
| 491 dict->compound.total_size = 0; | |
| 492 dict->compound.chunk_offsets[0] = 0; | |
| 493 dict->compound.num_prepared_instances_ = 0; | |
| 494 | |
| 495 dict->contextual.context_based = 0; | |
| 496 dict->contextual.num_dictionaries = 1; | |
| 497 dict->contextual.instances_ = 0; | |
| 498 dict->contextual.num_instances_ = 1; /* The instance_ field */ | |
| 499 dict->contextual.dict[0] = &dict->contextual.instance_; | |
| 500 InitEncoderDictionary(&dict->contextual.instance_); | |
| 501 dict->contextual.instance_.parent = &dict->contextual; | |
| 502 | |
| 503 dict->max_quality = BROTLI_MAX_QUALITY; | |
| 504 } | |
| 505 | |
| 506 #if defined(BROTLI_EXPERIMENTAL) | |
| 507 /* TODO(eustas): make sure that tooling will warn user if not all the cutoff | |
| 508 transforms are available (for low-quality encoder). */ | |
| 509 static BROTLI_BOOL InitCustomSharedEncoderDictionary( | |
| 510 MemoryManager* m, const BrotliSharedDictionary* decoded_dict, | |
| 511 int quality, SharedEncoderDictionary* dict) { | |
| 512 ContextualEncoderDictionary* contextual; | |
| 513 CompoundDictionary* compound; | |
| 514 BrotliEncoderDictionary* instances; | |
| 515 int i; | |
| 516 BrotliInitSharedEncoderDictionary(dict); | |
| 517 | |
| 518 contextual = &dict->contextual; | |
| 519 compound = &dict->compound; | |
| 520 | |
| 521 for (i = 0; i < (int)decoded_dict->num_prefix; i++) { | |
| 522 PreparedDictionary* prepared = CreatePreparedDictionary(m, | |
| 523 decoded_dict->prefix[i], decoded_dict->prefix_size[i]); | |
| 524 AttachPreparedDictionary(compound, prepared); | |
| 525 /* remember for cleanup */ | |
| 526 compound->prepared_instances_[ | |
| 527 compound->num_prepared_instances_++] = prepared; | |
| 528 } | |
| 529 | |
| 530 dict->max_quality = quality; | |
| 531 contextual->context_based = decoded_dict->context_based; | |
| 532 if (decoded_dict->context_based) { | |
| 533 memcpy(contextual->context_map, decoded_dict->context_map, | |
| 534 SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS); | |
| 535 } | |
| 536 | |
| 537 contextual->num_dictionaries = decoded_dict->num_dictionaries; | |
| 538 contextual->num_instances_ = decoded_dict->num_dictionaries; | |
| 539 if (contextual->num_instances_ == 1) { | |
| 540 instances = &contextual->instance_; | |
| 541 } else { | |
| 542 contextual->instances_ = (BrotliEncoderDictionary*) | |
| 543 BrotliAllocate(m, sizeof(*contextual->instances_) * | |
| 544 contextual->num_instances_); | |
| 545 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 546 instances = contextual->instances_; | |
| 547 } | |
| 548 for (i = 0; i < (int)contextual->num_instances_; i++) { | |
| 549 BrotliEncoderDictionary* current = &instances[i]; | |
| 550 InitEncoderDictionary(current); | |
| 551 current->parent = &dict->contextual; | |
| 552 if (decoded_dict->words[i] == BrotliGetDictionary()) { | |
| 553 current->words = BrotliGetDictionary(); | |
| 554 } else { | |
| 555 current->words_instance_ = (BrotliDictionary*)BrotliAllocate( | |
| 556 m, sizeof(BrotliDictionary)); | |
| 557 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 558 *current->words_instance_ = *decoded_dict->words[i]; | |
| 559 current->words = current->words_instance_; | |
| 560 } | |
| 561 current->num_transforms = | |
| 562 (uint32_t)decoded_dict->transforms[i]->num_transforms; | |
| 563 if (!ComputeDictionary( | |
| 564 m, quality, decoded_dict->transforms[i], current)) { | |
| 565 return BROTLI_FALSE; | |
| 566 } | |
| 567 | |
| 568 contextual->dict[i] = current; | |
| 569 } | |
| 570 | |
| 571 return BROTLI_TRUE; /* success */ | |
| 572 } | |
| 573 | |
| 574 BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary( | |
| 575 MemoryManager* m, const uint8_t* encoded_dict, size_t size, | |
| 576 int quality, SharedEncoderDictionary* dict) { | |
| 577 BROTLI_BOOL success = BROTLI_FALSE; | |
| 578 BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance( | |
| 579 m->alloc_func, m->free_func, m->opaque); | |
| 580 if (!decoded_dict) { /* OOM */ | |
| 581 return BROTLI_FALSE; | |
| 582 } | |
| 583 success = BrotliSharedDictionaryAttach( | |
| 584 decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict); | |
| 585 if (success) { | |
| 586 success = InitCustomSharedEncoderDictionary(m, | |
| 587 decoded_dict, quality, dict); | |
| 588 } | |
| 589 BrotliSharedDictionaryDestroyInstance(decoded_dict); | |
| 590 return success; | |
| 591 } | |
| 592 #endif /* BROTLI_EXPERIMENTAL */ | |
| 593 | |
| 594 void BrotliCleanupSharedEncoderDictionary(MemoryManager* m, | |
| 595 SharedEncoderDictionary* dict) { | |
| 596 size_t i; | |
| 597 for (i = 0; i < dict->compound.num_prepared_instances_; i++) { | |
| 598 DestroyPreparedDictionary(m, | |
| 599 (PreparedDictionary*)dict->compound.prepared_instances_[i]); | |
| 600 } | |
| 601 if (dict->contextual.num_instances_ == 1) { | |
| 602 BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_); | |
| 603 } else if (dict->contextual.num_instances_ > 1) { | |
| 604 for (i = 0; i < dict->contextual.num_instances_; i++) { | |
| 605 BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]); | |
| 606 } | |
| 607 BrotliFree(m, dict->contextual.instances_); | |
| 608 } | |
| 609 } | |
| 610 | |
| 611 ManagedDictionary* BrotliCreateManagedDictionary( | |
| 612 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { | |
| 613 ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc( | |
| 614 sizeof(ManagedDictionary), alloc_func, free_func, opaque); | |
| 615 if (result == NULL) return NULL; | |
| 616 | |
| 617 result->magic = kManagedDictionaryMagic; | |
| 618 BrotliInitMemoryManager( | |
| 619 &result->memory_manager_, alloc_func, free_func, opaque); | |
| 620 result->dictionary = NULL; | |
| 621 | |
| 622 return result; | |
| 623 } | |
| 624 | |
| 625 void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) { | |
| 626 if (!dictionary) return; | |
| 627 BrotliBootstrapFree(dictionary, &dictionary->memory_manager_); | |
| 628 } | |
| 629 | |
| 630 /* Escalate internal functions visibility; for testing purposes only. */ | |
| 631 #if defined(BROTLI_TEST) | |
| 632 void InitEncoderDictionaryForTest(BrotliEncoderDictionary*); | |
| 633 void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) { | |
| 634 InitEncoderDictionary(d); | |
| 635 } | |
| 636 #endif | |
| 637 | |
| 638 #if defined(__cplusplus) || defined(c_plusplus) | |
| 639 } /* extern "C" */ | |
| 640 #endif |
