Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/brotli/c/enc/static_dict.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* Copyright 2013 Google Inc. All Rights Reserved. | |
| 2 | |
| 3 Distributed under MIT license. | |
| 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
| 5 */ | |
| 6 | |
| 7 #include "static_dict.h" | |
| 8 | |
| 9 #include "../common/dictionary.h" | |
| 10 #include "../common/platform.h" | |
| 11 #include "../common/transform.h" | |
| 12 #include "encoder_dict.h" | |
| 13 #include "find_match_length.h" | |
| 14 | |
| 15 #if defined(__cplusplus) || defined(c_plusplus) | |
| 16 extern "C" { | |
| 17 #endif | |
| 18 | |
| 19 static BROTLI_INLINE uint32_t Hash(const uint8_t* data) { | |
| 20 uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; | |
| 21 /* The higher bits contain more mixture from the multiplication, | |
| 22 so we take our results from there. */ | |
| 23 return h >> (32 - kDictNumBits); | |
| 24 } | |
| 25 | |
| 26 static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code, | |
| 27 uint32_t* matches) { | |
| 28 uint32_t match = (uint32_t)((distance << 5) + len_code); | |
| 29 matches[len] = BROTLI_MIN(uint32_t, matches[len], match); | |
| 30 } | |
| 31 | |
| 32 static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary, | |
| 33 const uint8_t* data, | |
| 34 size_t id, | |
| 35 size_t len, | |
| 36 size_t maxlen) { | |
| 37 const size_t offset = dictionary->offsets_by_length[len] + len * id; | |
| 38 return FindMatchLengthWithLimit(&dictionary->data[offset], data, | |
| 39 BROTLI_MIN(size_t, len, maxlen)); | |
| 40 } | |
| 41 | |
| 42 static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, | |
| 43 DictWord w, const uint8_t* data, size_t max_length) { | |
| 44 if (w.len > max_length) { | |
| 45 return BROTLI_FALSE; | |
| 46 } else { | |
| 47 const size_t offset = dictionary->offsets_by_length[w.len] + | |
| 48 (size_t)w.len * (size_t)w.idx; | |
| 49 const uint8_t* dict = &dictionary->data[offset]; | |
| 50 if (w.transform == 0) { | |
| 51 /* Match against base dictionary word. */ | |
| 52 return | |
| 53 TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len); | |
| 54 } else if (w.transform == 10) { | |
| 55 /* Match against uppercase first transform. | |
| 56 Note that there are only ASCII uppercase words in the lookup table. */ | |
| 57 return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' && | |
| 58 (dict[0] ^ 32) == data[0] && | |
| 59 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == | |
| 60 w.len - 1u); | |
| 61 } else { | |
| 62 /* Match against uppercase all transform. | |
| 63 Note that there are only ASCII uppercase words in the lookup table. */ | |
| 64 size_t i; | |
| 65 for (i = 0; i < w.len; ++i) { | |
| 66 if (dict[i] >= 'a' && dict[i] <= 'z') { | |
| 67 if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE; | |
| 68 } else { | |
| 69 if (dict[i] != data[i]) return BROTLI_FALSE; | |
| 70 } | |
| 71 } | |
| 72 return BROTLI_TRUE; | |
| 73 } | |
| 74 } | |
| 75 } | |
| 76 | |
| 77 /* Finds matches for a single static dictionary */ | |
| 78 static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor( | |
| 79 const BrotliEncoderDictionary* dictionary, const uint8_t* data, | |
| 80 size_t min_length, size_t max_length, uint32_t* matches) { | |
| 81 BROTLI_BOOL has_found_match = BROTLI_FALSE; | |
| 82 #if defined(BROTLI_EXPERIMENTAL) | |
| 83 if (dictionary->has_words_heavy) { | |
| 84 const BrotliTrieNode* node = &dictionary->trie.root; | |
| 85 size_t l = 0; | |
| 86 while (node && l < max_length) { | |
| 87 uint8_t c; | |
| 88 if (l >= min_length && node->len_) { | |
| 89 AddMatch(node->idx_, l, node->len_, matches); | |
| 90 has_found_match = BROTLI_TRUE; | |
| 91 } | |
| 92 c = data[l++]; | |
| 93 node = BrotliTrieSub(&dictionary->trie, node, c); | |
| 94 } | |
| 95 return has_found_match; | |
| 96 } | |
| 97 #endif /* BROTLI_EXPERIMENTAL */ | |
| 98 { | |
| 99 size_t offset = dictionary->buckets[Hash(data)]; | |
| 100 BROTLI_BOOL end = !offset; | |
| 101 while (!end) { | |
| 102 DictWord w = dictionary->dict_words[offset++]; | |
| 103 const size_t l = w.len & 0x1F; | |
| 104 const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; | |
| 105 const size_t id = w.idx; | |
| 106 end = !!(w.len & 0x80); | |
| 107 w.len = (uint8_t)l; | |
| 108 if (w.transform == 0) { | |
| 109 const size_t matchlen = | |
| 110 DictMatchLength(dictionary->words, data, id, l, max_length); | |
| 111 const uint8_t* s; | |
| 112 size_t minlen; | |
| 113 size_t maxlen; | |
| 114 size_t len; | |
| 115 /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ | |
| 116 if (matchlen == l) { | |
| 117 AddMatch(id, l, l, matches); | |
| 118 has_found_match = BROTLI_TRUE; | |
| 119 } | |
| 120 /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and | |
| 121 "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ | |
| 122 if (matchlen >= l - 1) { | |
| 123 AddMatch(id + 12 * n, l - 1, l, matches); | |
| 124 if (l + 2 < max_length && | |
| 125 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && | |
| 126 data[l + 2] == ' ') { | |
| 127 AddMatch(id + 49 * n, l + 3, l, matches); | |
| 128 } | |
| 129 has_found_match = BROTLI_TRUE; | |
| 130 } | |
| 131 /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ | |
| 132 minlen = min_length; | |
| 133 if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); | |
| 134 maxlen = BROTLI_MIN(size_t, matchlen, l - 2); | |
| 135 for (len = minlen; len <= maxlen; ++len) { | |
| 136 size_t cut = l - len; | |
| 137 size_t transform_id = (cut << 2) + | |
| 138 (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); | |
| 139 AddMatch(id + transform_id * n, len, l, matches); | |
| 140 has_found_match = BROTLI_TRUE; | |
| 141 } | |
| 142 if (matchlen < l || l + 6 >= max_length) { | |
| 143 continue; | |
| 144 } | |
| 145 s = &data[l]; | |
| 146 /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */ | |
| 147 if (s[0] == ' ') { | |
| 148 AddMatch(id + n, l + 1, l, matches); | |
| 149 if (s[1] == 'a') { | |
| 150 if (s[2] == ' ') { | |
| 151 AddMatch(id + 28 * n, l + 3, l, matches); | |
| 152 } else if (s[2] == 's') { | |
| 153 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); | |
| 154 } else if (s[2] == 't') { | |
| 155 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); | |
| 156 } else if (s[2] == 'n') { | |
| 157 if (s[3] == 'd' && s[4] == ' ') { | |
| 158 AddMatch(id + 10 * n, l + 5, l, matches); | |
| 159 } | |
| 160 } | |
| 161 } else if (s[1] == 'b') { | |
| 162 if (s[2] == 'y' && s[3] == ' ') { | |
| 163 AddMatch(id + 38 * n, l + 4, l, matches); | |
| 164 } | |
| 165 } else if (s[1] == 'i') { | |
| 166 if (s[2] == 'n') { | |
| 167 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); | |
| 168 } else if (s[2] == 's') { | |
| 169 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); | |
| 170 } | |
| 171 } else if (s[1] == 'f') { | |
| 172 if (s[2] == 'o') { | |
| 173 if (s[3] == 'r' && s[4] == ' ') { | |
| 174 AddMatch(id + 25 * n, l + 5, l, matches); | |
| 175 } | |
| 176 } else if (s[2] == 'r') { | |
| 177 if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') { | |
| 178 AddMatch(id + 37 * n, l + 6, l, matches); | |
| 179 } | |
| 180 } | |
| 181 } else if (s[1] == 'o') { | |
| 182 if (s[2] == 'f') { | |
| 183 if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches); | |
| 184 } else if (s[2] == 'n') { | |
| 185 if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches); | |
| 186 } | |
| 187 } else if (s[1] == 'n') { | |
| 188 if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') { | |
| 189 AddMatch(id + 80 * n, l + 5, l, matches); | |
| 190 } | |
| 191 } else if (s[1] == 't') { | |
| 192 if (s[2] == 'h') { | |
| 193 if (s[3] == 'e') { | |
| 194 if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches); | |
| 195 } else if (s[3] == 'a') { | |
| 196 if (s[4] == 't' && s[5] == ' ') { | |
| 197 AddMatch(id + 29 * n, l + 6, l, matches); | |
| 198 } | |
| 199 } | |
| 200 } else if (s[2] == 'o') { | |
| 201 if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches); | |
| 202 } | |
| 203 } else if (s[1] == 'w') { | |
| 204 if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') { | |
| 205 AddMatch(id + 35 * n, l + 6, l, matches); | |
| 206 } | |
| 207 } | |
| 208 } else if (s[0] == '"') { | |
| 209 AddMatch(id + 19 * n, l + 1, l, matches); | |
| 210 if (s[1] == '>') { | |
| 211 AddMatch(id + 21 * n, l + 2, l, matches); | |
| 212 } | |
| 213 } else if (s[0] == '.') { | |
| 214 AddMatch(id + 20 * n, l + 1, l, matches); | |
| 215 if (s[1] == ' ') { | |
| 216 AddMatch(id + 31 * n, l + 2, l, matches); | |
| 217 if (s[2] == 'T' && s[3] == 'h') { | |
| 218 if (s[4] == 'e') { | |
| 219 if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches); | |
| 220 } else if (s[4] == 'i') { | |
| 221 if (s[5] == 's' && s[6] == ' ') { | |
| 222 AddMatch(id + 75 * n, l + 7, l, matches); | |
| 223 } | |
| 224 } | |
| 225 } | |
| 226 } | |
| 227 } else if (s[0] == ',') { | |
| 228 AddMatch(id + 76 * n, l + 1, l, matches); | |
| 229 if (s[1] == ' ') { | |
| 230 AddMatch(id + 14 * n, l + 2, l, matches); | |
| 231 } | |
| 232 } else if (s[0] == '\n') { | |
| 233 AddMatch(id + 22 * n, l + 1, l, matches); | |
| 234 if (s[1] == '\t') { | |
| 235 AddMatch(id + 50 * n, l + 2, l, matches); | |
| 236 } | |
| 237 } else if (s[0] == ']') { | |
| 238 AddMatch(id + 24 * n, l + 1, l, matches); | |
| 239 } else if (s[0] == '\'') { | |
| 240 AddMatch(id + 36 * n, l + 1, l, matches); | |
| 241 } else if (s[0] == ':') { | |
| 242 AddMatch(id + 51 * n, l + 1, l, matches); | |
| 243 } else if (s[0] == '(') { | |
| 244 AddMatch(id + 57 * n, l + 1, l, matches); | |
| 245 } else if (s[0] == '=') { | |
| 246 if (s[1] == '"') { | |
| 247 AddMatch(id + 70 * n, l + 2, l, matches); | |
| 248 } else if (s[1] == '\'') { | |
| 249 AddMatch(id + 86 * n, l + 2, l, matches); | |
| 250 } | |
| 251 } else if (s[0] == 'a') { | |
| 252 if (s[1] == 'l' && s[2] == ' ') { | |
| 253 AddMatch(id + 84 * n, l + 3, l, matches); | |
| 254 } | |
| 255 } else if (s[0] == 'e') { | |
| 256 if (s[1] == 'd') { | |
| 257 if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches); | |
| 258 } else if (s[1] == 'r') { | |
| 259 if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches); | |
| 260 } else if (s[1] == 's') { | |
| 261 if (s[2] == 't' && s[3] == ' ') { | |
| 262 AddMatch(id + 95 * n, l + 4, l, matches); | |
| 263 } | |
| 264 } | |
| 265 } else if (s[0] == 'f') { | |
| 266 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { | |
| 267 AddMatch(id + 90 * n, l + 4, l, matches); | |
| 268 } | |
| 269 } else if (s[0] == 'i') { | |
| 270 if (s[1] == 'v') { | |
| 271 if (s[2] == 'e' && s[3] == ' ') { | |
| 272 AddMatch(id + 92 * n, l + 4, l, matches); | |
| 273 } | |
| 274 } else if (s[1] == 'z') { | |
| 275 if (s[2] == 'e' && s[3] == ' ') { | |
| 276 AddMatch(id + 100 * n, l + 4, l, matches); | |
| 277 } | |
| 278 } | |
| 279 } else if (s[0] == 'l') { | |
| 280 if (s[1] == 'e') { | |
| 281 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { | |
| 282 AddMatch(id + 93 * n, l + 5, l, matches); | |
| 283 } | |
| 284 } else if (s[1] == 'y') { | |
| 285 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); | |
| 286 } | |
| 287 } else if (s[0] == 'o') { | |
| 288 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { | |
| 289 AddMatch(id + 106 * n, l + 4, l, matches); | |
| 290 } | |
| 291 } | |
| 292 } else { | |
| 293 /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and | |
| 294 is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) | |
| 295 transform. */ | |
| 296 const BROTLI_BOOL is_all_caps = | |
| 297 TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); | |
| 298 const uint8_t* s; | |
| 299 if (!IsMatch(dictionary->words, w, data, max_length)) { | |
| 300 continue; | |
| 301 } | |
| 302 /* Transform "" + kUppercase{First,All} + "" */ | |
| 303 AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches); | |
| 304 has_found_match = BROTLI_TRUE; | |
| 305 if (l + 1 >= max_length) { | |
| 306 continue; | |
| 307 } | |
| 308 /* Transforms "" + kUppercase{First,All} + <suffix> */ | |
| 309 s = &data[l]; | |
| 310 if (s[0] == ' ') { | |
| 311 AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches); | |
| 312 } else if (s[0] == '"') { | |
| 313 AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches); | |
| 314 if (s[1] == '>') { | |
| 315 AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches); | |
| 316 } | |
| 317 } else if (s[0] == '.') { | |
| 318 AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches); | |
| 319 if (s[1] == ' ') { | |
| 320 AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches); | |
| 321 } | |
| 322 } else if (s[0] == ',') { | |
| 323 AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches); | |
| 324 if (s[1] == ' ') { | |
| 325 AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches); | |
| 326 } | |
| 327 } else if (s[0] == '\'') { | |
| 328 AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches); | |
| 329 } else if (s[0] == '(') { | |
| 330 AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches); | |
| 331 } else if (s[0] == '=') { | |
| 332 if (s[1] == '"') { | |
| 333 AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches); | |
| 334 } else if (s[1] == '\'') { | |
| 335 AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches); | |
| 336 } | |
| 337 } | |
| 338 } | |
| 339 } | |
| 340 } | |
| 341 /* Transforms with prefixes " " and "." */ | |
| 342 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { | |
| 343 BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); | |
| 344 size_t offset = dictionary->buckets[Hash(&data[1])]; | |
| 345 BROTLI_BOOL end = !offset; | |
| 346 while (!end) { | |
| 347 DictWord w = dictionary->dict_words[offset++]; | |
| 348 const size_t l = w.len & 0x1F; | |
| 349 const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; | |
| 350 const size_t id = w.idx; | |
| 351 end = !!(w.len & 0x80); | |
| 352 w.len = (uint8_t)l; | |
| 353 if (w.transform == 0) { | |
| 354 const uint8_t* s; | |
| 355 if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { | |
| 356 continue; | |
| 357 } | |
| 358 /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and | |
| 359 "." + BROTLI_TRANSFORM_IDENTITY + "" */ | |
| 360 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); | |
| 361 has_found_match = BROTLI_TRUE; | |
| 362 if (l + 2 >= max_length) { | |
| 363 continue; | |
| 364 } | |
| 365 /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and | |
| 366 "." + BROTLI_TRANSFORM_IDENTITY + <suffix> | |
| 367 */ | |
| 368 s = &data[l + 1]; | |
| 369 if (s[0] == ' ') { | |
| 370 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); | |
| 371 } else if (s[0] == '(') { | |
| 372 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); | |
| 373 } else if (is_space) { | |
| 374 if (s[0] == ',') { | |
| 375 AddMatch(id + 103 * n, l + 2, l, matches); | |
| 376 if (s[1] == ' ') { | |
| 377 AddMatch(id + 33 * n, l + 3, l, matches); | |
| 378 } | |
| 379 } else if (s[0] == '.') { | |
| 380 AddMatch(id + 71 * n, l + 2, l, matches); | |
| 381 if (s[1] == ' ') { | |
| 382 AddMatch(id + 52 * n, l + 3, l, matches); | |
| 383 } | |
| 384 } else if (s[0] == '=') { | |
| 385 if (s[1] == '"') { | |
| 386 AddMatch(id + 81 * n, l + 3, l, matches); | |
| 387 } else if (s[1] == '\'') { | |
| 388 AddMatch(id + 98 * n, l + 3, l, matches); | |
| 389 } | |
| 390 } | |
| 391 } | |
| 392 } else if (is_space) { | |
| 393 /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and | |
| 394 is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) | |
| 395 transform. */ | |
| 396 const BROTLI_BOOL is_all_caps = | |
| 397 TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); | |
| 398 const uint8_t* s; | |
| 399 if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { | |
| 400 continue; | |
| 401 } | |
| 402 /* Transforms " " + kUppercase{First,All} + "" */ | |
| 403 AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches); | |
| 404 has_found_match = BROTLI_TRUE; | |
| 405 if (l + 2 >= max_length) { | |
| 406 continue; | |
| 407 } | |
| 408 /* Transforms " " + kUppercase{First,All} + <suffix> */ | |
| 409 s = &data[l + 1]; | |
| 410 if (s[0] == ' ') { | |
| 411 AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches); | |
| 412 } else if (s[0] == ',') { | |
| 413 if (!is_all_caps) { | |
| 414 AddMatch(id + 109 * n, l + 2, l, matches); | |
| 415 } | |
| 416 if (s[1] == ' ') { | |
| 417 AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches); | |
| 418 } | |
| 419 } else if (s[0] == '.') { | |
| 420 AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches); | |
| 421 if (s[1] == ' ') { | |
| 422 AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches); | |
| 423 } | |
| 424 } else if (s[0] == '=') { | |
| 425 if (s[1] == '"') { | |
| 426 AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches); | |
| 427 } else if (s[1] == '\'') { | |
| 428 AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches); | |
| 429 } | |
| 430 } | |
| 431 } | |
| 432 } | |
| 433 } | |
| 434 if (max_length >= 6) { | |
| 435 /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ | |
| 436 if ((data[1] == ' ' && | |
| 437 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || | |
| 438 (data[0] == 0xC2 && data[1] == 0xA0)) { | |
| 439 size_t offset = dictionary->buckets[Hash(&data[2])]; | |
| 440 BROTLI_BOOL end = !offset; | |
| 441 while (!end) { | |
| 442 DictWord w = dictionary->dict_words[offset++]; | |
| 443 const size_t l = w.len & 0x1F; | |
| 444 const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; | |
| 445 const size_t id = w.idx; | |
| 446 end = !!(w.len & 0x80); | |
| 447 w.len = (uint8_t)l; | |
| 448 if (w.transform == 0 && | |
| 449 IsMatch(dictionary->words, w, &data[2], max_length - 2)) { | |
| 450 if (data[0] == 0xC2) { | |
| 451 AddMatch(id + 102 * n, l + 2, l, matches); | |
| 452 has_found_match = BROTLI_TRUE; | |
| 453 } else if (l + 2 < max_length && data[l + 2] == ' ') { | |
| 454 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); | |
| 455 AddMatch(id + t * n, l + 3, l, matches); | |
| 456 has_found_match = BROTLI_TRUE; | |
| 457 } | |
| 458 } | |
| 459 } | |
| 460 } | |
| 461 } | |
| 462 if (max_length >= 9) { | |
| 463 /* Transforms with prefixes " the " and ".com/" */ | |
| 464 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && | |
| 465 data[3] == 'e' && data[4] == ' ') || | |
| 466 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && | |
| 467 data[3] == 'm' && data[4] == '/')) { | |
| 468 size_t offset = dictionary->buckets[Hash(&data[5])]; | |
| 469 BROTLI_BOOL end = !offset; | |
| 470 while (!end) { | |
| 471 DictWord w = dictionary->dict_words[offset++]; | |
| 472 const size_t l = w.len & 0x1F; | |
| 473 const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; | |
| 474 const size_t id = w.idx; | |
| 475 end = !!(w.len & 0x80); | |
| 476 w.len = (uint8_t)l; | |
| 477 if (w.transform == 0 && | |
| 478 IsMatch(dictionary->words, w, &data[5], max_length - 5)) { | |
| 479 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); | |
| 480 has_found_match = BROTLI_TRUE; | |
| 481 if (l + 5 < max_length) { | |
| 482 const uint8_t* s = &data[l + 5]; | |
| 483 if (data[0] == ' ') { | |
| 484 if (l + 8 < max_length && | |
| 485 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { | |
| 486 AddMatch(id + 62 * n, l + 9, l, matches); | |
| 487 if (l + 12 < max_length && | |
| 488 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { | |
| 489 AddMatch(id + 73 * n, l + 13, l, matches); | |
| 490 } | |
| 491 } | |
| 492 } | |
| 493 } | |
| 494 } | |
| 495 } | |
| 496 } | |
| 497 } | |
| 498 return has_found_match; | |
| 499 } | |
| 500 | |
| 501 /* Finds matches for one or more dictionaries, if multiple are present | |
| 502 in the contextual dictionary */ | |
| 503 BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( | |
| 504 const BrotliEncoderDictionary* dictionary, const uint8_t* data, | |
| 505 size_t min_length, size_t max_length, uint32_t* matches) { | |
| 506 BROTLI_BOOL has_found_match = | |
| 507 BrotliFindAllStaticDictionaryMatchesFor( | |
| 508 dictionary, data, min_length, max_length, matches); | |
| 509 | |
| 510 if (!!dictionary->parent && dictionary->parent->num_dictionaries > 1) { | |
| 511 uint32_t matches2[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1]; | |
| 512 int l; | |
| 513 const BrotliEncoderDictionary* dictionary2 = dictionary->parent->dict[0]; | |
| 514 if (dictionary2 == dictionary) { | |
| 515 dictionary2 = dictionary->parent->dict[1]; | |
| 516 } | |
| 517 | |
| 518 for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) { | |
| 519 matches2[l] = kInvalidMatch; | |
| 520 } | |
| 521 | |
| 522 has_found_match |= BrotliFindAllStaticDictionaryMatchesFor( | |
| 523 dictionary2, data, min_length, max_length, matches2); | |
| 524 | |
| 525 for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) { | |
| 526 if (matches2[l] != kInvalidMatch) { | |
| 527 uint32_t dist = (uint32_t)(matches2[l] >> 5); | |
| 528 uint32_t len_code = matches2[l] & 31; | |
| 529 uint32_t skipdist = (uint32_t)((uint32_t)(1 << dictionary->words-> | |
| 530 size_bits_by_length[len_code]) & ~1u) * | |
| 531 (uint32_t)dictionary->num_transforms; | |
| 532 /* TODO(lode): check for dist overflow */ | |
| 533 dist += skipdist; | |
| 534 AddMatch(dist, (size_t)l, len_code, matches); | |
| 535 } | |
| 536 } | |
| 537 } | |
| 538 return has_found_match; | |
| 539 } | |
| 540 #if defined(__cplusplus) || defined(c_plusplus) | |
| 541 } /* extern "C" */ | |
| 542 #endif |
