Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/brotli/c/enc/encode.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* Copyright 2013 Google Inc. All Rights Reserved. | |
| 2 | |
| 3 Distributed under MIT license. | |
| 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
| 5 */ | |
| 6 | |
| 7 /* Implementation of Brotli compressor. */ | |
| 8 | |
| 9 #include <brotli/encode.h> | |
| 10 #include <brotli/shared_dictionary.h> | |
| 11 #include <brotli/types.h> | |
| 12 | |
| 13 #include <stdlib.h> /* free, malloc */ | |
| 14 #include <string.h> /* memcpy, memset */ | |
| 15 | |
| 16 #include "../common/constants.h" | |
| 17 #include "../common/context.h" | |
| 18 #include "../common/platform.h" | |
| 19 #include "../common/version.h" | |
| 20 #include "backward_references.h" | |
| 21 #include "backward_references_hq.h" | |
| 22 #include "bit_cost.h" | |
| 23 #include "brotli_bit_stream.h" | |
| 24 #include "compress_fragment.h" | |
| 25 #include "compress_fragment_two_pass.h" | |
| 26 #include "dictionary_hash.h" | |
| 27 #include "encoder_dict.h" | |
| 28 #include "entropy_encode.h" | |
| 29 #include "fast_log.h" | |
| 30 #include "hash.h" | |
| 31 #include "histogram.h" | |
| 32 #include "memory.h" | |
| 33 #include "metablock.h" | |
| 34 #include "prefix.h" | |
| 35 #include "state.h" | |
| 36 #include "quality.h" | |
| 37 #include "ringbuffer.h" | |
| 38 #include "utf8_util.h" | |
| 39 #include "write_bits.h" | |
| 40 | |
| 41 #if defined(__cplusplus) || defined(c_plusplus) | |
| 42 extern "C" { | |
| 43 #endif | |
| 44 | |
| 45 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); | |
| 46 | |
| 47 static size_t InputBlockSize(BrotliEncoderState* s) { | |
| 48 return (size_t)1 << s->params.lgblock; | |
| 49 } | |
| 50 | |
| 51 static uint64_t UnprocessedInputSize(BrotliEncoderState* s) { | |
| 52 return s->input_pos_ - s->last_processed_pos_; | |
| 53 } | |
| 54 | |
| 55 static size_t RemainingInputBlockSize(BrotliEncoderState* s) { | |
| 56 const uint64_t delta = UnprocessedInputSize(s); | |
| 57 size_t block_size = InputBlockSize(s); | |
| 58 if (delta >= block_size) return 0; | |
| 59 return block_size - (size_t)delta; | |
| 60 } | |
| 61 | |
| 62 BROTLI_BOOL BrotliEncoderSetParameter( | |
| 63 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) { | |
| 64 /* Changing parameters on the fly is not implemented yet. */ | |
| 65 if (state->is_initialized_) return BROTLI_FALSE; | |
| 66 /* TODO(eustas): Validate/clamp parameters here. */ | |
| 67 switch (p) { | |
| 68 case BROTLI_PARAM_MODE: | |
| 69 state->params.mode = (BrotliEncoderMode)value; | |
| 70 return BROTLI_TRUE; | |
| 71 | |
| 72 case BROTLI_PARAM_QUALITY: | |
| 73 state->params.quality = (int)value; | |
| 74 return BROTLI_TRUE; | |
| 75 | |
| 76 case BROTLI_PARAM_LGWIN: | |
| 77 state->params.lgwin = (int)value; | |
| 78 return BROTLI_TRUE; | |
| 79 | |
| 80 case BROTLI_PARAM_LGBLOCK: | |
| 81 state->params.lgblock = (int)value; | |
| 82 return BROTLI_TRUE; | |
| 83 | |
| 84 case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING: | |
| 85 if ((value != 0) && (value != 1)) return BROTLI_FALSE; | |
| 86 state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value); | |
| 87 return BROTLI_TRUE; | |
| 88 | |
| 89 case BROTLI_PARAM_SIZE_HINT: | |
| 90 state->params.size_hint = value; | |
| 91 return BROTLI_TRUE; | |
| 92 | |
| 93 case BROTLI_PARAM_LARGE_WINDOW: | |
| 94 state->params.large_window = TO_BROTLI_BOOL(!!value); | |
| 95 return BROTLI_TRUE; | |
| 96 | |
| 97 case BROTLI_PARAM_NPOSTFIX: | |
| 98 state->params.dist.distance_postfix_bits = value; | |
| 99 return BROTLI_TRUE; | |
| 100 | |
| 101 case BROTLI_PARAM_NDIRECT: | |
| 102 state->params.dist.num_direct_distance_codes = value; | |
| 103 return BROTLI_TRUE; | |
| 104 | |
| 105 case BROTLI_PARAM_STREAM_OFFSET: | |
| 106 if (value > (1u << 30)) return BROTLI_FALSE; | |
| 107 state->params.stream_offset = value; | |
| 108 return BROTLI_TRUE; | |
| 109 | |
| 110 default: return BROTLI_FALSE; | |
| 111 } | |
| 112 } | |
| 113 | |
| 114 /* Wraps 64-bit input position to 32-bit ring-buffer position preserving | |
| 115 "not-a-first-lap" feature. */ | |
/* Wraps a 64-bit input position to a 32-bit ring-buffer position while
   preserving the "not-a-first-lap" property: the first 3 GiB map 1:1,
   after that positions wrap every 2 GiB into the upper half of the
   32-bit range. */
static uint32_t WrapPosition(uint64_t position) {
  uint32_t low = (uint32_t)position;
  uint64_t gb = position >> 30;
  if (gb <= 2) return low;
  /* Keep the low 30 bits; alternate the top two bits between 1 and 2. */
  return (low & ((1u << 30) - 1)) | (((uint32_t)((gb - 1) & 1) + 1) << 30);
}
| 125 | |
| 126 static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) { | |
| 127 MemoryManager* m = &s->memory_manager_; | |
| 128 if (s->storage_size_ < size) { | |
| 129 BROTLI_FREE(m, s->storage_); | |
| 130 s->storage_ = BROTLI_ALLOC(m, uint8_t, size); | |
| 131 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL; | |
| 132 s->storage_size_ = size; | |
| 133 } | |
| 134 return s->storage_; | |
| 135 } | |
| 136 | |
/* Picks a power-of-two hash table size, starting at 256 and doubling
   while it is still below both |max_table_size| and |input_size|. */
static size_t HashTableSize(size_t max_table_size, size_t input_size) {
  size_t htsize;
  for (htsize = 256; htsize < max_table_size && htsize < input_size;) {
    htsize *= 2;
  }
  return htsize;
}
| 144 | |
/* Returns a zeroed hash table with *table_size entries, sized for
   compressing |input_size| bytes at |quality|. Small tables reuse the
   buffer embedded in the encoder state; larger ones are heap-allocated
   and cached across calls. Returns 0 (NULL) on allocation failure. */
static int* GetHashTable(BrotliEncoderState* s, int quality,
                         size_t input_size, size_t* table_size) {
  /* Use smaller hash table when input.size() is smaller, since we
     fill the table, incurring O(hash table size) overhead for
     compression, and if the input is short, we won't need that
     many hash table entries anyway. */
  MemoryManager* m = &s->memory_manager_;
  const size_t max_table_size = MaxHashTableSize(quality);
  size_t htsize = HashTableSize(max_table_size, input_size);
  int* table;
  BROTLI_DCHECK(max_table_size >= 256);
  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    /* Only odd shifts are supported by fast-one-pass. */
    if ((htsize & 0xAAAAA) == 0) {
      htsize <<= 1;
    }
  }

  if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
    /* Fits into the in-state buffer; no heap traffic. */
    table = s->small_table_;
  } else {
    /* Reallocate the cached heap table only when it is too small. */
    if (htsize > s->large_table_size_) {
      s->large_table_size_ = htsize;
      BROTLI_FREE(m, s->large_table_);
      s->large_table_ = BROTLI_ALLOC(m, int, htsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
    }
    table = s->large_table_;
  }

  *table_size = htsize;
  /* Callers rely on an all-zero table. */
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
| 179 | |
| 180 static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window, | |
| 181 uint16_t* last_bytes, uint8_t* last_bytes_bits) { | |
| 182 if (large_window) { | |
| 183 *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11); | |
| 184 *last_bytes_bits = 14; | |
| 185 } else { | |
| 186 if (lgwin == 16) { | |
| 187 *last_bytes = 0; | |
| 188 *last_bytes_bits = 1; | |
| 189 } else if (lgwin == 17) { | |
| 190 *last_bytes = 1; | |
| 191 *last_bytes_bits = 7; | |
| 192 } else if (lgwin > 17) { | |
| 193 *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01); | |
| 194 *last_bytes_bits = 4; | |
| 195 } else { | |
| 196 *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01); | |
| 197 *last_bytes_bits = 7; | |
| 198 } | |
| 199 } | |
| 200 } | |
| 201 | |
| 202 /* TODO(eustas): move to compress_fragment.c? */ | |
| 203 /* Initializes the command and distance prefix codes for the first block. */ | |
/* TODO(eustas): move to compress_fragment.c? */
/* Initializes the command and distance prefix codes for the first block.
   The tables below are the library's canned defaults: Huffman code depths
   and bit patterns per command symbol, plus a pre-serialized form of the
   whole code (kDefaultCommandCode, kDefaultCommandCodeNumBits bits) so the
   fast one-pass path can emit it without rebuilding. */
static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
  /* Default code lengths (depths) for the 128 command symbols. */
  static const uint8_t kDefaultCommandDepths[128] = {
    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
  };
  /* Canonical Huffman bit patterns matching the depths above. */
  static const uint16_t kDefaultCommandBits[128] = {
    0, 0, 8, 9, 3, 35, 7, 71,
    39, 103, 23, 47, 175, 111, 239, 31,
    0, 0, 0, 4, 12, 2, 10, 6,
    13, 29, 11, 43, 27, 59, 87, 55,
    15, 79, 319, 831, 191, 703, 447, 959,
    0, 14, 1, 25, 5, 21, 19, 51,
    119, 159, 95, 223, 479, 991, 63, 575,
    127, 639, 383, 895, 255, 767, 511, 1023,
    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
  };
  /* Pre-serialized bit stream of the above code, 448 bits long. */
  static const uint8_t kDefaultCommandCode[] = {
    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
  };
  static const size_t kDefaultCommandCodeNumBits = 448;
  COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
  COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);

  /* Initialize the pre-compressed form of the command and distance prefix
     codes. */
  COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
  s->cmd_code_numbits = kDefaultCommandCodeNumBits;
}
| 245 | |
| 246 /* Decide about the context map based on the ability of the prediction | |
| 247 ability of the previous byte UTF8-prefix on the next byte. The | |
| 248 prediction ability is calculated as Shannon entropy. Here we need | |
| 249 Shannon entropy instead of 'BitsEntropy' since the prefix will be | |
| 250 encoded with the remaining 6 bits of the following byte, and | |
| 251 BitsEntropy will assume that symbol to be stored alone using Huffman | |
| 252 coding. */ | |
/* Decide about the context map based on the ability of the prediction
   ability of the previous byte UTF8-prefix on the next byte. The
   prediction ability is calculated as Shannon entropy. Here we need
   Shannon entropy instead of 'BitsEntropy' since the prefix will be
   encoded with the remaining 6 bits of the following byte, and
   BitsEntropy will assume that symbol to be stored alone using Huffman
   coding.
   Inputs: |bigram_histo| is a 3x3 histogram of (previous, current) UTF8
   prefix classes. Outputs: number of literal contexts (1, 2 or 3) and,
   when more than one, a static 64-entry context map. */
static void ChooseContextMap(int quality,
                             uint32_t* bigram_histo,
                             size_t* num_literal_contexts,
                             const uint32_t** literal_context_map) {
  /* 3-context map keyed on UTF8 continuation-byte prefix. */
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  /* 2-context map: ASCII vs. non-ASCII previous byte. */
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total;
  size_t i;
  size_t sink;
  /* entropy[1..3]: entropy with 1, 2 and 3 contexts respectively;
     entropy[0] is reused as the 1/total normalizer. */
  double entropy[4];
  /* Marginalize the 3x3 bigram histogram into 1- and 2-context views. */
  for (i = 0; i < 9; ++i) {
    monogram_histo[i % 3] += bigram_histo[i];
    two_prefix_histo[i % 6] += bigram_histo[i];
  }
  entropy[1] = ShannonEntropy(monogram_histo, 3, &sink);
  entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &sink) +
                ShannonEntropy(two_prefix_histo + 3, 3, &sink));
  entropy[3] = 0;
  for (i = 0; i < 3; ++i) {
    entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &sink);
  }

  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
  BROTLI_DCHECK(total != 0);
  /* Normalize entropies to bits-per-symbol. */
  entropy[0] = 1.0 / (double)total;
  entropy[1] *= entropy[0];
  entropy[2] *= entropy[0];
  entropy[3] *= entropy[0];

  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
    /* 3 context models is a bit slower, don't use it at lower qualities. */
    entropy[3] = entropy[1] * 10;
  }
  /* If expected savings by symbol are less than 0.2 bits, skip the
     context modeling -- in exchange for faster decoding speed. */
  if (entropy[1] - entropy[2] < 0.2 &&
      entropy[1] - entropy[3] < 0.2) {
    *num_literal_contexts = 1;
  } else if (entropy[2] - entropy[3] < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
  } else {
    *num_literal_contexts = 3;
    *literal_context_map = kStaticContextMapContinuation;
  }
}
| 312 | |
| 313 /* Decide if we want to use a more complex static context map containing 13 | |
| 314 context values, based on the entropy reduction of histograms over the | |
| 315 first 5 bits of literals. */ | |
/* Decide if we want to use a more complex static context map containing 13
   context values, based on the entropy reduction of histograms over the
   first 5 bits of literals. Returns BROTLI_TRUE and fills the two output
   parameters when the complex map is chosen. |arena| is caller-provided
   scratch of at least 32 * (BROTLI_MAX_STATIC_CONTEXTS + 1) uint32_t. */
static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  /* 13-context map over character classes; see per-row notes. */
  static const uint32_t kStaticContextMapComplexUTF8[64] = {
    11, 11, 12, 12, /* 0 special */
    0, 0, 0, 0, /* 4 lf */
    1, 1, 9, 9, /* 8 space */
    2, 2, 2, 2, /* !, first after space/lf and after something else. */
    1, 1, 1, 1, /* " */
    8, 3, 3, 3, /* % */
    1, 1, 1, 1, /* ({[ */
    2, 2, 2, 2, /* }]) */
    8, 4, 4, 4, /* :; */
    8, 7, 4, 4, /* . */
    8, 0, 0, 0, /* > */
    3, 3, 3, 3, /* [0..9] */
    5, 5, 10, 5, /* [A-Z] */
    5, 5, 10, 5,
    6, 6, 6, 6, /* [a-z] */
    6, 6, 6, 6,
  };
  BROTLI_UNUSED(quality);
  /* Try the more complex static context map only for long data. */
  if (size_hint < (1 << 20)) {
    return BROTLI_FALSE;
  } else {
    const size_t end_pos = start_pos + length;
    /* To make entropy calculations faster, we collect histograms
       over the 5 most significant bits of literals. One histogram
       without context and 13 additional histograms for each context value. */
    uint32_t* BROTLI_RESTRICT const combined_histo = arena;
    uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
    uint32_t total = 0;
    double entropy[3];
    size_t sink;
    size_t i;
    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
    memset(arena, 0, sizeof(arena[0]) * 32 * (BROTLI_MAX_STATIC_CONTEXTS + 1));
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      const size_t stride_end_pos = start_pos + 64;
      uint8_t prev2 = input[start_pos & mask];
      uint8_t prev1 = input[(start_pos + 1) & mask];
      size_t pos;
      /* To make the analysis of the data faster we only examine 64 byte long
         strides at every 4kB intervals. */
      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
        ++total;
        ++combined_histo[literal >> 3];
        ++context_histo[(context << 5) + (literal >> 3)];
        prev2 = prev1;
        prev1 = literal;
      }
    }
    /* entropy[1]: contextless entropy; entropy[2]: entropy with the
       13-value context map; entropy[0] is reused as the normalizer. */
    entropy[1] = ShannonEntropy(combined_histo, 32, &sink);
    entropy[2] = 0;
    for (i = 0; i < BROTLI_MAX_STATIC_CONTEXTS; ++i) {
      entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &sink);
    }
    entropy[0] = 1.0 / (double)total;
    entropy[1] *= entropy[0];
    entropy[2] *= entropy[0];
    /* The triggering heuristics below were tuned by compressing the individual
       files of the silesia corpus. If we skip this kind of context modeling
       for not very well compressible input (i.e. entropy using context modeling
       is 60% of maximal entropy) or if expected savings by symbol are less
       than 0.2 bits, then in every case when it triggers, the final compression
       ratio is improved. Note however that this heuristics might be too strict
       for some cases and could be tuned further. */
    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
      return BROTLI_FALSE;
    } else {
      *num_literal_contexts = BROTLI_MAX_STATIC_CONTEXTS;
      *literal_context_map = kStaticContextMapComplexUTF8;
      return BROTLI_TRUE;
    }
  }
}
| 397 | |
/* Chooses the literal context modeling for a metablock: either the complex
   13-value static map (long inputs), a simple UTF8-prefix-based map chosen
   by ChooseContextMap, or none at all (low quality / short input, in which
   case the outputs are left untouched). |arena| is caller-provided scratch. */
static void DecideOverLiteralContextModeling(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
    return;
  } else if (ShouldUseComplexStaticContextMap(
      input, start_pos, length, mask, quality, size_hint,
      num_literal_contexts, literal_context_map, arena)) {
    /* Context map was already set, nothing else to do. */
  } else {
    /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
       UTF8 data faster we only examine 64 byte long strides at every 4kB
       intervals. */
    const size_t end_pos = start_pos + length;
    uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
    memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      /* Maps the top two bits of a byte to a UTF8 prefix class (0..2). */
      static const int lut[4] = { 0, 0, 1, 2 };
      const size_t stride_end_pos = start_pos + 64;
      int prev = lut[input[start_pos & mask] >> 6] * 3;
      size_t pos;
      for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        ++bigram_prefix_histo[prev + lut[literal >> 6]];
        prev = lut[literal >> 6] * 3;
      }
    }
    ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                     literal_context_map);
  }
}
| 430 | |
| 431 static BROTLI_BOOL ShouldCompress( | |
| 432 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos, | |
| 433 const size_t bytes, const size_t num_literals, const size_t num_commands) { | |
| 434 /* TODO(eustas): find more precise minimal block overhead. */ | |
| 435 if (bytes <= 2) return BROTLI_FALSE; | |
| 436 if (num_commands < (bytes >> 8) + 2) { | |
| 437 if ((double)num_literals > 0.99 * (double)bytes) { | |
| 438 uint32_t literal_histo[256] = { 0 }; | |
| 439 static const uint32_t kSampleRate = 13; | |
| 440 static const double kInvSampleRate = 1.0 / 13.0; | |
| 441 static const double kMinEntropy = 7.92; | |
| 442 const double bit_cost_threshold = | |
| 443 (double)bytes * kMinEntropy * kInvSampleRate; | |
| 444 size_t t = (bytes + kSampleRate - 1) / kSampleRate; | |
| 445 uint32_t pos = (uint32_t)last_flush_pos; | |
| 446 size_t i; | |
| 447 for (i = 0; i < t; i++) { | |
| 448 ++literal_histo[data[pos & mask]]; | |
| 449 pos += kSampleRate; | |
| 450 } | |
| 451 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) { | |
| 452 return BROTLI_FALSE; | |
| 453 } | |
| 454 } | |
| 455 } | |
| 456 return BROTLI_TRUE; | |
| 457 } | |
| 458 | |
| 459 /* Chooses the literal context mode for a metablock */ | |
| 460 static ContextType ChooseContextMode(const BrotliEncoderParams* params, | |
| 461 const uint8_t* data, const size_t pos, const size_t mask, | |
| 462 const size_t length) { | |
| 463 /* We only do the computation for the option of something else than | |
| 464 CONTEXT_UTF8 for the highest qualities */ | |
| 465 if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING && | |
| 466 !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) { | |
| 467 return CONTEXT_SIGNED; | |
| 468 } | |
| 469 return CONTEXT_UTF8; | |
| 470 } | |
| 471 | |
/* Serializes one metablock covering |bytes| input bytes starting at
   |last_flush_pos| into |storage| / |storage_ix| (a bit position).
   Chooses between: an empty last-block marker, an uncompressed block
   (when ShouldCompress rejects the data or the compressed form ends up
   larger), and three compressed encodings of increasing sophistication
   selected by quality. May restore |dist_cache| from |saved_dist_cache|
   when falling back to an uncompressed block. */
static void WriteMetaBlockInternal(MemoryManager* m,
                                   const uint8_t* data,
                                   const size_t mask,
                                   const uint64_t last_flush_pos,
                                   const size_t bytes,
                                   const BROTLI_BOOL is_last,
                                   ContextType literal_context_mode,
                                   const BrotliEncoderParams* params,
                                   const uint8_t prev_byte,
                                   const uint8_t prev_byte2,
                                   const size_t num_literals,
                                   const size_t num_commands,
                                   Command* commands,
                                   const int* saved_dist_cache,
                                   int* dist_cache,
                                   size_t* storage_ix,
                                   uint8_t* storage) {
  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
  uint16_t last_bytes;
  uint8_t last_bytes_bits;
  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
  BrotliEncoderParams block_params = *params;

  if (bytes == 0) {
    /* Write the ISLAST and ISEMPTY bits. */
    BrotliWriteBits(2, 3, storage_ix, storage);
    *storage_ix = (*storage_ix + 7u) & ~7u;  /* Byte-align the stream. */
    return;
  }

  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
                      num_literals, num_commands)) {
    /* Restore the distance cache, as its last update by
       CreateBackwardReferences is now unused. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask, bytes,
                                     storage_ix, storage);
    return;
  }

  BROTLI_DCHECK(*storage_ix <= 14);
  /* Snapshot the partially-written header bits so the compressed attempt
     can be rolled back if it turns out larger than the raw data. */
  last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
  last_bytes_bits = (uint8_t)(*storage_ix);
  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    /* Fastest path: static entropy codes. */
    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
                             bytes, mask, is_last, params,
                             commands, num_commands,
                             storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
    /* Single block type, no block splitting. */
    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
                                bytes, mask, is_last, params,
                                commands, num_commands,
                                storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else {
    /* Full metablock with block splitting and context modeling. */
    MetaBlockSplit mb;
    InitMetaBlockSplit(&mb);
    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      size_t num_literal_contexts = 1;
      const uint32_t* literal_context_map = NULL;
      if (!params->disable_literal_context_modeling) {
        /* TODO(eustas): pull to higher level and reuse. */
        uint32_t* arena =
            BROTLI_ALLOC(m, uint32_t, 32 * (BROTLI_MAX_STATIC_CONTEXTS + 1));
        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
        DecideOverLiteralContextModeling(
            data, wrapped_last_flush_pos, bytes, mask, params->quality,
            params->size_hint, &num_literal_contexts,
            &literal_context_map, arena);
        BROTLI_FREE(m, arena);
      }
      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
          prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
          literal_context_map, commands, num_commands, &mb);
      if (BROTLI_IS_OOM(m)) return;
    } else {
      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) return;
    }
    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
      /* The number of distance symbols effectively used for distance
         histograms. It might be less than distance alphabet size
         for "Large Window Brotli" (32-bit). */
      BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
    }
    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
                         prev_byte, prev_byte2,
                         is_last,
                         &block_params,
                         literal_context_mode,
                         commands, num_commands,
                         &mb,
                         storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    DestroyMetaBlockSplit(m, &mb);
  }
  if (bytes + 4 < (*storage_ix >> 3)) {
    /* Compressed output was larger than the input plus overhead:
       restore the distance cache and last byte, then emit raw. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    storage[0] = (uint8_t)last_bytes;
    storage[1] = (uint8_t)(last_bytes >> 8);
    *storage_ix = last_bytes_bits;
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask,
                                     bytes, storage_ix, storage);
  }
}
| 585 | |
| 586 static void ChooseDistanceParams(BrotliEncoderParams* params) { | |
| 587 uint32_t distance_postfix_bits = 0; | |
| 588 uint32_t num_direct_distance_codes = 0; | |
| 589 | |
| 590 if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) { | |
| 591 uint32_t ndirect_msb; | |
| 592 if (params->mode == BROTLI_MODE_FONT) { | |
| 593 distance_postfix_bits = 1; | |
| 594 num_direct_distance_codes = 12; | |
| 595 } else { | |
| 596 distance_postfix_bits = params->dist.distance_postfix_bits; | |
| 597 num_direct_distance_codes = params->dist.num_direct_distance_codes; | |
| 598 } | |
| 599 ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F; | |
| 600 if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX || | |
| 601 num_direct_distance_codes > BROTLI_MAX_NDIRECT || | |
| 602 (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) { | |
| 603 distance_postfix_bits = 0; | |
| 604 num_direct_distance_codes = 0; | |
| 605 } | |
| 606 } | |
| 607 | |
| 608 BrotliInitDistanceParams(¶ms->dist, distance_postfix_bits, | |
| 609 num_direct_distance_codes, params->large_window); | |
| 610 } | |
| 611 | |
/* Lazily finalizes encoder configuration on the first real use: sanitizes
   parameters, sets up distance codes and the ring buffer, emits the stream
   header bits, and allocates the quality-specific arenas. Idempotent; returns
   BROTLI_FALSE only on prior/new OOM. */
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  if (s->is_initialized_) return BROTLI_TRUE;

  s->last_bytes_bits_ = 0;
  s->last_bytes_ = 0;
  s->flint_ = BROTLI_FLINT_DONE;
  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;

  SanitizeParams(&s->params);
  s->params.lgblock = ComputeLgBlock(&s->params);
  ChooseDistanceParams(&s->params);

  if (s->params.stream_offset != 0) {
    s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
    /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
    s->dist_cache_[0] = -16;
    s->dist_cache_[1] = -16;
    s->dist_cache_[2] = -16;
    s->dist_cache_[3] = -16;
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
  }

  RingBufferSetup(&s->params, &s->ringbuffer_);

  /* Initialize last byte with stream header. */
  {
    int lgwin = s->params.lgwin;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
        s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
      /* Fast paths need at least an 18-bit window. */
      lgwin = BROTLI_MAX(int, lgwin, 18);
    }
    if (s->params.stream_offset == 0) {
      EncodeWindowBits(lgwin, s->params.large_window,
                       &s->last_bytes_, &s->last_bytes_bits_);
    } else {
      /* Bigger values have the same effect, but could cause overflows. */
      s->params.stream_offset = BROTLI_MIN(size_t,
          s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
    }
  }

  /* Quality-specific scratch arenas. */
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    InitCommandPrefixCodes(s->one_pass_arena_);
  } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  }

  s->is_initialized_ = BROTLI_TRUE;
  return BROTLI_TRUE;
}
| 667 | |
| 668 static void BrotliEncoderInitParams(BrotliEncoderParams* params) { | |
| 669 params->mode = BROTLI_DEFAULT_MODE; | |
| 670 params->large_window = BROTLI_FALSE; | |
| 671 params->quality = BROTLI_DEFAULT_QUALITY; | |
| 672 params->lgwin = BROTLI_DEFAULT_WINDOW; | |
| 673 params->lgblock = 0; | |
| 674 params->stream_offset = 0; | |
| 675 params->size_hint = 0; | |
| 676 params->disable_literal_context_modeling = BROTLI_FALSE; | |
| 677 BrotliInitSharedEncoderDictionary(¶ms->dictionary); | |
| 678 params->dist.distance_postfix_bits = 0; | |
| 679 params->dist.num_direct_distance_codes = 0; | |
| 680 params->dist.alphabet_size_max = | |
| 681 BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS); | |
| 682 params->dist.alphabet_size_limit = params->dist.alphabet_size_max; | |
| 683 params->dist.max_distance = BROTLI_MAX_DISTANCE; | |
| 684 } | |
| 685 | |
/* Releases resources owned by |params| (the attached shared dictionary). */
static void BrotliEncoderCleanupParams(MemoryManager* m,
    BrotliEncoderParams* params) {
  BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
}
| 690 | |
| 691 #ifdef BROTLI_REPORTING | |
| 692 /* When BROTLI_REPORTING is defined extra reporting module have to be linked. */ | |
| 693 void BrotliEncoderOnStart(const BrotliEncoderState* s); | |
| 694 void BrotliEncoderOnFinish(const BrotliEncoderState* s); | |
| 695 #define BROTLI_ENCODER_ON_START(s) BrotliEncoderOnStart(s); | |
| 696 #define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s); | |
| 697 #else | |
| 698 #if !defined(BROTLI_ENCODER_ON_START) | |
| 699 #define BROTLI_ENCODER_ON_START(s) (void)(s); | |
| 700 #endif | |
| 701 #if !defined(BROTLI_ENCODER_ON_FINISH) | |
| 702 #define BROTLI_ENCODER_ON_FINISH(s) (void)(s); | |
| 703 #endif | |
| 704 #endif | |
| 705 | |
/* Resets every field of a freshly-allocated encoder state to its default.
   Heavyweight setup (ring buffer sizing, arenas, stream header) is deferred
   to EnsureInitialized. */
static void BrotliEncoderInitState(BrotliEncoderState* s) {
  BROTLI_ENCODER_ON_START(s);
  BrotliEncoderInitParams(&s->params);
  s->input_pos_ = 0;
  s->num_commands_ = 0;
  s->num_literals_ = 0;
  s->last_insert_len_ = 0;
  s->last_flush_pos_ = 0;
  s->last_processed_pos_ = 0;
  s->prev_byte_ = 0;
  s->prev_byte2_ = 0;
  s->storage_size_ = 0;
  s->storage_ = 0;
  HasherInit(&s->hasher_);
  s->large_table_ = NULL;
  s->large_table_size_ = 0;
  s->one_pass_arena_ = NULL;
  s->two_pass_arena_ = NULL;
  s->command_buf_ = NULL;
  s->literal_buf_ = NULL;
  s->total_in_ = 0;
  s->next_out_ = NULL;
  s->available_out_ = 0;
  s->total_out_ = 0;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->is_last_block_emitted_ = BROTLI_FALSE;
  s->is_initialized_ = BROTLI_FALSE;

  RingBufferInit(&s->ringbuffer_);

  s->commands_ = 0;
  s->cmd_alloc_size_ = 0;

  /* Initialize distance cache. */
  s->dist_cache_[0] = 4;
  s->dist_cache_[1] = 11;
  s->dist_cache_[2] = 15;
  s->dist_cache_[3] = 16;
  /* Save the state of the distance cache in case we need to restore it for
     emitting an uncompressed block. */
  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
| 748 | |
| 749 BrotliEncoderState* BrotliEncoderCreateInstance( | |
| 750 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { | |
| 751 BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc( | |
| 752 sizeof(BrotliEncoderState), alloc_func, free_func, opaque); | |
| 753 if (state == NULL) { | |
| 754 /* BROTLI_DUMP(); */ | |
| 755 return 0; | |
| 756 } | |
| 757 BrotliInitMemoryManager( | |
| 758 &state->memory_manager_, alloc_func, free_func, opaque); | |
| 759 BrotliEncoderInitState(state); | |
| 760 return state; | |
| 761 } | |
| 762 | |
/* Releases every heap buffer owned by |s| (but not |s| itself). If the
   memory manager has already recorded an out-of-memory condition, all
   tracked allocations are wiped through the manager instead of being freed
   one by one. */
static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;

  BROTLI_ENCODER_ON_FINISH(s);

  if (BROTLI_IS_OOM(m)) {
    BrotliWipeOutMemoryManager(m);
    return;
  }

  BROTLI_FREE(m, s->storage_);
  BROTLI_FREE(m, s->commands_);
  RingBufferFree(m, &s->ringbuffer_);
  DestroyHasher(m, &s->hasher_);
  BROTLI_FREE(m, s->large_table_);
  BROTLI_FREE(m, s->one_pass_arena_);
  BROTLI_FREE(m, s->two_pass_arena_);
  BROTLI_FREE(m, s->command_buf_);
  BROTLI_FREE(m, s->literal_buf_);
  BrotliEncoderCleanupParams(m, &s->params);
}
| 784 | |
| 785 /* Deinitializes and frees BrotliEncoderState instance. */ | |
| 786 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) { | |
| 787 if (!state) { | |
| 788 return; | |
| 789 } else { | |
| 790 BrotliEncoderCleanupState(state); | |
| 791 BrotliBootstrapFree(state, &state->memory_manager_); | |
| 792 } | |
| 793 } | |
| 794 | |
/*
   Copies the given input data to the internal ring buffer of the compressor.
   No processing of the data occurs at this time and this function can be
   called multiple times before calling WriteBrotliData() to process the
   accumulated input. At most input_block_size() bytes of input data can be
   copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
*/
static void CopyInputToRingBuffer(BrotliEncoderState* s,
                                  const size_t input_size,
                                  const uint8_t* input_buffer) {
  RingBuffer* ringbuffer_ = &s->ringbuffer_;
  MemoryManager* m = &s->memory_manager_;
  RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
  if (BROTLI_IS_OOM(m)) return;  /* OOM flag stays set in |m| for the caller. */
  s->input_pos_ += input_size;

  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
     hashing not depend on uninitialized data. This makes compression
     deterministic and it prevents uninitialized memory warnings in Valgrind.
     Even without erasing, the output would be valid (but nondeterministic).

     Background information: The compressor stores short (at most 8 bytes)
     substrings of the input already read in a hash table, and detects
     repetitions by looking up such substrings in the hash table. If it
     can find a substring, it checks whether the substring is really there
     in the ring buffer (or it's just a hash collision). Should the hash
     table become corrupt, this check makes sure that the output is
     still valid, albeit the compression ratio would be bad.

     The compressor populates the hash table from the ring buffer as it's
     reading new bytes from the input. However, at the last few indexes of
     the ring buffer, there are not enough bytes to build full-length
     substrings from. Since the hash table always contains full-length
     substrings, we overwrite with zeros here to make sure that those
     substrings will contain zeros at the end instead of uninitialized
     data.

     Please note that erasing is not necessary (because the
     memory region is already initialized since the ring buffer
     has a `tail' that holds a copy of the beginning,) so we
     skip erasing if we have already gone around at least once in
     the ring buffer.

     Only clear during the first round of ring-buffer writes. On
     subsequent rounds data in the ring-buffer would be affected. */
  if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
    /* This is the first time when the ring buffer is being written.
       We clear 7 bytes just after the bytes that have been copied from
       the input buffer.

       The ring-buffer has a "tail" that holds a copy of the beginning,
       but only once the ring buffer has been fully written once, i.e.,
       pos <= mask. For the first time, we need to write values
       in this tail (where index may be larger than mask), so that
       we have exactly defined behavior and don't read uninitialized
       memory. Due to performance reasons, hashing reads data using a
       LOAD64, which can go 7 bytes beyond the bytes written in the
       ring-buffer. */
    memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
  }
}
| 856 | |
| 857 /* Marks all input as processed. | |
| 858 Returns true if position wrapping occurs. */ | |
| 859 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) { | |
| 860 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_); | |
| 861 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_); | |
| 862 s->last_processed_pos_ = s->input_pos_; | |
| 863 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos); | |
| 864 } | |
| 865 | |
/* Tries to lengthen the most recent copy command by absorbing bytes that
   have not been processed yet. Every absorbed byte decrements |*bytes| and
   advances |*wrapped_last_processed_pos|. The match source is either the
   ring buffer (distance within the window) or the compound dictionary
   (distance beyond the window). */
static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
                              uint32_t* wrapped_last_processed_pos) {
  Command* last_command = &s->commands_[s->num_commands_ - 1];
  const uint8_t* data = s->ringbuffer_.buffer_;
  const uint32_t mask = s->ringbuffer_.mask_;
  uint64_t max_backward_distance =
      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
  /* Low 25 bits of copy_len_ hold the raw copy length (cf. GetLengthCode
     call below, which treats the high bits separately). */
  uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
  uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
  uint64_t max_distance = last_processed_pos < max_backward_distance ?
      last_processed_pos : max_backward_distance;
  uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
  uint32_t distance_code = CommandRestoreDistanceCode(last_command,
                                                      &s->params.dist);
  const CompoundDictionary* dict = &s->params.dictionary.compound;
  size_t compound_dictionary_size = dict->total_size;
  /* Only extend when the command's distance equals the most recent
     distance-cache entry (short code or explicit match). */
  if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
      distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
    if (cmd_dist <= max_distance) {
      /* Source is inside the window: extend while the next unprocessed byte
         equals the byte |cmd_dist| positions back in the ring buffer. */
      while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
          data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
        last_command->copy_len_++;
        (*bytes)--;
        (*wrapped_last_processed_pos)++;
      }
    } else {
      /* Source lies in the compound dictionary attached before the window;
         only proceed while the copy has not run past the dictionary end. */
      if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
          last_copy_len < cmd_dist - max_distance) {
        size_t address =
            compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
            (size_t)last_copy_len;
        size_t br_index = 0;
        size_t br_offset;
        const uint8_t* chunk;
        size_t chunk_length;
        /* Locate the dictionary chunk containing |address|. */
        while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
        br_offset = address - dict->chunk_offsets[br_index];
        chunk = dict->chunk_source[br_index];
        chunk_length =
            dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
        while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
            chunk[br_offset]) {
          last_command->copy_len_++;
          (*bytes)--;
          (*wrapped_last_processed_pos)++;
          /* Hop to the next chunk when the current one is exhausted; stop at
             the dictionary end. */
          if (++br_offset == chunk_length) {
            br_index++;
            br_offset = 0;
            if (br_index != dict->num_chunks) {
              chunk = dict->chunk_source[br_index];
              chunk_length = dict->chunk_offsets[br_index + 1] -
                  dict->chunk_offsets[br_index];
            } else {
              break;
            }
          }
        }
      }
    }
    /* The copy length is at most the metablock size, and thus expressible. */
    GetLengthCode(last_command->insert_len_,
                  (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
                           (int)(last_command->copy_len_ >> 25)),
                  TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
                  &last_command->cmd_prefix_);
  }
}
| 933 | |
/*
   Processes the accumulated input data and sets |*out_size| to the length of
   the new output meta-block, or to zero if no new output meta-block has been
   created (in this case the processed input data is buffered internally).
   If |*out_size| is positive, |*output| points to the start of the output
   data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
   always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
   to 7 bits of the last byte of output. Byte-alignment could be enforced by
   emitting an empty meta-data block.
   Returns BROTLI_FALSE if the size of the input data is larger than
   input_block_size().
*/
static BROTLI_BOOL EncodeData(
    BrotliEncoderState* s, const BROTLI_BOOL is_last,
    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
  const uint64_t delta = UnprocessedInputSize(s);
  uint32_t bytes = (uint32_t)delta;
  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
  uint8_t* data;
  uint32_t mask;
  MemoryManager* m = &s->memory_manager_;
  ContextType literal_context_mode;
  ContextLut literal_context_lut;
  BROTLI_BOOL fast_compress =
      s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY;

  data = s->ringbuffer_.buffer_;
  mask = s->ringbuffer_.mask_;

  if (delta == 0) {  /* No new input; still might want to flush or finish. */
    if (!data) {  /* No input has been processed so far. */
      if (is_last) {  /* Emit complete finalized stream. */
        BROTLI_DCHECK(s->last_bytes_bits_ <= 14);
        /* Append the 2-bit "last, empty" meta-block to the pending bits and
           emit through the internal tiny buffer. */
        s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_);
        s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u);
        s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_;
        s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8);
        *output = s->tiny_buf_.u8;
        *out_size = (s->last_bytes_bits_ + 7u) >> 3u;
        return BROTLI_TRUE;
      } else {  /* No data, not last -> no-op. */
        *out_size = 0;
        return BROTLI_TRUE;
      }
    } else {
      /* Fast compress performs flush every block -> flush is no-op. */
      if (!is_last && (!force_flush || fast_compress)) {  /* Another no-op. */
        *out_size = 0;
        return BROTLI_TRUE;
      }
    }
  }
  BROTLI_DCHECK(data);

  if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
  /* Adding more blocks after "last" block is forbidden. */
  if (s->is_last_block_emitted_) return BROTLI_FALSE;
  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;

  if (delta > InputBlockSize(s)) {
    return BROTLI_FALSE;
  }
  /* Lazily allocate the two-pass command/literal buffers on first use. */
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
      !s->command_buf_) {
    s->command_buf_ =
        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
    s->literal_buf_ =
        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
        BROTLI_IS_NULL(s->literal_buf_)) {
      return BROTLI_FALSE;
    }
  }

  if (fast_compress) {
    /* Qualities 0/1: compress this fragment straight to storage and return;
       no commands/literals are accumulated across blocks. */
    uint8_t* storage;
    size_t storage_ix = s->last_bytes_bits_;
    size_t table_size;
    int* table;

    storage = GetBrotliStorage(s, 2 * bytes + 503);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    table = GetHashTable(s, s->params.quality, bytes, &table_size);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      BrotliCompressFragmentFast(
          s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    } else {
      BrotliCompressFragmentTwoPass(
          s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          s->command_buf_, s->literal_buf_,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    }
    /* Keep the trailing partial byte for the next meta-block. */
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    UpdateLastProcessedPos(s);
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }

  {
    /* Theoretical max number of commands is 1 per 2 bytes. */
    size_t newsize = s->num_commands_ + bytes / 2 + 1;
    if (newsize > s->cmd_alloc_size_) {
      Command* new_commands;
      /* Reserve a bit more memory to allow merging with a next block
         without reallocation: that would impact speed. */
      newsize += (bytes / 4) + 16;
      s->cmd_alloc_size_ = newsize;
      new_commands = BROTLI_ALLOC(m, Command, newsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
      if (s->commands_) {
        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
        BROTLI_FREE(m, s->commands_);
      }
      s->commands_ = new_commands;
    }
  }

  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
      wrapped_last_processed_pos, bytes, is_last);

  literal_context_mode = ChooseContextMode(
      &s->params, data, WrapPosition(s->last_flush_pos_),
      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
  literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);

  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

  /* If the previous block ended in a copy (no pending insert), try to grow
     that copy with the new bytes before searching for new references. */
  if (s->num_commands_ && s->last_insert_len_ == 0) {
    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
  }

  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else {
    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
  }

  {
    const size_t max_length = MaxMetablockSize(&s->params);
    const size_t max_literals = max_length / 8;
    const size_t max_commands = max_length / 8;
    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
    /* If maximal possible additional block doesn't fit metablock, flush now. */
    /* TODO(eustas): Postpone decision until next block arrives? */
    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
        processed_bytes + InputBlockSize(s) <= max_length);
    /* If block splitting is not used, then flush as soon as there is some
       amount of commands / literals produced. */
    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
    if (!is_last && !force_flush && !should_flush &&
        next_input_fits_metablock &&
        s->num_literals_ < max_literals &&
        s->num_commands_ < max_commands) {
      /* Merge with next input block. Everything will happen later. */
      if (UpdateLastProcessedPos(s)) {
        HasherReset(&s->hasher_);
      }
      *out_size = 0;
      return BROTLI_TRUE;
    }
  }

  /* Create the last insert-only command. */
  if (s->last_insert_len_ > 0) {
    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
    s->num_literals_ += s->last_insert_len_;
    s->last_insert_len_ = 0;
  }

  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
    /* We have no new input data and we don't have to finish the stream, so
       nothing to do. */
    *out_size = 0;
    return BROTLI_TRUE;
  }
  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
  {
    const uint32_t metablock_size =
        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
    size_t storage_ix = s->last_bytes_bits_;
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    WriteMetaBlockInternal(
        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
        s->dist_cache_, &storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    /* Carry the trailing partial byte over to the next meta-block. */
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    s->last_flush_pos_ = s->input_pos_;
    if (UpdateLastProcessedPos(s)) {
      HasherReset(&s->hasher_);
    }
    /* Remember the last two flushed bytes for context modeling of the next
       meta-block. */
    if (s->last_flush_pos_ > 0) {
      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
    }
    if (s->last_flush_pos_ > 1) {
      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
    }
    s->num_commands_ = 0;
    s->num_literals_ = 0;
    /* Save the state of the distance cache in case we need to restore it for
       emitting an uncompressed block. */
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }
}
| 1181 | |
/* Dumps remaining output bits and metadata header to |header|.
   Returns number of produced bytes.
   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
   REQUIRED: |block_size| <= (1 << 24). */
static size_t WriteMetadataHeader(
    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
  size_t storage_ix;
  storage_ix = s->last_bytes_bits_;
  /* Flush the pending partial byte(s) first, then reset them. */
  header[0] = (uint8_t)s->last_bytes_;
  header[1] = (uint8_t)(s->last_bytes_ >> 8);
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;

  /* Metadata meta-block header: ISLAST=0, then the reserved/nibble fields
     that mark a metadata block (see RFC 7932 meta-block header layout). */
  BrotliWriteBits(1, 0, &storage_ix, header);
  BrotliWriteBits(2, 3, &storage_ix, header);
  BrotliWriteBits(1, 0, &storage_ix, header);
  if (block_size == 0) {
    BrotliWriteBits(2, 0, &storage_ix, header);
  } else {
    /* Number of whole bytes needed to represent |block_size - 1|. */
    uint32_t nbits = (block_size == 1) ? 1 :
        (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
    uint32_t nbytes = (nbits + 7) / 8;
    BrotliWriteBits(2, nbytes, &storage_ix, header);
    BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
  }
  return (storage_ix + 7u) >> 3;
}
| 1209 | |
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
  /* Upper bound on the "store uncompressed" fallback stream:
     [window bits / empty metadata] + N * [uncompressed block header] +
     [last empty meta-block]. The fixed overhead is 2 + 3 + 1 = 6 bytes and
     every started 2^14-byte chunk may add up to 4 header bytes. */
  size_t bound;
  if (input_size == 0) {
    /* The empty stream still needs 2 bytes. */
    return 2;
  }
  bound = input_size + 6 + 4 * (input_size >> 14);
  /* size_t overflow: report "no representable bound" as 0. */
  if (bound < input_size) return 0;
  return bound;
}
| 1218 | |
/* Wraps |input| into a raw (uncompressed) brotli stream using the minimal
   window size. |output| must provide at least
   BrotliEncoderMaxCompressedSize(input_size) addressable bytes.
   Returns the length of the produced stream. */
static size_t MakeUncompressedStream(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  size_t remaining = input_size;
  size_t pos = 0;      /* Read offset into |input|. */
  size_t written = 0;  /* Write offset into |output|. */
  if (input_size == 0) {
    output[0] = 6;  /* Shortest valid empty stream. */
    return 1;
  }
  output[written++] = 0x21;  /* window bits = 10, is_last = false */
  output[written++] = 0x03;  /* empty metadata, padding */
  do {
    /* Each uncompressed meta-block covers at most 2^24 bytes. */
    uint32_t chunk =
        (remaining > (1u << 24)) ? (1u << 24) : (uint32_t)remaining;
    uint32_t extra_nibbles =
        (chunk > (1u << 20)) ? 2 : (chunk > (1u << 16)) ? 1 : 0;
    uint32_t header = (extra_nibbles << 1) | ((chunk - 1) << 3) |
        (1u << (19 + 4 * extra_nibbles));
    output[written++] = (uint8_t)header;
    output[written++] = (uint8_t)(header >> 8);
    output[written++] = (uint8_t)(header >> 16);
    if (extra_nibbles == 2) output[written++] = (uint8_t)(header >> 24);
    memcpy(&output[written], &input[pos], chunk);
    written += chunk;
    pos += chunk;
    remaining -= chunk;
  } while (remaining > 0);
  output[written++] = 3;  /* Final empty "is_last" meta-block. */
  return written;
}
| 1254 | |
/* One-shot compression of |input_buffer| into |encoded_buffer| using a
   temporary streaming encoder instance. On success |*encoded_size| is set to
   the number of bytes produced. If streaming compression fails or exceeds
   the theoretical bound, falls back to wrapping the input uncompressed —
   but only when the caller's buffer is large enough for that. */
BROTLI_BOOL BrotliEncoderCompress(
    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
    size_t* encoded_size,
    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
  BrotliEncoderState* s;
  size_t out_size = *encoded_size;
  const uint8_t* input_start = input_buffer;
  uint8_t* output_start = encoded_buffer;
  size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
  if (out_size == 0) {
    /* Output buffer needs at least one byte. */
    return BROTLI_FALSE;
  }
  if (input_size == 0) {
    /* Handle the special case of empty input. */
    *encoded_size = 1;
    *encoded_buffer = 6;  /* Shortest valid empty stream. */
    return BROTLI_TRUE;
  }

  s = BrotliEncoderCreateInstance(0, 0, 0);
  if (!s) {
    return BROTLI_FALSE;
  } else {
    size_t available_in = input_size;
    const uint8_t* next_in = input_buffer;
    size_t available_out = *encoded_size;
    uint8_t* next_out = encoded_buffer;
    size_t total_out = 0;
    BROTLI_BOOL result = BROTLI_FALSE;
    /* TODO(eustas): check that parameters are sane. */
    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
      BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
    }
    /* Single FINISH call: entire input is available up-front. */
    result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
        &available_in, &next_in, &available_out, &next_out, &total_out);
    if (!BrotliEncoderIsFinished(s)) result = 0;
    *encoded_size = total_out;
    BrotliEncoderDestroyInstance(s);
    if (!result || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }
fallback:
  *encoded_size = 0;
  if (!max_out_size) return BROTLI_FALSE;
  if (out_size >= max_out_size) {
    /* Caller's buffer is provably large enough for the uncompressed wrap. */
    *encoded_size =
        MakeUncompressedStream(input_start, input_size, output_start);
    return BROTLI_TRUE;
  }
  return BROTLI_FALSE;
}
| 1314 | |
/* Seals the pending partial output byte by appending a 6-bit empty metadata
   ("padding") block, so that the stream becomes byte-aligned. The sealed
   bytes are appended to the current internal output if one exists, or to
   the tiny buffer otherwise. */
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
  uint32_t seal = s->last_bytes_;
  size_t seal_bits = s->last_bytes_bits_;
  uint8_t* destination;
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;
  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
  seal |= 0x6u << seal_bits;
  seal_bits += 6;
  /* If we have already created storage, then append to it.
     Storage is valid until next block is being compressed. */
  if (s->next_out_) {
    destination = s->next_out_ + s->available_out_;
  } else {
    destination = s->tiny_buf_.u8;
    s->next_out_ = destination;
  }
  /* Up to 3 bytes may be produced (pending bits + 6 seal bits). */
  destination[0] = (uint8_t)seal;
  if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
  if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
  s->available_out_ += (seal_bits + 7) >> 3;
}
| 1337 | |
| 1338 /* Fills the |total_out|, if it is not NULL. */ | |
| 1339 static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) { | |
| 1340 if (total_out) { | |
| 1341 /* Saturating conversion uint64_t -> size_t */ | |
| 1342 size_t result = (size_t)-1; | |
| 1343 if (s->total_out_ < result) { | |
| 1344 result = (size_t)s->total_out_; | |
| 1345 } | |
| 1346 *total_out = result; | |
| 1347 } | |
| 1348 } | |
| 1349 | |
/* Injects padding bits or pushes compressed data to output.
   Returns BROTLI_FALSE if nothing is done. */
static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  /* A flush is pending and output is not byte-aligned yet: align it first. */
  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
      s->last_bytes_bits_ != 0) {
    InjectBytePaddingBlock(s);
    return BROTLI_TRUE;
  }

  /* Move as much internal output as the caller's buffer accepts. */
  if (s->available_out_ != 0 && *available_out != 0) {
    size_t copy_output_size =
        BROTLI_MIN(size_t, s->available_out_, *available_out);
    memcpy(*next_out, s->next_out_, copy_output_size);
    *next_out += copy_output_size;
    *available_out -= copy_output_size;
    s->next_out_ += copy_output_size;
    s->available_out_ -= copy_output_size;
    s->total_out_ += copy_output_size;
    SetTotalOut(s, total_out);
    return BROTLI_TRUE;
  }

  return BROTLI_FALSE;
}
| 1375 | |
| 1376 static void CheckFlushComplete(BrotliEncoderState* s) { | |
| 1377 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && | |
| 1378 s->available_out_ == 0) { | |
| 1379 s->stream_state_ = BROTLI_STREAM_PROCESSING; | |
| 1380 s->next_out_ = 0; | |
| 1381 } | |
| 1382 } | |
| 1383 | |
| 1384 static BROTLI_BOOL BrotliEncoderCompressStreamFast( | |
| 1385 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in, | |
| 1386 const uint8_t** next_in, size_t* available_out, uint8_t** next_out, | |
| 1387 size_t* total_out) { | |
| 1388 const size_t block_size_limit = (size_t)1 << s->params.lgwin; | |
| 1389 const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize, | |
| 1390 BROTLI_MIN(size_t, *available_in, block_size_limit)); | |
| 1391 uint32_t* tmp_command_buf = NULL; | |
| 1392 uint32_t* command_buf = NULL; | |
| 1393 uint8_t* tmp_literal_buf = NULL; | |
| 1394 uint8_t* literal_buf = NULL; | |
| 1395 MemoryManager* m = &s->memory_manager_; | |
| 1396 if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY && | |
| 1397 s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) { | |
| 1398 return BROTLI_FALSE; | |
| 1399 } | |
| 1400 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { | |
| 1401 if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) { | |
| 1402 s->command_buf_ = | |
| 1403 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize); | |
| 1404 s->literal_buf_ = | |
| 1405 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize); | |
| 1406 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) || | |
| 1407 BROTLI_IS_NULL(s->literal_buf_)) { | |
| 1408 return BROTLI_FALSE; | |
| 1409 } | |
| 1410 } | |
| 1411 if (s->command_buf_) { | |
| 1412 command_buf = s->command_buf_; | |
| 1413 literal_buf = s->literal_buf_; | |
| 1414 } else { | |
| 1415 tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size); | |
| 1416 tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size); | |
| 1417 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) || | |
| 1418 BROTLI_IS_NULL(tmp_literal_buf)) { | |
| 1419 return BROTLI_FALSE; | |
| 1420 } | |
| 1421 command_buf = tmp_command_buf; | |
| 1422 literal_buf = tmp_literal_buf; | |
| 1423 } | |
| 1424 } | |
| 1425 | |
| 1426 while (BROTLI_TRUE) { | |
| 1427 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { | |
| 1428 continue; | |
| 1429 } | |
| 1430 | |
| 1431 /* Compress block only when internal output buffer is empty, stream is not | |
| 1432 finished, there is no pending flush request, and there is either | |
| 1433 additional input or pending operation. */ | |
| 1434 if (s->available_out_ == 0 && | |
| 1435 s->stream_state_ == BROTLI_STREAM_PROCESSING && | |
| 1436 (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) { | |
| 1437 size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in); | |
| 1438 BROTLI_BOOL is_last = | |
| 1439 (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH); | |
| 1440 BROTLI_BOOL force_flush = | |
| 1441 (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH); | |
| 1442 size_t max_out_size = 2 * block_size + 503; | |
| 1443 BROTLI_BOOL inplace = BROTLI_TRUE; | |
| 1444 uint8_t* storage = NULL; | |
| 1445 size_t storage_ix = s->last_bytes_bits_; | |
| 1446 size_t table_size; | |
| 1447 int* table; | |
| 1448 | |
| 1449 if (force_flush && block_size == 0) { | |
| 1450 s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; | |
| 1451 continue; | |
| 1452 } | |
| 1453 if (max_out_size <= *available_out) { | |
| 1454 storage = *next_out; | |
| 1455 } else { | |
| 1456 inplace = BROTLI_FALSE; | |
| 1457 storage = GetBrotliStorage(s, max_out_size); | |
| 1458 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 1459 } | |
| 1460 storage[0] = (uint8_t)s->last_bytes_; | |
| 1461 storage[1] = (uint8_t)(s->last_bytes_ >> 8); | |
| 1462 table = GetHashTable(s, s->params.quality, block_size, &table_size); | |
| 1463 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 1464 | |
| 1465 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { | |
| 1466 BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size, | |
| 1467 is_last, table, table_size, &storage_ix, storage); | |
| 1468 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 1469 } else { | |
| 1470 BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size, | |
| 1471 is_last, command_buf, literal_buf, table, table_size, | |
| 1472 &storage_ix, storage); | |
| 1473 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; | |
| 1474 } | |
| 1475 if (block_size != 0) { | |
| 1476 *next_in += block_size; | |
| 1477 *available_in -= block_size; | |
| 1478 s->total_in_ += block_size; | |
| 1479 } | |
| 1480 if (inplace) { | |
| 1481 size_t out_bytes = storage_ix >> 3; | |
| 1482 BROTLI_DCHECK(out_bytes <= *available_out); | |
| 1483 BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out); | |
| 1484 *next_out += out_bytes; | |
| 1485 *available_out -= out_bytes; | |
| 1486 s->total_out_ += out_bytes; | |
| 1487 SetTotalOut(s, total_out); | |
| 1488 } else { | |
| 1489 size_t out_bytes = storage_ix >> 3; | |
| 1490 s->next_out_ = storage; | |
| 1491 s->available_out_ = out_bytes; | |
| 1492 } | |
| 1493 s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); | |
| 1494 s->last_bytes_bits_ = storage_ix & 7u; | |
| 1495 | |
| 1496 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; | |
| 1497 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; | |
| 1498 continue; | |
| 1499 } | |
| 1500 break; | |
| 1501 } | |
| 1502 BROTLI_FREE(m, tmp_command_buf); | |
| 1503 BROTLI_FREE(m, tmp_literal_buf); | |
| 1504 CheckFlushComplete(s); | |
| 1505 return BROTLI_TRUE; | |
| 1506 } | |
| 1507 | |
/* Streams a metadata meta-block: first emits the metadata block header, then
   copies the raw payload from input to output unchanged.
   Rejects payloads larger than a single metadata block can carry.
   Returns BROTLI_FALSE on invalid stream state or payload size. */
static BROTLI_BOOL ProcessMetadata(
    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (*available_in > (1u << 24)) return BROTLI_FALSE;
  /* Switch to metadata block workflow, if required. */
  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
  }
  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  while (BROTLI_TRUE) {
    /* Drain previously prepared output into the client buffer first. */
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }
    /* Internal output still pending but client buffer is full: stop here. */
    if (s->available_out_ != 0) break;

    /* Flush any not-yet-emitted compressed data before the metadata block. */
    if (s->input_pos_ != s->last_flush_pos_) {
      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
          &s->available_out_, &s->next_out_);
      if (!result) return BROTLI_FALSE;
      continue;
    }

    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
      /* Serialize the metadata block header into the small internal buffer. */
      s->next_out_ = s->tiny_buf_.u8;
      s->available_out_ =
          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
      continue;
    } else {
      /* Exit workflow only when there is no more input and no more output.
         Otherwise client may continue producing empty metadata blocks. */
      if (s->remaining_metadata_bytes_ == 0) {
        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
        s->stream_state_ = BROTLI_STREAM_PROCESSING;
        break;
      }
      if (*available_out) {
        /* Directly copy input to output. */
        uint32_t copy = (uint32_t)BROTLI_MIN(
            size_t, s->remaining_metadata_bytes_, *available_out);
        memcpy(*next_out, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy;  /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        *next_out += copy;
        *available_out -= copy;
      } else {
        /* This guarantees progress in "TakeOutput" workflow. */
        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
        s->next_out_ = s->tiny_buf_.u8;
        memcpy(s->next_out_, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy;  /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        s->available_out_ = copy;
      }
      continue;
    }
  }

  return BROTLI_TRUE;
}
| 1577 | |
| 1578 static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) { | |
| 1579 if (s->params.size_hint == 0) { | |
| 1580 uint64_t delta = UnprocessedInputSize(s); | |
| 1581 uint64_t tail = available_in; | |
| 1582 uint32_t limit = 1u << 30; | |
| 1583 uint32_t total; | |
| 1584 if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) { | |
| 1585 total = limit; | |
| 1586 } else { | |
| 1587 total = (uint32_t)(delta + tail); | |
| 1588 } | |
| 1589 s->params.size_hint = total; | |
| 1590 } | |
| 1591 } | |
| 1592 | |
/* Main streaming entry point: consumes bytes from |*next_in| and produces
   compressed bytes into |*next_out| according to |op|.
   Dispatches to the fast path for the one-/two-pass qualities, otherwise
   copies input into the ring buffer and invokes EncodeData.
   Returns BROTLI_FALSE on error or API misuse. */
BROTLI_BOOL BrotliEncoderCompressStream(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  if (!EnsureInitialized(s)) return BROTLI_FALSE;

  /* Unfinished metadata block; check requirements. */
  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
  }

  if (op == BROTLI_OPERATION_EMIT_METADATA) {
    UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
    return ProcessMetadata(
        s, available_in, next_in, available_out, next_out, total_out);
  }

  /* Non-metadata operations are invalid while a metadata block is open. */
  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  /* Once flushing/finishing started, no new input may be supplied. */
  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
        available_out, next_out, total_out);
  }
  while (BROTLI_TRUE) {
    size_t remaining_block_size = RemainingInputBlockSize(s);
    /* Shorten input to flint size. */
    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
      remaining_block_size = (size_t)s->flint_;
    }

    /* Copy as much input as fits into the current input block. */
    if (remaining_block_size != 0 && *available_in != 0) {
      size_t copy_input_size =
          BROTLI_MIN(size_t, remaining_block_size, *available_in);
      CopyInputToRingBuffer(s, copy_input_size, *next_in);
      *next_in += copy_input_size;
      *available_in -= copy_input_size;
      s->total_in_ += copy_input_size;
      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
      continue;
    }

    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      /* Exit the "emit flint" workflow. */
      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
        CheckFlushComplete(s);
        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
          s->flint_ = BROTLI_FLINT_DONE;
        }
      }
      continue;
    }

    /* Compress data only when internal output buffer is empty, stream is not
       finished and there is no pending flush request. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
        BROTLI_BOOL result;
        /* Force emitting (uncompressed) piece containing flint. */
        if (!is_last && s->flint_ == 0) {
          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
          force_flush = BROTLI_TRUE;
        }
        UpdateSizeHint(s, *available_in);
        result = EncodeData(s, is_last, force_flush,
            &s->available_out_, &s->next_out_);
        if (!result) return BROTLI_FALSE;
        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
        continue;
      }
    }
    break;
  }
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
| 1682 | |
| 1683 BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) { | |
| 1684 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED && | |
| 1685 !BrotliEncoderHasMoreOutput(s)); | |
| 1686 } | |
| 1687 | |
| 1688 BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) { | |
| 1689 return TO_BROTLI_BOOL(s->available_out_ != 0); | |
| 1690 } | |
| 1691 | |
| 1692 const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) { | |
| 1693 size_t consumed_size = s->available_out_; | |
| 1694 uint8_t* result = s->next_out_; | |
| 1695 if (*size) { | |
| 1696 consumed_size = BROTLI_MIN(size_t, *size, s->available_out_); | |
| 1697 } | |
| 1698 if (consumed_size) { | |
| 1699 s->next_out_ += consumed_size; | |
| 1700 s->available_out_ -= consumed_size; | |
| 1701 s->total_out_ += consumed_size; | |
| 1702 CheckFlushComplete(s); | |
| 1703 *size = consumed_size; | |
| 1704 } else { | |
| 1705 *size = 0; | |
| 1706 result = 0; | |
| 1707 } | |
| 1708 return result; | |
| 1709 } | |
| 1710 | |
| 1711 uint32_t BrotliEncoderVersion(void) { | |
| 1712 return BROTLI_VERSION; | |
| 1713 } | |
| 1714 | |
/* Creates a managed prepared dictionary from raw bytes (or, when built with
   BROTLI_EXPERIMENTAL, from a serialized shared dictionary).
   The result wraps the prepared data together with the caller-supplied
   allocator so it can later be destroyed via
   BrotliEncoderDestroyPreparedDictionary.
   Returns NULL for unsupported types or on allocation/initialization
   failure. */
BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
    BrotliSharedDictionaryType type, size_t size,
    const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
  ManagedDictionary* managed_dictionary = NULL;
  BROTLI_BOOL type_is_known = BROTLI_FALSE;
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_RAW);
#if defined(BROTLI_EXPERIMENTAL)
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_SERIALIZED);
#endif  /* BROTLI_EXPERIMENTAL */
  if (!type_is_known) {
    return NULL;
  }
  managed_dictionary =
      BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
  if (managed_dictionary == NULL) {
    return NULL;
  }
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
    managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
        &managed_dictionary->memory_manager_, data, size);
  }
#if defined(BROTLI_EXPERIMENTAL)
  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
    SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
        &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
    managed_dictionary->dictionary = (uint32_t*)dict;
    if (dict != NULL) {
      BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
          &managed_dictionary->memory_manager_, data, size, quality, dict);
      if (!ok) {
        /* Initialization failed: release the partially built dictionary. */
        BrotliFree(&managed_dictionary->memory_manager_, dict);
        managed_dictionary->dictionary = NULL;
      }
    }
  }
#else  /* BROTLI_EXPERIMENTAL */
  (void)quality;
#endif  /* BROTLI_EXPERIMENTAL */
  if (managed_dictionary->dictionary == NULL) {
    BrotliDestroyManagedDictionary(managed_dictionary);
    return NULL;
  }
  return (BrotliEncoderPreparedDictionary*)managed_dictionary;
}
| 1760 | |
/* Releases a dictionary created by BrotliEncoderPrepareDictionary.
   Inspects the leading magic value to pick the matching cleanup routine;
   silently ignores objects that are not managed dictionaries. */
void BrotliEncoderDestroyPreparedDictionary(
    BrotliEncoderPreparedDictionary* dictionary) {
  ManagedDictionary* dict = (ManagedDictionary*)dictionary;
  if (!dictionary) return;
  /* First field of dictionary structs. */
  /* Only managed dictionaries are eligible for destruction by this method. */
  if (dict->magic != kManagedDictionaryMagic) {
    return;
  }
  if (dict->dictionary == NULL) {
    /* This should never ever happen. */
  } else if (*dict->dictionary == kLeanPreparedDictionaryMagic) {
    DestroyPreparedDictionary(
        &dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
  } else if (*dict->dictionary == kSharedDictionaryMagic) {
    BrotliCleanupSharedEncoderDictionary(&dict->memory_manager_,
        (SharedEncoderDictionary*)dict->dictionary);
    BrotliFree(&dict->memory_manager_, dict->dictionary);
  } else {
    /* There is also kPreparedDictionaryMagic, but such instances should be
     * constructed and destroyed by different means. */
  }
  /* Clear the inner pointer before freeing the wrapper itself. */
  dict->dictionary = NULL;
  BrotliDestroyManagedDictionary(dict);
}
| 1786 | |
/* Attaches a prepared dictionary to the encoder's parameters.
   Managed wrappers are unwrapped first; compound chunks are appended to the
   encoder's compound dictionary, while a full shared dictionary is rejected
   once the encoder is initialized. Returns BROTLI_FALSE on failure. */
BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
    const BrotliEncoderPreparedDictionary* dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* dict = dictionary;
  uint32_t magic = *((const uint32_t*)dict);
  SharedEncoderDictionary* current = NULL;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap managed dictionary. */
    ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
    magic = *managed_dictionary->dictionary;
    dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
  }
  current = &state->params.dictionary;
  if (magic == kPreparedDictionaryMagic ||
      magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
    if (!AttachPreparedDictionary(&current->compound, prepared)) {
      return BROTLI_FALSE;
    }
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* attached =
        (const SharedEncoderDictionary*)dict;
    /* "Default" means: non-contextual with the built-in static dictionary
       hash tables (i.e. nothing custom attached yet). */
    BROTLI_BOOL was_default = !current->contextual.context_based &&
        current->contextual.num_dictionaries == 1 &&
        current->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        current->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    BROTLI_BOOL new_default = !attached->contextual.context_based &&
        attached->contextual.num_dictionaries == 1 &&
        attached->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        attached->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    size_t i;
    if (state->is_initialized_) return BROTLI_FALSE;
    /* Quality cap is the strictest of the two. */
    current->max_quality =
        BROTLI_MIN(int, current->max_quality, attached->max_quality);
    for (i = 0; i < attached->compound.num_chunks; i++) {
      if (!AttachPreparedDictionary(&current->compound,
          attached->compound.chunks[i])) {
        return BROTLI_FALSE;
      }
    }
    if (!new_default) {
      /* Only one non-default contextual dictionary may be installed. */
      if (!was_default) return BROTLI_FALSE;
      /* Copy by value, but then set num_instances_ to 0 because their memory
         is managed by attached, not by current */
      current->contextual = attached->contextual;
      current->contextual.num_instances_ = 0;
    }
  } else {
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
| 1843 | |
/* Estimates the peak heap usage (in bytes) of compressing |input_size| bytes
   at the given quality/window, by mirroring the allocation strategy of the
   fast (q0/q1) and regular code paths. The numbers are estimates, not exact
   accounting (see the enwik8-based histogram guess below). */
size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
                                            size_t input_size) {
  BrotliEncoderParams params;
  size_t memory_manager_slots = BROTLI_ENCODER_MEMORY_MANAGER_SLOTS;
  size_t memory_manager_size = memory_manager_slots * sizeof(void*);
  /* Normalize parameters exactly the way the real encoder would. */
  BrotliEncoderInitParams(&params);
  params.quality = quality;
  params.lgwin = lgwin;
  params.size_hint = input_size;
  params.large_window = lgwin > BROTLI_MAX_WINDOW_BITS;
  SanitizeParams(&params);
  params.lgblock = ComputeLgBlock(&params);
  ChooseHasher(&params, &params.hasher);
  if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Fast path: state + hash table (+ command/literal buffers for q1). */
    size_t state_size = sizeof(BrotliEncoderState);
    size_t block_size = BROTLI_MIN(size_t, input_size, ((size_t)1ul << params.lgwin));
    size_t hash_table_size =
        HashTableSize(MaxHashTableSize(params.quality), block_size);
    /* Small tables live in the state itself; only large ones are heap. */
    size_t hash_size =
        (hash_table_size < (1u << 10)) ? 0 : sizeof(int) * hash_table_size;
    size_t cmdbuf_size = params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY ?
        5 * BROTLI_MIN(size_t, block_size, 1ul << 17) : 0;
    if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      state_size += sizeof(BrotliOnePassArena);
    } else {
      state_size += sizeof(BrotliTwoPassArena);
    }
    return hash_size + cmdbuf_size + state_size;
  } else {
    /* Regular path: ring buffer + hashers + command/output buffers
       + histograms. */
    size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
    int ringbuffer_bits = ComputeRbBits(&params);
    size_t ringbuffer_size = input_size < short_ringbuffer_size ?
        input_size : ((size_t)1u << ringbuffer_bits) + short_ringbuffer_size;
    size_t hash_size[4] = {0};
    size_t metablock_size =
        BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
    size_t inputblock_size =
        BROTLI_MIN(size_t, input_size, (size_t)1 << params.lgblock);
    size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
    size_t outbuf_size = metablock_size * 2 + 503;
    size_t histogram_size = 0;
    HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
    if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
      cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
          MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
    }
    if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      /* Only a very rough estimation, based on enwik8. */
      histogram_size = 200 << 20;
    } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
      size_t literal_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t command_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t distance_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      histogram_size = literal_histograms * sizeof(HistogramLiteral) +
          command_histograms * sizeof(HistogramCommand) +
          distance_histograms * sizeof(HistogramDistance);
    }
    return (memory_manager_size + ringbuffer_size +
        hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
        cmdbuf_size +
        outbuf_size +
        histogram_size);
  }
}
/* Returns the total heap footprint (in bytes) of a prepared dictionary,
   including the ManagedDictionary wrapper overhead when present.
   Returns 0 when the leading magic value is unrecognized (error). */
size_t BrotliEncoderGetPreparedDictionarySize(
    const BrotliEncoderPreparedDictionary* prepared_dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
  uint32_t magic = *((const uint32_t*)prepared);
  size_t overhead = 0;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap the managed wrapper; account for its own size. */
    const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
    overhead = sizeof(ManagedDictionary);
    magic = *managed->dictionary;
    prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
  }

  if (magic == kPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + dictionary->source_size +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + sizeof(uint8_t*) +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* dictionary =
        (const SharedEncoderDictionary*)prepared;
    const CompoundDictionary* compound = &dictionary->compound;
    const ContextualEncoderDictionary* contextual = &dictionary->contextual;
    size_t result = sizeof(*dictionary);
    size_t i;
    size_t num_instances;
    const BrotliEncoderDictionary* instances;
    /* Recurse into each attached compound chunk. */
    for (i = 0; i < compound->num_prepared_instances_; i++) {
      size_t size = BrotliEncoderGetPreparedDictionarySize(
          (const BrotliEncoderPreparedDictionary*)
          compound->prepared_instances_[i]);
      if (!size) return 0;  /* error */
      result += size;
    }
    if (contextual->context_based) {
      num_instances = contextual->num_instances_;
      instances = contextual->instances_;
      result += sizeof(*instances) * num_instances;
    } else {
      num_instances = 1;
      instances = &contextual->instance_;
    }
    /* Sum the per-instance auxiliary structures that were heap-allocated. */
    for (i = 0; i < num_instances; i++) {
      const BrotliEncoderDictionary* dict = &instances[i];
      result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
      if (dict->hash_table_data_words_) {
        result += sizeof(kStaticDictionaryHashWords);
      }
      if (dict->hash_table_data_lengths_) {
        result += sizeof(kStaticDictionaryHashLengths);
      }
      if (dict->buckets_data_) {
        result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
      }
      if (dict->dict_words_data_) {
        result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
      }
      if (dict->words_instance_) {
        result += sizeof(*dict->words_instance_);
        /* data_size not added here: it is never allocated by the
           SharedEncoderDictionary, instead it always points to the file
           already loaded in memory. So if the caller wants to include
           this memory as well, add the size of the loaded dictionary
           file to this. */
      }
    }
    return result + overhead;
  }
  return 0;  /* error */
}
| 1993 | |
#if defined(BROTLI_TEST)
/* Test-only shim that forwards to the internal MakeUncompressedStream. */
size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
size_t MakeUncompressedStreamForTest(
    const uint8_t* data, size_t data_size, uint8_t* dest) {
  return MakeUncompressedStream(data, data_size, dest);
}
#endif
| 2001 | |
| 2002 #if defined(__cplusplus) || defined(c_plusplus) | |
| 2003 } /* extern "C" */ | |
| 2004 #endif |
