comparison mupdf-source/thirdparty/brotli/c/enc/encode.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /* Copyright 2013 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* Implementation of Brotli compressor. */
8
9 #include <brotli/encode.h>
10 #include <brotli/shared_dictionary.h>
11 #include <brotli/types.h>
12
13 #include <stdlib.h> /* free, malloc */
14 #include <string.h> /* memcpy, memset */
15
16 #include "../common/constants.h"
17 #include "../common/context.h"
18 #include "../common/platform.h"
19 #include "../common/version.h"
20 #include "backward_references.h"
21 #include "backward_references_hq.h"
22 #include "bit_cost.h"
23 #include "brotli_bit_stream.h"
24 #include "compress_fragment.h"
25 #include "compress_fragment_two_pass.h"
26 #include "dictionary_hash.h"
27 #include "encoder_dict.h"
28 #include "entropy_encode.h"
29 #include "fast_log.h"
30 #include "hash.h"
31 #include "histogram.h"
32 #include "memory.h"
33 #include "metablock.h"
34 #include "prefix.h"
35 #include "state.h"
36 #include "quality.h"
37 #include "ringbuffer.h"
38 #include "utf8_util.h"
39 #include "write_bits.h"
40
41 #if defined(__cplusplus) || defined(c_plusplus)
42 extern "C" {
43 #endif
44
45 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
46
47 static size_t InputBlockSize(BrotliEncoderState* s) {
48 return (size_t)1 << s->params.lgblock;
49 }
50
51 static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
52 return s->input_pos_ - s->last_processed_pos_;
53 }
54
55 static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
56 const uint64_t delta = UnprocessedInputSize(s);
57 size_t block_size = InputBlockSize(s);
58 if (delta >= block_size) return 0;
59 return block_size - (size_t)delta;
60 }
61
62 BROTLI_BOOL BrotliEncoderSetParameter(
63 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
64 /* Changing parameters on the fly is not implemented yet. */
65 if (state->is_initialized_) return BROTLI_FALSE;
66 /* TODO(eustas): Validate/clamp parameters here. */
67 switch (p) {
68 case BROTLI_PARAM_MODE:
69 state->params.mode = (BrotliEncoderMode)value;
70 return BROTLI_TRUE;
71
72 case BROTLI_PARAM_QUALITY:
73 state->params.quality = (int)value;
74 return BROTLI_TRUE;
75
76 case BROTLI_PARAM_LGWIN:
77 state->params.lgwin = (int)value;
78 return BROTLI_TRUE;
79
80 case BROTLI_PARAM_LGBLOCK:
81 state->params.lgblock = (int)value;
82 return BROTLI_TRUE;
83
84 case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
85 if ((value != 0) && (value != 1)) return BROTLI_FALSE;
86 state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
87 return BROTLI_TRUE;
88
89 case BROTLI_PARAM_SIZE_HINT:
90 state->params.size_hint = value;
91 return BROTLI_TRUE;
92
93 case BROTLI_PARAM_LARGE_WINDOW:
94 state->params.large_window = TO_BROTLI_BOOL(!!value);
95 return BROTLI_TRUE;
96
97 case BROTLI_PARAM_NPOSTFIX:
98 state->params.dist.distance_postfix_bits = value;
99 return BROTLI_TRUE;
100
101 case BROTLI_PARAM_NDIRECT:
102 state->params.dist.num_direct_distance_codes = value;
103 return BROTLI_TRUE;
104
105 case BROTLI_PARAM_STREAM_OFFSET:
106 if (value > (1u << 30)) return BROTLI_FALSE;
107 state->params.stream_offset = value;
108 return BROTLI_TRUE;
109
110 default: return BROTLI_FALSE;
111 }
112 }
113
/* Wraps a 64-bit stream position into the 32-bit ring-buffer domain while
   preserving the "not-a-first-lap" property: the first 3GiB map to
   themselves; beyond that the two top bits alternate between 1 and 2, so a
   wrapped position can never be mistaken for a first-lap position. */
static uint32_t WrapPosition(uint64_t position) {
  uint32_t wrapped = (uint32_t)position;
  uint64_t gb = position >> 30;
  if (gb > 2) {
    /* Lap marker: alternates 1, 2, 1, 2, ... for every further GiB. */
    uint32_t lap = (uint32_t)((gb - 1) & 1) + 1;
    wrapped = (wrapped & ((1u << 30) - 1)) | (lap << 30);
  }
  return wrapped;
}
125
126 static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
127 MemoryManager* m = &s->memory_manager_;
128 if (s->storage_size_ < size) {
129 BROTLI_FREE(m, s->storage_);
130 s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
131 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
132 s->storage_size_ = size;
133 }
134 return s->storage_;
135 }
136
/* Returns the hash table size to use: a power of two, at least 256,
   doubled while it is below both |max_table_size| and |input_size|. */
static size_t HashTableSize(size_t max_table_size, size_t input_size) {
  size_t htsize = 256;
  while (htsize < max_table_size) {
    if (htsize >= input_size) break;
    htsize <<= 1;
  }
  return htsize;
}
144
145 static int* GetHashTable(BrotliEncoderState* s, int quality,
146 size_t input_size, size_t* table_size) {
147 /* Use smaller hash table when input.size() is smaller, since we
148 fill the table, incurring O(hash table size) overhead for
149 compression, and if the input is short, we won't need that
150 many hash table entries anyway. */
151 MemoryManager* m = &s->memory_manager_;
152 const size_t max_table_size = MaxHashTableSize(quality);
153 size_t htsize = HashTableSize(max_table_size, input_size);
154 int* table;
155 BROTLI_DCHECK(max_table_size >= 256);
156 if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
157 /* Only odd shifts are supported by fast-one-pass. */
158 if ((htsize & 0xAAAAA) == 0) {
159 htsize <<= 1;
160 }
161 }
162
163 if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
164 table = s->small_table_;
165 } else {
166 if (htsize > s->large_table_size_) {
167 s->large_table_size_ = htsize;
168 BROTLI_FREE(m, s->large_table_);
169 s->large_table_ = BROTLI_ALLOC(m, int, htsize);
170 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
171 }
172 table = s->large_table_;
173 }
174
175 *table_size = htsize;
176 memset(table, 0, htsize * sizeof(*table));
177 return table;
178 }
179
180 static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
181 uint16_t* last_bytes, uint8_t* last_bytes_bits) {
182 if (large_window) {
183 *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
184 *last_bytes_bits = 14;
185 } else {
186 if (lgwin == 16) {
187 *last_bytes = 0;
188 *last_bytes_bits = 1;
189 } else if (lgwin == 17) {
190 *last_bytes = 1;
191 *last_bytes_bits = 7;
192 } else if (lgwin > 17) {
193 *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
194 *last_bytes_bits = 4;
195 } else {
196 *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
197 *last_bytes_bits = 7;
198 }
199 }
200 }
201
/* TODO(eustas): move to compress_fragment.c? */
/* Initializes the command and distance prefix codes for the first block
   produced by the fast one-pass encoder, from precomputed default tables. */
static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
  /* Default Huffman code depths (bit lengths) for the 128 command symbols. */
  static const uint8_t kDefaultCommandDepths[128] = {
    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
  };
  /* Default Huffman code bit patterns matching the depths above. */
  static const uint16_t kDefaultCommandBits[128] = {
    0, 0, 8, 9, 3, 35, 7, 71,
    39, 103, 23, 47, 175, 111, 239, 31,
    0, 0, 0, 4, 12, 2, 10, 6,
    13, 29, 11, 43, 27, 59, 87, 55,
    15, 79, 319, 831, 191, 703, 447, 959,
    0, 14, 1, 25, 5, 21, 19, 51,
    119, 159, 95, 223, 479, 991, 63, 575,
    127, 639, 383, 895, 255, 767, 511, 1023,
    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
  };
  /* Pre-serialized bit-stream form of the code tables above
     (kDefaultCommandCodeNumBits bits long). */
  static const uint8_t kDefaultCommandCode[] = {
    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
  };
  static const size_t kDefaultCommandCodeNumBits = 448;
  COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
  COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);

  /* Initialize the pre-compressed form of the command and distance prefix
     codes. */
  COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
  s->cmd_code_numbits = kDefaultCommandCodeNumBits;
}
245
/* Decides on the literal context map based on how well the UTF8-prefix of
   the previous byte predicts the next byte. The prediction ability is
   calculated as Shannon entropy. Here we need Shannon entropy instead of
   'BitsEntropy' since the prefix will be encoded with the remaining 6 bits
   of the following byte, and BitsEntropy would assume that symbol to be
   stored alone using Huffman coding. */
static void ChooseContextMap(int quality,
    uint32_t* bigram_histo,
    size_t* num_literal_contexts,
    const uint32_t** literal_context_map) {
  /* Static context map used when 3 literal contexts are chosen. */
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  /* Static context map used when 2 literal contexts are chosen. */
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total;
  size_t i;
  size_t sink;
  double entropy[4];
  /* Fold the 3x3 bigram histogram into the marginal histograms needed for
     the 1-context and 2-context alternatives. */
  for (i = 0; i < 9; ++i) {
    monogram_histo[i % 3] += bigram_histo[i];
    two_prefix_histo[i % 6] += bigram_histo[i];
  }
  /* entropy[1]: 1 context, entropy[2]: 2 contexts, entropy[3]: 3 contexts. */
  entropy[1] = ShannonEntropy(monogram_histo, 3, &sink);
  entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &sink) +
                ShannonEntropy(two_prefix_histo + 3, 3, &sink));
  entropy[3] = 0;
  for (i = 0; i < 3; ++i) {
    entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &sink);
  }

  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
  BROTLI_DCHECK(total != 0);
  /* Normalize entropies to bits per symbol. */
  entropy[0] = 1.0 / (double)total;
  entropy[1] *= entropy[0];
  entropy[2] *= entropy[0];
  entropy[3] *= entropy[0];

  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
    /* 3 context models is a bit slower, don't use it at lower qualities. */
    entropy[3] = entropy[1] * 10;
  }
  /* If expected savings by symbol are less than 0.2 bits, skip the
     context modeling -- in exchange for faster decoding speed. */
  if (entropy[1] - entropy[2] < 0.2 &&
      entropy[1] - entropy[3] < 0.2) {
    *num_literal_contexts = 1;
  } else if (entropy[2] - entropy[3] < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
  } else {
    *num_literal_contexts = 3;
    *literal_context_map = kStaticContextMapContinuation;
  }
}
312
/* Decide if we want to use a more complex static context map containing 13
   context values, based on the entropy reduction of histograms over the
   first 5 bits of literals. Returns BROTLI_TRUE and sets the outputs when
   the complex map is chosen; otherwise leaves the outputs untouched. */
static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  /* Hand-tuned map; the trailing comments name the character class each
     group of four entries corresponds to. */
  static const uint32_t kStaticContextMapComplexUTF8[64] = {
    11, 11, 12, 12, /* 0 special */
    0, 0, 0, 0, /* 4 lf */
    1, 1, 9, 9, /* 8 space */
    2, 2, 2, 2, /* !, first after space/lf and after something else. */
    1, 1, 1, 1, /* " */
    8, 3, 3, 3, /* % */
    1, 1, 1, 1, /* ({[ */
    2, 2, 2, 2, /* }]) */
    8, 4, 4, 4, /* :; */
    8, 7, 4, 4, /* . */
    8, 0, 0, 0, /* > */
    3, 3, 3, 3, /* [0..9] */
    5, 5, 10, 5, /* [A-Z] */
    5, 5, 10, 5,
    6, 6, 6, 6, /* [a-z] */
    6, 6, 6, 6,
  };
  BROTLI_UNUSED(quality);
  /* Try the more complex static context map only for long data. */
  if (size_hint < (1 << 20)) {
    return BROTLI_FALSE;
  } else {
    const size_t end_pos = start_pos + length;
    /* To make entropy calculations faster, we collect histograms
       over the 5 most significant bits of literals. One histogram
       without context and 13 additional histograms for each context value. */
    uint32_t* BROTLI_RESTRICT const combined_histo = arena;
    uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
    uint32_t total = 0;
    double entropy[3];
    size_t sink;
    size_t i;
    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
    memset(arena, 0, sizeof(arena[0]) * 32 * (BROTLI_MAX_STATIC_CONTEXTS + 1));
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      const size_t stride_end_pos = start_pos + 64;
      uint8_t prev2 = input[start_pos & mask];
      uint8_t prev1 = input[(start_pos + 1) & mask];
      size_t pos;
      /* To make the analysis of the data faster we only examine 64 byte long
         strides at every 4kB intervals. */
      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
        ++total;
        ++combined_histo[literal >> 3];
        ++context_histo[(context << 5) + (literal >> 3)];
        prev2 = prev1;
        prev1 = literal;
      }
    }
    /* entropy[1]: without context; entropy[2]: with the 13-value map. */
    entropy[1] = ShannonEntropy(combined_histo, 32, &sink);
    entropy[2] = 0;
    for (i = 0; i < BROTLI_MAX_STATIC_CONTEXTS; ++i) {
      entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &sink);
    }
    /* Normalize to bits per sampled literal. */
    entropy[0] = 1.0 / (double)total;
    entropy[1] *= entropy[0];
    entropy[2] *= entropy[0];
    /* The triggering heuristics below were tuned by compressing the individual
       files of the silesia corpus. If we skip this kind of context modeling
       for not very well compressible input (i.e. entropy using context modeling
       is 60% of maximal entropy) or if expected savings by symbol are less
       than 0.2 bits, then in every case when it triggers, the final compression
       ratio is improved. Note however that this heuristics might be too strict
       for some cases and could be tuned further. */
    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
      return BROTLI_FALSE;
    } else {
      *num_literal_contexts = BROTLI_MAX_STATIC_CONTEXTS;
      *literal_context_map = kStaticContextMapComplexUTF8;
      return BROTLI_TRUE;
    }
  }
}
397
/* Chooses the number of literal contexts and the static literal context map
   for a metablock. For low quality or short input the outputs are left
   untouched (i.e. a single context is used). */
static void DecideOverLiteralContextModeling(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
    return;
  } else if (ShouldUseComplexStaticContextMap(
      input, start_pos, length, mask, quality, size_hint,
      num_literal_contexts, literal_context_map, arena)) {
    /* Context map was already set, nothing else to do. */
  } else {
    /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
       UTF8 data faster we only examine 64 byte long strides at every 4kB
       intervals. */
    const size_t end_pos = start_pos + length;
    uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
    memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      /* Classifies a byte by its top two bits into one of 3 prefix groups. */
      static const int lut[4] = { 0, 0, 1, 2 };
      const size_t stride_end_pos = start_pos + 64;
      int prev = lut[input[start_pos & mask] >> 6] * 3;
      size_t pos;
      for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        ++bigram_prefix_histo[prev + lut[literal >> 6]];
        prev = lut[literal >> 6] * 3;
      }
    }
    ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                     literal_context_map);
  }
}
430
431 static BROTLI_BOOL ShouldCompress(
432 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
433 const size_t bytes, const size_t num_literals, const size_t num_commands) {
434 /* TODO(eustas): find more precise minimal block overhead. */
435 if (bytes <= 2) return BROTLI_FALSE;
436 if (num_commands < (bytes >> 8) + 2) {
437 if ((double)num_literals > 0.99 * (double)bytes) {
438 uint32_t literal_histo[256] = { 0 };
439 static const uint32_t kSampleRate = 13;
440 static const double kInvSampleRate = 1.0 / 13.0;
441 static const double kMinEntropy = 7.92;
442 const double bit_cost_threshold =
443 (double)bytes * kMinEntropy * kInvSampleRate;
444 size_t t = (bytes + kSampleRate - 1) / kSampleRate;
445 uint32_t pos = (uint32_t)last_flush_pos;
446 size_t i;
447 for (i = 0; i < t; i++) {
448 ++literal_histo[data[pos & mask]];
449 pos += kSampleRate;
450 }
451 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
452 return BROTLI_FALSE;
453 }
454 }
455 }
456 return BROTLI_TRUE;
457 }
458
459 /* Chooses the literal context mode for a metablock */
460 static ContextType ChooseContextMode(const BrotliEncoderParams* params,
461 const uint8_t* data, const size_t pos, const size_t mask,
462 const size_t length) {
463 /* We only do the computation for the option of something else than
464 CONTEXT_UTF8 for the highest qualities */
465 if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
466 !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
467 return CONTEXT_SIGNED;
468 }
469 return CONTEXT_UTF8;
470 }
471
/* Writes one meta-block covering |bytes| input bytes (ending the stream iff
   |is_last|) into |storage| at bit offset |*storage_ix|. Chooses among an
   empty block, an uncompressed block and three compressed encodings of
   increasing sophistication depending on quality; falls back to the
   uncompressed form when the compressed output turns out larger than the
   input. On OOM the memory manager is flagged and the function returns
   early. */
static void WriteMetaBlockInternal(MemoryManager* m,
    const uint8_t* data,
    const size_t mask,
    const uint64_t last_flush_pos,
    const size_t bytes,
    const BROTLI_BOOL is_last,
    ContextType literal_context_mode,
    const BrotliEncoderParams* params,
    const uint8_t prev_byte,
    const uint8_t prev_byte2,
    const size_t num_literals,
    const size_t num_commands,
    Command* commands,
    const int* saved_dist_cache,
    int* dist_cache,
    size_t* storage_ix,
    uint8_t* storage) {
  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
  uint16_t last_bytes;
  uint8_t last_bytes_bits;
  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
  BrotliEncoderParams block_params = *params;

  if (bytes == 0) {
    /* Write the ISLAST and ISEMPTY bits, then pad to a byte boundary. */
    BrotliWriteBits(2, 3, storage_ix, storage);
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
                      num_literals, num_commands)) {
    /* Restore the distance cache, as its last update by
       CreateBackwardReferences is now unused. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask, bytes,
                                     storage_ix, storage);
    return;
  }

  /* Remember the current header bytes/bit position so they can be restored
     if the compressed form is rejected at the end of this function. */
  BROTLI_DCHECK(*storage_ix <= 14);
  last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
  last_bytes_bits = (uint8_t)(*storage_ix);
  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    /* Cheapest compressed encoding: static entropy codes. */
    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
                             bytes, mask, is_last, params,
                             commands, num_commands,
                             storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
    /* Single-block encoding without block splitting. */
    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
                                bytes, mask, is_last, params,
                                commands, num_commands,
                                storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else {
    /* Full meta-block construction with block splitting. */
    MetaBlockSplit mb;
    InitMetaBlockSplit(&mb);
    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      size_t num_literal_contexts = 1;
      const uint32_t* literal_context_map = NULL;
      if (!params->disable_literal_context_modeling) {
        /* TODO(eustas): pull to higher level and reuse. */
        uint32_t* arena =
            BROTLI_ALLOC(m, uint32_t, 32 * (BROTLI_MAX_STATIC_CONTEXTS + 1));
        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
        DecideOverLiteralContextModeling(
            data, wrapped_last_flush_pos, bytes, mask, params->quality,
            params->size_hint, &num_literal_contexts,
            &literal_context_map, arena);
        BROTLI_FREE(m, arena);
      }
      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
          prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
          literal_context_map, commands, num_commands, &mb);
      if (BROTLI_IS_OOM(m)) return;
    } else {
      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) return;
    }
    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
      /* The number of distance symbols effectively used for distance
         histograms. It might be less than distance alphabet size
         for "Large Window Brotli" (32-bit). */
      BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
    }
    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
                         prev_byte, prev_byte2,
                         is_last,
                         &block_params,
                         literal_context_mode,
                         commands, num_commands,
                         &mb,
                         storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    DestroyMetaBlockSplit(m, &mb);
  }
  if (bytes + 4 < (*storage_ix >> 3)) {
    /* Compressed output is larger than the input: restore the distance
       cache and last header bytes, and emit an uncompressed block instead. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    storage[0] = (uint8_t)last_bytes;
    storage[1] = (uint8_t)(last_bytes >> 8);
    *storage_ix = last_bytes_bits;
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask,
                                     bytes, storage_ix, storage);
  }
}
585
586 static void ChooseDistanceParams(BrotliEncoderParams* params) {
587 uint32_t distance_postfix_bits = 0;
588 uint32_t num_direct_distance_codes = 0;
589
590 if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
591 uint32_t ndirect_msb;
592 if (params->mode == BROTLI_MODE_FONT) {
593 distance_postfix_bits = 1;
594 num_direct_distance_codes = 12;
595 } else {
596 distance_postfix_bits = params->dist.distance_postfix_bits;
597 num_direct_distance_codes = params->dist.num_direct_distance_codes;
598 }
599 ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
600 if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
601 num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
602 (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {
603 distance_postfix_bits = 0;
604 num_direct_distance_codes = 0;
605 }
606 }
607
608 BrotliInitDistanceParams(&params->dist, distance_postfix_bits,
609 num_direct_distance_codes, params->large_window);
610 }
611
/* Performs one-time lazy initialization of the encoder: sanitizes the
   parameters, sets up the ring buffer, prepares the stream-header bits and
   allocates the quality-specific arena. Returns BROTLI_FALSE on OOM. */
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  if (s->is_initialized_) return BROTLI_TRUE;

  s->last_bytes_bits_ = 0;
  s->last_bytes_ = 0;
  s->flint_ = BROTLI_FLINT_DONE;
  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;

  SanitizeParams(&s->params);
  s->params.lgblock = ComputeLgBlock(&s->params);
  ChooseDistanceParams(&s->params);

  /* A non-zero stream offset means this encoder continues a concatenated
     stream; request the 2-byte "flint" and invalidate the distance cache. */
  if (s->params.stream_offset != 0) {
    s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
    /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
    s->dist_cache_[0] = -16;
    s->dist_cache_[1] = -16;
    s->dist_cache_[2] = -16;
    s->dist_cache_[3] = -16;
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
  }

  RingBufferSetup(&s->params, &s->ringbuffer_);

  /* Initialize last byte with stream header. */
  {
    int lgwin = s->params.lgwin;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
        s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
      /* The fast qualities use a window of at least 18 bits. */
      lgwin = BROTLI_MAX(int, lgwin, 18);
    }
    if (s->params.stream_offset == 0) {
      EncodeWindowBits(lgwin, s->params.large_window,
                       &s->last_bytes_, &s->last_bytes_bits_);
    } else {
      /* Bigger values have the same effect, but could cause overflows. */
      s->params.stream_offset = BROTLI_MIN(size_t,
          s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
    }
  }

  /* Allocate the scratch arena only for the quality that needs it. */
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    InitCommandPrefixCodes(s->one_pass_arena_);
  } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  }

  s->is_initialized_ = BROTLI_TRUE;
  return BROTLI_TRUE;
}
667
668 static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
669 params->mode = BROTLI_DEFAULT_MODE;
670 params->large_window = BROTLI_FALSE;
671 params->quality = BROTLI_DEFAULT_QUALITY;
672 params->lgwin = BROTLI_DEFAULT_WINDOW;
673 params->lgblock = 0;
674 params->stream_offset = 0;
675 params->size_hint = 0;
676 params->disable_literal_context_modeling = BROTLI_FALSE;
677 BrotliInitSharedEncoderDictionary(&params->dictionary);
678 params->dist.distance_postfix_bits = 0;
679 params->dist.num_direct_distance_codes = 0;
680 params->dist.alphabet_size_max =
681 BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
682 params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
683 params->dist.max_distance = BROTLI_MAX_DISTANCE;
684 }
685
/* Releases the resources owned by |params| (the attached shared encoder
   dictionary). */
static void BrotliEncoderCleanupParams(MemoryManager* m,
    BrotliEncoderParams* params) {
  BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
}
690
691 #ifdef BROTLI_REPORTING
692 /* When BROTLI_REPORTING is defined extra reporting module have to be linked. */
693 void BrotliEncoderOnStart(const BrotliEncoderState* s);
694 void BrotliEncoderOnFinish(const BrotliEncoderState* s);
695 #define BROTLI_ENCODER_ON_START(s) BrotliEncoderOnStart(s);
696 #define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s);
697 #else
698 #if !defined(BROTLI_ENCODER_ON_START)
699 #define BROTLI_ENCODER_ON_START(s) (void)(s);
700 #endif
701 #if !defined(BROTLI_ENCODER_ON_FINISH)
702 #define BROTLI_ENCODER_ON_FINISH(s) (void)(s);
703 #endif
704 #endif
705
/* Resets every field of the encoder state to its initial value. Called once
   per instance from BrotliEncoderCreateInstance; the heavier, lazy setup
   (ring buffer sizing, header bits, arenas) happens in EnsureInitialized. */
static void BrotliEncoderInitState(BrotliEncoderState* s) {
  BROTLI_ENCODER_ON_START(s);
  BrotliEncoderInitParams(&s->params);
  /* Stream position / statistics. */
  s->input_pos_ = 0;
  s->num_commands_ = 0;
  s->num_literals_ = 0;
  s->last_insert_len_ = 0;
  s->last_flush_pos_ = 0;
  s->last_processed_pos_ = 0;
  s->prev_byte_ = 0;
  s->prev_byte2_ = 0;
  /* Lazily allocated buffers. */
  s->storage_size_ = 0;
  s->storage_ = 0;
  HasherInit(&s->hasher_);
  s->large_table_ = NULL;
  s->large_table_size_ = 0;
  s->one_pass_arena_ = NULL;
  s->two_pass_arena_ = NULL;
  s->command_buf_ = NULL;
  s->literal_buf_ = NULL;
  /* Streaming-API bookkeeping. */
  s->total_in_ = 0;
  s->next_out_ = NULL;
  s->available_out_ = 0;
  s->total_out_ = 0;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->is_last_block_emitted_ = BROTLI_FALSE;
  s->is_initialized_ = BROTLI_FALSE;

  RingBufferInit(&s->ringbuffer_);

  s->commands_ = 0;
  s->cmd_alloc_size_ = 0;

  /* Initialize distance cache. */
  s->dist_cache_[0] = 4;
  s->dist_cache_[1] = 11;
  s->dist_cache_[2] = 15;
  s->dist_cache_[3] = 16;
  /* Save the state of the distance cache in case we need to restore it for
     emitting an uncompressed block. */
  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
748
749 BrotliEncoderState* BrotliEncoderCreateInstance(
750 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
751 BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc(
752 sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
753 if (state == NULL) {
754 /* BROTLI_DUMP(); */
755 return 0;
756 }
757 BrotliInitMemoryManager(
758 &state->memory_manager_, alloc_func, free_func, opaque);
759 BrotliEncoderInitState(state);
760 return state;
761 }
762
/* Frees every heap resource owned by the encoder state (but not the state
   object itself). When the memory manager is in OOM state, the individual
   frees are skipped and the manager performs a bulk wipe-out instead. */
static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;

  BROTLI_ENCODER_ON_FINISH(s);

  if (BROTLI_IS_OOM(m)) {
    BrotliWipeOutMemoryManager(m);
    return;
  }

  BROTLI_FREE(m, s->storage_);
  BROTLI_FREE(m, s->commands_);
  RingBufferFree(m, &s->ringbuffer_);
  DestroyHasher(m, &s->hasher_);
  BROTLI_FREE(m, s->large_table_);
  BROTLI_FREE(m, s->one_pass_arena_);
  BROTLI_FREE(m, s->two_pass_arena_);
  BROTLI_FREE(m, s->command_buf_);
  BROTLI_FREE(m, s->literal_buf_);
  BrotliEncoderCleanupParams(m, &s->params);
}
784
785 /* Deinitializes and frees BrotliEncoderState instance. */
786 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
787 if (!state) {
788 return;
789 } else {
790 BrotliEncoderCleanupState(state);
791 BrotliBootstrapFree(state, &state->memory_manager_);
792 }
793 }
794
/*
   Copies the given input data to the internal ring buffer of the compressor.
   No processing of the data occurs at this time and this function can be
   called multiple times before calling WriteBrotliData() to process the
   accumulated input. At most input_block_size() bytes of input data can be
   copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
 */
static void CopyInputToRingBuffer(BrotliEncoderState* s,
    const size_t input_size,
    const uint8_t* input_buffer) {
  RingBuffer* ringbuffer_ = &s->ringbuffer_;
  MemoryManager* m = &s->memory_manager_;
  RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
  if (BROTLI_IS_OOM(m)) return;
  s->input_pos_ += input_size;

  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
     hashing not depend on uninitialized data. This makes compression
     deterministic and it prevents uninitialized memory warnings in Valgrind.
     Even without erasing, the output would be valid (but nondeterministic).

     Background information: The compressor stores short (at most 8 bytes)
     substrings of the input already read in a hash table, and detects
     repetitions by looking up such substrings in the hash table. If it
     can find a substring, it checks whether the substring is really there
     in the ring buffer (or it's just a hash collision). Should the hash
     table become corrupt, this check makes sure that the output is
     still valid, albeit the compression ratio would be bad.

     The compressor populates the hash table from the ring buffer as it's
     reading new bytes from the input. However, at the last few indexes of
     the ring buffer, there are not enough bytes to build full-length
     substrings from. Since the hash table always contains full-length
     substrings, we overwrite with zeros here to make sure that those
     substrings will contain zeros at the end instead of uninitialized
     data.

     Please note that erasing is not necessary (because the
     memory region is already initialized since the ring buffer
     has a `tail' that holds a copy of the beginning,) so we
     skip erasing if we have already gone around at least once in
     the ring buffer.

     Only clear during the first round of ring-buffer writes. On
     subsequent rounds data in the ring-buffer would be affected. */
  if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
    /* This is the first time when the ring buffer is being written.
       We clear 7 bytes just after the bytes that have been copied from
       the input buffer.

       The ring-buffer has a "tail" that holds a copy of the beginning,
       but only once the ring buffer has been fully written once, i.e.,
       pos <= mask. For the first time, we need to write values
       in this tail (where index may be larger than mask), so that
       we have exactly defined behavior and don't read uninitialized
       memory. Due to performance reasons, hashing reads data using a
       LOAD64, which can go 7 bytes beyond the bytes written in the
       ring-buffer. */
    memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
  }
}
856
857 /* Marks all input as processed.
858 Returns true if position wrapping occurs. */
859 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
860 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
861 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
862 s->last_processed_pos_ = s->input_pos_;
863 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
864 }
865
/* Tries to grow the most recent copy command by absorbing the first bytes of
   the still-unprocessed input, for as long as those bytes keep matching at
   the command's distance. The match may continue inside the ring buffer or
   inside a compound-dictionary chunk. |*bytes| and
   |*wrapped_last_processed_pos| are advanced past every absorbed byte. */
static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
                              uint32_t* wrapped_last_processed_pos) {
  Command* last_command = &s->commands_[s->num_commands_ - 1];
  const uint8_t* data = s->ringbuffer_.buffer_;
  const uint32_t mask = s->ringbuffer_.mask_;
  uint64_t max_backward_distance =
      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
  /* Low 25 bits of copy_len_ hold the copy length; the upper bits hold a
     length-code delta (see the GetLengthCode call below). */
  uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
  uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
  uint64_t max_distance = last_processed_pos < max_backward_distance ?
      last_processed_pos : max_backward_distance;
  uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
  uint32_t distance_code = CommandRestoreDistanceCode(last_command,
                                                      &s->params.dist);
  const CompoundDictionary* dict = &s->params.dictionary.compound;
  size_t compound_dictionary_size = dict->total_size;
  /* Only extend when the command's distance equals the most recent distance
     (a short code, or an explicit code that encodes the same value). */
  if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
      distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
    if (cmd_dist <= max_distance) {
      /* Copy source lies inside the ring buffer: extend while bytes match
         at distance |cmd_dist|. */
      while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
          data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
        last_command->copy_len_++;
        (*bytes)--;
        (*wrapped_last_processed_pos)++;
      }
    } else {
      /* Distance reaches past the window: the source is addressed inside
         the compound dictionary (see the address computation below). */
      if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
          last_copy_len < cmd_dist - max_distance) {
        size_t address =
            compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
            (size_t)last_copy_len;
        size_t br_index = 0;
        size_t br_offset;
        const uint8_t* chunk;
        size_t chunk_length;
        /* Locate the dictionary chunk containing |address|. */
        while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
        br_offset = address - dict->chunk_offsets[br_index];
        chunk = dict->chunk_source[br_index];
        chunk_length =
            dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
        while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
            chunk[br_offset]) {
          last_command->copy_len_++;
          (*bytes)--;
          (*wrapped_last_processed_pos)++;
          if (++br_offset == chunk_length) {
            /* Crossed a chunk boundary: continue in the next chunk, or stop
               at the end of the dictionary. */
            br_index++;
            br_offset = 0;
            if (br_index != dict->num_chunks) {
              chunk = dict->chunk_source[br_index];
              chunk_length = dict->chunk_offsets[br_index + 1] -
                  dict->chunk_offsets[br_index];
            } else {
              break;
            }
          }
        }
      }
    }
    /* The copy length is at most the metablock size, and thus expressible. */
    GetLengthCode(last_command->insert_len_,
                  (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
                           (int)(last_command->copy_len_ >> 25)),
                  TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
                  &last_command->cmd_prefix_);
  }
}
933
/*
   Processes the accumulated input data and sets |*out_size| to the length of
   the new output meta-block, or to zero if no new output meta-block has been
   created (in this case the processed input data is buffered internally).
   If |*out_size| is positive, |*output| points to the start of the output
   data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
   always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
   to 7 bits of the last byte of output. Byte-alignment could be enforced by
   emitting an empty meta-data block.
   Returns BROTLI_FALSE if the size of the input data is larger than
   input_block_size().
*/
static BROTLI_BOOL EncodeData(
    BrotliEncoderState* s, const BROTLI_BOOL is_last,
    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
  const uint64_t delta = UnprocessedInputSize(s);
  uint32_t bytes = (uint32_t)delta;
  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
  uint8_t* data;
  uint32_t mask;
  MemoryManager* m = &s->memory_manager_;
  ContextType literal_context_mode;
  ContextLut literal_context_lut;
  BROTLI_BOOL fast_compress =
      s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY;

  data = s->ringbuffer_.buffer_;
  mask = s->ringbuffer_.mask_;

  if (delta == 0) { /* No new input; still might want to flush or finish. */
    if (!data) { /* No input has been processed so far. */
      if (is_last) { /* Emit complete finalized stream. */
        BROTLI_DCHECK(s->last_bytes_bits_ <= 14);
        /* Append two set bits that terminate the stream
           (ISLAST + ISLASTEMPTY, per the brotli format, RFC 7932). */
        s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_);
        s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u);
        s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_;
        s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8);
        *output = s->tiny_buf_.u8;
        *out_size = (s->last_bytes_bits_ + 7u) >> 3u;
        return BROTLI_TRUE;
      } else { /* No data, not last -> no-op. */
        *out_size = 0;
        return BROTLI_TRUE;
      }
    } else {
      /* Fast compress performs flush every block -> flush is no-op. */
      if (!is_last && (!force_flush || fast_compress)) { /* Another no-op. */
        *out_size = 0;
        return BROTLI_TRUE;
      }
    }
  }
  BROTLI_DCHECK(data);

  if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
  /* Adding more blocks after "last" block is forbidden. */
  if (s->is_last_block_emitted_) return BROTLI_FALSE;
  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;

  if (delta > InputBlockSize(s)) {
    return BROTLI_FALSE;
  }
  /* Two-pass fast mode needs scratch buffers; allocate them lazily. */
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
      !s->command_buf_) {
    s->command_buf_ =
        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
    s->literal_buf_ =
        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
        BROTLI_IS_NULL(s->literal_buf_)) {
      return BROTLI_FALSE;
    }
  }

  if (fast_compress) {
    /* Fast path: compress the fragment straight into bit storage and
       return, bypassing the command/metablock pipeline below. */
    uint8_t* storage;
    size_t storage_ix = s->last_bytes_bits_;
    size_t table_size;
    int* table;

    storage = GetBrotliStorage(s, 2 * bytes + 503);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    /* Seed storage with the pending partial byte of the bit stream. */
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    table = GetHashTable(s, s->params.quality, bytes, &table_size);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      BrotliCompressFragmentFast(
          s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    } else {
      BrotliCompressFragmentTwoPass(
          s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          s->command_buf_, s->literal_buf_,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    }
    /* Carry over the (possibly partial) final byte of the bit stream. */
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    UpdateLastProcessedPos(s);
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }

  {
    /* Theoretical max number of commands is 1 per 2 bytes. */
    size_t newsize = s->num_commands_ + bytes / 2 + 1;
    if (newsize > s->cmd_alloc_size_) {
      Command* new_commands;
      /* Reserve a bit more memory to allow merging with a next block
         without reallocation: that would impact speed. */
      newsize += (bytes / 4) + 16;
      s->cmd_alloc_size_ = newsize;
      new_commands = BROTLI_ALLOC(m, Command, newsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
      if (s->commands_) {
        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
        BROTLI_FREE(m, s->commands_);
      }
      s->commands_ = new_commands;
    }
  }

  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
                              wrapped_last_processed_pos, bytes, is_last);

  literal_context_mode = ChooseContextMode(
      &s->params, data, WrapPosition(s->last_flush_pos_),
      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
  literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);

  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

  /* If the previous command ended on a copy, try to let its match continue
     into the newly arrived bytes. */
  if (s->num_commands_ && s->last_insert_len_ == 0) {
    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
  }

  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else {
    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
  }

  {
    const size_t max_length = MaxMetablockSize(&s->params);
    const size_t max_literals = max_length / 8;
    const size_t max_commands = max_length / 8;
    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
    /* If maximal possible additional block doesn't fit metablock, flush now. */
    /* TODO(eustas): Postpone decision until next block arrives? */
    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
        processed_bytes + InputBlockSize(s) <= max_length);
    /* If block splitting is not used, then flush as soon as there is some
       amount of commands / literals produced. */
    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
    if (!is_last && !force_flush && !should_flush &&
        next_input_fits_metablock &&
        s->num_literals_ < max_literals &&
        s->num_commands_ < max_commands) {
      /* Merge with next input block. Everything will happen later. */
      if (UpdateLastProcessedPos(s)) {
        HasherReset(&s->hasher_);
      }
      *out_size = 0;
      return BROTLI_TRUE;
    }
  }

  /* Create the last insert-only command. */
  if (s->last_insert_len_ > 0) {
    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
    s->num_literals_ += s->last_insert_len_;
    s->last_insert_len_ = 0;
  }

  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
    /* We have no new input data and we don't have to finish the stream, so
       nothing to do. */
    *out_size = 0;
    return BROTLI_TRUE;
  }
  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
  {
    /* Emit a metablock that covers everything since the previous flush. */
    const uint32_t metablock_size =
        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
    size_t storage_ix = s->last_bytes_bits_;
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    WriteMetaBlockInternal(
        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
        s->dist_cache_, &storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    s->last_flush_pos_ = s->input_pos_;
    if (UpdateLastProcessedPos(s)) {
      HasherReset(&s->hasher_);
    }
    /* Remember the two bytes preceding the flush point; they are handed to
       WriteMetaBlockInternal for the next metablock. */
    if (s->last_flush_pos_ > 0) {
      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
    }
    if (s->last_flush_pos_ > 1) {
      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
    }
    s->num_commands_ = 0;
    s->num_literals_ = 0;
    /* Save the state of the distance cache in case we need to restore it for
       emitting an uncompressed block. */
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }
}
1181
/* Dumps remaining output bits and metadata header to |header|.
   Returns number of produced bytes.
   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
   REQUIRED: |block_size| <= (1 << 24). */
static size_t WriteMetadataHeader(
    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
  size_t storage_ix;
  storage_ix = s->last_bytes_bits_;
  /* Flush the pending partial byte of the bit stream into |header|. */
  header[0] = (uint8_t)s->last_bytes_;
  header[1] = (uint8_t)(s->last_bytes_ >> 8);
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;

  /* Metadata meta-block header, per RFC 7932 section 9.2:
     ISLAST=0, MNIBBLES=0 (encoded as the two bits 11), reserved bit 0,
     then MSKIPBYTES and the skip length. */
  BrotliWriteBits(1, 0, &storage_ix, header);
  BrotliWriteBits(2, 3, &storage_ix, header);
  BrotliWriteBits(1, 0, &storage_ix, header);
  if (block_size == 0) {
    /* MSKIPBYTES = 0: empty metadata block. */
    BrotliWriteBits(2, 0, &storage_ix, header);
  } else {
    /* Smallest byte count that can represent |block_size| - 1. */
    uint32_t nbits = (block_size == 1) ? 1 :
        (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
    uint32_t nbytes = (nbits + 7) / 8;
    BrotliWriteBits(2, nbytes, &storage_ix, header);
    BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
  }
  return (storage_ix + 7u) >> 3;
}
1209
/* Upper bound for the compressed size of |input_size| input bytes.
   Worst case: stream header + empty metadata block (2 bytes), one
   uncompressed-metablock header (up to 4 bytes) per 16KiB chunk, and the
   final empty "last" metablock (3 + 1 bytes).
   Returns 2 for empty input and 0 when the bound overflows size_t. */
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
  size_t chunk_count;
  size_t total;
  if (input_size == 0) return 2;
  chunk_count = input_size >> 14;
  total = input_size + 2 + (4 * chunk_count) + 3 + 1;
  return (total < input_size) ? 0 : total;
}
1218
/* Wraps |input| into a raw (uncompressed) brotli stream with the minimal
   window size (10 bits).
   |output| should point at a region with at least
   BrotliEncoderMaxCompressedSize(input_size) addressable bytes.
   Returns the number of bytes written to |output|. */
static size_t MakeUncompressedStream(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  size_t out_pos = 0;
  size_t in_pos = 0;
  if (input_size == 0) {
    /* Single byte encoding an empty, final stream. */
    output[0] = 6;
    return 1;
  }
  output[out_pos++] = 0x21; /* window bits = 10, is_last = false */
  output[out_pos++] = 0x03; /* empty metadata, padding */
  while (in_pos < input_size) {
    size_t remaining = input_size - in_pos;
    uint32_t chunk_size =
        (remaining > (1u << 24)) ? (1u << 24) : (uint32_t)remaining;
    /* Extra nibbles in the length field: 0 up to 64KiB, 1 up to 1MiB,
       2 beyond that. */
    uint32_t nibbles =
        (chunk_size > (1u << 20)) ? 2 : (chunk_size > (1u << 16)) ? 1 : 0;
    uint32_t bits =
        (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
    output[out_pos++] = (uint8_t)bits;
    output[out_pos++] = (uint8_t)(bits >> 8);
    output[out_pos++] = (uint8_t)(bits >> 16);
    if (nibbles == 2) output[out_pos++] = (uint8_t)(bits >> 24);
    memcpy(&output[out_pos], &input[in_pos], chunk_size);
    out_pos += chunk_size;
    in_pos += chunk_size;
  }
  output[out_pos++] = 3; /* final empty metablock */
  return out_pos;
}
1254
/* One-shot compression of |input_buffer| into |encoded_buffer| via a
   temporary streaming encoder instance.
   On success sets |*encoded_size| to the number of produced bytes and
   returns BROTLI_TRUE. If streaming compression fails, or the result is
   larger than the worst-case bound, falls back to wrapping the input as an
   uncompressed brotli stream — which requires the output buffer to hold at
   least BrotliEncoderMaxCompressedSize(input_size) bytes. */
BROTLI_BOOL BrotliEncoderCompress(
    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
    size_t* encoded_size,
    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
  BrotliEncoderState* s;
  size_t out_size = *encoded_size;
  const uint8_t* input_start = input_buffer;
  uint8_t* output_start = encoded_buffer;
  size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
  if (out_size == 0) {
    /* Output buffer needs at least one byte. */
    return BROTLI_FALSE;
  }
  if (input_size == 0) {
    /* Handle the special case of empty input. */
    *encoded_size = 1;
    *encoded_buffer = 6;
    return BROTLI_TRUE;
  }

  s = BrotliEncoderCreateInstance(0, 0, 0);
  if (!s) {
    return BROTLI_FALSE;
  } else {
    size_t available_in = input_size;
    const uint8_t* next_in = input_buffer;
    size_t available_out = *encoded_size;
    uint8_t* next_out = encoded_buffer;
    size_t total_out = 0;
    BROTLI_BOOL result = BROTLI_FALSE;
    /* TODO(eustas): check that parameters are sane. */
    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
      BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
    }
    result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
        &available_in, &next_in, &available_out, &next_out, &total_out);
    /* An unfinished stream (output buffer exhausted) counts as failure. */
    if (!BrotliEncoderIsFinished(s)) result = 0;
    *encoded_size = total_out;
    BrotliEncoderDestroyInstance(s);
    if (!result || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }
fallback:
  *encoded_size = 0;
  if (!max_out_size) return BROTLI_FALSE;
  if (out_size >= max_out_size) {
    /* Client buffer can hold the worst-case bound: emit the input as an
       uncompressed brotli stream instead. */
    *encoded_size =
        MakeUncompressedStream(input_start, input_size, output_start);
    return BROTLI_TRUE;
  }
  return BROTLI_FALSE;
}
1314
1315 static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1316 uint32_t seal = s->last_bytes_;
1317 size_t seal_bits = s->last_bytes_bits_;
1318 uint8_t* destination;
1319 s->last_bytes_ = 0;
1320 s->last_bytes_bits_ = 0;
1321 /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
1322 seal |= 0x6u << seal_bits;
1323 seal_bits += 6;
1324 /* If we have already created storage, then append to it.
1325 Storage is valid until next block is being compressed. */
1326 if (s->next_out_) {
1327 destination = s->next_out_ + s->available_out_;
1328 } else {
1329 destination = s->tiny_buf_.u8;
1330 s->next_out_ = destination;
1331 }
1332 destination[0] = (uint8_t)seal;
1333 if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
1334 if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
1335 s->available_out_ += (seal_bits + 7) >> 3;
1336 }
1337
1338 /* Fills the |total_out|, if it is not NULL. */
1339 static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) {
1340 if (total_out) {
1341 /* Saturating conversion uint64_t -> size_t */
1342 size_t result = (size_t)-1;
1343 if (s->total_out_ < result) {
1344 result = (size_t)s->total_out_;
1345 }
1346 *total_out = result;
1347 }
1348 }
1349
1350 /* Injects padding bits or pushes compressed data to output.
1351 Returns false if nothing is done. */
1352 static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1353 size_t* available_out, uint8_t** next_out, size_t* total_out) {
1354 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1355 s->last_bytes_bits_ != 0) {
1356 InjectBytePaddingBlock(s);
1357 return BROTLI_TRUE;
1358 }
1359
1360 if (s->available_out_ != 0 && *available_out != 0) {
1361 size_t copy_output_size =
1362 BROTLI_MIN(size_t, s->available_out_, *available_out);
1363 memcpy(*next_out, s->next_out_, copy_output_size);
1364 *next_out += copy_output_size;
1365 *available_out -= copy_output_size;
1366 s->next_out_ += copy_output_size;
1367 s->available_out_ -= copy_output_size;
1368 s->total_out_ += copy_output_size;
1369 SetTotalOut(s, total_out);
1370 return BROTLI_TRUE;
1371 }
1372
1373 return BROTLI_FALSE;
1374 }
1375
1376 static void CheckFlushComplete(BrotliEncoderState* s) {
1377 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1378 s->available_out_ == 0) {
1379 s->stream_state_ = BROTLI_STREAM_PROCESSING;
1380 s->next_out_ = 0;
1381 }
1382 }
1383
/* Streaming driver for the two fastest quality levels (one-pass and
   two-pass fragment compression). Input is consumed in blocks of at most
   2^lgwin bytes; each block is compressed as an independent fragment,
   bypassing the command/metablock pipeline used by higher qualities.
   Returns BROTLI_FALSE on OOM or when called for an unsupported quality. */
static BROTLI_BOOL BrotliEncoderCompressStreamFast(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
      BROTLI_MIN(size_t, *available_in, block_size_limit));
  uint32_t* tmp_command_buf = NULL;
  uint32_t* command_buf = NULL;
  uint8_t* tmp_literal_buf = NULL;
  uint8_t* literal_buf = NULL;
  MemoryManager* m = &s->memory_manager_;
  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Two-pass mode needs command/literal scratch buffers: keep persistent
       full-sized ones on the state, or allocate call-local temporaries when
       a smaller size suffices. */
    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
      s->command_buf_ =
          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
      s->literal_buf_ =
          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
          BROTLI_IS_NULL(s->literal_buf_)) {
        return BROTLI_FALSE;
      }
    }
    if (s->command_buf_) {
      command_buf = s->command_buf_;
      literal_buf = s->literal_buf_;
    } else {
      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
          BROTLI_IS_NULL(tmp_literal_buf)) {
        return BROTLI_FALSE;
      }
      command_buf = tmp_command_buf;
      literal_buf = tmp_literal_buf;
    }
  }

  while (BROTLI_TRUE) {
    /* First drain pending output / padding to the client. */
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }

    /* Compress block only when internal output buffer is empty, stream is not
       finished, there is no pending flush request, and there is either
       additional input or pending operation. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
      BROTLI_BOOL is_last =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
      BROTLI_BOOL force_flush =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
      size_t max_out_size = 2 * block_size + 503;
      BROTLI_BOOL inplace = BROTLI_TRUE;
      uint8_t* storage = NULL;
      size_t storage_ix = s->last_bytes_bits_;
      size_t table_size;
      int* table;

      if (force_flush && block_size == 0) {
        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        continue;
      }
      /* Compress straight into the client buffer when it is large enough
         for the worst case; otherwise via internal storage. */
      if (max_out_size <= *available_out) {
        storage = *next_out;
      } else {
        inplace = BROTLI_FALSE;
        storage = GetBrotliStorage(s, max_out_size);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      /* Seed storage with the pending partial byte of the bit stream. */
      storage[0] = (uint8_t)s->last_bytes_;
      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
      table = GetHashTable(s, s->params.quality, block_size, &table_size);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
        BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
            is_last, table, table_size, &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      } else {
        BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
            is_last, command_buf, literal_buf, table, table_size,
            &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      if (block_size != 0) {
        *next_in += block_size;
        *available_in -= block_size;
        s->total_in_ += block_size;
      }
      if (inplace) {
        size_t out_bytes = storage_ix >> 3;
        BROTLI_DCHECK(out_bytes <= *available_out);
        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
        *next_out += out_bytes;
        *available_out -= out_bytes;
        s->total_out_ += out_bytes;
        SetTotalOut(s, total_out);
      } else {
        size_t out_bytes = storage_ix >> 3;
        s->next_out_ = storage;
        s->available_out_ = out_bytes;
      }
      /* Carry over the (possibly partial) final byte of the bit stream. */
      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
      s->last_bytes_bits_ = storage_ix & 7u;

      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
      continue;
    }
    break;
  }
  BROTLI_FREE(m, tmp_command_buf);
  BROTLI_FREE(m, tmp_literal_buf);
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
1507
/* Runs the metadata-emission workflow: flushes any buffered compressed data
   first, then writes a metadata block header followed by the metadata bytes
   copied verbatim from |*next_in|.
   Returns BROTLI_FALSE when |*available_in| is larger than 2^24 or the
   stream is in an incompatible state. */
static BROTLI_BOOL ProcessMetadata(
    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (*available_in > (1u << 24)) return BROTLI_FALSE;
  /* Switch to metadata block workflow, if required. */
  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
  }
  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }
    if (s->available_out_ != 0) break;

    /* Flush any buffered input into a metablock before the metadata. */
    if (s->input_pos_ != s->last_flush_pos_) {
      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
          &s->available_out_, &s->next_out_);
      if (!result) return BROTLI_FALSE;
      continue;
    }

    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
      s->next_out_ = s->tiny_buf_.u8;
      s->available_out_ =
          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
      continue;
    } else {
      /* Exit workflow only when there is no more input and no more output.
         Otherwise client may continue producing empty metadata blocks. */
      if (s->remaining_metadata_bytes_ == 0) {
        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
        s->stream_state_ = BROTLI_STREAM_PROCESSING;
        break;
      }
      if (*available_out) {
        /* Directly copy input to output. */
        uint32_t copy = (uint32_t)BROTLI_MIN(
            size_t, s->remaining_metadata_bytes_, *available_out);
        memcpy(*next_out, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy; /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        *next_out += copy;
        *available_out -= copy;
      } else {
        /* This guarantees progress in "TakeOutput" workflow. */
        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
        s->next_out_ = s->tiny_buf_.u8;
        memcpy(s->next_out_, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy; /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        s->available_out_ = copy;
      }
      continue;
    }
  }

  return BROTLI_TRUE;
}
1577
1578 static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
1579 if (s->params.size_hint == 0) {
1580 uint64_t delta = UnprocessedInputSize(s);
1581 uint64_t tail = available_in;
1582 uint32_t limit = 1u << 30;
1583 uint32_t total;
1584 if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
1585 total = limit;
1586 } else {
1587 total = (uint32_t)(delta + tail);
1588 }
1589 s->params.size_hint = total;
1590 }
1591 }
1592
/* Main streaming entry point: feeds input into the ring buffer, drives
   EncodeData per |op|, and moves produced output to the client buffers.
   Dispatches to the metadata workflow for EMIT_METADATA and to the fast
   path for the two lowest quality levels.
   Returns BROTLI_FALSE on invalid state/parameters or encoding failure. */
BROTLI_BOOL BrotliEncoderCompressStream(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  if (!EnsureInitialized(s)) return BROTLI_FALSE;

  /* Unfinished metadata block; check requirements. */
  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
  }

  if (op == BROTLI_OPERATION_EMIT_METADATA) {
    UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
    return ProcessMetadata(
        s, available_in, next_in, available_out, next_out, total_out);
  }

  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  /* New input is only accepted while in the processing state. */
  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
        available_out, next_out, total_out);
  }
  while (BROTLI_TRUE) {
    size_t remaining_block_size = RemainingInputBlockSize(s);
    /* Shorten input to flint size. */
    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
      remaining_block_size = (size_t)s->flint_;
    }

    /* Step 1: move as much input as fits into the ring buffer. */
    if (remaining_block_size != 0 && *available_in != 0) {
      size_t copy_input_size =
          BROTLI_MIN(size_t, remaining_block_size, *available_in);
      CopyInputToRingBuffer(s, copy_input_size, *next_in);
      *next_in += copy_input_size;
      *available_in -= copy_input_size;
      s->total_in_ += copy_input_size;
      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
      continue;
    }

    /* Step 2: drain pending output / padding to the client. */
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      /* Exit the "emit flint" workflow. */
      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
        CheckFlushComplete(s);
        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
          s->flint_ = BROTLI_FLINT_DONE;
        }
      }
      continue;
    }

    /* Compress data only when internal output buffer is empty, stream is not
       finished and there is no pending flush request. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
        BROTLI_BOOL result;
        /* Force emitting (uncompressed) piece containing flint. */
        if (!is_last && s->flint_ == 0) {
          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
          force_flush = BROTLI_TRUE;
        }
        UpdateSizeHint(s, *available_in);
        result = EncodeData(s, is_last, force_flush,
            &s->available_out_, &s->next_out_);
        if (!result) return BROTLI_FALSE;
        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
        continue;
      }
    }
    break;
  }
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
1682
1683 BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
1684 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
1685 !BrotliEncoderHasMoreOutput(s));
1686 }
1687
1688 BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
1689 return TO_BROTLI_BOOL(s->available_out_ != 0);
1690 }
1691
1692 const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
1693 size_t consumed_size = s->available_out_;
1694 uint8_t* result = s->next_out_;
1695 if (*size) {
1696 consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
1697 }
1698 if (consumed_size) {
1699 s->next_out_ += consumed_size;
1700 s->available_out_ -= consumed_size;
1701 s->total_out_ += consumed_size;
1702 CheckFlushComplete(s);
1703 *size = consumed_size;
1704 } else {
1705 *size = 0;
1706 result = 0;
1707 }
1708 return result;
1709 }
1710
/* Returns the compile-time library version constant (BROTLI_VERSION, defined
   in common/version.h). */
uint32_t BrotliEncoderVersion(void) {
  return BROTLI_VERSION;
}
1714
/* Creates a self-managed (refcount-free, owner-destroyed) prepared dictionary
   from raw |data|, or — only when built with BROTLI_EXPERIMENTAL — from a
   serialized shared dictionary. |quality| is consumed only by the serialized
   path; it is explicitly ignored otherwise.
   Returns NULL on unknown |type|, allocation failure, or invalid input.
   The result must be released with BrotliEncoderDestroyPreparedDictionary;
   the supplied allocator triple is captured inside the managed wrapper. */
BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
    BrotliSharedDictionaryType type, size_t size,
    const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
  ManagedDictionary* managed_dictionary = NULL;
  BROTLI_BOOL type_is_known = BROTLI_FALSE;
  /* Reject dictionary types this build cannot process. */
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_RAW);
#if defined(BROTLI_EXPERIMENTAL)
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_SERIALIZED);
#endif  /* BROTLI_EXPERIMENTAL */
  if (!type_is_known) {
    return NULL;
  }
  managed_dictionary =
      BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
  if (managed_dictionary == NULL) {
    return NULL;
  }
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
    /* On failure CreatePreparedDictionary yields NULL; handled below. */
    managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
        &managed_dictionary->memory_manager_, data, size);
  }
#if defined(BROTLI_EXPERIMENTAL)
  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
    SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
        &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
    managed_dictionary->dictionary = (uint32_t*)dict;
    if (dict != NULL) {
      BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
          &managed_dictionary->memory_manager_, data, size, quality, dict);
      if (!ok) {
        /* Init failed: release the half-built instance and fall through to
           the common error path. */
        BrotliFree(&managed_dictionary->memory_manager_, dict);
        managed_dictionary->dictionary = NULL;
      }
    }
  }
#else  /* BROTLI_EXPERIMENTAL */
  (void)quality;
#endif  /* BROTLI_EXPERIMENTAL */
  if (managed_dictionary->dictionary == NULL) {
    BrotliDestroyManagedDictionary(managed_dictionary);
    return NULL;
  }
  return (BrotliEncoderPreparedDictionary*)managed_dictionary;
}
1760
1761 void BrotliEncoderDestroyPreparedDictionary(
1762 BrotliEncoderPreparedDictionary* dictionary) {
1763 ManagedDictionary* dict = (ManagedDictionary*)dictionary;
1764 if (!dictionary) return;
1765 /* First field of dictionary structs. */
1766 /* Only managed dictionaries are eligible for destruction by this method. */
1767 if (dict->magic != kManagedDictionaryMagic) {
1768 return;
1769 }
1770 if (dict->dictionary == NULL) {
1771 /* This should never ever happen. */
1772 } else if (*dict->dictionary == kLeanPreparedDictionaryMagic) {
1773 DestroyPreparedDictionary(
1774 &dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
1775 } else if (*dict->dictionary == kSharedDictionaryMagic) {
1776 BrotliCleanupSharedEncoderDictionary(&dict->memory_manager_,
1777 (SharedEncoderDictionary*)dict->dictionary);
1778 BrotliFree(&dict->memory_manager_, dict->dictionary);
1779 } else {
1780 /* There is also kPreparedDictionaryMagic, but such instances should be
1781 * constructed and destroyed by different means. */
1782 }
1783 dict->dictionary = NULL;
1784 BrotliDestroyManagedDictionary(dict);
1785 }
1786
/* Attaches a prepared dictionary to |state| so subsequent compression can
   reference it. Returns BROTLI_FALSE on unsupported dictionary kind, on
   capacity exhaustion of the compound-dictionary list, or when a contextual
   dictionary is attached after the encoder was already initialized / already
   carries a non-default contextual dictionary.
   NOTE(review): |dictionary| is dereferenced without a NULL check — callers
   must pass a valid pointer. */
BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
    const BrotliEncoderPreparedDictionary* dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* dict = dictionary;
  uint32_t magic = *((const uint32_t*)dict);
  SharedEncoderDictionary* current = NULL;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap managed dictionary. */
    ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
    magic = *managed_dictionary->dictionary;
    dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
  }
  current = &state->params.dictionary;
  if (magic == kPreparedDictionaryMagic ||
      magic == kLeanPreparedDictionaryMagic) {
    /* Raw LZ77 dictionary: append to the compound-dictionary list. */
    const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
    if (!AttachPreparedDictionary(&current->compound, prepared)) {
      return BROTLI_FALSE;
    }
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* attached =
        (const SharedEncoderDictionary*)dict;
    /* "Default" means: single non-contextual dictionary whose hash tables are
       the built-in static dictionary tables. Only a default contextual part
       may be replaced by the attached one. */
    BROTLI_BOOL was_default = !current->contextual.context_based &&
        current->contextual.num_dictionaries == 1 &&
        current->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        current->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    BROTLI_BOOL new_default = !attached->contextual.context_based &&
        attached->contextual.num_dictionaries == 1 &&
        attached->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        attached->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    size_t i;
    /* Contextual dictionaries may only be installed before initialization. */
    if (state->is_initialized_) return BROTLI_FALSE;
    current->max_quality =
        BROTLI_MIN(int, current->max_quality, attached->max_quality);
    for (i = 0; i < attached->compound.num_chunks; i++) {
      if (!AttachPreparedDictionary(&current->compound,
          attached->compound.chunks[i])) {
        return BROTLI_FALSE;
      }
    }
    if (!new_default) {
      if (!was_default) return BROTLI_FALSE;
      /* Copy by value, but then set num_instances_ to 0 because their memory
         is managed by attached, not by current */
      current->contextual = attached->contextual;
      current->contextual.num_instances_ = 0;
    }
  } else {
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
1843
/* Estimates peak heap usage (in bytes) of compressing |input_size| bytes at
   the given |quality| / |lgwin|, by mirroring the sizing logic of the actual
   allocation sites. The constants below are heuristics; they must be kept in
   sync with the corresponding allocation code, not tuned independently. */
size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
                                            size_t input_size) {
  BrotliEncoderParams params;
  size_t memory_manager_slots = BROTLI_ENCODER_MEMORY_MANAGER_SLOTS;
  size_t memory_manager_size = memory_manager_slots * sizeof(void*);
  /* Derive the effective (sanitized) parameters the encoder would use. */
  BrotliEncoderInitParams(&params);
  params.quality = quality;
  params.lgwin = lgwin;
  params.size_hint = input_size;
  params.large_window = lgwin > BROTLI_MAX_WINDOW_BITS;
  SanitizeParams(&params);
  params.lgblock = ComputeLgBlock(&params);
  ChooseHasher(&params, &params.hasher);
  if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Fast path: state + hash table (+ two-pass command buffer). */
    size_t state_size = sizeof(BrotliEncoderState);
    size_t block_size = BROTLI_MIN(size_t, input_size, ((size_t)1ul << params.lgwin));
    size_t hash_table_size =
        HashTableSize(MaxHashTableSize(params.quality), block_size);
    /* Small tables live inside the state; only larger ones are heap-allocated. */
    size_t hash_size =
        (hash_table_size < (1u << 10)) ? 0 : sizeof(int) * hash_table_size;
    size_t cmdbuf_size = params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY ?
        5 * BROTLI_MIN(size_t, block_size, 1ul << 17) : 0;
    if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      state_size += sizeof(BrotliOnePassArena);
    } else {
      state_size += sizeof(BrotliTwoPassArena);
    }
    return hash_size + cmdbuf_size + state_size;
  } else {
    /* Regular path: ring buffer + hashers + command/output buffers
       (+ histograms for block-splitting qualities). */
    size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
    int ringbuffer_bits = ComputeRbBits(&params);
    size_t ringbuffer_size = input_size < short_ringbuffer_size ?
        input_size : ((size_t)1u << ringbuffer_bits) + short_ringbuffer_size;
    size_t hash_size[4] = {0};
    size_t metablock_size =
        BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
    size_t inputblock_size =
        BROTLI_MIN(size_t, input_size, (size_t)1 << params.lgblock);
    size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
    size_t outbuf_size = metablock_size * 2 + 503;
    size_t histogram_size = 0;
    HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
    if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
      cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
          MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
    }
    if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      /* Only a very rough estimation, based on enwik8. */
      histogram_size = 200 << 20;
    } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
      size_t literal_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t command_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t distance_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      histogram_size = literal_histograms * sizeof(HistogramLiteral) +
          command_histograms * sizeof(HistogramCommand) +
          distance_histograms * sizeof(HistogramDistance);
    }
    return (memory_manager_size + ringbuffer_size +
        hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
        cmdbuf_size +
        outbuf_size +
        histogram_size);
  }
}
/* Returns the total heap footprint (in bytes) of a prepared dictionary,
   including a managed wrapper's own overhead; recurses into compound chunks.
   Returns 0 on an unrecognized magic tag (error).
   NOTE(review): |prepared_dictionary| is dereferenced without a NULL check —
   callers must pass a valid pointer. */
size_t BrotliEncoderGetPreparedDictionarySize(
    const BrotliEncoderPreparedDictionary* prepared_dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
  uint32_t magic = *((const uint32_t*)prepared);
  size_t overhead = 0;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap the managed container; account for its own size separately. */
    const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
    overhead = sizeof(ManagedDictionary);
    magic = *managed->dictionary;
    prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
  }

  if (magic == kPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + dictionary->source_size +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    /* Lean variant stores a pointer to the source instead of a copy. */
    return sizeof(PreparedDictionary) + sizeof(uint8_t*) +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* dictionary =
        (const SharedEncoderDictionary*)prepared;
    const CompoundDictionary* compound = &dictionary->compound;
    const ContextualEncoderDictionary* contextual = &dictionary->contextual;
    size_t result = sizeof(*dictionary);
    size_t i;
    size_t num_instances;
    const BrotliEncoderDictionary* instances;
    /* Recurse into every owned compound chunk. */
    for (i = 0; i < compound->num_prepared_instances_; i++) {
      size_t size = BrotliEncoderGetPreparedDictionarySize(
          (const BrotliEncoderPreparedDictionary*)
          compound->prepared_instances_[i]);
      if (!size) return 0;  /* error */
      result += size;
    }
    if (contextual->context_based) {
      num_instances = contextual->num_instances_;
      instances = contextual->instances_;
      result += sizeof(*instances) * num_instances;
    } else {
      /* Non-contextual: single inline instance, no extra array allocation. */
      num_instances = 1;
      instances = &contextual->instance_;
    }
    for (i = 0; i < num_instances; i++) {
      const BrotliEncoderDictionary* dict = &instances[i];
      result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
      if (dict->hash_table_data_words_) {
        result += sizeof(kStaticDictionaryHashWords);
      }
      if (dict->hash_table_data_lengths_) {
        result += sizeof(kStaticDictionaryHashLengths);
      }
      if (dict->buckets_data_) {
        result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
      }
      if (dict->dict_words_data_) {
        result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
      }
      if (dict->words_instance_) {
        result += sizeof(*dict->words_instance_);
        /* data_size not added here: it is never allocated by the
           SharedEncoderDictionary, instead it always points to the file
           already loaded in memory. So if the caller wants to include
           this memory as well, add the size of the loaded dictionary
           file to this. */
      }
    }
    return result + overhead;
  }
  return 0;  /* error */
}
1993
#if defined(BROTLI_TEST)
/* Test-only shim: exposes the file-local MakeUncompressedStream to unit
   tests. The forward declaration silences missing-prototype warnings. */
size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
size_t MakeUncompressedStreamForTest(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  return MakeUncompressedStream(input, input_size, output);
}
#endif
2001
2002 #if defined(__cplusplus) || defined(c_plusplus)
2003 } /* extern "C" */
2004 #endif