comparison mupdf-source/thirdparty/jbig2dec/jbig2_arith.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /* Copyright (C) 2001-2023 Artifex Software, Inc.
2 All Rights Reserved.
3
4 This software is provided AS-IS with no warranty, either express or
5 implied.
6
7 This software is distributed under license and may not be copied,
8 modified or distributed except as expressly authorized under the terms
9 of the license contained in the file LICENSE in this distribution.
10
11 Refer to licensing information at http://www.artifex.com or contact
12 Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
13 CA 94129, USA, for further information.
14 */
15
16 /*
17 jbig2dec
18 */
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 #include "os_types.h"
24
25 #include <stdio.h>
26 #include <stdlib.h>
27
28 #include "jbig2.h"
29 #include "jbig2_priv.h"
30 #include "jbig2_arith.h"
31
32 struct _Jbig2ArithState {
33 uint32_t C;
34 uint32_t A;
35
36 int CT;
37
38 uint32_t next_word;
39 size_t next_word_bytes;
40 int err;
41
42 Jbig2WordStream *ws;
43 size_t offset;
44 };
45
46 /*
47 Previous versions of this code had a #define to allow
48 us to choose between using the revised arithmetic decoding
49 specified in the 'Software Convention' section of the spec.
50 Back to back tests showed that the 'Software Convention'
51 version was indeed slightly faster. We therefore enable it
52 by default. We also strip the option out, because a) it
53 makes the code harder to read, and b) such things are an
54 invitation to bitrot.
55 */
56
57 static int
58 jbig2_arith_bytein(Jbig2Ctx *ctx, Jbig2ArithState *as)
59 {
60 byte B;
61
62 /* Treat both errors and reading beyond end of stream as an error. */
63 if (as->err != 0) {
64 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding");
65 return -1;
66 }
67 if (as->next_word_bytes == 0) {
68 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read beyond end of underlying stream during arithmetic decoding");
69 return -1;
70 }
71
72 /* At this point there is at least one byte in as->next_word. */
73
74 /* This code confused me no end when I first read it, so a quick note
75 * to save others (and future me's) from being similarly confused.
76 * 'next_word' does indeed contain 'next_word_bytes' of valid data
77 * (always starting at the most significant byte). The confusing
78 * thing is that the first byte has always already been read.
79 * i.e. it serves only as an indication that the last byte we read
80 * was FF or not.
81 *
82 * The jbig2 bytestream uses FF bytes, followed by a byte > 0x8F as
83 * marker bytes. These never occur in normal streams of arithmetic
84 * encoding, so meeting one terminates the stream (with an infinite
85 * series of 1 bits).
86 *
87 * If we meet an FF byte, we return it as normal. We just 'remember'
88 * that fact for the next byte we read.
89 */
90
91 /* Figure F.3 */
92 B = (byte)((as->next_word >> 24) & 0xFF);
93 if (B == 0xFF) {
94 byte B1;
95
96 /* next_word_bytes can only be == 1 here, but let's be defensive. */
97 if (as->next_word_bytes <= 1) {
98 int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word);
99 if (ret < 0) {
100 as->err = 1;
101 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to check for marker code due to failure in underlying stream during arithmetic decoding");
102 }
103 as->next_word_bytes = (size_t) ret;
104
105 if (as->next_word_bytes == 0) {
106 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read end of possible terminating marker code, assuming terminating marker code");
107 as->next_word = 0xFF900000;
108 as->next_word_bytes = 2;
109 as->C += 0xFF00;
110 as->CT = 8;
111 return 0;
112 }
113
114 as->offset += as->next_word_bytes;
115
116 B1 = (byte)((as->next_word >> 24) & 0xFF);
117 if (B1 > 0x8F) {
118 #ifdef JBIG2_DEBUG_ARITH
119 fprintf(stderr, "read %02x (aa)\n", B);
120 #endif
121 as->CT = 8;
122 as->next_word = 0xFF000000 | (as->next_word >> 8);
123 as->next_word_bytes = 2;
124 as->offset--;
125 } else {
126 #ifdef JBIG2_DEBUG_ARITH
127 fprintf(stderr, "read %02x (a)\n", B);
128 #endif
129 as->C += 0xFE00 - (B1 << 9);
130 as->CT = 7;
131 }
132 } else {
133 B1 = (byte)((as->next_word >> 16) & 0xFF);
134 if (B1 > 0x8F) {
135 #ifdef JBIG2_DEBUG_ARITH
136 fprintf(stderr, "read %02x (ba)\n", B);
137 #endif
138 as->CT = 8;
139 } else {
140 as->next_word_bytes--;
141 as->next_word <<= 8;
142 #ifdef JBIG2_DEBUG_ARITH
143 fprintf(stderr, "read %02x (b)\n", B);
144 #endif
145
146 as->C += 0xFE00 - (B1 << 9);
147 as->CT = 7;
148 }
149 }
150 } else {
151 #ifdef JBIG2_DEBUG_ARITH
152 fprintf(stderr, "read %02x\n", B);
153 #endif
154 as->next_word <<= 8;
155 as->next_word_bytes--;
156
157 if (as->next_word_bytes == 0) {
158 int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word);
159 if (ret < 0) {
160 as->err = 1;
161 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding");
162 }
163 as->next_word_bytes = (size_t) ret;
164
165 if (as->next_word_bytes == 0) {
166 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to find terminating marker code before end of underlying stream, assuming terminating marker code");
167 as->next_word = 0xFF900000;
168 as->next_word_bytes = 2;
169 as->C += 0xFF00;
170 as->CT = 8;
171 return 0;
172 }
173
174 as->offset += as->next_word_bytes;
175 }
176
177 B = (byte)((as->next_word >> 24) & 0xFF);
178 as->C += 0xFF00 - (B << 8);
179 as->CT = 8;
180 }
181
182 return 0;
183 }
184
185 /** Allocate and initialize a new arithmetic coding state
186 * the returned pointer can simply be freed; this does
187 * not affect the associated Jbig2WordStream.
188 */
189 Jbig2ArithState *
190 jbig2_arith_new(Jbig2Ctx *ctx, Jbig2WordStream *ws)
191 {
192 Jbig2ArithState *result;
193 int ret;
194
195 result = jbig2_new(ctx, Jbig2ArithState, 1);
196 if (result == NULL) {
197 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to allocate arithmetic coding state");
198 return NULL;
199 }
200
201 result->err = 0;
202 result->ws = ws;
203 result->offset = 0;
204
205 ret = result->ws->get_next_word(ctx, result->ws, result->offset, &result->next_word);
206 if (ret < 0) {
207 jbig2_free(ctx->allocator, result);
208 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to initialize underlying stream of arithmetic decoder");
209 return NULL;
210 }
211
212 result->next_word_bytes = (size_t) ret;
213 if (result->next_word_bytes == 0) {
214 jbig2_free(ctx->allocator, result);
215 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read first byte from underlying stream when initializing arithmetic decoder");
216 return NULL;
217 }
218
219 result->offset += result->next_word_bytes;
220
221 /* Figure F.1 */
222 result->C = (~(result->next_word >> 8)) & 0xFF0000;
223
224 /* Figure E.20 (2) */
225 if (jbig2_arith_bytein(ctx, result) < 0) {
226 jbig2_free(ctx->allocator, result);
227 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read second byte from underlying stream when initializing arithmetic decoder");
228 return NULL;
229 }
230
231 /* Figure E.20 (3) */
232 result->C <<= 7;
233 result->CT -= 7;
234 result->A = 0x8000;
235
236 return result;
237 }
238
239 #define MAX_QE_ARRAY_SIZE 47
240
241 /* could put bit fields in to minimize memory usage */
242 typedef struct {
243 uint16_t Qe;
244 byte mps_xor; /* mps_xor = index ^ NMPS */
245 byte lps_xor; /* lps_xor = index ^ NLPS ^ (SWITCH << 7) */
246 } Jbig2ArithQe;
247
248 #define MPS(index, nmps) ((index) ^ (nmps))
249 #define LPS(index, nlps, swtch) ((index) ^ (nlps) ^ ((swtch) << 7))
250
251 static const Jbig2ArithQe jbig2_arith_Qe[MAX_QE_ARRAY_SIZE] = {
252 {0x5601, MPS(0, 1), LPS(0, 1, 1)},
253 {0x3401, MPS(1, 2), LPS(1, 6, 0)},
254 {0x1801, MPS(2, 3), LPS(2, 9, 0)},
255 {0x0AC1, MPS(3, 4), LPS(3, 12, 0)},
256 {0x0521, MPS(4, 5), LPS(4, 29, 0)},
257 {0x0221, MPS(5, 38), LPS(5, 33, 0)},
258 {0x5601, MPS(6, 7), LPS(6, 6, 1)},
259 {0x5401, MPS(7, 8), LPS(7, 14, 0)},
260 {0x4801, MPS(8, 9), LPS(8, 14, 0)},
261 {0x3801, MPS(9, 10), LPS(9, 14, 0)},
262 {0x3001, MPS(10, 11), LPS(10, 17, 0)},
263 {0x2401, MPS(11, 12), LPS(11, 18, 0)},
264 {0x1C01, MPS(12, 13), LPS(12, 20, 0)},
265 {0x1601, MPS(13, 29), LPS(13, 21, 0)},
266 {0x5601, MPS(14, 15), LPS(14, 14, 1)},
267 {0x5401, MPS(15, 16), LPS(15, 14, 0)},
268 {0x5101, MPS(16, 17), LPS(16, 15, 0)},
269 {0x4801, MPS(17, 18), LPS(17, 16, 0)},
270 {0x3801, MPS(18, 19), LPS(18, 17, 0)},
271 {0x3401, MPS(19, 20), LPS(19, 18, 0)},
272 {0x3001, MPS(20, 21), LPS(20, 19, 0)},
273 {0x2801, MPS(21, 22), LPS(21, 19, 0)},
274 {0x2401, MPS(22, 23), LPS(22, 20, 0)},
275 {0x2201, MPS(23, 24), LPS(23, 21, 0)},
276 {0x1C01, MPS(24, 25), LPS(24, 22, 0)},
277 {0x1801, MPS(25, 26), LPS(25, 23, 0)},
278 {0x1601, MPS(26, 27), LPS(26, 24, 0)},
279 {0x1401, MPS(27, 28), LPS(27, 25, 0)},
280 {0x1201, MPS(28, 29), LPS(28, 26, 0)},
281 {0x1101, MPS(29, 30), LPS(29, 27, 0)},
282 {0x0AC1, MPS(30, 31), LPS(30, 28, 0)},
283 {0x09C1, MPS(31, 32), LPS(31, 29, 0)},
284 {0x08A1, MPS(32, 33), LPS(32, 30, 0)},
285 {0x0521, MPS(33, 34), LPS(33, 31, 0)},
286 {0x0441, MPS(34, 35), LPS(34, 32, 0)},
287 {0x02A1, MPS(35, 36), LPS(35, 33, 0)},
288 {0x0221, MPS(36, 37), LPS(36, 34, 0)},
289 {0x0141, MPS(37, 38), LPS(37, 35, 0)},
290 {0x0111, MPS(38, 39), LPS(38, 36, 0)},
291 {0x0085, MPS(39, 40), LPS(39, 37, 0)},
292 {0x0049, MPS(40, 41), LPS(40, 38, 0)},
293 {0x0025, MPS(41, 42), LPS(41, 39, 0)},
294 {0x0015, MPS(42, 43), LPS(42, 40, 0)},
295 {0x0009, MPS(43, 44), LPS(43, 41, 0)},
296 {0x0005, MPS(44, 45), LPS(44, 42, 0)},
297 {0x0001, MPS(45, 45), LPS(45, 43, 0)},
298 {0x5601, MPS(46, 46), LPS(46, 46, 0)}
299 };
300
301 static int
302 jbig2_arith_renormd(Jbig2Ctx *ctx, Jbig2ArithState *as)
303 {
304 /* Figure E.18 */
305 do {
306 if (as->CT == 0 && jbig2_arith_bytein(ctx, as) < 0) {
307 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read byte from compressed data stream");
308 }
309 as->A <<= 1;
310 as->C <<= 1;
311 as->CT--;
312 } while ((as->A & 0x8000) == 0);
313
314 return 0;
315 }
316
317 int
318 jbig2_arith_decode(Jbig2Ctx *ctx, Jbig2ArithState *as, Jbig2ArithCx *pcx)
319 {
320 Jbig2ArithCx cx = *pcx;
321 const Jbig2ArithQe *pqe;
322 unsigned int index = cx & 0x7f;
323 bool D;
324
325 if (index >= MAX_QE_ARRAY_SIZE) {
326 return jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to determine probability estimate because index out of range");
327 }
328
329 pqe = &jbig2_arith_Qe[index];
330
331 /* Figure F.2 */
332 as->A -= pqe->Qe;
333 if ((as->C >> 16) < as->A) {
334 if ((as->A & 0x8000) == 0) {
335 /* MPS_EXCHANGE, Figure E.16 */
336 if (as->A < pqe->Qe) {
337 D = 1 - (cx >> 7);
338 *pcx ^= pqe->lps_xor;
339 } else {
340 D = cx >> 7;
341 *pcx ^= pqe->mps_xor;
342 }
343 if (jbig2_arith_renormd(ctx, as) < 0) {
344 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder");
345 }
346
347 return D;
348 } else {
349 return cx >> 7;
350 }
351 } else {
352 as->C -= (as->A) << 16;
353 /* LPS_EXCHANGE, Figure E.17 */
354 if (as->A < pqe->Qe) {
355 as->A = pqe->Qe;
356 D = cx >> 7;
357 *pcx ^= pqe->mps_xor;
358 } else {
359 as->A = pqe->Qe;
360 D = 1 - (cx >> 7);
361 *pcx ^= pqe->lps_xor;
362 }
363 if (jbig2_arith_renormd(ctx, as) < 0) {
364 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder");
365 }
366
367 return D;
368 }
369 }
370
371 #ifdef TEST
372
373 static const byte test_stream[] = {
374 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00,
375 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47,
376 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC,
377 0x00, 0x00
378 };
379
380 #if defined(JBIG2_DEBUG) || defined(JBIG2_DEBUG_ARITH)
381 static void
382 jbig2_arith_trace(Jbig2ArithState *as, Jbig2ArithCx cx)
383 {
384 fprintf(stderr, "I = %2d, MPS = %d, A = %04x, CT = %2d, C = %08x\n", cx & 0x7f, cx >> 7, as->A, as->CT, as->C);
385 }
386 #endif
387
388 static int
389 test_get_word(Jbig2Ctx *ctx, Jbig2WordStream *self, size_t offset, uint32_t *word)
390 {
391 uint32_t val = 0;
392 int ret = 0;
393
394 if (self == NULL || word == NULL)
395 return -1;
396 if (offset >= sizeof (test_stream))
397 return 0;
398
399 if (offset < sizeof(test_stream)) {
400 val |= test_stream[offset] << 24;
401 ret++;
402 }
403 if (offset + 1 < sizeof(test_stream)) {
404 val |= test_stream[offset + 1] << 16;
405 ret++;
406 }
407 if (offset + 2 < sizeof(test_stream)) {
408 val |= test_stream[offset + 2] << 8;
409 ret++;
410 }
411 if (offset + 3 < sizeof(test_stream)) {
412 val |= test_stream[offset + 3];
413 ret++;
414 }
415 *word = val;
416 return ret;
417 }
418
419 int
420 main(int argc, char **argv)
421 {
422 Jbig2Ctx *ctx;
423 Jbig2WordStream ws;
424 Jbig2ArithState *as;
425 int i;
426 Jbig2ArithCx cx = 0;
427
428 ctx = jbig2_ctx_new(NULL, 0, NULL, NULL, NULL);
429
430 ws.get_next_word = test_get_word;
431 as = jbig2_arith_new(ctx, &ws);
432 #ifdef JBIG2_DEBUG_ARITH
433 jbig2_arith_trace(as, cx);
434 #endif
435
436 for (i = 0; i < 256; i++) {
437 #ifdef JBIG2_DEBUG_ARITH
438 int D =
439 #else
440 (void)
441 #endif
442 jbig2_arith_decode(ctx, as, &cx);
443
444 #ifdef JBIG2_DEBUG_ARITH
445 fprintf(stderr, "%3d: D = %d, ", i, D);
446 jbig2_arith_trace(as, cx);
447 #endif
448 }
449
450 jbig2_free(ctx->allocator, as);
451
452 jbig2_ctx_free(ctx);
453
454 return 0;
455 }
456 #endif