Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/jbig2dec/jbig2_arith.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* Copyright (C) 2001-2023 Artifex Software, Inc. | |
| 2 All Rights Reserved. | |
| 3 | |
| 4 This software is provided AS-IS with no warranty, either express or | |
| 5 implied. | |
| 6 | |
| 7 This software is distributed under license and may not be copied, | |
| 8 modified or distributed except as expressly authorized under the terms | |
| 9 of the license contained in the file LICENSE in this distribution. | |
| 10 | |
| 11 Refer to licensing information at http://www.artifex.com or contact | |
| 12 Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 13 CA 94129, USA, for further information. | |
| 14 */ | |
| 15 | |
| 16 /* | |
| 17 jbig2dec | |
| 18 */ | |
| 19 | |
| 20 #ifdef HAVE_CONFIG_H | |
| 21 #include "config.h" | |
| 22 #endif | |
| 23 #include "os_types.h" | |
| 24 | |
| 25 #include <stdio.h> | |
| 26 #include <stdlib.h> | |
| 27 | |
| 28 #include "jbig2.h" | |
| 29 #include "jbig2_priv.h" | |
| 30 #include "jbig2_arith.h" | |
| 31 | |
| 32 struct _Jbig2ArithState { | |
| 33 uint32_t C; | |
| 34 uint32_t A; | |
| 35 | |
| 36 int CT; | |
| 37 | |
| 38 uint32_t next_word; | |
| 39 size_t next_word_bytes; | |
| 40 int err; | |
| 41 | |
| 42 Jbig2WordStream *ws; | |
| 43 size_t offset; | |
| 44 }; | |
| 45 | |
| 46 /* | |
| 47 Previous versions of this code had a #define to allow | |
| 48 us to choose between using the revised arithmetic decoding | |
| 49 specified in the 'Software Convention' section of the spec. | |
| 50 Back to back tests showed that the 'Software Convention' | |
| 51 version was indeed slightly faster. We therefore enable it | |
| 52 by default. We also strip the option out, because a) it | |
| 53 makes the code harder to read, and b) such things are an | |
| 54 invitation to bitrot. | |
| 55 */ | |
| 56 | |
| 57 static int | |
| 58 jbig2_arith_bytein(Jbig2Ctx *ctx, Jbig2ArithState *as) | |
| 59 { | |
| 60 byte B; | |
| 61 | |
| 62 /* Treat both errors and reading beyond end of stream as an error. */ | |
| 63 if (as->err != 0) { | |
| 64 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); | |
| 65 return -1; | |
| 66 } | |
| 67 if (as->next_word_bytes == 0) { | |
| 68 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read beyond end of underlying stream during arithmetic decoding"); | |
| 69 return -1; | |
| 70 } | |
| 71 | |
| 72 /* At this point there is at least one byte in as->next_word. */ | |
| 73 | |
| 74 /* This code confused me no end when I first read it, so a quick note | |
| 75 * to save others (and future me's) from being similarly confused. | |
| 76 * 'next_word' does indeed contain 'next_word_bytes' of valid data | |
| 77 * (always starting at the most significant byte). The confusing | |
| 78 * thing is that the first byte has always already been read. | |
| 79 * i.e. it serves only as an indication that the last byte we read | |
| 80 * was FF or not. | |
| 81 * | |
| 82 * The jbig2 bytestream uses FF bytes, followed by a byte > 0x8F as | |
| 83 * marker bytes. These never occur in normal streams of arithmetic | |
| 84 * encoding, so meeting one terminates the stream (with an infinite | |
| 85 * series of 1 bits). | |
| 86 * | |
| 87 * If we meet an FF byte, we return it as normal. We just 'remember' | |
| 88 * that fact for the next byte we read. | |
| 89 */ | |
| 90 | |
| 91 /* Figure F.3 */ | |
| 92 B = (byte)((as->next_word >> 24) & 0xFF); | |
| 93 if (B == 0xFF) { | |
| 94 byte B1; | |
| 95 | |
| 96 /* next_word_bytes can only be == 1 here, but let's be defensive. */ | |
| 97 if (as->next_word_bytes <= 1) { | |
| 98 int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); | |
| 99 if (ret < 0) { | |
| 100 as->err = 1; | |
| 101 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to check for marker code due to failure in underlying stream during arithmetic decoding"); | |
| 102 } | |
| 103 as->next_word_bytes = (size_t) ret; | |
| 104 | |
| 105 if (as->next_word_bytes == 0) { | |
| 106 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read end of possible terminating marker code, assuming terminating marker code"); | |
| 107 as->next_word = 0xFF900000; | |
| 108 as->next_word_bytes = 2; | |
| 109 as->C += 0xFF00; | |
| 110 as->CT = 8; | |
| 111 return 0; | |
| 112 } | |
| 113 | |
| 114 as->offset += as->next_word_bytes; | |
| 115 | |
| 116 B1 = (byte)((as->next_word >> 24) & 0xFF); | |
| 117 if (B1 > 0x8F) { | |
| 118 #ifdef JBIG2_DEBUG_ARITH | |
| 119 fprintf(stderr, "read %02x (aa)\n", B); | |
| 120 #endif | |
| 121 as->CT = 8; | |
| 122 as->next_word = 0xFF000000 | (as->next_word >> 8); | |
| 123 as->next_word_bytes = 2; | |
| 124 as->offset--; | |
| 125 } else { | |
| 126 #ifdef JBIG2_DEBUG_ARITH | |
| 127 fprintf(stderr, "read %02x (a)\n", B); | |
| 128 #endif | |
| 129 as->C += 0xFE00 - (B1 << 9); | |
| 130 as->CT = 7; | |
| 131 } | |
| 132 } else { | |
| 133 B1 = (byte)((as->next_word >> 16) & 0xFF); | |
| 134 if (B1 > 0x8F) { | |
| 135 #ifdef JBIG2_DEBUG_ARITH | |
| 136 fprintf(stderr, "read %02x (ba)\n", B); | |
| 137 #endif | |
| 138 as->CT = 8; | |
| 139 } else { | |
| 140 as->next_word_bytes--; | |
| 141 as->next_word <<= 8; | |
| 142 #ifdef JBIG2_DEBUG_ARITH | |
| 143 fprintf(stderr, "read %02x (b)\n", B); | |
| 144 #endif | |
| 145 | |
| 146 as->C += 0xFE00 - (B1 << 9); | |
| 147 as->CT = 7; | |
| 148 } | |
| 149 } | |
| 150 } else { | |
| 151 #ifdef JBIG2_DEBUG_ARITH | |
| 152 fprintf(stderr, "read %02x\n", B); | |
| 153 #endif | |
| 154 as->next_word <<= 8; | |
| 155 as->next_word_bytes--; | |
| 156 | |
| 157 if (as->next_word_bytes == 0) { | |
| 158 int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); | |
| 159 if (ret < 0) { | |
| 160 as->err = 1; | |
| 161 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); | |
| 162 } | |
| 163 as->next_word_bytes = (size_t) ret; | |
| 164 | |
| 165 if (as->next_word_bytes == 0) { | |
| 166 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to find terminating marker code before end of underlying stream, assuming terminating marker code"); | |
| 167 as->next_word = 0xFF900000; | |
| 168 as->next_word_bytes = 2; | |
| 169 as->C += 0xFF00; | |
| 170 as->CT = 8; | |
| 171 return 0; | |
| 172 } | |
| 173 | |
| 174 as->offset += as->next_word_bytes; | |
| 175 } | |
| 176 | |
| 177 B = (byte)((as->next_word >> 24) & 0xFF); | |
| 178 as->C += 0xFF00 - (B << 8); | |
| 179 as->CT = 8; | |
| 180 } | |
| 181 | |
| 182 return 0; | |
| 183 } | |
| 184 | |
| 185 /** Allocate and initialize a new arithmetic coding state | |
| 186 * the returned pointer can simply be freed; this does | |
| 187 * not affect the associated Jbig2WordStream. | |
| 188 */ | |
| 189 Jbig2ArithState * | |
| 190 jbig2_arith_new(Jbig2Ctx *ctx, Jbig2WordStream *ws) | |
| 191 { | |
| 192 Jbig2ArithState *result; | |
| 193 int ret; | |
| 194 | |
| 195 result = jbig2_new(ctx, Jbig2ArithState, 1); | |
| 196 if (result == NULL) { | |
| 197 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to allocate arithmetic coding state"); | |
| 198 return NULL; | |
| 199 } | |
| 200 | |
| 201 result->err = 0; | |
| 202 result->ws = ws; | |
| 203 result->offset = 0; | |
| 204 | |
| 205 ret = result->ws->get_next_word(ctx, result->ws, result->offset, &result->next_word); | |
| 206 if (ret < 0) { | |
| 207 jbig2_free(ctx->allocator, result); | |
| 208 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to initialize underlying stream of arithmetic decoder"); | |
| 209 return NULL; | |
| 210 } | |
| 211 | |
| 212 result->next_word_bytes = (size_t) ret; | |
| 213 if (result->next_word_bytes == 0) { | |
| 214 jbig2_free(ctx->allocator, result); | |
| 215 jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read first byte from underlying stream when initializing arithmetic decoder"); | |
| 216 return NULL; | |
| 217 } | |
| 218 | |
| 219 result->offset += result->next_word_bytes; | |
| 220 | |
| 221 /* Figure F.1 */ | |
| 222 result->C = (~(result->next_word >> 8)) & 0xFF0000; | |
| 223 | |
| 224 /* Figure E.20 (2) */ | |
| 225 if (jbig2_arith_bytein(ctx, result) < 0) { | |
| 226 jbig2_free(ctx->allocator, result); | |
| 227 jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read second byte from underlying stream when initializing arithmetic decoder"); | |
| 228 return NULL; | |
| 229 } | |
| 230 | |
| 231 /* Figure E.20 (3) */ | |
| 232 result->C <<= 7; | |
| 233 result->CT -= 7; | |
| 234 result->A = 0x8000; | |
| 235 | |
| 236 return result; | |
| 237 } | |
| 238 | |
| 239 #define MAX_QE_ARRAY_SIZE 47 | |
| 240 | |
| 241 /* could put bit fields in to minimize memory usage */ | |
| 242 typedef struct { | |
| 243 uint16_t Qe; | |
| 244 byte mps_xor; /* mps_xor = index ^ NMPS */ | |
| 245 byte lps_xor; /* lps_xor = index ^ NLPS ^ (SWITCH << 7) */ | |
| 246 } Jbig2ArithQe; | |
| 247 | |
| 248 #define MPS(index, nmps) ((index) ^ (nmps)) | |
| 249 #define LPS(index, nlps, swtch) ((index) ^ (nlps) ^ ((swtch) << 7)) | |
| 250 | |
| 251 static const Jbig2ArithQe jbig2_arith_Qe[MAX_QE_ARRAY_SIZE] = { | |
| 252 {0x5601, MPS(0, 1), LPS(0, 1, 1)}, | |
| 253 {0x3401, MPS(1, 2), LPS(1, 6, 0)}, | |
| 254 {0x1801, MPS(2, 3), LPS(2, 9, 0)}, | |
| 255 {0x0AC1, MPS(3, 4), LPS(3, 12, 0)}, | |
| 256 {0x0521, MPS(4, 5), LPS(4, 29, 0)}, | |
| 257 {0x0221, MPS(5, 38), LPS(5, 33, 0)}, | |
| 258 {0x5601, MPS(6, 7), LPS(6, 6, 1)}, | |
| 259 {0x5401, MPS(7, 8), LPS(7, 14, 0)}, | |
| 260 {0x4801, MPS(8, 9), LPS(8, 14, 0)}, | |
| 261 {0x3801, MPS(9, 10), LPS(9, 14, 0)}, | |
| 262 {0x3001, MPS(10, 11), LPS(10, 17, 0)}, | |
| 263 {0x2401, MPS(11, 12), LPS(11, 18, 0)}, | |
| 264 {0x1C01, MPS(12, 13), LPS(12, 20, 0)}, | |
| 265 {0x1601, MPS(13, 29), LPS(13, 21, 0)}, | |
| 266 {0x5601, MPS(14, 15), LPS(14, 14, 1)}, | |
| 267 {0x5401, MPS(15, 16), LPS(15, 14, 0)}, | |
| 268 {0x5101, MPS(16, 17), LPS(16, 15, 0)}, | |
| 269 {0x4801, MPS(17, 18), LPS(17, 16, 0)}, | |
| 270 {0x3801, MPS(18, 19), LPS(18, 17, 0)}, | |
| 271 {0x3401, MPS(19, 20), LPS(19, 18, 0)}, | |
| 272 {0x3001, MPS(20, 21), LPS(20, 19, 0)}, | |
| 273 {0x2801, MPS(21, 22), LPS(21, 19, 0)}, | |
| 274 {0x2401, MPS(22, 23), LPS(22, 20, 0)}, | |
| 275 {0x2201, MPS(23, 24), LPS(23, 21, 0)}, | |
| 276 {0x1C01, MPS(24, 25), LPS(24, 22, 0)}, | |
| 277 {0x1801, MPS(25, 26), LPS(25, 23, 0)}, | |
| 278 {0x1601, MPS(26, 27), LPS(26, 24, 0)}, | |
| 279 {0x1401, MPS(27, 28), LPS(27, 25, 0)}, | |
| 280 {0x1201, MPS(28, 29), LPS(28, 26, 0)}, | |
| 281 {0x1101, MPS(29, 30), LPS(29, 27, 0)}, | |
| 282 {0x0AC1, MPS(30, 31), LPS(30, 28, 0)}, | |
| 283 {0x09C1, MPS(31, 32), LPS(31, 29, 0)}, | |
| 284 {0x08A1, MPS(32, 33), LPS(32, 30, 0)}, | |
| 285 {0x0521, MPS(33, 34), LPS(33, 31, 0)}, | |
| 286 {0x0441, MPS(34, 35), LPS(34, 32, 0)}, | |
| 287 {0x02A1, MPS(35, 36), LPS(35, 33, 0)}, | |
| 288 {0x0221, MPS(36, 37), LPS(36, 34, 0)}, | |
| 289 {0x0141, MPS(37, 38), LPS(37, 35, 0)}, | |
| 290 {0x0111, MPS(38, 39), LPS(38, 36, 0)}, | |
| 291 {0x0085, MPS(39, 40), LPS(39, 37, 0)}, | |
| 292 {0x0049, MPS(40, 41), LPS(40, 38, 0)}, | |
| 293 {0x0025, MPS(41, 42), LPS(41, 39, 0)}, | |
| 294 {0x0015, MPS(42, 43), LPS(42, 40, 0)}, | |
| 295 {0x0009, MPS(43, 44), LPS(43, 41, 0)}, | |
| 296 {0x0005, MPS(44, 45), LPS(44, 42, 0)}, | |
| 297 {0x0001, MPS(45, 45), LPS(45, 43, 0)}, | |
| 298 {0x5601, MPS(46, 46), LPS(46, 46, 0)} | |
| 299 }; | |
| 300 | |
| 301 static int | |
| 302 jbig2_arith_renormd(Jbig2Ctx *ctx, Jbig2ArithState *as) | |
| 303 { | |
| 304 /* Figure E.18 */ | |
| 305 do { | |
| 306 if (as->CT == 0 && jbig2_arith_bytein(ctx, as) < 0) { | |
| 307 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read byte from compressed data stream"); | |
| 308 } | |
| 309 as->A <<= 1; | |
| 310 as->C <<= 1; | |
| 311 as->CT--; | |
| 312 } while ((as->A & 0x8000) == 0); | |
| 313 | |
| 314 return 0; | |
| 315 } | |
| 316 | |
| 317 int | |
| 318 jbig2_arith_decode(Jbig2Ctx *ctx, Jbig2ArithState *as, Jbig2ArithCx *pcx) | |
| 319 { | |
| 320 Jbig2ArithCx cx = *pcx; | |
| 321 const Jbig2ArithQe *pqe; | |
| 322 unsigned int index = cx & 0x7f; | |
| 323 bool D; | |
| 324 | |
| 325 if (index >= MAX_QE_ARRAY_SIZE) { | |
| 326 return jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to determine probability estimate because index out of range"); | |
| 327 } | |
| 328 | |
| 329 pqe = &jbig2_arith_Qe[index]; | |
| 330 | |
| 331 /* Figure F.2 */ | |
| 332 as->A -= pqe->Qe; | |
| 333 if ((as->C >> 16) < as->A) { | |
| 334 if ((as->A & 0x8000) == 0) { | |
| 335 /* MPS_EXCHANGE, Figure E.16 */ | |
| 336 if (as->A < pqe->Qe) { | |
| 337 D = 1 - (cx >> 7); | |
| 338 *pcx ^= pqe->lps_xor; | |
| 339 } else { | |
| 340 D = cx >> 7; | |
| 341 *pcx ^= pqe->mps_xor; | |
| 342 } | |
| 343 if (jbig2_arith_renormd(ctx, as) < 0) { | |
| 344 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); | |
| 345 } | |
| 346 | |
| 347 return D; | |
| 348 } else { | |
| 349 return cx >> 7; | |
| 350 } | |
| 351 } else { | |
| 352 as->C -= (as->A) << 16; | |
| 353 /* LPS_EXCHANGE, Figure E.17 */ | |
| 354 if (as->A < pqe->Qe) { | |
| 355 as->A = pqe->Qe; | |
| 356 D = cx >> 7; | |
| 357 *pcx ^= pqe->mps_xor; | |
| 358 } else { | |
| 359 as->A = pqe->Qe; | |
| 360 D = 1 - (cx >> 7); | |
| 361 *pcx ^= pqe->lps_xor; | |
| 362 } | |
| 363 if (jbig2_arith_renormd(ctx, as) < 0) { | |
| 364 return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); | |
| 365 } | |
| 366 | |
| 367 return D; | |
| 368 } | |
| 369 } | |
| 370 | |
| 371 #ifdef TEST | |
| 372 | |
| 373 static const byte test_stream[] = { | |
| 374 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, | |
| 375 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, | |
| 376 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC, | |
| 377 0x00, 0x00 | |
| 378 }; | |
| 379 | |
| 380 #if defined(JBIG2_DEBUG) || defined(JBIG2_DEBUG_ARITH) | |
| 381 static void | |
| 382 jbig2_arith_trace(Jbig2ArithState *as, Jbig2ArithCx cx) | |
| 383 { | |
| 384 fprintf(stderr, "I = %2d, MPS = %d, A = %04x, CT = %2d, C = %08x\n", cx & 0x7f, cx >> 7, as->A, as->CT, as->C); | |
| 385 } | |
| 386 #endif | |
| 387 | |
| 388 static int | |
| 389 test_get_word(Jbig2Ctx *ctx, Jbig2WordStream *self, size_t offset, uint32_t *word) | |
| 390 { | |
| 391 uint32_t val = 0; | |
| 392 int ret = 0; | |
| 393 | |
| 394 if (self == NULL || word == NULL) | |
| 395 return -1; | |
| 396 if (offset >= sizeof (test_stream)) | |
| 397 return 0; | |
| 398 | |
| 399 if (offset < sizeof(test_stream)) { | |
| 400 val |= test_stream[offset] << 24; | |
| 401 ret++; | |
| 402 } | |
| 403 if (offset + 1 < sizeof(test_stream)) { | |
| 404 val |= test_stream[offset + 1] << 16; | |
| 405 ret++; | |
| 406 } | |
| 407 if (offset + 2 < sizeof(test_stream)) { | |
| 408 val |= test_stream[offset + 2] << 8; | |
| 409 ret++; | |
| 410 } | |
| 411 if (offset + 3 < sizeof(test_stream)) { | |
| 412 val |= test_stream[offset + 3]; | |
| 413 ret++; | |
| 414 } | |
| 415 *word = val; | |
| 416 return ret; | |
| 417 } | |
| 418 | |
| 419 int | |
| 420 main(int argc, char **argv) | |
| 421 { | |
| 422 Jbig2Ctx *ctx; | |
| 423 Jbig2WordStream ws; | |
| 424 Jbig2ArithState *as; | |
| 425 int i; | |
| 426 Jbig2ArithCx cx = 0; | |
| 427 | |
| 428 ctx = jbig2_ctx_new(NULL, 0, NULL, NULL, NULL); | |
| 429 | |
| 430 ws.get_next_word = test_get_word; | |
| 431 as = jbig2_arith_new(ctx, &ws); | |
| 432 #ifdef JBIG2_DEBUG_ARITH | |
| 433 jbig2_arith_trace(as, cx); | |
| 434 #endif | |
| 435 | |
| 436 for (i = 0; i < 256; i++) { | |
| 437 #ifdef JBIG2_DEBUG_ARITH | |
| 438 int D = | |
| 439 #else | |
| 440 (void) | |
| 441 #endif | |
| 442 jbig2_arith_decode(ctx, as, &cx); | |
| 443 | |
| 444 #ifdef JBIG2_DEBUG_ARITH | |
| 445 fprintf(stderr, "%3d: D = %d, ", i, D); | |
| 446 jbig2_arith_trace(as, cx); | |
| 447 #endif | |
| 448 } | |
| 449 | |
| 450 jbig2_free(ctx->allocator, as); | |
| 451 | |
| 452 jbig2_ctx_free(ctx); | |
| 453 | |
| 454 return 0; | |
| 455 } | |
| 456 #endif |
