Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/jbig2dec/jbig2_arith.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/jbig2dec/jbig2_arith.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,456 @@ +/* Copyright (C) 2001-2023 Artifex Software, Inc. + All Rights Reserved. + + This software is provided AS-IS with no warranty, either express or + implied. + + This software is distributed under license and may not be copied, + modified or distributed except as expressly authorized under the terms + of the license contained in the file LICENSE in this distribution. + + Refer to licensing information at http://www.artifex.com or contact + Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, + CA 94129, USA, for further information. +*/ + +/* + jbig2dec +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "os_types.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "jbig2.h" +#include "jbig2_priv.h" +#include "jbig2_arith.h" + +struct _Jbig2ArithState { + uint32_t C; + uint32_t A; + + int CT; + + uint32_t next_word; + size_t next_word_bytes; + int err; + + Jbig2WordStream *ws; + size_t offset; +}; + +/* + Previous versions of this code had a #define to allow + us to choose between using the revised arithmetic decoding + specified in the 'Software Convention' section of the spec. + Back to back tests showed that the 'Software Convention' + version was indeed slightly faster. We therefore enable it + by default. We also strip the option out, because a) it + makes the code harder to read, and b) such things are an + invitation to bitrot. +*/ + +static int +jbig2_arith_bytein(Jbig2Ctx *ctx, Jbig2ArithState *as) +{ + byte B; + + /* Treat both errors and reading beyond end of stream as an error. */ + if (as->err != 0) { + jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); + return -1; + } + if (as->next_word_bytes == 0) { + jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read beyond end of underlying stream during arithmetic decoding"); + return -1; + } + + /* At this point there is at least one byte in as->next_word. */ + + /* This code confused me no end when I first read it, so a quick note + * to save others (and future me's) from being similarly confused. + * 'next_word' does indeed contain 'next_word_bytes' of valid data + * (always starting at the most significant byte). The confusing + * thing is that the first byte has always already been read. + * i.e. it serves only as an indication that the last byte we read + * was FF or not. + * + * The jbig2 bytestream uses FF bytes, followed by a byte > 0x8F as + * marker bytes. These never occur in normal streams of arithmetic + * encoding, so meeting one terminates the stream (with an infinite + * series of 1 bits). + * + * If we meet an FF byte, we return it as normal. We just 'remember' + * that fact for the next byte we read. + */ + + /* Figure F.3 */ + B = (byte)((as->next_word >> 24) & 0xFF); + if (B == 0xFF) { + byte B1; + + /* next_word_bytes can only be == 1 here, but let's be defensive. */ + if (as->next_word_bytes <= 1) { + int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); + if (ret < 0) { + as->err = 1; + return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to check for marker code due to failure in underlying stream during arithmetic decoding"); + } + as->next_word_bytes = (size_t) ret; + + if (as->next_word_bytes == 0) { + jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read end of possible terminating marker code, assuming terminating marker code"); + as->next_word = 0xFF900000; + as->next_word_bytes = 2; + as->C += 0xFF00; + as->CT = 8; + return 0; + } + + as->offset += as->next_word_bytes; + + B1 = (byte)((as->next_word >> 24) & 0xFF); + if (B1 > 0x8F) { +#ifdef JBIG2_DEBUG_ARITH + fprintf(stderr, "read %02x (aa)\n", B); +#endif + as->CT = 8; + as->next_word = 0xFF000000 | (as->next_word >> 8); + as->next_word_bytes = 2; + as->offset--; + } else { +#ifdef JBIG2_DEBUG_ARITH + fprintf(stderr, "read %02x (a)\n", B); +#endif + as->C += 0xFE00 - (B1 << 9); + as->CT = 7; + } + } else { + B1 = (byte)((as->next_word >> 16) & 0xFF); + if (B1 > 0x8F) { +#ifdef JBIG2_DEBUG_ARITH + fprintf(stderr, "read %02x (ba)\n", B); +#endif + as->CT = 8; + } else { + as->next_word_bytes--; + as->next_word <<= 8; +#ifdef JBIG2_DEBUG_ARITH + fprintf(stderr, "read %02x (b)\n", B); +#endif + + as->C += 0xFE00 - (B1 << 9); + as->CT = 7; + } + } + } else { +#ifdef JBIG2_DEBUG_ARITH + fprintf(stderr, "read %02x\n", B); +#endif + as->next_word <<= 8; + as->next_word_bytes--; + + if (as->next_word_bytes == 0) { + int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); + if (ret < 0) { + as->err = 1; + return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); + } + as->next_word_bytes = (size_t) ret; + + if (as->next_word_bytes == 0) { + jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to find terminating marker code before end of underlying stream, assuming terminating marker code"); + as->next_word = 0xFF900000; + as->next_word_bytes = 2; + as->C += 0xFF00; + as->CT = 8; + return 0; + } + + as->offset += as->next_word_bytes; + } + + B = (byte)((as->next_word >> 24) & 0xFF); + as->C += 0xFF00 - (B << 8); + as->CT = 8; + } + + return 0; +} + +/** Allocate and initialize a new arithmetic coding state + * the returned pointer can simply be freed; this does + * not affect the associated Jbig2WordStream. + */ +Jbig2ArithState * +jbig2_arith_new(Jbig2Ctx *ctx, Jbig2WordStream *ws) +{ + Jbig2ArithState *result; + int ret; + + result = jbig2_new(ctx, Jbig2ArithState, 1); + if (result == NULL) { + jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to allocate arithmetic coding state"); + return NULL; + } + + result->err = 0; + result->ws = ws; + result->offset = 0; + + ret = result->ws->get_next_word(ctx, result->ws, result->offset, &result->next_word); + if (ret < 0) { + jbig2_free(ctx->allocator, result); + jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to initialize underlying stream of arithmetic decoder"); + return NULL; + } + + result->next_word_bytes = (size_t) ret; + if (result->next_word_bytes == 0) { + jbig2_free(ctx->allocator, result); + jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read first byte from underlying stream when initializing arithmetic decoder"); + return NULL; + } + + result->offset += result->next_word_bytes; + + /* Figure F.1 */ + result->C = (~(result->next_word >> 8)) & 0xFF0000; + + /* Figure E.20 (2) */ + if (jbig2_arith_bytein(ctx, result) < 0) { + jbig2_free(ctx->allocator, result); + jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read second byte from underlying stream when initializing arithmetic decoder"); + return NULL; + } + + /* Figure E.20 (3) */ + result->C <<= 7; + result->CT -= 7; + result->A = 0x8000; + + return result; +} + +#define MAX_QE_ARRAY_SIZE 47 + +/* could put bit fields in to minimize memory usage */ +typedef struct { + uint16_t Qe; + byte mps_xor; /* mps_xor = index ^ NMPS */ + byte lps_xor; /* lps_xor = index ^ NLPS ^ (SWITCH << 7) */ +} Jbig2ArithQe; + +#define MPS(index, nmps) ((index) ^ (nmps)) +#define LPS(index, nlps, swtch) ((index) ^ (nlps) ^ ((swtch) << 7)) + +static const Jbig2ArithQe jbig2_arith_Qe[MAX_QE_ARRAY_SIZE] = { + {0x5601, MPS(0, 1), LPS(0, 1, 1)}, + {0x3401, MPS(1, 2), LPS(1, 6, 0)}, + {0x1801, MPS(2, 3), LPS(2, 9, 0)}, + {0x0AC1, MPS(3, 4), LPS(3, 12, 0)}, + {0x0521, MPS(4, 5), LPS(4, 29, 0)}, + {0x0221, MPS(5, 38), LPS(5, 33, 0)}, + {0x5601, MPS(6, 7), LPS(6, 6, 1)}, + {0x5401, MPS(7, 8), LPS(7, 14, 0)}, + {0x4801, MPS(8, 9), LPS(8, 14, 0)}, + {0x3801, MPS(9, 10), LPS(9, 14, 0)}, + {0x3001, MPS(10, 11), LPS(10, 17, 0)}, + {0x2401, MPS(11, 12), LPS(11, 18, 0)}, + {0x1C01, MPS(12, 13), LPS(12, 20, 0)}, + {0x1601, MPS(13, 29), LPS(13, 21, 0)}, + {0x5601, MPS(14, 15), LPS(14, 14, 1)}, + {0x5401, MPS(15, 16), LPS(15, 14, 0)}, + {0x5101, MPS(16, 17), LPS(16, 15, 0)}, + {0x4801, MPS(17, 18), LPS(17, 16, 0)}, + {0x3801, MPS(18, 19), LPS(18, 17, 0)}, + {0x3401, MPS(19, 20), LPS(19, 18, 0)}, + {0x3001, MPS(20, 21), LPS(20, 19, 0)}, + {0x2801, MPS(21, 22), LPS(21, 19, 0)}, + {0x2401, MPS(22, 23), LPS(22, 20, 0)}, + {0x2201, MPS(23, 24), LPS(23, 21, 0)}, + {0x1C01, MPS(24, 25), LPS(24, 22, 0)}, + {0x1801, MPS(25, 26), LPS(25, 23, 0)}, + {0x1601, MPS(26, 27), LPS(26, 24, 0)}, + {0x1401, MPS(27, 28), LPS(27, 25, 0)}, + {0x1201, MPS(28, 29), LPS(28, 26, 0)}, + {0x1101, MPS(29, 30), LPS(29, 27, 0)}, + {0x0AC1, MPS(30, 31), LPS(30, 28, 0)}, + {0x09C1, MPS(31, 32), LPS(31, 29, 0)}, + {0x08A1, MPS(32, 33), LPS(32, 30, 0)}, + {0x0521, MPS(33, 34), LPS(33, 31, 0)}, + {0x0441, MPS(34, 35), LPS(34, 32, 0)}, + {0x02A1, MPS(35, 36), LPS(35, 33, 0)}, + {0x0221, MPS(36, 37), LPS(36, 34, 0)}, + {0x0141, MPS(37, 38), LPS(37, 35, 0)}, + {0x0111, MPS(38, 39), LPS(38, 36, 0)}, + {0x0085, MPS(39, 40), LPS(39, 37, 0)}, + {0x0049, MPS(40, 41), LPS(40, 38, 0)}, + {0x0025, MPS(41, 42), LPS(41, 39, 0)}, + {0x0015, MPS(42, 43), LPS(42, 40, 0)}, + {0x0009, MPS(43, 44), LPS(43, 41, 0)}, + {0x0005, MPS(44, 45), LPS(44, 42, 0)}, + {0x0001, MPS(45, 45), LPS(45, 43, 0)}, + {0x5601, MPS(46, 46), LPS(46, 46, 0)} +}; + +static int +jbig2_arith_renormd(Jbig2Ctx *ctx, Jbig2ArithState *as) +{ + /* Figure E.18 */ + do { + if (as->CT == 0 && jbig2_arith_bytein(ctx, as) < 0) { + return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read byte from compressed data stream"); + } + as->A <<= 1; + as->C <<= 1; + as->CT--; + } while ((as->A & 0x8000) == 0); + + return 0; +} + +int +jbig2_arith_decode(Jbig2Ctx *ctx, Jbig2ArithState *as, Jbig2ArithCx *pcx) +{ + Jbig2ArithCx cx = *pcx; + const Jbig2ArithQe *pqe; + unsigned int index = cx & 0x7f; + bool D; + + if (index >= MAX_QE_ARRAY_SIZE) { + return jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to determine probability estimate because index out of range"); + } + + pqe = &jbig2_arith_Qe[index]; + + /* Figure F.2 */ + as->A -= pqe->Qe; + if ((as->C >> 16) < as->A) { + if ((as->A & 0x8000) == 0) { + /* MPS_EXCHANGE, Figure E.16 */ + if (as->A < pqe->Qe) { + D = 1 - (cx >> 7); + *pcx ^= pqe->lps_xor; + } else { + D = cx >> 7; + *pcx ^= pqe->mps_xor; + } + if (jbig2_arith_renormd(ctx, as) < 0) { + return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); + } + + return D; + } else { + return cx >> 7; + } + } else { + as->C -= (as->A) << 16; + /* LPS_EXCHANGE, Figure E.17 */ + if (as->A < pqe->Qe) { + as->A = pqe->Qe; + D = cx >> 7; + *pcx ^= pqe->mps_xor; + } else { + as->A = pqe->Qe; + D = 1 - (cx >> 7); + *pcx ^= pqe->lps_xor; + } + if (jbig2_arith_renormd(ctx, as) < 0) { + return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); + } + + return D; + } +} + +#ifdef TEST + +static const byte test_stream[] = { + 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, + 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, + 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC, + 0x00, 0x00 +}; + +#if defined(JBIG2_DEBUG) || defined(JBIG2_DEBUG_ARITH) +static void +jbig2_arith_trace(Jbig2ArithState *as, Jbig2ArithCx cx) +{ + fprintf(stderr, "I = %2d, MPS = %d, A = %04x, CT = %2d, C = %08x\n", cx & 0x7f, cx >> 7, as->A, as->CT, as->C); +} +#endif + +static int +test_get_word(Jbig2Ctx *ctx, Jbig2WordStream *self, size_t offset, uint32_t *word) +{ + uint32_t val = 0; + int ret = 0; + + if (self == NULL || word == NULL) + return -1; + if (offset >= sizeof (test_stream)) + return 0; + + if (offset < sizeof(test_stream)) { + val |= test_stream[offset] << 24; + ret++; + } + if (offset + 1 < sizeof(test_stream)) { + val |= test_stream[offset + 1] << 16; + ret++; + } + if (offset + 2 < sizeof(test_stream)) { + val |= test_stream[offset + 2] << 8; + ret++; + } + if (offset + 3 < sizeof(test_stream)) { + val |= test_stream[offset + 3]; + ret++; + } + *word = val; + return ret; +} + +int +main(int argc, char **argv) +{ + Jbig2Ctx *ctx; + Jbig2WordStream ws; + Jbig2ArithState *as; + int i; + Jbig2ArithCx cx = 0; + + ctx = jbig2_ctx_new(NULL, 0, NULL, NULL, NULL); + + ws.get_next_word = test_get_word; + as = jbig2_arith_new(ctx, &ws); +#ifdef JBIG2_DEBUG_ARITH + jbig2_arith_trace(as, cx); +#endif + + for (i = 0; i < 256; i++) { +#ifdef JBIG2_DEBUG_ARITH + int D = +#else + (void) +#endif + jbig2_arith_decode(ctx, as, &cx); + +#ifdef JBIG2_DEBUG_ARITH + fprintf(stderr, "%3d: D = %d, ", i, D); + jbig2_arith_trace(as, cx); +#endif + } + + jbig2_free(ctx->allocator, as); + + jbig2_ctx_free(ctx); + + return 0; +} +#endif
