comparison mupdf-source/thirdparty/zint/backend/tif_lzw.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /* tif_lzw.h - LZW compression for TIFF */
2 /*
3 libzint - the open source barcode library
4 Copyright (C) 2021-2024 Robin Stuart <rstuart114@gmail.com>
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 1. Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3. Neither the name of the project nor the names of its contributors
16 may be used to endorse or promote products derived from this software
17 without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 SUCH DAMAGE.
30 */
31 /* SPDX-License-Identifier: BSD-3-Clause */
32
33 #ifndef Z_TIF_LZW_H
34 #define Z_TIF_LZW_H
35
36 #ifdef __cplusplus
37 extern "C" {
38 #endif /* __cplusplus */
39
40 /*
41 * Adapted from TIFF Library 4.2.0 libtiff/tif_lzw.c */
42 /*
43 * Copyright (c) 1988-1997 Sam Leffler
44 * Copyright (c) 1991-1997 Silicon Graphics, Inc.
45 *
46 * Permission to use, copy, modify, distribute, and sell this software and
47 * its documentation for any purpose is hereby granted without fee, provided
48 * that (i) the above copyright notices and this permission notice appear in
49 * all copies of the software and related documentation, and (ii) the names of
50 * Sam Leffler and Silicon Graphics may not be used in any advertising or
51 * publicity relating to the software without the specific, prior written
52 * permission of Sam Leffler and Silicon Graphics.
53 *
54 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
55 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
56 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
57 *
58 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
59 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
60 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
61 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
62 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
63 * OF THIS SOFTWARE.
64 */
65 /*
66 * TIFF Library.
67 * Rev 5.0 Lempel-Ziv & Welch Compression Support
68 *
69 * This code is derived from the compress program whose code is
70 * derived from software contributed to Berkeley by James A. Woods,
71 * derived from original work by Spencer Thomas and Joseph Orost.
72 *
73 * The original Berkeley copyright notice appears below in its entirety.
74 */
75 /*
76 * Copyright (c) 1985, 1986 The Regents of the University of California.
77 * All rights reserved.
78 *
79 * This code is derived from software contributed to Berkeley by
80 * James A. Woods, derived from original work by Spencer Thomas
81 * and Joseph Orost.
82 *
83 * Redistribution and use in source and binary forms are permitted
84 * provided that the above copyright notice and this paragraph are
85 * duplicated in all such forms and that any documentation,
86 * advertising materials, and other materials related to such
87 * distribution and use acknowledge that the software was developed
88 * by the University of California, Berkeley. The name of the
89 * University may not be used to endorse or promote products derived
90 * from this software without specific prior written permission.
91 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
92 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
93 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
94 */
95
96 #define MAXCODE(n) ((1L << (n)) - 1)
97
98 /*
99 * The TIFF spec specifies that encoded bit
100 * strings range from 9 to 12 bits.
101 */
102 #define BITS_MIN 9 /* start with 9 bits */
103 #define BITS_MAX 12 /* max of 12 bit strings */
104 /* predefined codes */
105 #define CODE_CLEAR 256 /* code to clear string table */
106 #define CODE_EOI 257 /* end-of-information code */
107 #define CODE_FIRST 258 /* first free code entry */
108 #define CODE_MAX MAXCODE(BITS_MAX)
109 #define HSIZE 9001L /* 91% occupancy */
110 #define HSHIFT (13 - 8)
111
112 /*
113 * Encoding-specific state.
114 */
115 typedef uint16_t tif_lzw_hcode; /* codes fit in 16 bits */
116 typedef struct {
117 long hash;
118 tif_lzw_hcode code;
119 } tif_lzw_hash;
120
121 #define CHECK_GAP 10000 /* ratio check interval */
122
123 /*
124 * State block.
125 */
126 typedef struct {
127 tif_lzw_hash *enc_hashtab; /* kept separate for small machines */
128 } tif_lzw_state;
129
130 /*
131 * LZW Encoding.
132 */
133
134 /*
135 * Reset encoding hash table.
136 */
137 static void tif_lzw_cl_hash(tif_lzw_state *sp) {
138 register tif_lzw_hash *hp = &sp->enc_hashtab[HSIZE - 1];
139 register long i = HSIZE - 8;
140
141 do {
142 i -= 8;
143 hp[-7].hash = -1;
144 hp[-6].hash = -1;
145 hp[-5].hash = -1;
146 hp[-4].hash = -1;
147 hp[-3].hash = -1;
148 hp[-2].hash = -1;
149 hp[-1].hash = -1;
150 hp[ 0].hash = -1;
151 hp -= 8;
152 } while (i >= 0);
153
154 for (i += 8; i > 0; i--, hp--) {
155 hp->hash = -1;
156 }
157 }
158
159 #define CALCRATIO(sp, rat) { \
160 if (incount > 0x007fffff) { /* NB: shift will overflow */ \
161 rat = outcount >> 8; \
162 rat = (rat == 0 ? 0x7fffffff : incount / rat); \
163 } else \
164 rat = (incount << 8) / outcount; \
165 }
166
167 /* Explicit 0xff masking to make icc -check=conversions happy */
168 #define PutNextCode(op_fmp, c) { \
169 nextdata = (nextdata << nbits) | c; \
170 nextbits += nbits; \
171 fm_putc((nextdata >> (nextbits - 8)) & 0xff, op_fmp); \
172 nextbits -= 8; \
173 if (nextbits >= 8) { \
174 fm_putc((nextdata >> (nextbits - 8)) & 0xff, op_fmp); \
175 nextbits -= 8; \
176 } \
177 outcount += nbits; \
178 }
179
180 /*
181 * Encode a chunk of pixels.
182 *
183 * Uses an open addressing double hashing (no chaining) on the
184 * prefix code/next character combination. We do a variant of
185 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
186 * relatively-prime secondary probe. Here, the modular division
187 * first probe is gives way to a faster exclusive-or manipulation.
188 * Also do block compression with an adaptive reset, whereby the
189 * code table is cleared when the compression ratio decreases,
190 * but after the table fills. The variable-length output codes
191 * are re-sized at this point, and a CODE_CLEAR is generated
192 * for the decoder.
193 */
194 static int tif_lzw_encode(tif_lzw_state *sp, struct filemem *op_fmp, const unsigned char *bp, int cc) {
195 register long fcode;
196 register tif_lzw_hash *hp;
197 register int h, c;
198 tif_lzw_hcode ent;
199 long disp;
200
201 int nbits; /* # of bits/code */
202 int maxcode; /* maximum code for nbits */
203 int free_ent; /* next free entry in hash table */
204 unsigned long nextdata; /* next bits of i/o */
205 long nextbits; /* # of valid bits in nextdata */
206 long checkpoint; /* point at which to clear table */
207 long ratio; /* current compression ratio */
208 long incount; /* (input) data bytes encoded */
209 long outcount; /* encoded (output) bytes */
210
211 /*
212 * Reset encoding state at the start of a strip.
213 */
214 if (sp->enc_hashtab == NULL) {
215 sp->enc_hashtab = (tif_lzw_hash *) malloc(HSIZE * sizeof(tif_lzw_hash));
216 if (sp->enc_hashtab == NULL) {
217 return 0;
218 }
219 }
220
221 tif_lzw_cl_hash(sp); /* clear hash table */
222
223 nbits = BITS_MIN;
224 maxcode = MAXCODE(BITS_MIN);
225 free_ent = CODE_FIRST;
226 nextdata = 0;
227 nextbits = 0;
228 checkpoint = CHECK_GAP;
229 ratio = 0;
230 incount = 0;
231 outcount = 0;
232
233 ent = (tif_lzw_hcode) -1;
234
235 if (cc > 0) {
236 PutNextCode(op_fmp, CODE_CLEAR);
237 ent = *bp++; cc--; incount++;
238 }
239 while (cc > 0) {
240 c = *bp++; cc--; incount++;
241 fcode = ((long)c << BITS_MAX) + ent;
242 h = (c << HSHIFT) ^ ent; /* xor hashing */
243 #ifdef _WINDOWS
244 /*
245 * Check hash index for an overflow.
246 */
247 if (h >= HSIZE) {
248 h -= HSIZE;
249 }
250 #endif
251 hp = &sp->enc_hashtab[h];
252 if (hp->hash == fcode) {
253 ent = hp->code;
254 continue;
255 }
256 if (hp->hash >= 0) {
257 /*
258 * Primary hash failed, check secondary hash.
259 */
260 disp = HSIZE - h;
261 if (h == 0) {
262 disp = 1;
263 }
264 do {
265 /*
266 * Avoid pointer arithmetic because of
267 * wraparound problems with segments.
268 */
269 if ((h -= disp) < 0) {
270 h += HSIZE;
271 }
272 hp = &sp->enc_hashtab[h];
273 if (hp->hash == fcode) {
274 ent = hp->code;
275 goto hit;
276 }
277 } while (hp->hash >= 0);
278 }
279 /*
280 * New entry, emit code and add to table.
281 */
282 PutNextCode(op_fmp, ent);
283 ent = (tif_lzw_hcode) c;
284 hp->code = (tif_lzw_hcode) (free_ent++);
285 hp->hash = fcode;
286 if (free_ent == CODE_MAX - 1) {
287 /* table is full, emit clear code and reset */
288 tif_lzw_cl_hash(sp);
289 ratio = 0;
290 incount = 0;
291 outcount = 0;
292 free_ent = CODE_FIRST;
293 PutNextCode(op_fmp, CODE_CLEAR);
294 nbits = BITS_MIN;
295 maxcode = MAXCODE(BITS_MIN);
296 } else {
297 /*
298 * If the next entry is going to be too big for
299 * the code size, then increase it, if possible.
300 */
301 if (free_ent > maxcode) {
302 nbits++;
303 assert(nbits <= BITS_MAX);
304 maxcode = (int) MAXCODE(nbits);
305 } else if (incount >= checkpoint) {
306 long rat;
307 /*
308 * Check compression ratio and, if things seem
309 * to be slipping, clear the hash table and
310 * reset state. The compression ratio is a
311 * 24+8-bit fractional number.
312 */
313 checkpoint = incount + CHECK_GAP;
314 CALCRATIO(sp, rat);
315 if (rat <= ratio) {
316 tif_lzw_cl_hash(sp);
317 ratio = 0;
318 incount = 0;
319 outcount = 0;
320 free_ent = CODE_FIRST;
321 PutNextCode(op_fmp, CODE_CLEAR);
322 nbits = BITS_MIN;
323 maxcode = MAXCODE(BITS_MIN);
324 } else {
325 ratio = rat;
326 }
327 }
328 }
329 hit:
330 ;
331 }
332
333 /*
334 * Finish off an encoded strip by flushing the last
335 * string and tacking on an End Of Information code.
336 */
337 if (ent != (tif_lzw_hcode) -1) {
338
339 PutNextCode(op_fmp, ent);
340 free_ent++;
341
342 if (free_ent == CODE_MAX - 1) {
343 /* table is full, emit clear code and reset */
344 outcount = 0;
345 PutNextCode(op_fmp, CODE_CLEAR);
346 nbits = BITS_MIN;
347 } else {
348 /*
349 * If the next entry is going to be too big for
350 * the code size, then increase it, if possible.
351 */
352 if (free_ent > maxcode) {
353 nbits++;
354 assert(nbits <= BITS_MAX);
355 }
356 }
357 }
358 PutNextCode(op_fmp, CODE_EOI);
359 /* Explicit 0xff masking to make icc -check=conversions happy */
360 if (nextbits > 0) {
361 fm_putc((nextdata << (8 - nextbits)) & 0xff, op_fmp);
362 }
363
364 return 1;
365 }
366
367 static void tif_lzw_cleanup(tif_lzw_state *sp) {
368 if (sp->enc_hashtab) {
369 free(sp->enc_hashtab);
370 }
371 }
372
373 static void tif_lzw_init(tif_lzw_state *sp) {
374 sp->enc_hashtab = NULL;
375 }
376
377 #ifdef __cplusplus
378 }
379 #endif /* __cplusplus */
380
381 /* vim: set ts=4 sw=4 et : */
382 #endif /* Z_TIF_LZW_H */