comparison mupdf-source/thirdparty/leptonica/src/tiffio.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file tiffio.c
29 * <pre>
30 *
31 * TIFFClientOpen() wrappers for FILE*:
32 * static tsize_t lept_read_proc()
33 * static tsize_t lept_write_proc()
34 * static toff_t lept_seek_proc()
35 * static int lept_close_proc()
36 * static toff_t lept_size_proc()
37 *
38 * Reading tiff:
39 * PIX *pixReadTiff() [ special top level ]
40 * PIX *pixReadStreamTiff()
41 * static PIX *pixReadFromTiffStream()
42 *
43 * Writing tiff:
44 * l_int32 pixWriteTiff() [ special top level ]
45 * l_int32 pixWriteTiffCustom() [ special top level ]
46 * l_int32 pixWriteStreamTiff()
47 * l_int32 pixWriteStreamTiffWA()
48 * static l_int32 pixWriteToTiffStream()
49 * static l_int32 writeCustomTiffTags()
50 *
51 * Reading and writing multipage tiff
52 * PIX *pixReadFromMultipageTiff()
53 * PIXA *pixaReadMultipageTiff() [ special top level ]
54 * l_int32 pixaWriteMultipageTiff() [ special top level ]
55 * l_int32 writeMultipageTiff() [ special top level ]
56 * l_int32 writeMultipageTiffSA()
57 *
58 * Information about tiff file
59 * l_int32 fprintTiffInfo()
60 * l_int32 tiffGetCount()
61 * l_int32 getTiffResolution()
62 * static l_int32 getTiffStreamResolution()
63 * l_int32 readHeaderTiff()
64 * l_int32 freadHeaderTiff()
65 * l_int32 readHeaderMemTiff()
66 * static l_int32 tiffReadHeaderTiff()
67 * l_int32 findTiffCompression()
68 * static l_int32 getTiffCompressedFormat()
69 *
70 * Extraction of tiff g4 data:
71 * l_int32 extractG4DataFromFile()
72 *
73 * Open tiff stream from file stream
74 * static TIFF *fopenTiff()
75 *
76 * Wrapper for TIFFOpen:
77 * static TIFF *openTiff()
78 *
79 * Memory I/O: reading memory --> pix and writing pix --> memory
80 * Ten static low-level memstream functions
81 * static L_MEMSTREAM *memstreamCreateForRead()
82 * static L_MEMSTREAM *memstreamCreateForWrite()
83 * static tsize_t tiffReadCallback()
84 * static tsize_t tiffWriteCallback()
85 * static toff_t tiffSeekCallback()
86 * static l_int32 tiffCloseCallback()
87 * static toff_t tiffSizeCallback()
88 * static l_int32 tiffMapCallback()
89 * static void tiffUnmapCallback()
90 * static TIFF *fopenTiffMemstream()
91 *
92 * PIX *pixReadMemTiff();
93 * PIX *pixReadMemFromMultipageTiff();
94 * PIXA *pixaReadMemMultipageTiff() [ special top level ]
95 * l_int32 pixaWriteMemMultipageTiff() [ special top level ]
96 * l_int32 pixWriteMemTiff();
97 * l_int32 pixWriteMemTiffCustom();
98 *
99 * Note 1: To include all necessary functions, use libtiff version 3.7.4
100 * (from 2005) or later.
101 * Note 2: What compression methods in tiff are supported?
102 * * We support most methods that are fully implemented in the
103 * tiff library, such as G3, G4, RLE and LZW.
104 * * The exception is the old-style jpeg tiff format (OJPEG), which
105 * is not supported.
106 * * We support two formats requiring external libraries: ZIP and JPEG
107 * All computers should have the zip library.
108 * * At present we do not support WEBP in tiff, which uses
109 * libwebp and was added in tifflib 4.1.0 in 2019.
110 * Note 3: We set the pad bits to 0 before writing in pixWriteToTiffStream().
111 * Although they don't affect the raster image after decompression,
112 * it is sometimes convenient to use a golden file with a
113 * byte-by-byte check to verify invariance. The issue came up
114 * on Windows for 2 and 4 bpp images.
115 * </pre>
116 */
117
118 #ifdef HAVE_CONFIG_H
119 #include <config_auto.h>
120 #endif /* HAVE_CONFIG_H */
121
122 #include <string.h>
123 #include <math.h> /* for isnan */
124 #include <sys/types.h>
125 #ifndef _MSC_VER
126 #include <unistd.h>
127 #else /* _MSC_VER */
128 #include <io.h>
129 #endif /* _MSC_VER */
130 #include <fcntl.h>
131 #include "allheaders.h"
132
133 /* ---------------------------------------------------------*/
134 #if HAVE_LIBTIFF && HAVE_LIBJPEG /* defined in environ.h */
135 /* ---------------------------------------------------------*/
136
137 #include "tiff.h"
138 #include "tiffio.h"
139
140 static const l_int32 DefaultResolution = 300; /* ppi */
141 static const l_int32 ManyPagesInTiffFile = 3000; /* warn if big */
142
143 /* Verified that tiflib makes valid g4 files of this size */
144 static const l_int32 MaxTiffWidth = 1 << 20; /* 1M pixels */
145 static const l_int32 MaxTiffHeight = 1 << 20; /* 1M pixels */
146
147 /* Check g4 data size */
148 static const size_t MaxNumTiffBytes = (1 << 28) - 1; /* 256 MB */
149
150 /* All functions with TIFF interfaces are static. */
151 static PIX *pixReadFromTiffStream(TIFF *tif);
152 static l_int32 getTiffStreamResolution(TIFF *tif, l_int32 *pxres,
153 l_int32 *pyres);
154 static l_int32 tiffReadHeaderTiff(TIFF *tif, l_int32 *pwidth,
155 l_int32 *pheight, l_int32 *pbps,
156 l_int32 *pspp, l_int32 *pres,
157 l_int32 *pcmap, l_int32 *pformat);
158 static l_int32 writeCustomTiffTags(TIFF *tif, NUMA *natags,
159 SARRAY *savals, SARRAY *satypes,
160 NUMA *nasizes);
161 static l_int32 pixWriteToTiffStream(TIFF *tif, PIX *pix, l_int32 comptype,
162 NUMA *natags, SARRAY *savals,
163 SARRAY *satypes, NUMA *nasizes);
164 static TIFF *fopenTiff(FILE *fp, const char *modestring);
165 static TIFF *openTiff(const char *filename, const char *modestring);
166
167 /* Static helper for tiff compression type */
168 static l_int32 getTiffCompressedFormat(l_uint16 tiffcomp);
169
170 /* Static function for memory I/O */
171 static TIFF *fopenTiffMemstream(const char *filename, const char *operation,
172 l_uint8 **pdata, size_t *pdatasize);
173
174 /* This structure defines a transform to be performed on a TIFF image
175 * (note that the same transformation can be represented in
176 * several different ways using this structure since
177 * vflip + hflip + counterclockwise == clockwise). */
struct tiff_transform {
        /* nonzero: the image needs a vertical flip */
    int vflip;
        /* nonzero: the image needs a horizontal flip */
    int hflip;
        /* 90-degree rotation selector:
         *   -1 -> counterclockwise
         *    0 -> none
         *    1 -> clockwise */
    int rotate;
};
185
186 /* This describes the transformations needed for a given orientation
187 * tag. The tag values start at 1, so you need to subtract 1 to get a
188 * valid index into this array. It is only valid when not using
189 * TIFFReadRGBAImageOriented(). */
190 static struct tiff_transform tiff_orientation_transforms[] = {
191 {0, 0, 0},
192 {0, 1, 0},
193 {1, 1, 0},
194 {1, 0, 0},
195 {0, 1, -1},
196 {0, 0, 1},
197 {0, 1, 1},
198 {0, 0, -1}
199 };
200
201 /* Same as above, except that these transformations are only valid
202 * when using TIFFReadRGBAImageOriented(). Transformations
203 * were determined empirically. See the libtiff mailing list for
204 * more discussion: http://www.asmail.be/msg0054683875.html */
205 static struct tiff_transform tiff_partial_orientation_transforms[] = {
206 {0, 0, 0},
207 {0, 0, 0},
208 {0, 0, 0},
209 {0, 0, 0},
210 {0, 1, -1},
211 {0, 1, 1},
212 {1, 0, 1},
213 {0, 1, -1}
214 };
215
216
217 /*-----------------------------------------------------------------------*
218 * TIFFClientOpen() wrappers for FILE* *
219 * Provided by Jürgen Buchmüller *
220 * *
221 * We previously used TIFFFdOpen(), which used low-level file *
222 * descriptors. It had portability issues with Windows, along *
223 * with other limitations from lack of stream control operations. *
224 * These callbacks to TIFFClientOpen() avoid the problems. *
225 * *
226 * Jürgen made the functions use 64 bit file operations where possible *
227 * or required, namely for seek and size. On Windows there are specific *
228 * _fseeki64() and _ftelli64() functions. On unix it is common to look *
229 * for a macro _LARGEFILE64_SOURCE being defined, which makes available *
230 * the off64_t type, and to use fseeko() and ftello() in this case. *
231 *-----------------------------------------------------------------------*/
232 static tsize_t
233 lept_read_proc(thandle_t cookie,
234 tdata_t buff,
235 tsize_t size)
236 {
237 FILE* fp = (FILE *)cookie;
238 tsize_t done;
239 if (!buff || !cookie || !fp)
240 return (tsize_t)-1;
241 done = fread(buff, 1, size, fp);
242 return done;
243 }
244
245 static tsize_t
246 lept_write_proc(thandle_t cookie,
247 tdata_t buff,
248 tsize_t size)
249 {
250 FILE* fp = (FILE *)cookie;
251 tsize_t done;
252 if (!buff || !cookie || !fp)
253 return (tsize_t)-1;
254 done = fwrite(buff, 1, size, fp);
255 return done;
256 }
257
258 static toff_t
259 lept_seek_proc(thandle_t cookie,
260 toff_t offs,
261 int whence)
262 {
263 FILE* fp = (FILE *)cookie;
264 #if defined(_MSC_VER)
265 __int64 pos = 0;
266 if (!cookie || !fp)
267 return (tsize_t)-1;
268 switch (whence) {
269 case SEEK_SET:
270 pos = 0;
271 break;
272 case SEEK_CUR:
273 pos = ftell(fp);
274 break;
275 case SEEK_END:
276 _fseeki64(fp, 0, SEEK_END);
277 pos = _ftelli64(fp);
278 break;
279 }
280 pos = (__int64)(pos + offs);
281 _fseeki64(fp, pos, SEEK_SET);
282 if (pos == _ftelli64(fp))
283 return (tsize_t)pos;
284 #elif defined(_LARGEFILE64_SOURCE)
285 off64_t pos = 0;
286 if (!cookie || !fp)
287 return (tsize_t)-1;
288 switch (whence) {
289 case SEEK_SET:
290 pos = 0;
291 break;
292 case SEEK_CUR:
293 pos = ftello(fp);
294 break;
295 case SEEK_END:
296 fseeko(fp, 0, SEEK_END);
297 pos = ftello(fp);
298 break;
299 }
300 pos = (off64_t)(pos + offs);
301 fseeko(fp, pos, SEEK_SET);
302 if (pos == ftello(fp))
303 return (tsize_t)pos;
304 #else
305 off_t pos = 0;
306 if (!cookie || !fp)
307 return (tsize_t)-1;
308 switch (whence) {
309 case SEEK_SET:
310 pos = 0;
311 break;
312 case SEEK_CUR:
313 pos = ftell(fp);
314 break;
315 case SEEK_END:
316 fseek(fp, 0, SEEK_END);
317 pos = ftell(fp);
318 break;
319 }
320 pos = (off_t)(pos + offs);
321 fseek(fp, pos, SEEK_SET);
322 if (pos == ftell(fp))
323 return (tsize_t)pos;
324 #endif
325 return (tsize_t)-1;
326 }
327
328 static int
329 lept_close_proc(thandle_t cookie)
330 {
331 FILE* fp = (FILE *)cookie;
332 if (!cookie || !fp)
333 return 0;
334 fseek(fp, 0, SEEK_SET);
335 return 0;
336 }
337
338 static toff_t
339 lept_size_proc(thandle_t cookie)
340 {
341 FILE* fp = (FILE *)cookie;
342 #if defined(_MSC_VER)
343 __int64 pos;
344 __int64 size;
345 if (!cookie || !fp)
346 return (tsize_t)-1;
347 pos = _ftelli64(fp);
348 _fseeki64(fp, 0, SEEK_END);
349 size = _ftelli64(fp);
350 _fseeki64(fp, pos, SEEK_SET);
351 #elif defined(_LARGEFILE64_SOURCE)
352 off64_t pos;
353 off64_t size;
354 if (!fp)
355 return (tsize_t)-1;
356 pos = ftello(fp);
357 fseeko(fp, 0, SEEK_END);
358 size = ftello(fp);
359 fseeko(fp, pos, SEEK_SET);
360 #else
361 off_t pos;
362 off_t size;
363 if (!cookie || !fp)
364 return (tsize_t)-1;
365 pos = ftell(fp);
366 fseek(fp, 0, SEEK_END);
367 size = ftell(fp);
368 fseek(fp, pos, SEEK_SET);
369 #endif
370 return (toff_t)size;
371 }
372
373
374 /*--------------------------------------------------------------*
375 * Reading from file *
376 *--------------------------------------------------------------*/
377 /*!
378 * \brief pixReadTiff()
379 *
380 * \param[in] filename
381 * \param[in] n page number 0 based
382 * \return pix, or NULL on error
383 *
384 * <pre>
385 * Notes:
386 * (1) This is a version of pixRead(), specialized for tiff
387 * files, that allows specification of the page to be returned
388 * (2) No warning messages on failure, because of how multi-page
389 * TIFF reading works. You are supposed to keep trying until
390 * it stops working.
391 * </pre>
392 */
393 PIX *
394 pixReadTiff(const char *filename,
395 l_int32 n)
396 {
397 FILE *fp;
398 PIX *pix;
399
400 if (!filename)
401 return (PIX *)ERROR_PTR("filename not defined", __func__, NULL);
402
403 if ((fp = fopenReadStream(filename)) == NULL)
404 return (PIX *)ERROR_PTR_1("image file not found",
405 filename, __func__, NULL);
406 pix = pixReadStreamTiff(fp, n);
407 fclose(fp);
408 return pix;
409 }
410
411
412 /*--------------------------------------------------------------*
413 * Reading from stream *
414 *--------------------------------------------------------------*/
415 /*!
416 * \brief pixReadStreamTiff()
417 *
418 * \param[in] fp file stream
419 * \param[in] n page number: 0 based
420 * \return pix, or NULL on error or if there are no more images in the file
421 *
422 * <pre>
423 * Notes:
424 * (1) No warning messages on failure, because of how multi-page
425 * TIFF reading works. You are supposed to keep trying until
426 * it stops working.
427 * </pre>
428 */
429 PIX *
430 pixReadStreamTiff(FILE *fp,
431 l_int32 n)
432 {
433 PIX *pix;
434 TIFF *tif;
435
436 if (!fp)
437 return (PIX *)ERROR_PTR("stream not defined", __func__, NULL);
438
439 if ((tif = fopenTiff(fp, "r")) == NULL)
440 return (PIX *)ERROR_PTR("tif not opened", __func__, NULL);
441
442 if (TIFFSetDirectory(tif, n) == 0) {
443 TIFFCleanup(tif);
444 return NULL;
445 }
446 if ((pix = pixReadFromTiffStream(tif)) == NULL) {
447 TIFFCleanup(tif);
448 return NULL;
449 }
450 TIFFCleanup(tif);
451 return pix;
452 }
453
454
455 /*!
456 * \brief pixReadFromTiffStream()
457 *
458 * \param[in] tif TIFF handle
459 * \return pix, or NULL on error
460 *
461 * <pre>
462 * Notes:
463 * (1) We can read the following images (up to 32 bits/pixel):
464 * 1 spp (grayscale): 1, 2, 4, 8, 16 bps
465 * 1 spp (colormapped): 1, 2, 4, 8 bps
466 * 2 spp (gray+alpha): 8 bps
467 * 3 spp (rgb) and 4 spp (rgba): 8 or 16 bps
468 * Note that 16 bps rgb and rgba are converted to 8 bps in the pix.
469 * (2) In particular, we do not support
470 * 16 bps for spp == 2
471 * 4 bps for spp == 3 or spp == 4.
472 * (3) We only support uint image data.
473 * (4) We do not support tiled format, old-style jpeg encoding,
474 * or webp encoded tiff.
475 * (5) 2 spp gray+alpha are rasterized as 32 bit/pixel rgba, with
476 * the gray value replicated in r, g and b.
477 * (6) For colormapped images, we support 8 bits/color in the palette.
478 * Tiff colormaps have 16 bits/color, and we reduce them to 8.
479 * (7) Quoting the libtiff documentation at
480 * http://libtiff.maptools.org/libtiff.html
481 * "libtiff provides a high-level interface for reading image data
482 * from a TIFF file. This interface handles the details of data
483 * organization and format for a wide variety of TIFF files;
484 * at least the large majority of those files that one would
485 * normally encounter. Image data is, by default, returned as
486 * ABGR pixels packed into 32-bit words (8 bits per sample).
487 * Rectangular rasters can be read or data can be intercepted
488 * at an intermediate level and packed into memory in a format
489 * more suitable to the application. The library handles all
490 * the details of the format of data stored on disk and,
491 * in most cases, if any colorspace conversions are required:
492 * bilevel to RGB, greyscale to RGB, CMYK to RGB, YCbCr to RGB,
493 * 16-bit samples to 8-bit samples, associated/unassociated alpha,
494 * etc."
495 * </pre>
496 */
497 static PIX *
498 pixReadFromTiffStream(TIFF *tif)
499 {
500 char *text;
501 l_uint8 *linebuf, *data, *rowptr;
502 l_uint16 spp, bps, photometry, tiffcomp, orientation, sample_fmt;
503 l_uint16 *redmap, *greenmap, *bluemap;
504 l_int32 d, wpl, bpl, comptype, i, j, k, ncolors, rval, gval, bval, aval;
505 l_int32 xres, yres, tiffbpl, packedbpl, half_size, twothirds_size;
506 l_uint32 w, h, tiffword, read_oriented;
507 l_uint32 *line, *ppixel, *tiffdata, *pixdata;
508 PIX *pix, *pix1;
509 PIXCMAP *cmap;
510
511 if (!tif)
512 return (PIX *)ERROR_PTR("tif not defined", __func__, NULL);
513
514 read_oriented = 0;
515
516 /* Only accept uint image data:
517 * SAMPLEFORMAT_UINT = 1;
518 * SAMPLEFORMAT_INT = 2;
519 * SAMPLEFORMAT_IEEEFP = 3;
520 * SAMPLEFORMAT_VOID = 4; */
521 TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLEFORMAT, &sample_fmt);
522 if (sample_fmt != SAMPLEFORMAT_UINT) {
523 L_ERROR("sample format = %d is not uint\n", __func__, sample_fmt);
524 return NULL;
525 }
526
527 /* Can't read tiff in tiled format. For what is involved, see, e.g:
528 * https://www.cs.rochester.edu/~nelson/courses/vision/\
529 * resources/tiff/libtiff.html#Tiles
530 * A tiled tiff can be converted to a normal (strip) tif:
531 * tiffcp -s <input-tiled-tif> <output-strip-tif> */
532 if (TIFFIsTiled(tif)) {
533 L_ERROR("tiled format is not supported\n", __func__);
534 return NULL;
535 }
536
537 /* Old style jpeg is not supported. We tried supporting 8 bpp.
538 * TIFFReadScanline() fails on this format, so we used RGBA
539 * reading, which generates a 4 spp image, and pulled out the
540 * red component. However, there were problems with double-frees
541 * in cleanup. For RGB, tiffbpl is exactly half the size that
542 * you would expect for the raster data in a scanline, which
543 * is 3 * w. */
544 TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
545 if (tiffcomp == COMPRESSION_OJPEG) {
546 L_ERROR("old style jpeg format is not supported\n", __func__);
547 return NULL;
548 }
549
550 /* webp in tiff is in 4.1.0 and not yet supported in Adobe registry */
551 #if defined(COMPRESSION_WEBP)
552 if (tiffcomp == COMPRESSION_WEBP) {
553 L_ERROR("webp in tiff not generally supported yet\n", __func__);
554 return NULL;
555 }
556 #endif /* COMPRESSION_WEBP */
557
558 /* Use default fields for bps and spp */
559 TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
560 TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
561 if (bps != 1 && bps != 2 && bps != 4 && bps != 8 && bps != 16) {
562 L_ERROR("invalid bps = %d\n", __func__, bps);
563 return NULL;
564 }
565 if (spp == 2 && bps != 8) {
566 L_ERROR("for 2 spp, only handle 8 bps; this is %d bps\n",
567 __func__, bps);
568 return NULL;
569 }
570 if ((spp == 3 || spp == 4) && bps < 8) {
571 L_ERROR("for 3 and 4 spp, only handle 8 and 16 bps; this is %d bps\n",
572 __func__, bps);
573 return NULL;
574 }
575 if (spp == 1) {
576 d = bps;
577 } else if (spp == 2) { /* gray plus alpha */
578 d = 32; /* will convert to RGBA */
579 } else if (spp == 3 || spp == 4) {
580 d = 32;
581 } else {
582 L_ERROR("spp = %d; not in {1,2,3,4}\n", __func__, spp);
583 return NULL;
584 }
585
586 TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
587 TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
588 if (w > MaxTiffWidth) {
589 L_ERROR("width = %d pixels; too large\n", __func__, w);
590 return NULL;
591 }
592 if (h > MaxTiffHeight) {
593 L_ERROR("height = %d pixels; too large\n", __func__, h);
594 return NULL;
595 }
596
597 /* The relation between the size of a byte buffer required to hold
598 a raster of image pixels (packedbpl) and the size of the tiff
599 buffer (tiffbuf) is either 1:1 or approximately 1.5:1 or 2:1,
600 depending on how the data is stored and subsampled. For security,
601 we test this relation between tiffbuf and the image parameters
602 w, spp and bps. */
603 tiffbpl = TIFFScanlineSize(tif);
604 packedbpl = (bps * spp * w + 7) / 8;
605 half_size = (L_ABS(2 * tiffbpl - packedbpl) <= 8);
606 twothirds_size = (L_ABS(3 * tiffbpl - 2 * packedbpl) <= 8);
607 #if 0
608 if (half_size)
609 L_INFO("half_size: packedbpl = %d is approx. twice tiffbpl = %d\n",
610 __func__, packedbpl, tiffbpl);
611 if (twothirds_size)
612 L_INFO("twothirds_size: packedbpl = %d is approx. 1.5 tiffbpl = %d\n",
613 __func__, packedbpl, tiffbpl);
614 lept_stderr("tiffbpl = %d, packedbpl = %d, bps = %d, spp = %d, w = %d\n",
615 tiffbpl, packedbpl, bps, spp, w);
616 #endif
617 if (tiffbpl != packedbpl && !half_size && !twothirds_size) {
618 L_ERROR("invalid tiffbpl: tiffbpl = %d, packedbpl = %d, "
619 "bps = %d, spp = %d, w = %d\n",
620 __func__, tiffbpl, packedbpl, bps, spp, w);
621 return NULL;
622 }
623
624 /* Use a linebuf that will hold all the pixels generated
625 by tiff when reading (decompressing) a scanline. */
626 if ((pix = pixCreate(w, h, d)) == NULL)
627 return (PIX *)ERROR_PTR("pix not made", __func__, NULL);
628 pixSetInputFormat(pix, IFF_TIFF);
629 data = (l_uint8 *)pixGetData(pix);
630 wpl = pixGetWpl(pix);
631 bpl = 4 * wpl;
632 if (spp == 1) {
633 linebuf = (l_uint8 *)LEPT_CALLOC(4 * wpl, sizeof(l_uint8));
634 for (i = 0; i < h; i++) {
635 if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
636 LEPT_FREE(linebuf);
637 pixDestroy(&pix);
638 L_ERROR("spp = 1, read fail at line %d\n", __func__, i);
639 return NULL;
640 }
641 memcpy(data, linebuf, tiffbpl);
642 data += bpl;
643 }
644 if (bps <= 8)
645 pixEndianByteSwap(pix);
646 else /* bps == 16 */
647 pixEndianTwoByteSwap(pix);
648 LEPT_FREE(linebuf);
649 } else if (spp == 2 && bps == 8) { /* gray plus alpha */
650 L_INFO("gray+alpha is not supported; converting to RGBA\n", __func__);
651 pixSetSpp(pix, 4);
652 linebuf = (l_uint8 *)LEPT_CALLOC(4 * wpl, sizeof(l_uint8));
653 pixdata = pixGetData(pix);
654 for (i = 0; i < h; i++) {
655 if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
656 LEPT_FREE(linebuf);
657 pixDestroy(&pix);
658 L_ERROR("spp = 2, read fail at line %d\n", __func__, i);
659 return NULL;
660 }
661 rowptr = linebuf;
662 ppixel = pixdata + i * wpl;
663 for (j = k = 0; j < w; j++) {
664 /* Copy gray value into r, g and b */
665 SET_DATA_BYTE(ppixel, COLOR_RED, rowptr[k]);
666 SET_DATA_BYTE(ppixel, COLOR_GREEN, rowptr[k]);
667 SET_DATA_BYTE(ppixel, COLOR_BLUE, rowptr[k++]);
668 SET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL, rowptr[k++]);
669 ppixel++;
670 }
671 }
672 LEPT_FREE(linebuf);
673 } else { /* rgb and rgba */
674 if ((tiffdata = (l_uint32 *)LEPT_CALLOC((size_t)w * h,
675 sizeof(l_uint32))) == NULL) {
676 pixDestroy(&pix);
677 return (PIX *)ERROR_PTR("calloc fail for tiffdata", __func__, NULL);
678 }
679 /* TIFFReadRGBAImageOriented() converts to 8 bps */
680 if (!TIFFReadRGBAImageOriented(tif, w, h, tiffdata,
681 ORIENTATION_TOPLEFT, 0)) {
682 LEPT_FREE(tiffdata);
683 pixDestroy(&pix);
684 return (PIX *)ERROR_PTR("failed to read tiffdata", __func__, NULL);
685 } else {
686 read_oriented = 1;
687 }
688
689 if (spp == 4) pixSetSpp(pix, 4);
690 line = pixGetData(pix);
691 for (i = 0; i < h; i++, line += wpl) {
692 for (j = 0, ppixel = line; j < w; j++) {
693 /* TIFFGet* are macros */
694 tiffword = tiffdata[i * w + j];
695 rval = TIFFGetR(tiffword);
696 gval = TIFFGetG(tiffword);
697 bval = TIFFGetB(tiffword);
698 if (spp == 3) {
699 composeRGBPixel(rval, gval, bval, ppixel);
700 } else { /* spp == 4 */
701 aval = TIFFGetA(tiffword);
702 composeRGBAPixel(rval, gval, bval, aval, ppixel);
703 }
704 ppixel++;
705 }
706 }
707 LEPT_FREE(tiffdata);
708 }
709
710 if (getTiffStreamResolution(tif, &xres, &yres) == 0) {
711 pixSetXRes(pix, xres);
712 pixSetYRes(pix, yres);
713 }
714
715 /* Find and save the compression type */
716 comptype = getTiffCompressedFormat(tiffcomp);
717 pixSetInputFormat(pix, comptype);
718
719 if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) {
720 /* Save the colormap as a pix cmap. Because the
721 * tiff colormap components are 16 bit unsigned,
722 * and go from black (0) to white (0xffff), the
723 * the pix cmap takes the most significant byte. */
724 if (bps > 8) {
725 pixDestroy(&pix);
726 return (PIX *)ERROR_PTR("colormap size > 256", __func__, NULL);
727 }
728 if ((cmap = pixcmapCreate(bps)) == NULL) {
729 pixDestroy(&pix);
730 return (PIX *)ERROR_PTR("colormap not made", __func__, NULL);
731 }
732 ncolors = 1 << bps;
733 for (i = 0; i < ncolors; i++)
734 pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8,
735 bluemap[i] >> 8);
736 if (pixSetColormap(pix, cmap)) {
737 pixDestroy(&pix);
738 return (PIX *)ERROR_PTR("invalid colormap", __func__, NULL);
739 }
740
741 /* Remove the colormap for 1 bpp. */
742 if (bps == 1) {
743 pix1 = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC);
744 pixDestroy(&pix);
745 pix = pix1;
746 }
747 } else { /* No colormap: check photometry and invert if necessary */
748 if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
749 /* Guess default photometry setting. Assume min_is_white
750 * if compressed 1 bpp; min_is_black otherwise. */
751 if (tiffcomp == COMPRESSION_CCITTFAX3 ||
752 tiffcomp == COMPRESSION_CCITTFAX4 ||
753 tiffcomp == COMPRESSION_CCITTRLE ||
754 tiffcomp == COMPRESSION_CCITTRLEW) {
755 photometry = PHOTOMETRIC_MINISWHITE;
756 } else {
757 photometry = PHOTOMETRIC_MINISBLACK;
758 }
759 }
760 if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
761 (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
762 pixInvert(pix, pix);
763 }
764
765 if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) {
766 if (orientation >= 1 && orientation <= 8) {
767 struct tiff_transform *transform = (read_oriented) ?
768 &tiff_partial_orientation_transforms[orientation - 1] :
769 &tiff_orientation_transforms[orientation - 1];
770 if (transform->vflip) pixFlipTB(pix, pix);
771 if (transform->hflip) pixFlipLR(pix, pix);
772 if (transform->rotate) {
773 PIX *oldpix = pix;
774 pix = pixRotate90(oldpix, transform->rotate);
775 pixDestroy(&oldpix);
776 }
777 }
778 }
779
780 text = NULL;
781 TIFFGetField(tif, TIFFTAG_IMAGEDESCRIPTION, &text);
782 if (text) pixSetText(pix, text);
783 return pix;
784 }
785
786
787 /*--------------------------------------------------------------*
788 * Writing to file *
789 *--------------------------------------------------------------*/
790 /*!
791 * \brief pixWriteTiff()
792 *
793 * \param[in] filename to write to
794 * \param[in] pix any depth, colormap will be removed
795 * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
796 * IFF_TIFF_G3, IFF_TIFF_G4,
797 * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
798 * \param[in] modestr "a" or "w"
799 * \return 0 if OK, 1 on error
800 *
801 * <pre>
802 * Notes:
803 * (1) For multipage tiff, write the first pix with mode "w" and
804 * all subsequent pix with mode "a".
805 * (2) For multipage tiff, there is considerable overhead in the
806 * machinery to append an image and add the directory entry,
807 * and the time required for each image increases linearly
808 * with the number of images in the file.
809 * </pre>
810 */
811 l_ok
812 pixWriteTiff(const char *filename,
813 PIX *pix,
814 l_int32 comptype,
815 const char *modestr)
816 {
817 return pixWriteTiffCustom(filename, pix, comptype, modestr,
818 NULL, NULL, NULL, NULL);
819 }
820
821
822 /*!
823 * \brief pixWriteTiffCustom()
824 *
825 * \param[in] filename to write to
826 * \param[in] pix
827 * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
828 * IFF_TIFF_G3, IFF_TIFF_G4,
829 * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
830 * \param[in] modestr "a" or "w"
831 * \param[in] natags [optional] NUMA of custom tiff tags
832 * \param[in] savals [optional] SARRAY of values
833 * \param[in] satypes [optional] SARRAY of types
834 * \param[in] nasizes [optional] NUMA of sizes
835 * \return 0 if OK, 1 on error
836 *
837 * Usage:
838 * 1 This writes a page image to a tiff file, with optional
839 * extra tags defined in tiff.h
840 * 2 For multipage tiff, write the first pix with mode "w" and
841 * all subsequent pix with mode "a".
842 * 3 For the custom tiff tags:
843 * a The three arrays {natags, savals, satypes} must all be
844 * either NULL or defined and of equal size.
845 * b If they are defined, the tags are an array of integers,
846 * the vals are an array of values in string format, and
847 * the types are an array of types in string format.
848 * c All valid tags are defined in tiff.h.
849 * d The types allowed are the set of strings:
850 * "char*"
851 * "l_uint8*"
852 * "l_uint16"
853 * "l_uint32"
854 * "l_int32"
855 * "l_float64"
856 * "l_uint16-l_uint16" note the dash; use it between the
857 * two l_uint16 vals in the val string
858 * Of these, "char*" and "l_uint16" are the most commonly used.
859 * e The last array, nasizes, is also optional. It is for
860 * tags that take an array of bytes for a value, a number of
861 * elements in the array, and a type that is either "char*"
862 * or "l_uint8*" probably either will work.
863 * Use NULL if there are no such tags.
864 * f VERY IMPORTANT: if there are any tags that require the
865 * extra size value, stored in nasizes, they must be
866 * written first!
867 */
868 l_ok
869 pixWriteTiffCustom(const char *filename,
870 PIX *pix,
871 l_int32 comptype,
872 const char *modestr,
873 NUMA *natags,
874 SARRAY *savals,
875 SARRAY *satypes,
876 NUMA *nasizes)
877 {
878 l_int32 ret;
879 TIFF *tif;
880
881 if (!filename)
882 return ERROR_INT("filename not defined", __func__, 1);
883 if (!pix)
884 return ERROR_INT("pix not defined", __func__, 1);
885
886 if ((tif = openTiff(filename, modestr)) == NULL)
887 return ERROR_INT("tif not opened", __func__, 1);
888 ret = pixWriteToTiffStream(tif, pix, comptype, natags, savals,
889 satypes, nasizes);
890 TIFFClose(tif);
891 return ret;
892 }
893
894
895 /*--------------------------------------------------------------*
896 * Writing to stream *
897 *--------------------------------------------------------------*/
898 /*!
899 * \brief pixWriteStreamTiff()
900 *
901 * \param[in] fp file stream
902 * \param[in] pix
903 * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
904 * IFF_TIFF_G3, IFF_TIFF_G4,
905 * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
906 * \return 0 if OK, 1 on error
907 *
908 * <pre>
909 * Notes:
910 * (1) This writes a single image to a file stream opened for writing.
911 * (2) If the pix has a colormap, it is preserved in the output file.
912 * (3) For images with bpp > 1, this resets the comptype, if
913 * necessary, to write uncompressed data.
914 * (4) G3 and G4 are only defined for 1 bpp.
915 * (5) We only allow PACKBITS for bpp = 1, because for bpp > 1
916 * it typically expands images that are not synthetically generated.
917 * (6) G4 compression is typically about twice as good as G3.
918 * G4 is excellent for binary compression of text/line-art,
919 * but terrible for halftones and dithered patterns. (In
920 * fact, G4 on halftones can give a file that is larger
921 * than uncompressed!) If a binary image has dithered
922 * regions, it is usually better to compress with png.
923 * </pre>
924 */
925 l_ok
926 pixWriteStreamTiff(FILE *fp,
927 PIX *pix,
928 l_int32 comptype)
929 {
930 return pixWriteStreamTiffWA(fp, pix, comptype, "w");
931 }
932
933
934 /*!
935 * \brief pixWriteStreamTiffWA()
936 *
937 * \param[in] fp file stream opened for append or write
938 * \param[in] pix
939 * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
940 * IFF_TIFF_G3, IFF_TIFF_G4,
941 * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
942 * \param[in] modestr "w" or "a"
943 * \return 0 if OK, 1 on error
944 *
945 * <pre>
946 * Notes:
947 * (1) See pixWriteStreamTiff()
948 * </pre>
949 */
950 l_ok
951 pixWriteStreamTiffWA(FILE *fp,
952 PIX *pix,
953 l_int32 comptype,
954 const char *modestr)
955 {
956 TIFF *tif;
957
958 if (!fp)
959 return ERROR_INT("stream not defined", __func__, 1 );
960 if (!pix)
961 return ERROR_INT("pix not defined", __func__, 1 );
962 if (strcmp(modestr, "w") && strcmp(modestr, "a")) {
963 L_ERROR("modestr = %s; not 'w' or 'a'\n", __func__, modestr);
964 return 1;
965 }
966
967 if (pixGetDepth(pix) != 1 && comptype != IFF_TIFF &&
968 comptype != IFF_TIFF_LZW && comptype != IFF_TIFF_ZIP &&
969 comptype != IFF_TIFF_JPEG) {
970 L_WARNING("invalid compression type %d for bpp > 1; using TIFF_ZIP\n",
971 __func__, comptype);
972 comptype = IFF_TIFF_ZIP;
973 }
974
975 if ((tif = fopenTiff(fp, modestr)) == NULL)
976 return ERROR_INT("tif not opened", __func__, 1);
977
978 if (pixWriteToTiffStream(tif, pix, comptype, NULL, NULL, NULL, NULL)) {
979 TIFFCleanup(tif);
980 return ERROR_INT("tif write error", __func__, 1);
981 }
982
983 TIFFCleanup(tif);
984 return 0;
985 }
986
987
988 /*!
989 * \brief pixWriteToTiffStream()
990 *
991 * \param[in] tif data structure, opened to a file
992 * \param[in] pix
993 * \param[in] comptype IFF_TIFF: for any image; no compression
994 * IFF_TIFF_RLE, IFF_TIFF_PACKBITS: for 1 bpp only
995 * IFF_TIFF_G4 and IFF_TIFF_G3: for 1 bpp only
996 * IFF_TIFF_LZW, IFF_TIFF_ZIP: lossless for any image
997 * IFF_TIFF_JPEG: lossy 8 bpp gray or rgb
998 * \param[in] natags [optional] NUMA of custom tiff tags
999 * \param[in] savals [optional] SARRAY of values
1000 * \param[in] satypes [optional] SARRAY of types
1001 * \param[in] nasizes [optional] NUMA of sizes
1002 * \return 0 if OK, 1 on error
1003 *
1004 * <pre>
1005 * Notes:
1006 * (1) This static function should only be called through higher
1007 * level functions in this file; namely, pixWriteTiffCustom(),
1008 * pixWriteTiff(), pixWriteStreamTiff(), pixWriteMemTiff()
1009 * and pixWriteMemTiffCustom().
1010 * (2) We only allow PACKBITS for bpp = 1, because for bpp > 1
1011 * it typically expands images that are not synthetically generated.
1012 * (3) See pixWriteTiffCustom() for details on how to use
1013 * the last four parameters for customized tiff tags.
1014 * (4) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16
1015 * and 32. However, it is possible, and in some cases desirable,
1016 * to write out a tiff file using an rgb pix that has 24 bpp.
1017 * This can be created by appending the raster data for a 24 bpp
1018 * image (with proper scanline padding) directly to a 24 bpp
1019 * pix that was created without a data array. See note in
1020 * pixWriteStreamPng() for an example.
1021 * </pre>
1022 */
1023 static l_int32
1024 pixWriteToTiffStream(TIFF *tif,
1025 PIX *pix,
1026 l_int32 comptype,
1027 NUMA *natags,
1028 SARRAY *savals,
1029 SARRAY *satypes,
1030 NUMA *nasizes)
1031 {
1032 l_uint8 *linebuf, *data;
1033 l_uint16 redmap[256], greenmap[256], bluemap[256];
1034 l_int32 w, h, d, spp, i, j, k, wpl, bpl, tiffbpl, ncolors, cmapsize;
1035 l_int32 *rmap, *gmap, *bmap;
1036 l_int32 xres, yres;
1037 l_uint32 *line, *ppixel;
1038 PIX *pixt;
1039 PIXCMAP *cmap;
1040 char *text;
1041
1042 if (!tif)
1043 return ERROR_INT("tif stream not defined", __func__, 1);
1044 if (!pix)
1045 return ERROR_INT( "pix not defined", __func__, 1 );
1046
1047 pixSetPadBits(pix, 0);
1048 pixGetDimensions(pix, &w, &h, &d);
1049 spp = pixGetSpp(pix);
1050 xres = pixGetXRes(pix);
1051 yres = pixGetYRes(pix);
1052 if (xres == 0) xres = DefaultResolution;
1053 if (yres == 0) yres = DefaultResolution;
1054
1055 /* ------------------ Write out the header ------------- */
1056 TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, (l_uint32)RESUNIT_INCH);
1057 TIFFSetField(tif, TIFFTAG_XRESOLUTION, (l_float64)xres);
1058 TIFFSetField(tif, TIFFTAG_YRESOLUTION, (l_float64)yres);
1059
1060 TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (l_uint32)w);
1061 TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (l_uint32)h);
1062 TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT);
1063
1064 if ((text = pixGetText(pix)) != NULL)
1065 TIFFSetField(tif, TIFFTAG_IMAGEDESCRIPTION, text);
1066
1067 if (d == 1 && !pixGetColormap(pix)) {
1068 /* If d == 1, preserve the colormap. Note that when
1069 * d == 1 pix with colormaps are read, the colormaps
1070 * are removed. The only pix in leptonica that have
1071 * colormaps are made programmatically. */
1072 TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISWHITE);
1073 } else if ((d == 32 && spp == 3) || d == 24) {
1074 TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB);
1075 TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)3);
1076 TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE,
1077 (l_uint16)8, (l_uint16)8, (l_uint16)8);
1078 } else if (d == 32 && spp == 4) {
1079 l_uint16 val[1];
1080 val[0] = EXTRASAMPLE_ASSOCALPHA;
1081 TIFFSetField(tif, TIFFTAG_EXTRASAMPLES, (l_uint16)1, &val);
1082 TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB);
1083 TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)4);
1084 TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE,
1085 (l_uint16)8, (l_uint16)8, (l_uint16)8, (l_uint16)8);
1086 } else if (d == 16) { /* we only support spp = 1, bps = 16 */
1087 TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
1088 } else if ((cmap = pixGetColormap(pix)) == NULL) {
1089 TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
1090 } else { /* Save colormap in the tiff; not more than 256 colors */
1091 if (d > 8) {
1092 L_ERROR("d = %d > 8 with colormap!; reducing to 8\n", __func__, d);
1093 d = 8;
1094 }
1095 pixcmapToArrays(cmap, &rmap, &gmap, &bmap, NULL);
1096 ncolors = pixcmapGetCount(cmap);
1097 ncolors = L_MIN(256, ncolors); /* max 256 */
1098 cmapsize = 1 << d;
1099 cmapsize = L_MIN(256, cmapsize); /* power of 2; max 256 */
1100 if (ncolors > cmapsize) {
1101 L_WARNING("too many colors in cmap for tiff; truncating\n",
1102 __func__);
1103 ncolors = cmapsize;
1104 }
1105 for (i = 0; i < ncolors; i++) {
1106 redmap[i] = (rmap[i] << 8) | rmap[i];
1107 greenmap[i] = (gmap[i] << 8) | gmap[i];
1108 bluemap[i] = (bmap[i] << 8) | bmap[i];
1109 }
1110 for (i = ncolors; i < cmapsize; i++) /* init, even though not used */
1111 redmap[i] = greenmap[i] = bluemap[i] = 0;
1112 LEPT_FREE(rmap);
1113 LEPT_FREE(gmap);
1114 LEPT_FREE(bmap);
1115
1116 TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_PALETTE);
1117 TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)1);
1118 TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (l_uint16)d);
1119 TIFFSetField(tif, TIFFTAG_COLORMAP, redmap, greenmap, bluemap);
1120 }
1121
1122 if (d <= 16) {
1123 TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (l_uint16)d);
1124 TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (l_uint16)1);
1125 }
1126
1127 TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
1128 if (comptype == IFF_TIFF) { /* no compression */
1129 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
1130 } else if (comptype == IFF_TIFF_G4) {
1131 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX4);
1132 } else if (comptype == IFF_TIFF_G3) {
1133 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX3);
1134 } else if (comptype == IFF_TIFF_RLE) {
1135 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_CCITTRLE);
1136 } else if (comptype == IFF_TIFF_PACKBITS) {
1137 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS);
1138 } else if (comptype == IFF_TIFF_LZW) {
1139 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_LZW);
1140 } else if (comptype == IFF_TIFF_ZIP) {
1141 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_ADOBE_DEFLATE);
1142 } else if (comptype == IFF_TIFF_JPEG) {
1143 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_JPEG);
1144 } else {
1145 L_WARNING("unknown tiff compression; using none\n", __func__);
1146 TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
1147 }
1148
1149 /* This is a no-op if arrays are NULL */
1150 writeCustomTiffTags(tif, natags, savals, satypes, nasizes);
1151
1152 /* ------------- Write out the image data ------------- */
1153 tiffbpl = TIFFScanlineSize(tif);
1154 wpl = pixGetWpl(pix);
1155 bpl = 4 * wpl;
1156 if (tiffbpl > bpl)
1157 lept_stderr("Big trouble: tiffbpl = %d, bpl = %d\n", tiffbpl, bpl);
1158 if ((linebuf = (l_uint8 *)LEPT_CALLOC(1, bpl)) == NULL)
1159 return ERROR_INT("calloc fail for linebuf", __func__, 1);
1160
1161 /* Use single strip for image */
1162 TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, h);
1163
1164 if (d != 24 && d != 32) {
1165 if (d == 16)
1166 pixt = pixEndianTwoByteSwapNew(pix);
1167 else
1168 pixt = pixEndianByteSwapNew(pix);
1169 data = (l_uint8 *)pixGetData(pixt);
1170 for (i = 0; i < h; i++, data += bpl) {
1171 memcpy(linebuf, data, tiffbpl);
1172 if (TIFFWriteScanline(tif, linebuf, i, 0) < 0)
1173 break;
1174 }
1175 pixDestroy(&pixt);
1176 } else if (d == 24) { /* See note 4 above: special case of 24 bpp rgb */
1177 for (i = 0; i < h; i++) {
1178 line = pixGetData(pix) + i * wpl;
1179 if (TIFFWriteScanline(tif, (l_uint8 *)line, i, 0) < 0)
1180 break;
1181 }
1182 } else { /* 32 bpp rgb or rgba */
1183 for (i = 0; i < h; i++) {
1184 line = pixGetData(pix) + i * wpl;
1185 for (j = 0, k = 0, ppixel = line; j < w; j++) {
1186 linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_RED);
1187 linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_GREEN);
1188 linebuf[k++] = GET_DATA_BYTE(ppixel, COLOR_BLUE);
1189 if (spp == 4)
1190 linebuf[k++] = GET_DATA_BYTE(ppixel, L_ALPHA_CHANNEL);
1191 ppixel++;
1192 }
1193 if (TIFFWriteScanline(tif, linebuf, i, 0) < 0)
1194 break;
1195 }
1196 }
1197
1198 /* TIFFWriteDirectory(tif); */
1199 LEPT_FREE(linebuf);
1200
1201 return 0;
1202 }
1203
1204
1205 /*!
1206 * \brief writeCustomTiffTags()
1207 *
1208 * \param[in] tif
1209 * \param[in] natags [optional] NUMA of custom tiff tags
1210 * \param[in] savals [optional] SARRAY of values
1211 * \param[in] satypes [optional] SARRAY of types
1212 * \param[in] nasizes [optional] NUMA of sizes
1213 * \return 0 if OK, 1 on error
1214 *
1215 * <pre>
1216 * Notes:
1217 * (1) This static function should be called indirectly through
1218 * higher level functions, such as pixWriteTiffCustom(),
1219 * which call pixWriteToTiffStream(). See details in
1220 * pixWriteTiffCustom() for using the 4 input arrays.
1221 * (2) This is a no-op if the first 3 arrays are all NULL.
1222 * (3) Otherwise, the first 3 arrays must be defined and all
1223 * of equal size.
1224 * (4) The fourth array is always optional.
1225 * (5) The most commonly used types are "char*" and "u_int16".
1226 * See tiff.h for a full listing of the tiff tags.
1227 * Note that many of these tags, in particular the bit tags,
1228 * are intended to be private, and cannot be set by this function.
1229 * Examples are the STRIPOFFSETS and STRIPBYTECOUNTS tags,
1230 * which are bit tags that are automatically set in the header,
1231 * and can be extracted using tiffdump.
1232 * </pre>
1233 */
1234 static l_int32
1235 writeCustomTiffTags(TIFF *tif,
1236 NUMA *natags,
1237 SARRAY *savals,
1238 SARRAY *satypes,
1239 NUMA *nasizes)
1240 {
1241 char *sval, *type;
1242 l_int32 i, n, ns, size, tagval, val;
1243 l_float64 dval;
1244 l_uint32 uval, uval2;
1245
1246 if (!tif)
1247 return ERROR_INT("tif stream not defined", __func__, 1);
1248 if (!natags && !savals && !satypes)
1249 return 0;
1250 if (!natags || !savals || !satypes)
1251 return ERROR_INT("not all arrays defined", __func__, 1);
1252 n = numaGetCount(natags);
1253 if ((sarrayGetCount(savals) != n) || (sarrayGetCount(satypes) != n))
1254 return ERROR_INT("not all sa the same size", __func__, 1);
1255
1256 /* The sized arrays (4 args to TIFFSetField) are written first */
1257 if (nasizes) {
1258 ns = numaGetCount(nasizes);
1259 if (ns > n)
1260 return ERROR_INT("too many 4-arg tag calls", __func__, 1);
1261 for (i = 0; i < ns; i++) {
1262 numaGetIValue(natags, i, &tagval);
1263 sval = sarrayGetString(savals, i, L_NOCOPY);
1264 type = sarrayGetString(satypes, i, L_NOCOPY);
1265 numaGetIValue(nasizes, i, &size);
1266 if (strcmp(type, "char*") && strcmp(type, "l_uint8*"))
1267 L_WARNING("array type not char* or l_uint8*; ignore\n",
1268 __func__);
1269 TIFFSetField(tif, tagval, size, sval);
1270 }
1271 } else {
1272 ns = 0;
1273 }
1274
1275 /* The typical tags (3 args to TIFFSetField) are now written */
1276 for (i = ns; i < n; i++) {
1277 numaGetIValue(natags, i, &tagval);
1278 sval = sarrayGetString(savals, i, L_NOCOPY);
1279 type = sarrayGetString(satypes, i, L_NOCOPY);
1280 if (!strcmp(type, "char*") || !strcmp(type, "const char*")) {
1281 TIFFSetField(tif, tagval, sval);
1282 } else if (!strcmp(type, "l_uint16")) {
1283 if (sscanf(sval, "%u", &uval) == 1) {
1284 TIFFSetField(tif, tagval, (l_uint16)uval);
1285 } else {
1286 lept_stderr("val %s not of type %s\n", sval, type);
1287 return ERROR_INT("custom tag(s) not written", __func__, 1);
1288 }
1289 } else if (!strcmp(type, "l_uint32")) {
1290 if (sscanf(sval, "%u", &uval) == 1) {
1291 TIFFSetField(tif, tagval, uval);
1292 } else {
1293 lept_stderr("val %s not of type %s\n", sval, type);
1294 return ERROR_INT("custom tag(s) not written", __func__, 1);
1295 }
1296 } else if (!strcmp(type, "l_int32")) {
1297 if (sscanf(sval, "%d", &val) == 1) {
1298 TIFFSetField(tif, tagval, val);
1299 } else {
1300 lept_stderr("val %s not of type %s\n", sval, type);
1301 return ERROR_INT("custom tag(s) not written", __func__, 1);
1302 }
1303 } else if (!strcmp(type, "l_float64")) {
1304 if (sscanf(sval, "%lf", &dval) == 1) {
1305 TIFFSetField(tif, tagval, dval);
1306 } else {
1307 lept_stderr("val %s not of type %s\n", sval, type);
1308 return ERROR_INT("custom tag(s) not written", __func__, 1);
1309 }
1310 } else if (!strcmp(type, "l_uint16-l_uint16")) {
1311 if (sscanf(sval, "%u-%u", &uval, &uval2) == 2) {
1312 TIFFSetField(tif, tagval, (l_uint16)uval, (l_uint16)uval2);
1313 } else {
1314 lept_stderr("val %s not of type %s\n", sval, type);
1315 return ERROR_INT("custom tag(s) not written", __func__, 1);
1316 }
1317 } else {
1318 lept_stderr("unknown type %s\n",type);
1319 return ERROR_INT("unknown type; tag(s) not written", __func__, 1);
1320 }
1321 }
1322 return 0;
1323 }
1324
1325
1326 /*--------------------------------------------------------------*
1327 * Reading and writing multipage tiff *
1328 *--------------------------------------------------------------*/
1329 /*!
1330 * \brief pixReadFromMultipageTiff()
1331 *
1332 * \param[in] fname filename
1333 * \param[in,out] poffset set offset to 0 for first image
1334 * \return pix, or NULL on error or if previous call returned the last image
1335 *
1336 * <pre>
1337 * Notes:
1338 * (1) This allows overhead for traversal of a multipage tiff file
1339 * to be linear in the number of images. This will also work
1340 * with a singlepage tiff file.
1341 * (2) No TIFF internal data structures are exposed to the caller
1342 * (thanks to Jeff Breidenbach).
1343 * (3) offset is the byte offset of a particular image in a multipage
1344 * tiff file. To get the first image in the file, input the
1345 * special offset value of 0.
1346 * (4) The offset is updated to point to the next image, for a
1347 * subsequent call.
1348 * (5) On the last image, the offset returned is 0. Exit the loop
1349 * when the returned offset is 0.
1350 * (6) For reading a multipage tiff from a memory buffer, see
1351 * pixReadMemFromMultipageTiff()
1352 * (7) Example usage for reading all the images in the tif file:
1353 * size_t offset = 0;
1354 * do {
1355 * Pix *pix = pixReadFromMultipageTiff(filename, &offset);
1356 * // do something with pix
1357 * } while (offset != 0);
1358 * </pre>
1359 */
1360 PIX *
1361 pixReadFromMultipageTiff(const char *fname,
1362 size_t *poffset)
1363 {
1364 l_int32 retval;
1365 size_t offset;
1366 PIX *pix;
1367 TIFF *tif;
1368
1369 if (!fname)
1370 return (PIX *)ERROR_PTR("fname not defined", __func__, NULL);
1371 if (!poffset)
1372 return (PIX *)ERROR_PTR("&offset not defined", __func__, NULL);
1373
1374 if ((tif = openTiff(fname, "r")) == NULL) {
1375 L_ERROR("tif open failed for %s\n", __func__, fname);
1376 return NULL;
1377 }
1378
1379 /* Set ptrs in the TIFF to the beginning of the image */
1380 offset = *poffset;
1381 retval = (offset == 0) ? TIFFSetDirectory(tif, 0)
1382 : TIFFSetSubDirectory(tif, offset);
1383 if (retval == 0) {
1384 TIFFClose(tif);
1385 return NULL;
1386 }
1387
1388 if ((pix = pixReadFromTiffStream(tif)) == NULL) {
1389 TIFFClose(tif);
1390 return NULL;
1391 }
1392
1393 /* Advance to the next image and return the new offset */
1394 TIFFReadDirectory(tif);
1395 *poffset = TIFFCurrentDirOffset(tif);
1396 TIFFClose(tif);
1397 return pix;
1398 }
1399
1400
1401 /*!
1402 * \brief pixaReadMultipageTiff()
1403 *
1404 * \param[in] filename input tiff file
1405 * \return pixa of page images, or NULL on error
1406 */
1407 PIXA *
1408 pixaReadMultipageTiff(const char *filename)
1409 {
1410 l_int32 i, npages;
1411 FILE *fp;
1412 PIX *pix;
1413 PIXA *pixa;
1414 TIFF *tif;
1415
1416 if (!filename)
1417 return (PIXA *)ERROR_PTR("filename not defined", __func__, NULL);
1418
1419 if ((fp = fopenReadStream(filename)) == NULL)
1420 return (PIXA *)ERROR_PTR_1("stream not opened",
1421 filename, __func__, NULL);
1422 if (fileFormatIsTiff(fp)) {
1423 tiffGetCount(fp, &npages);
1424 L_INFO(" Tiff: %d pages\n", __func__, npages);
1425 } else {
1426 return (PIXA *)ERROR_PTR_1("file is not tiff",
1427 filename, __func__, NULL);
1428 }
1429
1430 if ((tif = fopenTiff(fp, "r")) == NULL)
1431 return (PIXA *)ERROR_PTR_1("tif not opened",
1432 filename, __func__, NULL);
1433
1434 pixa = pixaCreate(npages);
1435 pix = NULL;
1436 for (i = 0; i < npages; i++) {
1437 if ((pix = pixReadFromTiffStream(tif)) != NULL) {
1438 pixaAddPix(pixa, pix, L_INSERT);
1439 } else {
1440 L_WARNING("pix not read for page %d\n", __func__, i);
1441 }
1442
1443 /* Advance to the next directory (i.e., the next image) */
1444 if (TIFFReadDirectory(tif) == 0)
1445 break;
1446 }
1447
1448 fclose(fp);
1449 TIFFCleanup(tif);
1450 return pixa;
1451 }
1452
1453
1454 /*!
1455 * \brief pixaWriteMultipageTiff()
1456 *
1457 * \param[in] fname input tiff file
1458 * \param[in] pixa any depth; colormap will be removed
1459 * \return 0 if OK, 1 on error
1460 *
1461 * <pre>
1462 * Notes:
1463 * (1) The tiff directory overhead is O(n^2). I have not been
1464 * able to reduce it to O(n). The overhead for n = 2000 is
1465 * about 1 second.
1466 * </pre>
1467 */
1468 l_ok
1469 pixaWriteMultipageTiff(const char *fname,
1470 PIXA *pixa)
1471 {
1472 const char *modestr;
1473 l_int32 i, n;
1474 PIX *pix1;
1475
1476 if (!fname)
1477 return ERROR_INT("fname not defined", __func__, 1);
1478 if (!pixa)
1479 return ERROR_INT("pixa not defined", __func__, 1);
1480
1481 n = pixaGetCount(pixa);
1482 for (i = 0; i < n; i++) {
1483 modestr = (i == 0) ? "w" : "a";
1484 pix1 = pixaGetPix(pixa, i, L_CLONE);
1485 if (pixGetDepth(pix1) == 1)
1486 pixWriteTiff(fname, pix1, IFF_TIFF_G4, modestr);
1487 else
1488 pixWriteTiff(fname, pix1, IFF_TIFF_ZIP, modestr);
1489 pixDestroy(&pix1);
1490 }
1491
1492 return 0;
1493 }
1494
1495
1496 /*!
1497 * \brief writeMultipageTiff()
1498 *
1499 * \param[in] dirin input directory
1500 * \param[in] substr [optional] substring filter on filenames; can be NULL
1501 * \param[in] fileout output multipage tiff file
1502 * \return 0 if OK, 1 on error
1503 *
1504 * <pre>
1505 * Notes:
1506 * (1) This writes a set of image files in a directory out
1507 * as a multipage tiff file. The images can be in any
1508 * initial file format.
1509 * (2) Images with a colormap have the colormap removed before
1510 * re-encoding as tiff.
1511 * (3) All images are encoded losslessly. Those with 1 bpp are
1512 * encoded 'g4'. The rest are encoded as 'zip' (flate encoding).
1513 * Because it is lossless, this is an expensive method for
1514 * saving most rgb images.
1515 * (4) The tiff directory overhead is quadratic in the number of
1516 * images. To avoid this for very large numbers of images to be
1517 * written, apply the method used in pixaWriteMultipageTiff().
1518 * </pre>
1519 */
1520 l_ok
1521 writeMultipageTiff(const char *dirin,
1522 const char *substr,
1523 const char *fileout)
1524 {
1525 SARRAY *sa;
1526
1527 if (!dirin)
1528 return ERROR_INT("dirin not defined", __func__, 1);
1529 if (!fileout)
1530 return ERROR_INT("fileout not defined", __func__, 1);
1531
1532 /* Get all filtered and sorted full pathnames. */
1533 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
1534
1535 /* Generate the tiff file */
1536 writeMultipageTiffSA(sa, fileout);
1537 sarrayDestroy(&sa);
1538 return 0;
1539 }
1540
1541
1542 /*!
1543 * \brief writeMultipageTiffSA()
1544 *
1545 * \param[in] sa string array of full path names
1546 * \param[in] fileout output ps file
1547 * \return 0 if OK, 1 on error
1548 *
1549 * <pre>
1550 * Notes:
1551 * (1) See writeMultipageTiff()
1552 * </pre>
1553 */
1554 l_ok
1555 writeMultipageTiffSA(SARRAY *sa,
1556 const char *fileout)
1557 {
1558 char *fname;
1559 const char *op;
1560 l_int32 i, nfiles, firstfile, format;
1561 PIX *pix;
1562
1563 if (!sa)
1564 return ERROR_INT("sa not defined", __func__, 1);
1565 if (!fileout)
1566 return ERROR_INT("fileout not defined", __func__, 1);
1567
1568 nfiles = sarrayGetCount(sa);
1569 firstfile = TRUE;
1570 for (i = 0; i < nfiles; i++) {
1571 op = (firstfile) ? "w" : "a";
1572 fname = sarrayGetString(sa, i, L_NOCOPY);
1573 findFileFormat(fname, &format);
1574 if (format == IFF_UNKNOWN) {
1575 L_INFO("format of %s not known\n", __func__, fname);
1576 continue;
1577 }
1578
1579 if ((pix = pixRead(fname)) == NULL) {
1580 L_WARNING("pix not made for file: %s\n", __func__, fname);
1581 continue;
1582 }
1583 if (pixGetDepth(pix) == 1)
1584 pixWriteTiff(fileout, pix, IFF_TIFF_G4, op);
1585 else
1586 pixWriteTiff(fileout, pix, IFF_TIFF_ZIP, op);
1587 firstfile = FALSE;
1588 pixDestroy(&pix);
1589 }
1590
1591 return 0;
1592 }
1593
1594
1595 /*--------------------------------------------------------------*
1596 * Print info to stream *
1597 *--------------------------------------------------------------*/
1598 /*!
1599 * \brief fprintTiffInfo()
1600 *
1601 * \param[in] fpout stream for output of tag data
1602 * \param[in] tiffile input
1603 * \return 0 if OK; 1 on error
1604 */
1605 l_ok
1606 fprintTiffInfo(FILE *fpout,
1607 const char *tiffile)
1608 {
1609 TIFF *tif;
1610
1611 if (!tiffile)
1612 return ERROR_INT("tiffile not defined", __func__, 1);
1613 if (!fpout)
1614 return ERROR_INT("stream out not defined", __func__, 1);
1615
1616 if ((tif = openTiff(tiffile, "rb")) == NULL)
1617 return ERROR_INT("tif not open for read", __func__, 1);
1618
1619 TIFFPrintDirectory(tif, fpout, 0);
1620 TIFFClose(tif);
1621
1622 return 0;
1623 }
1624
1625
1626 /*--------------------------------------------------------------*
1627 * Get page count *
1628 *--------------------------------------------------------------*/
1629 /*!
1630 * \brief tiffGetCount()
1631 *
1632 * \param[in] fp file stream opened for read
1633 * \param[out] pn number of images
1634 * \return 0 if OK; 1 on error
1635 */
1636 l_ok
1637 tiffGetCount(FILE *fp,
1638 l_int32 *pn)
1639 {
1640 l_int32 i;
1641 TIFF *tif;
1642
1643 if (!fp)
1644 return ERROR_INT("stream not defined", __func__, 1);
1645 if (!pn)
1646 return ERROR_INT("&n not defined", __func__, 1);
1647 *pn = 0;
1648
1649 if ((tif = fopenTiff(fp, "r")) == NULL)
1650 return ERROR_INT("tif not open for read", __func__, 1);
1651
1652 for (i = 1; ; i++) {
1653 if (TIFFReadDirectory(tif) == 0)
1654 break;
1655 if (i == ManyPagesInTiffFile + 1) {
1656 L_WARNING("big file: more than %d pages\n", __func__,
1657 ManyPagesInTiffFile);
1658 }
1659 }
1660 *pn = i;
1661 TIFFCleanup(tif);
1662 return 0;
1663 }
1664
1665
1666 /*--------------------------------------------------------------*
1667 * Get resolution from tif *
1668 *--------------------------------------------------------------*/
1669 /*!
1670 * \brief getTiffResolution()
1671 *
1672 * \param[in] fp file stream opened for read
1673 * \param[out] pxres, pyres resolution in ppi
1674 * \return 0 if OK; 1 on error
1675 *
1676 * <pre>
1677 * Notes:
1678 * (1) If neither resolution field is set, this is not an error;
1679 * the returned resolution values are 0 (designating 'unknown').
1680 * </pre>
1681 */
1682 l_ok
1683 getTiffResolution(FILE *fp,
1684 l_int32 *pxres,
1685 l_int32 *pyres)
1686 {
1687 TIFF *tif;
1688
1689 if (!pxres || !pyres)
1690 return ERROR_INT("&xres and &yres not both defined", __func__, 1);
1691 *pxres = *pyres = 0;
1692 if (!fp)
1693 return ERROR_INT("stream not opened", __func__, 1);
1694
1695 if ((tif = fopenTiff(fp, "r")) == NULL)
1696 return ERROR_INT("tif not open for read", __func__, 1);
1697 getTiffStreamResolution(tif, pxres, pyres);
1698 TIFFCleanup(tif);
1699 return 0;
1700 }
1701
1702
1703 /*!
1704 * \brief getTiffStreamResolution()
1705 *
1706 * \param[in] tif TIFF handle opened for read
1707 * \param[out] pxres, pyres resolution in ppi
1708 * \return 0 if OK; 1 on error
1709 *
1710 * <pre>
1711 * Notes:
1712 * (1) If neither resolution field is set, this is not an error;
1713 * the returned resolution values are 0 (designating 'unknown').
1714 * </pre>
1715 */
1716 static l_int32
1717 getTiffStreamResolution(TIFF *tif,
1718 l_int32 *pxres,
1719 l_int32 *pyres)
1720 {
1721 l_uint16 resunit;
1722 l_int32 foundxres, foundyres;
1723 l_float32 fxres, fyres;
1724
1725 if (!tif)
1726 return ERROR_INT("tif not opened", __func__, 1);
1727 if (!pxres || !pyres)
1728 return ERROR_INT("&xres and &yres not both defined", __func__, 1);
1729 *pxres = *pyres = 0;
1730
1731 TIFFGetFieldDefaulted(tif, TIFFTAG_RESOLUTIONUNIT, &resunit);
1732 foundxres = TIFFGetField(tif, TIFFTAG_XRESOLUTION, &fxres);
1733 foundyres = TIFFGetField(tif, TIFFTAG_YRESOLUTION, &fyres);
1734 if (!foundxres && !foundyres) return 1;
1735 if (isnan(fxres) || isnan(fyres)) return 1;
1736 if (!foundxres && foundyres)
1737 fxres = fyres;
1738 else if (foundxres && !foundyres)
1739 fyres = fxres;
1740
1741 /* Avoid overflow into int32; set max fxres and fyres to 5 x 10^8 */
1742 if (fxres < 0 || fxres > (1L << 29) || fyres < 0 || fyres > (1L << 29))
1743 return ERROR_INT("fxres and/or fyres values are invalid", __func__, 1);
1744
1745 if (resunit == RESUNIT_CENTIMETER) { /* convert to ppi */
1746 *pxres = (l_int32)(2.54 * fxres + 0.5);
1747 *pyres = (l_int32)(2.54 * fyres + 0.5);
1748 } else {
1749 *pxres = (l_int32)(fxres + 0.5);
1750 *pyres = (l_int32)(fyres + 0.5);
1751 }
1752
1753 return 0;
1754 }
1755
1756
1757 /*--------------------------------------------------------------*
1758 * Get some tiff header information *
1759 *--------------------------------------------------------------*/
1760 /*!
1761 * \brief readHeaderTiff()
1762 *
1763 * \param[in] filename
1764 * \param[in] n page image number: 0-based
1765 * \param[out] pw [optional] width
1766 * \param[out] ph [optional] height
1767 * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8
1768 * \param[out] pspp [optional] samples per pixel -- 1 or 3
1769 * \param[out] pres [optional] resolution in x dir; NULL to ignore
1770 * \param[out] pcmap [optional] colormap exists; input NULL to ignore
1771 * \param[out] pformat [optional] tiff format; input NULL to ignore
1772 * \return 0 if OK, 1 on error
1773 *
1774 * <pre>
1775 * Notes:
1776 * (1) If there is a colormap, cmap is returned as 1; else 0.
1777 * (2) If %n is equal to or greater than the number of images, returns 1.
1778 * </pre>
1779 */
1780 l_ok
1781 readHeaderTiff(const char *filename,
1782 l_int32 n,
1783 l_int32 *pw,
1784 l_int32 *ph,
1785 l_int32 *pbps,
1786 l_int32 *pspp,
1787 l_int32 *pres,
1788 l_int32 *pcmap,
1789 l_int32 *pformat)
1790 {
1791 l_int32 ret;
1792 FILE *fp;
1793
1794 if (pw) *pw = 0;
1795 if (ph) *ph = 0;
1796 if (pbps) *pbps = 0;
1797 if (pspp) *pspp = 0;
1798 if (pres) *pres = 0;
1799 if (pcmap) *pcmap = 0;
1800 if (pformat) *pformat = 0;
1801 if (!filename)
1802 return ERROR_INT("filename not defined", __func__, 1);
1803 if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat)
1804 return ERROR_INT("no results requested", __func__, 1);
1805
1806 if ((fp = fopenReadStream(filename)) == NULL)
1807 return ERROR_INT_1("image file not found", filename, __func__, 1);
1808 ret = freadHeaderTiff(fp, n, pw, ph, pbps, pspp, pres, pcmap, pformat);
1809 fclose(fp);
1810 return ret;
1811 }
1812
1813
1814 /*!
1815 * \brief freadHeaderTiff()
1816 *
1817 * \param[in] fp file stream
1818 * \param[in] n page image number: 0-based
1819 * \param[out] pw [optional] width
1820 * \param[out] ph [optional] height
1821 * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8
1822 * \param[out] pspp [optional] samples per pixel -- 1 or 3
1823 * \param[out] pres [optional] resolution in x dir; NULL to ignore
1824 * \param[out] pcmap [optional] colormap exists; input NULL to ignore
1825 * \param[out] pformat [optional] tiff format; input NULL to ignore
1826 * \return 0 if OK, 1 on error
1827 *
1828 * <pre>
1829 * Notes:
1830 * (1) If there is a colormap, cmap is returned as 1; else 0.
1831 * (2) If %n is equal to or greater than the number of images, returns 1.
1832 * </pre>
1833 */
1834 l_ok
1835 freadHeaderTiff(FILE *fp,
1836 l_int32 n,
1837 l_int32 *pw,
1838 l_int32 *ph,
1839 l_int32 *pbps,
1840 l_int32 *pspp,
1841 l_int32 *pres,
1842 l_int32 *pcmap,
1843 l_int32 *pformat)
1844 {
1845 l_int32 i, ret, format;
1846 TIFF *tif;
1847
1848 if (pw) *pw = 0;
1849 if (ph) *ph = 0;
1850 if (pbps) *pbps = 0;
1851 if (pspp) *pspp = 0;
1852 if (pres) *pres = 0;
1853 if (pcmap) *pcmap = 0;
1854 if (pformat) *pformat = 0;
1855 if (!fp)
1856 return ERROR_INT("stream not defined", __func__, 1);
1857 if (n < 0)
1858 return ERROR_INT("image index must be >= 0", __func__, 1);
1859 if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat)
1860 return ERROR_INT("no results requested", __func__, 1);
1861
1862 findFileFormatStream(fp, &format);
1863 if (!L_FORMAT_IS_TIFF(format))
1864 return ERROR_INT("file not tiff format", __func__, 1);
1865
1866 if ((tif = fopenTiff(fp, "r")) == NULL)
1867 return ERROR_INT("tif not open for read", __func__, 1);
1868
1869 for (i = 0; i < n; i++) {
1870 if (TIFFReadDirectory(tif) == 0)
1871 return ERROR_INT("image n not found in file", __func__, 1);
1872 }
1873
1874 ret = tiffReadHeaderTiff(tif, pw, ph, pbps, pspp, pres, pcmap, pformat);
1875 TIFFCleanup(tif);
1876 return ret;
1877 }
1878
1879
1880 /*!
1881 * \brief readHeaderMemTiff()
1882 *
1883 * \param[in] cdata const; tiff-encoded
1884 * \param[in] size size of data
1885 * \param[in] n page image number: 0-based
1886 * \param[out] pw [optional] width
1887 * \param[out] ph [optional] height
1888 * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8
1889 * \param[out] pspp [optional] samples per pixel -- 1 or 3
1890 * \param[out] pres [optional] resolution in x dir; NULL to ignore
1891 * \param[out] pcmap [optional] colormap exists; input NULL to ignore
1892 * \param[out] pformat [optional] tiff format; input NULL to ignore
1893 * \return 0 if OK, 1 on error
1894 *
1895 * <pre>
1896 * Notes:
1897 * (1) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
1898 * (2) Returns res = 0 if not set in the file.
1899 * </pre>
1900 */
1901 l_ok
1902 readHeaderMemTiff(const l_uint8 *cdata,
1903 size_t size,
1904 l_int32 n,
1905 l_int32 *pw,
1906 l_int32 *ph,
1907 l_int32 *pbps,
1908 l_int32 *pspp,
1909 l_int32 *pres,
1910 l_int32 *pcmap,
1911 l_int32 *pformat)
1912 {
1913 l_uint8 *data;
1914 l_int32 i, ret;
1915 TIFF *tif;
1916
1917 if (pw) *pw = 0;
1918 if (ph) *ph = 0;
1919 if (pbps) *pbps = 0;
1920 if (pspp) *pspp = 0;
1921 if (pres) *pres = 0;
1922 if (pcmap) *pcmap = 0;
1923 if (pformat) *pformat = 0;
1924 if (!pw && !ph && !pbps && !pspp && !pres && !pcmap && !pformat)
1925 return ERROR_INT("no results requested", __func__, 1);
1926 if (!cdata)
1927 return ERROR_INT("cdata not defined", __func__, 1);
1928
1929 /* Open a tiff stream to memory */
1930 data = (l_uint8 *)cdata; /* we're really not going to change this */
1931 if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL)
1932 return ERROR_INT("tiff stream not opened", __func__, 1);
1933
1934 for (i = 0; i < n; i++) {
1935 if (TIFFReadDirectory(tif) == 0) {
1936 TIFFClose(tif);
1937 return ERROR_INT("image n not found in file", __func__, 1);
1938 }
1939 }
1940
1941 ret = tiffReadHeaderTiff(tif, pw, ph, pbps, pspp, pres, pcmap, pformat);
1942 TIFFClose(tif);
1943 return ret;
1944 }
1945
1946
1947 /*!
1948 * \brief tiffReadHeaderTiff()
1949 *
1950 * \param[in] tif
1951 * \param[out] pw [optional] width
1952 * \param[out] ph [optional] height
1953 * \param[out] pbps [optional] bits per sample -- 1, 2, 4 or 8
1954 * \param[out] pspp [optional] samples per pixel -- 1 or 3
1955 * \param[out] pres [optional] resolution in x dir; NULL to ignore
1956 * \param[out] pcmap [optional] cmap exists; input NULL to ignore
1957 * \param[out] pformat [optional] tiff format; input NULL to ignore
1958 * \return 0 if OK, 1 on error
1959 */
1960 static l_int32
1961 tiffReadHeaderTiff(TIFF *tif,
1962 l_int32 *pw,
1963 l_int32 *ph,
1964 l_int32 *pbps,
1965 l_int32 *pspp,
1966 l_int32 *pres,
1967 l_int32 *pcmap,
1968 l_int32 *pformat)
1969 {
1970 l_uint16 tiffcomp;
1971 l_uint16 bps, spp;
1972 l_uint16 *rmap, *gmap, *bmap;
1973 l_int32 xres, yres;
1974 l_uint32 w, h;
1975
1976 if (!tif)
1977 return ERROR_INT("tif not opened", __func__, 1);
1978
1979 TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
1980 TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
1981 TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
1982 TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
1983 if (w < 1 || h < 1)
1984 return ERROR_INT("tif w and h not both > 0", __func__, 1);
1985 if (bps != 1 && bps != 2 && bps != 4 && bps != 8 && bps != 16)
1986 return ERROR_INT("bps not in set {1,2,4,8,16}", __func__, 1);
1987 if (spp != 1 && spp != 2 && spp != 3 && spp != 4)
1988 return ERROR_INT("spp not in set {1,2,3,4}", __func__, 1);
1989 if (pw) *pw = w;
1990 if (ph) *ph = h;
1991 if (pbps) *pbps = bps;
1992 if (pspp) *pspp = spp;
1993 if (pres) {
1994 if (getTiffStreamResolution(tif, &xres, &yres) == 0)
1995 *pres = (l_int32)xres;
1996 }
1997 if (pcmap) {
1998 if (TIFFGetField(tif, TIFFTAG_COLORMAP, &rmap, &gmap, &bmap))
1999 *pcmap = 1;
2000 }
2001 if (pformat) {
2002 TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
2003 *pformat = getTiffCompressedFormat(tiffcomp);
2004 }
2005 return 0;
2006 }
2007
2008
2009 /*!
2010 * \brief findTiffCompression()
2011 *
2012 * \param[in] fp file stream; must be rewound to BOF
2013 * \param[out] pcomptype compression type
2014 * \return 0 if OK, 1 on error
2015 *
2016 * <pre>
2017 * Notes:
2018 * (1) The returned compression type is that defined in
2019 * the enum in imageio.h. It is not the tiff flag value.
2020 * (2) The compression type is initialized to IFF_UNKNOWN.
2021 * If it is not one of the specified types, the returned
2022 * type is IFF_TIFF, which indicates no compression.
2023 * (3) When this function is called, the stream must be at BOF.
2024 * If the opened stream is to be used again to read the
2025 * file, it must be rewound to BOF after calling this function.
2026 * </pre>
2027 */
2028 l_ok
2029 findTiffCompression(FILE *fp,
2030 l_int32 *pcomptype)
2031 {
2032 l_uint16 tiffcomp;
2033 TIFF *tif;
2034
2035 if (!pcomptype)
2036 return ERROR_INT("&comptype not defined", __func__, 1);
2037 *pcomptype = IFF_UNKNOWN; /* init */
2038 if (!fp)
2039 return ERROR_INT("stream not defined", __func__, 1);
2040
2041 if ((tif = fopenTiff(fp, "r")) == NULL)
2042 return ERROR_INT("tif not opened", __func__, 1);
2043 TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
2044 *pcomptype = getTiffCompressedFormat(tiffcomp);
2045 TIFFCleanup(tif);
2046 return 0;
2047 }
2048
2049
2050 /*!
2051 * \brief getTiffCompressedFormat()
2052 *
2053 * \param[in] tiffcomp defined in tiff.h
2054 * \return compression format defined in imageio.h
2055 *
2056 * <pre>
2057 * Notes:
2058 * (1) The input must be the actual tiff compression type
2059 * returned by a tiff library call. It should always be
2060 * a valid tiff type.
2061 * (2) The return type is defined in the enum in imageio.h.
2062 * </pre>
2063 */
2064 static l_int32
2065 getTiffCompressedFormat(l_uint16 tiffcomp)
2066 {
2067 l_int32 comptype;
2068
2069 switch (tiffcomp)
2070 {
2071 case COMPRESSION_CCITTFAX4:
2072 comptype = IFF_TIFF_G4;
2073 break;
2074 case COMPRESSION_CCITTFAX3:
2075 comptype = IFF_TIFF_G3;
2076 break;
2077 case COMPRESSION_CCITTRLE:
2078 comptype = IFF_TIFF_RLE;
2079 break;
2080 case COMPRESSION_PACKBITS:
2081 comptype = IFF_TIFF_PACKBITS;
2082 break;
2083 case COMPRESSION_LZW:
2084 comptype = IFF_TIFF_LZW;
2085 break;
2086 case COMPRESSION_ADOBE_DEFLATE:
2087 comptype = IFF_TIFF_ZIP;
2088 break;
2089 case COMPRESSION_JPEG:
2090 comptype = IFF_TIFF_JPEG;
2091 break;
2092 default:
2093 comptype = IFF_TIFF;
2094 break;
2095 }
2096 return comptype;
2097 }
2098
2099
2100 /*--------------------------------------------------------------*
2101 * Extraction of tiff g4 data *
2102 *--------------------------------------------------------------*/
2103 /*!
2104 * \brief extractG4DataFromFile()
2105 *
2106 * \param[in] filein
2107 * \param[out] pdata binary data of ccitt g4 encoded stream
2108 * \param[out] pnbytes size of binary data
2109 * \param[out] pw [optional] image width
2110 * \param[out] ph [optional] image height
2111 * \param[out] pminisblack [optional] boolean
2112 * \return 0 if OK, 1 on error
2113 */
2114 l_ok
2115 extractG4DataFromFile(const char *filein,
2116 l_uint8 **pdata,
2117 size_t *pnbytes,
2118 l_int32 *pw,
2119 l_int32 *ph,
2120 l_int32 *pminisblack)
2121 {
2122 l_uint8 *inarray, *data;
2123 l_uint16 minisblack, comptype; /* accessors require l_uint16 */
2124 l_int32 istiff;
2125 l_uint32 w, h, rowsperstrip; /* accessors require l_uint32 */
2126 l_uint32 diroff;
2127 size_t fbytes, nbytes;
2128 FILE *fpin;
2129 TIFF *tif;
2130
2131 if (!pdata)
2132 return ERROR_INT("&data not defined", __func__, 1);
2133 if (!pnbytes)
2134 return ERROR_INT("&nbytes not defined", __func__, 1);
2135 if (!pw && !ph && !pminisblack)
2136 return ERROR_INT("no output data requested", __func__, 1);
2137 *pdata = NULL;
2138 *pnbytes = 0;
2139
2140 if ((fpin = fopenReadStream(filein)) == NULL)
2141 return ERROR_INT_1("stream not opened to file", filein, __func__, 1);
2142 istiff = fileFormatIsTiff(fpin);
2143 fclose(fpin);
2144 if (!istiff)
2145 return ERROR_INT_1("filein not tiff", filein, __func__, 1);
2146
2147 if ((inarray = l_binaryRead(filein, &fbytes)) == NULL)
2148 return ERROR_INT_1("inarray not made", filein, __func__, 1);
2149
2150 /* Get metadata about the image */
2151 if ((tif = openTiff(filein, "rb")) == NULL) {
2152 LEPT_FREE(inarray);
2153 return ERROR_INT_1("tif not open for read", filein, __func__, 1);
2154 }
2155 TIFFGetField(tif, TIFFTAG_COMPRESSION, &comptype);
2156 if (comptype != COMPRESSION_CCITTFAX4) {
2157 LEPT_FREE(inarray);
2158 TIFFClose(tif);
2159 return ERROR_INT_1("filein is not g4 compressed", filein, __func__, 1);
2160 }
2161
2162 TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
2163 TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
2164 TIFFGetField(tif, TIFFTAG_ROWSPERSTRIP, &rowsperstrip);
2165 if (h != rowsperstrip)
2166 L_WARNING("more than 1 strip\n", __func__);
2167 /* From the standard:
2168 TIFFTAG_PHOTOMETRIC = 0 (false) --> min value is white.
2169 TIFFTAG_PHOTOMETRIC = 1 (true) --> min value is black.
2170 Most 1 bpp tiffs have the tag value 0 (black is 1),
2171 because there are fewer black pixels than white pixels,
2172 so it makes sense to encode runs of black pixels. */
2173 TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &minisblack);
2174 /* TIFFPrintDirectory(tif, stderr, 0); */
2175 TIFFClose(tif);
2176 if (pw) *pw = (l_int32)w;
2177 if (ph) *ph = (l_int32)h;
2178 if (pminisblack) *pminisblack = (l_int32)minisblack;
2179
2180 /* The header has 8 bytes: the first 2 are the magic number,
2181 * the next 2 are the version, and the last 4 are the
2182 * offset to the first directory. That's what we want here.
2183 * We have to test the byte order before decoding 4 bytes! */
2184 if (inarray[0] == 0x4d) { /* big-endian */
2185 diroff = (inarray[4] << 24) | (inarray[5] << 16) |
2186 (inarray[6] << 8) | inarray[7];
2187 } else { /* inarray[0] == 0x49 : little-endian */
2188 diroff = (inarray[7] << 24) | (inarray[6] << 16) |
2189 (inarray[5] << 8) | inarray[4];
2190 }
2191 /* lept_stderr(" diroff = %d, %x\n", diroff, diroff); */
2192
2193 /* Extract the ccittg4 encoded data from the tiff file.
2194 * We skip the 8 byte header and take nbytes of data,
2195 * up to the beginning of the directory (at diroff) */
2196 nbytes = diroff - 8;
2197 if (nbytes > MaxNumTiffBytes) {
2198 LEPT_FREE(inarray);
2199 L_ERROR("requesting %zu bytes > %zu\n", __func__,
2200 nbytes, MaxNumTiffBytes);
2201 return 1;
2202 }
2203 *pnbytes = nbytes;
2204 if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL) {
2205 LEPT_FREE(inarray);
2206 return ERROR_INT("data not allocated", __func__, 1);
2207 }
2208 *pdata = data;
2209 memcpy(data, inarray + 8, nbytes);
2210 LEPT_FREE(inarray);
2211
2212 return 0;
2213 }
2214
2215
2216 /*--------------------------------------------------------------*
2217 * Open tiff stream from file stream *
2218 *--------------------------------------------------------------*/
2219 /*!
2220 * \brief fopenTiff()
2221 *
2222 * \param[in] fp file stream
2223 * \param[in] modestring "r", "w", ...
2224 * \return tiff data structure, opened for a file descriptor
2225 *
2226 * <pre>
2227 * Notes:
2228 * (1) Why is this here? Leffler did not provide a function that
2229 * takes a stream and gives a TIFF. He only gave one that
2230 * generates a TIFF starting with a file descriptor. So we
2231 * need to make it here, because it is useful to have functions
2232 * that take a stream as input.
2233 * (2) We use TIFFClientOpen() together with a set of static wrapper
2234 * functions which map TIFF read, write, seek, close and size.
2235 * to functions expecting a cookie of type stream (i.e. FILE *).
2236 * This implementation was contributed by Jürgen Buchmüller.
2237 * </pre>
2238 */
2239 static TIFF *
2240 fopenTiff(FILE *fp,
2241 const char *modestring)
2242 {
2243 if (!fp)
2244 return (TIFF *)ERROR_PTR("stream not opened", __func__, NULL);
2245 if (!modestring)
2246 return (TIFF *)ERROR_PTR("modestring not defined", __func__, NULL);
2247
2248 TIFFSetWarningHandler(NULL); /* disable warnings */
2249 TIFFSetErrorHandler(NULL); /* disable error messages */
2250
2251 fseek(fp, 0, SEEK_SET);
2252 return TIFFClientOpen("TIFFstream", modestring, (thandle_t)fp,
2253 lept_read_proc, lept_write_proc, lept_seek_proc,
2254 lept_close_proc, lept_size_proc, NULL, NULL);
2255 }
2256
2257
2258 /*--------------------------------------------------------------*
2259 * Wrapper for TIFFOpen *
2260 *--------------------------------------------------------------*/
2261 /*!
2262 * \brief openTiff()
2263 *
2264 * \param[in] filename
2265 * \param[in] modestring "r", "w", ...
2266 * \return tiff data structure
2267 *
2268 * <pre>
2269 * Notes:
2270 * (1) This handles multi-platform file naming.
2271 * </pre>
2272 */
2273 static TIFF *
2274 openTiff(const char *filename,
2275 const char *modestring)
2276 {
2277 char *fname;
2278 TIFF *tif;
2279
2280 if (!filename)
2281 return (TIFF *)ERROR_PTR("filename not defined", __func__, NULL);
2282 if (!modestring)
2283 return (TIFF *)ERROR_PTR("modestring not defined", __func__, NULL);
2284
2285 TIFFSetWarningHandler(NULL); /* disable warnings */
2286 TIFFSetErrorHandler(NULL); /* disable error messages */
2287
2288 fname = genPathname(filename, NULL);
2289 tif = TIFFOpen(fname, modestring);
2290 LEPT_FREE(fname);
2291 return tif;
2292 }
2293
2294
2295 /*----------------------------------------------------------------------*
2296 * Memory I/O: reading memory --> pix and writing pix --> memory *
2297 *----------------------------------------------------------------------*/
/*  It would be nice to use open_memstream() and fmemopen()
 *  for writing and reading to memory, respectively.  These functions
 *  manage memory for writes and reads that use a file stream interface.
 *  Unfortunately, the tiff library only has an interface for reading
 *  and writing to file descriptors, not to file streams.  The tiff
 *  library procedure is to open a "tiff stream" and read/write to it.
 *  The library provides a client interface for managing the I/O
 *  from memory, which requires seven callbacks.  See the TIFFClientOpen
 *  man page for callback signatures.  Adam Langley provided the code
 *  to do this.  */
2308
2309 /*!
2310 * \brief Memory stream buffer used with TIFFClientOpen()
2311 *
2312 * The L_Memstram %buffer has different functions in writing and reading.
2313 *
2314 * * In reading, it is assigned to the data and read from as
2315 * the tiff library uncompresses the data and generates the pix.
2316 * The %offset points to the current read position in the data,
2317 * and the %hw always gives the number of bytes of data.
2318 * The %outdata and %outsize ptrs are not used.
2319 * When finished, tiffCloseCallback() simply frees the L_Memstream.
2320 *
2321 * * In writing, it accepts the data that the tiff library
2322 * produces when a pix is compressed. the buffer points to a
2323 * malloced area of %bufsize bytes. The current writing position
2324 * in the buffer is %offset and the most ever written is %hw.
2325 * The buffer is expanded as necessary. When finished,
2326 * tiffCloseCallback() assigns the %outdata and %outsize ptrs
2327 * to the %buffer and %bufsize results, and frees the L_Memstream.
2328 */
2329 struct L_Memstream
2330 {
2331 l_uint8 *buffer; /* expands to hold data when written to; */
2332 /* fixed size when read from. */
2333 size_t bufsize; /* current size allocated when written to; */
2334 /* fixed size of input data when read from. */
2335 size_t offset; /* byte offset from beginning of buffer. */
2336 size_t hw; /* high-water mark; max bytes in buffer. */
2337 l_uint8 **poutdata; /* input param for writing; data goes here. */
2338 size_t *poutsize; /* input param for writing; data size goes here. */
2339 };
2340 typedef struct L_Memstream L_MEMSTREAM;
2341
2342
2343 /* These are static functions for memory I/O */
2344 static L_MEMSTREAM *memstreamCreateForRead(l_uint8 *indata, size_t pinsize);
2345 static L_MEMSTREAM *memstreamCreateForWrite(l_uint8 **poutdata,
2346 size_t *poutsize);
2347 static tsize_t tiffReadCallback(thandle_t handle, tdata_t data, tsize_t length);
2348 static tsize_t tiffWriteCallback(thandle_t handle, tdata_t data,
2349 tsize_t length);
2350 static toff_t tiffSeekCallback(thandle_t handle, toff_t offset, l_int32 whence);
2351 static l_int32 tiffCloseCallback(thandle_t handle);
2352 static toff_t tiffSizeCallback(thandle_t handle);
2353 static l_int32 tiffMapCallback(thandle_t handle, tdata_t *data, toff_t *length);
2354 static void tiffUnmapCallback(thandle_t handle, tdata_t data, toff_t length);
2355
2356
2357 static L_MEMSTREAM *
2358 memstreamCreateForRead(l_uint8 *indata,
2359 size_t insize)
2360 {
2361 L_MEMSTREAM *mstream;
2362
2363 mstream = (L_MEMSTREAM *)LEPT_CALLOC(1, sizeof(L_MEMSTREAM));
2364 mstream->buffer = indata; /* handle to input data array */
2365 mstream->bufsize = insize; /* amount of input data */
2366 mstream->hw = insize; /* high-water mark fixed at input data size */
2367 mstream->offset = 0; /* offset always starts at 0 */
2368 return mstream;
2369 }
2370
2371
2372 static L_MEMSTREAM *
2373 memstreamCreateForWrite(l_uint8 **poutdata,
2374 size_t *poutsize)
2375 {
2376 L_MEMSTREAM *mstream;
2377
2378 mstream = (L_MEMSTREAM *)LEPT_CALLOC(1, sizeof(L_MEMSTREAM));
2379 mstream->buffer = (l_uint8 *)LEPT_CALLOC(8 * 1024, 1);
2380 mstream->bufsize = 8 * 1024;
2381 mstream->poutdata = poutdata; /* used only at end of write */
2382 mstream->poutsize = poutsize; /* ditto */
2383 mstream->hw = mstream->offset = 0;
2384 return mstream;
2385 }
2386
2387
2388 static tsize_t
2389 tiffReadCallback(thandle_t handle,
2390 tdata_t data,
2391 tsize_t length)
2392 {
2393 L_MEMSTREAM *mstream;
2394 size_t amount;
2395
2396 mstream = (L_MEMSTREAM *)handle;
2397 amount = L_MIN((size_t)length, mstream->hw - mstream->offset);
2398
2399 /* Fuzzed files can create this condition! */
2400 if (mstream->offset + amount < amount || /* overflow */
2401 mstream->offset + amount > mstream->hw) {
2402 lept_stderr("Bad file: amount too big: %zu\n", amount);
2403 return 0;
2404 }
2405
2406 memcpy(data, mstream->buffer + mstream->offset, amount);
2407 mstream->offset += amount;
2408 return amount;
2409 }
2410
2411
2412 static tsize_t
2413 tiffWriteCallback(thandle_t handle,
2414 tdata_t data,
2415 tsize_t length)
2416 {
2417 L_MEMSTREAM *mstream;
2418 size_t newsize;
2419
2420 /* reallocNew() uses calloc to initialize the array.
2421 * If malloc is used instead, for some of the encoding methods,
2422 * not all the data in 'bufsize' bytes in the buffer will
2423 * have been initialized by the end of the compression. */
2424 mstream = (L_MEMSTREAM *)handle;
2425 if (mstream->offset + length > mstream->bufsize) {
2426 newsize = 2 * (mstream->offset + length);
2427 mstream->buffer = (l_uint8 *)reallocNew((void **)&mstream->buffer,
2428 mstream->hw, newsize);
2429 mstream->bufsize = newsize;
2430 }
2431
2432 memcpy(mstream->buffer + mstream->offset, data, length);
2433 mstream->offset += length;
2434 mstream->hw = L_MAX(mstream->offset, mstream->hw);
2435 return length;
2436 }
2437
2438
2439 static toff_t
2440 tiffSeekCallback(thandle_t handle,
2441 toff_t offset,
2442 l_int32 whence)
2443 {
2444 L_MEMSTREAM *mstream;
2445
2446 mstream = (L_MEMSTREAM *)handle;
2447 switch (whence) {
2448 case SEEK_SET:
2449 /* lept_stderr("seek_set: offset = %d\n", offset); */
2450 if((size_t)offset != offset) { /* size_t overflow on uint32 */
2451 return (toff_t)ERROR_INT("too large offset value", __func__, 1);
2452 }
2453 mstream->offset = offset;
2454 break;
2455 case SEEK_CUR:
2456 /* lept_stderr("seek_cur: offset = %d\n", offset); */
2457 mstream->offset += offset;
2458 break;
2459 case SEEK_END:
2460 /* lept_stderr("seek end: hw = %d, offset = %d\n",
2461 mstream->hw, offset); */
2462 mstream->offset = mstream->hw - offset; /* offset >= 0 */
2463 break;
2464 default:
2465 return (toff_t)ERROR_INT("bad whence value", __func__,
2466 mstream->offset);
2467 }
2468
2469 return mstream->offset;
2470 }
2471
2472
2473 static l_int32
2474 tiffCloseCallback(thandle_t handle)
2475 {
2476 L_MEMSTREAM *mstream;
2477
2478 mstream = (L_MEMSTREAM *)handle;
2479 if (mstream->poutdata) { /* writing: save the output data */
2480 *mstream->poutdata = mstream->buffer;
2481 *mstream->poutsize = mstream->hw;
2482 }
2483 LEPT_FREE(mstream); /* never free the buffer! */
2484 return 0;
2485 }
2486
2487
2488 static toff_t
2489 tiffSizeCallback(thandle_t handle)
2490 {
2491 L_MEMSTREAM *mstream;
2492
2493 mstream = (L_MEMSTREAM *)handle;
2494 return mstream->hw;
2495 }
2496
2497
2498 static l_int32
2499 tiffMapCallback(thandle_t handle,
2500 tdata_t *data,
2501 toff_t *length)
2502 {
2503 L_MEMSTREAM *mstream;
2504
2505 mstream = (L_MEMSTREAM *)handle;
2506 *data = mstream->buffer;
2507 *length = mstream->hw;
2508 return 0;
2509 }
2510
2511
2512 static void
2513 tiffUnmapCallback(thandle_t handle,
2514 tdata_t data,
2515 toff_t length)
2516 {
2517 return;
2518 }
2519
2520
2521 /*!
2522 * \brief fopenTiffMemstream()
2523 *
2524 * \param[in] filename for error output; can be ""
2525 * \param[in] operation "w" for write, "r" for read
2526 * \param[out] pdata written data
2527 * \param[out] pdatasize size of written data
2528 * \return tiff data structure, opened for write to memory
2529 *
2530 * <pre>
2531 * Notes:
2532 * (1) This wraps up a number of callbacks for either:
2533 * * reading from tiff in memory buffer --> pix
2534 * * writing from pix --> tiff in memory buffer
2535 * (2) After use, the memstream is automatically destroyed when
2536 * TIFFClose() is called. TIFFCleanup() doesn't free the memstream.
2537 * (3) This does not work in append mode, and in write mode it
2538 * does not append.
2539 * </pre>
2540 */
2541 static TIFF *
2542 fopenTiffMemstream(const char *filename,
2543 const char *operation,
2544 l_uint8 **pdata,
2545 size_t *pdatasize)
2546 {
2547 L_MEMSTREAM *mstream;
2548 TIFF *tif;
2549
2550 if (!filename)
2551 return (TIFF *)ERROR_PTR("filename not defined", __func__, NULL);
2552 if (!operation)
2553 return (TIFF *)ERROR_PTR("operation not defined", __func__, NULL);
2554 if (!pdata)
2555 return (TIFF *)ERROR_PTR("&data not defined", __func__, NULL);
2556 if (!pdatasize)
2557 return (TIFF *)ERROR_PTR("&datasize not defined", __func__, NULL);
2558 if (strcmp(operation, "r") && strcmp(operation, "w"))
2559 return (TIFF *)ERROR_PTR("op not 'r' or 'w'", __func__, NULL);
2560
2561 if (!strcmp(operation, "r"))
2562 mstream = memstreamCreateForRead(*pdata, *pdatasize);
2563 else
2564 mstream = memstreamCreateForWrite(pdata, pdatasize);
2565 if (!mstream)
2566 return (TIFF *)ERROR_PTR("mstream not made", __func__, NULL);
2567
2568 TIFFSetWarningHandler(NULL); /* disable warnings */
2569 TIFFSetErrorHandler(NULL); /* disable error messages */
2570
2571 tif = TIFFClientOpen(filename, operation, (thandle_t)mstream,
2572 tiffReadCallback, tiffWriteCallback,
2573 tiffSeekCallback, tiffCloseCallback,
2574 tiffSizeCallback, tiffMapCallback,
2575 tiffUnmapCallback);
2576 if (!tif)
2577 LEPT_FREE(mstream);
2578 return tif;
2579 }
2580
2581
2582 /*!
2583 * \brief pixReadMemTiff()
2584 *
2585 * \param[in] cdata const; tiff-encoded
2586 * \param[in] size size of cdata
2587 * \param[in] n page image number: 0-based
2588 * \return pix, or NULL on error
2589 *
2590 * <pre>
2591 * Notes:
2592 * (1) This is a version of pixReadTiff(), where the data is read
2593 * from a memory buffer and uncompressed.
2594 * (2) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
2595 * (3) No warning messages on failure, because of how multi-page
2596 * TIFF reading works. You are supposed to keep trying until
2597 * it stops working.
2598 * (4) Tiff directory overhead is linear in the input page number.
2599 * If reading many images, use pixReadMemFromMultipageTiff().
2600 * </pre>
2601 */
2602 PIX *
2603 pixReadMemTiff(const l_uint8 *cdata,
2604 size_t size,
2605 l_int32 n)
2606 {
2607 l_uint8 *data;
2608 l_int32 i;
2609 PIX *pix;
2610 TIFF *tif;
2611
2612 if (!cdata)
2613 return (PIX *)ERROR_PTR("cdata not defined", __func__, NULL);
2614
2615 data = (l_uint8 *)cdata; /* we're really not going to change this */
2616 if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL)
2617 return (PIX *)ERROR_PTR("tiff stream not opened", __func__, NULL);
2618
2619 pix = NULL;
2620 for (i = 0; ; i++) {
2621 if (i == n) {
2622 if ((pix = pixReadFromTiffStream(tif)) == NULL) {
2623 TIFFClose(tif);
2624 return NULL;
2625 }
2626 pixSetInputFormat(pix, IFF_TIFF);
2627 break;
2628 }
2629 if (TIFFReadDirectory(tif) == 0)
2630 break;
2631 if (i == ManyPagesInTiffFile + 1) {
2632 L_WARNING("big file: more than %d pages\n", __func__,
2633 ManyPagesInTiffFile);
2634 }
2635 }
2636
2637 TIFFClose(tif);
2638 return pix;
2639 }
2640
2641
2642 /*!
2643 * \brief pixReadMemFromMultipageTiff()
2644 *
2645 * \param[in] cdata const; tiff-encoded
2646 * \param[in] size size of cdata
2647 * \param[in,out] poffset set offset to 0 for first image
2648 * \return pix, or NULL on error or if previous call returned the last image
2649 *
2650 * <pre>
2651 * Notes:
2652 * (1) This is a read-from-memory version of pixReadFromMultipageTiff().
2653 * See that function for usage.
2654 * (2) If reading sequentially from the tiff data, this is more
2655 * efficient than pixReadMemTiff(), which has an overhead
2656 * proportional to the image index n.
2657 * (3) Example usage for reading all the images:
2658 * size_t offset = 0;
2659 * do {
2660 * Pix *pix = pixReadMemFromMultipageTiff(data, size, &offset);
2661 * // do something with pix
2662 * } while (offset != 0);
2663 * </pre>
2664 */
2665 PIX *
2666 pixReadMemFromMultipageTiff(const l_uint8 *cdata,
2667 size_t size,
2668 size_t *poffset)
2669 {
2670 l_uint8 *data;
2671 l_int32 retval;
2672 size_t offset;
2673 PIX *pix;
2674 TIFF *tif;
2675
2676 if (!cdata)
2677 return (PIX *)ERROR_PTR("cdata not defined", __func__, NULL);
2678 if (!poffset)
2679 return (PIX *)ERROR_PTR("&offset not defined", __func__, NULL);
2680
2681 data = (l_uint8 *)cdata; /* we're really not going to change this */
2682 if ((tif = fopenTiffMemstream("tifferror", "r", &data, &size)) == NULL)
2683 return (PIX *)ERROR_PTR("tiff stream not opened", __func__, NULL);
2684
2685 /* Set ptrs in the TIFF to the beginning of the image */
2686 offset = *poffset;
2687 retval = (offset == 0) ? TIFFSetDirectory(tif, 0)
2688 : TIFFSetSubDirectory(tif, offset);
2689 if (retval == 0) {
2690 TIFFClose(tif);
2691 return NULL;
2692 }
2693
2694 if ((pix = pixReadFromTiffStream(tif)) == NULL) {
2695 TIFFClose(tif);
2696 return NULL;
2697 }
2698
2699 /* Advance to the next image and return the new offset */
2700 TIFFReadDirectory(tif);
2701 *poffset = TIFFCurrentDirOffset(tif);
2702 TIFFClose(tif);
2703 return pix;
2704 }
2705
2706
2707 /*!
2708 * \brief pixaReadMemMultipageTiff()
2709 *
2710 * \param[in] data const; multiple pages; tiff-encoded
2711 * \param[in] size size of cdata
2712 * \return pixa, or NULL on error
2713 *
2714 * <pre>
2715 * Notes:
2716 * (1) This is an O(n) read-from-memory version of pixaReadMultipageTiff().
2717 * </pre>
2718 */
2719 PIXA *
2720 pixaReadMemMultipageTiff(const l_uint8 *data,
2721 size_t size)
2722 {
2723 size_t offset;
2724 PIX *pix;
2725 PIXA *pixa;
2726
2727 if (!data)
2728 return (PIXA *)ERROR_PTR("data not defined", __func__, NULL);
2729
2730 offset = 0;
2731 pixa = pixaCreate(0);
2732 do {
2733 pix = pixReadMemFromMultipageTiff(data, size, &offset);
2734 pixaAddPix(pixa, pix, L_INSERT);
2735 } while (offset != 0);
2736 return pixa;
2737 }
2738
2739
2740 /*!
2741 * \brief pixaWriteMemMultipageTiff()
2742 *
2743 * \param[out] pdata const; tiff-encoded
2744 * \param[out] psize size of data
2745 * \param[in] pixa any depth; colormap will be removed
2746 * \return 0 if OK, 1 on error
2747 *
2748 * <pre>
2749 * Notes:
2750 * (1) fopenTiffMemstream() does not work in append mode, so we
2751 * must work-around with a temporary file.
2752 * (2) Getting a file stream from
2753 * open_memstream((char **)pdata, psize)
2754 * does not work with the tiff directory.
2755 * </pre>
2756 */
2757 l_ok
2758 pixaWriteMemMultipageTiff(l_uint8 **pdata,
2759 size_t *psize,
2760 PIXA *pixa)
2761 {
2762 const char *modestr;
2763 l_int32 i, n;
2764 FILE *fp;
2765 PIX *pix1;
2766
2767 if (pdata) *pdata = NULL;
2768 if (!pdata)
2769 return ERROR_INT("pdata not defined", __func__, 1);
2770 if (!pixa)
2771 return ERROR_INT("pixa not defined", __func__, 1);
2772
2773 #ifdef _WIN32
2774 if ((fp = fopenWriteWinTempfile()) == NULL)
2775 return ERROR_INT("tmpfile stream not opened", __func__, 1);
2776 #else
2777 if ((fp = tmpfile()) == NULL)
2778 return ERROR_INT("tmpfile stream not opened", __func__, 1);
2779 #endif /* _WIN32 */
2780
2781 n = pixaGetCount(pixa);
2782 for (i = 0; i < n; i++) {
2783 modestr = (i == 0) ? "w" : "a";
2784 pix1 = pixaGetPix(pixa, i, L_CLONE);
2785 if (pixGetDepth(pix1) == 1)
2786 pixWriteStreamTiffWA(fp, pix1, IFF_TIFF_G4, modestr);
2787 else
2788 pixWriteStreamTiffWA(fp, pix1, IFF_TIFF_ZIP, modestr);
2789 pixDestroy(&pix1);
2790 }
2791
2792 rewind(fp);
2793 *pdata = l_binaryReadStream(fp, psize);
2794 fclose(fp);
2795 return 0;
2796 }
2797
2798
2799 /*!
2800 * \brief pixWriteMemTiff()
2801 *
2802 * \param[out] pdata data of tiff compressed image
2803 * \param[out] psize size of returned data
2804 * \param[in] pix
2805 * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
2806 * IFF_TIFF_G3, IFF_TIFF_G4,
2807 * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
2808 * \return 0 if OK, 1 on error
2809 *
2810 * Usage:
2811 * 1) See pixWriteTiff(. This version writes to
2812 * memory instead of to a file.
2813 */
2814 l_ok
2815 pixWriteMemTiff(l_uint8 **pdata,
2816 size_t *psize,
2817 PIX *pix,
2818 l_int32 comptype)
2819 {
2820 return pixWriteMemTiffCustom(pdata, psize, pix, comptype,
2821 NULL, NULL, NULL, NULL);
2822 }
2823
2824
2825 /*!
2826 * \brief pixWriteMemTiffCustom()
2827 *
2828 * \param[out] pdata data of tiff compressed image
2829 * \param[out] psize size of returned data
2830 * \param[in] pix
2831 * \param[in] comptype IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
2832 * IFF_TIFF_G3, IFF_TIFF_G4,
2833 * IFF_TIFF_LZW, IFF_TIFF_ZIP, IFF_TIFF_JPEG
2834 * \param[in] natags [optional] NUMA of custom tiff tags
2835 * \param[in] savals [optional] SARRAY of values
2836 * \param[in] satypes [optional] SARRAY of types
2837 * \param[in] nasizes [optional] NUMA of sizes
2838 * \return 0 if OK, 1 on error
2839 *
2840 * Usage:
2841 * 1) See pixWriteTiffCustom(. This version writes to
2842 * memory instead of to a file.
2843 * 2) Use TIFFClose(); TIFFCleanup( doesn't free internal memstream.
2844 */
2845 l_ok
2846 pixWriteMemTiffCustom(l_uint8 **pdata,
2847 size_t *psize,
2848 PIX *pix,
2849 l_int32 comptype,
2850 NUMA *natags,
2851 SARRAY *savals,
2852 SARRAY *satypes,
2853 NUMA *nasizes)
2854 {
2855 l_int32 ret;
2856 TIFF *tif;
2857
2858 if (!pdata)
2859 return ERROR_INT("&data not defined", __func__, 1);
2860 if (!psize)
2861 return ERROR_INT("&size not defined", __func__, 1);
2862 if (!pix)
2863 return ERROR_INT("&pix not defined", __func__, 1);
2864 if (pixGetDepth(pix) != 1 && comptype != IFF_TIFF &&
2865 comptype != IFF_TIFF_LZW && comptype != IFF_TIFF_ZIP &&
2866 comptype != IFF_TIFF_JPEG) {
2867 L_WARNING("invalid compression type for bpp > 1\n", __func__);
2868 comptype = IFF_TIFF_ZIP;
2869 }
2870
2871 if ((tif = fopenTiffMemstream("tifferror", "w", pdata, psize)) == NULL)
2872 return ERROR_INT("tiff stream not opened", __func__, 1);
2873 ret = pixWriteToTiffStream(tif, pix, comptype, natags, savals,
2874 satypes, nasizes);
2875
2876 TIFFClose(tif);
2877 return ret;
2878 }
2879
2880 /* ---------------------------------------*/
2881 #endif /* HAVE_LIBTIFF && HAVE_LIBJPEG */
2882 /* ---------------------------------------*/