comparison mupdf-source/thirdparty/leptonica/src/pixcomp.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file pixcomp.c
29 * <pre>
30 *
31 * Pixcomp creation and destruction
32 * PIXC *pixcompCreateFromPix()
33 * PIXC *pixcompCreateFromString()
34 * PIXC *pixcompCreateFromFile()
35 * void pixcompDestroy()
36 * PIXC *pixcompCopy()
37
38 * Pixcomp accessors
39 * l_int32 pixcompGetDimensions()
40 * l_int32 pixcompGetParameters()
41 *
42 * Pixcomp compression selection
43 * l_int32 pixcompDetermineFormat()
44 *
45 * Pixcomp conversion to Pix
46 * PIX *pixCreateFromPixcomp()
47 *
48 * Pixacomp creation and destruction
49 * PIXAC *pixacompCreate()
50 * PIXAC *pixacompCreateWithInit()
51 * PIXAC *pixacompCreateFromPixa()
52 * PIXAC *pixacompCreateFromFiles()
53 * PIXAC *pixacompCreateFromSA()
54 * void pixacompDestroy()
55 *
56 * Pixacomp addition/replacement
57 * l_int32 pixacompAddPix()
58 * l_int32 pixacompAddPixcomp()
59 * static l_int32 pixacompExtendArray()
60 * l_int32 pixacompReplacePix()
61 * l_int32 pixacompReplacePixcomp()
62 * l_int32 pixacompAddBox()
63 *
64 * Pixacomp accessors
65 * l_int32 pixacompGetCount()
66 * PIXC *pixacompGetPixcomp()
67 * PIX *pixacompGetPix()
68 * l_int32 pixacompGetPixDimensions()
69 * BOXA *pixacompGetBoxa()
70 * l_int32 pixacompGetBoxaCount()
71 * BOX *pixacompGetBox()
72 * l_int32 pixacompGetBoxGeometry()
73 * l_int32 pixacompGetOffset()
74 * l_int32 pixacompSetOffset()
75 *
76 * Pixacomp conversion to Pixa
77 * PIXA *pixaCreateFromPixacomp()
78 *
79 * Combining pixacomp
80 * l_int32 pixacompJoin()
81 * PIXAC *pixacompInterleave()
82 *
83 * Pixacomp serialized I/O
84 * PIXAC *pixacompRead()
85 * PIXAC *pixacompReadStream()
86 * PIXAC *pixacompReadMem()
87 * l_int32 pixacompWrite()
88 * l_int32 pixacompWriteStream()
89 * l_int32 pixacompWriteMem()
90 *
91 * Conversion to pdf
92 * l_int32 pixacompConvertToPdf()
93 * l_int32 pixacompConvertToPdfData()
94 * l_int32 pixacompFastConvertToPdfData()
95 *
96 * Output for debugging
97 * l_int32 pixacompWriteStreamInfo()
98 * l_int32 pixcompWriteStreamInfo()
99 * PIX *pixacompDisplayTiledAndScaled()
100 * l_int32 pixacompWriteFiles()
101 * l_int32 pixcompWriteFile()
102 *
103 * The Pixacomp is an array of Pixcomp, where each Pixcomp is a compressed
104 * string of the image. We don't use reference counting here.
105 * The basic application is to allow a large array of highly
106 * compressible images to reside in memory. We purposely don't
107 * reuse the Pixa for this, to avoid confusion and programming errors.
108 *
109 * Three compression formats are used: g4, png and jpeg.
110 * The compression type can be either specified or defaulted.
111 * If specified and it is not possible to compress (for example,
112 * you specify a jpeg on a 1 bpp image or one with a colormap),
113 * the compression type defaults to png. The jpeg compression quality
114 * can be specified using l_jpegSetQuality(); otherwise the default is 75.
115 *
116 * The serialized version of the Pixacomp is similar to that for
117 * a Pixa, except that each Pixcomp can be compressed by one of
118 * tiffg4, png, or jpeg. Unlike serialization of the Pixa,
119 * serialization of the Pixacomp does not require any imaging
120 * libraries because it simply reads and writes the compressed data.
121 *
122 * There are two modes of use in accumulating images:
123 * (1) addition to the end of the array
124 * (2) random insertion (replacement) into the array
125 *
126 * In use, we assume that the array is fully populated up to the
127 * index value (n - 1), where n is the value of the pixcomp field n.
128 * Addition can only be made to the end of the fully populated array,
129 * at the index value n. Insertion can be made randomly, but again
130 * only within the array of pixcomps; i.e., within the set of
131 * indices {0 .... n-1}. The functions are pixacompReplacePix()
132 * and pixacompReplacePixcomp(), and they destroy the existing pixcomp.
133 *
134 * For addition to the end of the array, initialize the pixacomp with
135 * pixacompCreate(), which generates an empty array of pixcomps ptrs.
136 * For random insertion and replacement of pixcomp into a pixacomp,
137 * initialize a fully populated array using pixacompCreateWithInit().
138 *
139 * The offset field allows you to use an offset-based index to
140 * access the 0-based ptr array in the pixacomp. This would typically
141 * be used to map the pixacomp array index to a page number, or v.v.
142 * By default, the offset is 0. For example, suppose you have 50 images,
143 * corresponding to page numbers 10 - 59. Then you could use
144 * pixac = pixacompCreateWithInit(50, 10, ...);
145 * This would allocate an array of 50 pixcomps, but if you asked for
146 * the pix at index 10, using pixacompGetPix(pixac, 10), it would
147 * apply the offset internally, returning the pix at index 0 in the array.
148 * </pre>
149 */
150
151 #ifdef HAVE_CONFIG_H
152 #include <config_auto.h>
153 #endif /* HAVE_CONFIG_H */
154
155 #include <string.h>
156 #include "allheaders.h"
157 #include "pix_internal.h"
158
/* Bounds on pixacomp array size.
 * MaxPtrArraySize is the largest ptr count accepted by pixacompCreate()
 * and pixacompCreateWithInit(), and it also caps growth in
 * pixacompExtendArray().  InitialPtrArraySize is the fallback count that
 * pixacompCreate() uses when the requested size is out of range. */
static const l_uint32 MaxPtrArraySize = 1000000;
static const l_int32 InitialPtrArraySize = 20; /*!< arbitrary starting size */

/* Bound on size for a compressed data string; enforced by pixcompCopy() */
static const size_t MaxDataSize = 1000000000; /* 1 GB */

/* These two globals are defined in writefile.c */
extern l_int32 NumImageFileFormatExtensions;
extern const char *ImageFileFormatExtensions[];

/* Static functions (file-private helpers) */
static l_int32 pixacompExtendArray(PIXAC *pixac);
static l_int32 pixcompFastConvertToPdfData(PIXC *pixc, const char *title,
l_uint8 **pdata, size_t *pnbytes);
174
175
176 /*---------------------------------------------------------------------*
177 * Pixcomp creation and destruction *
178 *---------------------------------------------------------------------*/
179 /*!
180 * \brief pixcompCreateFromPix()
181 *
182 * \param[in] pix
183 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
184 * \return pixc, or NULL on error
185 *
186 * <pre>
187 * Notes:
188 * (1) Use %comptype == IFF_DEFAULT to have the compression
189 * type automatically determined.
190 * (2) To compress jpeg with a quality other than the default (75), use
191 * l_jpegSetQuality()
192 * </pre>
193 */
194 PIXC *
195 pixcompCreateFromPix(PIX *pix,
196 l_int32 comptype)
197 {
198 size_t size;
199 char *text;
200 l_int32 ret, format;
201 l_uint8 *data;
202 PIXC *pixc;
203
204 if (!pix)
205 return (PIXC *)ERROR_PTR("pix not defined", __func__, NULL);
206 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
207 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
208 return (PIXC *)ERROR_PTR("invalid comptype", __func__, NULL);
209
210 pixc = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC));
211 pixGetDimensions(pix, &pixc->w, &pixc->h, &pixc->d);
212 pixGetResolution(pix, &pixc->xres, &pixc->yres);
213 if (pixGetColormap(pix))
214 pixc->cmapflag = 1;
215 if ((text = pixGetText(pix)) != NULL)
216 pixc->text = stringNew(text);
217
218 pixcompDetermineFormat(comptype, pixc->d, pixc->cmapflag, &format);
219 pixc->comptype = format;
220 ret = pixWriteMem(&data, &size, pix, format);
221 if (ret) {
222 L_ERROR("write to memory failed\n", __func__);
223 pixcompDestroy(&pixc);
224 return NULL;
225 }
226 pixc->data = data;
227 pixc->size = size;
228
229 return pixc;
230 }
231
232
233 /*!
234 * \brief pixcompCreateFromString()
235 *
236 * \param[in] data compressed string
237 * \param[in] size number of bytes
238 * \param[in] copyflag L_INSERT or L_COPY
239 * \return pixc, or NULL on error
240 *
241 * <pre>
242 * Notes:
243 * (1) This works when the compressed string is png, jpeg or tiffg4.
244 * (2) The copyflag determines if the data in the new Pixcomp is
245 * a copy of the input data.
246 * </pre>
247 */
248 PIXC *
249 pixcompCreateFromString(l_uint8 *data,
250 size_t size,
251 l_int32 copyflag)
252 {
253 l_int32 format, w, h, d, bps, spp, iscmap;
254 PIXC *pixc;
255
256 if (!data)
257 return (PIXC *)ERROR_PTR("data not defined", __func__, NULL);
258 if (copyflag != L_INSERT && copyflag != L_COPY)
259 return (PIXC *)ERROR_PTR("invalid copyflag", __func__, NULL);
260
261 if (pixReadHeaderMem(data, size, &format, &w, &h, &bps, &spp, &iscmap) == 1)
262 return (PIXC *)ERROR_PTR("header data not read", __func__, NULL);
263 pixc = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC));
264 d = (spp == 3) ? 32 : bps * spp;
265 pixc->w = w;
266 pixc->h = h;
267 pixc->d = d;
268 pixc->comptype = format;
269 pixc->cmapflag = iscmap;
270 if (copyflag == L_INSERT)
271 pixc->data = data;
272 else
273 pixc->data = l_binaryCopy(data, size);
274 pixc->size = size;
275 return pixc;
276 }
277
278
279 /*!
280 * \brief pixcompCreateFromFile()
281 *
282 * \param[in] filename
283 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
284 * \return pixc, or NULL on error
285 *
286 * <pre>
287 * Notes:
288 * (1) Use %comptype == IFF_DEFAULT to have the compression
289 * type automatically determined.
290 * (2) If the comptype is invalid for this file, the default will
291 * be substituted.
292 * </pre>
293 */
294 PIXC *
295 pixcompCreateFromFile(const char *filename,
296 l_int32 comptype)
297 {
298 l_int32 format;
299 size_t nbytes;
300 l_uint8 *data;
301 PIX *pix;
302 PIXC *pixc;
303
304 if (!filename)
305 return (PIXC *)ERROR_PTR("filename not defined", __func__, NULL);
306 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
307 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
308 return (PIXC *)ERROR_PTR("invalid comptype", __func__, NULL);
309
310 findFileFormat(filename, &format);
311 if (format == IFF_UNKNOWN) {
312 L_ERROR("unreadable file: %s\n", __func__, filename);
313 return NULL;
314 }
315
316 /* Can we accept the encoded file directly? Remember that
317 * png is the "universal" compression type, so if requested
318 * it takes precedence. Otherwise, if the file is already
319 * compressed in g4 or jpeg, just accept the string. */
320 if ((format == IFF_TIFF_G4 && comptype != IFF_PNG) ||
321 (format == IFF_JFIF_JPEG && comptype != IFF_PNG))
322 comptype = format;
323 if (comptype != IFF_DEFAULT && comptype == format) {
324 data = l_binaryRead(filename, &nbytes);
325 if ((pixc = pixcompCreateFromString(data, nbytes, L_INSERT)) == NULL) {
326 LEPT_FREE(data);
327 return (PIXC *)ERROR_PTR("pixc not made (string)", __func__, NULL);
328 }
329 return pixc;
330 }
331
332 /* Need to recompress in the default format */
333 if ((pix = pixRead(filename)) == NULL)
334 return (PIXC *)ERROR_PTR("pix not read", __func__, NULL);
335 if ((pixc = pixcompCreateFromPix(pix, comptype)) == NULL) {
336 pixDestroy(&pix);
337 return (PIXC *)ERROR_PTR("pixc not made", __func__, NULL);
338 }
339 pixDestroy(&pix);
340 return pixc;
341 }
342
343
344 /*!
345 * \brief pixcompDestroy()
346 *
347 * \param[in,out] ppixc use ptr address so it will be nulled
348 * \return void
349 *
350 * <pre>
351 * Notes:
352 * (1) Always nulls the input ptr.
353 * </pre>
354 */
355 void
356 pixcompDestroy(PIXC **ppixc)
357 {
358 PIXC *pixc;
359
360 if (!ppixc) {
361 L_WARNING("ptr address is null!\n", __func__);
362 return;
363 }
364
365 if ((pixc = *ppixc) == NULL)
366 return;
367
368 LEPT_FREE(pixc->data);
369 if (pixc->text)
370 LEPT_FREE(pixc->text);
371 LEPT_FREE(pixc);
372 *ppixc = NULL;
373 }
374
375
376 /*!
377 * \brief pixcompCopy()
378 *
379 * \param[in] pixcs
380 * \return pixcd, or NULL on error
381 *
382 * <pre>
383 * Notes:
384 * (1) Limit the size of the compressed pix to 500 MB.
385 * </pre>
386 */
387 PIXC *
388 pixcompCopy(PIXC *pixcs)
389 {
390 size_t size;
391 l_uint8 *datas, *datad;
392 PIXC *pixcd;
393
394 if (!pixcs)
395 return (PIXC *)ERROR_PTR("pixcs not defined", __func__, NULL);
396 size = pixcs->size;
397 if (size > MaxDataSize)
398 return (PIXC *)ERROR_PTR("size > 1 GB; too big", __func__, NULL);
399
400 pixcd = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC));
401 pixcd->w = pixcs->w;
402 pixcd->h = pixcs->h;
403 pixcd->d = pixcs->d;
404 pixcd->xres = pixcs->xres;
405 pixcd->yres = pixcs->yres;
406 pixcd->comptype = pixcs->comptype;
407 if (pixcs->text != NULL)
408 pixcd->text = stringNew(pixcs->text);
409 pixcd->cmapflag = pixcs->cmapflag;
410
411 /* Copy image data */
412 datas = pixcs->data;
413 if ((datad = (l_uint8 *)LEPT_CALLOC(size, sizeof(l_int8))) == NULL) {
414 pixcompDestroy(&pixcd);
415 return (PIXC *)ERROR_PTR("pixcd not made", __func__, NULL);
416 }
417 memcpy(datad, datas, size);
418 pixcd->data = datad;
419 pixcd->size = size;
420 return pixcd;
421 }
422
423
424 /*---------------------------------------------------------------------*
425 * Pixcomp accessors *
426 *---------------------------------------------------------------------*/
427 /*!
428 * \brief pixcompGetDimensions()
429 *
430 * \param[in] pixc
431 * \param[out] pw, ph, pd [optional]
432 * \return 0 if OK, 1 on error
433 */
434 l_ok
435 pixcompGetDimensions(PIXC *pixc,
436 l_int32 *pw,
437 l_int32 *ph,
438 l_int32 *pd)
439 {
440 if (!pixc)
441 return ERROR_INT("pixc not defined", __func__, 1);
442 if (pw) *pw = pixc->w;
443 if (ph) *ph = pixc->h;
444 if (pd) *pd = pixc->d;
445 return 0;
446 }
447
448
449 /*!
450 * \brief pixcompGetParameters()
451 *
452 * \param[in] pixc
453 * \param[out] pxres, pyres, pcomptype, pcmapflag [optional]
454 * \return 0 if OK, 1 on error
455 */
456 l_ok
457 pixcompGetParameters(PIXC *pixc,
458 l_int32 *pxres,
459 l_int32 *pyres,
460 l_int32 *pcomptype,
461 l_int32 *pcmapflag)
462 {
463 if (!pixc)
464 return ERROR_INT("pixc not defined", __func__, 1);
465 if (pxres) *pxres = pixc->xres;
466 if (pyres) *pyres = pixc->yres;
467 if (pcomptype) *pcomptype = pixc->comptype;
468 if (pcmapflag) *pcmapflag = pixc->cmapflag;
469 return 0;
470 }
471
472
473 /*---------------------------------------------------------------------*
474 * Pixcomp compression selection *
475 *---------------------------------------------------------------------*/
476 /*!
477 * \brief pixcompDetermineFormat()
478 *
479 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
480 * \param[in] d pix depth
481 * \param[in] cmapflag 1 if pix to be compressed as a colormap; 0 otherwise
482 * \param[out] pformat IFF_TIFF, IFF_PNG or IFF_JFIF_JPEG
483 * \return 0 if OK; 1 on error
484 *
485 * <pre>
486 * Notes:
487 * (1) This determines the best format for a pix, given both
488 * the request (%comptype) and the image characteristics.
489 * (2) If %comptype == IFF_DEFAULT, this does not necessarily result
490 * in png encoding. Instead, it returns one of the three formats
491 * that is both valid and most likely to give best compression.
492 * (3) If %d == 8 with no colormap and:
493 * * you wish to compress with png, use %comptype == IFF_PNG
494 * * you wish to compress with jpeg, use either
495 * %comptype == IFF_JFIF_JPEG or %comptype == IFF_DEFAULT.
496 * (4) If the pix cannot be compressed by the input value of
497 * %comptype, this selects IFF_PNG, which can compress all pix.
498 * </pre>
499 */
500 l_ok
501 pixcompDetermineFormat(l_int32 comptype,
502 l_int32 d,
503 l_int32 cmapflag,
504 l_int32 *pformat)
505 {
506
507 if (!pformat)
508 return ERROR_INT("&format not defined", __func__, 1);
509 *pformat = IFF_PNG; /* init value and default */
510 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
511 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
512 return ERROR_INT("invalid comptype", __func__, 1);
513
514 if (comptype == IFF_DEFAULT) {
515 if (d == 1)
516 *pformat = IFF_TIFF_G4;
517 else if (d == 16)
518 *pformat = IFF_PNG;
519 else if (d >= 8 && !cmapflag)
520 *pformat = IFF_JFIF_JPEG;
521 } else if (comptype == IFF_TIFF_G4 && d == 1) {
522 *pformat = IFF_TIFF_G4;
523 } else if (comptype == IFF_JFIF_JPEG && d >= 8 && !cmapflag) {
524 *pformat = IFF_JFIF_JPEG;
525 }
526
527 return 0;
528 }
529
530
531 /*---------------------------------------------------------------------*
532 * Pixcomp conversion to Pix *
533 *---------------------------------------------------------------------*/
534 /*!
535 * \brief pixCreateFromPixcomp()
536 *
537 * \param[in] pixc
538 * \return pix, or NULL on error
539 */
540 PIX *
541 pixCreateFromPixcomp(PIXC *pixc)
542 {
543 l_int32 w, h, d, cmapinpix, format;
544 PIX *pix;
545
546 if (!pixc)
547 return (PIX *)ERROR_PTR("pixc not defined", __func__, NULL);
548
549 if ((pix = pixReadMem(pixc->data, pixc->size)) == NULL)
550 return (PIX *)ERROR_PTR("pix not read", __func__, NULL);
551 pixSetResolution(pix, pixc->xres, pixc->yres);
552 if (pixc->text)
553 pixSetText(pix, pixc->text);
554
555 /* Check fields for consistency */
556 pixGetDimensions(pix, &w, &h, &d);
557 if (pixc->w != w) {
558 L_INFO("pix width %d != pixc width %d\n", __func__, w, pixc->w);
559 L_ERROR("pix width %d != pixc width\n", __func__, w);
560 }
561 if (pixc->h != h)
562 L_ERROR("pix height %d != pixc height\n", __func__, h);
563 if (pixc->d != d) {
564 if (pixc->d == 16) /* we strip 16 --> 8 bpp by default */
565 L_WARNING("pix depth %d != pixc depth 16\n", __func__, d);
566 else
567 L_ERROR("pix depth %d != pixc depth\n", __func__, d);
568 }
569 cmapinpix = (pixGetColormap(pix) != NULL);
570 if ((cmapinpix && !pixc->cmapflag) || (!cmapinpix && pixc->cmapflag))
571 L_ERROR("pix cmap flag inconsistent\n", __func__);
572 format = pixGetInputFormat(pix);
573 if (format != pixc->comptype) {
574 L_ERROR("pix comptype %d not equal to pixc comptype\n",
575 __func__, format);
576 }
577
578 return pix;
579 }
580
581
582 /*---------------------------------------------------------------------*
583 * Pixacomp creation and destruction *
584 *---------------------------------------------------------------------*/
585 /*!
586 * \brief pixacompCreate()
587 *
588 * \param[in] n initial number of ptrs
589 * \return pixac, or NULL on error
590 */
591 PIXAC *
592 pixacompCreate(l_int32 n)
593 {
594 PIXAC *pixac;
595
596 if (n <= 0 || n > (l_int32)MaxPtrArraySize)
597 n = InitialPtrArraySize;
598
599 pixac = (PIXAC *)LEPT_CALLOC(1, sizeof(PIXAC));
600 pixac->n = 0;
601 pixac->nalloc = n;
602 pixac->offset = 0;
603 if ((pixac->pixc = (PIXC **)LEPT_CALLOC(n, sizeof(PIXC *))) == NULL) {
604 pixacompDestroy(&pixac);
605 return (PIXAC *)ERROR_PTR("pixc ptrs not made", __func__, NULL);
606 }
607 if ((pixac->boxa = boxaCreate(n)) == NULL) {
608 pixacompDestroy(&pixac);
609 return (PIXAC *)ERROR_PTR("boxa not made", __func__, NULL);
610 }
611
612 return pixac;
613 }
614
615
616 /*!
617 * \brief pixacompCreateWithInit()
618 *
619 * \param[in] n initial number of ptrs
620 * \param[in] offset difference: accessor index - pixacomp array index
621 * \param[in] pix [optional] initialize each ptr in pixacomp
622 * to this pix; can be NULL
623 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
624 * \return pixac, or NULL on error
625 *
626 * <pre>
627 * Notes:
628 * (1) Initializes a pixacomp to be fully populated with %pix,
629 * compressed using %comptype. If %pix == NULL, %comptype
630 * is ignored.
631 * (2) Typically, the array is initialized with a tiny pix.
632 * This is most easily done by setting %pix == NULL, causing
633 * initialization of each array element with a tiny placeholder
634 * pix (w = h = d = 1), using comptype = IFF_TIFF_G4 .
635 * (3) Example usage:
636 * // Generate pixacomp for pages 30 - 49. This has an array
637 * // size of 20 and the page number offset is 30.
638 * PixaComp *pixac = pixacompCreateWithInit(20, 30, NULL,
639 * IFF_TIFF_G4);
640 * // Now insert png-compressed images into the initialized array
641 * for (pageno = 30; pageno < 50; pageno++) {
642 * Pix *pixt = ... // derived from image[pageno]
643 * if (pixt)
644 * pixacompReplacePix(pixac, pageno, pixt, IFF_PNG);
645 * pixDestroy(&pixt);
646 * }
647 * The result is a pixac with 20 compressed strings, and with
648 * selected pixt replacing the placeholders.
649 * To extract the image for page 38, which is decompressed
650 * from element 8 in the array, use:
651 * pixt = pixacompGetPix(pixac, 38);
652 * </pre>
653 */
654 PIXAC *
655 pixacompCreateWithInit(l_int32 n,
656 l_int32 offset,
657 PIX *pix,
658 l_int32 comptype)
659 {
660 l_int32 i;
661 PIX *pixt;
662 PIXC *pixc;
663 PIXAC *pixac;
664
665 if (n <= 0 || n > (l_int32)MaxPtrArraySize)
666 return (PIXAC *)ERROR_PTR("n out of valid bounds", __func__, NULL);
667 if (pix) {
668 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
669 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
670 return (PIXAC *)ERROR_PTR("invalid comptype", __func__, NULL);
671 } else {
672 comptype = IFF_TIFF_G4;
673 }
674 if (offset < 0) {
675 L_WARNING("offset < 0; setting to 0\n", __func__);
676 offset = 0;
677 }
678
679 if ((pixac = pixacompCreate(n)) == NULL)
680 return (PIXAC *)ERROR_PTR("pixac not made", __func__, NULL);
681 pixacompSetOffset(pixac, offset);
682 if (pix)
683 pixt = pixClone(pix);
684 else
685 pixt = pixCreate(1, 1, 1);
686 for (i = 0; i < n; i++) {
687 pixc = pixcompCreateFromPix(pixt, comptype);
688 pixacompAddPixcomp(pixac, pixc, L_INSERT);
689 }
690 pixDestroy(&pixt);
691
692 return pixac;
693 }
694
695
696 /*!
697 * \brief pixacompCreateFromPixa()
698 *
699 * \param[in] pixa
700 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
701 * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE
702 * \return 0 if OK, 1 on error
703 *
704 * <pre>
705 * Notes:
706 * (1) If %format == IFF_DEFAULT, the conversion format for each
707 * image is chosen automatically. Otherwise, we use the
708 * specified format unless it can't be done (e.g., jpeg
709 * for a 1, 2 or 4 bpp pix, or a pix with a colormap),
710 * in which case we use the default (assumed best) compression.
711 * (2) %accesstype is used to extract a boxa from %pixa.
712 * (3) To compress jpeg with a quality other than the default (75), use
713 * l_jpegSetQuality()
714 * </pre>
715 */
716 PIXAC *
717 pixacompCreateFromPixa(PIXA *pixa,
718 l_int32 comptype,
719 l_int32 accesstype)
720 {
721 l_int32 i, n;
722 BOXA *boxa;
723 PIX *pix;
724 PIXAC *pixac;
725
726 if (!pixa)
727 return (PIXAC *)ERROR_PTR("pixa not defined", __func__, NULL);
728 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
729 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
730 return (PIXAC *)ERROR_PTR("invalid comptype", __func__, NULL);
731 if (accesstype != L_COPY && accesstype != L_CLONE &&
732 accesstype != L_COPY_CLONE)
733 return (PIXAC *)ERROR_PTR("invalid accesstype", __func__, NULL);
734
735 n = pixaGetCount(pixa);
736 if ((pixac = pixacompCreate(n)) == NULL)
737 return (PIXAC *)ERROR_PTR("pixac not made", __func__, NULL);
738 for (i = 0; i < n; i++) {
739 pix = pixaGetPix(pixa, i, L_CLONE);
740 pixacompAddPix(pixac, pix, comptype);
741 pixDestroy(&pix);
742 }
743 if ((boxa = pixaGetBoxa(pixa, accesstype)) != NULL) {
744 boxaDestroy(&pixac->boxa);
745 pixac->boxa = boxa;
746 }
747
748 return pixac;
749 }
750
751
752 /*!
753 * \brief pixacompCreateFromFiles()
754 *
755 * \param[in] dirname
756 * \param[in] substr [optional] substring filter on filenames; can be null
757 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
758 * \return pixac, or NULL on error
759 *
760 * <pre>
761 * Notes:
762 * (1) %dirname is the full path for the directory.
763 * (2) %substr is the part of the file name (excluding
764 * the directory) that is to be matched. All matching
765 * filenames are read into the Pixa. If substr is NULL,
766 * all filenames are read into the Pixa.
767 * (3) Use %comptype == IFF_DEFAULT to have the compression
768 * type automatically determined for each file.
769 * (4) If the comptype is invalid for a file, the default will
770 * be substituted.
771 * </pre>
772 */
773 PIXAC *
774 pixacompCreateFromFiles(const char *dirname,
775 const char *substr,
776 l_int32 comptype)
777 {
778 PIXAC *pixac;
779 SARRAY *sa;
780
781 if (!dirname)
782 return (PIXAC *)ERROR_PTR("dirname not defined", __func__, NULL);
783 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
784 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
785 return (PIXAC *)ERROR_PTR("invalid comptype", __func__, NULL);
786
787 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
788 return (PIXAC *)ERROR_PTR("sa not made", __func__, NULL);
789 pixac = pixacompCreateFromSA(sa, comptype);
790 sarrayDestroy(&sa);
791 return pixac;
792 }
793
794
795 /*!
796 * \brief pixacompCreateFromSA()
797 *
798 * \param[in] sa full pathnames for all files
799 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
800 * \return pixac, or NULL on error
801 *
802 * <pre>
803 * Notes:
804 * (1) Use %comptype == IFF_DEFAULT to have the compression
805 * type automatically determined for each file.
806 * (2) If the comptype is invalid for a file, the default will
807 * be substituted.
808 * </pre>
809 */
810 PIXAC *
811 pixacompCreateFromSA(SARRAY *sa,
812 l_int32 comptype)
813 {
814 char *str;
815 l_int32 i, n;
816 PIXC *pixc;
817 PIXAC *pixac;
818
819 if (!sa)
820 return (PIXAC *)ERROR_PTR("sarray not defined", __func__, NULL);
821 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
822 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
823 return (PIXAC *)ERROR_PTR("invalid comptype", __func__, NULL);
824
825 n = sarrayGetCount(sa);
826 pixac = pixacompCreate(n);
827 for (i = 0; i < n; i++) {
828 str = sarrayGetString(sa, i, L_NOCOPY);
829 if ((pixc = pixcompCreateFromFile(str, comptype)) == NULL) {
830 L_ERROR("pixc not read from file: %s\n", __func__, str);
831 continue;
832 }
833 pixacompAddPixcomp(pixac, pixc, L_INSERT);
834 }
835 return pixac;
836 }
837
838
839 /*!
840 * \brief pixacompDestroy()
841 *
842 * \param[in,out] ppixac use ptr address so it will be nulled
843 * \return void
844 *
845 * <pre>
846 * Notes:
847 * (1) Always nulls the input ptr.
848 * </pre>
849 */
850 void
851 pixacompDestroy(PIXAC **ppixac)
852 {
853 l_int32 i;
854 PIXAC *pixac;
855
856 if (ppixac == NULL) {
857 L_WARNING("ptr address is NULL!\n", __func__);
858 return;
859 }
860
861 if ((pixac = *ppixac) == NULL)
862 return;
863
864 for (i = 0; i < pixac->n; i++)
865 pixcompDestroy(&pixac->pixc[i]);
866 LEPT_FREE(pixac->pixc);
867 boxaDestroy(&pixac->boxa);
868 LEPT_FREE(pixac);
869 *ppixac = NULL;
870 }
871
872
873 /*---------------------------------------------------------------------*
874 * Pixacomp addition *
875 *---------------------------------------------------------------------*/
876 /*!
877 * \brief pixacompAddPix()
878 *
879 * \param[in] pixac
880 * \param[in] pix to be added
881 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
882 * \return 0 if OK; 1 on error
883 *
884 * <pre>
885 * Notes:
886 * (1) The array is filled up to the (n-1)-th element, and this
887 * converts the input pix to a pixc and adds it at
888 * the n-th position.
889 * (2) The pixc produced from the pix is owned by the pixac.
890 * The input pix is not affected.
891 * </pre>
892 */
893 l_ok
894 pixacompAddPix(PIXAC *pixac,
895 PIX *pix,
896 l_int32 comptype)
897 {
898 l_int32 cmapflag, format;
899 PIXC *pixc;
900
901 if (!pixac)
902 return ERROR_INT("pixac not defined", __func__, 1);
903 if (!pix)
904 return ERROR_INT("pix not defined", __func__, 1);
905 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
906 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
907 return ERROR_INT("invalid format", __func__, 1);
908
909 cmapflag = pixGetColormap(pix) ? 1 : 0;
910 pixcompDetermineFormat(comptype, pixGetDepth(pix), cmapflag, &format);
911 if ((pixc = pixcompCreateFromPix(pix, format)) == NULL)
912 return ERROR_INT("pixc not made", __func__, 1);
913 pixacompAddPixcomp(pixac, pixc, L_INSERT);
914 return 0;
915 }
916
917
918 /*!
919 * \brief pixacompAddPixcomp()
920 *
921 * \param[in] pixac
922 * \param[in] pixc to be added by insertion
923 * \param[in] copyflag L_INSERT, L_COPY
924 * \return 0 if OK; 1 on error
925 *
926 * <pre>
927 * Notes:
928 * (1) Anything added to a pixac is owned by the pixac.
929 * So do not L_INSERT a pixc that is owned by another pixac,
930 * or destroy a pixc that has been L_INSERTed.
931 * </pre>
932 */
933 l_ok
934 pixacompAddPixcomp(PIXAC *pixac,
935 PIXC *pixc,
936 l_int32 copyflag)
937 {
938 l_int32 n;
939
940 if (!pixac)
941 return ERROR_INT("pixac not defined", __func__, 1);
942 if (!pixc)
943 return ERROR_INT("pixc not defined", __func__, 1);
944 if (copyflag != L_INSERT && copyflag != L_COPY)
945 return ERROR_INT("invalid copyflag", __func__, 1);
946
947 n = pixac->n;
948 if (n >= pixac->nalloc) {
949 if (pixacompExtendArray(pixac))
950 return ERROR_INT("extension failed", __func__, 1);
951 }
952
953 if (copyflag == L_INSERT)
954 pixac->pixc[n] = pixc;
955 else /* L_COPY */
956 pixac->pixc[n] = pixcompCopy(pixc);
957 pixac->n++;
958
959 return 0;
960 }
961
962
963 /*!
964 * \brief pixacompExtendArray()
965 *
966 * \param[in] pixac
967 * \return 0 if OK; 1 on error
968 *
969 * <pre>
970 * Notes:
971 * (1) We extend the boxa array simultaneously. This is
972 * necessary in case we are NOT adding boxes simultaneously
973 * with adding pixc. We always want the sizes of the
974 * pixac and boxa ptr arrays to be equal.
975 * (2) The max number of pixcomp ptrs is 1M.
976 * </pre>
977 */
978 static l_int32
979 pixacompExtendArray(PIXAC *pixac)
980 {
981 size_t oldsize, newsize;
982
983 if (!pixac)
984 return ERROR_INT("pixac not defined", __func__, 1);
985 if (pixac->nalloc > (l_int32)MaxPtrArraySize) /* belt & suspenders */
986 return ERROR_INT("pixac has too many ptrs", __func__, 1);
987 oldsize = pixac->nalloc * sizeof(PIXC *);
988 newsize = 2 * oldsize;
989 if (newsize > 8 * MaxPtrArraySize) /* ptrs for 1M pixcomp */
990 return ERROR_INT("newsize > 8 MB; too large", __func__, 1);
991
992 if ((pixac->pixc = (PIXC **)reallocNew((void **)&pixac->pixc,
993 oldsize, newsize)) == NULL)
994 return ERROR_INT("new ptr array not returned", __func__, 1);
995 pixac->nalloc *= 2;
996 boxaExtendArray(pixac->boxa);
997 return 0;
998 }
999
1000
1001 /*!
1002 * \brief pixacompReplacePix()
1003 *
1004 * \param[in] pixac
1005 * \param[in] index caller's view of index within pixac; includes offset
1006 * \param[in] pix owned by the caller
1007 * \param[in] comptype IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG
1008 * \return 0 if OK; 1 on error
1009 *
1010 * <pre>
1011 * Notes:
1012 * (1) The %index includes the offset, which must be subtracted
1013 * to get the actual index into the ptr array.
1014 * (2) The input %pix is converted to a pixc, which is then inserted
1015 * into the pixac.
1016 * </pre>
1017 */
1018 l_ok
1019 pixacompReplacePix(PIXAC *pixac,
1020 l_int32 index,
1021 PIX *pix,
1022 l_int32 comptype)
1023 {
1024 l_int32 n, aindex;
1025 PIXC *pixc;
1026
1027 if (!pixac)
1028 return ERROR_INT("pixac not defined", __func__, 1);
1029 n = pixacompGetCount(pixac);
1030 aindex = index - pixac->offset;
1031 if (aindex < 0 || aindex >= n)
1032 return ERROR_INT("array index out of bounds", __func__, 1);
1033 if (!pix)
1034 return ERROR_INT("pix not defined", __func__, 1);
1035 if (comptype != IFF_DEFAULT && comptype != IFF_TIFF_G4 &&
1036 comptype != IFF_PNG && comptype != IFF_JFIF_JPEG)
1037 return ERROR_INT("invalid format", __func__, 1);
1038
1039 pixc = pixcompCreateFromPix(pix, comptype);
1040 pixacompReplacePixcomp(pixac, index, pixc);
1041 return 0;
1042 }
1043
1044
1045 /*!
1046 * \brief pixacompReplacePixcomp()
1047 *
1048 * \param[in] pixac
1049 * \param[in] index caller's view of index within pixac; includes offset
1050 * \param[in] pixc to replace existing one, which is destroyed
1051 * \return 0 if OK; 1 on error
1052 *
1053 * <pre>
1054 * Notes:
1055 * (1) The %index includes the offset, which must be subtracted
1056 * to get the actual index into the ptr array.
1057 * (2) The inserted %pixc is now owned by the pixac. The caller
1058 * must not destroy it.
1059 * </pre>
1060 */
1061 l_ok
1062 pixacompReplacePixcomp(PIXAC *pixac,
1063 l_int32 index,
1064 PIXC *pixc)
1065 {
1066 l_int32 n, aindex;
1067 PIXC *pixct;
1068
1069 if (!pixac)
1070 return ERROR_INT("pixac not defined", __func__, 1);
1071 n = pixacompGetCount(pixac);
1072 aindex = index - pixac->offset;
1073 if (aindex < 0 || aindex >= n)
1074 return ERROR_INT("array index out of bounds", __func__, 1);
1075 if (!pixc)
1076 return ERROR_INT("pixc not defined", __func__, 1);
1077
1078 pixct = pixacompGetPixcomp(pixac, index, L_NOCOPY); /* use %index */
1079 pixcompDestroy(&pixct);
1080 pixac->pixc[aindex] = pixc; /* replace; use array index */
1081
1082 return 0;
1083 }
1084
1085
1086 /*!
1087 * \brief pixacompAddBox()
1088 *
1089 * \param[in] pixac
1090 * \param[in] box
1091 * \param[in] copyflag L_INSERT, L_COPY
1092 * \return 0 if OK, 1 on error
1093 */
1094 l_ok
1095 pixacompAddBox(PIXAC *pixac,
1096 BOX *box,
1097 l_int32 copyflag)
1098 {
1099 if (!pixac)
1100 return ERROR_INT("pixac not defined", __func__, 1);
1101 if (!box)
1102 return ERROR_INT("box not defined", __func__, 1);
1103 if (copyflag != L_INSERT && copyflag != L_COPY)
1104 return ERROR_INT("invalid copyflag", __func__, 1);
1105
1106 boxaAddBox(pixac->boxa, box, copyflag);
1107 return 0;
1108 }
1109
1110
1111 /*---------------------------------------------------------------------*
1112 * Pixacomp accessors *
1113 *---------------------------------------------------------------------*/
1114 /*!
1115 * \brief pixacompGetCount()
1116 *
1117 * \param[in] pixac
1118 * \return count, or 0 if no pixa
1119 */
1120 l_int32
1121 pixacompGetCount(PIXAC *pixac)
1122 {
1123 if (!pixac)
1124 return ERROR_INT("pixac not defined", __func__, 0);
1125
1126 return pixac->n;
1127 }
1128
1129
1130 /*!
1131 * \brief pixacompGetPixcomp()
1132 *
1133 * \param[in] pixac
1134 * \param[in] index caller's view of index within pixac; includes offset
1135 * \param[in] copyflag L_NOCOPY, L_COPY
1136 * \return pixc, or NULL on error
1137 *
1138 * <pre>
1139 * Notes:
1140 * (1) The %index includes the offset, which must be subtracted
1141 * to get the actual index into the ptr array.
1142 * (2) If copyflag == L_NOCOPY, the pixc is owned by %pixac; do
1143 * not destroy.
1144 * </pre>
1145 */
1146 PIXC *
1147 pixacompGetPixcomp(PIXAC *pixac,
1148 l_int32 index,
1149 l_int32 copyflag)
1150 {
1151 l_int32 aindex;
1152
1153 if (!pixac)
1154 return (PIXC *)ERROR_PTR("pixac not defined", __func__, NULL);
1155 if (copyflag != L_NOCOPY && copyflag != L_COPY)
1156 return (PIXC *)ERROR_PTR("invalid copyflag", __func__, NULL);
1157 aindex = index - pixac->offset;
1158 if (aindex < 0 || aindex >= pixac->n)
1159 return (PIXC *)ERROR_PTR("array index not valid", __func__, NULL);
1160
1161 if (copyflag == L_NOCOPY)
1162 return pixac->pixc[aindex];
1163 else /* L_COPY */
1164 return pixcompCopy(pixac->pixc[aindex]);
1165 }
1166
1167
1168 /*!
1169 * \brief pixacompGetPix()
1170 *
1171 * \param[in] pixac
1172 * \param[in] index caller's view of index within pixac; includes offset
1173 * \return pix, or NULL on error
1174 *
1175 * <pre>
1176 * Notes:
1177 * (1) The %index includes the offset, which must be subtracted
1178 * to get the actual index into the ptr array.
1179 * </pre>
1180 */
1181 PIX *
1182 pixacompGetPix(PIXAC *pixac,
1183 l_int32 index)
1184 {
1185 l_int32 aindex;
1186 PIXC *pixc;
1187
1188 if (!pixac)
1189 return (PIX *)ERROR_PTR("pixac not defined", __func__, NULL);
1190 aindex = index - pixac->offset;
1191 if (aindex < 0 || aindex >= pixac->n)
1192 return (PIX *)ERROR_PTR("array index not valid", __func__, NULL);
1193
1194 pixc = pixacompGetPixcomp(pixac, index, L_NOCOPY);
1195 return pixCreateFromPixcomp(pixc);
1196 }
1197
1198
1199 /*!
1200 * \brief pixacompGetPixDimensions()
1201 *
1202 * \param[in] pixac
1203 * \param[in] index caller's view of index within pixac;
1204 * includes offset
1205 * \param[out] pw, ph, pd [optional] each can be null
1206 * \return 0 if OK, 1 on error
1207 *
1208 * <pre>
1209 * Notes:
1210 * (1) The %index includes the offset, which must be subtracted
1211 * to get the actual index into the ptr array.
1212 * </pre>
1213 */
1214 l_ok
1215 pixacompGetPixDimensions(PIXAC *pixac,
1216 l_int32 index,
1217 l_int32 *pw,
1218 l_int32 *ph,
1219 l_int32 *pd)
1220 {
1221 l_int32 aindex;
1222 PIXC *pixc;
1223
1224 if (!pixac)
1225 return ERROR_INT("pixac not defined", __func__, 1);
1226 aindex = index - pixac->offset;
1227 if (aindex < 0 || aindex >= pixac->n)
1228 return ERROR_INT("array index not valid", __func__, 1);
1229
1230 if ((pixc = pixac->pixc[aindex]) == NULL)
1231 return ERROR_INT("pixc not found!", __func__, 1);
1232 pixcompGetDimensions(pixc, pw, ph, pd);
1233 return 0;
1234 }
1235
1236
1237 /*!
1238 * \brief pixacompGetBoxa()
1239 *
1240 * \param[in] pixac
1241 * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE
1242 * \return boxa, or NULL on error
1243 */
1244 BOXA *
1245 pixacompGetBoxa(PIXAC *pixac,
1246 l_int32 accesstype)
1247 {
1248 if (!pixac)
1249 return (BOXA *)ERROR_PTR("pixac not defined", __func__, NULL);
1250 if (!pixac->boxa)
1251 return (BOXA *)ERROR_PTR("boxa not defined", __func__, NULL);
1252 if (accesstype != L_COPY && accesstype != L_CLONE &&
1253 accesstype != L_COPY_CLONE)
1254 return (BOXA *)ERROR_PTR("invalid accesstype", __func__, NULL);
1255
1256 return boxaCopy(pixac->boxa, accesstype);
1257 }
1258
1259
1260 /*!
1261 * \brief pixacompGetBoxaCount()
1262 *
1263 * \param[in] pixac
1264 * \return count, or 0 on error
1265 */
1266 l_int32
1267 pixacompGetBoxaCount(PIXAC *pixac)
1268 {
1269 if (!pixac)
1270 return ERROR_INT("pixac not defined", __func__, 0);
1271
1272 return boxaGetCount(pixac->boxa);
1273 }
1274
1275
1276 /*!
1277 * \brief pixacompGetBox()
1278 *
1279 * \param[in] pixac
1280 * \param[in] index caller's view of index within pixac;
1281 * includes offset
1282 * \param[in] accesstype L_COPY or L_CLONE
1283 * \return box if null, not automatically an error, or NULL on error
1284 *
1285 * <pre>
1286 * Notes:
1287 * (1) The %index includes the offset, which must be subtracted
1288 * to get the actual index into the ptr array.
1289 * (2) There is always a boxa with a pixac, and it is initialized so
1290 * that each box ptr is NULL.
1291 * (3) In general, we expect that there is either a box associated
1292 * with each pixc, or no boxes at all in the boxa.
1293 * (4) Having no boxes is thus not an automatic error. Whether it
1294 * is an actual error is determined by the calling program.
1295 * If the caller expects to get a box, it is an error; see, e.g.,
1296 * pixacGetBoxGeometry().
1297 * </pre>
1298 */
1299 BOX *
1300 pixacompGetBox(PIXAC *pixac,
1301 l_int32 index,
1302 l_int32 accesstype)
1303 {
1304 l_int32 aindex;
1305 BOX *box;
1306
1307 if (!pixac)
1308 return (BOX *)ERROR_PTR("pixac not defined", __func__, NULL);
1309 if (!pixac->boxa)
1310 return (BOX *)ERROR_PTR("boxa not defined", __func__, NULL);
1311 aindex = index - pixac->offset;
1312 if (aindex < 0 || aindex >= pixac->boxa->n)
1313 return (BOX *)ERROR_PTR("array index not valid", __func__, NULL);
1314 if (accesstype != L_COPY && accesstype != L_CLONE)
1315 return (BOX *)ERROR_PTR("invalid accesstype", __func__, NULL);
1316
1317 box = pixac->boxa->box[aindex];
1318 if (box) {
1319 if (accesstype == L_COPY)
1320 return boxCopy(box);
1321 else /* accesstype == L_CLONE */
1322 return boxClone(box);
1323 } else {
1324 return NULL;
1325 }
1326 }
1327
1328
1329 /*!
1330 * \brief pixacompGetBoxGeometry()
1331 *
1332 * \param[in] pixac
1333 * \param[in] index caller's view of index within pixac;
1334 * includes offset
1335 * \param[out] px, py, pw, ph [optional] each can be null
1336 * \return 0 if OK, 1 on error
1337 *
1338 * <pre>
1339 * Notes:
1340 * (1) The %index includes the offset, which must be subtracted
1341 * to get the actual index into the ptr array.
1342 * </pre>
1343 */
1344 l_ok
1345 pixacompGetBoxGeometry(PIXAC *pixac,
1346 l_int32 index,
1347 l_int32 *px,
1348 l_int32 *py,
1349 l_int32 *pw,
1350 l_int32 *ph)
1351 {
1352 l_int32 aindex;
1353 BOX *box;
1354
1355 if (!pixac)
1356 return ERROR_INT("pixac not defined", __func__, 1);
1357 aindex = index - pixac->offset;
1358 if (aindex < 0 || aindex >= pixac->n)
1359 return ERROR_INT("array index not valid", __func__, 1);
1360
1361 if ((box = pixacompGetBox(pixac, aindex, L_CLONE)) == NULL)
1362 return ERROR_INT("box not found!", __func__, 1);
1363 boxGetGeometry(box, px, py, pw, ph);
1364 boxDestroy(&box);
1365 return 0;
1366 }
1367
1368
1369 /*!
1370 * \brief pixacompGetOffset()
1371 *
1372 * \param[in] pixac
1373 * \return offset, or 0 on error
1374 *
1375 * <pre>
1376 * Notes:
1377 * (1) The offset is the difference between the caller's view of
1378 * the index into the array and the actual array index.
1379 * By default it is 0.
1380 * </pre>
1381 */
1382 l_int32
1383 pixacompGetOffset(PIXAC *pixac)
1384 {
1385 if (!pixac)
1386 return ERROR_INT("pixac not defined", __func__, 0);
1387 return pixac->offset;
1388 }
1389
1390
1391 /*!
1392 * \brief pixacompSetOffset()
1393 *
1394 * \param[in] pixac
1395 * \param[in] offset non-negative
1396 * \return 0 if OK, 1 on error
1397 *
1398 * <pre>
1399 * Notes:
1400 * (1) The offset is the difference between the caller's view of
1401 * the index into the array and the actual array index.
1402 * By default it is 0.
1403 * </pre>
1404 */
1405 l_ok
1406 pixacompSetOffset(PIXAC *pixac,
1407 l_int32 offset)
1408 {
1409 if (!pixac)
1410 return ERROR_INT("pixac not defined", __func__, 1);
1411 pixac->offset = L_MAX(0, offset);
1412 return 0;
1413 }
1414
1415
1416 /*---------------------------------------------------------------------*
1417 * Pixacomp conversion to Pixa *
1418 *---------------------------------------------------------------------*/
1419 /*!
1420 * \brief pixaCreateFromPixacomp()
1421 *
1422 * \param[in] pixac
1423 * \param[in] accesstype L_COPY, L_CLONE, L_COPY_CLONE; for boxa
1424 * \return pixa if OK, or NULL on error
1425 *
1426 * <pre>
1427 * Notes:
1428 * (1) Because the pixa has no notion of offset, the offset must
1429 * be set to 0 before the conversion, so that pixacompGetPix()
1430 * fetches all the pixcomps. It is reset at the end.
1431 * </pre>
1432 */
1433 PIXA *
1434 pixaCreateFromPixacomp(PIXAC *pixac,
1435 l_int32 accesstype)
1436 {
1437 l_int32 i, n, offset;
1438 PIX *pix;
1439 PIXA *pixa;
1440
1441 if (!pixac)
1442 return (PIXA *)ERROR_PTR("pixac not defined", __func__, NULL);
1443 if (accesstype != L_COPY && accesstype != L_CLONE &&
1444 accesstype != L_COPY_CLONE)
1445 return (PIXA *)ERROR_PTR("invalid accesstype", __func__, NULL);
1446
1447 n = pixacompGetCount(pixac);
1448 offset = pixacompGetOffset(pixac);
1449 pixacompSetOffset(pixac, 0);
1450 if ((pixa = pixaCreate(n)) == NULL)
1451 return (PIXA *)ERROR_PTR("pixa not made", __func__, NULL);
1452 for (i = 0; i < n; i++) {
1453 if ((pix = pixacompGetPix(pixac, i)) == NULL) {
1454 L_WARNING("pix %d not made\n", __func__, i);
1455 continue;
1456 }
1457 pixaAddPix(pixa, pix, L_INSERT);
1458 }
1459 if (pixa->boxa) {
1460 boxaDestroy(&pixa->boxa);
1461 pixa->boxa = pixacompGetBoxa(pixac, accesstype);
1462 }
1463 pixacompSetOffset(pixac, offset);
1464
1465 return pixa;
1466 }
1467
1468
1469 /*---------------------------------------------------------------------*
1470 * Combining pixacomp
1471 *---------------------------------------------------------------------*/
1472 /*!
1473 * \brief pixacompJoin()
1474 *
1475 * \param[in] pixacd dest pixac; add to this one
1476 * \param[in] pixacs [optional] source pixac; add from this one
1477 * \param[in] istart starting index in pixacs
1478 * \param[in] iend ending index in pixacs; use -1 to cat all
1479 * \return 0 if OK, 1 on error
1480 *
1481 * <pre>
1482 * Notes:
1483 * (1) This appends a clone of each indicated pixc in pixcas to pixcad
1484 * (2) istart < 0 is taken to mean 'read from the start' (istart = 0)
1485 * (3) iend < 0 means 'read to the end'
1486 * (4) If pixacs is NULL or contains no pixc, this is a no-op.
1487 * </pre>
1488 */
1489 l_ok
1490 pixacompJoin(PIXAC *pixacd,
1491 PIXAC *pixacs,
1492 l_int32 istart,
1493 l_int32 iend)
1494 {
1495 l_int32 i, n, nb;
1496 BOXA *boxas, *boxad;
1497 PIXC *pixc;
1498
1499 if (!pixacd)
1500 return ERROR_INT("pixacd not defined", __func__, 1);
1501 if (!pixacs || ((n = pixacompGetCount(pixacs)) == 0))
1502 return 0;
1503
1504 if (istart < 0)
1505 istart = 0;
1506 if (iend < 0 || iend >= n)
1507 iend = n - 1;
1508 if (istart > iend)
1509 return ERROR_INT("istart > iend; nothing to add", __func__, 1);
1510
1511 for (i = istart; i <= iend; i++) {
1512 pixc = pixacompGetPixcomp(pixacs, i, L_NOCOPY);
1513 pixacompAddPixcomp(pixacd, pixc, L_COPY);
1514 }
1515
1516 boxas = pixacompGetBoxa(pixacs, L_CLONE);
1517 boxad = pixacompGetBoxa(pixacd, L_CLONE);
1518 nb = pixacompGetBoxaCount(pixacs);
1519 iend = L_MIN(iend, nb - 1);
1520 boxaJoin(boxad, boxas, istart, iend);
1521 boxaDestroy(&boxas); /* just the clones */
1522 boxaDestroy(&boxad); /* ditto */
1523 return 0;
1524 }
1525
1526
1527 /*!
1528 * \brief pixacompInterleave()
1529 *
1530 * \param[in] pixac1 first src pixac
1531 * \param[in] pixac2 second src pixac
1532 * \return pixacd interleaved from sources, or NULL on error.
1533 *
1534 * <pre>
1535 * Notes:
1536 * (1) If the two pixac have different sizes, a warning is issued,
1537 * and the number of pairs returned is the minimum size.
1538 * </pre>
1539 */
1540 PIXAC *
1541 pixacompInterleave(PIXAC *pixac1,
1542 PIXAC *pixac2)
1543 {
1544 l_int32 i, n1, n2, n, nb1, nb2;
1545 BOX *box;
1546 PIXC *pixc1, *pixc2;
1547 PIXAC *pixacd;
1548
1549 if (!pixac1)
1550 return (PIXAC *)ERROR_PTR("pixac1 not defined", __func__, NULL);
1551 if (!pixac2)
1552 return (PIXAC *)ERROR_PTR("pixac2 not defined", __func__, NULL);
1553 n1 = pixacompGetCount(pixac1);
1554 n2 = pixacompGetCount(pixac2);
1555 n = L_MIN(n1, n2);
1556 if (n == 0)
1557 return (PIXAC *)ERROR_PTR("at least one input pixac is empty",
1558 __func__, NULL);
1559 if (n1 != n2)
1560 L_WARNING("counts differ: %d != %d\n", __func__, n1, n2);
1561
1562 pixacd = pixacompCreate(2 * n);
1563 nb1 = pixacompGetBoxaCount(pixac1);
1564 nb2 = pixacompGetBoxaCount(pixac2);
1565 for (i = 0; i < n; i++) {
1566 pixc1 = pixacompGetPixcomp(pixac1, i, L_COPY);
1567 pixacompAddPixcomp(pixacd, pixc1, L_INSERT);
1568 if (i < nb1) {
1569 box = pixacompGetBox(pixac1, i, L_COPY);
1570 pixacompAddBox(pixacd, box, L_INSERT);
1571 }
1572 pixc2 = pixacompGetPixcomp(pixac2, i, L_COPY);
1573 pixacompAddPixcomp(pixacd, pixc2, L_INSERT);
1574 if (i < nb2) {
1575 box = pixacompGetBox(pixac2, i, L_COPY);
1576 pixacompAddBox(pixacd, box, L_INSERT);
1577 }
1578 }
1579
1580 return pixacd;
1581 }
1582
1583
1584 /*---------------------------------------------------------------------*
1585 * Pixacomp serialized I/O *
1586 *---------------------------------------------------------------------*/
1587 /*!
1588 * \brief pixacompRead()
1589 *
1590 * \param[in] filename
1591 * \return pixac, or NULL on error
1592 *
1593 * <pre>
1594 * Notes:
1595 * (1) Unlike the situation with serialized Pixa, where the image
1596 * data is stored in png format, the Pixacomp image data
1597 * can be stored in tiffg4, png and jpg formats.
1598 * </pre>
1599 */
1600 PIXAC *
1601 pixacompRead(const char *filename)
1602 {
1603 FILE *fp;
1604 PIXAC *pixac;
1605
1606 if (!filename)
1607 return (PIXAC *)ERROR_PTR("filename not defined", __func__, NULL);
1608
1609 if ((fp = fopenReadStream(filename)) == NULL)
1610 return (PIXAC *)ERROR_PTR_1("stream not opened",
1611 filename, __func__, NULL);
1612 pixac = pixacompReadStream(fp);
1613 fclose(fp);
1614 if (!pixac)
1615 return (PIXAC *)ERROR_PTR_1("pixac not read",
1616 filename, __func__, NULL);
1617 return pixac;
1618 }
1619
1620
1621 /*!
1622 * \brief pixacompReadStream()
1623 *
1624 * \param[in] fp file stream
1625 * \return pixac, or NULL on error
1626 *
1627 * <pre>
1628 * Notes:
1629 * (1) It is OK for the pixacomp to be empty.
1630 * </pre>
1631 */
1632 PIXAC *
1633 pixacompReadStream(FILE *fp)
1634 {
1635 char buf[256];
1636 l_uint8 *data;
1637 l_int32 n, offset, i, w, h, d, ignore;
1638 l_int32 comptype, cmapflag, version, xres, yres;
1639 size_t size;
1640 BOXA *boxa;
1641 PIXC *pixc;
1642 PIXAC *pixac;
1643
1644 if (!fp)
1645 return (PIXAC *)ERROR_PTR("stream not defined", __func__, NULL);
1646
1647 if (fscanf(fp, "\nPixacomp Version %d\n", &version) != 1)
1648 return (PIXAC *)ERROR_PTR("not a pixacomp file", __func__, NULL);
1649 if (version != PIXACOMP_VERSION_NUMBER)
1650 return (PIXAC *)ERROR_PTR("invalid pixacomp version", __func__, NULL);
1651 if (fscanf(fp, "Number of pixcomp = %d\n", &n) != 1)
1652 return (PIXAC *)ERROR_PTR("not a pixacomp file", __func__, NULL);
1653 if (fscanf(fp, "Offset of index into array = %d", &offset) != 1)
1654 return (PIXAC *)ERROR_PTR("offset not read", __func__, NULL);
1655 if (n < 0)
1656 return (PIXAC *)ERROR_PTR("num pixcomp ptrs < 0", __func__, NULL);
1657 if (n > (l_int32)MaxPtrArraySize)
1658 return (PIXAC *)ERROR_PTR("too many pixcomp ptrs", __func__, NULL);
1659 if (n == 0) L_INFO("the pixacomp is empty\n", __func__);
1660
1661 if ((pixac = pixacompCreate(n)) == NULL)
1662 return (PIXAC *)ERROR_PTR("pixac not made", __func__, NULL);
1663 if ((boxa = boxaReadStream(fp)) == NULL) {
1664 pixacompDestroy(&pixac);
1665 return (PIXAC *)ERROR_PTR("boxa not made", __func__, NULL);
1666 }
1667 boxaDestroy(&pixac->boxa); /* empty */
1668 pixac->boxa = boxa;
1669 pixacompSetOffset(pixac, offset);
1670
1671 for (i = 0; i < n; i++) {
1672 if (fscanf(fp, "\nPixcomp[%d]: w = %d, h = %d, d = %d\n",
1673 &ignore, &w, &h, &d) != 4) {
1674 pixacompDestroy(&pixac);
1675 return (PIXAC *)ERROR_PTR("dimension reading", __func__, NULL);
1676 }
1677 if (fscanf(fp, " comptype = %d, size = %zu, cmapflag = %d\n",
1678 &comptype, &size, &cmapflag) != 3) {
1679 pixacompDestroy(&pixac);
1680 return (PIXAC *)ERROR_PTR("comptype/size reading", __func__, NULL);
1681 }
1682 if (size > MaxDataSize) {
1683 pixacompDestroy(&pixac);
1684 L_ERROR("data size = %zu is too big", __func__, size);
1685 return NULL;
1686 }
1687
1688 /* Use fgets() and sscanf(); not fscanf(), for the last
1689 * bit of header data before the binary data. The reason is
1690 * that fscanf throws away white space, and if the binary data
1691 * happens to begin with ascii character(s) that are white
1692 * space, it will swallow them and all will be lost! */
1693 if (fgets(buf, sizeof(buf), fp) == NULL) {
1694 pixacompDestroy(&pixac);
1695 return (PIXAC *)ERROR_PTR("fgets read fail", __func__, NULL);
1696 }
1697 if (sscanf(buf, " xres = %d, yres = %d\n", &xres, &yres) != 2) {
1698 pixacompDestroy(&pixac);
1699 return (PIXAC *)ERROR_PTR("read fail for res", __func__, NULL);
1700 }
1701 if ((data = (l_uint8 *)LEPT_CALLOC(1, size)) == NULL) {
1702 pixacompDestroy(&pixac);
1703 return (PIXAC *)ERROR_PTR("calloc fail for data", __func__, NULL);
1704 }
1705 if (fread(data, 1, size, fp) != size) {
1706 pixacompDestroy(&pixac);
1707 LEPT_FREE(data);
1708 return (PIXAC *)ERROR_PTR("error reading data", __func__, NULL);
1709 }
1710 fgetc(fp); /* swallow the ending nl */
1711 pixc = (PIXC *)LEPT_CALLOC(1, sizeof(PIXC));
1712 pixc->w = w;
1713 pixc->h = h;
1714 pixc->d = d;
1715 pixc->xres = xres;
1716 pixc->yres = yres;
1717 pixc->comptype = comptype;
1718 pixc->cmapflag = cmapflag;
1719 pixc->data = data;
1720 pixc->size = size;
1721 pixacompAddPixcomp(pixac, pixc, L_INSERT);
1722 }
1723 return pixac;
1724 }
1725
1726
1727 /*!
1728 * \brief pixacompReadMem()
1729 *
1730 * \param[in] data in pixacomp format
1731 * \param[in] size of data
1732 * \return pixac, or NULL on error
1733 *
1734 * <pre>
1735 * Notes:
1736 * (1) Deseralizes a buffer of pixacomp data into a pixac in memory.
1737 * </pre>
1738 */
1739 PIXAC *
1740 pixacompReadMem(const l_uint8 *data,
1741 size_t size)
1742 {
1743 FILE *fp;
1744 PIXAC *pixac;
1745
1746 if (!data)
1747 return (PIXAC *)ERROR_PTR("data not defined", __func__, NULL);
1748 if ((fp = fopenReadFromMemory(data, size)) == NULL)
1749 return (PIXAC *)ERROR_PTR("stream not opened", __func__, NULL);
1750
1751 pixac = pixacompReadStream(fp);
1752 fclose(fp);
1753 if (!pixac) L_ERROR("pixac not read\n", __func__);
1754 return pixac;
1755 }
1756
1757
1758 /*!
1759 * \brief pixacompWrite()
1760 *
1761 * \param[in] filename
1762 * \param[in] pixac
1763 * \return 0 if OK, 1 on error
1764 *
1765 * <pre>
1766 * Notes:
1767 * (1) Unlike the situation with serialized Pixa, where the image
1768 * data is stored in png format, the Pixacomp image data
1769 * can be stored in tiffg4, png and jpg formats.
1770 * </pre>
1771 */
1772 l_ok
1773 pixacompWrite(const char *filename,
1774 PIXAC *pixac)
1775 {
1776 l_int32 ret;
1777 FILE *fp;
1778
1779 if (!filename)
1780 return ERROR_INT("filename not defined", __func__, 1);
1781 if (!pixac)
1782 return ERROR_INT("pixacomp not defined", __func__, 1);
1783
1784 if ((fp = fopenWriteStream(filename, "wb")) == NULL)
1785 return ERROR_INT_1("stream not opened", filename, __func__, 1);
1786 ret = pixacompWriteStream(fp, pixac);
1787 fclose(fp);
1788 if (ret)
1789 return ERROR_INT_1("pixacomp not written to stream",
1790 filename, __func__, 1);
1791 return 0;
1792 }
1793
1794
1795 /*!
1796 * \brief pixacompWriteStream()
1797 *
1798 * \param[in] fp file stream
1799 * \param[in] pixac
1800 * \return 0 if OK, 1 on error
1801 */
1802 l_ok
1803 pixacompWriteStream(FILE *fp,
1804 PIXAC *pixac)
1805 {
1806 l_int32 n, i;
1807 PIXC *pixc;
1808
1809 if (!fp)
1810 return ERROR_INT("stream not defined", __func__, 1);
1811 if (!pixac)
1812 return ERROR_INT("pixac not defined", __func__, 1);
1813
1814 n = pixacompGetCount(pixac);
1815 fprintf(fp, "\nPixacomp Version %d\n", PIXACOMP_VERSION_NUMBER);
1816 fprintf(fp, "Number of pixcomp = %d\n", n);
1817 fprintf(fp, "Offset of index into array = %d", pixac->offset);
1818 boxaWriteStream(fp, pixac->boxa);
1819 for (i = 0; i < n; i++) {
1820 if ((pixc = pixacompGetPixcomp(pixac, pixac->offset + i, L_NOCOPY))
1821 == NULL)
1822 return ERROR_INT("pixc not found", __func__, 1);
1823 fprintf(fp, "\nPixcomp[%d]: w = %d, h = %d, d = %d\n",
1824 i, pixc->w, pixc->h, pixc->d);
1825 fprintf(fp, " comptype = %d, size = %zu, cmapflag = %d\n",
1826 pixc->comptype, pixc->size, pixc->cmapflag);
1827 fprintf(fp, " xres = %d, yres = %d\n", pixc->xres, pixc->yres);
1828 fwrite(pixc->data, 1, pixc->size, fp);
1829 fprintf(fp, "\n");
1830 }
1831 return 0;
1832 }
1833
1834
1835 /*!
1836 * \brief pixacompWriteMem()
1837 *
1838 * \param[out] pdata serialized data of pixac
1839 * \param[out] psize size of serialized data
1840 * \param[in] pixac
1841 * \return 0 if OK, 1 on error
1842 *
1843 * <pre>
1844 * Notes:
1845 * (1) Serializes a pixac in memory and puts the result in a buffer.
1846 * </pre>
1847 */
1848 l_ok
1849 pixacompWriteMem(l_uint8 **pdata,
1850 size_t *psize,
1851 PIXAC *pixac)
1852 {
1853 l_int32 ret;
1854 FILE *fp;
1855
1856 if (pdata) *pdata = NULL;
1857 if (psize) *psize = 0;
1858 if (!pdata)
1859 return ERROR_INT("&data not defined", __func__, 1);
1860 if (!psize)
1861 return ERROR_INT("&size not defined", __func__, 1);
1862 if (!pixac)
1863 return ERROR_INT("&pixac not defined", __func__, 1);
1864
1865 #if HAVE_FMEMOPEN
1866 if ((fp = open_memstream((char **)pdata, psize)) == NULL)
1867 return ERROR_INT("stream not opened", __func__, 1);
1868 ret = pixacompWriteStream(fp, pixac);
1869 fputc('\0', fp);
1870 fclose(fp);
1871 if (*psize > 0) *psize = *psize - 1;
1872 #else
1873 L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__);
1874 #ifdef _WIN32
1875 if ((fp = fopenWriteWinTempfile()) == NULL)
1876 return ERROR_INT("tmpfile stream not opened", __func__, 1);
1877 #else
1878 if ((fp = tmpfile()) == NULL)
1879 return ERROR_INT("tmpfile stream not opened", __func__, 1);
1880 #endif /* _WIN32 */
1881 ret = pixacompWriteStream(fp, pixac);
1882 rewind(fp);
1883 *pdata = l_binaryReadStream(fp, psize);
1884 fclose(fp);
1885 #endif /* HAVE_FMEMOPEN */
1886 return ret;
1887 }
1888
1889
1890 /*--------------------------------------------------------------------*
1891 * Conversion to pdf *
1892 *--------------------------------------------------------------------*/
1893 /*!
1894 * \brief pixacompConvertToPdf()
1895 *
1896 * \param[in] pixac containing images all at the same resolution
1897 * \param[in] res override the resolution of each input image,
1898 * in ppi; 0 to respect the resolution embedded
1899 * in the input
1900 * \param[in] scalefactor scaling factor applied to each image; > 0.0
1901 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
1902 * L_FLATE_ENCODE, L_JP2K_ENCODE, or
1903 * L_DEFAULT_ENCODE for default)
1904 * \param[in] quality used for JPEG only; 0 for default (75)
1905 * \param[in] title [optional] pdf title
1906 * \param[in] fileout pdf file of all images
1907 * \return 0 if OK, 1 on error
1908 *
1909 * <pre>
1910 * Notes:
1911 * (1) This follows closely the function pixaConvertToPdf() in pdfio.c.
1912 * (2) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without
1913 * colormap and many colors, or 32 bpp; FLATE for anything else.
1914 * (3) The scalefactor must be > 0.0; otherwise it is set to 1.0.
1915 * (4) Specifying one of the three encoding types for %type forces
1916 * all images to be compressed with that type. Use 0 to have
1917 * the type determined for each image based on depth and whether
1918 * or not it has a colormap.
1919 * (5) If all images are jpeg compressed, don't require scaling
1920 * and have the same resolution, it is much faster to skip
1921 * transcoding with pixacompFastConvertToPdfData(), and then
1922 * write the data out to file.
1923 * </pre>
1924 */
1925 l_ok
1926 pixacompConvertToPdf(PIXAC *pixac,
1927 l_int32 res,
1928 l_float32 scalefactor,
1929 l_int32 type,
1930 l_int32 quality,
1931 const char *title,
1932 const char *fileout)
1933 {
1934 l_uint8 *data;
1935 l_int32 ret;
1936 size_t nbytes;
1937
1938 if (!pixac)
1939 return ERROR_INT("pixac not defined", __func__, 1);
1940
1941 ret = pixacompConvertToPdfData(pixac, res, scalefactor, type, quality,
1942 title, &data, &nbytes);
1943 if (ret) {
1944 LEPT_FREE(data);
1945 return ERROR_INT("conversion to pdf failed", __func__, 1);
1946 }
1947
1948 ret = l_binaryWrite(fileout, "w", data, nbytes);
1949 LEPT_FREE(data);
1950 if (ret)
1951 L_ERROR("pdf data not written to file\n", __func__);
1952 return ret;
1953 }
1954
1955
1956 /*!
1957 * \brief pixacompConvertToPdfData()
1958 *
1959 * \param[in] pixac containing images all at the same resolution
1960 * \param[in] res input resolution of all images
1961 * \param[in] scalefactor scaling factor applied to each image; > 0.0
1962 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
1963 * L_FLATE_ENCODE, L_JP2K_ENCODE, or
1964 * L_DEFAULT_ENCODE for default)
1965 * \param[in] quality used for JPEG only; 0 for default (75)
1966 * \param[in] title [optional] pdf title
1967 * \param[out] pdata output pdf data (of all images
1968 * \param[out] pnbytes size of output pdf data
1969 * \return 0 if OK, 1 on error
1970 *
1971 * <pre>
1972 * Notes:
1973 * (1) See pixacompConvertToPdf().
1974 * </pre>
1975 */
1976 l_ok
1977 pixacompConvertToPdfData(PIXAC *pixac,
1978 l_int32 res,
1979 l_float32 scalefactor,
1980 l_int32 type,
1981 l_int32 quality,
1982 const char *title,
1983 l_uint8 **pdata,
1984 size_t *pnbytes)
1985 {
1986 l_uint8 *imdata;
1987 l_int32 i, n, ret, scaledres, pagetype;
1988 size_t imbytes;
1989 L_BYTEA *ba;
1990 PIX *pixs, *pix;
1991 L_PTRA *pa_data;
1992
1993 if (!pdata)
1994 return ERROR_INT("&data not defined", __func__, 1);
1995 *pdata = NULL;
1996 if (!pnbytes)
1997 return ERROR_INT("&nbytes not defined", __func__, 1);
1998 *pnbytes = 0;
1999 if (!pixac)
2000 return ERROR_INT("pixac not defined", __func__, 1);
2001 if (scalefactor <= 0.0) scalefactor = 1.0;
2002 if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE &&
2003 type != L_G4_ENCODE && type != L_FLATE_ENCODE &&
2004 type != L_JP2K_ENCODE) {
2005 L_WARNING("invalid compression type; using per-page default\n",
2006 __func__);
2007 type = L_DEFAULT_ENCODE;
2008 }
2009
2010 /* Generate all the encoded pdf strings */
2011 n = pixacompGetCount(pixac);
2012 pa_data = ptraCreate(n);
2013 for (i = 0; i < n; i++) {
2014 if ((pixs =
2015 pixacompGetPix(pixac, pixacompGetOffset(pixac) + i)) == NULL) {
2016 L_ERROR("pix[%d] not retrieved\n", __func__, i);
2017 continue;
2018 }
2019 if (pixGetWidth(pixs) == 1) { /* used sometimes as placeholders */
2020 L_INFO("placeholder image[%d] has w = 1\n", __func__, i);
2021 pixDestroy(&pixs);
2022 continue;
2023 }
2024 if (scalefactor != 1.0)
2025 pix = pixScale(pixs, scalefactor, scalefactor);
2026 else
2027 pix = pixClone(pixs);
2028 pixDestroy(&pixs);
2029 scaledres = (l_int32)(res * scalefactor);
2030
2031 /* Select the encoding type */
2032 if (type != L_DEFAULT_ENCODE) {
2033 pagetype = type;
2034 } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
2035 L_ERROR("encoding type selection failed for pix[%d]\n",
2036 __func__, i);
2037 pixDestroy(&pix);
2038 continue;
2039 }
2040
2041 ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
2042 0, 0, scaledres, title, NULL, 0);
2043 pixDestroy(&pix);
2044 if (ret) {
2045 L_ERROR("pdf encoding failed for pix[%d]\n", __func__, i);
2046 continue;
2047 }
2048 ba = l_byteaInitFromMem(imdata, imbytes);
2049 LEPT_FREE(imdata);
2050 ptraAdd(pa_data, ba);
2051 }
2052 ptraGetActualCount(pa_data, &n);
2053 if (n == 0) {
2054 L_ERROR("no pdf files made\n", __func__);
2055 ptraDestroy(&pa_data, FALSE, FALSE);
2056 return 1;
2057 }
2058
2059 /* Concatenate them */
2060 ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
2061
2062 ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */
2063 for (i = 0; i < n; i++) {
2064 ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
2065 l_byteaDestroy(&ba);
2066 }
2067 ptraDestroy(&pa_data, FALSE, FALSE);
2068 return ret;
2069 }
2070
2071
2072 /*!
2073 * \brief pixacompFastConvertToPdfData()
2074 *
2075 * \param[in] pixac containing images all at the same resolution
2076 * \param[in] title [optional] pdf title
2077 * \param[out] pdata output pdf data (of all images
2078 * \param[out] pnbytes size of output pdf data
2079 * \return 0 if OK, 1 on error
2080 *
2081 * <pre>
2082 * Notes:
2083 * (1) This generates the pdf without transcoding if all the
2084 * images in %pixac are compressed with jpeg.
2085 * Images not jpeg compressed are skipped.
2086 * (2) It assumes all images have the same resolution, and that
2087 * the resolution embedded in each jpeg file is correct.
2088 * </pre>
2089 */
2090 l_ok
2091 pixacompFastConvertToPdfData(PIXAC *pixac,
2092 const char *title,
2093 l_uint8 **pdata,
2094 size_t *pnbytes)
2095 {
2096 l_uint8 *imdata;
2097 l_int32 i, n, ret, comptype;
2098 size_t imbytes;
2099 L_BYTEA *ba;
2100 PIXC *pixc;
2101 L_PTRA *pa_data;
2102
2103 if (!pdata)
2104 return ERROR_INT("&data not defined", __func__, 1);
2105 *pdata = NULL;
2106 if (!pnbytes)
2107 return ERROR_INT("&nbytes not defined", __func__, 1);
2108 *pnbytes = 0;
2109 if (!pixac)
2110 return ERROR_INT("pixac not defined", __func__, 1);
2111
2112 /* Generate all the encoded pdf strings */
2113 n = pixacompGetCount(pixac);
2114 pa_data = ptraCreate(n);
2115 for (i = 0; i < n; i++) {
2116 if ((pixc = pixacompGetPixcomp(pixac, i, L_NOCOPY)) == NULL) {
2117 L_ERROR("pixc[%d] not retrieved\n", __func__, i);
2118 continue;
2119 }
2120 pixcompGetParameters(pixc, NULL, NULL, &comptype, NULL);
2121 if (comptype != IFF_JFIF_JPEG) {
2122 L_ERROR("pixc[%d] not jpeg compressed\n", __func__, i);
2123 continue;
2124 }
2125 ret = pixcompFastConvertToPdfData(pixc, title, &imdata, &imbytes);
2126 if (ret) {
2127 L_ERROR("pdf encoding failed for pixc[%d]\n", __func__, i);
2128 continue;
2129 }
2130 ba = l_byteaInitFromMem(imdata, imbytes);
2131 LEPT_FREE(imdata);
2132 ptraAdd(pa_data, ba);
2133 }
2134 ptraGetActualCount(pa_data, &n);
2135 if (n == 0) {
2136 L_ERROR("no pdf files made\n", __func__);
2137 ptraDestroy(&pa_data, FALSE, FALSE);
2138 return 1;
2139 }
2140
2141 /* Concatenate them */
2142 ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
2143
2144 /* Clean up */
2145 ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */
2146 for (i = 0; i < n; i++) {
2147 ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
2148 l_byteaDestroy(&ba);
2149 }
2150 ptraDestroy(&pa_data, FALSE, FALSE);
2151 return ret;
2152 }
2153
2154
2155 /*!
2156 * \brief pixcompFastConvertToPdfData()
2157 *
2158 * \param[in] pixc containing images all at the same resolution
2159 * \param[in] title [optional] pdf title
2160 * \param[out] pdata output pdf data (of all images
2161 * \param[out] pnbytes size of output pdf data
2162 * \return 0 if OK, 1 on error
2163 *
2164 * <pre>
2165 * Notes:
2166 * (1) This generates the pdf without transcoding.
2167 * (2) It assumes all images are jpeg encoded, have the same
2168 * resolution, and that the resolution embedded in each
2169 * jpeg file is correct. (It is transferred to the pdf
2170 * via the cid.)
2171 * </pre>
2172 */
2173 static l_int32
2174 pixcompFastConvertToPdfData(PIXC *pixc,
2175 const char *title,
2176 l_uint8 **pdata,
2177 size_t *pnbytes)
2178 {
2179 l_uint8 *data;
2180 L_COMP_DATA *cid;
2181
2182 if (!pdata)
2183 return ERROR_INT("&data not defined", __func__, 1);
2184 *pdata = NULL;
2185 if (!pnbytes)
2186 return ERROR_INT("&nbytes not defined", __func__, 1);
2187 *pnbytes = 0;
2188 if (!pixc)
2189 return ERROR_INT("pixc not defined", __func__, 1);
2190
2191 /* Make a copy of the data */
2192 data = l_binaryCopy(pixc->data, pixc->size);
2193 cid = l_generateJpegDataMem(data, pixc->size, 0);
2194
2195 /* Note: cid is destroyed, along with data, by this function */
2196 return cidConvertToPdfData(cid, title, pdata, pnbytes);
2197 }
2198
2199
2200 /*--------------------------------------------------------------------*
2201 * Output for debugging *
2202 *--------------------------------------------------------------------*/
2203 /*!
2204 * \brief pixacompWriteStreamInfo()
2205 *
2206 * \param[in] fp file stream
2207 * \param[in] pixac
2208 * \param[in] text [optional] identifying string; can be null
2209 * \return 0 if OK, 1 on error
2210 */
2211 l_ok
2212 pixacompWriteStreamInfo(FILE *fp,
2213 PIXAC *pixac,
2214 const char *text)
2215 {
2216 l_int32 i, n, nboxes;
2217 PIXC *pixc;
2218
2219 if (!fp)
2220 return ERROR_INT("fp not defined", __func__, 1);
2221 if (!pixac)
2222 return ERROR_INT("pixac not defined", __func__, 1);
2223
2224 if (text)
2225 fprintf(fp, "Pixacomp Info for %s:\n", text);
2226 else
2227 fprintf(fp, "Pixacomp Info:\n");
2228 n = pixacompGetCount(pixac);
2229 nboxes = pixacompGetBoxaCount(pixac);
2230 fprintf(fp, "Number of pixcomp: %d\n", n);
2231 fprintf(fp, "Size of pixcomp array alloc: %d\n", pixac->nalloc);
2232 fprintf(fp, "Offset of index into array: %d\n", pixac->offset);
2233 if (nboxes > 0)
2234 fprintf(fp, "Boxa has %d boxes\n", nboxes);
2235 else
2236 fprintf(fp, "Boxa is empty\n");
2237 for (i = 0; i < n; i++) {
2238 pixc = pixacompGetPixcomp(pixac, pixac->offset + i, L_NOCOPY);
2239 pixcompWriteStreamInfo(fp, pixc, NULL);
2240 }
2241 return 0;
2242 }
2243
2244
2245 /*!
2246 * \brief pixcompWriteStreamInfo()
2247 *
2248 * \param[in] fp file stream
2249 * \param[in] pixc
2250 * \param[in] text [optional] identifying string; can be null
2251 * \return 0 if OK, 1 on error
2252 */
2253 l_ok
2254 pixcompWriteStreamInfo(FILE *fp,
2255 PIXC *pixc,
2256 const char *text)
2257 {
2258 if (!fp)
2259 return ERROR_INT("fp not defined", __func__, 1);
2260 if (!pixc)
2261 return ERROR_INT("pixc not defined", __func__, 1);
2262
2263 if (text)
2264 fprintf(fp, " Pixcomp Info for %s:", text);
2265 else
2266 fprintf(fp, " Pixcomp Info:");
2267 fprintf(fp, " width = %d, height = %d, depth = %d\n",
2268 pixc->w, pixc->h, pixc->d);
2269 fprintf(fp, " xres = %d, yres = %d, size in bytes = %zu\n",
2270 pixc->xres, pixc->yres, pixc->size);
2271 if (pixc->cmapflag)
2272 fprintf(fp, " has colormap\n");
2273 else
2274 fprintf(fp, " no colormap\n");
2275 if (pixc->comptype < NumImageFileFormatExtensions) {
2276 fprintf(fp, " comptype = %s (%d)\n",
2277 ImageFileFormatExtensions[pixc->comptype], pixc->comptype);
2278 } else {
2279 fprintf(fp, " Error!! Invalid comptype index: %d\n", pixc->comptype);
2280 }
2281 return 0;
2282 }
2283
2284
2285 /*!
2286 * \brief pixacompDisplayTiledAndScaled()
2287 *
2288 * \param[in] pixac
2289 * \param[in] outdepth output depth: 1, 8 or 32 bpp
2290 * \param[in] tilewidth each pix is scaled to this width
2291 * \param[in] ncols number of tiles in each row
2292 * \param[in] background 0 for white, 1 for black; this is the color
2293 * of the spacing between the images
2294 * \param[in] spacing between images, and on outside
2295 * \param[in] border width of additional black border on each image;
2296 * use 0 for no border
2297 * \return pix of tiled images, or NULL on error
2298 *
2299 * <pre>
2300 * Notes:
2301 * (1) This is the same function as pixaDisplayTiledAndScaled(),
2302 * except it works on a Pixacomp instead of a Pix. It is particularly
2303 * useful for showing the images in a Pixacomp at reduced resolution.
2304 * (2) See pixaDisplayTiledAndScaled() for details.
2305 * </pre>
2306 */
2307 PIX *
2308 pixacompDisplayTiledAndScaled(PIXAC *pixac,
2309 l_int32 outdepth,
2310 l_int32 tilewidth,
2311 l_int32 ncols,
2312 l_int32 background,
2313 l_int32 spacing,
2314 l_int32 border)
2315 {
2316 PIX *pixd;
2317 PIXA *pixa;
2318
2319 if (!pixac)
2320 return (PIX *)ERROR_PTR("pixac not defined", __func__, NULL);
2321
2322 if ((pixa = pixaCreateFromPixacomp(pixac, L_COPY)) == NULL)
2323 return (PIX *)ERROR_PTR("pixa not made", __func__, NULL);
2324
2325 pixd = pixaDisplayTiledAndScaled(pixa, outdepth, tilewidth, ncols,
2326 background, spacing, border);
2327 pixaDestroy(&pixa);
2328 return pixd;
2329 }
2330
2331
2332 /*!
2333 * \brief pixacompWriteFiles()
2334 *
2335 * \param[in] pixac
2336 * \param[in] subdir subdirectory of /tmp
2337 * \return 0 if OK, 1 on error
2338 */
2339 l_ok
2340 pixacompWriteFiles(PIXAC *pixac,
2341 const char *subdir)
2342 {
2343 char buf[128];
2344 l_int32 i, n;
2345 PIXC *pixc;
2346
2347 if (!pixac)
2348 return ERROR_INT("pixac not defined", __func__, 1);
2349
2350 if (lept_mkdir(subdir) > 0)
2351 return ERROR_INT("invalid subdir", __func__, 1);
2352
2353 n = pixacompGetCount(pixac);
2354 for (i = 0; i < n; i++) {
2355 pixc = pixacompGetPixcomp(pixac, i, L_NOCOPY);
2356 snprintf(buf, sizeof(buf), "/tmp/%s/%03d", subdir, i);
2357 pixcompWriteFile(buf, pixc);
2358 }
2359 return 0;
2360 }
2361
2362 extern const char *ImageFileFormatExtensions[];
2363
2364 /*!
2365 * \brief pixcompWriteFile()
2366 *
2367 * \param[in] rootname
2368 * \param[in] pixc
2369 * \return 0 if OK, 1 on error
2370 *
2371 * <pre>
2372 * Notes:
2373 * (1) The compressed data is written to file, and the filename is
2374 * generated by appending the format extension to %rootname.
2375 * </pre>
2376 */
2377 l_ok
2378 pixcompWriteFile(const char *rootname,
2379 PIXC *pixc)
2380 {
2381 char buf[128];
2382
2383 if (!pixc)
2384 return ERROR_INT("pixc not defined", __func__, 1);
2385
2386 snprintf(buf, sizeof(buf), "%s.%s", rootname,
2387 ImageFileFormatExtensions[pixc->comptype]);
2388 l_binaryWrite(buf, "w", pixc->data, pixc->size);
2389 return 0;
2390 }