comparison mupdf-source/thirdparty/leptonica/src/psio1.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file psio1.c
29 * <pre>
30 *
31 * |=============================================================|
32 * | Important note |
33 * |=============================================================|
34 * | Some of these functions require I/O libraries such as |
35 * | libtiff, libjpeg, and libz. If you do not have these |
36 * | libraries, some calls will fail. |
37 * | |
38 * | You can manually deactivate all PostScript writing by |
39 * | setting this in environ.h: |
40 * | \code |
41 * | #define USE_PSIO 0 |
42 * | \endcode |
43 * | in environ.h. This will link psio1stub.c |
44 * |=============================================================|
45 *
46 * This is a PostScript "device driver" for wrapping images
47 * in PostScript. The images can be rendered by a PostScript
48 * interpreter for viewing, using evince or gv. They can also be
49 * rasterized for printing, using gs or an embedded interpreter
50 * in a PostScript printer. And they can be converted to a pdf
51 * using gs (ps2pdf).
52 *
53 * Convert specified files to PS
54 * l_int32 convertFilesToPS()
55 * l_int32 sarrayConvertFilesToPS()
56 * l_int32 convertFilesFittedToPS()
57 * l_int32 sarrayConvertFilesFittedToPS()
58 * l_int32 writeImageCompressedToPSFile()
59 *
60 * Convert mixed text/image files to PS
61 * l_int32 convertSegmentedPagesToPS()
62 * l_int32 pixWriteSegmentedPageToPS()
63 * l_int32 pixWriteMixedToPS()
64 *
65 * Convert any image file to PS for embedding
66 * l_int32 convertToPSEmbed()
67 *
68 * Write all images in a pixa out to PS
69 * l_int32 pixaWriteCompressedToPS()
70 * l_int32 pixWriteCompressedToPS()
71 *
72 * These PostScript converters are used in three different ways.
73 *
74 * (1) For embedding a PS file in a program like TeX.
75 * convertToPSEmbed() handles this for levels 1, 2 and 3 output,
76 * and prog/converttops wraps this in an executable.
77 * converttops is a generalization of Thomas Merz's jpeg2ps wrapper,
78 * in that it works for all types (formats, depth, colormap)
79 * of input images and gives PS output in one of these formats
80 * * level 1 (uncompressed)
81 * * level 2 (compressed ccittg4 or dct)
82 * * level 3 (compressed flate)
83 *
84 * (2) For composing a set of pages with any number of images
85 * painted on them, in either level 2 or level 3 formats.
86 *
87 * (3) For printing a page image or a set of page images, at a
88 * resolution that optimally fills the page, using
89 * convertFilesFittedToPS().
90 *
91 * The top-level calls of utilities in category 2, which can compose
92 * multiple images on a page, and which generate a PostScript file for
93 * printing or display (e.g., conversion to pdf), are:
94 * convertFilesToPS()
95 * convertFilesFittedToPS()
96 * convertSegmentedPagesToPS()
97 *
98 * All images are output with page numbers. Bounding box hints are
99 * more subtle. They must be included for embedding images in
100 * TeX, for example, and the low-level writers include bounding
101 * box hints by default. However, these hints should not be included for
102 * multi-page PostScript that is composed of a sequence of images;
103 * consequently, they are not written when calling higher level
104 * functions such as convertFilesToPS(), convertFilesFittedToPS()
105 * and convertSegmentedPagesToPS(). The function l_psWriteBoundingBox()
106 * sets a flag to give low-level control over this.
107 * </pre>
108 */
109
110 #ifdef HAVE_CONFIG_H
111 #include <config_auto.h>
112 #endif /* HAVE_CONFIG_H */
113
114 #include <string.h>
115 #include "allheaders.h"
116
117 /* --------------------------------------------*/
118 #if USE_PSIO /* defined in environ.h */
119 /* --------------------------------------------*/
120
121 /*-------------------------------------------------------------*
122 * Convert files in a directory to PS *
123 *-------------------------------------------------------------*/
124 /*
125 * \brief convertFilesToPS()
126 *
127 * \param[in] dirin input directory
128 * \param[in] substr [optional] substring filter on filenames; can be NULL
129 * \param[in] res typ. 300 or 600 ppi
130 * \param[in] fileout output ps file
131 * \return 0 if OK, 1 on error
132 *
133 * <pre>
134 * Notes:
135 * (1) This generates a PS file for all image files in a specified
136 * directory that contain the substr pattern to be matched.
137 * (2) Each image is written to a separate page in the output PS file.
138 * (3) All images are written compressed:
139 * * if tiffg4 --> use ccittg4
140 * * if jpeg --> use dct
141 * * all others --> use flate
142 * If the image is jpeg or tiffg4, we use the existing compressed
143 * strings for the encoding; otherwise, we read the image into
144 * a pix and flate-encode the pieces.
145 * (4) The resolution is often confusing. It is interpreted
146 * as the resolution of the output display device: "If the
147 * input image were digitized at 300 ppi, what would it
148 * look like when displayed at res ppi." So, for example,
149 * if res = 100 ppi, then the display pixels are 3x larger
150 * than the 300 ppi pixels, and the image will be rendered
151 * 3x larger.
152 * (5) The size of the PostScript file is independent of the resolution,
153 * because the entire file is encoded. The res parameter just
154 * tells the PS decomposer how to render the page. Therefore,
155 * for minimum file size without loss of visual information,
156 * if the output res is less than 300, you should downscale
157 * the image to the output resolution before wrapping in PS.
158 * (6) The "canvas" on which the image is rendered, at the given
159 * output resolution, is a standard page size (8.5 x 11 in).
160 * </pre>
161 */
162 l_ok
163 convertFilesToPS(const char *dirin,
164 const char *substr,
165 l_int32 res,
166 const char *fileout)
167 {
168 SARRAY *sa;
169
170 if (!dirin)
171 return ERROR_INT("dirin not defined", __func__, 1);
172 if (!fileout)
173 return ERROR_INT("fileout not defined", __func__, 1);
174 if (res <= 0) {
175 L_INFO("setting res to 300 ppi\n", __func__);
176 res = 300;
177 }
178 if (res < 10 || res > 4000)
179 L_WARNING("res is typically in the range 300-600 ppi\n", __func__);
180
181 /* Get all filtered and sorted full pathnames. */
182 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
183
184 /* Generate the PS file. Don't use bounding boxes. */
185 l_psWriteBoundingBox(FALSE);
186 sarrayConvertFilesToPS(sa, res, fileout);
187 l_psWriteBoundingBox(TRUE);
188 sarrayDestroy(&sa);
189 return 0;
190 }
191
192
193 /*
194
195 * \brief sarrayConvertFilesToPS()
196 *
197 * \param[in] sarray of full path names
198 * \param[in] res typ. 300 or 600 ppi
199 * \param[in] fileout output ps file
200 * \return 0 if OK, 1 on error
201 *
202 * <pre>
203 * Notes:
204 * (1) See convertFilesToPS()
205 * </pre>
206 */
207 l_ok
208 sarrayConvertFilesToPS(SARRAY *sa,
209 l_int32 res,
210 const char *fileout)
211 {
212 char *fname;
213 l_int32 i, nfiles, index, ret, format;
214
215 if (!sa)
216 return ERROR_INT("sa not defined", __func__, 1);
217 if (!fileout)
218 return ERROR_INT("fileout not defined", __func__, 1);
219 if (res <= 0) {
220 L_INFO("setting res to 300 ppi\n", __func__);
221 res = 300;
222 }
223 if (res < 10 || res > 4000)
224 L_WARNING("res is typically in the range 300-600 ppi\n", __func__);
225
226 nfiles = sarrayGetCount(sa);
227 for (i = 0, index = 0; i < nfiles; i++) {
228 fname = sarrayGetString(sa, i, L_NOCOPY);
229 ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
230 if (ret) continue;
231 if (format == IFF_UNKNOWN)
232 continue;
233
234 writeImageCompressedToPSFile(fname, fileout, res, &index);
235 }
236
237 return 0;
238 }
239
240
241 /*
242 * \brief convertFilesFittedToPS()
243 *
244 * \param[in] dirin input directory
245 * \param[in] substr [optional] substring filter on filenames; can be NULL)
246 * \param[in] xpts desired size in printer points; use 0 for default
247 * \param[in] ypts desired size in printer points; use 0 for default
248 * \param[in] fileout output ps file
249 * \return 0 if OK, 1 on error
250 *
251 * <pre>
252 * Notes:
253 * (1) This generates a PS file for all files in a specified directory
254 * that contain the substr pattern to be matched.
255 * (2) Each image is written to a separate page in the output PS file.
256 * (3) All images are written compressed:
257 * * if tiffg4 --> use ccittg4
258 * * if jpeg --> use dct
259 * * all others --> use flate
260 * If the image is jpeg or tiffg4, we use the existing compressed
261 * strings for the encoding; otherwise, we read the image into
262 * a pix and flate-encode the pieces.
263 * (4) The resolution is internally determined such that the images
264 * are rendered, in at least one direction, at 100% of the given
265 * size in printer points. Use 0.0 for xpts or ypts to get
266 * the default value, which is 612.0 or 792.0, rsp.
267 * (5) The size of the PostScript file is independent of the resolution,
268 * because the entire file is encoded. The %xpts and %ypts
269 * parameter tells the PS decomposer how to render the page.
270 * </pre>
271 */
272 l_ok
273 convertFilesFittedToPS(const char *dirin,
274 const char *substr,
275 l_float32 xpts,
276 l_float32 ypts,
277 const char *fileout)
278 {
279 SARRAY *sa;
280
281 if (!dirin)
282 return ERROR_INT("dirin not defined", __func__, 1);
283 if (!fileout)
284 return ERROR_INT("fileout not defined", __func__, 1);
285 if (xpts <= 0.0) {
286 L_INFO("setting xpts to 612.0 ppi\n", __func__);
287 xpts = 612.0;
288 }
289 if (ypts <= 0.0) {
290 L_INFO("setting ypts to 792.0 ppi\n", __func__);
291 ypts = 792.0;
292 }
293 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
294 L_WARNING("xpts,ypts are typically in the range 500-800\n", __func__);
295
296 /* Get all filtered and sorted full pathnames. */
297 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
298
299 /* Generate the PS file. Don't use bounding boxes. */
300 l_psWriteBoundingBox(FALSE);
301 sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
302 l_psWriteBoundingBox(TRUE);
303 sarrayDestroy(&sa);
304 return 0;
305 }
306
307
308 /*
309 * \brief sarrayConvertFilesFittedToPS()
310 *
311 * \param[in] sarray of full path names
312 * \param[in] xpts desired size in printer points; use 0 for default
313 * \param[in] ypts desired size in printer points; use 0 for default
314 * \param[in] fileout output ps file
315 * \return 0 if OK, 1 on error
316 *
317 * <pre>
318 * Notes:
319 * (1) See convertFilesFittedToPS()
320 * </pre>
321 */
322 l_ok
323 sarrayConvertFilesFittedToPS(SARRAY *sa,
324 l_float32 xpts,
325 l_float32 ypts,
326 const char *fileout)
327 {
328 char *fname;
329 l_int32 ret, i, w, h, nfiles, index, format, res;
330
331 if (!sa)
332 return ERROR_INT("sa not defined", __func__, 1);
333 if (!fileout)
334 return ERROR_INT("fileout not defined", __func__, 1);
335 if (xpts <= 0.0) {
336 L_INFO("setting xpts to 612.0\n", __func__);
337 xpts = 612.0;
338 }
339 if (ypts <= 0.0) {
340 L_INFO("setting ypts to 792.0\n", __func__);
341 ypts = 792.0;
342 }
343 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
344 L_WARNING("xpts,ypts are typically in the range 500-800\n", __func__);
345
346 nfiles = sarrayGetCount(sa);
347 for (i = 0, index = 0; i < nfiles; i++) {
348 fname = sarrayGetString(sa, i, L_NOCOPY);
349 ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
350 if (ret) continue;
351 if (format == IFF_UNKNOWN)
352 continue;
353
354 /* Be sure the entire image is wrapped */
355 if (xpts * h < ypts * w)
356 res = (l_int32)((l_float32)w * 72.0 / xpts);
357 else
358 res = (l_int32)((l_float32)h * 72.0 / ypts);
359
360 writeImageCompressedToPSFile(fname, fileout, res, &index);
361 }
362
363 return 0;
364 }
365
366
367 /*
368 * \brief writeImageCompressedToPSFile()
369 *
370 * \param[in] filein input image file
371 * \param[in] fileout output ps file
372 * \param[in] res output printer resolution
373 * \param[in,out] pindex index of image in output ps file
374 * \return 0 if OK, 1 on error
375 *
376 * <pre>
377 * Notes:
378 * (1) This wraps a single page image in PS.
379 * (2) The input file can be in any format. It is compressed as follows:
380 * * if in tiffg4 --> use ccittg4
381 * * if in jpeg --> use dct
382 * * all others --> use flate
383 * (3) Before the first call, set %index = 0. %index is incremented
384 * if the page is successfully written. It is used to decide
385 * whether to write (index == 0) or append (index > 0) to the file.
386 * </pre>
387 */
388 l_ok
389 writeImageCompressedToPSFile(const char *filein,
390 const char *fileout,
391 l_int32 res,
392 l_int32 *pindex)
393 {
394 const char *op;
395 l_int32 format, retval;
396
397 if (!pindex)
398 return ERROR_INT("&index not defined", __func__, 1);
399
400 findFileFormat(filein, &format);
401 if (format == IFF_UNKNOWN) {
402 L_ERROR("format of %s not known\n", __func__, filein);
403 return 1;
404 }
405
406 op = (*pindex == 0) ? "w" : "a";
407 if (format == IFF_JFIF_JPEG) {
408 retval = convertJpegToPS(filein, fileout, op, 0, 0,
409 res, 1.0, *pindex + 1, TRUE);
410 } else if (format == IFF_TIFF_G4) {
411 retval = convertG4ToPS(filein, fileout, op, 0, 0,
412 res, 1.0, *pindex + 1, FALSE, TRUE);
413 } else { /* all other image formats */
414 retval = convertFlateToPS(filein, fileout, op, 0, 0,
415 res, 1.0, *pindex + 1, TRUE);
416 }
417 if (retval == 0) (*pindex)++;
418
419 return retval;
420 }
421
422
423 /*-------------------------------------------------------------*
424 * Convert mixed text/image files to PS *
425 *-------------------------------------------------------------*/
426 /*
427 * \brief convertSegmentedPagesToPS()
428 *
429 * \param[in] pagedir input page image directory
430 * \param[in] pagestr [optional] substring filter on page filenames;
431 * can be NULL
432 * \param[in] page_numpre number of characters in page name before number
433 * \param[in] maskdir input mask image directory
434 * \param[in] maskstr [optional] substring filter on mask filenames;
435 * can be NULL
436 * \param[in] mask_numpre number of characters in mask name before number
437 * \param[in] numpost number of characters in names after number
438 * \param[in] maxnum only consider page numbers up to this value
439 * \param[in] textscale scale of text output relative to pixs
440 * \param[in] imagescale scale of image output relative to pixs
441 * \param[in] threshold for binarization; typ. about 190; 0 for default
442 * \param[in] fileout output ps file
443 * \return 0 if OK, 1 on error
444 *
445 * <pre>
446 * Notes:
447 * (1) This generates a PS file for all page image and mask files in two
448 * specified directories and that contain the page numbers as
449 * specified below. The two directories can be the same, in which
450 * case the page and mask files are differentiated by the two
451 * substrings for string matches.
452 * (2) The page images are taken in lexicographic order.
453 * Mask images whose numbers match the page images are used to
454 * segment the page images. Page images without a matching
455 * mask image are scaled, thresholded and rendered entirely as text.
456 * (3) Each PS page is generated as a compressed representation of
457 * the page image, where the part of the image under the mask
458 * is suitably scaled and compressed as DCT (i.e., jpeg), and
459 * the remaining part of the page is suitably scaled, thresholded,
460 * compressed as G4 (i.e., tiff g4), and rendered by painting
461 * black through the resulting text mask.
462 * (4) The scaling is typically 2x down for the DCT component
463 * (%imagescale = 0.5) and 2x up for the G4 component
464 * (%textscale = 2.0).
465 * (5) The resolution is automatically set to fit to a
466 * letter-size (8.5 x 11 inch) page.
467 * (6) Both the DCT and the G4 encoding are PostScript level 2.
468 * (7) It is assumed that the page number is contained within
469 * the basename (the filename without directory or extension).
470 * %page_numpre is the number of characters in the page basename
471 * preceding the actual page number; %mask_numpre is likewise for
472 * the mask basename; %numpost is the number of characters
473 * following the page number. For example, for mask name
474 * mask_006.tif, mask_numpre = 5 ("mask_).
475 * (8) To render a page as is -- that is, with no thresholding
476 * of any pixels -- use a mask in the mask directory that is
477 * full size with all pixels set to 1. If the page is 1 bpp,
478 * it is not necessary to have a mask.
479 * </pre>
480 */
481 l_ok
482 convertSegmentedPagesToPS(const char *pagedir,
483 const char *pagestr,
484 l_int32 page_numpre,
485 const char *maskdir,
486 const char *maskstr,
487 l_int32 mask_numpre,
488 l_int32 numpost,
489 l_int32 maxnum,
490 l_float32 textscale,
491 l_float32 imagescale,
492 l_int32 threshold,
493 const char *fileout)
494 {
495 l_int32 pageno, i, npages;
496 PIX *pixs, *pixm;
497 SARRAY *sapage, *samask;
498
499 if (!pagedir)
500 return ERROR_INT("pagedir not defined", __func__, 1);
501 if (!maskdir)
502 return ERROR_INT("maskdir not defined", __func__, 1);
503 if (!fileout)
504 return ERROR_INT("fileout not defined", __func__, 1);
505 if (threshold <= 0) {
506 L_INFO("setting threshold to 190\n", __func__);
507 threshold = 190;
508 }
509
510 /* Get numbered full pathnames; max size of sarray is maxnum */
511 sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
512 page_numpre, numpost, maxnum);
513 samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
514 mask_numpre, numpost, maxnum);
515 sarrayPadToSameSize(sapage, samask, "");
516 if ((npages = sarrayGetCount(sapage)) == 0) {
517 sarrayDestroy(&sapage);
518 sarrayDestroy(&samask);
519 return ERROR_INT("no matching pages found", __func__, 1);
520 }
521
522 /* Generate the PS file */
523 pageno = 1;
524 for (i = 0; i < npages; i++) {
525 if ((pixs = pixReadIndexed(sapage, i)) == NULL)
526 continue;
527 pixm = pixReadIndexed(samask, i);
528 pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
529 threshold, pageno, fileout);
530 pixDestroy(&pixs);
531 pixDestroy(&pixm);
532 pageno++;
533 }
534
535 sarrayDestroy(&sapage);
536 sarrayDestroy(&samask);
537 return 0;
538 }
539
540
541 /*
542 * \brief pixWriteSegmentedPageToPS()
543 *
544 * \param[in] pixs all depths; colormap ok
545 * \param[in] pixm [optional] 1 bpp segmentation mask over image region
546 * \param[in] textscale scale of text output relative to pixs
547 * \param[in] imagescale scale of image output relative to pixs
548 * \param[in] threshold for binarization; typ. about 190; 0 for default
549 * \param[in] pageno page number in set; use 1 for new output file
550 * \param[in] fileout output ps file
551 * \return 0 if OK, 1 on error
552 *
553 * <pre>
554 * Notes:
555 * (1) This generates the PS string for a mixed text/image page,
556 * and adds it to an existing file if %pageno > 1.
557 * The PS output is determined by fitting the result to
558 * a letter-size (8.5 x 11 inch) page.
559 * (2) The two images (pixs and pixm) are at the same resolution
560 * (typically 300 ppi). They are used to generate two compressed
561 * images, pixb and pixc, that are put directly into the output
562 * PS file.
563 * (3) pixb is the text component. In the PostScript world, we think of
564 * it as a mask through which we paint black. It is produced by
565 * scaling pixs by %textscale, and thresholding to 1 bpp.
566 * (4) pixc is the image component, which is that part of pixs under
567 * the mask pixm. It is scaled from pixs by %imagescale.
568 * (5) Typical values are textscale = 2.0 and imagescale = 0.5.
569 * (6) If pixm == NULL, the page has only text. If it is all black,
570 * the page is all image and has no text.
571 * (7) This can be used to write a multi-page PS file, by using
572 * sequential page numbers with the same output file. It can
573 * also be used to write separate PS files for each page,
574 * by using different output files with %pageno = 0 or 1.
575 * </pre>
576 */
577 l_ok
578 pixWriteSegmentedPageToPS(PIX *pixs,
579 PIX *pixm,
580 l_float32 textscale,
581 l_float32 imagescale,
582 l_int32 threshold,
583 l_int32 pageno,
584 const char *fileout)
585 {
586 l_int32 alltext, notext, d, ret;
587 l_uint32 val;
588 l_float32 scaleratio;
589 PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
590
591 if (!pixs)
592 return ERROR_INT("pixs not defined", __func__, 1);
593 if (!fileout)
594 return ERROR_INT("fileout not defined", __func__, 1);
595 if (imagescale <= 0.0 || textscale <= 0.0)
596 return ERROR_INT("relative scales must be > 0.0", __func__, 1);
597
598 /* Analyze the page. Determine the ratio by which the
599 * binary text mask is scaled relative to the image part.
600 * If there is no image region (alltext == TRUE), the
601 * text mask will be rendered directly to fit the page,
602 * and scaleratio = 1.0. */
603 alltext = TRUE;
604 notext = FALSE;
605 scaleratio = 1.0;
606 if (pixm) {
607 pixZero(pixm, &alltext); /* pixm empty: all text */
608 if (alltext) {
609 pixm = NULL; /* treat it as not existing here */
610 } else {
611 pixmi = pixInvert(NULL, pixm);
612 pixZero(pixmi, &notext); /* pixm full; no text */
613 pixDestroy(&pixmi);
614 scaleratio = textscale / imagescale;
615 }
616 }
617
618 if (pixGetDepth(pixs) == 1) { /* render tiff g4 */
619 pixb = pixClone(pixs);
620 pixc = NULL;
621 } else {
622 pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */
623
624 /* Get the binary text mask. Note that pixg cannot be a
625 * clone of pixs, because it may be altered by pixSetMasked(). */
626 pixb = NULL;
627 if (notext == FALSE) {
628 d = pixGetDepth(pixt);
629 if (d == 8)
630 pixg = pixCopy(NULL, pixt);
631 else /* d == 32 */
632 pixg = pixConvertRGBToLuminance(pixt);
633 if (pixm) /* clear out the image parts */
634 pixSetMasked(pixg, pixm, 255);
635 if (textscale == 1.0)
636 pixsc = pixClone(pixg);
637 else if (textscale >= 0.7)
638 pixsc = pixScaleGrayLI(pixg, textscale, textscale);
639 else
640 pixsc = pixScaleAreaMap(pixg, textscale, textscale);
641 pixb = pixThresholdToBinary(pixsc, threshold);
642 pixDestroy(&pixg);
643 pixDestroy(&pixsc);
644 }
645
646 /* Get the scaled image region */
647 pixc = NULL;
648 if (pixm) {
649 if (imagescale == 1.0)
650 pixsc = pixClone(pixt); /* can possibly be a clone of pixs */
651 else
652 pixsc = pixScale(pixt, imagescale, imagescale);
653
654 /* If pixm is not full, clear the pixels in pixsc
655 * corresponding to bg in pixm, where there can be text
656 * that is written through the mask pixb. Note that
657 * we could skip this and use pixsc directly in
658 * pixWriteMixedToPS(); however, clearing these
659 * non-image regions to a white background will reduce
660 * the size of pixc (relative to pixsc), and hence
661 * reduce the size of the PS file that is generated.
662 * Use a copy so that we don't accidentally alter pixs. */
663 if (notext == FALSE) {
664 pixmis = pixScale(pixm, imagescale, imagescale);
665 pixmi = pixInvert(NULL, pixmis);
666 val = (d == 8) ? 0xff : 0xffffff00;
667 pixc = pixCopy(NULL, pixsc);
668 pixSetMasked(pixc, pixmi, val); /* clear non-image part */
669 pixDestroy(&pixmis);
670 pixDestroy(&pixmi);
671 } else {
672 pixc = pixClone(pixsc);
673 }
674 pixDestroy(&pixsc);
675 }
676 pixDestroy(&pixt);
677 }
678
679 /* Generate the PS file. Don't use bounding boxes. */
680 l_psWriteBoundingBox(FALSE);
681 ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
682 l_psWriteBoundingBox(TRUE);
683 pixDestroy(&pixb);
684 pixDestroy(&pixc);
685 return ret;
686 }
687
688
689 /*
690 * \brief pixWriteMixedToPS()
691 *
692 * \param[in] pixb [optional] 1 bpp mask; typically for text
693 * \param[in] pixc [optional] 8 or 32 bpp image regions
694 * \param[in] scale scale factor for rendering pixb, relative to pixc;
695 * typ. 4.0
696 * \param[in] pageno page number in set; use 1 for new output file
697 * \param[in] fileout output ps file
698 * \return 0 if OK, 1 on error
699 *
700 * <pre>
701 * Notes:
702 * (1) This low level function generates the PS string for a mixed
703 * text/image page, and adds it to an existing file if
704 * %pageno > 1.
705 * (2) The two images (pixb and pixc) are typically generated at the
706 * resolution that they will be rendered in the PS file.
707 * (3) pixb is the text component. In the PostScript world, we think of
708 * it as a mask through which we paint black.
709 * (4) pixc is the (typically halftone) image component. It is
710 * white in the rest of the page. To minimize the size of the
711 * PS file, it should be rendered at a resolution that is at
712 * least equal to its actual resolution.
713 * (5) %scale gives the ratio of resolution of pixb to pixc.
714 * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
715 * so %scale = 4.0. If one of the images is not defined,
716 * the value of %scale is ignored.
717 * (6) We write pixc with DCT compression (jpeg). This is followed
718 * by painting the text as black through the mask pixb. If
719 * pixc doesn't exist (alltext), we write the text with the
720 * PS "image" operator instead of the "imagemask" operator,
721 * because ghostscript's ps2pdf is flaky when the latter is used.
722 * (7) The actual output resolution is determined by fitting the
723 * result to a letter-size (8.5 x 11 inch) page.
724 * <pre>
725 */
726 l_ok
727 pixWriteMixedToPS(PIX *pixb,
728 PIX *pixc,
729 l_float32 scale,
730 l_int32 pageno,
731 const char *fileout)
732 {
733 char *tname;
734 const char *op;
735 l_int32 resb, resc, endpage, maskop, ret;
736
737 if (!pixb && !pixc)
738 return ERROR_INT("pixb and pixc both undefined", __func__, 1);
739 if (!fileout)
740 return ERROR_INT("fileout not defined", __func__, 1);
741
742 /* Compute the resolution that fills a letter-size page. */
743 if (!pixc) {
744 resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
745 } else {
746 resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
747 if (pixb)
748 resb = (l_int32)(scale * resc);
749 }
750
751 /* Write the jpeg image first */
752 if (pixc) {
753 tname = l_makeTempFilename();
754 pixWrite(tname, pixc, IFF_JFIF_JPEG);
755 endpage = (pixb) ? FALSE : TRUE;
756 op = (pageno <= 1) ? "w" : "a";
757 ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
758 pageno, endpage);
759 lept_rmfile(tname);
760 LEPT_FREE(tname);
761 if (ret)
762 return ERROR_INT("jpeg data not written", __func__, 1);
763 }
764
765 /* Write the binary data, either directly or, if there is
766 * a jpeg image on the page, through the mask. */
767 if (pixb) {
768 tname = l_makeTempFilename();
769 pixWrite(tname, pixb, IFF_TIFF_G4);
770 op = (pageno <= 1 && !pixc) ? "w" : "a";
771 maskop = (pixc) ? 1 : 0;
772 ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
773 pageno, maskop, 1);
774 lept_rmfile(tname);
775 LEPT_FREE(tname);
776 if (ret)
777 return ERROR_INT("tiff data not written", __func__, 1);
778 }
779
780 return 0;
781 }
782
783
784 /*-------------------------------------------------------------*
785 * Convert any image file to PS for embedding *
786 *-------------------------------------------------------------*/
787 /*
788 * \brief convertToPSEmbed()
789 *
790 * \param[in] filein input image file, any format
791 * \param[in] fileout output ps file
792 * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3
793 * \return 0 if OK, 1 on error
794 *
795 * <pre>
796 * Notes:
797 * (1) This is a wrapper function that generates a PS file with
798 * a bounding box, from any input image file.
799 * (2) Do the best job of compression given the specified level.
800 * %level=3 does flate compression on anything that is not
801 * tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
802 * (3) If %level=2 and the file is not tiffg4 or jpeg, it will
803 * first be written to file as jpeg with quality = 75.
804 * This will remove the colormap and cause some degradation
805 * in the image.
806 * (4) The bounding box is required when a program such as TeX
807 * (through epsf) places and rescales the image. It is
808 * sized for fitting the image to an 8.5 x 11.0 inch page.
809 * </pre>
810 */
811 l_ok
812 convertToPSEmbed(const char *filein,
813 const char *fileout,
814 l_int32 level)
815 {
816 char *tname;
817 l_int32 d, format;
818 PIX *pix, *pixs;
819
820 if (!filein)
821 return ERROR_INT("filein not defined", __func__, 1);
822 if (!fileout)
823 return ERROR_INT("fileout not defined", __func__, 1);
824 if (level != 1 && level != 2 && level != 3) {
825 L_ERROR("invalid level specified; using level 2\n", __func__);
826 level = 2;
827 }
828
829 if (level == 1) { /* no compression */
830 pixWritePSEmbed(filein, fileout);
831 return 0;
832 }
833
834 /* Find the format and write out directly if in jpeg or tiff g4 */
835 findFileFormat(filein, &format);
836 if (format == IFF_JFIF_JPEG) {
837 convertJpegToPSEmbed(filein, fileout);
838 return 0;
839 } else if (format == IFF_TIFF_G4) {
840 convertG4ToPSEmbed(filein, fileout);
841 return 0;
842 } else if (format == IFF_UNKNOWN) {
843 L_ERROR("format of %s not known\n", __func__, filein);
844 return 1;
845 }
846
847 /* If level 3, flate encode. */
848 if (level == 3) {
849 convertFlateToPSEmbed(filein, fileout);
850 return 0;
851 }
852
853 /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
854 if ((pixs = pixRead(filein)) == NULL)
855 return ERROR_INT("image not read from file", __func__, 1);
856 d = pixGetDepth(pixs);
857 if ((d == 2 || d == 4) && !pixGetColormap(pixs))
858 pix = pixConvertTo8(pixs, 0);
859 else if (d == 16)
860 pix = pixConvert16To8(pixs, L_MS_BYTE);
861 else
862 pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC);
863 pixDestroy(&pixs);
864 if (!pix)
865 return ERROR_INT("converted pix not made", __func__, 1);
866
867 d = pixGetDepth(pix);
868 tname = l_makeTempFilename();
869 if (d == 1) {
870 if (pixWrite(tname, pix, IFF_TIFF_G4)) {
871 LEPT_FREE(tname);
872 pixDestroy(&pix);
873 return ERROR_INT("g4 tiff not written", __func__, 1);
874 }
875 convertG4ToPSEmbed(tname, fileout);
876 } else {
877 if (pixWrite(tname, pix, IFF_JFIF_JPEG)) {
878 LEPT_FREE(tname);
879 pixDestroy(&pix);
880 return ERROR_INT("jpeg not written", __func__, 1);
881 }
882 convertJpegToPSEmbed(tname, fileout);
883 }
884
885 lept_rmfile(tname);
886 LEPT_FREE(tname);
887 pixDestroy(&pix);
888 return 0;
889 }
890
891
892 /*-------------------------------------------------------------*
893 * Write all images in a pixa out to PS *
894 *-------------------------------------------------------------*/
895 /*
896 * \brief pixaWriteCompressedToPS()
897 *
898 * \param[in] pixa any set of images
899 * \param[in] fileout output ps file
900 * \param[in] res resolution for the set of input images
901 * \param[in] level PostScript compression capability: 2 or 3
902 * \return 0 if OK, 1 on error
903 *
904 * <pre>
905 * Notes:
906 * (1) This generates a PostScript file of multiple page images,
907 * all with bounding boxes.
908 * (2) See pixWriteCompressedToPS() for details.
909 * (3) To generate a pdf from %fileout, use:
910 * ps2pdf <infile.ps> <outfile.pdf>
911 * </pre>
912 */
913 l_ok
914 pixaWriteCompressedToPS(PIXA *pixa,
915 const char *fileout,
916 l_int32 res,
917 l_int32 level)
918 {
919 l_int32 i, n, index, ret;
920 PIX *pix;
921
922 if (!pixa)
923 return ERROR_INT("pixa not defined", __func__, 1);
924 if (!fileout)
925 return ERROR_INT("fileout not defined", __func__, 1);
926 if (level != 2 && level != 3) {
927 L_ERROR("only levels 2 and 3 permitted; using level 2\n", __func__);
928 level = 2;
929 }
930
931 index = 0;
932 n = pixaGetCount(pixa);
933 for (i = 0; i < n; i++) {
934 pix = pixaGetPix(pixa, i, L_CLONE);
935 ret = pixWriteCompressedToPS(pix, fileout, res, level, &index);
936 if (ret) L_ERROR("PS string not written for image %d\n", __func__, i);
937 pixDestroy(&pix);
938 }
939 return 0;
940 }
941
942
943 /*
944 * \brief pixWriteCompressedToPS()
945 *
946 * \param[in] pix any depth; colormap OK
947 * \param[in] fileout output ps file
948 * \param[in] res of input image
949 * \param[in] level PostScript compression capability: 2 or 3
950 * \param[in,out] pindex index of image in output ps file
951 * \return 0 if OK, 1 on error
952 *
953 * <pre>
954 * Notes:
955 * (1) This generates a PostScript string for %pix, and writes it
956 * to a file, with a bounding box.
957 * (2) *pindex keeps track of the number of images that have been
958 * written to %fileout. If this is the first image to be
959 * converted, set *pindex == 0 before passing it in. If the
960 * PostScript string is successfully generated, this will increment
961 * *pindex. If *pindex > 0, the PostScript string will be
962 * appended to %fileout.
963 * (3) PostScript level 2 enables lossless tiffg4 and lossy jpeg
964 * compression. Level 3 adds lossless flate (essentially gzip)
965 * compression.
966 * * For images with a colormap, lossless flate is often better in
967 * both quality and size than jpeg.
968 * * The decision for images without a colormap affects compression
969 * efficiency: %level2 (jpeg) is usually better than %level3 (flate)
970 * * Because jpeg does not handle 16 bpp, if %level == 2, the image
971 * is converted to 8 bpp (using MSB) and compressed with jpeg,
972 * cmap + level2: jpeg
973 * cmap + level3: flate
974 * 1 bpp: tiffg4
975 * 2 or 4 bpp + level2: jpeg
976 * 2 or 4 bpp + level3: flate
977 * 8 bpp + level2: jpeg
978 * 8 bpp + level3: flate
979 * 16 bpp + level2: jpeg [converted to 8 bpp, with warning]
980 * 16 bpp + level3: flate
981 * 32 bpp + level2: jpeg
982 * 32 bpp + level3: flate
983 * </pre>
984 */
985 l_ok
986 pixWriteCompressedToPS(PIX *pix,
987 const char *fileout,
988 l_int32 res,
989 l_int32 level,
990 l_int32 *pindex)
991 {
992 char *tname;
993 l_int32 writeout, d;
994 PIX *pixt;
995 PIXCMAP *cmap;
996
997 if (!pix)
998 return ERROR_INT("pix not defined", __func__, 1);
999 if (!fileout)
1000 return ERROR_INT("fileout not defined", __func__, 1);
1001 if (level != 2 && level != 3) {
1002 L_ERROR("only levels 2 and 3 permitted; using level 2\n", __func__);
1003 level = 2;
1004 }
1005 if (!pindex)
1006 return ERROR_INT("&index not defined", __func__, 1);
1007
1008 tname = l_makeTempFilename();
1009 writeout = TRUE;
1010 d = pixGetDepth(pix);
1011 cmap = pixGetColormap(pix);
1012 if (d == 1) {
1013 if (pixWrite(tname, pix, IFF_TIFF_G4))
1014 writeout = FALSE;
1015 } else if (level == 3) {
1016 if (pixWrite(tname, pix, IFF_PNG))
1017 writeout = FALSE;
1018 } else { /* level == 2 */
1019 if (cmap) {
1020 pixt = pixConvertForPSWrap(pix);
1021 if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1022 writeout = FALSE;
1023 pixDestroy(&pixt);
1024 } else if (d == 16) {
1025 L_WARNING("d = 16; converting to 8 bpp for jpeg\n", __func__);
1026 pixt = pixConvert16To8(pix, L_MS_BYTE);
1027 if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1028 writeout = FALSE;
1029 pixDestroy(&pixt);
1030 } else if (d == 2 || d == 4) {
1031 pixt = pixConvertTo8(pix, 0);
1032 if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1033 writeout = FALSE;
1034 pixDestroy(&pixt);
1035 } else if (d == 8 || d == 32) {
1036 if (pixWrite(tname, pix, IFF_JFIF_JPEG))
1037 writeout = FALSE;
1038 } else { /* shouldn't happen */
1039 L_ERROR("invalid depth with level 2: %d\n", __func__, d);
1040 writeout = FALSE;
1041 }
1042 }
1043
1044 if (writeout)
1045 writeImageCompressedToPSFile(tname, fileout, res, pindex);
1046
1047 if (lept_rmfile(tname) != 0)
1048 L_ERROR("temp file %s was not deleted\n", __func__, tname);
1049 LEPT_FREE(tname);
1050 return (writeout) ? 0 : 1;
1051 }
1052
1053 /* --------------------------------------------*/
1054 #endif /* USE_PSIO */
1055 /* --------------------------------------------*/