Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/psio1.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/psio1.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1055 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file psio1.c + * <pre> + * + * |=============================================================| + * | Important note | + * |=============================================================| + * | Some of these functions require I/O libraries such as | + * | libtiff, libjpeg, and libz. 
If you do not have these | + * | libraries, some calls will fail. | + * | | + * | You can manually deactivate all PostScript writing by | + * | setting this in environ.h: | + * | \code | + * | #define USE_PSIO 0 | + * | \endcode | + * | in environ.h. This will link psio1stub.c | + * |=============================================================| + * + * This is a PostScript "device driver" for wrapping images + * in PostScript. The images can be rendered by a PostScript + * interpreter for viewing, using evince or gv. They can also be + * rasterized for printing, using gs or an embedded interpreter + * in a PostScript printer. And they can be converted to a pdf + * using gs (ps2pdf). + * + * Convert specified files to PS + * l_int32 convertFilesToPS() + * l_int32 sarrayConvertFilesToPS() + * l_int32 convertFilesFittedToPS() + * l_int32 sarrayConvertFilesFittedToPS() + * l_int32 writeImageCompressedToPSFile() + * + * Convert mixed text/image files to PS + * l_int32 convertSegmentedPagesToPS() + * l_int32 pixWriteSegmentedPageToPS() + * l_int32 pixWriteMixedToPS() + * + * Convert any image file to PS for embedding + * l_int32 convertToPSEmbed() + * + * Write all images in a pixa out to PS + * l_int32 pixaWriteCompressedToPS() + * l_int32 pixWriteCompressedToPS() + * + * These PostScript converters are used in three different ways. + * + * (1) For embedding a PS file in a program like TeX. + * convertToPSEmbed() handles this for levels 1, 2 and 3 output, + * and prog/converttops wraps this in an executable. + * converttops is a generalization of Thomas Merz's jpeg2ps wrapper, + * in that it works for all types (formats, depth, colormap) + * of input images and gives PS output in one of these formats + * * level 1 (uncompressed) + * * level 2 (compressed ccittg4 or dct) + * * level 3 (compressed flate) + * + * (2) For composing a set of pages with any number of images + * painted on them, in either level 2 or level 3 formats. 
+ * + * (3) For printing a page image or a set of page images, at a + * resolution that optimally fills the page, using + * convertFilesFittedToPS(). + * + * The top-level calls of utilities in category 2, which can compose + * multiple images on a page, and which generate a PostScript file for + * printing or display (e.g., conversion to pdf), are: + * convertFilesToPS() + * convertFilesFittedToPS() + * convertSegmentedPagesToPS() + * + * All images are output with page numbers. Bounding box hints are + * more subtle. They must be included for embeding images in + * TeX, for example, and the low-level writers include bounding + * box hints by default. However, these hints should not be included for + * multi-page PostScript that is composed of a sequence of images; + * consequently, they are not written when calling higher level + * functions such as convertFilesToPS(), convertFilesFittedToPS() + * and convertSegmentedPagesToPS(). The function l_psWriteBoundingBox() + * sets a flag to give low-level control over this. + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <string.h> +#include "allheaders.h" + +/* --------------------------------------------*/ +#if USE_PSIO /* defined in environ.h */ + /* --------------------------------------------*/ + +/*-------------------------------------------------------------* + * Convert files in a directory to PS * + *-------------------------------------------------------------*/ +/* + * \brief convertFilesToPS() + * + * \param[in] dirin input directory + * \param[in] substr [optional] substring filter on filenames; can be NULL + * \param[in] res typ. 300 or 600 ppi + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates a PS file for all image files in a specified + * directory that contain the substr pattern to be matched. + * (2) Each image is written to a separate page in the output PS file. 
+ * (3) All images are written compressed: + * * if tiffg4 --> use ccittg4 + * * if jpeg --> use dct + * * all others --> use flate + * If the image is jpeg or tiffg4, we use the existing compressed + * strings for the encoding; otherwise, we read the image into + * a pix and flate-encode the pieces. + * (4) The resolution is often confusing. It is interpreted + * as the resolution of the output display device: "If the + * input image were digitized at 300 ppi, what would it + * look like when displayed at res ppi." So, for example, + * if res = 100 ppi, then the display pixels are 3x larger + * than the 300 ppi pixels, and the image will be rendered + * 3x larger. + * (5) The size of the PostScript file is independent of the resolution, + * because the entire file is encoded. The res parameter just + * tells the PS decomposer how to render the page. Therefore, + * for minimum file size without loss of visual information, + * if the output res is less than 300, you should downscale + * the image to the output resolution before wrapping in PS. + * (6) The "canvas" on which the image is rendered, at the given + * output resolution, is a standard page size (8.5 x 11 in). + * </pre> + */ +l_ok +convertFilesToPS(const char *dirin, + const char *substr, + l_int32 res, + const char *fileout) +{ +SARRAY *sa; + + if (!dirin) + return ERROR_INT("dirin not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (res <= 0) { + L_INFO("setting res to 300 ppi\n", __func__); + res = 300; + } + if (res < 10 || res > 4000) + L_WARNING("res is typically in the range 300-600 ppi\n", __func__); + + /* Get all filtered and sorted full pathnames. */ + sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); + + /* Generate the PS file. Don't use bounding boxes. 
*/ + l_psWriteBoundingBox(FALSE); + sarrayConvertFilesToPS(sa, res, fileout); + l_psWriteBoundingBox(TRUE); + sarrayDestroy(&sa); + return 0; +} + + +/* + + * \brief sarrayConvertFilesToPS() + * + * \param[in] sarray of full path names + * \param[in] res typ. 300 or 600 ppi + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) See convertFilesToPS() + * </pre> + */ +l_ok +sarrayConvertFilesToPS(SARRAY *sa, + l_int32 res, + const char *fileout) +{ +char *fname; +l_int32 i, nfiles, index, ret, format; + + if (!sa) + return ERROR_INT("sa not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (res <= 0) { + L_INFO("setting res to 300 ppi\n", __func__); + res = 300; + } + if (res < 10 || res > 4000) + L_WARNING("res is typically in the range 300-600 ppi\n", __func__); + + nfiles = sarrayGetCount(sa); + for (i = 0, index = 0; i < nfiles; i++) { + fname = sarrayGetString(sa, i, L_NOCOPY); + ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL); + if (ret) continue; + if (format == IFF_UNKNOWN) + continue; + + writeImageCompressedToPSFile(fname, fileout, res, &index); + } + + return 0; +} + + +/* + * \brief convertFilesFittedToPS() + * + * \param[in] dirin input directory + * \param[in] substr [optional] substring filter on filenames; can be NULL) + * \param[in] xpts desired size in printer points; use 0 for default + * \param[in] ypts desired size in printer points; use 0 for default + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates a PS file for all files in a specified directory + * that contain the substr pattern to be matched. + * (2) Each image is written to a separate page in the output PS file. 
+ * (3) All images are written compressed: + * * if tiffg4 --> use ccittg4 + * * if jpeg --> use dct + * * all others --> use flate + * If the image is jpeg or tiffg4, we use the existing compressed + * strings for the encoding; otherwise, we read the image into + * a pix and flate-encode the pieces. + * (4) The resolution is internally determined such that the images + * are rendered, in at least one direction, at 100% of the given + * size in printer points. Use 0.0 for xpts or ypts to get + * the default value, which is 612.0 or 792.0, rsp. + * (5) The size of the PostScript file is independent of the resolution, + * because the entire file is encoded. The %xpts and %ypts + * parameter tells the PS decomposer how to render the page. + * </pre> + */ +l_ok +convertFilesFittedToPS(const char *dirin, + const char *substr, + l_float32 xpts, + l_float32 ypts, + const char *fileout) +{ +SARRAY *sa; + + if (!dirin) + return ERROR_INT("dirin not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (xpts <= 0.0) { + L_INFO("setting xpts to 612.0 ppi\n", __func__); + xpts = 612.0; + } + if (ypts <= 0.0) { + L_INFO("setting ypts to 792.0 ppi\n", __func__); + ypts = 792.0; + } + if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) + L_WARNING("xpts,ypts are typically in the range 500-800\n", __func__); + + /* Get all filtered and sorted full pathnames. */ + sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); + + /* Generate the PS file. Don't use bounding boxes. 
*/ + l_psWriteBoundingBox(FALSE); + sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout); + l_psWriteBoundingBox(TRUE); + sarrayDestroy(&sa); + return 0; +} + + +/* + * \brief sarrayConvertFilesFittedToPS() + * + * \param[in] sarray of full path names + * \param[in] xpts desired size in printer points; use 0 for default + * \param[in] ypts desired size in printer points; use 0 for default + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) See convertFilesFittedToPS() + * </pre> + */ +l_ok +sarrayConvertFilesFittedToPS(SARRAY *sa, + l_float32 xpts, + l_float32 ypts, + const char *fileout) +{ +char *fname; +l_int32 ret, i, w, h, nfiles, index, format, res; + + if (!sa) + return ERROR_INT("sa not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (xpts <= 0.0) { + L_INFO("setting xpts to 612.0\n", __func__); + xpts = 612.0; + } + if (ypts <= 0.0) { + L_INFO("setting ypts to 792.0\n", __func__); + ypts = 792.0; + } + if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) + L_WARNING("xpts,ypts are typically in the range 500-800\n", __func__); + + nfiles = sarrayGetCount(sa); + for (i = 0, index = 0; i < nfiles; i++) { + fname = sarrayGetString(sa, i, L_NOCOPY); + ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL); + if (ret) continue; + if (format == IFF_UNKNOWN) + continue; + + /* Be sure the entire image is wrapped */ + if (xpts * h < ypts * w) + res = (l_int32)((l_float32)w * 72.0 / xpts); + else + res = (l_int32)((l_float32)h * 72.0 / ypts); + + writeImageCompressedToPSFile(fname, fileout, res, &index); + } + + return 0; +} + + +/* + * \brief writeImageCompressedToPSFile() + * + * \param[in] filein input image file + * \param[in] fileout output ps file + * \param[in] res output printer resolution + * \param[in,out] pindex index of image in output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This wraps a single 
page image in PS. + * (2) The input file can be in any format. It is compressed as follows: + * * if in tiffg4 --> use ccittg4 + * * if in jpeg --> use dct + * * all others --> use flate + * (3) Before the first call, set %index = 0. %index is incremented + * if the page is successfully written. It is used to decide + * whether to write (index == 0) or append (index > 0) to the file. + * </pre> + */ +l_ok +writeImageCompressedToPSFile(const char *filein, + const char *fileout, + l_int32 res, + l_int32 *pindex) +{ +const char *op; +l_int32 format, retval; + + if (!pindex) + return ERROR_INT("&index not defined", __func__, 1); + + findFileFormat(filein, &format); + if (format == IFF_UNKNOWN) { + L_ERROR("format of %s not known\n", __func__, filein); + return 1; + } + + op = (*pindex == 0) ? "w" : "a"; + if (format == IFF_JFIF_JPEG) { + retval = convertJpegToPS(filein, fileout, op, 0, 0, + res, 1.0, *pindex + 1, TRUE); + } else if (format == IFF_TIFF_G4) { + retval = convertG4ToPS(filein, fileout, op, 0, 0, + res, 1.0, *pindex + 1, FALSE, TRUE); + } else { /* all other image formats */ + retval = convertFlateToPS(filein, fileout, op, 0, 0, + res, 1.0, *pindex + 1, TRUE); + } + if (retval == 0) (*pindex)++; + + return retval; +} + + +/*-------------------------------------------------------------* + * Convert mixed text/image files to PS * + *-------------------------------------------------------------*/ +/* + * \brief convertSegmentedPagesToPS() + * + * \param[in] pagedir input page image directory + * \param[in] pagestr [optional] substring filter on page filenames; + * can be NULL + * \param[in] page_numpre number of characters in page name before number + * \param[in] maskdir input mask image directory + * \param[in] maskstr [optional] substring filter on mask filenames; + * can be NULL + * \param[in] mask_numpre number of characters in mask name before number + * \param[in] numpost number of characters in names after number + * \param[in] maxnum only consider 
page numbers up to this value + * \param[in] textscale scale of text output relative to pixs + * \param[in] imagescale scale of image output relative to pixs + * \param[in] threshold for binarization; typ. about 190; 0 for default + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates a PS file for all page image and mask files in two + * specified directories and that contain the page numbers as + * specified below. The two directories can be the same, in which + * case the page and mask files are differentiated by the two + * substrings for string matches. + * (2) The page images are taken in lexicographic order. + * Mask images whose numbers match the page images are used to + * segment the page images. Page images without a matching + * mask image are scaled, thresholded and rendered entirely as text. + * (3) Each PS page is generated as a compressed representation of + * the page image, where the part of the image under the mask + * is suitably scaled and compressed as DCT (i.e., jpeg), and + * the remaining part of the page is suitably scaled, thresholded, + * compressed as G4 (i.e., tiff g4), and rendered by painting + * black through the resulting text mask. + * (4) The scaling is typically 2x down for the DCT component + * (%imagescale = 0.5) and 2x up for the G4 component + * (%textscale = 2.0). + * (5) The resolution is automatically set to fit to a + * letter-size (8.5 x 11 inch) page. + * (6) Both the DCT and the G4 encoding are PostScript level 2. + * (7) It is assumed that the page number is contained within + * the basename (the filename without directory or extension). + * %page_numpre is the number of characters in the page basename + * preceding the actual page number; %mask_numpre is likewise for + * the mask basename; %numpost is the number of characters + * following the page number. For example, for mask name + * mask_006.tif, mask_numpre = 5 ("mask_").
+ * (8) To render a page as is -- that is, with no thresholding + * of any pixels -- use a mask in the mask directory that is + * full size with all pixels set to 1. If the page is 1 bpp, + * it is not necessary to have a mask. + * </pre> + */ +l_ok +convertSegmentedPagesToPS(const char *pagedir, + const char *pagestr, + l_int32 page_numpre, + const char *maskdir, + const char *maskstr, + l_int32 mask_numpre, + l_int32 numpost, + l_int32 maxnum, + l_float32 textscale, + l_float32 imagescale, + l_int32 threshold, + const char *fileout) +{ +l_int32 pageno, i, npages; +PIX *pixs, *pixm; +SARRAY *sapage, *samask; + + if (!pagedir) + return ERROR_INT("pagedir not defined", __func__, 1); + if (!maskdir) + return ERROR_INT("maskdir not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (threshold <= 0) { + L_INFO("setting threshold to 190\n", __func__); + threshold = 190; + } + + /* Get numbered full pathnames; max size of sarray is maxnum */ + sapage = getNumberedPathnamesInDirectory(pagedir, pagestr, + page_numpre, numpost, maxnum); + samask = getNumberedPathnamesInDirectory(maskdir, maskstr, + mask_numpre, numpost, maxnum); + sarrayPadToSameSize(sapage, samask, ""); + if ((npages = sarrayGetCount(sapage)) == 0) { + sarrayDestroy(&sapage); + sarrayDestroy(&samask); + return ERROR_INT("no matching pages found", __func__, 1); + } + + /* Generate the PS file */ + pageno = 1; + for (i = 0; i < npages; i++) { + if ((pixs = pixReadIndexed(sapage, i)) == NULL) + continue; + pixm = pixReadIndexed(samask, i); + pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale, + threshold, pageno, fileout); + pixDestroy(&pixs); + pixDestroy(&pixm); + pageno++; + } + + sarrayDestroy(&sapage); + sarrayDestroy(&samask); + return 0; +} + + +/* + * \brief pixWriteSegmentedPageToPS() + * + * \param[in] pixs all depths; colormap ok + * \param[in] pixm [optional] 1 bpp segmentation mask over image region + * \param[in] textscale scale of text 
output relative to pixs + * \param[in] imagescale scale of image output relative to pixs + * \param[in] threshold for binarization; typ. about 190; 0 for default + * \param[in] pageno page number in set; use 1 for new output file + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates the PS string for a mixed text/image page, + * and adds it to an existing file if %pageno > 1. + * The PS output is determined by fitting the result to + * a letter-size (8.5 x 11 inch) page. + * (2) The two images (pixs and pixm) are at the same resolution + * (typically 300 ppi). They are used to generate two compressed + * images, pixb and pixc, that are put directly into the output + * PS file. + * (3) pixb is the text component. In the PostScript world, we think of + * it as a mask through which we paint black. It is produced by + * scaling pixs by %textscale, and thresholding to 1 bpp. + * (4) pixc is the image component, which is that part of pixs under + * the mask pixm. It is scaled from pixs by %imagescale. + * (5) Typical values are textscale = 2.0 and imagescale = 0.5. + * (6) If pixm == NULL, the page has only text. If it is all black, + * the page is all image and has no text. + * (7) This can be used to write a multi-page PS file, by using + * sequential page numbers with the same output file. It can + * also be used to write separate PS files for each page, + * by using different output files with %pageno = 0 or 1. 
+ * </pre> + */ +l_ok +pixWriteSegmentedPageToPS(PIX *pixs, + PIX *pixm, + l_float32 textscale, + l_float32 imagescale, + l_int32 threshold, + l_int32 pageno, + const char *fileout) +{ +l_int32 alltext, notext, d, ret; +l_uint32 val; +l_float32 scaleratio; +PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc; + + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (imagescale <= 0.0 || textscale <= 0.0) + return ERROR_INT("relative scales must be > 0.0", __func__, 1); + + /* Analyze the page. Determine the ratio by which the + * binary text mask is scaled relative to the image part. + * If there is no image region (alltext == TRUE), the + * text mask will be rendered directly to fit the page, + * and scaleratio = 1.0. */ + alltext = TRUE; + notext = FALSE; + scaleratio = 1.0; + if (pixm) { + pixZero(pixm, &alltext); /* pixm empty: all text */ + if (alltext) { + pixm = NULL; /* treat it as not existing here */ + } else { + pixmi = pixInvert(NULL, pixm); + pixZero(pixmi, ¬ext); /* pixm full; no text */ + pixDestroy(&pixmi); + scaleratio = textscale / imagescale; + } + } + + if (pixGetDepth(pixs) == 1) { /* render tiff g4 */ + pixb = pixClone(pixs); + pixc = NULL; + } else { + pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */ + + /* Get the binary text mask. Note that pixg cannot be a + * clone of pixs, because it may be altered by pixSetMasked(). 
*/ + pixb = NULL; + if (notext == FALSE) { + d = pixGetDepth(pixt); + if (d == 8) + pixg = pixCopy(NULL, pixt); + else /* d == 32 */ + pixg = pixConvertRGBToLuminance(pixt); + if (pixm) /* clear out the image parts */ + pixSetMasked(pixg, pixm, 255); + if (textscale == 1.0) + pixsc = pixClone(pixg); + else if (textscale >= 0.7) + pixsc = pixScaleGrayLI(pixg, textscale, textscale); + else + pixsc = pixScaleAreaMap(pixg, textscale, textscale); + pixb = pixThresholdToBinary(pixsc, threshold); + pixDestroy(&pixg); + pixDestroy(&pixsc); + } + + /* Get the scaled image region */ + pixc = NULL; + if (pixm) { + if (imagescale == 1.0) + pixsc = pixClone(pixt); /* can possibly be a clone of pixs */ + else + pixsc = pixScale(pixt, imagescale, imagescale); + + /* If pixm is not full, clear the pixels in pixsc + * corresponding to bg in pixm, where there can be text + * that is written through the mask pixb. Note that + * we could skip this and use pixsc directly in + * pixWriteMixedToPS(); however, clearing these + * non-image regions to a white background will reduce + * the size of pixc (relative to pixsc), and hence + * reduce the size of the PS file that is generated. + * Use a copy so that we don't accidentally alter pixs. */ + if (notext == FALSE) { + pixmis = pixScale(pixm, imagescale, imagescale); + pixmi = pixInvert(NULL, pixmis); + val = (d == 8) ? 0xff : 0xffffff00; + pixc = pixCopy(NULL, pixsc); + pixSetMasked(pixc, pixmi, val); /* clear non-image part */ + pixDestroy(&pixmis); + pixDestroy(&pixmi); + } else { + pixc = pixClone(pixsc); + } + pixDestroy(&pixsc); + } + pixDestroy(&pixt); + } + + /* Generate the PS file. Don't use bounding boxes. 
*/ + l_psWriteBoundingBox(FALSE); + ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout); + l_psWriteBoundingBox(TRUE); + pixDestroy(&pixb); + pixDestroy(&pixc); + return ret; +} + + +/* + * \brief pixWriteMixedToPS() + * + * \param[in] pixb [optional] 1 bpp mask; typically for text + * \param[in] pixc [optional] 8 or 32 bpp image regions + * \param[in] scale scale factor for rendering pixb, relative to pixc; + * typ. 4.0 + * \param[in] pageno page number in set; use 1 for new output file + * \param[in] fileout output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This low level function generates the PS string for a mixed + * text/image page, and adds it to an existing file if + * %pageno > 1. + * (2) The two images (pixb and pixc) are typically generated at the + * resolution that they will be rendered in the PS file. + * (3) pixb is the text component. In the PostScript world, we think of + * it as a mask through which we paint black. + * (4) pixc is the (typically halftone) image component. It is + * white in the rest of the page. To minimize the size of the + * PS file, it should be rendered at a resolution that is at + * least equal to its actual resolution. + * (5) %scale gives the ratio of resolution of pixb to pixc. + * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc; + * so %scale = 4.0. If one of the images is not defined, + * the value of %scale is ignored. + * (6) We write pixc with DCT compression (jpeg). This is followed + * by painting the text as black through the mask pixb. If + * pixc doesn't exist (alltext), we write the text with the + * PS "image" operator instead of the "imagemask" operator, + * because ghostscript's ps2pdf is flaky when the latter is used. + * (7) The actual output resolution is determined by fitting the + * result to a letter-size (8.5 x 11 inch) page. 
+ * <pre> + */ +l_ok +pixWriteMixedToPS(PIX *pixb, + PIX *pixc, + l_float32 scale, + l_int32 pageno, + const char *fileout) +{ +char *tname; +const char *op; +l_int32 resb, resc, endpage, maskop, ret; + + if (!pixb && !pixc) + return ERROR_INT("pixb and pixc both undefined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + + /* Compute the resolution that fills a letter-size page. */ + if (!pixc) { + resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0); + } else { + resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0); + if (pixb) + resb = (l_int32)(scale * resc); + } + + /* Write the jpeg image first */ + if (pixc) { + tname = l_makeTempFilename(); + pixWrite(tname, pixc, IFF_JFIF_JPEG); + endpage = (pixb) ? FALSE : TRUE; + op = (pageno <= 1) ? "w" : "a"; + ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0, + pageno, endpage); + lept_rmfile(tname); + LEPT_FREE(tname); + if (ret) + return ERROR_INT("jpeg data not written", __func__, 1); + } + + /* Write the binary data, either directly or, if there is + * a jpeg image on the page, through the mask. */ + if (pixb) { + tname = l_makeTempFilename(); + pixWrite(tname, pixb, IFF_TIFF_G4); + op = (pageno <= 1 && !pixc) ? "w" : "a"; + maskop = (pixc) ? 
1 : 0; + ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0, + pageno, maskop, 1); + lept_rmfile(tname); + LEPT_FREE(tname); + if (ret) + return ERROR_INT("tiff data not written", __func__, 1); + } + + return 0; +} + + +/*-------------------------------------------------------------* + * Convert any image file to PS for embedding * + *-------------------------------------------------------------*/ +/* + * \brief convertToPSEmbed() + * + * \param[in] filein input image file, any format + * \param[in] fileout output ps file + * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3 + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This is a wrapper function that generates a PS file with + * a bounding box, from any input image file. + * (2) Do the best job of compression given the specified level. + * %level=3 does flate compression on anything that is not + * tiffg4 (1 bpp) or jpeg (8 bpp or rgb). + * (3) If %level=2 and the file is not tiffg4 or jpeg, it will + * first be written to file as jpeg with quality = 75. + * This will remove the colormap and cause some degradation + * in the image. + * (4) The bounding box is required when a program such as TeX + * (through epsf) places and rescales the image. It is + * sized for fitting the image to an 8.5 x 11.0 inch page. 
+ * </pre> + */ +l_ok +convertToPSEmbed(const char *filein, + const char *fileout, + l_int32 level) +{ +char *tname; +l_int32 d, format; +PIX *pix, *pixs; + + if (!filein) + return ERROR_INT("filein not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (level != 1 && level != 2 && level != 3) { + L_ERROR("invalid level specified; using level 2\n", __func__); + level = 2; + } + + if (level == 1) { /* no compression */ + pixWritePSEmbed(filein, fileout); + return 0; + } + + /* Find the format and write out directly if in jpeg or tiff g4 */ + findFileFormat(filein, &format); + if (format == IFF_JFIF_JPEG) { + convertJpegToPSEmbed(filein, fileout); + return 0; + } else if (format == IFF_TIFF_G4) { + convertG4ToPSEmbed(filein, fileout); + return 0; + } else if (format == IFF_UNKNOWN) { + L_ERROR("format of %s not known\n", __func__, filein); + return 1; + } + + /* If level 3, flate encode. */ + if (level == 3) { + convertFlateToPSEmbed(filein, fileout); + return 0; + } + + /* OK, it's level 2, so we must convert to jpeg or tiff g4 */ + if ((pixs = pixRead(filein)) == NULL) + return ERROR_INT("image not read from file", __func__, 1); + d = pixGetDepth(pixs); + if ((d == 2 || d == 4) && !pixGetColormap(pixs)) + pix = pixConvertTo8(pixs, 0); + else if (d == 16) + pix = pixConvert16To8(pixs, L_MS_BYTE); + else + pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); + pixDestroy(&pixs); + if (!pix) + return ERROR_INT("converted pix not made", __func__, 1); + + d = pixGetDepth(pix); + tname = l_makeTempFilename(); + if (d == 1) { + if (pixWrite(tname, pix, IFF_TIFF_G4)) { + LEPT_FREE(tname); + pixDestroy(&pix); + return ERROR_INT("g4 tiff not written", __func__, 1); + } + convertG4ToPSEmbed(tname, fileout); + } else { + if (pixWrite(tname, pix, IFF_JFIF_JPEG)) { + LEPT_FREE(tname); + pixDestroy(&pix); + return ERROR_INT("jpeg not written", __func__, 1); + } + convertJpegToPSEmbed(tname, fileout); + } + + 
lept_rmfile(tname); + LEPT_FREE(tname); + pixDestroy(&pix); + return 0; +} + + +/*-------------------------------------------------------------* + * Write all images in a pixa out to PS * + *-------------------------------------------------------------*/ +/* + * \brief pixaWriteCompressedToPS() + * + * \param[in] pixa any set of images + * \param[in] fileout output ps file + * \param[in] res resolution for the set of input images + * \param[in] level PostScript compression capability: 2 or 3 + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates a PostScript file of multiple page images, + * all with bounding boxes. + * (2) See pixWriteCompressedToPS() for details. + * (3) To generate a pdf from %fileout, use: + * ps2pdf <infile.ps> <outfile.pdf> + * </pre> + */ +l_ok +pixaWriteCompressedToPS(PIXA *pixa, + const char *fileout, + l_int32 res, + l_int32 level) +{ +l_int32 i, n, index, ret; +PIX *pix; + + if (!pixa) + return ERROR_INT("pixa not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (level != 2 && level != 3) { + L_ERROR("only levels 2 and 3 permitted; using level 2\n", __func__); + level = 2; + } + + index = 0; + n = pixaGetCount(pixa); + for (i = 0; i < n; i++) { + pix = pixaGetPix(pixa, i, L_CLONE); + ret = pixWriteCompressedToPS(pix, fileout, res, level, &index); + if (ret) L_ERROR("PS string not written for image %d\n", __func__, i); + pixDestroy(&pix); + } + return 0; +} + + +/* + * \brief pixWriteCompressedToPS() + * + * \param[in] pix any depth; colormap OK + * \param[in] fileout output ps file + * \param[in] res of input image + * \param[in] level PostScript compression capability: 2 or 3 + * \param[in,out] pindex index of image in output ps file + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This generates a PostScript string for %pix, and writes it + * to a file, with a bounding box. 
+ * (2) *pindex keeps track of the number of images that have been + * written to %fileout. If this is the first image to be + * converted, set *pindex == 0 before passing it in. If the + * PostScript string is successfully generated, this will increment + * *pindex. If *pindex > 0, the PostScript string will be + * appended to %fileout. + * (3) PostScript level 2 enables lossless tiffg4 and lossy jpeg + * compression. Level 3 adds lossless flate (essentially gzip) + * compression. + * * For images with a colormap, lossless flate is often better in + * both quality and size than jpeg. + * * The decision for images without a colormap affects compression + * efficiency: %level2 (jpeg) is usually better than %level3 (flate) + * * Because jpeg does not handle 16 bpp, if %level == 2, the image + * is converted to 8 bpp (using MSB) and compressed with jpeg, + * cmap + level2: jpeg + * cmap + level3: flate + * 1 bpp: tiffg4 + * 2 or 4 bpp + level2: jpeg + * 2 or 4 bpp + level3: flate + * 8 bpp + level2: jpeg + * 8 bpp + level3: flate + * 16 bpp + level2: jpeg [converted to 8 bpp, with warning] + * 16 bpp + level3: flate + * 32 bpp + level2: jpeg + * 32 bpp + level3: flate + * </pre> + */ +l_ok +pixWriteCompressedToPS(PIX *pix, + const char *fileout, + l_int32 res, + l_int32 level, + l_int32 *pindex) +{ +char *tname; +l_int32 writeout, d; +PIX *pixt; +PIXCMAP *cmap; + + if (!pix) + return ERROR_INT("pix not defined", __func__, 1); + if (!fileout) + return ERROR_INT("fileout not defined", __func__, 1); + if (level != 2 && level != 3) { + L_ERROR("only levels 2 and 3 permitted; using level 2\n", __func__); + level = 2; + } + if (!pindex) + return ERROR_INT("&index not defined", __func__, 1); + + tname = l_makeTempFilename(); + writeout = TRUE; + d = pixGetDepth(pix); + cmap = pixGetColormap(pix); + if (d == 1) { + if (pixWrite(tname, pix, IFF_TIFF_G4)) + writeout = FALSE; + } else if (level == 3) { + if (pixWrite(tname, pix, IFF_PNG)) + writeout = FALSE; + } else { /* level 
== 2 */ + if (cmap) { + pixt = pixConvertForPSWrap(pix); + if (pixWrite(tname, pixt, IFF_JFIF_JPEG)) + writeout = FALSE; + pixDestroy(&pixt); + } else if (d == 16) { + L_WARNING("d = 16; converting to 8 bpp for jpeg\n", __func__); + pixt = pixConvert16To8(pix, L_MS_BYTE); + if (pixWrite(tname, pixt, IFF_JFIF_JPEG)) + writeout = FALSE; + pixDestroy(&pixt); + } else if (d == 2 || d == 4) { + pixt = pixConvertTo8(pix, 0); + if (pixWrite(tname, pixt, IFF_JFIF_JPEG)) + writeout = FALSE; + pixDestroy(&pixt); + } else if (d == 8 || d == 32) { + if (pixWrite(tname, pix, IFF_JFIF_JPEG)) + writeout = FALSE; + } else { /* shouldn't happen */ + L_ERROR("invalid depth with level 2: %d\n", __func__, d); + writeout = FALSE; + } + } + + if (writeout) + writeImageCompressedToPSFile(tname, fileout, res, pindex); + + if (lept_rmfile(tname) != 0) + L_ERROR("temp file %s was not deleted\n", __func__, tname); + LEPT_FREE(tname); + return (writeout) ? 0 : 1; +} + +/* --------------------------------------------*/ +#endif /* USE_PSIO */ +/* --------------------------------------------*/
