diff mupdf-source/thirdparty/leptonica/src/pdfio2.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/pdfio2.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,3058 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file pdfio2.c
+ * <pre>
+ *
+ *    Lower-level operations for generating pdf.
+ *
+ *     Intermediate function for single page, multi-image conversion
+ *          l_int32              pixConvertToPdfData()
+ *
+ *     Intermediate function for generating multipage pdf output
+ *          l_int32              ptraConcatenatePdfToData()
+ *
+ *     Convert tiff multipage to pdf file
+ *          l_int32              convertTiffMultipageToPdf()
+ *
+ *     Generates the CID, transcoding under some conditions
+ *          l_int32              l_generateCIDataForPdf()
+ *          l_int32              l_generateCIData()
+ *
+ *       Lower-level CID generation without transcoding
+ *          L_COMP_DATA         *l_generateFlateDataPdf()
+ *          L_COMP_DATA         *l_generateJpegData()
+ *          L_COMP_DATA         *l_generateJpegDataMem()
+ *          static L_COMP_DATA  *l_generateJp2kData()
+ *          L_COMP_DATA         *l_generateG4Data()
+ *
+ *       Lower-level CID generation with transcoding
+ *          l_int32              pixGenerateCIData()
+ *          L_COMP_DATA         *l_generateFlateData()
+ *          static L_COMP_DATA  *pixGenerateFlateData()
+ *          static L_COMP_DATA  *pixGenerateJpegData()
+ *          static L_COMP_DATA  *pixGenerateJp2kData()
+ *          static L_COMP_DATA  *pixGenerateG4Data()
+ *
+ *       Other CID operations
+ *          l_int32              cidConvertToPdfData()
+ *          void                 l_CIDataDestroy()
+ *
+ *     Helper functions for generating the output pdf string
+ *          static l_int32       l_generatePdf()
+ *          static void          generateFixedStringsPdf()
+ *          static char         *generateEscapeString()
+ *          static void          generateMediaboxPdf()
+ *          static l_int32       generatePageStringPdf()
+ *          static l_int32       generateContentStringPdf()
+ *          static l_int32       generatePreXStringsPdf()
+ *          static l_int32       generateColormapStringsPdf()
+ *          static void          generateTrailerPdf()
+ *          static char         *makeTrailerStringPdf()
+ *          static l_int32       generateOutputDataPdf()
+ *
+ *     Helper functions for generating multipage pdf output
+ *          static l_int32       parseTrailerPdf()
+ *          static char         *generatePagesObjStringPdf()
+ *          static L_BYTEA      *substituteObjectNumbers()
+ *
+ *     Create/destroy/access pdf data
+ *          static L_PDF_DATA   *pdfdataCreate()
+ *          static void          pdfdataDestroy()
+ *          static L_COMP_DATA  *pdfdataGetCid()
+ *
+ *     Find number of pages in a pdf
+ *          l_int32              getPdfPageCount()
+ *
+ *     Find widths and heights of pages and media boxes in a pdf
+ *          l_int32              getPdfPageSizes()
+ *          l_int32              getPdfMediaBoxSizes()
+ *
+ *     Find effective resolution of images rendered from a pdf
+ *          l_int32              getPdfRendererResolution()
+ *
+ *     Set flags for special modes
+ *          void                 l_pdfSetG4ImageMask()
+ *          void                 l_pdfSetDateAndVersion()
+ *
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include <math.h>
+#include "allheaders.h"
+
+/* --------------------------------------------*/
+#if  USE_PDFIO   /* defined in environ.h */
+ /* --------------------------------------------*/
+
+    /* Typical scan resolution in ppi (pixels/inch).  pixConvertToPdfData()
+     * falls back to this value when the caller passes res <= 0 and the
+     * compressed image data carries no positive resolution. */
+static const l_int32  DefaultInputRes = 300;
+
+    /* Static helpers.  These forward declarations follow the same
+     * grouping as the function list in the file header comment. */
+static L_COMP_DATA  *l_generateJp2kData(const char *fname);
+static L_COMP_DATA  *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag);
+static L_COMP_DATA  *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag,
+                                         l_int32 quality);
+static L_COMP_DATA  *pixGenerateJp2kData(PIX *pixs, l_int32 quality);
+static L_COMP_DATA  *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag);
+
+static l_int32       l_generatePdf(l_uint8 **pdata, size_t *pnbytes,
+                                   L_PDF_DATA  *lpd);
+static void          generateFixedStringsPdf(L_PDF_DATA *lpd);
+static char         *generateEscapeString(const char  *str);
+static void          generateMediaboxPdf(L_PDF_DATA *lpd);
+static l_int32       generatePageStringPdf(L_PDF_DATA *lpd);
+static l_int32       generateContentStringPdf(L_PDF_DATA *lpd);
+static l_int32       generatePreXStringsPdf(L_PDF_DATA *lpd);
+static l_int32       generateColormapStringsPdf(L_PDF_DATA *lpd);
+static void          generateTrailerPdf(L_PDF_DATA *lpd);
+static char         *makeTrailerStringPdf(L_DNA *daloc);
+static l_int32       generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes,
+                                       L_PDF_DATA *lpd);
+
+static l_int32       parseTrailerPdf(L_BYTEA *bas, L_DNA **pda);
+static char         *generatePagesObjStringPdf(NUMA *napage);
+static L_BYTEA      *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs);
+
+static L_PDF_DATA   *pdfdataCreate(const char *title);
+static void          pdfdataDestroy(L_PDF_DATA **plpd);
+static L_COMP_DATA  *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index);
+
+
+/* ---------------- Defaults for rendering options ----------------- */
+    /* Output G4 as writing through image mask; this is the default */
+static l_int32   var_WRITE_G4_IMAGE_MASK = 1;
+    /* Write date/time and lib version into pdf; this is the default */
+static l_int32   var_WRITE_DATE_AND_VERSION = 1;
+
+#define L_SMALLBUF   256
+#define L_BIGBUF    2048   /* must be able to hold hex colormap */
+
+
+#ifndef  NO_CONSOLE_IO
+#define  DEBUG_MULTIPAGE      0  /* 1: stderr dumps in ptraConcatenatePdfToData() */
+#endif  /* ~NO_CONSOLE_IO */
+
+
+/*---------------------------------------------------------------------*
+ *    Intermediate function for single page, multi-image conversion    *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   pixConvertToPdfData()
+ *
+ * \param[in]      pix       all depths; cmap OK
+ * \param[in]      type      L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE,
+ *                           L_JP2K_ENCODE
+ * \param[in]      quality   for jpeg: 1-100; 0 for default (75)
+ *                           for jp2k: 27-45; 0 for default (34)
+ * \param[out]     pdata     pdf array
+ * \param[out]     pnbytes   number of bytes in pdf array
+ * \param[in]      x, y      location of lower-left corner of image, in pixels,
+ *                           relative to the PostScript origin (0,0) at
+ *                           the lower-left corner of the page)
+ * \param[in]      res       override the resolution of the input image, in ppi;
+ *                           use 0 to respect resolution embedded in the input
+ * \param[in]      title     [optional] pdf title; can be null
+ * \param[in,out]  plpd      ptr to lpd; created on the first invocation and
+ *                           returned until last image is processed
+ * \param[in]      position  in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
+ *                           L_LAST_IMAGE
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If %res == 0 and the input resolution field from the pix is 0,
+ *          this will use DefaultInputRes.
+ *      (2) This only writes %data if it is the last image to be
+ *          written on the page.
+ *      (3) See comments in convertToPdf().
+ *      (4) If %type is not one of the four listed encodings, an encoding
+ *          is chosen by selectDefaultPdfEncoding().
+ *      (5) The compressed image data made here is added to the cid array
+ *          of the lpd.  If the lpd cannot be created, or if %plpd is
+ *          given for an image that is not the first and *plpd is null,
+ *          the compressed data is destroyed and an error is returned.
+ * </pre>
+ */
+l_ok
+pixConvertToPdfData(PIX          *pix,
+                    l_int32       type,
+                    l_int32       quality,
+                    l_uint8     **pdata,
+                    size_t       *pnbytes,
+                    l_int32       x,
+                    l_int32       y,
+                    l_int32       res,
+                    const char   *title,
+                    L_PDF_DATA  **plpd,
+                    l_int32       position)
+{
+l_int32       pixres, w, h, ret;
+l_float32     xpt, ypt, wpt, hpt;
+L_COMP_DATA  *cid = NULL;
+L_PDF_DATA   *lpd = NULL;
+
+    if (!pdata)
+        return ERROR_INT("&data not defined", __func__, 1);
+    *pdata = NULL;
+    if (!pnbytes)
+        return ERROR_INT("&nbytes not defined", __func__, 1);
+    *pnbytes = 0;
+    if (!pix)
+        return ERROR_INT("pix not defined", __func__, 1);
+    if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
+        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
+        selectDefaultPdfEncoding(pix, &type);
+    }
+    if (quality < 0 || quality > 100)
+        return ERROR_INT("invalid quality", __func__, 1);
+
+    if (plpd) {  /* part of multi-page invocation */
+        if (position == L_FIRST_IMAGE)
+            *plpd = NULL;
+    }
+
+        /* Generate the compressed image data.  It must NOT
+         * be ascii85 encoded. */
+    pixGenerateCIData(pix, type, quality, 0, &cid);
+    if (!cid)
+        return ERROR_INT("cid not made", __func__, 1);
+
+        /* Get media box in pts.  Guess the input image resolution
+         * based on the input parameter %res, the resolution data in
+         * the pix, and the size of the image. */
+    pixres = cid->res;
+    w = cid->w;
+    h = cid->h;
+    if (res <= 0)
+        res = (pixres > 0) ? pixres : DefaultInputRes;
+    xpt = x * 72.f / res;
+    ypt = y * 72.f / res;
+    wpt = w * 72.f / res;
+    hpt = h * 72.f / res;
+
+        /* Set up lpd.  From here on the cid is owned by this function
+         * until it has been added to the lpd, so every early return
+         * must destroy it. */
+    if (!plpd || position == L_FIRST_IMAGE) {
+            /* Single image, or first of multiple images */
+        if ((lpd = pdfdataCreate(title)) == NULL) {
+            l_CIDataDestroy(&cid);
+            return ERROR_INT("lpd not made", __func__, 1);
+        }
+        if (plpd) *plpd = lpd;
+    } else {  /* not the first of multiple images */
+        if ((lpd = *plpd) == NULL) {
+            l_CIDataDestroy(&cid);
+            return ERROR_INT("lpd not defined for subsequent image",
+                             __func__, 1);
+        }
+    }
+
+        /* Add the data to the lpd */
+    ptraAdd(lpd->cida, cid);
+    lpd->n++;
+    ptaAddPt(lpd->xy, xpt, ypt);
+    ptaAddPt(lpd->wh, wpt, hpt);
+
+        /* If a single image or the last of multiple images,
+         * generate the pdf and destroy the lpd */
+    if (!plpd || (position == L_LAST_IMAGE)) {
+        ret = l_generatePdf(pdata, pnbytes, lpd);
+        pdfdataDestroy(&lpd);
+        if (plpd) *plpd = NULL;
+        if (ret)
+            return ERROR_INT("pdf output not made", __func__, 1);
+    }
+
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *      Intermediate function for generating multipage pdf output      *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   ptraConcatenatePdfToData()
+ *
+ * \param[in]    pa_data    ptra array of pdf strings, each for a
+ *                          single-page pdf file
+ * \param[in]    sa         [optional] string array of pathnames for
+ *                          input pdf files; can be null
+ * \param[out]   pdata      concatenated pdf data in memory
+ * \param[out]   pnbytes    number of bytes in pdf data
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This only works with leptonica-formatted single-page pdf files.
+ *          pdf files generated by other programs will have unpredictable
+ *          (and usually bad) results.  The requirements for each pdf file:
+ *            (a) The Catalog and Info objects are the first two.
+ *            (b) Object 3 is Pages
+ *            (c) Object 4 is Page
+ *            (d) The remaining objects are Contents, XObjects, and ColorSpace
+ *      (2) We remove trailers from each page, and append the full trailer
+ *          for all pages at the end.
+ *      (3) For all but the first file, remove the ID and the first 3
+ *          objects (catalog, info, pages), so that each subsequent
+ *          file has only objects of these classes:
+ *              Page, Contents, XObject, ColorSpace (Indexed RGB).
+ *          For those objects, we substitute these refs to objects
+ *          in the local file:
+ *              Page:  Parent(object 3), Contents, XObject(typically multiple)
+ *              XObject:  [ColorSpace if indexed]
+ *          The Pages object on the first page (object 3) has a Kids array
+ *          of references to all the Page objects, with a Count equal
+ *          to the number of pages.  Each Page object refers back to
+ *          this parent.
+ *      (4) Entries of %pa_data that cannot be parsed are removed from
+ *          %pa_data and destroyed, and %pa_data is then compacted.
+ *          %sa is only used to name the skipped files in error messages.
+ *      (5) An error is returned, with %pdata null and %pnbytes 0, if no
+ *          file can be parsed, if the object table of a page is not
+ *          usable, or if the Pages object, the trailer or the output
+ *          array cannot be generated.
+ * </pre>
+ */
+l_ok
+ptraConcatenatePdfToData(L_PTRA    *pa_data,
+                         SARRAY    *sa,
+                         l_uint8  **pdata,
+                         size_t    *pnbytes)
+{
+char     *fname, *str_pages, *str_trailer;
+l_uint8  *pdfdata, *data;
+l_int32   i, j, index, nobj, npages, valid, ret;
+l_int32  *sizes, *locs;
+size_t    size;
+L_BYTEA  *bas, *bad, *bat1, *bat2;
+L_DNA    *da_locs, *da_sizes, *da_outlocs, *da;
+L_DNAA   *daa_locs;  /* object locations on each page */
+NUMA     *na_objs, *napage;
+NUMAA    *naa_objs;  /* object mapping numbers to new values */
+
+    if (!pdata)
+        return ERROR_INT("&data not defined", __func__, 1);
+    *pdata = NULL;
+    if (!pnbytes)
+        return ERROR_INT("&nbytes not defined", __func__, 1);
+    *pnbytes = 0;
+    if (!pa_data)
+        return ERROR_INT("pa_data not defined", __func__, 1);
+
+        /* Parse the files and find the object locations.
+         * Remove file data that cannot be parsed. */
+    ptraGetActualCount(pa_data, &npages);
+    daa_locs = l_dnaaCreate(npages);
+    for (i = 0; i < npages; i++) {
+        bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
+        if (parseTrailerPdf(bas, &da_locs) != 0) {
+                /* Removal without compaction keeps index i aligned
+                 * with the pathnames in sa */
+            bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
+            l_byteaDestroy(&bas);
+            if (sa) {
+                fname = sarrayGetString(sa, i, L_NOCOPY);
+                L_ERROR("can't parse file %s; skipping\n", __func__, fname);
+            } else {
+                L_ERROR("can't parse file %d; skipping\n", __func__, i);
+            }
+        } else {
+            l_dnaaAddDna(daa_locs, da_locs, L_INSERT);
+        }
+    }
+
+        /* Recompute npages in case some of the files were not pdf */
+    ptraCompactArray(pa_data);
+    ptraGetActualCount(pa_data, &npages);
+    if (npages == 0) {
+        l_dnaaDestroy(&daa_locs);
+        return ERROR_INT("no parsable pdf files found", __func__, 1);
+    }
+
+        /* Find the mapping from initial to final object numbers */
+    naa_objs = numaaCreate(npages);  /* stores final object numbers */
+    napage = numaCreate(npages);  /* stores "Page" object numbers */
+    index = 0;
+    for (i = 0; i < npages; i++) {
+        da = l_dnaaGetDna(daa_locs, i, L_CLONE);
+        nobj = l_dnaGetCount(da);
+        if (i == 0) {
+            numaAddNumber(napage, 4);  /* object 4 on first page */
+            na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
+            index = nobj - 1;
+        } else {  /* skip the first 3 objects in each file */
+            numaAddNumber(napage, index);  /* Page object is first we add */
+            na_objs = numaMakeConstant(0.0, nobj - 1);
+            numaReplaceNumber(na_objs, 3, 3);  /* refers to parent of all */
+            for (j = 4; j < nobj - 1; j++)
+                numaSetValue(na_objs, j, index++);
+        }
+        numaaAddNuma(naa_objs, na_objs, L_INSERT);
+        l_dnaDestroy(&da);
+    }
+
+        /* Everything below leaves through the cleanup label; ret stays 1
+         * until the output array has actually been produced. */
+    ret = 1;
+    bad = NULL;
+    da_outlocs = NULL;
+    str_trailer = NULL;
+
+        /* Make the Pages object (#3).  Without it the output would
+         * be missing object 3, so treat failure as an error. */
+    if ((str_pages = generatePagesObjStringPdf(napage)) == NULL) {
+        L_ERROR("pages object string not made\n", __func__);
+        goto cleanup;
+    }
+
+        /* Build the output */
+    bad = l_byteaCreate(5000);
+    da_outlocs = l_dnaCreate(0);  /* locations of all output objects */
+    for (i = 0; i < npages; i++) {
+        bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
+        pdfdata = l_byteaGetData(bas, &size);
+        da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE);  /* locs on this page */
+        na_objs = numaaGetNuma(naa_objs, i, L_CLONE);  /* obj # on this page */
+        nobj = l_dnaGetCount(da_locs) - 1;
+        da_sizes = l_dnaDiffAdjValues(da_locs);  /* object sizes on this page */
+        sizes = l_dnaGetIArray(da_sizes);
+        locs = l_dnaGetIArray(da_locs);
+
+            /* Both arrays are indexed below.  The first page also reads
+             * sizes[0..2] and locs[0..3] unconditionally, so it needs
+             * at least 4 locations. */
+        valid = (sizes != NULL) && (locs != NULL) && (i > 0 || nobj >= 3);
+        if (valid) {
+            if (i == 0) {
+                    /* Header, Catalog and Info are copied verbatim; the
+                     * original Pages object is replaced by str_pages */
+                l_byteaAppendData(bad, pdfdata, sizes[0]);
+                l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
+                l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
+                l_byteaAppendString(bad, str_pages);
+                for (j = 0; j < 4; j++)
+                    l_dnaAddNumber(da_outlocs, locs[j]);
+            }
+            for (j = 4; j < nobj; j++) {
+                l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
+                bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
+                bat2 = substituteObjectNumbers(bat1, na_objs);
+                data = l_byteaGetData(bat2, &size);
+                l_byteaAppendData(bad, data, size);
+                l_byteaDestroy(&bat1);
+                l_byteaDestroy(&bat2);
+            }
+            if (i == npages - 1)  /* last one */
+                l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
+        }
+        LEPT_FREE(sizes);
+        LEPT_FREE(locs);
+        l_dnaDestroy(&da_locs);
+        numaDestroy(&na_objs);
+        l_dnaDestroy(&da_sizes);
+        if (!valid) {
+            L_ERROR("object table not usable for page %d\n", __func__, i);
+            goto cleanup;
+        }
+    }
+
+        /* Add the trailer */
+    if ((str_trailer = makeTrailerStringPdf(da_outlocs)) == NULL) {
+        L_ERROR("trailer string not made\n", __func__);
+        goto cleanup;
+    }
+    l_byteaAppendString(bad, str_trailer);
+
+        /* Transfer the output data */
+    if ((*pdata = l_byteaCopyData(bad, pnbytes)) == NULL) {
+        *pnbytes = 0;
+        L_ERROR("output data not made\n", __func__);
+        goto cleanup;
+    }
+    ret = 0;
+
+#if  DEBUG_MULTIPAGE
+    lept_stderr("******** object mapper **********\n");
+    numaaWriteStream(stderr, naa_objs);
+
+    lept_stderr("******** Page object numbers ***********\n");
+    numaWriteStderr(napage);
+
+    lept_stderr("******** Pages object ***********\n");
+    lept_stderr("%s\n", str_pages);
+#endif  /* DEBUG_MULTIPAGE */
+
+cleanup:
+    l_byteaDestroy(&bad);
+    numaDestroy(&napage);
+    numaaDestroy(&naa_objs);
+    l_dnaDestroy(&da_outlocs);
+    l_dnaaDestroy(&daa_locs);
+    LEPT_FREE(str_pages);
+    LEPT_FREE(str_trailer);
+    return ret;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                  Convert tiff multipage to pdf file                 *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   convertTiffMultipageToPdf()
+ *
+ * \param[in]    filein    (tiff)
+ * \param[in]    fileout   (pdf)
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A multipage tiff file can also be converted to PS, using
+ *          convertTiffMultipageToPS()
+ *      (2) The input is first checked to be a tiff file.  All pages are
+ *          then read into a pixa and written with pixaConvertToPdf(),
+ *          using default resolution, encoding type and quality and a
+ *          scale factor of 1.0.
+ *      (3) An error is returned if the pages cannot be read or if the
+ *          pdf cannot be written.
+ * </pre>
+ */
+l_ok
+convertTiffMultipageToPdf(const char  *filein,
+                          const char  *fileout)
+{
+l_int32  istiff, ret;
+PIXA    *pixa;
+FILE    *fp;
+
+    if (!filein)
+        return ERROR_INT("filein not defined", __func__, 1);
+    if (!fileout)
+        return ERROR_INT("fileout not defined", __func__, 1);
+
+    if ((fp = fopenReadStream(filein)) == NULL)
+        return ERROR_INT_1("file not found", filein, __func__, 1);
+    istiff = fileFormatIsTiff(fp);
+    fclose(fp);
+    if (!istiff)
+        return ERROR_INT_1("file not tiff format", filein, __func__, 1);
+
+    if ((pixa = pixaReadMultipageTiff(filein)) == NULL)
+        return ERROR_INT_1("pixa not made", filein, __func__, 1);
+
+        /* Propagate a failed conversion instead of reporting success */
+    ret = pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout);
+    pixaDestroy(&pixa);
+    if (ret)
+        return ERROR_INT_1("pdf not made", fileout, __func__, 1);
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                          CID-based operations                       *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   l_generateCIDataForPdf()
+ *
+ * \param[in]    fname      [optional] can be null
+ * \param[in]    pix        [optional] can be null
+ * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)
+ *                          for jp2k if transcoded: 27-45; 0 for default (34)
+ * \param[out]   pcid       compressed data
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) At least one of %fname and %pix must be given.
+ *      (2) When a usable filename is given, the file format decides
+ *          whether the compressed data can be taken without transcoding:
+ *          jpeg, jp2k and png files are handed to the corresponding
+ *          l_generate*() function.  PostScript and pdf input files
+ *          are rejected.
+ *      (3) If that does not yield a cid, the image is transcoded with
+ *          the encoding chosen by selectDefaultPdfEncoding().  The
+ *          raster is taken from %pix if given (which saves reading
+ *          the file); otherwise it is read from %fname.
+ *      (4) Files named "stdin" or "-" are never opened here, for
+ *          Tesseract compatibility reasons.  This restriction may be
+ *          removed in the future.
+ *      (5) Note that tiff-g4 must be transcoded to properly handle byte
+ *          order and perhaps photometry (e.g., min-is-black).  For a
+ *          multipage tiff file, data will only be extracted from the
+ *          first page, so this should not be invoked.
+ * </pre>
+ */
+l_ok
+l_generateCIDataForPdf(const char    *fname,
+                       PIX           *pix,
+                       l_int32        quality,
+                       L_COMP_DATA  **pcid)
+{
+l_int32       format, enctype, usefile;
+L_COMP_DATA  *cid = NULL;
+PIX          *pixsrc;
+
+    if (!pcid)
+        return ERROR_INT("&cid not defined", __func__, 1);
+    *pcid = NULL;
+    if (!fname && !pix)
+        return ERROR_INT("neither fname nor pix are defined", __func__, 1);
+
+        /* First choice: take the compressed data straight from a
+         * real file (i.e., not 'stdin'), without transcoding. */
+    usefile = fname && strcmp(fname, "-") != 0 && strcmp(fname, "stdin") != 0;
+    if (usefile) {
+        findFileFormat(fname, &format);
+        switch (format)
+        {
+        case IFF_UNKNOWN:
+            L_WARNING("file %s format is unknown\n", __func__, fname);
+            break;
+        case IFF_PS:
+        case IFF_LPDF:
+            L_ERROR("file %s is unsupported format %d\n",
+                  __func__, fname, format);
+            return 1;
+        case IFF_JFIF_JPEG:
+            cid = l_generateJpegData(fname, 0);
+            break;
+        case IFF_JP2:
+            cid = l_generateJp2kData(fname);
+            break;
+        case IFF_PNG:
+            cid = l_generateFlateDataPdf(fname, pix);
+            break;
+        default:  /* all other formats are transcoded below */
+            break;
+        }
+    }
+    if (cid) {
+        *pcid = cid;
+        return 0;
+    }
+
+        /* Fallback: transcode from the raster */
+    pixsrc = (pix) ? pixClone(pix) : pixRead(fname);
+    if (!pixsrc)
+        return ERROR_INT("pixt not made", __func__, 1);
+    if (selectDefaultPdfEncoding(pixsrc, &enctype)) {
+        pixDestroy(&pixsrc);
+        return 1;
+    }
+    pixGenerateCIData(pixsrc, enctype, quality, 0, &cid);
+    pixDestroy(&pixsrc);
+    if (!cid)
+        return ERROR_INT("cid not made from pix", __func__, 1);
+
+    *pcid = cid;
+    return 0;
+}
+
+
+/*!
+ * \brief   l_generateCIData()
+ *
+ * \param[in]    fname
+ * \param[in]    type       L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE,
+ *                          L_JP2K_ENCODE
+ * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)
+ *                          for jp2k if transcoded: 27-45; 0 for default (34)
+ * \param[in]    ascii85    0 for binary; 1 for ascii85-encoded
+ * \param[out]   pcid       compressed data
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This can be used for both PostScript and pdf.
+ *      (2) Set ascii85:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (3) This attempts to compress according to the requested type.
+ *          If this can't be done, it falls back to ordinary flate encoding.
+ *      (4) This differs from l_generateCIDataForPdf(), which determines
+ *          the file format and only works for pdf.
+ *      (5) The image header is read first to check the requested encoding
+ *          against the depth and colormap of the image.  It is an error
+ *          if the header cannot be read.
+ * </pre>
+ */
+l_ok
+l_generateCIData(const char    *fname,
+                 l_int32        type,
+                 l_int32        quality,
+                 l_int32        ascii85,
+                 L_COMP_DATA  **pcid)
+{
+l_int32       format, d, bps, spp, iscmap;
+L_COMP_DATA  *cid;
+PIX          *pix;
+
+    if (!pcid)
+        return ERROR_INT("&cid not defined", __func__, 1);
+    *pcid = NULL;
+    if (!fname)
+        return ERROR_INT("fname not defined", __func__, 1);
+    if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
+        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE)
+        return ERROR_INT("invalid conversion type", __func__, 1);
+    if (ascii85 != 0 && ascii85 != 1)
+        return ERROR_INT("invalid ascii85", __func__, 1);
+
+        /* Sanity check on requested encoding.  Without a valid header
+         * the values tested below would be meaningless, so stop here
+         * if it cannot be read. */
+    if (pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap))
+        return ERROR_INT_1("header not read", fname, __func__, 1);
+    d = bps * spp;
+    if (d == 24) d = 32;
+    if (iscmap && type != L_FLATE_ENCODE) {
+        L_WARNING("pixs has cmap; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    } else if (d < 8 && type == L_JPEG_ENCODE) {
+        L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    } else if (d < 8 && type == L_JP2K_ENCODE) {
+        L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    } else if (d > 1 && type == L_G4_ENCODE) {
+        L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    }
+
+        /* Generate the cid.  Jpeg and jp2k files are used directly
+         * when the file is already in the requested format; everything
+         * else is read into a pix and transcoded. */
+    if (type == L_JPEG_ENCODE) {
+        if (format == IFF_JFIF_JPEG) {  /* do not transcode */
+            cid = l_generateJpegData(fname, ascii85);
+        } else {
+            if ((pix = pixRead(fname)) == NULL)
+                return ERROR_INT("pix not returned for JPEG", __func__, 1);
+            cid = pixGenerateJpegData(pix, ascii85, quality);
+            pixDestroy(&pix);
+        }
+        if (!cid)
+            return ERROR_INT("jpeg data not made", __func__, 1);
+    } else if (type == L_JP2K_ENCODE) {
+        if (format == IFF_JP2) {  /* do not transcode */
+            cid = l_generateJp2kData(fname);
+        } else {
+            if ((pix = pixRead(fname)) == NULL)
+                return ERROR_INT("pix not returned for JP2K", __func__, 1);
+            cid = pixGenerateJp2kData(pix, quality);
+            pixDestroy(&pix);
+        }
+        if (!cid)
+            return ERROR_INT("jp2k data not made", __func__, 1);
+    } else if (type == L_G4_ENCODE) {
+        if ((pix = pixRead(fname)) == NULL)
+            return ERROR_INT("pix not returned for G4", __func__, 1);
+        cid = pixGenerateG4Data(pix, ascii85);
+        pixDestroy(&pix);
+        if (!cid)
+            return ERROR_INT("g4 data not made", __func__, 1);
+    } else if (type == L_FLATE_ENCODE) {
+        if ((cid = l_generateFlateData(fname, ascii85)) == NULL)
+            return ERROR_INT("flate data not made", __func__, 1);
+    } else {  /* not reached: type was validated above */
+        return ERROR_INT("invalid conversion type", __func__, 1);
+    }
+    *pcid = cid;
+
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                     Low-level CID-based operations                  *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   l_generateFlateDataPdf()
+ *
+ * \param[in]    fname     preferably png
+ * \param[in]    pixs      [optional] can be null
+ * \return  cid containing png data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If you hand this a png file, you are going to get
+ *          png predictors embedded in the flate data. So it has
+ *          come to this. http://xkcd.com/1022/
+ *      (2) Exception: if the png is interlaced or if it is RGBA,
+ *          it will be transcoded.
+ *      (3) If transcoding is required, this will not have to read from
+ *          file if a pix is input.
+ * </pre>
+ */
+L_COMP_DATA *
+l_generateFlateDataPdf(const char  *fname,
+                       PIX         *pixs)
+{
+l_uint8      *pngcomp = NULL;  /* entire PNG compressed file */
+l_uint8      *datacomp = NULL;  /* gzipped raster data */
+l_uint8      *cmapdata = NULL;  /* uncompressed colormap */
+char         *cmapdatahex = NULL;  /* hex ascii uncompressed colormap */
+l_uint32      i, j, n;
+l_int32       format, interlaced;
+l_int32       ncolors;  /* in colormap */
+l_int32       bps;  /* bits/sample: usually 8 */
+l_int32       spp;  /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */
+l_int32       w, h, cmapflag;
+l_int32       xres, yres;
+size_t        nbytescomp = 0, nbytespng = 0;
+FILE         *fp;
+L_COMP_DATA  *cid;
+PIX          *pix;
+PIXCMAP      *cmap = NULL;
+
+    if (!fname)
+        return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
+
+    findFileFormat(fname, &format);
+    spp = 0;  /* init to spp != 4 if not png */
+    interlaced = 0;  /* initialize to no interlacing */
+    bps = 0;  /* initialize to a nonsense value */
+    if (format == IFF_PNG) {
+        isPngInterlaced(fname, &interlaced);
+        if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
+            return (L_COMP_DATA *)ERROR_PTR("bad png input", __func__, NULL);
+    }
+
+        /* PDF is capable of inlining some types of PNG files, but not all
+           of them. We need to transcode anything with interlacing, an
+           alpha channel, or 1 bpp (which would otherwise be photo-inverted).
+
+           Note: any PNG image file with an alpha channel is converted on
+           reading to RGBA (spp == 4). This includes the (gray + alpha) format
+           with spp == 2.  Because of the conversion, readHeaderPng() gives
+           spp = 2, whereas pixGetSpp() gives spp = 4 on the converted pix. */
+    if (format != IFF_PNG ||
+       (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))
+    {  /* lgtm+ analyzer needed the logic expanded */
+        if (!pixs)
+            pix = pixRead(fname);
+        else
+            pix = pixClone(pixs);
+        if (!pix)
+            return (L_COMP_DATA *)ERROR_PTR("pix not made", __func__, NULL);
+        cid = pixGenerateFlateData(pix, 0);
+        pixDestroy(&pix);
+        return cid;
+    }
+
+        /* It's png.  Generate the pdf data without transcoding.
+         * Implementation by Jeff Breidenbach.
+         * First, read the metadata */
+    if ((fp = fopenReadStream(fname)) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
+                                          fname, __func__, NULL);
+    freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
+    fgetPngResolution(fp, &xres, &yres);
+    fclose(fp);
+
+        /* We get pdf corruption when inlining the data from 16 bpp png. */
+    if (bps == 16)
+        return l_generateFlateData(fname, 0);
+
+        /* Read the entire png file */
+    if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR_1("unable to read file",
+                                          fname, __func__, NULL);
+
+        /* Extract flate data, copying portions of it to memory, including
+         * the predictor information in a byte at the beginning of each
+         * raster line.  The flate data makes up the vast majority of
+         * the png file, so after extraction we expect datacomp to
+         * be nearly full (i.e., nbytescomp will be only slightly less
+         * than nbytespng).  Also extract the colormap if present. */
+    if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
+        LEPT_FREE(pngcomp);
+        return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory",
+                                        __func__, NULL);
+    }
+
+        /* Parse the png file.  Each chunk consists of:
+         *    length: 4 bytes
+         *    name:   4 bytes (e.g., "IDAT")
+         *    data:   n bytes
+         *    CRC:    4 bytes
+         * Start at the beginning of the data section of the first chunk,
+         * byte 16, because the png file begins with 8 bytes of header,
+         * followed by the first 8 bytes of the first chunk
+         * (length and name).  On each loop, increment by 12 bytes to
+         * skip over the CRC, length and name of the next chunk. */
+    for (i = 16; i < nbytespng; i += 12) {  /* do each successive chunk */
+            /* Get the chunk length */
+        n  = pngcomp[i - 8] << 24;
+        n += pngcomp[i - 7] << 16;
+        n += pngcomp[i - 6] << 8;
+        n += pngcomp[i - 5] << 0;
+        if (n >= nbytespng - i) {  /* "n + i" can overflow */
+            LEPT_FREE(pngcomp);
+            LEPT_FREE(datacomp);
+            pixcmapDestroy(&cmap);
+            L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", __func__,
+                    i, n, nbytespng);
+            return NULL;
+        }
+
+            /* Is it a data chunk? */
+        if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) {
+            memcpy(datacomp + nbytescomp, pngcomp + i, n);
+            nbytescomp += n;
+        }
+
+            /* Is it a palette chunk? */
+        if (cmapflag && !cmap &&
+            memcmp(pngcomp + i - 4, "PLTE", 4) == 0) {
+            if ((n / 3) > (1 << bps)) {
+                LEPT_FREE(pngcomp);
+                LEPT_FREE(datacomp);
+                pixcmapDestroy(&cmap);
+                L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n",
+                        __func__, i, n, (1 << bps));
+                return NULL;
+            }
+            cmap = pixcmapCreate(bps);
+            for (j = i; j < i + n; j += 3) {
+                pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
+                                pngcomp[j + 2]);
+            }
+        }
+        i += n;  /* move to the end of the data chunk */
+    }
+    LEPT_FREE(pngcomp);
+
+    if (nbytescomp == 0) {
+        LEPT_FREE(datacomp);
+        pixcmapDestroy(&cmap);
+        return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", __func__, NULL);
+    }
+
+        /* Extract and encode the colormap data as hexascii  */
+    ncolors = 0;
+    if (cmap) {
+        pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
+        pixcmapDestroy(&cmap);
+        if (!cmapdata) {
+            LEPT_FREE(datacomp);
+            return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
+                                            __func__, NULL);
+        }
+        cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
+        LEPT_FREE(cmapdata);
+    }
+
+        /* Note that this is the only situation where the predictor
+         * field of the CID is set to 1.  Adobe's predictor values on
+         * p. 76 of pdf_reference_1-7.pdf give 1 for no predictor and
+         * 10-14 for inline predictors, the specifics of which are
+         * ignored by the pdf interpreter, which just needs to know that
+         * the first byte on each compressed scanline is some predictor
+         * whose type can be inferred from the byte itself.  */
+    cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
+    cid->datacomp = datacomp;
+    cid->type = L_FLATE_ENCODE;
+    cid->cmapdatahex = cmapdatahex;
+    cid->nbytescomp = nbytescomp;
+    cid->ncolors = ncolors;
+    cid->predictor = TRUE;
+    cid->w = w;
+    cid->h = h;
+    cid->bps = bps;
+    cid->spp = spp;
+    cid->res = xres;
+    return cid;
+}
+
+
+/*!
+ * \brief   l_generateJpegData()
+ *
+ * \param[in]    fname           of jpeg file
+ * \param[in]    ascii85flag     0 for jpeg; 1 for ascii85-encoded jpeg
+ * \return  cid containing jpeg data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Set ascii85flag:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (2) Most of this function is repeated in l_generateJpegDataMem(),
+ *          which is required in pixacompFastConvertToPdfData().
+ *      (3) The jpeg file is not transcoded: the bytes read from the
+ *          file are stored directly (or ascii85 encoded) in the cid.
+ * </pre>
+ */
+L_COMP_DATA *
+l_generateJpegData(const char  *fname,
+                   l_int32      ascii85flag)
+{
+char         *data85 = NULL;  /* ascii85 encoded jpeg compressed file */
+l_uint8      *data = NULL;
+l_int32       w, h, xres, yres, bps, spp;
+size_t        nbytes, nbytes85;
+L_COMP_DATA  *cid;
+FILE         *fp;
+
+    if (!fname)
+        return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
+
+    if (ascii85flag != 0 && ascii85flag != 1)
+        return (L_COMP_DATA *)ERROR_PTR("wrong ascii85flags", __func__, NULL);
+
+        /* Read the metadata */
+    if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))
+        return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
+    bps = 8;
+    if ((fp = fopenReadStream(fname)) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
+                                          fname, __func__, NULL);
+        /* The return value is ignored, so give the resolution a
+         * defined value in case nothing is written to it. */
+    xres = yres = 0;
+    fgetJpegResolution(fp, &xres, &yres);
+    fclose(fp);
+
+        /* Read the entire jpeg file.  The returned jpeg data in memory
+         * starts with ffd8 and ends with ffd9 */
+    if ((data = l_binaryRead(fname, &nbytes)) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR_1("data not extracted",
+                                          fname, __func__, NULL);
+
+        /* Optionally, encode the compressed data */
+    nbytes85 = 0;
+    if (ascii85flag == 1) {
+        data85 = encodeAscii85(data, nbytes, &nbytes85);
+        LEPT_FREE(data);
+        data = NULL;  /* the binary buffer is gone; never hand it out */
+        if (!data85)
+            return (L_COMP_DATA *)ERROR_PTR_1("data85 not made",
+                                              fname, __func__, NULL);
+        else
+            data85[nbytes85 - 1] = '\0';  /* remove the newline */
+    }
+
+        /* Do not dereference a failed allocation; release whichever
+         * buffer is still owned here. */
+    cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
+    if (!cid) {
+        if (data) LEPT_FREE(data);
+        if (data85) LEPT_FREE(data85);
+        return (L_COMP_DATA *)ERROR_PTR("cid not made", __func__, NULL);
+    }
+    if (ascii85flag == 0) {
+        cid->datacomp = data;
+    } else {  /* ascii85 */
+        cid->data85 = data85;
+        cid->nbytes85 = nbytes85;
+    }
+    cid->type = L_JPEG_ENCODE;
+    cid->nbytescomp = nbytes;
+    cid->w = w;
+    cid->h = h;
+    cid->bps = bps;
+    cid->spp = spp;
+    cid->res = xres;
+    return cid;
+}
+
+
+/*!
+ * \brief   l_generateJpegDataMem()
+ *
+ * \param[in]    data           of jpeg-encoded file
+ * \param[in]    nbytes         size of jpeg-encoded file
+ * \param[in]    ascii85flag    0 for jpeg; 1 for ascii85-encoded jpeg
+ * \return  cid containing jpeg data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Set ascii85flag:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (2) This function takes ownership of %data.  With ascii85flag == 0
+ *          the buffer is stored in the returned cid; with
+ *          ascii85flag == 1, and on any error after the null check,
+ *          it is freed here.  The caller must not free it.
+ * </pre>
+ */
+L_COMP_DATA *
+l_generateJpegDataMem(l_uint8  *data,
+                      size_t    nbytes,
+                      l_int32   ascii85flag)
+{
+char         *data85 = NULL;  /* ascii85 encoded jpeg compressed file */
+l_int32       w, h, xres, yres, bps, spp;
+size_t        nbytes85;
+L_COMP_DATA  *cid;
+
+    if (!data)
+        return (L_COMP_DATA *)ERROR_PTR("data not defined", __func__, NULL);
+
+        /* Read the metadata */
+    if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
+        LEPT_FREE(data);
+        return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
+    }
+    bps = 8;
+        /* The return value is ignored, so give the resolution a
+         * defined value in case nothing is written to it. */
+    xres = yres = 0;
+    readResolutionMemJpeg(data, nbytes, &xres, &yres);
+
+        /* Optionally, encode the compressed data */
+    nbytes85 = 0;
+    if (ascii85flag == 1) {
+        data85 = encodeAscii85(data, nbytes, &nbytes85);
+        LEPT_FREE(data);
+        data = NULL;  /* the binary buffer is gone; never hand it out */
+        if (!data85)
+            return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
+        else
+            data85[nbytes85 - 1] = '\0';  /* remove the newline */
+    }
+
+        /* Do not dereference a failed allocation; release whichever
+         * buffer is still owned here. */
+    cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
+    if (!cid) {
+        if (data) LEPT_FREE(data);
+        if (data85) LEPT_FREE(data85);
+        return (L_COMP_DATA *)ERROR_PTR("cid not made", __func__, NULL);
+    }
+    if (ascii85flag == 0) {
+        cid->datacomp = data;
+    } else {  /* ascii85 */
+        cid->data85 = data85;
+        cid->nbytes85 = nbytes85;
+    }
+    cid->type = L_JPEG_ENCODE;
+    cid->nbytescomp = nbytes;
+    cid->w = w;
+    cid->h = h;
+    cid->bps = bps;
+    cid->spp = spp;
+    cid->res = xres;
+    return cid;
+}
+
+
+/*!
+ * \brief   l_generateJp2kData()
+ *
+ * \param[in]    fname     of jp2k file
+ * \return  cid containing jp2k data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is only called after the file is verified to be jp2k.
+ * </pre>
+ */
+static L_COMP_DATA *
+l_generateJp2kData(const char  *fname)
+{
+l_int32       w, h, bps, spp, xres, yres;
+size_t        nbytes;
+L_COMP_DATA  *cid;
+FILE         *fp;
+
+    if (!fname)
+        return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
+
+        /* Image dimensions and sample layout come from the file header */
+    if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))
+        return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", __func__, NULL);
+
+        /* The allocation must be checked before cid->datacomp is
+         * written through it. */
+    cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
+    if (!cid)
+        return (L_COMP_DATA *)ERROR_PTR("cid not made", __func__, NULL);
+
+        /* The returned jp2k data in memory is the entire jp2k file */
+    if ((cid->datacomp = l_binaryRead(fname, &nbytes)) == NULL) {
+        l_CIDataDestroy(&cid);
+        return (L_COMP_DATA *)ERROR_PTR("data not extracted", __func__, NULL);
+    }
+
+        /* The resolution is optional: if the file cannot be reopened,
+         * it is left at 0. */
+    xres = yres = 0;
+    if ((fp = fopenReadStream(fname)) != NULL) {
+        fgetJp2kResolution(fp, &xres, &yres);
+        fclose(fp);
+    }
+    cid->type = L_JP2K_ENCODE;
+    cid->nbytescomp = nbytes;
+    cid->w = w;
+    cid->h = h;
+    cid->bps = bps;
+    cid->spp = spp;
+    cid->res = xres;
+    return cid;
+}
+
+
+/*!
+ * \brief   l_generateG4Data()
+ *
+ * \param[in]    fname          of g4 compressed file
+ * \param[in]    ascii85flag    0 for g4 compressed; 1 for ascii85-encoded g4
+ * \return  cid g4 compressed image data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Set ascii85flag:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (2) This does not work for multipage tiff files.
+ * </pre>
+ */
+L_COMP_DATA *
+l_generateG4Data(const char  *fname,
+                 l_int32      ascii85flag)
+{
+l_uint8      *datacomp = NULL;  /* g4 compressed raster data */
+char         *data85 = NULL;  /* ascii85 encoded g4 compressed data */
+l_int32       w, h, xres, yres, npages;
+l_int32       minisblack;  /* TRUE or FALSE */
+size_t        nbytes85, nbytescomp;
+L_COMP_DATA  *cid;
+FILE         *fp;
+
+    if (!fname)
+        return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
+
+        /* Any value other than 0 or 1 would skip the encoding step but
+         * still take the ascii85 branch below, dropping the binary
+         * data.  Reject it up front, as l_generateJpegData() does. */
+    if (ascii85flag != 0 && ascii85flag != 1)
+        return (L_COMP_DATA *)ERROR_PTR("wrong ascii85flags", __func__, NULL);
+
+        /* Make sure this is a single page tiff file */
+    if ((fp = fopenReadStream(fname)) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
+                                          fname, __func__, NULL);
+    npages = 0;  /* defined value if the count is not returned */
+    tiffGetCount(fp, &npages);
+    fclose(fp);
+    if (npages != 1) {
+        L_ERROR(" %d page tiff; only works with 1 page (file: %s)\n", __func__, npages, fname);
+        return NULL;
+    }
+
+        /* Read the resolution */
+    if ((fp = fopenReadStream(fname)) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
+                                          fname, __func__, NULL);
+    xres = yres = 0;  /* defined value if the resolution is not returned */
+    getTiffResolution(fp, &xres, &yres);
+    fclose(fp);
+
+        /* The returned ccitt g4 data in memory is the block of
+         * bytes in the tiff file, starting after 8 bytes and
+         * ending before the directory. */
+    if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
+                              &w, &h, &minisblack)) {
+        return (L_COMP_DATA *)ERROR_PTR_1("datacomp not extracted",
+                                          fname, __func__, NULL);
+    }
+
+        /* Optionally, encode the compressed data */
+    nbytes85 = 0;
+    if (ascii85flag == 1) {
+        data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
+        LEPT_FREE(datacomp);
+        datacomp = NULL;  /* the binary buffer is gone */
+        if (!data85)
+            return (L_COMP_DATA *)ERROR_PTR_1("data85 not made",
+                                              fname, __func__, NULL);
+        else
+            data85[nbytes85 - 1] = '\0';  /* remove the newline */
+    }
+
+        /* Do not dereference a failed allocation; release whichever
+         * buffer is still owned here. */
+    cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
+    if (!cid) {
+        if (datacomp) LEPT_FREE(datacomp);
+        if (data85) LEPT_FREE(data85);
+        return (L_COMP_DATA *)ERROR_PTR("cid not made", __func__, NULL);
+    }
+    if (ascii85flag == 0) {
+        cid->datacomp = datacomp;
+    } else {  /* ascii85 */
+        cid->data85 = data85;
+        cid->nbytes85 = nbytes85;
+    }
+    cid->type = L_G4_ENCODE;
+    cid->nbytescomp = nbytescomp;
+    cid->w = w;
+    cid->h = h;
+    cid->bps = 1;
+    cid->spp = 1;
+    cid->minisblack = minisblack;
+    cid->res = xres;
+    return cid;
+}
+
+
+/*!
+ * \brief   pixGenerateCIData()
+ *
+ * \param[in]    pixs       input image; the requested encoding is replaced
+ *                          by flate where it does not apply (see notes)
+ * \param[in]    type       L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or
+ *                          L_JP2K_ENCODE
+ * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)
+ *                          for jp2k if transcoded: 27-45; 0 for default (34)
+ * \param[in]    ascii85    0 for binary; 1 for ascii85-encoded
+ * \param[out]   pcid       compressed data
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Set ascii85:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (2) Images are rejected when the larger of the integer ratios
+ *          w / h and h / w exceeds 10.
+ *      (3) If %type is not one of the four encodings, it is chosen by
+ *          selectDefaultPdfEncoding().
+ *      (4) Flate is substituted, with a warning, when pixs has a
+ *          colormap, when jpeg or jp2k is requested for an image with
+ *          less than 8 bpp, and when g4 is requested for an image with
+ *          more than 1 bpp.
+ * </pre>
+ */
+l_ok
+pixGenerateCIData(PIX           *pixs,
+                  l_int32        type,
+                  l_int32        quality,
+                  l_int32        ascii85,
+                  L_COMP_DATA  **pcid)
+{
+l_int32  width, height, depth, ratio, known;
+
+    if (!pcid)
+        return ERROR_INT("&cid not defined", __func__, 1);
+    *pcid = NULL;
+    if (!pixs)
+        return ERROR_INT("pixs not defined", __func__, 1);
+
+        /* Let the library choose when the request is not recognized */
+    known = (type == L_G4_ENCODE || type == L_JPEG_ENCODE ||
+             type == L_FLATE_ENCODE || type == L_JP2K_ENCODE);
+    if (!known)
+        selectDefaultPdfEncoding(pixs, &type);
+
+    if (ascii85 != 0 && ascii85 != 1)
+        return ERROR_INT("invalid ascii85", __func__, 1);
+
+    pixGetDimensions(pixs, &width, &height, NULL);
+    if (width == 0 || height == 0)
+        return ERROR_INT("invalid w or h", __func__, 1);
+    ratio = L_MAX(width / height, height / width);
+    if (ratio > 10)
+        return ERROR_INT("max asperity > 10", __func__, 1);
+
+        /* Conditionally modify the encoding type if libz is
+         * available and the requested library is missing. */
+#if defined(HAVE_LIBZ)
+# if !defined(HAVE_LIBJPEG)
+    if (type == L_JPEG_ENCODE) {
+        L_WARNING("no libjpeg; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    }
+# endif /* !defined(HAVE_LIBJPEG) */
+# if !defined(HAVE_LIBJP2K)
+    if (type == L_JP2K_ENCODE) {
+        L_WARNING("no libjp2k; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    }
+# endif /* !defined(HAVE_LIBJP2K) */
+# if !defined(HAVE_LIBTIFF)
+    if (type == L_G4_ENCODE) {
+        L_WARNING("no libtiff; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    }
+# endif /* !defined(HAVE_LIBTIFF) */
+#endif /* defined(HAVE_LIBZ) */
+
+        /* Fall back to flate when the request does not fit the image.
+         * At most one of these warnings is issued. */
+    depth = pixGetDepth(pixs);
+    if (pixGetColormap(pixs) && type != L_FLATE_ENCODE) {
+        L_WARNING("pixs has cmap; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    } else if (depth < 8 &&
+               (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE)) {
+        L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    } else if (depth > 1 && type == L_G4_ENCODE) {
+        L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
+        type = L_FLATE_ENCODE;
+    }
+
+        /* Dispatch to the generator for the final encoding */
+    if (type == L_JPEG_ENCODE) {
+        *pcid = pixGenerateJpegData(pixs, ascii85, quality);
+        if (*pcid == NULL)
+            return ERROR_INT("jpeg data not made", __func__, 1);
+        return 0;
+    }
+    if (type == L_JP2K_ENCODE) {
+        *pcid = pixGenerateJp2kData(pixs, quality);
+        if (*pcid == NULL)
+            return ERROR_INT("jp2k data not made", __func__, 1);
+        return 0;
+    }
+    if (type == L_G4_ENCODE) {
+        *pcid = pixGenerateG4Data(pixs, ascii85);
+        if (*pcid == NULL)
+            return ERROR_INT("g4 data not made", __func__, 1);
+        return 0;
+    }
+
+        /* Everything else is flate */
+    *pcid = pixGenerateFlateData(pixs, ascii85);
+    if (*pcid == NULL)
+        return ERROR_INT("flate data not made", __func__, 1);
+    return 0;
+}
+
+
+/*!
+ * \brief   l_generateFlateData()
+ *
+ * \param[in]    fname
+ * \param[in]    ascii85flag    0 for gzipped; 1 for ascii85-encoded gzipped
+ * \return  cid flate compressed image data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The input image is converted to one of these 4 types:
+ *           ~ 1 bpp
+ *           ~ 8 bpp, no colormap
+ *           ~ 8 bpp, colormap
+ *           ~ 32 bpp rgb
+ *      (2) Set ascii85flag:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (3) Always transcodes (i.e., first decodes the png file)
+ * </pre>
+ */
+L_COMP_DATA *
+l_generateFlateData(const char  *fname,
+                    l_int32      ascii85flag)
+{
+PIX          *pix;
+L_COMP_DATA  *result;
+
+    if (fname == NULL)
+        return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
+
+        /* Decode the file into a pix, then compress the pix */
+    pix = pixRead(fname);
+    if (pix == NULL)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not made", __func__, NULL);
+
+    result = pixGenerateFlateData(pix, ascii85flag);
+    pixDestroy(&pix);  /* the decoded image is no longer needed */
+    return result;
+}
+
+
+/*!
+ * \brief   pixGenerateFlateData()
+ *
+ * \param[in]    pixs
+ * \param[in]    ascii85flag    0 for gzipped; 1 for ascii85-encoded gzipped
+ * \return  cid flate compressed image data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *     (1) If called with an RGBA pix (spp == 4), the alpha channel
+ *         will be removed, projecting a white background through
+ *         any transparency.
+ *     (2) If called with a colormapped pix, any transparency in the
+ *         alpha component in the colormap will be ignored, as it is
+ *         for all leptonica operations on colormapped pix.
+ *     (3) pixs is not modified or destroyed; the caller retains
+ *         ownership of it.
+ * </pre>
+ */
+static L_COMP_DATA *
+pixGenerateFlateData(PIX     *pixs,
+                     l_int32  ascii85flag)
+{
+l_uint8      *data = NULL;  /* uncompressed raster data in required format */
+l_uint8      *datacomp = NULL;  /* gzipped raster data */
+char         *data85 = NULL;  /* ascii85 encoded gzipped raster data */
+l_uint8      *cmapdata = NULL;  /* uncompressed colormap */
+char         *cmapdata85 = NULL;  /* ascii85 encoded uncompressed colormap */
+char         *cmapdatahex = NULL;  /* hex ascii uncompressed colormap */
+l_int32       ncolors;  /* in colormap; not used if cmapdata85 is null */
+l_int32       bps;  /* bits/sample: usually 8 */
+l_int32       spp;  /* samples/pixel: 1-grayscale/cmap); 3-rgb */
+l_int32       w, h, d, cmapflag;
+size_t        ncmapbytes85 = 0;
+size_t        nbytes85 = 0;
+size_t        nbytes, nbytescomp;
+L_COMP_DATA  *cid;
+PIX          *pixt;
+PIXCMAP      *cmap;
+
+    if (!pixs)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
+
+        /* Convert the image to one of these 4 types:
+         *     1 bpp
+         *     8 bpp, no colormap
+         *     8 bpp, colormap
+         *     32 bpp rgb    */
+    pixGetDimensions(pixs, &w, &h, &d);
+    cmap = pixGetColormap(pixs);
+    cmapflag = (cmap) ? 1 : 0;
+    if (d == 2 || d == 4 || d == 16) {
+            /* Check the conversion before querying the result */
+        if ((pixt = pixConvertTo8(pixs, cmapflag)) == NULL)
+            return (L_COMP_DATA *)ERROR_PTR("pixt not made", __func__, NULL);
+        cmap = pixGetColormap(pixt);
+        d = pixGetDepth(pixt);
+    } else if (d == 32 && pixGetSpp(pixs) == 4) {  /* remove alpha */
+        pixt = pixAlphaBlendUniform(pixs, 0xffffff00);
+    } else {
+        pixt = pixClone(pixs);
+    }
+    if (!pixt)
+        return (L_COMP_DATA *)ERROR_PTR("pixt not made", __func__, NULL);
+    spp = (d == 32) ? 3 : 1;
+    bps = (d == 32) ? 8 : d;
+
+        /* Extract and encode the colormap data as both ascii85 and hexascii  */
+    ncolors = 0;
+    if (cmap) {
+        pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
+        if (!cmapdata) {
+            pixDestroy(&pixt);
+            return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
+                                            __func__, NULL);
+        }
+
+        cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
+        cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
+        LEPT_FREE(cmapdata);
+    }
+
+        /* Extract and compress the raster data */
+    pixGetRasterData(pixt, &data, &nbytes);
+    pixDestroy(&pixt);
+    if (!data) {
+        LEPT_FREE(cmapdata85);
+        LEPT_FREE(cmapdatahex);
+        return (L_COMP_DATA *)ERROR_PTR("data not returned", __func__, NULL);
+    }
+    datacomp = zlibCompress(data, nbytes, &nbytescomp);
+    LEPT_FREE(data);
+    if (!datacomp) {
+        LEPT_FREE(cmapdata85);
+        LEPT_FREE(cmapdatahex);
+        return (L_COMP_DATA *)ERROR_PTR("datacomp not made", __func__, NULL);
+    }
+
+        /* Optionally, encode the compressed data */
+    if (ascii85flag == 1) {
+        data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
+        LEPT_FREE(datacomp);
+        datacomp = NULL;  /* the binary buffer is gone */
+        if (!data85) {
+            LEPT_FREE(cmapdata85);
+            LEPT_FREE(cmapdatahex);
+            return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
+        } else {
+            data85[nbytes85 - 1] = '\0';  /* remove the newline */
+        }
+    }
+
+        /* Do not dereference a failed allocation; release every
+         * buffer that would otherwise have been handed to the cid. */
+    cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
+    if (!cid) {
+        if (datacomp) LEPT_FREE(datacomp);
+        if (data85) LEPT_FREE(data85);
+        LEPT_FREE(cmapdata85);
+        LEPT_FREE(cmapdatahex);
+        return (L_COMP_DATA *)ERROR_PTR("cid not made", __func__, NULL);
+    }
+    if (ascii85flag == 0) {
+        cid->datacomp = datacomp;
+    } else {  /* ascii85 */
+        cid->data85 = data85;
+        cid->nbytes85 = nbytes85;
+    }
+    cid->type = L_FLATE_ENCODE;
+    cid->cmapdatahex = cmapdatahex;
+    cid->cmapdata85 = cmapdata85;
+    cid->nbytescomp = nbytescomp;
+    cid->ncolors = ncolors;
+    cid->w = w;
+    cid->h = h;
+    cid->bps = bps;
+    cid->spp = spp;
+    cid->res = pixGetXRes(pixs);
+    cid->nbytes = nbytes;  /* only for debugging */
+    return cid;
+}
+
+
+/*!
+ * \brief   pixGenerateJpegData()
+ *
+ * \param[in]    pixs           8, 16 or 32 bpp, no colormap
+ * \param[in]    ascii85flag    0 for jpeg; 1 for ascii85-encoded jpeg
+ * \param[in]    quality        0 for default, which is 75
+ * \return  cid jpeg compressed data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Set ascii85flag:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (2) If 16 bpp, convert first to 8 bpp, using the MSB
+ * </pre>
+ */
+static L_COMP_DATA *
+pixGenerateJpegData(PIX     *pixs,
+                    l_int32  ascii85flag,
+                    l_int32  quality)
+{
+l_int32       d;
+char         *fname;
+L_COMP_DATA  *cid;
+
+    if (!pixs)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
+    if (pixGetColormap(pixs))
+        return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
+    d = pixGetDepth(pixs);
+    if (d != 8 && d != 16 && d != 32)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not 8, 16 or 32 bpp",
+                __func__, NULL);
+
+        /* Compress to a temp jpeg file */
+    if ((fname = l_makeTempFilename()) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR("temp filename not made",
+                                        __func__, NULL);
+    if (pixWriteJpeg(fname, pixs, quality, 0)) {
+            /* The temp file may already exist on disk; remove it
+             * (best effort) so a failed write leaves nothing behind. */
+        lept_rmfile(fname);
+        LEPT_FREE(fname);
+        return NULL;
+    }
+
+        /* Generate the data from the temp file, then remove the file */
+    cid = l_generateJpegData(fname, ascii85flag);
+    if (lept_rmfile(fname) != 0)
+        L_ERROR("temp file %s was not deleted\n", __func__, fname);
+    LEPT_FREE(fname);
+    return cid;
+}
+
+
+/*!
+ * \brief   pixGenerateJp2kData()
+ *
+ * \param[in]    pixs           8 or 32 bpp, no colormap
+ * \param[in]    quality        0 for default, which is 34
+ * \return  cid jp2k compressed data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The quality can be set between 27 (very poor) and 45
+ *          (nearly perfect).  Use 0 for default (34). Use 100 for lossless,
+ *          but this is very expensive and not recommended.
+ * </pre>
+ */
+static L_COMP_DATA *
+pixGenerateJp2kData(PIX     *pixs,
+                    l_int32  quality)
+{
+l_int32       d;
+char         *fname;
+L_COMP_DATA  *cid;
+
+    if (!pixs)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
+    if (pixGetColormap(pixs))
+        return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
+    d = pixGetDepth(pixs);
+    if (d != 8 && d != 32)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL);
+
+        /* Compress to a temp jp2k file */
+    if ((fname = l_makeTempFilename()) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR("temp filename not made",
+                                        __func__, NULL);
+    if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) {
+            /* The temp file may already exist on disk; remove it
+             * (best effort) so a failed write leaves nothing behind. */
+        lept_rmfile(fname);
+        LEPT_FREE(fname);
+        return NULL;
+    }
+
+        /* Generate the data from the temp file, then remove the file */
+    cid = l_generateJp2kData(fname);
+    if (lept_rmfile(fname) != 0)
+        L_ERROR("temp file %s was not deleted\n", __func__, fname);
+    LEPT_FREE(fname);
+    return cid;
+}
+
+
+/*!
+ * \brief   pixGenerateG4Data()
+ *
+ * \param[in]    pixs           1 bpp, no colormap
+ * \param[in]    ascii85flag    0 for g4 compressed; 1 for ascii85-encoded g4
+ * \return  cid g4 compressed image data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Set ascii85flag:
+ *           ~ 0 for binary data (PDF only)
+ *           ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
+ *      (2) The image is written to a temp tiff g4 file, from which the
+ *          compressed data is extracted; the temp file is then removed.
+ * </pre>
+ */
+static L_COMP_DATA *
+pixGenerateG4Data(PIX     *pixs,
+                  l_int32  ascii85flag)
+{
+char         *fname;
+L_COMP_DATA  *cid;
+
+    if (!pixs)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
+    if (pixGetDepth(pixs) != 1)
+        return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", __func__, NULL);
+    if (pixGetColormap(pixs))
+        return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
+
+        /* Compress to a temp tiff g4 file */
+    if ((fname = l_makeTempFilename()) == NULL)
+        return (L_COMP_DATA *)ERROR_PTR("temp filename not made",
+                                        __func__, NULL);
+    if (pixWrite(fname, pixs, IFF_TIFF_G4)) {
+            /* The temp file may already exist on disk; remove it
+             * (best effort) so a failed write leaves nothing behind. */
+        lept_rmfile(fname);
+        LEPT_FREE(fname);
+        return NULL;
+    }
+
+        /* Generate the data from the temp file, then remove the file */
+    cid = l_generateG4Data(fname, ascii85flag);
+    if (lept_rmfile(fname) != 0)
+        L_ERROR("temp file %s was not deleted\n", __func__, fname);
+    LEPT_FREE(fname);
+    return cid;
+}
+
+
+/*!
+ * \brief   cidConvertToPdfData()
+ *
+ * \param[in]    cid       compressed image data
+ * \param[in]    title     [optional] pdf title; can be null
+ * \param[out]   pdata     output pdf data for image
+ * \param[out]   pnbytes   size of output pdf data
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Caller must not destroy the cid.  It is absorbed in the
+ *          lpd and destroyed by this function.
+ *      (2) The cid is also destroyed on every error return, so the
+ *          caller never has to clean it up after this call.
+ *      (3) The page size is the image size at the cid resolution;
+ *          DefaultInputRes is used if the cid resolution is <= 0.
+ * </pre>
+ */
+l_ok
+cidConvertToPdfData(L_COMP_DATA  *cid,
+                    const char   *title,
+                    l_uint8     **pdata,
+                    size_t       *pnbytes)
+{
+l_int32      res, ret;
+l_float32    wpt, hpt;
+L_PDF_DATA  *lpd = NULL;
+
+    if (!pdata || !pnbytes) {
+            /* Ownership of the cid was passed in; do not leak it */
+        if (cid) l_CIDataDestroy(&cid);
+        return ERROR_INT("&data and &nbytes not both defined", __func__, 1);
+    }
+    *pdata = NULL;
+    *pnbytes = 0;
+    if (!cid)
+        return ERROR_INT("cid not defined", __func__, 1);
+
+        /* Get media box parameters, in pts */
+    res = cid->res;
+    if (res <= 0)
+        res = DefaultInputRes;
+    wpt = cid->w * 72.f / res;
+    hpt = cid->h * 72.f / res;
+
+        /* Set up the pdf data struct (lpd) */
+    if ((lpd = pdfdataCreate(title)) == NULL) {
+            /* The cid was never attached to an lpd, so it has to be
+             * destroyed here to honor the ownership contract. */
+        l_CIDataDestroy(&cid);
+        return ERROR_INT("lpd not made", __func__, 1);
+    }
+    ptraAdd(lpd->cida, cid);
+    lpd->n++;
+    ptaAddPt(lpd->xy, 0, 0);   /* xpt = ypt = 0 */
+    ptaAddPt(lpd->wh, wpt, hpt);
+
+        /* Generate the pdf string and destroy the lpd */
+    ret = l_generatePdf(pdata, pnbytes, lpd);
+    pdfdataDestroy(&lpd);
+    if (ret)
+        return ERROR_INT("pdf output not made", __func__, 1);
+    return 0;
+}
+
+
+/*!
+ * \brief   l_CIDataDestroy()
+ *
+ * \param[in,out]   pcid     will be set to null before returning
+ * \return  void
+ */
+void
+l_CIDataDestroy(L_COMP_DATA  **pcid)
+{
+L_COMP_DATA  *victim;
+
+    if (!pcid) {
+        L_WARNING("ptr address is null!\n", __func__);
+        return;
+    }
+
+        /* Nothing to do if the handle is already empty */
+    victim = *pcid;
+    if (victim != NULL) {
+            /* Release each buffer that is present, then the struct */
+        if (victim->datacomp != NULL)
+            LEPT_FREE(victim->datacomp);
+        if (victim->data85 != NULL)
+            LEPT_FREE(victim->data85);
+        if (victim->cmapdata85 != NULL)
+            LEPT_FREE(victim->cmapdata85);
+        if (victim->cmapdatahex != NULL)
+            LEPT_FREE(victim->cmapdatahex);
+        LEPT_FREE(victim);
+        *pcid = NULL;
+    }
+}
+
+
+/*---------------------------------------------------------------------*
+ *         Helper functions for generating the output pdf string       *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   l_generatePdf()
+ *
+ * \param[out]   pdata     pdf array
+ * \param[out]   pnbytes   number of bytes in pdf array
+ * \param[in]    lpd       all the required input image data
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) On error, no data is returned.
+ *      (2) The objects are:
+ *            1: Catalog
+ *            2: Info
+ *            3: Pages
+ *            4: Page
+ *            5: Contents  (rendering command)
+ *            6 to 6+n-1: n XObjects
+ *            6+n to 6+n+m-1: m colormaps
+ * </pre>
+ */
+static l_int32
+l_generatePdf(l_uint8    **pdata,
+              size_t      *pnbytes,
+              L_PDF_DATA  *lpd)
+{
+l_int32  status;
+
+        /* Validate the arguments, clearing each output as soon as
+         * it is known to be usable. */
+    if (pdata == NULL)
+        return ERROR_INT("&data not defined", __func__, 1);
+    *pdata = NULL;
+    if (pnbytes == NULL)
+        return ERROR_INT("&nbytes not defined", __func__, 1);
+    *pnbytes = 0;
+    if (lpd == NULL)
+        return ERROR_INT("lpd not defined", __func__, 1);
+
+        /* Build the pieces of the pdf, in this order, in the lpd */
+    generateFixedStringsPdf(lpd);
+    generateMediaboxPdf(lpd);
+    generatePageStringPdf(lpd);
+    generateContentStringPdf(lpd);
+    generatePreXStringsPdf(lpd);
+    generateColormapStringsPdf(lpd);
+    generateTrailerPdf(lpd);
+
+        /* Assemble the output; its status is the result of the call */
+    status = generateOutputDataPdf(pdata, pnbytes, lpd);
+    return status;
+}
+
+
+/*
+ * generateFixedStringsPdf()
+ *
+ * Builds the pdf header string, objects 1 (Catalog), 2 (Info) and
+ * 3 (Pages), and the post-datastream string, storing them in the lpd.
+ * The length of the header and of each of the three objects is
+ * appended to lpd->objsize.
+ */
+static void
+generateFixedStringsPdf(L_PDF_DATA  *lpd)
+{
+char     buf[L_SMALLBUF];
+char    *version, *datestr;
+SARRAY  *sa;
+
+        /* Accumulate data for the header and objects 1-3 */
+    lpd->id = stringNew("%PDF-1.5\n");
+    l_dnaAddNumber(lpd->objsize, strlen(lpd->id));
+
+    lpd->obj1 = stringNew("1 0 obj\n"
+                          "<<\n"
+                          "/Type /Catalog\n"
+                          "/Pages 3 0 R\n"
+                          ">>\n"
+                          "endobj\n");
+    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1));
+
+        /* Object 2 is assembled from pieces that are concatenated
+         * without any added separators. */
+    sa = sarrayCreate(0);
+    sarrayAddString(sa, "2 0 obj\n"
+                        "<<\n", L_COPY);
+    if (var_WRITE_DATE_AND_VERSION) {
+        datestr = l_getFormattedDate();
+        snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr);
+        sarrayAddString(sa, buf, L_COPY);
+        LEPT_FREE(datestr);
+        version = getLeptonicaVersion();
+        snprintf(buf, sizeof(buf),
+                 "/Producer (leptonica: %s)\n", version);
+        LEPT_FREE(version);
+    } else {
+        snprintf(buf, sizeof(buf), "/Producer (leptonica)\n");
+    }
+    sarrayAddString(sa, buf, L_COPY);
+    if (lpd->title) {
+        char *hexstr;
+        if ((hexstr = generateEscapeString(lpd->title)) != NULL) {
+                /* Add the pieces separately instead of formatting
+                 * them through the fixed-size buf: a long title would
+                 * be truncated there, losing the end of the escaped
+                 * string and the trailing newline. */
+            sarrayAddString(sa, "/Title ", L_COPY);
+            sarrayAddString(sa, hexstr, L_COPY);
+            sarrayAddString(sa, "\n", L_COPY);
+        } else {
+            L_ERROR("title string is not ascii\n", __func__);
+        }
+        LEPT_FREE(hexstr);
+    }
+    sarrayAddString(sa, ">>\n"
+                                "endobj\n", L_COPY);
+    lpd->obj2 = sarrayToString(sa, 0);
+    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2));
+    sarrayDestroy(&sa);
+
+        /* Like objects 1 and 2, object 3 must be closed with "endobj".
+         * Its size is recorded from the string itself, so the recorded
+         * size stays consistent. */
+    lpd->obj3 = stringNew("3 0 obj\n"
+                          "<<\n"
+                          "/Type /Pages\n"
+                          "/Kids [ 4 0 R ]\n"
+                          "/Count 1\n"
+                          ">>\n"
+                          "endobj\n");
+    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3));
+
+        /* Do the post-datastream string */
+    lpd->poststream = stringNew("\n"
+                                "endstream\n"
+                                "endobj\n");
+}
+
+
+/*!
+ * \brief   generateEscapeString()
+ *
+ * \param[in]   str      input string
+ * \return   hex escape string, or null on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If the input string is not ascii, returns null.
+ *      (2) This takes an input ascii string and generates a hex
+ *          ascii output string with 4 hex digits out for each byte in,
+ *          enclosed in angle brackets.  The feff code at the beginning
+ *          tells the pdf interpreter that the data is big-endian,
+ *          with 4 hex digits (16 bits) for each character.  For ascii,
+ *          the first two digits are 0 and the last two are a value
+ *          less than 0x80.
+ *      (3) The ascii test is made on the byte as an unsigned value,
+ *          so it does not depend on whether plain char is signed.
+ *      (4) The caller owns the returned string.
+ * </pre>
+ */
+static char  *
+generateEscapeString(const char  *str)
+{
+char    *buffer;
+l_int32  i, nchar, buflen;
+
+    if (!str)
+        return (char *)ERROR_PTR("str not defined", __func__, NULL);
+    nchar = strlen(str);
+    for (i = 0; i < nchar; i++) {
+        if ((unsigned char)str[i] > 0x7f)
+            return (char *)ERROR_PTR("str not all ascii", __func__, NULL);
+    }
+
+        /* 5 bytes for "<feff", 4 for each char, 1 for ">", plus the
+         * terminating nul; the buffer is zeroed when allocated. */
+    buflen = 4 * nchar + 10;
+    if ((buffer = (char *)LEPT_CALLOC(buflen, sizeof(char))) == NULL)
+        return (char *)ERROR_PTR("buffer not made", __func__, NULL);
+    memcpy(buffer, "<feff", 5);
+    for (i = 0; i < nchar; i++) {
+            /* Write each group in place; the nul that snprintf adds
+             * is overwritten by the next group or by the final '>'. */
+        snprintf(buffer + 5 + 4 * i, 5, "%04x",
+                 (unsigned int)(unsigned char)str[i]);
+    }
+    buffer[5 + 4 * nchar] = '>';
+    return buffer;
+}
+
+
+/*!
+ * \brief   generateMediaboxPdf()
+ *
+ * \param[in,out]   lpd    pdf data; sets lpd->mediabox, rewrites lpd->xy
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) The mediabox has its origin at (0,0) and extends to the
+ *          largest (x + w) and (y + h) over all images, in pts,
+ *          rounded to the nearest integer.
+ *      (2) On input, each y in lpd->xy locates the UL image corner
+ *          with respect to the UL mediabox corner.  On output, it
+ *          locates the LL image corner with respect to the LL mediabox
+ *          corner.  The unrounded maximum is used for this flip.
+ * </pre>
+ */
+static void
+generateMediaboxPdf(L_PDF_DATA  *lpd)
+{
+l_int32    k, nimages;
+l_float32  x, y, w, h, xmax, ymax;
+
+    nimages = lpd->n;
+
+        /* Pass 1: find the full extent of all the images */
+    xmax = 0;
+    ymax = 0;
+    for (k = 0; k < nimages; k++) {
+        ptaGetPt(lpd->xy, k, &x, &y);
+        ptaGetPt(lpd->wh, k, &w, &h);
+        xmax = L_MAX(xmax, x + w);
+        ymax = L_MAX(ymax, y + h);
+    }
+    lpd->mediabox = boxCreate(0, 0, (l_int32)(xmax + 0.5),
+                              (l_int32)(ymax + 0.5));
+
+        /* Pass 2: convert each y from image coordinates (origin at
+         * the upper left) to PostScript coordinates (origin at the
+         * lower left). */
+    for (k = 0; k < nimages; k++) {
+        ptaGetPt(lpd->xy, k, &x, &y);
+        ptaGetPt(lpd->wh, k, &w, &h);
+        ptaSetPt(lpd->xy, k, x, ymax - y - h);
+    }
+}
+
+
+/*!
+ * \brief   generatePageStringPdf()
+ *
+ * \param[in,out]   lpd    pdf data; sets lpd->obj4
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Makes the Page object (object 4).  Its MediaBox comes from
+ *          lpd->mediabox, and its XObject dictionary names image i
+ *          as /Im(i+1), referring to object 6 + i.
+ *      (2) Appends the size of the object to lpd->objsize.
+ * </pre>
+ */
+static l_int32
+generatePageStringPdf(L_PDF_DATA  *lpd)
+{
+char    *buf;
+char    *xstr;
+l_int32  bufsize, i, wpt, hpt;
+SARRAY  *sa;
+
+        /* Allocate 1000 bytes for the boilerplate text, and
+         * 50 bytes for each image reference in the XObject
+         * dictionary of the page resources.  */
+    bufsize = 1000 + 50 * lpd->n;
+    if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
+        return ERROR_INT("calloc fail for buf", __func__, 1);
+
+    boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt);
+    sa = sarrayCreate(lpd->n);
+    for (i = 0; i < lpd->n; i++) {
+        snprintf(buf, bufsize, "/Im%d %d 0 R   ", i + 1, 6 + i);
+        sarrayAddString(sa, buf, L_COPY);
+    }
+    xstr = sarrayToString(sa, 0);
+    sarrayDestroy(&sa);  /* not needed after this; destroyed only once */
+    if (!xstr) {
+        LEPT_FREE(buf);
+        return ERROR_INT("xstr not made", __func__, 1);
+    }
+
+    snprintf(buf, bufsize, "4 0 obj\n"
+                           "<<\n"
+                           "/Type /Page\n"
+                           "/Parent 3 0 R\n"
+                           "/MediaBox [%d %d %d %d]\n"
+                           "/Contents 5 0 R\n"
+                           "/Resources\n"
+                           "<<\n"
+                           "/XObject << %s >>\n"
+                           "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
+                           ">>\n"
+                           ">>\n"
+                           "endobj\n",
+                           0, 0, wpt, hpt, xstr);
+
+    lpd->obj4 = stringNew(buf);
+    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4));
+    LEPT_FREE(buf);
+    LEPT_FREE(xstr);
+    return 0;
+}
+
+
+/*!
+ * \brief   generateContentStringPdf()
+ *
+ * \param[in,out]   lpd    pdf data; sets lpd->obj5
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Makes the Contents object (object 5).  The stream has one
+ *          line for each image, which sets the matrix
+ *          [w 0 0 h x y] from lpd->wh and lpd->xy, and then draws
+ *          /Im(i+1).
+ *      (2) The /Length value is the number of bytes in those lines.
+ *      (3) Appends the size of the object to lpd->objsize.
+ * </pre>
+ */
+static l_int32
+generateContentStringPdf(L_PDF_DATA  *lpd)
+{
+char      *buf;
+char      *cstr;
+l_int32    i, bufsize;
+l_float32  xpt, ypt, wpt, hpt;
+SARRAY    *sa;
+
+        /* 1000 bytes for the boilerplate text, and 200 bytes for
+         * the rendering command of each image */
+    bufsize = 1000 + 200 * lpd->n;
+    if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
+        return ERROR_INT("calloc fail for buf", __func__, 1);
+
+    sa = sarrayCreate(lpd->n);
+    for (i = 0; i < lpd->n; i++) {
+        ptaGetPt(lpd->xy, i, &xpt, &ypt);
+        ptaGetPt(lpd->wh, i, &wpt, &hpt);
+        snprintf(buf, bufsize,
+                 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
+                 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
+        sarrayAddString(sa, buf, L_COPY);
+    }
+    cstr = sarrayToString(sa, 0);
+    sarrayDestroy(&sa);  /* not needed after this; destroyed only once */
+    if (!cstr) {
+        LEPT_FREE(buf);
+        return ERROR_INT("cstr not made", __func__, 1);
+    }
+
+    snprintf(buf, bufsize, "5 0 obj\n"
+                           "<< /Length %d >>\n"
+                           "stream\n"
+                           "%s"
+                           "endstream\n"
+                           "endobj\n",
+                           (l_int32)strlen(cstr), cstr);
+
+    lpd->obj5 = stringNew(buf);
+    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5));
+    LEPT_FREE(buf);
+    LEPT_FREE(cstr);
+    return 0;
+}
+
+
+/*!
+ * \brief   generatePreXStringsPdf()
+ *
+ * \param[in,out]   lpd    pdf data; strings are added to lpd->saprex
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) For each image, makes the text of the XObject that comes
+ *          before the compressed data: the object header and the
+ *          image dictionary, ending with "stream\n".  Image i is
+ *          object 6 + i.
+ *      (2) The size added to lpd->objsize for each image is that of
+ *          the whole object: this text, the compressed data and
+ *          lpd->poststream.
+ *      (3) Colormapped flate images refer to their colormap objects,
+ *          which are numbered consecutively starting at 6 + n.
+ *      (4) An unsupported number of samples/pixel is reported, and
+ *          the colorspace entry is then left empty.
+ * </pre>
+ */
+static l_int32
+generatePreXStringsPdf(L_PDF_DATA  *lpd)
+{
+char          buff[256];
+char          buf[L_BIGBUF];
+char         *cstr, *bstr, *fstr, *pstr, *xstr;
+const char   *photometry;
+l_int32       i, cmindex;
+L_COMP_DATA  *cid;
+SARRAY       *sa;
+
+    sa = lpd->saprex;
+    cmindex = 6 + lpd->n;  /* starting value */
+    for (i = 0; i < lpd->n; i++) {
+        pstr = cstr = NULL;
+        if ((cid = pdfdataGetCid(lpd, i)) == NULL)
+            return ERROR_INT("cid not found", __func__, 1);
+
+        if (cid->type == L_G4_ENCODE) {
+            if (var_WRITE_G4_IMAGE_MASK) {
+                cstr = stringNew("/ImageMask true\n"
+                                 "/ColorSpace /DeviceGray");
+            } else {
+                cstr = stringNew("/ColorSpace /DeviceGray");
+            }
+            bstr = stringNew("/BitsPerComponent 1\n"
+                             "/Interpolate true");
+                /* Note: the reversal is deliberate.  The BlackIs1 flag
+                 * is misleadingly named: it says whether to invert the
+                 * image on decoding because the black pixels are 0,
+                 * not whether the black pixels are 1!  The default for
+                 * BlackIs1 is "false", which means "don't invert because
+                 * black is 1."  Yikes. */
+            photometry = (cid->minisblack) ? "true" : "false";
+            snprintf(buff, sizeof(buff),
+                     "/Filter /CCITTFaxDecode\n"
+                     "/DecodeParms\n"
+                     "<<\n"
+                     "/BlackIs1 %s\n"
+                     "/K -1\n"
+                     "/Columns %d\n"
+                     ">>", photometry, cid->w);
+            fstr = stringNew(buff);
+        } else if (cid->type == L_JPEG_ENCODE) {
+            if (cid->spp == 1)
+                cstr = stringNew("/ColorSpace /DeviceGray");
+            else if (cid->spp == 3)
+                cstr = stringNew("/ColorSpace /DeviceRGB");
+            else if (cid->spp == 4)   /* pdf supports cmyk */
+                cstr = stringNew("/ColorSpace /DeviceCMYK");
+            else
+                L_ERROR("in jpeg: spp != 1, 3 or 4\n", __func__);
+            bstr = stringNew("/BitsPerComponent 8");
+            fstr = stringNew("/Filter /DCTDecode");
+        } else if (cid->type == L_JP2K_ENCODE) {
+            if (cid->spp == 1)
+                cstr = stringNew("/ColorSpace /DeviceGray");
+            else if (cid->spp == 3)
+                cstr = stringNew("/ColorSpace /DeviceRGB");
+            else
+                L_ERROR("in jp2k: spp != 1 && spp != 3\n", __func__);
+            bstr = stringNew("/BitsPerComponent 8");
+            fstr = stringNew("/Filter /JPXDecode");
+        } else {  /* type == L_FLATE_ENCODE */
+            if (cid->ncolors > 0) {  /* cmapped */
+                snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++);
+                cstr = stringNew(buff);
+            } else {
+                if (cid->spp == 1 && cid->bps == 1)
+                    cstr = stringNew("/ColorSpace /DeviceGray\n"
+                                     "/Decode [1 0]");
+                else if (cid->spp == 1)  /* 8 bpp */
+                    cstr = stringNew("/ColorSpace /DeviceGray");
+                else if (cid->spp == 3)
+                    cstr = stringNew("/ColorSpace /DeviceRGB");
+                else
+                    L_ERROR("unknown colorspace: spp = %d\n",
+                            __func__, cid->spp);
+            }
+            snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps);
+            bstr = stringNew(buff);
+            fstr = stringNew("/Filter /FlateDecode");
+            if (cid->predictor == TRUE) {
+                snprintf(buff, sizeof(buff),
+                         "/DecodeParms\n"
+                         "<<\n"
+                         "  /Columns %d\n"
+                         "  /Predictor 14\n"
+                         "  /Colors %d\n"
+                         "  /BitsPerComponent %d\n"
+                         ">>\n", cid->w, cid->spp, cid->bps);
+                pstr = stringNew(buff);
+            }
+        }
+        if (!cstr)  /* unsupported spp, reported above; never give a
+                     * null pointer to the %s conversion below */
+            cstr = stringNew("");
+        if (!pstr)  /* no decode parameters */
+            pstr = stringNew("");
+
+        snprintf(buf, sizeof(buf),
+                 "%d 0 obj\n"
+                 "<<\n"
+                 "/Length %zu\n"
+                 "/Subtype /Image\n"
+                 "%s\n"  /* colorspace */
+                 "/Width %d\n"
+                 "/Height %d\n"
+                 "%s\n"  /* bits/component */
+                 "%s\n"  /* filter */
+                 "%s"   /* decode parms; can be empty */
+                 ">>\n"
+                 "stream\n",
+                 6 + i, cid->nbytescomp, cstr,
+                 cid->w, cid->h, bstr, fstr, pstr);
+        xstr = stringNew(buf);
+        sarrayAddString(sa, xstr, L_INSERT);
+        l_dnaAddNumber(lpd->objsize,
+                      strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream));
+        LEPT_FREE(cstr);
+        LEPT_FREE(bstr);
+        LEPT_FREE(fstr);
+        LEPT_FREE(pstr);
+    }
+
+    return 0;
+}
+
+
+/*!
+ * \brief   generateColormapStringsPdf()
+ *
+ * \param[in,out]   lpd    pdf data; strings are added to lpd->sacmap
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Makes one colormap object for each image that has colors
+ *          (ncolors != 0), in image order.
+ *      (2) In our canonical format there are 5 objects, followed by
+ *          n XObjects, followed by m colormaps, so the first colormap
+ *          is object 6 + n.
+ *      (3) Appends the size of each object to lpd->objsize, and
+ *          saves the number of colormaps in lpd->ncmap.
+ * </pre>
+ */
+static l_int32
+generateColormapStringsPdf(L_PDF_DATA  *lpd)
+{
+char          cmbuf[L_BIGBUF];
+char         *objstr;
+l_int32       k, objnum, count;
+L_COMP_DATA  *cid;
+SARRAY       *sacmap;
+
+    sacmap = lpd->sacmap;
+    objnum = 6 + lpd->n;  /* number of the first colormap object */
+    count = 0;
+    for (k = 0; k < lpd->n; k++) {
+        cid = pdfdataGetCid(lpd, k);
+        if (!cid)
+            return ERROR_INT("cid not found", __func__, 1);
+
+        if (cid->ncolors != 0) {
+                /* The highest index of the palette is ncolors - 1 */
+            snprintf(cmbuf, sizeof(cmbuf), "%d 0 obj\n"
+                                           "[ /Indexed /DeviceRGB\n"
+                                           "%d\n"
+                                           "%s\n"
+                                           "]\n"
+                                           "endobj\n",
+                                           objnum, cid->ncolors - 1,
+                                           cid->cmapdatahex);
+            objstr = stringNew(cmbuf);
+            l_dnaAddNumber(lpd->objsize, strlen(objstr));
+            sarrayAddString(sacmap, objstr, L_INSERT);
+            objnum++;
+            count++;
+        }
+    }
+
+    lpd->ncmap = count;
+    return 0;
+}
+
+
+/*!
+ * \brief   generateTrailerPdf()
+ *
+ * \param[in,out]   lpd    pdf data; fills lpd->objloc, and sets
+ *                         lpd->xrefloc and lpd->trailer
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) Let nobj be the number of numbered objects.  The sizes in
+ *          lpd->objsize are indexed by pdf object number, with the
+ *          9 byte header as object 0, so there are n = nobj + 1 sizes.
+ *      (2) The locations in lpd->objloc are the running sums of those
+ *          sizes.  There are n + 1 of them, because the last one is
+ *          the starting location of xref:
+ *
+ *             Object number     Starting location        Size
+ *             -------------     -----------------    -------------
+ *                  0               loc[0] = 0         size[0] = 9
+ *                  1               loc[1] = 9         size[1] = 49
+ *                n - 1             loc[n - 1]         size[n - 1]
+ *                xref              loc[n]
+ * </pre>
+ */
+static void
+generateTrailerPdf(L_PDF_DATA  *lpd)
+{
+l_int32  k, nsizes, objbytes, offset;
+L_DNA   *sizes, *locs;
+
+    sizes = lpd->objsize;
+    locs = lpd->objloc;
+    nsizes = l_dnaGetCount(sizes);
+
+        /* The header starts at 0; everything else starts where
+         * the object before it ends. */
+    offset = 0;
+    l_dnaAddNumber(locs, offset);
+    k = 0;
+    while (k < nsizes) {
+        l_dnaGetIValue(sizes, k, &objbytes);
+        offset += objbytes;
+        l_dnaAddNumber(locs, offset);
+        k++;
+    }
+
+        /* The last location is where xref goes; save it */
+    l_dnaGetIValue(locs, nsizes, &lpd->xrefloc);
+
+        /* Now make the actual trailer string */
+    lpd->trailer = makeTrailerStringPdf(locs);
+}
+
+
+/*!
+ * \brief   makeTrailerStringPdf()
+ *
+ * \param[in]    daloc     starting locations; the last is for xref
+ * \return  string with the xref table and trailer, or null on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The number of xref entries is one less than the number of
+ *          locations.  Entry 0 is the free entry; the location at
+ *          index 0 is not written.
+ *      (2) The last location is written after "startxref".
+ *      (3) The caller owns the returned string.
+ * </pre>
+ */
+static char *
+makeTrailerStringPdf(L_DNA  *daloc)
+{
+char     line[L_BIGBUF];
+char    *trailer;
+l_int32  k, nentries, offset, xrefstart;
+SARRAY  *parts;
+
+    if (daloc == NULL)
+        return (char *)ERROR_PTR("daloc not defined", __func__, NULL);
+
+        /* Numbered objects + 1, for the free entry */
+    nentries = l_dnaGetCount(daloc) - 1;
+
+        /* The xref table: subsection header and free entry, ... */
+    parts = sarrayCreate(0);
+    snprintf(line, sizeof(line), "xref\n"
+                                 "0 %d\n"
+                                 "0000000000 65535 f \n", nentries);
+    sarrayAddString(parts, line, L_COPY);
+
+        /* ... followed by one entry for each numbered object */
+    k = 1;
+    while (k < nentries) {
+        l_dnaGetIValue(daloc, k, &offset);
+        snprintf(line, sizeof(line), "%010d 00000 n \n", offset);
+        sarrayAddString(parts, line, L_COPY);
+        k++;
+    }
+
+        /* The trailer dictionary and the pointer to xref */
+    l_dnaGetIValue(daloc, nentries, &xrefstart);
+    snprintf(line, sizeof(line), "trailer\n"
+                                 "<<\n"
+                                 "/Size %d\n"
+                                 "/Root 1 0 R\n"
+                                 "/Info 2 0 R\n"
+                                 ">>\n"
+                                 "startxref\n"
+                                 "%d\n"
+                                 "%%%%EOF\n", nentries, xrefstart);
+    sarrayAddString(parts, line, L_COPY);
+
+    trailer = sarrayToString(parts, 0);
+    sarrayDestroy(&parts);
+    return trailer;
+}
+
+
+/*!
+ * \brief   generateOutputDataPdf()
+ *
+ * \param[out]   pdata      pdf data array
+ * \param[out]   pnbytes    size of pdf data array
+ * \param[in]    lpd        input data used to make pdf
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Only called from l_generatePdf().  On error, no data is
+ *          returned: *pdata is null and *pnbytes is 0.
+ *      (2) Every piece is copied to the location saved for it in
+ *          lpd->objloc.  The pieces and the tables of sizes and
+ *          locations are checked before anything is copied, so
+ *          incomplete input gives an error instead of a bad read.
+ *      (3) The caller owns the returned array.
+ * </pre>
+ */
+static l_int32
+generateOutputDataPdf(l_uint8    **pdata,
+                      size_t      *pnbytes,
+                      L_PDF_DATA  *lpd)
+{
+char         *str;
+l_uint8      *data;
+l_int32       nimages, nneeded, i, len;
+l_int32      *sizes, *locs;
+size_t        nbytes, postlen, trailerlen;
+L_COMP_DATA  *cid;
+
+    if (!pdata)
+        return ERROR_INT("&data not defined", __func__, 1);
+    *pdata = NULL;
+    if (!pnbytes)
+        return ERROR_INT("&nbytes not defined", __func__, 1);
+    *pnbytes = 0;
+    if (!lpd)
+        return ERROR_INT("lpd not defined", __func__, 1);
+    if (!lpd->id || !lpd->obj1 || !lpd->obj2 || !lpd->obj3 ||
+        !lpd->obj4 || !lpd->obj5 || !lpd->poststream || !lpd->trailer)
+        return ERROR_INT("pdf strings not all made", __func__, 1);
+
+        /* There must be a size and a location for the header, the
+         * 5 fixed objects, each image and each colormap. */
+    nimages = lpd->n;
+    nneeded = 6 + nimages + lpd->ncmap;
+    if (l_dnaGetCount(lpd->objsize) < nneeded ||
+        l_dnaGetCount(lpd->objloc) < nneeded)
+        return ERROR_INT("object sizes or locations missing", __func__, 1);
+
+    postlen = strlen(lpd->poststream);
+    trailerlen = strlen(lpd->trailer);
+    nbytes = lpd->xrefloc + trailerlen;
+    if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL)
+        return ERROR_INT("calloc fail for data", __func__, 1);
+    sizes = l_dnaGetIArray(lpd->objsize);
+    locs = l_dnaGetIArray(lpd->objloc);
+    if (!sizes || !locs) {
+        LEPT_FREE(data);
+        LEPT_FREE(sizes);
+        LEPT_FREE(locs);
+        return ERROR_INT("sizes and locs not both made", __func__, 1);
+    }
+
+    memcpy(data, lpd->id, sizes[0]);
+    memcpy(data + locs[1], lpd->obj1, sizes[1]);
+    memcpy(data + locs[2], lpd->obj2, sizes[2]);
+    memcpy(data + locs[3], lpd->obj3, sizes[3]);
+    memcpy(data + locs[4], lpd->obj4, sizes[4]);
+    memcpy(data + locs[5], lpd->obj5, sizes[5]);
+
+        /* Each image has 3 parts: variable preamble, the compressed
+         * data stream, and the fixed poststream. */
+    for (i = 0; i < nimages; i++) {
+        if ((cid = pdfdataGetCid(lpd, i)) == NULL) {  /* should not happen */
+            LEPT_FREE(data);
+            LEPT_FREE(sizes);
+            LEPT_FREE(locs);
+            return ERROR_INT("cid not found", __func__, 1);
+        }
+        str = sarrayGetString(lpd->saprex, i, L_NOCOPY);
+        if (!str) {  /* should not happen */
+            LEPT_FREE(data);
+            LEPT_FREE(sizes);
+            LEPT_FREE(locs);
+            return ERROR_INT("preamble not found", __func__, 1);
+        }
+        len = strlen(str);
+        memcpy(data + locs[6 + i], str, len);
+        memcpy(data + locs[6 + i] + len,
+               cid->datacomp, cid->nbytescomp);
+        memcpy(data + locs[6 + i] + len + cid->nbytescomp,
+               lpd->poststream, postlen);
+    }
+
+        /* Each colormap is simply a stored string */
+    for (i = 0; i < lpd->ncmap; i++) {
+        str = sarrayGetString(lpd->sacmap, i, L_NOCOPY);
+        if (!str) {  /* should not happen */
+            LEPT_FREE(data);
+            LEPT_FREE(sizes);
+            LEPT_FREE(locs);
+            return ERROR_INT("colormap not found", __func__, 1);
+        }
+        memcpy(data + locs[6 + nimages + i], str, strlen(str));
+    }
+
+        /* And finally the trailer */
+    memcpy(data + lpd->xrefloc, lpd->trailer, trailerlen);
+    LEPT_FREE(sizes);
+    LEPT_FREE(locs);
+
+        /* The outputs are set only when everything has succeeded */
+    *pdata = data;
+    *pnbytes = nbytes;
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *          Helper functions for generating multipage pdf output       *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   parseTrailerPdf()
+ *
+ * \param[in]    bas     lba of a pdf file
+ * \param[out]   pda     byte locations of the beginning of each object
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The location of xref is read from the "startxref" entry,
+ *          which is looked for in the last 50 bytes of the file.
+ *      (2) The index into the returned array is the object number.
+ *          loc[0] is for the header, and the last element is the
+ *          location of xref.
+ *      (3) Every value read from the file is checked before use.
+ *          If an entry of the xref table cannot be read, or does not
+ *          point to the beginning of an object, the locations are
+ *          rebuilt by searching the file for " 0 obj\n" and "xref".
+ *      (4) The caller owns the returned array.
+ * </pre>
+ */
+static l_int32
+parseTrailerPdf(L_BYTEA  *bas,
+                L_DNA   **pda)
+{
+char     *str;
+l_uint8   nl = '\n';
+l_uint8  *data;
+l_int32   i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
+l_int32   nlines;
+size_t    size;
+L_DNA    *da, *daobj, *daxref;
+SARRAY   *sa;
+
+    if (!pda)
+        return ERROR_INT("&da not defined", __func__, 1);
+    *pda = NULL;
+    if (!bas)
+        return ERROR_INT("bas not defined", __func__, 1);
+    data = l_byteaGetData(bas, &size);
+    if (!data || size < 7 || memcmp(data, "%PDF-1.", 7) != 0)
+        return ERROR_INT("PDF header signature not found", __func__, 1);
+
+        /* Search for "startxref" starting 50 bytes from the EOF */
+    start = 0;
+    if (size > 50)
+        start = size - 50;
+    arrayFindSequence(data + start, size - start,
+                      (l_uint8 *)"startxref\n", 10, &loc, &found);
+    if (!found)
+        return ERROR_INT("startxref not found!", __func__, 1);
+    if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1)
+        return ERROR_INT("xrefloc not found!", __func__, 1);
+    if (xrefloc < 0 || (size_t)xrefloc >= size)
+        return ERROR_INT("invalid xrefloc!", __func__, 1);
+
+        /* Line 0 of the xref section is "xref" and line 1 is "0 <nobj>";
+         * they are followed by one line for each of the nobj entries. */
+    sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0);
+    if (!sa)
+        return ERROR_INT("sa not made", __func__, 1);
+    nlines = sarrayGetCount(sa);
+    str = (nlines > 1) ? sarrayGetString(sa, 1, L_NOCOPY) : NULL;
+    if (!str || (sscanf(str, "0 %d", &nobj)) != 1) {
+        sarrayDestroy(&sa);
+        return ERROR_INT("nobj not found", __func__, 1);
+    }
+    if (nobj < 1 || nobj > nlines - 2) {
+        sarrayDestroy(&sa);
+        return ERROR_INT("invalid nobj", __func__, 1);
+    }
+
+        /* Get starting locations.  The array index is the
+         * object number.  loc[0] is the ID; loc[nobj] is xrefloc.  */
+    if ((da = l_dnaCreate(nobj + 1)) == NULL) {
+        sarrayDestroy(&sa);
+        return ERROR_INT("da not made", __func__, 1);
+    }
+    *pda = da;
+    trailer_ok = TRUE;
+    for (i = 0; i < nobj; i++) {
+        str = sarrayGetString(sa, i + 2, L_NOCOPY);
+        if (!str || sscanf(str, "%d", &startloc) != 1) {
+            L_ERROR("bad xref entry for object %d\n", __func__, i);
+            trailer_ok = FALSE;
+            break;
+        }
+        l_dnaAddNumber(da, startloc);
+    }
+    if (trailer_ok)
+        l_dnaAddNumber(da, xrefloc);
+
+#if  DEBUG_MULTIPAGE
+    lept_stderr("************** Trailer string ************\n");
+    lept_stderr("xrefloc = %d", xrefloc);
+    sarrayWriteStderr(sa);
+
+    lept_stderr("************** Object locations ************");
+    l_dnaWriteStderr(da);
+#endif  /* DEBUG_MULTIPAGE */
+    sarrayDestroy(&sa);
+
+        /* Verify correct parsing: each location must be inside the
+         * data, and must be the beginning of an object. */
+    for (i = 1; trailer_ok && i < nobj; i++) {
+        l_dnaGetIValue(da, i, &startloc);
+        if (startloc < 0 || (size_t)startloc >= size ||
+            (sscanf((char *)(data + startloc), "%d 0 obj", &objno)) != 1) {
+            L_ERROR("bad trailer for object %d\n", __func__, i);
+            trailer_ok = FALSE;
+        }
+    }
+
+        /* If the trailer is broken, reconstruct the correct obj locations */
+    if (!trailer_ok) {
+        L_INFO("rebuilding pdf trailer\n", __func__);
+        l_dnaEmpty(da);
+        l_dnaAddNumber(da, 0);
+        daobj = daxref = NULL;
+        l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj);
+        nobj = (daobj) ? l_dnaGetCount(daobj) : 0;
+        for (i = 0; i < nobj; i++) {
+                /* The object begins just after the preceding newline */
+            l_dnaGetIValue(daobj, i, &loc);
+            for (j = loc - 1; j > 0; j--) {
+                if (data[j] == nl)
+                    break;
+            }
+            l_dnaAddNumber(da, j + 1);
+        }
+
+            /* Use the first "xref" in the file; if there is none,
+             * use the location given by startxref, which has
+             * been checked to be inside the data. */
+        l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref);
+        loc = xrefloc;
+        if (daxref)
+            l_dnaGetIValue(daxref, 0, &loc);
+        l_dnaAddNumber(da, loc);
+        l_dnaDestroy(&daobj);
+        l_dnaDestroy(&daxref);
+    }
+
+    return 0;
+}
+
+
+/*!
+ * \brief   generatePagesObjStringPdf()
+ *
+ * \param[in]    napage     object numbers of the pages
+ * \return  string for the Pages object (object 3), or null on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The Kids array has a reference to each object number in
+ *          %napage, in order, and Count is the number of them.
+ *      (2) The caller owns the returned string.
+ * </pre>
+ */
+static char *
+generatePagesObjStringPdf(NUMA  *napage)
+{
+char    *str;
+char    *buf;
+l_int32  i, n, index, bufsize;
+SARRAY  *sa;
+
+    if (!napage)
+        return (char *)ERROR_PTR("napage not defined", __func__, NULL);
+
+    n = numaGetCount(napage);
+    bufsize = 100 + 16 * n;  /* large enough to hold the output string */
+    if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
+        return (char *)ERROR_PTR("buf not made", __func__, NULL);
+    sa = sarrayCreate(n);
+    for (i = 0; i < n; i++) {
+        numaGetIValue(napage, i, &index);
+        snprintf(buf, bufsize, " %d 0 R ", index);
+        sarrayAddString(sa, buf, L_COPY);
+    }
+
+    str = sarrayToString(sa, 0);
+    sarrayDestroy(&sa);
+    if (!str) {  /* do not give a null pointer to %s */
+        LEPT_FREE(buf);
+        return (char *)ERROR_PTR("str not made", __func__, NULL);
+    }
+    snprintf(buf, bufsize, "3 0 obj\n"
+                           "<<\n"
+                           "/Type /Pages\n"
+                           "/Kids [%s]\n"
+                           "/Count %d\n"
+                           ">>\n"
+                           "endobj\n",
+                           str, n);
+    LEPT_FREE(str);
+    return buf;
+}
+
+
+/*!
+ * \brief   substituteObjectNumbers()
+ *
+ * \param[in]   bas        lba of a pdf object
+ * \param[in]   na_objs    object number mapping array
+ * \return    bad   lba of rewritten pdf for the object
+ *
+ * <pre>
+ * Notes:
+ *      (1) Interpret the first set of bytes as the object number,
+ *          map to the new number, and write it out.
+ *      (2) Find all occurrences of this 4-byte sequence: " 0 R"
+ *      (3) Find the location and value of the integer preceding this,
+ *          and map it to the new value.
+ *      (4) Rewrite the object with new object numbers.
+ *      (5) If an object number is not a valid index into %na_objs,
+ *          or the first one cannot be read, an error is reported and
+ *          what has been written so far is returned.
+ *      (6) A reference whose number cannot be read is copied
+ *          without change.
+ * </pre>
+ */
+static L_BYTEA *
+substituteObjectNumbers(L_BYTEA  *bas,
+                        NUMA     *na_objs)
+{
+l_uint8   space = ' ';
+l_uint8  *datas;
+l_uint8   buf[32];  /* only needs to hold one integer in ascii format */
+l_int32   start, nrepl, i, j, nobjs, objin, objout, found;
+l_int32  *objs, *matches;
+size_t    size;
+L_BYTEA  *bad;
+L_DNA    *da_match;
+
+    if (!bas)
+        return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL);
+    if (!na_objs)
+        return (L_BYTEA *)ERROR_PTR("na_objs not defined", __func__, NULL);
+
+    datas = l_byteaGetData(bas, &size);
+    if ((bad = l_byteaCreate(100)) == NULL)
+        return (L_BYTEA *)ERROR_PTR("bad not made", __func__, NULL);
+    nobjs = numaGetCount(na_objs);  /* use for sanity checking */
+    if ((objs = numaGetIArray(na_objs)) == NULL) {  /* number mapper */
+        L_ERROR("objs not made\n", __func__);
+        return bad;
+    }
+
+        /* Substitute the object number on the first line */
+    if (!datas || sscanf((char *)datas, "%d", &objin) != 1) {
+        L_ERROR("object number not found\n", __func__);
+        LEPT_FREE(objs);
+        return bad;
+    }
+    if (objin < 0 || objin >= nobjs) {
+        L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
+        LEPT_FREE(objs);
+        return bad;
+    }
+    objout = objs[objin];
+    snprintf((char *)buf, 32, "%d", objout);
+    l_byteaAppendString(bad, (char *)buf);
+
+        /* Find the set of matching locations for object references */
+    arrayFindSequence(datas, size, &space, 1, &start, &found);
+    da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4);
+    if (!da_match) {
+        l_byteaAppendData(bad, datas + start, size - start);
+        LEPT_FREE(objs);
+        return bad;
+    }
+
+        /* Substitute all the object reference numbers */
+    nrepl = l_dnaGetCount(da_match);
+    if ((matches = l_dnaGetIArray(da_match)) == NULL)
+        nrepl = 0;  /* copy the remainder without change */
+    for (i = 0; i < nrepl; i++) {
+            /* Find the first space before the object number */
+        for (j = matches[i] - 1; j > 0; j--) {
+            if (datas[j] == space)
+                break;
+        }
+            /* Leave this reference as it is if the number would be
+             * before the bytes not yet copied, or if it cannot be
+             * read; 'start' is not moved, so the bytes are copied later. */
+        if (j < start || sscanf((char *)(datas + j + 1), "%d", &objin) != 1) {
+            L_WARNING("object reference not parsed; not changed\n",
+                      __func__);
+            continue;
+        }
+            /* Copy bytes from 'start' up to the object number */
+        l_byteaAppendData(bad, datas + start, j - start + 1);
+        if (objin < 0 || objin >= nobjs) {
+            L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
+            LEPT_FREE(objs);
+            LEPT_FREE(matches);
+            l_dnaDestroy(&da_match);
+            return bad;
+        }
+        objout = objs[objin];
+        snprintf((char *)buf, 32, "%d", objout);
+        l_byteaAppendString(bad, (char *)buf);
+        start = matches[i];
+    }
+    l_byteaAppendData(bad, datas + start, size - start);
+
+    LEPT_FREE(objs);
+    LEPT_FREE(matches);
+    l_dnaDestroy(&da_match);
+    return bad;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                     Create/destroy/access pdf data                  *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   pdfdataCreate()
+ *
+ * \param[in]    title     [optional] title of the pdf; it is copied
+ * \return  lpd, with all its arrays made and empty, or null on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) All other fields are 0 or null.
+ *      (2) Use pdfdataDestroy() to free it.
+ * </pre>
+ */
+static L_PDF_DATA *
+pdfdataCreate(const char  *title)
+{
+L_PDF_DATA *lpd;
+
+    lpd = (L_PDF_DATA *)LEPT_CALLOC(1, sizeof(L_PDF_DATA));
+    if (!lpd)
+        return (L_PDF_DATA *)ERROR_PTR("lpd not made", __func__, NULL);
+    if (title) lpd->title = stringNew(title);
+    lpd->cida = ptraCreate(10);
+    lpd->xy = ptaCreate(10);
+    lpd->wh = ptaCreate(10);
+    lpd->saprex = sarrayCreate(10);
+    lpd->sacmap = sarrayCreate(10);
+    lpd->objsize = l_dnaCreate(20);
+    lpd->objloc = l_dnaCreate(20);
+    return lpd;
+}
+
+/*!
+ * \brief   pdfdataDestroy()
+ *
+ * \param[in,out]   plpd    will be set to null before returning
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) Frees the compressed data of each of the lpd->n images,
+ *          all the strings and arrays held by the lpd, and the lpd.
+ *      (2) A null address gives a warning; a null lpd is ignored.
+ * </pre>
+ */
+static void
+pdfdataDestroy(L_PDF_DATA  **plpd)
+{
+l_int32       k;
+L_COMP_DATA  *cid;
+L_PDF_DATA   *lpd;
+
+    if (!plpd) {
+        L_WARNING("ptr address is null!\n", __func__);
+        return;
+    }
+    lpd = *plpd;
+    if (!lpd)
+        return;
+
+        /* Take each image out of the ptra and free it; then
+         * free the ptra itself. */
+    if (lpd->title) LEPT_FREE(lpd->title);
+    k = 0;
+    while (k < lpd->n) {
+        cid = (L_COMP_DATA *)ptraRemove(lpd->cida, k, L_NO_COMPACTION);
+        l_CIDataDestroy(&cid);
+        k++;
+    }
+    ptraDestroy(&lpd->cida, 0, 0);
+
+        /* The strings */
+    if (lpd->id) LEPT_FREE(lpd->id);
+    if (lpd->obj1) LEPT_FREE(lpd->obj1);
+    if (lpd->obj2) LEPT_FREE(lpd->obj2);
+    if (lpd->obj3) LEPT_FREE(lpd->obj3);
+    if (lpd->obj4) LEPT_FREE(lpd->obj4);
+    if (lpd->obj5) LEPT_FREE(lpd->obj5);
+    if (lpd->poststream) LEPT_FREE(lpd->poststream);
+    if (lpd->trailer) LEPT_FREE(lpd->trailer);
+
+        /* The geometry and the arrays */
+    if (lpd->xy) ptaDestroy(&lpd->xy);
+    if (lpd->wh) ptaDestroy(&lpd->wh);
+    if (lpd->mediabox) boxDestroy(&lpd->mediabox);
+    if (lpd->saprex) sarrayDestroy(&lpd->saprex);
+    if (lpd->sacmap) sarrayDestroy(&lpd->sacmap);
+    if (lpd->objsize) l_dnaDestroy(&lpd->objsize);
+    if (lpd->objloc) l_dnaDestroy(&lpd->objloc);
+
+    LEPT_FREE(lpd);
+    *plpd = NULL;
+}
+
+
+/*!
+ * \brief   pdfdataGetCid()
+ *
+ * \param[in]    lpd       pdf data
+ * \param[in]    index     of the image; from 0 to lpd->n - 1
+ * \return  cid for that image, still held by lpd->cida,
+ *          or null on error
+ */
+static L_COMP_DATA *
+pdfdataGetCid(L_PDF_DATA  *lpd,
+              l_int32      index)
+{
+void  *item;
+
+    if (lpd == NULL)
+        return (L_COMP_DATA *)ERROR_PTR("lpd not defined", __func__, NULL);
+    if (!(index >= 0 && index < lpd->n))
+        return (L_COMP_DATA *)ERROR_PTR("invalid image index", __func__, NULL);
+
+    item = ptraGetPtrToItem(lpd->cida, index);
+    return (L_COMP_DATA *)item;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                     Find number of pages in a pdf                   *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   getPdfPageCount()
+ *
+ * \param[in]    fname      filename
+ * \param[out]   pnpages    number of pages
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Looks for the argument of the first instance of /Count in the file.
+ *      (2) This first reads 10000 bytes from the beginning of the file.
+ *          If "/Count" is not in that string, or is so close to the
+ *          end of it that the value may have been cut off, it reads
+ *          the entire file and looks for "/Count".
+ *      (3) This will not work on encrypted pdf files or on files where
+ *          the "/Count" field is binary compressed.  Not finding the
+ *          "/Count" field is not an error, but a warning is given.
+ * </pre>
+ */
+l_ok
+getPdfPageCount(const char  *fname,
+                l_int32     *pnpages)
+{
+l_uint8  *data;
+l_int32   format, loc, ret, npages, found;
+size_t    nread;
+
+    if (!pnpages)
+        return ERROR_INT("&npages not defined", __func__, 1);
+    *pnpages = 0;
+    if (!fname)
+        return ERROR_INT("fname not defined", __func__, 1);
+
+        /* Make sure this a pdf file */
+    findFileFormat(fname, &format);
+    if (format != IFF_LPDF)
+        return ERROR_INT("file is not pdf", __func__, 1);
+
+        /* Read 10000 bytes from the beginning of the file */
+    if ((data = l_binaryReadSelect(fname, 0, 10000, &nread))
+                 == NULL)
+        return ERROR_INT("partial data not read", __func__, 1);
+
+        /* Find the location of the first instance of "/Count".
+         * If it is not found, or if fewer than 12 bytes were read
+         * starting at that location, try reading the entire file
+         * and looking again. */
+    arrayFindSequence(data, nread, (const l_uint8 *)"/Count",
+          strlen("/Count"), &loc, &found);
+    if (!found || nread - loc < 12) {
+        if (!found)
+            lept_stderr("Reading entire file looking for '/Count'\n");
+        LEPT_FREE(data);
+        if ((data = l_binaryRead(fname, &nread)) == NULL)
+            return ERROR_INT("full data not read", __func__, 1);
+        arrayFindSequence(data, nread, (const l_uint8 *)"/Count",
+             strlen("/Count"), &loc, &found);
+        if (!found) {
+            LEPT_FREE(data);
+            L_WARNING("/Count not found\n", __func__);
+            return 0;
+        }
+    }
+
+        /* Unlikely: make sure we can read the count field */
+    if (nread - loc < 12)  { /* haven't read enough to capture page count */
+        LEPT_FREE(data);
+        return ERROR_INT("data may not include page count field", __func__, 1);
+    }
+
+        /* Read the page count; if not found, puts garbage in npages */
+    ret = sscanf((char *)&data[loc], "/Count %d", &npages);
+    LEPT_FREE(data);
+    if (ret != 1)
+        return ERROR_INT("npages not found", __func__, 1);
+    *pnpages = npages;
+/*    lept_stderr("bytes read = %d, loc = %d, npages = %d\n",
+                nread, loc, *pnpages);  */
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *      Find widths and heights of pages and media boxes in a pdf      *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   getPdfPageSizes()
+ *
+ * \param[in]    fname        filename
+ * \param[out]   pnaw         [optional] array of the parsed '/Width' values
+ * \param[out]   pnah         [optional] array of the parsed '/Height' values
+ * \param[out]   pmedw        [optional] median of the '/Width' values
+ * \param[out]   pmedh        [optional] median of the '/Height' values
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The whole file is read into memory and every occurrence of
+ *          the byte strings '/Width' and '/Height' is located.  The
+ *          integer that follows each occurrence is parsed; occurrences
+ *          that are not followed by an integer are reported and skipped.
+ *      (2) At least one of the four outputs must be requested.  All
+ *          requested outputs are cleared (NULL or 0) before any other
+ *          check is made.
+ *      (3) This cannot see fields in encrypted pdf files, or fields
+ *          that are inside binary compressed data.  If neither string
+ *          is found anywhere, warnings are issued and 0 is returned
+ *          with the outputs still cleared.
+ *      (4) Arrays returned through %pnaw and %pnah are handed to the
+ *          caller; arrays that are not requested are destroyed here.
+ * </pre>
+ */
+l_ok
+getPdfPageSizes(const char  *fname,
+                NUMA       **pnaw,
+                NUMA       **pnah,
+                l_int32     *pmedw,
+                l_int32     *pmedh)
+{
+l_uint8   *filedata;
+l_int32    idx, count, format, nparsed, offset, value;
+l_float32  median;
+size_t     nbytes;
+L_DNA     *wlocs;    /* offsets of each '/Width' */
+L_DNA     *hlocs;    /* offsets of each '/Height' */
+NUMA      *widths;   /* parsed width values */
+NUMA      *heights;  /* parsed height values */
+
+        /* Clear whatever outputs were supplied, then validate input */
+    if (pnaw) *pnaw = NULL;
+    if (pnah) *pnah = NULL;
+    if (pmedw) *pmedw = 0;
+    if (pmedh) *pmedh = 0;
+    if (!pnaw && !pnah && !pmedw && !pmedh)
+        return ERROR_INT("no output requested", __func__, 1);
+    if (!fname)
+        return ERROR_INT("fname not defined", __func__, 1);
+
+        /* Only pdf files are accepted */
+    findFileFormat(fname, &format);
+    if (format != IFF_LPDF)
+        return ERROR_INT("file is not pdf", __func__, 1);
+
+        /* Load the file and collect the offsets of both keys */
+    filedata = l_binaryRead(fname, &nbytes);
+    if (filedata == NULL)
+        return ERROR_INT("full data not read", __func__, 1);
+    wlocs = arrayFindEachSequence(filedata, nbytes,
+                                  (const l_uint8 *)"/Width",
+                                  strlen("/Width"));
+    hlocs = arrayFindEachSequence(filedata, nbytes,
+                                  (const l_uint8 *)"/Height",
+                                  strlen("/Height"));
+    if (wlocs == NULL)
+        L_WARNING("unable to find widths\n", __func__);
+    if (hlocs == NULL)
+        L_WARNING("unable to find heights\n", __func__);
+    if (wlocs == NULL && hlocs == NULL) {
+        LEPT_FREE(filedata);
+        L_WARNING("no fields found\n", __func__);
+        return 0;
+    }
+
+        /* Parse the integer following each '/Width' */
+    count = l_dnaGetCount(wlocs);
+    widths = numaCreate(count);
+    for (idx = 0; idx < count; idx++) {
+        l_dnaGetIValue(wlocs, idx, &offset);
+        nparsed = sscanf((char *)(filedata + offset), "/Width %d", &value);
+        if (nparsed == 1) {
+            numaAddNumber(widths, value);
+        } else {
+            L_ERROR("width not found for item %d at loc %d\n",
+                    __func__, idx, offset);
+        }
+    }
+
+        /* Parse the integer following each '/Height' */
+    count = l_dnaGetCount(hlocs);
+    heights = numaCreate(count);
+    for (idx = 0; idx < count; idx++) {
+        l_dnaGetIValue(hlocs, idx, &offset);
+        nparsed = sscanf((char *)(filedata + offset), "/Height %d", &value);
+        if (nparsed == 1) {
+            numaAddNumber(heights, value);
+        } else {
+            L_ERROR("height not found for item %d at loc %d\n",
+                    __func__, idx, offset);
+        }
+    }
+
+        /* The raw data and the offset arrays are no longer needed */
+    LEPT_FREE(filedata);
+    l_dnaDestroy(&wlocs);
+    l_dnaDestroy(&hlocs);
+
+        /* Medians first, while both arrays are still owned here */
+    if (pmedw) {
+        numaGetMedian(widths, &median);
+        *pmedw = lept_roundftoi(median);
+    }
+    if (pmedh) {
+        numaGetMedian(heights, &median);
+        *pmedh = lept_roundftoi(median);
+    }
+
+        /* Hand off the requested arrays; destroy the others */
+    if (pnaw)
+        *pnaw = widths;
+    else
+        numaDestroy(&widths);
+    if (pnah)
+        *pnah = heights;
+    else
+        numaDestroy(&heights);
+    return 0;
+}
+
+
+/*!
+ * \brief   getPdfMediaBoxSizes()
+ *
+ * \param[in]    fname        filename
+ * \param[out]   pnaw         [optional] array of mediabox widths
+ * \param[out]   pnah         [optional] array of mediabox heights
+ * \param[out]   pmedw        [optional] median mediabox width
+ * \param[out]   pmedh        [optional] median mediabox height
+ * \return  0 if OK, 1 on error or if no '/MediaBox' is found
+ *
+ * <pre>
+ * Notes:
+ *      (1) Finds the arguments of each instance of '/MediaBox' in the file.
+ *          A mediabox is a rectangle given by the coordinates of two
+ *          opposite corners, "[x0 y0 x1 y1]".  The width and height are
+ *          computed as |x1 - x0| and |y1 - y0|, so boxes that do not
+ *          start at the origin are measured correctly.
+ *      (2) This will not work on encrypted pdf files or on files where
+ *          the "/MediaBox" field is binary compressed.  If no "/MediaBox"
+ *          field is found, a warning is given and 1 is returned;
+ *          getPdfRendererResolution() uses that return value to decide
+ *          whether it has to render a page to measure its size.
+ *      (3) This is useful for determining if the media boxes are
+ *          incorrectly assigned, such as assuming the resolution is 72 ppi.
+ *          If that happens and the input to the renderer assumes the
+ *          resolution is 300 ppi, the rendered images will be over 4x too
+ *          large in each dimension.
+ *      (4) An image dimension of 11 inches corresponds to a MediaBox
+ *          parameter of 792.  We consider a value > 850 to be oversized
+ *          and not to be taken literally.
+ *      (5) At least one of the four outputs must be requested.  Arrays
+ *          returned through %pnaw and %pnah are handed to the caller;
+ *          arrays that are not requested are destroyed here.
+ * </pre>
+ */
+l_ok
+getPdfMediaBoxSizes(const char  *fname,
+                    NUMA       **pnaw,
+                    NUMA       **pnah,
+                    l_int32     *pmedw,
+                    l_int32     *pmedh)
+{
+l_uint8   *data;
+l_int32    i, n, format, ret, loc;
+l_float32  fval, x0, y0, x1, y1, w, h;
+size_t     nread;
+L_DNA     *dna;   /* mediabox locations */
+NUMA      *naw;   /* mediabox widths */
+NUMA      *nah;   /* mediabox heights */
+
+    if (pnaw) *pnaw = NULL;
+    if (pnah) *pnah = NULL;
+    if (pmedw) *pmedw = 0;
+    if (pmedh) *pmedh = 0;
+    if (!pnaw && !pnah && !pmedw && !pmedh)
+        return ERROR_INT("no output requested", __func__, 1);
+    if (!fname)
+        return ERROR_INT("fname not defined", __func__, 1);
+
+        /* Make sure this is a pdf file */
+    findFileFormat(fname, &format);
+    if (format != IFF_LPDF)
+        return ERROR_INT("file is not pdf", __func__, 1);
+
+        /* Read the file into memory and find all locations of '/MediaBox' */
+    if ((data = l_binaryRead(fname, &nread)) == NULL)
+        return ERROR_INT("full data not read", __func__, 1);
+    dna = arrayFindEachSequence(data, nread, (const l_uint8 *)"/MediaBox",
+                                strlen("/MediaBox"));
+    if (!dna) {
+        LEPT_FREE(data);
+        L_WARNING("no mediaboxes found\n", __func__);
+        return 1;  /* callers use this to detect missing mediaboxes */
+    }
+
+        /* Find the mediabox widths and heights */
+    n = l_dnaGetCount(dna);
+    naw = numaCreate(n);
+    nah = numaCreate(n);
+    for (i = 0; i < n; i++) {
+        l_dnaGetIValue(dna, i, &loc);
+        ret = sscanf((char *)&data[loc], "/MediaBox [ %f %f %f %f",
+                     &x0, &y0, &x1, &y1);
+        if (ret != 4) {
+            L_ERROR("mediabox sizes not found for item %d at loc %d\n",
+                    __func__, i, loc);
+            continue;
+        }
+
+            /* The four numbers are corner coordinates, not sizes */
+        w = x1 - x0;
+        if (w < 0.0) w = -w;
+        h = y1 - y0;
+        if (h < 0.0) h = -h;
+        numaAddNumber(naw, w);
+        numaAddNumber(nah, h);
+    }
+    LEPT_FREE(data);
+    l_dnaDestroy(&dna);
+
+        /* fval is preset so the median is 0 if no box could be parsed
+         * and numaGetMedian() leaves it untouched. */
+    if (pmedw) {
+        fval = 0.0;
+        numaGetMedian(naw, &fval);
+        *pmedw = lept_roundftoi(fval);
+        if (*pmedw > 850) lept_stderr("oversize width: %d\n", *pmedw);
+    }
+    if (pnaw)
+        *pnaw = naw;
+    else
+        numaDestroy(&naw);
+    if (pmedh) {
+        fval = 0.0;
+        numaGetMedian(nah, &fval);
+        *pmedh = lept_roundftoi(fval);
+        if (*pmedh > 850) lept_stderr("oversize height: %d\n", *pmedh);
+    }
+    if (pnah)
+        *pnah = nah;
+    else
+        numaDestroy(&nah);
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *       Find effective resolution of images rendered from a pdf       *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   getPdfRendererResolution()
+ *
+ * \param[in]    infile       filename of input pdf file
+ * \param[in]    outdir       directory of rendered output images
+ * \param[out]   pres         desired resolution to use with renderer
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Finds the input resolution to pdftoppm that will generate
+ *          images with a maximum dimension of about 3300 pixels,
+ *          representing a full page at 300 ppi.
+ *      (2) It is most important to make sure the renderer does
+ *          not make huge images because of an error in /MediaBox.
+ *          An image dimension of 11 inches corresponds to a MediaBox
+ *          parameter of 792.  We consider a value > 850 to be oversized
+ *          and not to be taken literally.  If the mediaboxes are
+ *          oversized, choose an appropriate lower resolution.
+ *      (3) If the mediaboxes are not accessible, render one page at
+ *          72 ppi into %outdir and, based on the image size, determine
+ *          the resolution necessary to make an image with 3300 pixels
+ *          in the largest dimension, capped at 600.  The size is read
+ *          from the first pathname in the sorted listing of %outdir,
+ *          so %outdir should not contain other files.
+ *      (4) Requires pdftoppm, so this is disabled on windows for now.
+ *      (5) Requires the ability to call an external program, so it is
+ *          necessary to call setLeptDebugOK(1) before this function.
+ *          If that has not been done, 1 is returned.
+ *      (6) *pres is set to the default of 300 before any other work,
+ *          and keeps that value on every path that does not compute
+ *          a different resolution, including the error returns.
+ *      (7) The pdftoppm command is built in a fixed size buffer.  If
+ *          the pathnames are too long to fit, nothing is executed and
+ *          1 is returned.
+ * </pre>
+ */
+l_ok
+getPdfRendererResolution(const char  *infile,
+                         const char  *outdir,
+                         l_int32     *pres)
+{
+char      buf[256];
+char     *tail, *basename, *fname;
+l_int32   ret, res, medw, medh, medmax, npages, pageno, w, h, nchars;
+SARRAY   *sa;
+
+    if (!pres)
+        return ERROR_INT("&res not defined", __func__, 1);
+    *pres = 300;  /* default */
+
+#ifdef _WIN32
+    L_INFO("Requires pdftoppm, so this is disabled on windows.\n"
+           "Returns default resolution 300 ppi", __func__);
+    return 0;
+#endif  /* _WIN32 */
+
+    if (!LeptDebugOK) {
+        L_INFO("Running pdftoppm is disabled; "
+               "use setLeptDebugOK(1) to enable\n"
+               "returns default resolution 300 ppi\n", __func__);
+        return 1;
+    }
+
+    if (!infile)
+        return ERROR_INT("infile not defined", __func__, 1);
+    if (!outdir)
+        return ERROR_INT("outdir not defined", __func__, 1);
+
+    res = 300;  /* default value */
+    ret = getPdfMediaBoxSizes(infile, NULL, NULL, &medw, &medh);
+    if (ret == 0) {  /* Check for oversize mediaboxes */
+        lept_stderr("Media Box medians: medw = %d, medh = %d\n", medw, medh);
+        medmax = L_MAX(medw, medh);
+        if (medmax > 850) {  /* also guarantees a nonzero divisor */
+            res = 300 * ((l_float32)792 / (l_float32)medmax);
+            lept_stderr(" Oversize media box; use resolution = %d\n", res);
+            *pres = res;
+        }
+        return 0;
+    }
+
+        /* No mediaboxes; render one page and measure the max dimension.
+         * Use the middle page, or page 1 if the count is unknown. */
+    lept_stderr("Media Box dimensions not found\n");
+    npages = 0;
+    getPdfPageCount(infile, &npages);
+    pageno = (npages > 0) ? (npages + 1) / 2 : 1;
+
+        /* Both strings are passed to a "%s" conversion below, so they
+         * must not be NULL. */
+    tail = NULL;
+    basename = NULL;
+    splitPathAtDirectory(infile, NULL, &tail);
+    if (!tail)
+        return ERROR_INT("tail not found", __func__, 1);
+    splitPathAtExtension(tail, &basename, NULL);
+    if (!basename) {
+        LEPT_FREE(tail);
+        return ERROR_INT("basename not found", __func__, 1);
+    }
+
+        /* NOTE(review): infile and outdir are inserted unquoted; if
+         * callSystemDebug() hands the string to a shell, pathnames with
+         * spaces or shell metacharacters will not work as intended --
+         * confirm and quote if such input must be supported. */
+    nchars = snprintf(buf, sizeof(buf), "pdftoppm -f %d -l %d -r 72 %s %s/%s",
+                      pageno, pageno, infile, outdir, basename);
+    LEPT_FREE(tail);
+    LEPT_FREE(basename);
+    if (nchars < 0 || (size_t)nchars >= sizeof(buf))  /* never run a */
+        return ERROR_INT("pdftoppm command too long",  /* cut command */
+                         __func__, 1);
+    callSystemDebug(buf);  /* pdftoppm */
+
+        /* Get the page size.  w and h are preset so that a missing
+         * listing, a missing file or a failed header read all end up
+         * in the "page size not found" branch. */
+    w = h = 0;
+    fname = NULL;
+    sa = getSortedPathnamesInDirectory(outdir, NULL, 0, 0);
+    if (sa)
+        fname = sarrayGetString(sa, 0, L_NOCOPY);  /* owned by sa */
+    if (fname)
+        pixReadHeader(fname, NULL, &w, &h, NULL, NULL, NULL);
+    sarrayDestroy(&sa);
+    if (w > 0 && h > 0) {
+        res = L_MIN((72 * 3300 / L_MAX(w, h)), 600);
+        *pres = res;
+        lept_stderr("Use resolution = %d\n", res);
+    } else {
+        L_ERROR("page size not found; assuming res = 300\n", __func__);
+    }
+
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                      Set flags for special modes                    *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   l_pdfSetG4ImageMask()
+ *
+ * \param[in]    flag    1 for writing g4 data as fg only through a mask;
+ *                       0 for writing fg and bg
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) The default is for writing only the fg (through the mask).
+ *          That way when you write a 1 bpp image, the bg is transparent,
+ *          so any previously written image remains visible behind it.
+ *      (2) This only records %flag in the variable var_WRITE_G4_IMAGE_MASK,
+ *          which is defined elsewhere in this file.  The value is stored
+ *          as given, without being checked or normalized to 0 or 1.
+ * </pre>
+ */
+void
+l_pdfSetG4ImageMask(l_int32  flag)
+{
+        /* Setting only; the flag is consumed by code outside this function */
+    var_WRITE_G4_IMAGE_MASK = flag;
+}
+
+
+/*!
+ * \brief   l_pdfSetDateAndVersion()
+ *
+ * \param[in]    flag    1 for writing date/time and leptonica version;
+ *                       0 for omitting this from the metadata
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) The default is for writing this data.  For regression tests
+ *          that compare output against golden files, it is useful to omit.
+ *      (2) This only records %flag in the variable
+ *          var_WRITE_DATE_AND_VERSION, which is defined elsewhere in this
+ *          file.  The value is stored as given, without being checked or
+ *          normalized to 0 or 1.
+ * </pre>
+ */
+void
+l_pdfSetDateAndVersion(l_int32  flag)
+{
+        /* Setting only; the flag is consumed by code outside this function */
+    var_WRITE_DATE_AND_VERSION = flag;
+}
+
+/* --------------------------------------------*/
+#endif  /* USE_PDFIO */
+/* --------------------------------------------*/