Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/pdfio1.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file pdfio1.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Higher-level operations for generating pdf from images. | |
| 32 * Use poppler's pdftoppm or pdfimages to invert the process, | |
| 33 * extracting raster images from pdf. | |
| 34 * | |
| 35 * |=============================================================| | |
| 36 * | Important notes | | |
| 37 * |=============================================================| | |
| 38 * | Some of these functions require I/O libraries such as | | |
| 39 * | libtiff, libjpeg, libpng, libz and libopenjp2. If you do | | |
| 40 * | not have these libraries, some calls will fail. For | | |
| 41 * | example, if you do not have libopenjp2, you cannot write a | | |
| 42 * | pdf where transcoding is required to incorporate a | | |
| 43 * | jp2k image. | | |
| 44 * | | | |
| 45 * | You can manually deactivate all pdf writing by setting | | |
| 46 * | this in environ.h: | | |
| 47 * | \code | | |
| 48 * | #define USE_PDFIO 0 | | |
| 49 * | \endcode | | |
| 50 * | This will link the stub file pdfiostub.c. | | |
| 51 * |=============================================================| | |
| 52 * | |
| 53 * Set 1. These functions convert a set of image files | |
| 54 * to a multi-page pdf file, with one image on each page. | |
| 55 * All images are rendered at the same (input) resolution. | |
| 56 * The images can be specified as being in a directory, or they | |
| 57 * can be in an sarray. The output pdf can be either a file | |
| 58 * or an array of bytes in memory. | |
| 59 * | |
| 60 * Set 2. These functions are a special case of set 1, where | |
| 61 * no scaling or change in quality is required. For jpeg, jp2k and | |
| 62 * tiffg4 images, the bytes in each file can be directly incorporated | |
| 63 * into the output pdf, and the wrapping up of multiple image | |
| 64 * files is very fast. For non-interlaced png, the data bytes | |
| 65 * including the predictors can also be written directly into the | |
| 66 * flate pdf data. For other image formats transcoding is required, | |
| 67 * where the image data is first decompressed and then flate (gzip), | |
| 68 * DCT (jpeg) or tiffg4 (1 bpp) encodings are generated. | |
| 69 * | |
| 70 * Set 3. These functions convert a set of images in memory | |
| 71 * to a multi-page pdf, with one image on each page. The pdf | |
| 72 * output can be either a file or an array of bytes in memory. | |
| 73 * | |
| 74 * Set 4. These functions implement a pdf output "device driver" | |
| 75 * for wrapping (encoding) any number of images on a single page | |
| 76 * in pdf. The input can be either an image file or a Pix; | |
| 77 * the pdf output can be either a file or an array of bytes in memory. | |
| 78 * | |
| 79 * Set 5. These "segmented" functions take a set of image | |
| 80 * files, along with optional segmentation information, and | |
| 81 * generate a multi-page pdf file, where each page consists | |
| 82 * in general of a mixed raster pdf of image and non-image regions. | |
| 83 * The segmentation information for each page can be input as | |
| 84 * either a mask over the image parts, or as a Boxa of those | |
| 85 * regions. | |
| 86 * | |
| 87 * Set 6. These "segmented" functions convert an image and | |
| 88 * an optional Boxa of image regions into a mixed raster pdf file | |
| 89 * for the page. The input image can be either a file or a Pix. | |
| 90 * | |
| 91 * Set 7. These functions take a set of single-page pdf files | |
| 92 * and concatenate them into a multi-page pdf. The input can be | |
| 93 * a set of either single page pdf files or pdf 'strings' in memory. | |
| 94 * The output can be either a file or an array of bytes in memory. | |
| 95 * | |
| 96 * The images in the pdf file can be rendered using a pdf viewer, | |
| 97 * such as evince, gv, xpdf or acroread. | |
| 98 * | |
| 99 * Reference on the pdf file format: | |
| 100 * http://www.adobe.com/devnet/pdf/pdf_reference_archive.html | |
| 101 * | |
| 102 * 1. Convert specified image files to pdf (one image file per page) | |
| 103 * l_int32 convertFilesToPdf() | |
| 104 * l_int32 saConvertFilesToPdf() | |
| 105 * l_int32 saConvertFilesToPdfData() | |
| 106 * l_int32 selectDefaultPdfEncoding() | |
| 107 * | |
| 108 * 2. Convert specified image files to pdf without scaling | |
| 109 * l_int32 convertUnscaledFilesToPdf() | |
| 110 * l_int32 saConvertUnscaledFilesToPdf() | |
| 111 * l_int32 saConvertUnscaledFilesToPdfData() | |
| 112 * l_int32 convertUnscaledToPdfData() | |
| 113 * | |
| 114 * 3. Convert multiple images to pdf (one image per page) | |
| 115 * l_int32 pixaConvertToPdf() | |
| 116 * l_int32 pixaConvertToPdfData() | |
| 117 * | |
| 118 * 4. Single page, multi-image converters | |
| 119 * l_int32 convertToPdf() | |
| 120 * l_int32 convertImageDataToPdf() | |
| 121 * l_int32 convertToPdfData() | |
| 122 * l_int32 convertImageDataToPdfData() | |
| 123 * l_int32 pixConvertToPdf() | |
| 124 * l_int32 pixWriteStreamPdf() | |
| 125 * l_int32 pixWriteMemPdf() | |
| 126 * | |
| 127 * 5. Segmented multi-page, multi-image converter | |
| 128 * l_int32 convertSegmentedFilesToPdf() | |
| 129 * BOXAA *convertNumberedMasksToBoxaa() | |
| 130 * | |
| 131 * 6. Segmented single page, multi-image converters | |
| 132 * l_int32 convertToPdfSegmented() | |
| 133 * l_int32 pixConvertToPdfSegmented() | |
| 134 * l_int32 convertToPdfDataSegmented() | |
| 135 * l_int32 pixConvertToPdfDataSegmented() | |
| 136 * | |
| 137 * 7. Multipage concatenation | |
| 138 * l_int32 concatenatePdf() | |
| 139 * l_int32 saConcatenatePdf() | |
| 140 * l_int32 ptraConcatenatePdf() | |
| 141 * l_int32 concatenatePdfToData() | |
| 142 * l_int32 saConcatenatePdfToData() | |
| 143 * | |
| 144 * The top-level multi-image functions can be visualized as follows: | |
| 145 * Output pdf data to file: | |
| 146 * convertToPdf() and convertImageDataToPdf() | |
| 147 * --> pixConvertToPdf() | |
| 148 * --> pixConvertToPdfData() | |
| 149 * | |
| 150 * Output pdf data to array in memory: | |
| 151 * convertToPdfData() and convertImageDataToPdfData() | |
| 152 * --> pixConvertToPdfData() | |
| 153 * | |
| 154 * The top-level segmented image functions can be visualized as follows: | |
| 155 * Output pdf data to file: | |
| 156 * convertToPdfSegmented() | |
| 157 * --> pixConvertToPdfSegmented() | |
| 158 * --> pixConvertToPdfDataSegmented() | |
| 159 * | |
| 160 * Output pdf data to array in memory: | |
| 161 * convertToPdfDataSegmented() | |
| 162 * --> pixConvertToPdfDataSegmented() | |
| 163 * | |
| 164 * For multi-page concatenation, there are three different types of input | |
| 165 * (1) directory and optional filename filter | |
| 166 * (2) sarray of filenames | |
| 167 * (3) ptra of byte arrays of pdf data | |
| 168 * and two types of output for the concatenated pdf data | |
| 169 * (1) filename | |
| 170 * (2) data array and size | |
| 171 * High-level interfaces are given for each of the six combinations. | |
| 172 * | |
| 173 * Note: When wrapping small images into pdf, it is useful to give | |
| 174 * them a relatively low resolution value, to avoid rounding errors | |
| 175 * when rendering the images. For example, if you want an image | |
| 176 * of width w pixels to be 5 inches wide on a screen, choose a | |
| 177 * resolution w/5. | |
| 178 * | |
| 179 * The very fast functions in section (2) require neither transcoding | |
| 180 * nor parsing of the compressed jpeg file. With three types of image | |
| 181 * compression, the compressed strings can be incorporated into | |
| 182 * the pdf data without decompression and re-encoding: jpeg, jp2k | |
| 183 * and png. The DCTDecode and JPXDecode filters can handle the | |
| 184 * entire jpeg and jp2k encoded string as a byte array in the pdf file. | |
| 185 * The FlateDecode filter can handle the png compressed image data, | |
| 186 * including predictors that occur as the first byte in each | |
| 187 * raster line, but it is necessary to store only the png IDAT chunk | |
| 188 * data in the pdf array. The alternative for wrapping png images | |
| 189 * is to transcode them: uncompress into a raster (a pix) and then | |
| 190 * gzip the raster data. This typically results in a larger pdf file | |
| 191 * because it doesn't use the two-dimensional png predictor. | |
| 192 * Colormaps, which are found in png PLTE chunks, must always be | |
| 193 * pulled out and included separately in the pdf. For CCITT-G4 | |
| 194 * compression, you can not simply include a tiff G4 file -- you must | |
| 195 * either parse it and extract the G4 compressed data within it, | |
| 196 * or uncompress to a raster and G4 compress again. | |
| 197 * </pre> | |
| 198 */ | |
| 199 | |
| 200 #ifdef HAVE_CONFIG_H | |
| 201 #include <config_auto.h> | |
| 202 #endif /* HAVE_CONFIG_H */ | |
| 203 | |
| 204 #include <string.h> | |
| 205 #include <math.h> | |
| 206 #include "allheaders.h" | |
| 207 | |
| 208 /* --------------------------------------------*/ | |
| 209 #if USE_PDFIO /* defined in environ.h */ | |
| 210 /* --------------------------------------------*/ | |
| 211 | |
| 212 /* Typical scan resolution in ppi (pixels/inch) */ | |
| 213 static const l_int32 DefaultInputRes = 300; | |
| 214 | |
| 215 /*---------------------------------------------------------------------* | |
| 216 * Convert specified image files to pdf (one image file per page) * | |
| 217 *---------------------------------------------------------------------*/ | |
| 218 /*! | |
| 219 * \brief convertFilesToPdf() | |
| 220 * | |
| 221 * \param[in] dirname directory name containing images | |
| 222 * \param[in] substr [optional] substring filter on filenames; | |
| 223 * can be null | |
| 224 * \param[in] res input resolution of all images | |
| 225 * \param[in] scalefactor scaling factor applied to each image; > 0.0 | |
| 226 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 227 * L_FLATE_ENCODE, L_JP2K_ENCODE or | |
| 228 * L_DEFAULT_ENCODE for default) | |
| 229 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 230 * for jp2k: 27-45; 0 for default (34) | |
| 231 * \param[in] title [optional] pdf title; can be null | |
| 232 * \param[in] fileout pdf file of all images | |
| 233 * \return 0 if OK, 1 on error | |
| 234 * | |
| 235 * <pre> | |
| 236 * Notes: | |
| 237 * (1) If %substr is not NULL, only image filenames that contain | |
| 238 * the substring can be used. If %substr == NULL, all files | |
| 239 * in the directory are used. | |
| 240 * (2) The files in the directory, after optional filtering by | |
| 241 * the substring, are lexically sorted in increasing order | |
| 242 * before concatenation. | |
| 243 * (3) The scalefactor is applied to each image before encoding. | |
| 244 * If you enter a value <= 0.0, it will be set to 1.0. | |
| 245 * (4) Specifying one of the four encoding types for %type forces | |
| 246 * all images to be compressed with that type. Use 0 to have | |
| 247 * the type determined for each image based on depth and whether | |
| 248 * or not it has a colormap. | |
| 249 * </pre> | |
| 250 */ | |
| 251 l_ok | |
| 252 convertFilesToPdf(const char *dirname, | |
| 253 const char *substr, | |
| 254 l_int32 res, | |
| 255 l_float32 scalefactor, | |
| 256 l_int32 type, | |
| 257 l_int32 quality, | |
| 258 const char *title, | |
| 259 const char *fileout) | |
| 260 { | |
| 261 l_int32 ret; | |
| 262 SARRAY *sa; | |
| 263 | |
| 264 if (!dirname) | |
| 265 return ERROR_INT("dirname not defined", __func__, 1); | |
| 266 if (!fileout) | |
| 267 return ERROR_INT("fileout not defined", __func__, 1); | |
| 268 | |
| 269 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) | |
| 270 return ERROR_INT("sa not made", __func__, 1); | |
| 271 ret = saConvertFilesToPdf(sa, res, scalefactor, type, quality, | |
| 272 title, fileout); | |
| 273 sarrayDestroy(&sa); | |
| 274 return ret; | |
| 275 } | |
| 276 | |
| 277 | |
| 278 /*! | |
| 279 * \brief saConvertFilesToPdf() | |
| 280 * | |
| 281 * \param[in] sa string array of pathnames for images | |
| 282 * \param[in] res input resolution of all images | |
| 283 * \param[in] scalefactor scaling factor applied to each image; > 0.0 | |
| 284 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 285 * L_FLATE_ENCODE, L_JP2K_ENCODE or | |
| 286 * L_DEFAULT_ENCODE for default) | |
| 287 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 288 * for jp2k: 27-45; 0 for default (34) | |
| 289 * \param[in] title [optional] pdf title; can be null | |
| 290 * \param[in] fileout pdf file of all images | |
| 291 * \return 0 if OK, 1 on error | |
| 292 * | |
| 293 * <pre> | |
| 294 * Notes: | |
| 295 * (1) See convertFilesToPdf(). | |
| 296 * </pre> | |
| 297 */ | |
| 298 l_ok | |
| 299 saConvertFilesToPdf(SARRAY *sa, | |
| 300 l_int32 res, | |
| 301 l_float32 scalefactor, | |
| 302 l_int32 type, | |
| 303 l_int32 quality, | |
| 304 const char *title, | |
| 305 const char *fileout) | |
| 306 { | |
| 307 l_uint8 *data; | |
| 308 l_int32 ret; | |
| 309 size_t nbytes; | |
| 310 | |
| 311 if (!sa) | |
| 312 return ERROR_INT("sa not defined", __func__, 1); | |
| 313 | |
| 314 ret = saConvertFilesToPdfData(sa, res, scalefactor, type, quality, | |
| 315 title, &data, &nbytes); | |
| 316 if (ret) { | |
| 317 if (data) LEPT_FREE(data); | |
| 318 return ERROR_INT("pdf data not made", __func__, 1); | |
| 319 } | |
| 320 | |
| 321 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 322 LEPT_FREE(data); | |
| 323 if (ret) | |
| 324 L_ERROR("pdf data not written to file\n", __func__); | |
| 325 return ret; | |
| 326 } | |
| 327 | |
| 328 | |
| 329 /*! | |
| 330 * \brief saConvertFilesToPdfData() | |
| 331 * | |
| 332 * \param[in] sa string array of pathnames for images | |
| 333 * \param[in] res input resolution of all images | |
| 334 * \param[in] scalefactor scaling factor applied to each image; > 0.0 | |
| 335 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 336 * L_FLATE_ENCODE, L_JP2K_ENCODE or | |
| 337 * L_DEFAULT_ENCODE for default) | |
| 338 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 339 * for jp2k: 27-45; 0 for default (34) | |
| 340 * \param[in] title [optional] pdf title; can be null | |
| 341 * \param[out] pdata output pdf data (of all images | |
| 342 * \param[out] pnbytes size of output pdf data | |
| 343 * \return 0 if OK, 1 on error | |
| 344 * | |
| 345 * <pre> | |
| 346 * Notes: | |
| 347 * (1) See convertFilesToPdf(). | |
| 348 * </pre> | |
| 349 */ | |
| 350 l_ok | |
| 351 saConvertFilesToPdfData(SARRAY *sa, | |
| 352 l_int32 res, | |
| 353 l_float32 scalefactor, | |
| 354 l_int32 type, | |
| 355 l_int32 quality, | |
| 356 const char *title, | |
| 357 l_uint8 **pdata, | |
| 358 size_t *pnbytes) | |
| 359 { | |
| 360 char *fname; | |
| 361 l_uint8 *imdata; | |
| 362 l_int32 i, n, ret, pagetype, npages, scaledres; | |
| 363 size_t imbytes; | |
| 364 L_BYTEA *ba; | |
| 365 PIX *pixs, *pix; | |
| 366 L_PTRA *pa_data; | |
| 367 | |
| 368 if (!pdata) | |
| 369 return ERROR_INT("&data not defined", __func__, 1); | |
| 370 *pdata = NULL; | |
| 371 if (!pnbytes) | |
| 372 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 373 *pnbytes = 0; | |
| 374 if (!sa) | |
| 375 return ERROR_INT("sa not defined", __func__, 1); | |
| 376 if (scalefactor <= 0.0) scalefactor = 1.0; | |
| 377 if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && | |
| 378 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { | |
| 379 type = L_DEFAULT_ENCODE; | |
| 380 } | |
| 381 | |
| 382 /* Generate all the encoded pdf strings */ | |
| 383 n = sarrayGetCount(sa); | |
| 384 pa_data = ptraCreate(n); | |
| 385 for (i = 0; i < n; i++) { | |
| 386 if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); | |
| 387 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 388 if ((pixs = pixRead(fname)) == NULL) { | |
| 389 L_ERROR("image not readable from file %s\n", __func__, fname); | |
| 390 continue; | |
| 391 } | |
| 392 if (scalefactor != 1.0) | |
| 393 pix = pixScale(pixs, scalefactor, scalefactor); | |
| 394 else | |
| 395 pix = pixClone(pixs); | |
| 396 pixDestroy(&pixs); | |
| 397 scaledres = (l_int32)(res * scalefactor); | |
| 398 | |
| 399 /* Select the encoding type */ | |
| 400 if (type != L_DEFAULT_ENCODE) { | |
| 401 pagetype = type; | |
| 402 } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { | |
| 403 pixDestroy(&pix); | |
| 404 L_ERROR("encoding type selection failed for file %s\n", | |
| 405 __func__, fname); | |
| 406 continue; | |
| 407 } | |
| 408 | |
| 409 ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, | |
| 410 0, 0, scaledres, title, NULL, 0); | |
| 411 pixDestroy(&pix); | |
| 412 if (ret) { | |
| 413 LEPT_FREE(imdata); | |
| 414 L_ERROR("pdf encoding failed for %s\n", __func__, fname); | |
| 415 continue; | |
| 416 } | |
| 417 ba = l_byteaInitFromMem(imdata, imbytes); | |
| 418 LEPT_FREE(imdata); | |
| 419 ptraAdd(pa_data, ba); | |
| 420 } | |
| 421 ptraGetActualCount(pa_data, &npages); | |
| 422 if (npages == 0) { | |
| 423 L_ERROR("no pdf files made\n", __func__); | |
| 424 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 425 return 1; | |
| 426 } | |
| 427 | |
| 428 /* Concatenate them */ | |
| 429 lept_stderr("\nconcatenating ... "); | |
| 430 ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); | |
| 431 lept_stderr("done\n"); | |
| 432 | |
| 433 ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */ | |
| 434 for (i = 0; i < npages; i++) { | |
| 435 ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); | |
| 436 l_byteaDestroy(&ba); | |
| 437 } | |
| 438 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 439 return ret; | |
| 440 } | |
| 441 | |
| 442 | |
| 443 /*! | |
| 444 * \brief selectDefaultPdfEncoding() | |
| 445 * | |
| 446 * \param[in] pix | |
| 447 * \param[out] ptype L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE | |
| 448 * \return 0 if OK, 1 on error | |
| 449 * | |
| 450 * <pre> | |
| 451 * Notes: | |
| 452 * (1) This attempts to choose an encoding for the pix that results | |
| 453 * in the smallest file, assuming that if jpeg encoded, it will | |
| 454 * use quality = 75. The decision is approximate, in that | |
| 455 * (a) all colormapped images will be losslessly encoded with | |
| 456 * gzip (flate), and (b) an image with less than about 20 colors | |
| 457 * is likely to be smaller if flate encoded than if encoded | |
| 458 * as a jpeg (dct). For example, an image made by pixScaleToGray3() | |
| 459 * will have 10 colors, and flate encoding will give about | |
| 460 * twice the compression as jpeg with quality = 75. | |
| 461 * (2) We could have used L_JP2K_ENCODE instead of L_JPEG_ENCODE. | |
| 462 * However, the jp2k compression is not much better than jpeg, and | |
| 463 * the jpeg library is more commonly available than the jp2k library. | |
| 464 * </pre> | |
| 465 */ | |
| 466 l_ok | |
| 467 selectDefaultPdfEncoding(PIX *pix, | |
| 468 l_int32 *ptype) | |
| 469 { | |
| 470 l_int32 w, h, d, factor, ncolors; | |
| 471 PIXCMAP *cmap; | |
| 472 | |
| 473 if (!ptype) | |
| 474 return ERROR_INT("&type not defined", __func__, 1); | |
| 475 *ptype = L_FLATE_ENCODE; /* default universal encoding */ | |
| 476 if (!pix) | |
| 477 return ERROR_INT("pix not defined", __func__, 1); | |
| 478 pixGetDimensions(pix, &w, &h, &d); | |
| 479 cmap = pixGetColormap(pix); | |
| 480 if (d == 8 && !cmap) { | |
| 481 factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 20000.)); | |
| 482 pixNumColors(pix, factor, &ncolors); | |
| 483 if (ncolors < 20) | |
| 484 *ptype = L_FLATE_ENCODE; | |
| 485 else | |
| 486 *ptype = L_JPEG_ENCODE; | |
| 487 } else if (d == 1) { | |
| 488 *ptype = L_G4_ENCODE; | |
| 489 } else if (cmap || d == 2 || d == 4) { | |
| 490 *ptype = L_FLATE_ENCODE; | |
| 491 } else if (d == 8 || d == 32) { | |
| 492 *ptype = L_JPEG_ENCODE; | |
| 493 } else if (d == 16) { | |
| 494 *ptype = L_FLATE_ENCODE; | |
| 495 } else { | |
| 496 return ERROR_INT("type selection failure", __func__, 1); | |
| 497 } | |
| 498 | |
| 499 return 0; | |
| 500 } | |
| 501 | |
| 502 | |
| 503 /*---------------------------------------------------------------------* | |
| 504 * Convert specified image files to pdf without scaling * | |
| 505 *---------------------------------------------------------------------*/ | |
| 506 /*! | |
| 507 * \brief convertUnscaledFilesToPdf() | |
| 508 * | |
| 509 * \param[in] dirname directory name containing images | |
| 510 * \param[in] substr [optional] substring filter on filenames; | |
| 511 * can be null | |
| 512 * \param[in] title [optional] pdf title; can be null | |
| 513 * \param[in] fileout pdf file of all images | |
| 514 * \return 0 if OK, 1 on error | |
| 515 * | |
| 516 * <pre> | |
| 517 * Notes: | |
| 518 * (1) If %substr is not NULL, only image filenames that contain | |
| 519 * the substring can be used. If %substr == NULL, all files | |
| 520 * in the directory are used. | |
| 521 * (2) The files in the directory, after optional filtering by | |
| 522 * the substring, are lexically sorted in increasing order | |
| 523 * before concatenation. | |
| 524 * (3) This is very fast for jpeg, jp2k and some png files, | |
| 525 * because the compressed data is wrapped up and concatenated. | |
| 526 * For other types of png, the images must be read and recompressed. | |
| 527 * </pre> | |
| 528 */ | |
| 529 l_ok | |
| 530 convertUnscaledFilesToPdf(const char *dirname, | |
| 531 const char *substr, | |
| 532 const char *title, | |
| 533 const char *fileout) | |
| 534 { | |
| 535 l_int32 ret; | |
| 536 SARRAY *sa; | |
| 537 | |
| 538 if (!dirname) | |
| 539 return ERROR_INT("dirname not defined", __func__, 1); | |
| 540 if (!fileout) | |
| 541 return ERROR_INT("fileout not defined", __func__, 1); | |
| 542 | |
| 543 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) | |
| 544 return ERROR_INT("sa not made", __func__, 1); | |
| 545 ret = saConvertUnscaledFilesToPdf(sa, title, fileout); | |
| 546 sarrayDestroy(&sa); | |
| 547 return ret; | |
| 548 } | |
| 549 | |
| 550 | |
| 551 /*! | |
| 552 * \brief saConvertUnscaledFilesToPdf() | |
| 553 * | |
| 554 * \param[in] sa string array of pathnames for images | |
| 555 * \param[in] title [optional] pdf title; can be null | |
| 556 * \param[in] fileout pdf file of all images | |
| 557 * \return 0 if OK, 1 on error | |
| 558 * | |
| 559 * <pre> | |
| 560 * Notes: | |
| 561 * (1) See convertUnscaledFilesToPdf(). | |
| 562 * </pre> | |
| 563 */ | |
| 564 l_ok | |
| 565 saConvertUnscaledFilesToPdf(SARRAY *sa, | |
| 566 const char *title, | |
| 567 const char *fileout) | |
| 568 { | |
| 569 l_uint8 *data; | |
| 570 l_int32 ret; | |
| 571 size_t nbytes; | |
| 572 | |
| 573 if (!sa) | |
| 574 return ERROR_INT("sa not defined", __func__, 1); | |
| 575 | |
| 576 ret = saConvertUnscaledFilesToPdfData(sa, title, &data, &nbytes); | |
| 577 if (ret) { | |
| 578 if (data) LEPT_FREE(data); | |
| 579 return ERROR_INT("pdf data not made", __func__, 1); | |
| 580 } | |
| 581 | |
| 582 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 583 LEPT_FREE(data); | |
| 584 if (ret) | |
| 585 L_ERROR("pdf data not written to file\n", __func__); | |
| 586 return ret; | |
| 587 } | |
| 588 | |
| 589 | |
| 590 /*! | |
| 591 * \brief saConvertUnscaledFilesToPdfData() | |
| 592 * | |
| 593 * \param[in] sa string array of pathnames for image files | |
| 594 * \param[in] title [optional] pdf title; can be null | |
| 595 * \param[out] pdata output pdf data (of all images) | |
| 596 * \param[out] pnbytes size of output pdf data | |
| 597 * \return 0 if OK, 1 on error | |
| 598 * | |
| 599 * <pre> | |
| 600 * Notes: | |
| 601 * (1) This is very fast for jpeg, jp2k and some png files, | |
| 602 * because the compressed data is wrapped up and concatenated. | |
| 603 * For other types of png, the images must be read and recompressed. | |
| 604 * </pre> | |
| 605 */ | |
| 606 l_ok | |
| 607 saConvertUnscaledFilesToPdfData(SARRAY *sa, | |
| 608 const char *title, | |
| 609 l_uint8 **pdata, | |
| 610 size_t *pnbytes) | |
| 611 { | |
| 612 char *fname; | |
| 613 l_uint8 *imdata; | |
| 614 l_int32 i, n, ret, npages; | |
| 615 size_t imbytes; | |
| 616 L_BYTEA *ba; | |
| 617 L_PTRA *pa_data; | |
| 618 | |
| 619 if (!pdata) | |
| 620 return ERROR_INT("&data not defined", __func__, 1); | |
| 621 *pdata = NULL; | |
| 622 if (!pnbytes) | |
| 623 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 624 *pnbytes = 0; | |
| 625 if (!sa) | |
| 626 return ERROR_INT("sa not defined", __func__, 1); | |
| 627 | |
| 628 /* Generate all the encoded pdf strings */ | |
| 629 n = sarrayGetCount(sa); | |
| 630 pa_data = ptraCreate(n); | |
| 631 for (i = 0; i < n; i++) { | |
| 632 if (i && (i % 10 == 0)) lept_stderr(".. %d ", i); | |
| 633 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 634 | |
| 635 /* Generate the pdf data */ | |
| 636 if (convertUnscaledToPdfData(fname, title, &imdata, &imbytes)) | |
| 637 continue; | |
| 638 | |
| 639 /* ... and add it to the array of single page data */ | |
| 640 ba = l_byteaInitFromMem(imdata, imbytes); | |
| 641 if (imdata) LEPT_FREE(imdata); | |
| 642 ptraAdd(pa_data, ba); | |
| 643 } | |
| 644 ptraGetActualCount(pa_data, &npages); | |
| 645 if (npages == 0) { | |
| 646 L_ERROR("no pdf files made\n", __func__); | |
| 647 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 648 return 1; | |
| 649 } | |
| 650 | |
| 651 /* Concatenate to generate a multipage pdf */ | |
| 652 lept_stderr("\nconcatenating ... "); | |
| 653 ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); | |
| 654 lept_stderr("done\n"); | |
| 655 | |
| 656 /* Clean up */ | |
| 657 ptraGetActualCount(pa_data, &npages); /* maybe failed to read some files */ | |
| 658 for (i = 0; i < npages; i++) { | |
| 659 ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); | |
| 660 l_byteaDestroy(&ba); | |
| 661 } | |
| 662 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 663 return ret; | |
| 664 } | |
| 665 | |
| 666 | |
| 667 /*! | |
| 668 * \brief convertUnscaledToPdfData() | |
| 669 * | |
| 670 * \param[in] fname of image file in all formats | |
| 671 * \param[in] title [optional] pdf title; can be null | |
| 672 * \param[out] pdata output pdf data for image | |
| 673 * \param[out] pnbytes size of output pdf data | |
| 674 * \return 0 if OK, 1 on error | |
| 675 * | |
| 676 * <pre> | |
| 677 * Notes: | |
| 678 * (1) This is very fast for jpeg, jp2k and some png files, | |
| 679 * because the compressed data is wrapped up and concatenated. | |
| 680 * For other types of png, the images must be read and recompressed. | |
| 681 * </pre> | |
| 682 */ | |
| 683 l_ok | |
| 684 convertUnscaledToPdfData(const char *fname, | |
| 685 const char *title, | |
| 686 l_uint8 **pdata, | |
| 687 size_t *pnbytes) | |
| 688 { | |
| 689 l_int32 format; | |
| 690 L_COMP_DATA *cid; | |
| 691 | |
| 692 if (!pdata) | |
| 693 return ERROR_INT("&data not defined", __func__, 1); | |
| 694 *pdata = NULL; | |
| 695 if (!pnbytes) | |
| 696 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 697 *pnbytes = 0; | |
| 698 if (!fname) | |
| 699 return ERROR_INT("fname not defined", __func__, 1); | |
| 700 | |
| 701 findFileFormat(fname, &format); | |
| 702 if (format == IFF_UNKNOWN) { | |
| 703 L_WARNING("file %s format is unknown; skip\n", __func__, fname); | |
| 704 return 1; | |
| 705 } | |
| 706 if (format == IFF_PS || format == IFF_LPDF) { | |
| 707 L_WARNING("file %s format is %d; skip\n", __func__, fname, format); | |
| 708 return 1; | |
| 709 } | |
| 710 | |
| 711 /* Generate the image data required for pdf generation, always | |
| 712 * in binary (not ascii85) coding. Note that jpeg, jp2k and some | |
| 713 * png files are not transcoded. */ | |
| 714 l_generateCIDataForPdf(fname, NULL, 0, &cid); | |
| 715 if (!cid) { | |
| 716 L_ERROR("file %s format is %d; unreadable\n", __func__, fname, format); | |
| 717 return 1; | |
| 718 } | |
| 719 | |
| 720 /* Generate the pdf string for this page (image). This destroys | |
| 721 * the cid by attaching it to an lpd and destroying the lpd. */ | |
| 722 cidConvertToPdfData(cid, title, pdata, pnbytes); | |
| 723 return 0; | |
| 724 } | |
| 725 | |
| 726 | |
| 727 /*---------------------------------------------------------------------* | |
| 728 * Convert multiple images to pdf (one image per page) * | |
| 729 *---------------------------------------------------------------------*/ | |
| 730 /*! | |
| 731 * \brief pixaConvertToPdf() | |
| 732 * | |
| 733 * \param[in] pixa containing images all at the same resolution | |
| 734 * \param[in] res override the resolution of each input image, | |
| 735 * in ppi; use 0 to respect the resolution | |
| 736 * embedded in the input images | |
| 737 * \param[in] scalefactor scaling factor applied to each image; > 0.0 | |
| 738 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 739 * L_FLATE_ENCODE, L_JP2K_ENCODE, or | |
| 740 * L_DEFAULT_ENCODE for default) | |
| 741 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 742 * for jp2k: 27-45; 0 for default (34) | |
| 743 * \param[in] title [optional] pdf title; can be null | |
| 744 * \param[in] fileout pdf file of all images | |
| 745 * \return 0 if OK, 1 on error | |
| 746 * | |
| 747 * <pre> | |
| 748 * Notes: | |
| 749 * (1) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without | |
| 750 * colormap and many colors, or 32 bpp; FLATE for anything else. | |
| 751 * (2) The scalefactor must be > 0.0; otherwise it is set to 1.0. | |
| 752 * (3) Specifying one of the three encoding types for %type forces | |
| 753 * all images to be compressed with that type. Use 0 to have | |
| 754 * the type determined for each image based on depth and whether | |
| 755 * or not it has a colormap. | |
| 756 * </pre> | |
| 757 */ | |
| 758 l_ok | |
| 759 pixaConvertToPdf(PIXA *pixa, | |
| 760 l_int32 res, | |
| 761 l_float32 scalefactor, | |
| 762 l_int32 type, | |
| 763 l_int32 quality, | |
| 764 const char *title, | |
| 765 const char *fileout) | |
| 766 { | |
| 767 l_uint8 *data; | |
| 768 l_int32 ret; | |
| 769 size_t nbytes; | |
| 770 | |
| 771 if (!pixa) | |
| 772 return ERROR_INT("pixa not defined", __func__, 1); | |
| 773 | |
| 774 ret = pixaConvertToPdfData(pixa, res, scalefactor, type, quality, | |
| 775 title, &data, &nbytes); | |
| 776 if (ret) { | |
| 777 LEPT_FREE(data); | |
| 778 return ERROR_INT("conversion to pdf failed", __func__, 1); | |
| 779 } | |
| 780 | |
| 781 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 782 LEPT_FREE(data); | |
| 783 if (ret) | |
| 784 L_ERROR("pdf data not written to file\n", __func__); | |
| 785 return ret; | |
| 786 } | |
| 787 | |
| 788 | |
| 789 /*! | |
| 790 * \brief pixaConvertToPdfData() | |
| 791 * | |
| 792 * \param[in] pixa containing images all at the same resolution | |
| 793 * \param[in] res input resolution of all images | |
| 794 * \param[in] scalefactor scaling factor applied to each image; > 0.0; <50 | |
| 795 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 796 * L_FLATE_ENCODE, L_JP2K_ENCODE, or | |
| 797 * L_DEFAULT_ENCODE for default) | |
| 798 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 799 * for jp2k: 27-45; 0 for default (34) | |
| 800 * \param[in] title [optional] pdf title; can be null | |
| 801 * \param[out] pdata output pdf data of all images | |
| 802 * \param[out] pnbytes size of output pdf data | |
| 803 * \return 0 if OK, 1 on error | |
| 804 * | |
| 805 * <pre> | |
| 806 * Notes: | |
| 807 * (1) See pixaConvertToPdf(). | |
| 808 * </pre> | |
| 809 */ | |
| 810 l_ok | |
| 811 pixaConvertToPdfData(PIXA *pixa, | |
| 812 l_int32 res, | |
| 813 l_float32 scalefactor, | |
| 814 l_int32 type, | |
| 815 l_int32 quality, | |
| 816 const char *title, | |
| 817 l_uint8 **pdata, | |
| 818 size_t *pnbytes) | |
| 819 { | |
| 820 l_uint8 *imdata; | |
| 821 l_int32 i, n, ret, scaledres, pagetype; | |
| 822 size_t imbytes; | |
| 823 L_BYTEA *ba; | |
| 824 PIX *pixs, *pix; | |
| 825 L_PTRA *pa_data; | |
| 826 | |
| 827 if (!pdata) | |
| 828 return ERROR_INT("&data not defined", __func__, 1); | |
| 829 *pdata = NULL; | |
| 830 if (!pnbytes) | |
| 831 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 832 *pnbytes = 0; | |
| 833 if (!pixa) | |
| 834 return ERROR_INT("pixa not defined", __func__, 1); | |
| 835 if (scalefactor <= 0.0) scalefactor = 1.0; | |
| 836 if (scalefactor >= 50.0) | |
| 837 return ERROR_INT("scalefactor too large", __func__, 1); | |
| 838 if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE && | |
| 839 type != L_G4_ENCODE && type != L_FLATE_ENCODE && | |
| 840 type != L_JP2K_ENCODE) { | |
| 841 L_WARNING("invalid compression type; using per-page default\n", | |
| 842 __func__); | |
| 843 type = L_DEFAULT_ENCODE; | |
| 844 } | |
| 845 if (quality < 0 || quality > 100) | |
| 846 return ERROR_INT("invalid quality", __func__, 1); | |
| 847 | |
| 848 /* Generate all the encoded pdf strings */ | |
| 849 n = pixaGetCount(pixa); | |
| 850 pa_data = ptraCreate(n); | |
| 851 for (i = 0; i < n; i++) { | |
| 852 if ((pixs = pixaGetPix(pixa, i, L_CLONE)) == NULL) { | |
| 853 L_ERROR("pixs[%d] not retrieved\n", __func__, i); | |
| 854 continue; | |
| 855 } | |
| 856 if (scalefactor != 1.0) | |
| 857 pix = pixScale(pixs, scalefactor, scalefactor); | |
| 858 else | |
| 859 pix = pixClone(pixs); | |
| 860 pixDestroy(&pixs); | |
| 861 if (!pix) { | |
| 862 L_ERROR("pix[%d] not made\n", __func__, i); | |
| 863 continue; | |
| 864 } | |
| 865 scaledres = (l_int32)(res * scalefactor); | |
| 866 | |
| 867 /* Select the encoding type */ | |
| 868 if (type != L_DEFAULT_ENCODE) { | |
| 869 pagetype = type; | |
| 870 } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) { | |
| 871 L_ERROR("encoding type selection failed for pix[%d]\n", | |
| 872 __func__, i); | |
| 873 pixDestroy(&pix); | |
| 874 continue; | |
| 875 } | |
| 876 | |
| 877 ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes, | |
| 878 0, 0, scaledres, title, NULL, 0); | |
| 879 pixDestroy(&pix); | |
| 880 if (ret) { | |
| 881 LEPT_FREE(imdata); | |
| 882 L_ERROR("pdf encoding failed for pix[%d]\n", __func__, i); | |
| 883 continue; | |
| 884 } | |
| 885 ba = l_byteaInitFromMem(imdata, imbytes); | |
| 886 LEPT_FREE(imdata); | |
| 887 ptraAdd(pa_data, ba); | |
| 888 } | |
| 889 ptraGetActualCount(pa_data, &n); | |
| 890 if (n == 0) { | |
| 891 L_ERROR("no pdf files made\n", __func__); | |
| 892 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 893 return 1; | |
| 894 } | |
| 895 | |
| 896 /* Concatenate them */ | |
| 897 ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes); | |
| 898 | |
| 899 ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */ | |
| 900 for (i = 0; i < n; i++) { | |
| 901 ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); | |
| 902 l_byteaDestroy(&ba); | |
| 903 } | |
| 904 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 905 return ret; | |
| 906 } | |
| 907 | |
| 908 | |
| 909 /*---------------------------------------------------------------------* | |
| 910 * Single page, multi-image converters * | |
| 911 *---------------------------------------------------------------------*/ | |
| 912 /*! | |
| 913 * \brief convertToPdf() | |
| 914 * | |
| 915 * \param[in] filein input image file -- any format | |
| 916 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 917 * L_FLATE_ENCODE, or L_JP2K_ENCODE) | |
| 918 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 919 * for jp2k: 27-45; 0 for default (34) | |
| 920 * \param[in] fileout output pdf file; only required on last | |
| 921 * image on page | |
| 922 * \param[in] x, y location of lower-left corner of image, | |
| 923 * in pixels, relative to the PostScript origin | |
| 924 * (0,0) at the lower-left corner of the page | |
| 925 * \param[in] res override the resolution of the input image, | |
| 926 * in ppi; use 0 to respect the resolution | |
| 927 * embedded in the input images | |
| 928 * \param[in] title [optional] pdf title; can be null | |
| 929 * \param[in,out] plpd ptr to lpd, which is created on the first | |
| 930 * invocation and returned until last image is | |
| 931 * processed, at which time it is destroyed | |
| 932 * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, | |
| 933 * L_LAST_IMAGE | |
| 934 * \return 0 if OK, 1 on error | |
| 935 * | |
| 936 * <pre> | |
| 937 * Notes: | |
| 938 * (1) To wrap only one image in pdf, input %plpd = NULL, and | |
| 939 * the value of %position will be ignored: | |
| 940 * convertToPdf(... type, quality, x, y, res, NULL, 0); | |
| 941 * (2) To wrap multiple images on a single pdf page, this is called | |
| 942 * once for each successive image. Do it this way: | |
| 943 * L_PDF_DATA *lpd; | |
| 944 * convertToPdf(... type, quality, x, y, res, &lpd, L_FIRST_IMAGE); | |
| 945 * convertToPdf(... type, quality, x, y, res, &lpd, L_NEXT_IMAGE); | |
| 946 * ... | |
| 947 * convertToPdf(... type, quality, x, y, res, &lpd, L_LAST_IMAGE); | |
| 948 * This will write the result to the value of %fileout specified | |
| 949 * in the first call; succeeding values of %fileout are ignored. | |
| 950 * On the last call: the pdf data bytes are computed and written | |
| 951 * to %fileout, lpd is destroyed internally, and the returned | |
| 952 * value of lpd is null. So the client has nothing to clean up. | |
| 953 * (3) (a) Set %res == 0 to respect the resolution embedded in the | |
| 954 * image file. If no resolution is embedded, it will be set | |
| 955 * to the default value. | |
| 956 * (b) Set %res to some other value to override the file resolution. | |
| 957 * (4) (a) If the input %res and the resolution of the output device | |
| 958 * are equal, the image will be "displayed" at the same size | |
| 959 * as the original. | |
| 960 * (b) If the input %res is 72, the output device will render | |
| 961 * the image at 1 pt/pixel. | |
| 962 * (c) Some possible choices for the default input pix resolution are: | |
| 963 * 72 ppi Render pix on any output device at one pt/pixel | |
| 964 * 96 ppi Windows default for generated display images | |
| 965 * 300 ppi Typical default for scanned images. | |
| 966 * We choose 300, which is sensible for rendering page images. | |
| 967 * However, images come from a variety of sources, and | |
| 968 * some are explicitly created for viewing on a display. | |
| 969 * </pre> | |
| 970 */ | |
| 971 l_ok | |
| 972 convertToPdf(const char *filein, | |
| 973 l_int32 type, | |
| 974 l_int32 quality, | |
| 975 const char *fileout, | |
| 976 l_int32 x, | |
| 977 l_int32 y, | |
| 978 l_int32 res, | |
| 979 const char *title, | |
| 980 L_PDF_DATA **plpd, | |
| 981 l_int32 position) | |
| 982 { | |
| 983 l_uint8 *data; | |
| 984 l_int32 ret; | |
| 985 size_t nbytes; | |
| 986 | |
| 987 if (!filein) | |
| 988 return ERROR_INT("filein not defined", __func__, 1); | |
| 989 if (!plpd || (position == L_LAST_IMAGE)) { | |
| 990 if (!fileout) | |
| 991 return ERROR_INT("fileout not defined", __func__, 1); | |
| 992 } | |
| 993 | |
| 994 if (convertToPdfData(filein, type, quality, &data, &nbytes, x, y, | |
| 995 res, title, plpd, position)) | |
| 996 return ERROR_INT("pdf data not made", __func__, 1); | |
| 997 | |
| 998 if (!plpd || (position == L_LAST_IMAGE)) { | |
| 999 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 1000 LEPT_FREE(data); | |
| 1001 if (ret) | |
| 1002 return ERROR_INT("pdf data not written to file", __func__, 1); | |
| 1003 } | |
| 1004 | |
| 1005 return 0; | |
| 1006 } | |
| 1007 | |
| 1008 | |
| 1009 /*! | |
| 1010 * \brief convertImageDataToPdf() | |
| 1011 * | |
| 1012 * \param[in] imdata array of formatted image data; e.g., png, jpeg | |
| 1013 * \param[in] size size of image data | |
| 1014 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 1015 * L_FLATE_ENCODE, or L_JP2K_ENCODE) | |
| 1016 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 1017 * for jp2k: 27-45; 0 for default (34) | |
| 1018 * \param[in] fileout output pdf file; only required on last | |
| 1019 * image on page | |
| 1020 * \param[in] x, y location of lower-left corner of image, | |
| 1021 * in pixels, relative to the PostScript origin | |
| 1022 * (0,0) at the lower-left corner of the page | |
| 1023 * \param[in] res override the resolution of the input image, | |
| 1024 * in ppi; use 0 to respect the resolution | |
| 1025 * embedded in the input images | |
| 1026 * \param[in] title [optional] pdf title; can be null | |
| 1027 * \param[in,out] plpd ptr to lpd, which is created on the first | |
| 1028 * invocation and returned until last image is | |
| 1029 * processed, at which time it is destroyed | |
| 1030 * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, | |
| 1031 * L_LAST_IMAGE | |
| 1032 * \return 0 if OK, 1 on error | |
| 1033 * | |
| 1034 * <pre> | |
| 1035 * Notes: | |
| 1036 * (1) If %res == 0 and the input resolution field is 0, | |
| 1037 * this will use DefaultInputRes. | |
| 1038 * (2) See comments in convertToPdf(). | |
| 1039 * </pre> | |
| 1040 */ | |
| 1041 l_ok | |
| 1042 convertImageDataToPdf(l_uint8 *imdata, | |
| 1043 size_t size, | |
| 1044 l_int32 type, | |
| 1045 l_int32 quality, | |
| 1046 const char *fileout, | |
| 1047 l_int32 x, | |
| 1048 l_int32 y, | |
| 1049 l_int32 res, | |
| 1050 const char *title, | |
| 1051 L_PDF_DATA **plpd, | |
| 1052 l_int32 position) | |
| 1053 { | |
| 1054 l_int32 ret; | |
| 1055 PIX *pix; | |
| 1056 | |
| 1057 if (!imdata) | |
| 1058 return ERROR_INT("image data not defined", __func__, 1); | |
| 1059 if (!plpd || (position == L_LAST_IMAGE)) { | |
| 1060 if (!fileout) | |
| 1061 return ERROR_INT("fileout not defined", __func__, 1); | |
| 1062 } | |
| 1063 | |
| 1064 if ((pix = pixReadMem(imdata, size)) == NULL) | |
| 1065 return ERROR_INT("pix not read", __func__, 1); | |
| 1066 if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && | |
| 1067 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { | |
| 1068 selectDefaultPdfEncoding(pix, &type); | |
| 1069 } | |
| 1070 ret = pixConvertToPdf(pix, type, quality, fileout, x, y, res, | |
| 1071 title, plpd, position); | |
| 1072 pixDestroy(&pix); | |
| 1073 return ret; | |
| 1074 } | |
| 1075 | |
| 1076 | |
| 1077 /*! | |
| 1078 * \brief convertToPdfData() | |
| 1079 * | |
| 1080 * \param[in] filein input image file -- any format | |
| 1081 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 1082 * L_FLATE_ENCODE, or L_JP2K_ENCODE) | |
| 1083 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 1084 * for jp2k: 27-45; 0 for default (34) | |
| 1085 * \param[out] pdata pdf data in memory | |
| 1086 * \param[out] pnbytes number of bytes in pdf data | |
| 1087 * \param[in] x, y location of lower-left corner of image, | |
| 1088 * in pixels, relative to the PostScript origin | |
| 1089 * (0,0) at the lower-left corner of the page | |
| 1090 * \param[in] res override the resolution of the input image, | |
| 1091 * in ppi; use 0 to respect the resolution | |
| 1092 * embedded in the input images | |
| 1093 * \param[in] title [optional] pdf title; can be null | |
| 1094 * \param[in,out] plpd ptr to lpd, which is created on the first | |
| 1095 * invocation and returned until last image is | |
| 1096 * processed, at which time it is destroyed | |
| 1097 * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, | |
| 1098 * L_LAST_IMAGE | |
| 1099 * \return 0 if OK, 1 on error | |
| 1100 * | |
| 1101 * <pre> | |
| 1102 * Notes: | |
| 1103 * (1) If %res == 0 and the input resolution field is 0, | |
| 1104 * this will use DefaultInputRes. | |
| 1105 * (2) See comments in convertToPdf(). | |
| 1106 * </pre> | |
| 1107 */ | |
| 1108 l_ok | |
| 1109 convertToPdfData(const char *filein, | |
| 1110 l_int32 type, | |
| 1111 l_int32 quality, | |
| 1112 l_uint8 **pdata, | |
| 1113 size_t *pnbytes, | |
| 1114 l_int32 x, | |
| 1115 l_int32 y, | |
| 1116 l_int32 res, | |
| 1117 const char *title, | |
| 1118 L_PDF_DATA **plpd, | |
| 1119 l_int32 position) | |
| 1120 { | |
| 1121 PIX *pix; | |
| 1122 | |
| 1123 if (!pdata) | |
| 1124 return ERROR_INT("&data not defined", __func__, 1); | |
| 1125 *pdata = NULL; | |
| 1126 if (!pnbytes) | |
| 1127 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 1128 *pnbytes = 0; | |
| 1129 if (!filein) | |
| 1130 return ERROR_INT("filein not defined", __func__, 1); | |
| 1131 | |
| 1132 if ((pix = pixRead(filein)) == NULL) | |
| 1133 return ERROR_INT("pix not made", __func__, 1); | |
| 1134 | |
| 1135 pixConvertToPdfData(pix, type, quality, pdata, pnbytes, | |
| 1136 x, y, res, title, plpd, position); | |
| 1137 pixDestroy(&pix); | |
| 1138 return 0; | |
| 1139 } | |
| 1140 | |
| 1141 | |
| 1142 /*! | |
| 1143 * \brief convertImageDataToPdfData() | |
| 1144 * | |
| 1145 * \param[in] imdata array of formatted image data; e.g., png, jpeg | |
| 1146 * \param[in] size size of image data | |
| 1147 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 1148 * L_FLATE_ENCODE, or L_JP2K_ENCODE) | |
| 1149 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 1150 * for jp2k: 27-45; 0 for default (34) | |
| 1151 * \param[out] pdata pdf data in memory | |
| 1152 * \param[out] pnbytes number of bytes in pdf data | |
| 1153 * \param[in] x, y location of lower-left corner of image, | |
| 1154 * in pixels, relative to the PostScript origin | |
| 1155 * (0,0) at the lower-left corner of the page | |
| 1156 * \param[in] res override the resolution of the input image, | |
| 1157 * in ppi; use 0 to respect the resolution | |
| 1158 * embedded in the input images | |
| 1159 * \param[in] title [optional] pdf title; can be null | |
| 1160 * \param[out] plpd ptr to lpd, which is created on the first | |
| 1161 * invocation and returned until last image is | |
| 1162 * processed, at which time it is destroyed | |
| 1163 * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, | |
| 1164 * L_LAST_IMAGE | |
| 1165 * \return 0 if OK, 1 on error | |
| 1166 * | |
| 1167 * <pre> | |
| 1168 * Notes: | |
| 1169 * (1) If %res == 0 and the input resolution field is 0, | |
| 1170 * this will use DefaultInputRes. | |
| 1171 * (2) See comments in convertToPdf(). | |
| 1172 * </pre> | |
| 1173 */ | |
| 1174 l_ok | |
| 1175 convertImageDataToPdfData(l_uint8 *imdata, | |
| 1176 size_t size, | |
| 1177 l_int32 type, | |
| 1178 l_int32 quality, | |
| 1179 l_uint8 **pdata, | |
| 1180 size_t *pnbytes, | |
| 1181 l_int32 x, | |
| 1182 l_int32 y, | |
| 1183 l_int32 res, | |
| 1184 const char *title, | |
| 1185 L_PDF_DATA **plpd, | |
| 1186 l_int32 position) | |
| 1187 { | |
| 1188 l_int32 ret; | |
| 1189 PIX *pix; | |
| 1190 | |
| 1191 if (!pdata) | |
| 1192 return ERROR_INT("&data not defined", __func__, 1); | |
| 1193 *pdata = NULL; | |
| 1194 if (!pnbytes) | |
| 1195 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 1196 *pnbytes = 0; | |
| 1197 if (!imdata) | |
| 1198 return ERROR_INT("image data not defined", __func__, 1); | |
| 1199 if (plpd) { /* part of multi-page invocation */ | |
| 1200 if (position == L_FIRST_IMAGE) | |
| 1201 *plpd = NULL; | |
| 1202 } | |
| 1203 | |
| 1204 if ((pix = pixReadMem(imdata, size)) == NULL) | |
| 1205 return ERROR_INT("pix not read", __func__, 1); | |
| 1206 if (type != L_JPEG_ENCODE && type != L_G4_ENCODE && | |
| 1207 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) { | |
| 1208 selectDefaultPdfEncoding(pix, &type); | |
| 1209 } | |
| 1210 ret = pixConvertToPdfData(pix, type, quality, pdata, pnbytes, | |
| 1211 x, y, res, title, plpd, position); | |
| 1212 pixDestroy(&pix); | |
| 1213 return ret; | |
| 1214 } | |
| 1215 | |
| 1216 | |
| 1217 /*! | |
| 1218 * \brief pixConvertToPdf() | |
| 1219 * | |
| 1220 * \param[in] pix | |
| 1221 * \param[in] type encoding type (L_JPEG_ENCODE, L_G4_ENCODE, | |
| 1222 * L_FLATE_ENCODE, L_JP2K_ENCODE) | |
| 1223 * \param[in] quality for jpeg: 1-100; 0 for default (75) | |
| 1224 * for jp2k: 27-45; 0 for default (34) | |
| 1225 * \param[in] fileout output pdf file; only required on last | |
| 1226 * image on page | |
| 1227 * \param[in] x, y location of lower-left corner of image, | |
| 1228 * in pixels, relative to the PostScript origin | |
| 1229 * (0,0) at the lower-left corner of the page | |
| 1230 * \param[in] res override the resolution of the input image, | |
| 1231 * in ppi; use 0 to respect the resolution | |
| 1232 * embedded in the input images | |
| 1233 * \param[in] title [optional] pdf title; can be null | |
| 1234 * \param[in,out] plpd ptr to lpd, which is created on the first | |
| 1235 * invocation and returned until last image is | |
| 1236 * processed, at which time it is destroyed | |
| 1237 * \param[in] position in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, | |
| 1238 * L_LAST_IMAGE | |
| 1239 * \return 0 if OK, 1 on error | |
| 1240 * | |
| 1241 * <pre> | |
| 1242 * Notes: | |
| 1243 * (1) If %res == 0 and the input resolution field is 0, | |
| 1244 * this will use DefaultInputRes. | |
| 1245 * (2) This only writes data to fileout if it is the last | |
| 1246 * image to be written on the page. | |
| 1247 * (3) See comments in convertToPdf(). | |
| 1248 * </pre> | |
| 1249 */ | |
| 1250 l_ok | |
| 1251 pixConvertToPdf(PIX *pix, | |
| 1252 l_int32 type, | |
| 1253 l_int32 quality, | |
| 1254 const char *fileout, | |
| 1255 l_int32 x, | |
| 1256 l_int32 y, | |
| 1257 l_int32 res, | |
| 1258 const char *title, | |
| 1259 L_PDF_DATA **plpd, | |
| 1260 l_int32 position) | |
| 1261 { | |
| 1262 l_uint8 *data; | |
| 1263 l_int32 ret; | |
| 1264 size_t nbytes; | |
| 1265 | |
| 1266 if (!pix) | |
| 1267 return ERROR_INT("pix not defined", __func__, 1); | |
| 1268 if (!plpd || (position == L_LAST_IMAGE)) { | |
| 1269 if (!fileout) | |
| 1270 return ERROR_INT("fileout not defined", __func__, 1); | |
| 1271 } | |
| 1272 | |
| 1273 if (pixConvertToPdfData(pix, type, quality, &data, &nbytes, | |
| 1274 x, y, res, title, plpd, position)) { | |
| 1275 LEPT_FREE(data); | |
| 1276 return ERROR_INT("pdf data not made", __func__, 1); | |
| 1277 } | |
| 1278 | |
| 1279 if (!plpd || (position == L_LAST_IMAGE)) { | |
| 1280 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 1281 LEPT_FREE(data); | |
| 1282 if (ret) | |
| 1283 return ERROR_INT("pdf data not written to file", __func__, 1); | |
| 1284 } | |
| 1285 return 0; | |
| 1286 } | |
| 1287 | |
| 1288 | |
| 1289 /*! | |
| 1290 * \brief pixWriteStreamPdf() | |
| 1291 * | |
| 1292 * \param[in] fp file stream opened for writing | |
| 1293 * \param[in] pix all depths, cmap OK | |
| 1294 * \param[in] res override the resolution of the input image, in ppi; | |
| 1295 * use 0 to respect the resolution embedded in the input | |
| 1296 * \param[in] title [optional] pdf title; can be null | |
| 1297 * \return 0 if OK, 1 on error | |
| 1298 * | |
| 1299 * <pre> | |
| 1300 * Notes: | |
| 1301 * (1) This is the simplest interface for writing a single image | |
| 1302 * with pdf encoding to a stream. It uses G4 encoding for 1 bpp, | |
| 1303 * JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE | |
| 1304 * encoding for everything else. | |
| 1305 * </pre> | |
| 1306 */ | |
| 1307 l_ok | |
| 1308 pixWriteStreamPdf(FILE *fp, | |
| 1309 PIX *pix, | |
| 1310 l_int32 res, | |
| 1311 const char *title) | |
| 1312 { | |
| 1313 l_uint8 *data; | |
| 1314 size_t nbytes, nbytes_written; | |
| 1315 | |
| 1316 if (!fp) | |
| 1317 return ERROR_INT("stream not opened", __func__, 1); | |
| 1318 if (!pix) | |
| 1319 return ERROR_INT("pix not defined", __func__, 1); | |
| 1320 | |
| 1321 if (pixWriteMemPdf(&data, &nbytes, pix, res, title) != 0) { | |
| 1322 LEPT_FREE(data); | |
| 1323 return ERROR_INT("pdf data not made", __func__, 1); | |
| 1324 } | |
| 1325 | |
| 1326 nbytes_written = fwrite(data, 1, nbytes, fp); | |
| 1327 LEPT_FREE(data); | |
| 1328 if (nbytes != nbytes_written) | |
| 1329 return ERROR_INT("failure writing pdf data to stream", __func__, 1); | |
| 1330 return 0; | |
| 1331 } | |
| 1332 | |
| 1333 | |
| 1334 /*! | |
| 1335 * \brief pixWriteMemPdf() | |
| 1336 * | |
| 1337 * \param[out] pdata pdf as byte array | |
| 1338 * \param[out] pnbytes number of bytes in pdf array | |
| 1339 * \param[in] pix all depths, cmap OK | |
| 1340 * \param[in] res override the resolution of the input image, in ppi; | |
| 1341 * use 0 to respect the res embedded in the input | |
| 1342 * \param[in] title [optional] pdf title; can be null | |
| 1343 * \return 0 if OK, 1 on error | |
| 1344 * | |
| 1345 * <pre> | |
| 1346 * Notes: | |
| 1347 * (1) This is the simplest interface for writing a single image | |
| 1348 * with pdf encoding to memory. It uses G4 encoding for 1 bpp, | |
| 1349 * and makes a guess whether to use JPEG or FLATE encoding for | |
| 1350 * everything else. | |
| 1351 * </pre> | |
| 1352 */ | |
| 1353 l_ok | |
| 1354 pixWriteMemPdf(l_uint8 **pdata, | |
| 1355 size_t *pnbytes, | |
| 1356 PIX *pix, | |
| 1357 l_int32 res, | |
| 1358 const char *title) | |
| 1359 { | |
| 1360 l_int32 ret, type; | |
| 1361 | |
| 1362 if (pdata) *pdata = NULL; | |
| 1363 if (pnbytes) *pnbytes = 0; | |
| 1364 if (!pdata || !pnbytes) | |
| 1365 return ERROR_INT("&data or &nbytes not defined", __func__, 1); | |
| 1366 if (!pix) | |
| 1367 return ERROR_INT("pix not defined", __func__, 1); | |
| 1368 | |
| 1369 selectDefaultPdfEncoding(pix, &type); | |
| 1370 ret = pixConvertToPdfData(pix, type, 75, pdata, pnbytes, | |
| 1371 0, 0, res, title, NULL, 0); | |
| 1372 if (ret) | |
| 1373 return ERROR_INT("pdf data not made", __func__, 1); | |
| 1374 return 0; | |
| 1375 } | |
| 1376 | |
| 1377 | |
| 1378 /*---------------------------------------------------------------------* | |
| 1379 * Segmented multi-page, multi-image converter * | |
| 1380 *---------------------------------------------------------------------*/ | |
| 1381 /*! | |
| 1382 * \brief convertSegmentedFilesToPdf() | |
| 1383 * | |
| 1384 * \param[in] dirname directory name containing images | |
| 1385 * \param[in] substr [optional] substring filter on filenames; | |
| 1386 * can be null | |
| 1387 * \param[in] res input resolution of all images | |
| 1388 * \param[in] type compression type for non-image regions; the | |
| 1389 * image regions are always compressed with | |
| 1390 * L_JPEG_ENCODE | |
| 1391 * \param[in] thresh used for converting gray --> 1 bpp with | |
| 1392 * L_G4_ENCODE | |
| 1393 * \param[in] baa [optional] boxaa of image regions | |
| 1394 * \param[in] quality used for JPEG only; 0 for default (75) | |
| 1395 * \param[in] scalefactor scaling factor applied to each image region | |
| 1396 * \param[in] title [optional] pdf title; can be null | |
| 1397 * \param[in] fileout pdf file of all images | |
| 1398 * \return 0 if OK, 1 on error | |
| 1399 * | |
| 1400 * <pre> | |
| 1401 * Notes: | |
| 1402 * (1) If %substr is not NULL, only image filenames that contain | |
| 1403 * the substring can be used. If %substr == NULL, all files | |
| 1404 * in the directory are used. | |
| 1405 * (2) The files in the directory, after optional filtering by | |
| 1406 * the substring, are lexically sorted in increasing order | |
| 1407 * before concatenation. | |
| 1408 * (3) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without | |
| 1409 * colormap and many colors, or 32 bpp; FLATE for anything else. | |
| 1410 * (4) The boxaa, if it exists, contains one boxa of "image regions" | |
| 1411 * for each image file. The boxa must be aligned with the | |
| 1412 * sorted set of images. | |
| 1413 * (5) The scalefactor is applied to each image region. It is | |
| 1414 * typically < 1.0, to save bytes in the final pdf, because | |
| 1415 * the resolution is often not critical in non-text regions. | |
| 1416 * (6) If the non-image regions have pixel depth > 1 and the encoding | |
| 1417 * type is G4, they are automatically scaled up by 2x and | |
| 1418 * thresholded. Otherwise, no scaling is performed on them. | |
| 1419 * (7) Note that this function can be used to generate multipage | |
| 1420 * G4 compressed pdf from any input, by using %boxaa == NULL | |
| 1421 * and %type == L_G4_ENCODE. | |
| 1422 * </pre> | |
| 1423 */ | |
| 1424 l_ok | |
| 1425 convertSegmentedFilesToPdf(const char *dirname, | |
| 1426 const char *substr, | |
| 1427 l_int32 res, | |
| 1428 l_int32 type, | |
| 1429 l_int32 thresh, | |
| 1430 BOXAA *baa, | |
| 1431 l_int32 quality, | |
| 1432 l_float32 scalefactor, | |
| 1433 const char *title, | |
| 1434 const char *fileout) | |
| 1435 { | |
| 1436 char *fname; | |
| 1437 l_uint8 *imdata, *data; | |
| 1438 l_int32 i, npages, nboxa, nboxes, ret; | |
| 1439 size_t imbytes, databytes; | |
| 1440 BOXA *boxa; | |
| 1441 L_BYTEA *ba; | |
| 1442 L_PTRA *pa_data; | |
| 1443 SARRAY *sa; | |
| 1444 | |
| 1445 if (!dirname) | |
| 1446 return ERROR_INT("dirname not defined", __func__, 1); | |
| 1447 if (!fileout) | |
| 1448 return ERROR_INT("fileout not defined", __func__, 1); | |
| 1449 | |
| 1450 if ((sa = getNumberedPathnamesInDirectory(dirname, substr, 0, 0, 10000)) | |
| 1451 == NULL) | |
| 1452 return ERROR_INT("sa not made", __func__, 1); | |
| 1453 | |
| 1454 npages = sarrayGetCount(sa); | |
| 1455 /* If necessary, extend the boxaa, which is page-aligned with | |
| 1456 * the image files, to be as large as the set of images. */ | |
| 1457 if (baa) { | |
| 1458 nboxa = boxaaGetCount(baa); | |
| 1459 if (nboxa < npages) { | |
| 1460 boxa = boxaCreate(1); | |
| 1461 boxaaExtendWithInit(baa, npages, boxa); | |
| 1462 boxaDestroy(&boxa); | |
| 1463 } | |
| 1464 } | |
| 1465 | |
| 1466 /* Generate and save all the encoded pdf strings */ | |
| 1467 pa_data = ptraCreate(npages); | |
| 1468 for (i = 0; i < npages; i++) { | |
| 1469 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 1470 if (!strcmp(fname, "")) continue; | |
| 1471 boxa = NULL; | |
| 1472 if (baa) { | |
| 1473 boxa = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1474 nboxes = boxaGetCount(boxa); | |
| 1475 if (nboxes == 0) | |
| 1476 boxaDestroy(&boxa); | |
| 1477 } | |
| 1478 ret = convertToPdfDataSegmented(fname, res, type, thresh, boxa, | |
| 1479 quality, scalefactor, title, | |
| 1480 &imdata, &imbytes); | |
| 1481 boxaDestroy(&boxa); /* safe; in case nboxes > 0 */ | |
| 1482 if (ret) { | |
| 1483 L_ERROR("pdf encoding failed for %s\n", __func__, fname); | |
| 1484 continue; | |
| 1485 } | |
| 1486 ba = l_byteaInitFromMem(imdata, imbytes); | |
| 1487 if (imdata) LEPT_FREE(imdata); | |
| 1488 ptraAdd(pa_data, ba); | |
| 1489 } | |
| 1490 sarrayDestroy(&sa); | |
| 1491 | |
| 1492 ptraGetActualCount(pa_data, &npages); | |
| 1493 if (npages == 0) { | |
| 1494 L_ERROR("no pdf files made\n", __func__); | |
| 1495 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 1496 return 1; | |
| 1497 } | |
| 1498 | |
| 1499 /* Concatenate */ | |
| 1500 ret = ptraConcatenatePdfToData(pa_data, NULL, &data, &databytes); | |
| 1501 | |
| 1502 /* Clean up */ | |
| 1503 ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */ | |
| 1504 for (i = 0; i < npages; i++) { | |
| 1505 ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); | |
| 1506 l_byteaDestroy(&ba); | |
| 1507 } | |
| 1508 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 1509 | |
| 1510 if (ret) { | |
| 1511 if (data) LEPT_FREE(data); | |
| 1512 return ERROR_INT("pdf data not made", __func__, 1); | |
| 1513 } | |
| 1514 | |
| 1515 ret = l_binaryWrite(fileout, "w", data, databytes); | |
| 1516 LEPT_FREE(data); | |
| 1517 if (ret) | |
| 1518 L_ERROR("pdf data not written to file\n", __func__); | |
| 1519 return ret; | |
| 1520 } | |
| 1521 | |
| 1522 | |
| 1523 /*! | |
| 1524 * \brief convertNumberedMasksToBoxaa() | |
| 1525 * | |
| 1526 * \param[in] dirname directory name containing mask images | |
| 1527 * \param[in] substr [optional] substring filter on filenames; | |
| 1528 * can be null | |
| 1529 * \param[in] numpre number of characters in name before number | |
| 1530 * \param[in] numpost number of characters in name after number, | |
| 1531 * up to a dot before an extension | |
| 1532 * \return boxaa of mask regions, or NULL on error | |
| 1533 * | |
| 1534 * <pre> | |
| 1535 * Notes: | |
| 1536 * (1) This is conveniently used to generate the input boxaa | |
| 1537 * for convertSegmentedFilesToPdf(). It guarantees that the | |
| 1538 * boxa will be aligned with the page images, even if some | |
| 1539 * of the boxa are empty. | |
| 1540 * </pre> | |
| 1541 */ | |
| 1542 BOXAA * | |
| 1543 convertNumberedMasksToBoxaa(const char *dirname, | |
| 1544 const char *substr, | |
| 1545 l_int32 numpre, | |
| 1546 l_int32 numpost) | |
| 1547 { | |
| 1548 char *fname; | |
| 1549 l_int32 i, n; | |
| 1550 BOXA *boxa; | |
| 1551 BOXAA *baa; | |
| 1552 PIX *pix; | |
| 1553 SARRAY *sa; | |
| 1554 | |
| 1555 if (!dirname) | |
| 1556 return (BOXAA *)ERROR_PTR("dirname not defined", __func__, NULL); | |
| 1557 | |
| 1558 if ((sa = getNumberedPathnamesInDirectory(dirname, substr, numpre, | |
| 1559 numpost, 10000)) == NULL) | |
| 1560 return (BOXAA *)ERROR_PTR("sa not made", __func__, NULL); | |
| 1561 | |
| 1562 /* Generate and save all the encoded pdf strings */ | |
| 1563 n = sarrayGetCount(sa); | |
| 1564 baa = boxaaCreate(n); | |
| 1565 boxa = boxaCreate(1); | |
| 1566 boxaaInitFull(baa, boxa); | |
| 1567 boxaDestroy(&boxa); | |
| 1568 for (i = 0; i < n; i++) { | |
| 1569 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 1570 if (!strcmp(fname, "")) continue; | |
| 1571 if ((pix = pixRead(fname)) == NULL) { | |
| 1572 L_WARNING("invalid image on page %d\n", __func__, i); | |
| 1573 continue; | |
| 1574 } | |
| 1575 boxa = pixConnComp(pix, NULL, 8); | |
| 1576 boxaaReplaceBoxa(baa, i, boxa); | |
| 1577 pixDestroy(&pix); | |
| 1578 } | |
| 1579 | |
| 1580 sarrayDestroy(&sa); | |
| 1581 return baa; | |
| 1582 } | |
| 1583 | |
| 1584 | |
| 1585 /*---------------------------------------------------------------------* | |
| 1586 * Segmented single page, multi-image converters * | |
| 1587 *---------------------------------------------------------------------*/ | |
| 1588 /*! | |
| 1589 * \brief convertToPdfSegmented() | |
| 1590 * | |
| 1591 * \param[in] filein input image file -- any format | |
| 1592 * \param[in] res input image resolution; typ. 300 ppi; | |
| 1593 * use 0 for default | |
| 1594 * \param[in] type compression type for non-image regions; image | |
| 1595 * regions are always compressed with L_JPEG_ENCODE | |
| 1596 * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE | |
| 1597 * \param[in] boxa [optional] of image regions; can be null | |
| 1598 * \param[in] quality used for jpeg image regions; 0 for default | |
| 1599 * \param[in] scalefactor used for jpeg regions; must be <= 1.0 | |
| 1600 * \param[in] title [optional] pdf title; can be null | |
| 1601 * \param[in] fileout output pdf file | |
| 1602 * \return 0 if OK, 1 on error | |
| 1603 * | |
| 1604 * <pre> | |
| 1605 * Notes: | |
| 1606 * (1) If there are no image regions, set %boxa == NULL; | |
| 1607 * %quality and %scalefactor are ignored. | |
| 1608 * (2) Typically, %scalefactor is < 1.0, because the image regions | |
| 1609 * can be rendered at a lower resolution (for better compression) | |
| 1610 * than the text regions. If %scalefactor == 0, we use 1.0. | |
| 1611 * If the input image is 1 bpp and scalefactor < 1.0, we | |
| 1612 * use scaleToGray() to downsample the image regions to gray | |
| 1613 * before compressing them. | |
| 1614 * (3) If the compression type for non-image regions is L_G4_ENCODE | |
| 1615 * and bpp > 1, the image is upscaled 2x and thresholded | |
| 1616 * to 1 bpp. That is the only situation where %thresh is used. | |
| 1617 * (4) The parameter %quality is only used for image regions. | |
| 1618 * If %type == L_JPEG_ENCODE, default jpeg quality (75) is | |
| 1619 * used for the non-image regions. | |
| 1620 * (5) Processing matrix for non-image regions. | |
| 1621 * | |
| 1622 * Input G4 JPEG FLATE | |
| 1623 * ----------|--------------------------------------------------- | |
| 1624 * 1 bpp | 1x, 1 bpp 1x flate, 1 bpp 1x, 1 bpp | |
| 1625 * | | |
| 1626 * cmap | 2x, 1 bpp 1x flate, cmap 1x, cmap | |
| 1627 * | | |
| 1628 * 2,4 bpp | 2x, 1 bpp 1x flate 1x, 2,4 bpp | |
| 1629 * no cmap | 2,4 bpp | |
| 1630 * | | |
| 1631 * 8,32 bpp | 2x, 1 bpp 1x (jpeg) 1x, 8,32 bpp | |
| 1632 * no cmap | 8,32 bpp | |
| 1633 * | |
| 1634 * Summary: | |
| 1635 * (a) if G4 is requested, G4 is used, with 2x upscaling | |
| 1636 * for all cases except 1 bpp. | |
| 1637 * (b) if JPEG is requested, use flate encoding for all cases | |
| 1638 * except 8 bpp without cmap and 32 bpp (rgb). | |
| 1639 * (c) if FLATE is requested, use flate with no transformation | |
| 1640 * of the raster data. | |
| 1641 * (6) Calling options/sequence for these functions: | |
| 1642 * file --> file (convertToPdfSegmented) | |
| 1643 * pix --> file (pixConvertToPdfSegmented) | |
| 1644 * pix --> data (pixConvertToPdfDataSegmented) | |
| 1645 * file --> data (convertToPdfDataSegmented) | |
| 1646 * pix --> data (pixConvertToPdfDataSegmented) | |
| 1647 * </pre> | |
| 1648 */ | |
| 1649 l_ok | |
| 1650 convertToPdfSegmented(const char *filein, | |
| 1651 l_int32 res, | |
| 1652 l_int32 type, | |
| 1653 l_int32 thresh, | |
| 1654 BOXA *boxa, | |
| 1655 l_int32 quality, | |
| 1656 l_float32 scalefactor, | |
| 1657 const char *title, | |
| 1658 const char *fileout) | |
| 1659 { | |
| 1660 l_int32 ret; | |
| 1661 PIX *pixs; | |
| 1662 | |
| 1663 if (!filein) | |
| 1664 return ERROR_INT("filein not defined", __func__, 1); | |
| 1665 if (!fileout) | |
| 1666 return ERROR_INT("fileout not defined", __func__, 1); | |
| 1667 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && | |
| 1668 type != L_FLATE_ENCODE) | |
| 1669 return ERROR_INT("invalid conversion type", __func__, 1); | |
| 1670 if (boxa && scalefactor > 1.0) { | |
| 1671 L_WARNING("setting scalefactor to 1.0\n", __func__); | |
| 1672 scalefactor = 1.0; | |
| 1673 } | |
| 1674 | |
| 1675 if ((pixs = pixRead(filein)) == NULL) | |
| 1676 return ERROR_INT("pixs not made", __func__, 1); | |
| 1677 | |
| 1678 ret = pixConvertToPdfSegmented(pixs, res, type, thresh, boxa, quality, | |
| 1679 scalefactor, title, fileout); | |
| 1680 pixDestroy(&pixs); | |
| 1681 return ret; | |
| 1682 } | |
| 1683 | |
| 1684 | |
| 1685 /*! | |
| 1686 * \brief pixConvertToPdfSegmented() | |
| 1687 * | |
| 1688 * \param[in] pixs any depth, cmap OK | |
| 1689 * \param[in] res input image resolution; typ. 300 ppi; | |
| 1690 * use 0 for default | |
| 1691 * \param[in] type compression type for non-image regions; image | |
| 1692 * regions are always compressed with L_JPEG_ENCODE | |
| 1693 * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE | |
| 1694 * \param[in] boxa [optional] of image regions; can be null | |
| 1695 * \param[in] quality used for jpeg image regions; 0 for default | |
| 1696 * \param[in] scalefactor used for jpeg regions; must be <= 1.0 | |
| 1697 * \param[in] title [optional] pdf title; can be null | |
| 1698 * \param[in] fileout output pdf file | |
| 1699 * \return 0 if OK, 1 on error | |
| 1700 * | |
| 1701 * <pre> | |
| 1702 * Notes: | |
| 1703 * (1) See convertToPdfSegmented() for details. | |
| 1704 * </pre> | |
| 1705 */ | |
| 1706 l_ok | |
| 1707 pixConvertToPdfSegmented(PIX *pixs, | |
| 1708 l_int32 res, | |
| 1709 l_int32 type, | |
| 1710 l_int32 thresh, | |
| 1711 BOXA *boxa, | |
| 1712 l_int32 quality, | |
| 1713 l_float32 scalefactor, | |
| 1714 const char *title, | |
| 1715 const char *fileout) | |
| 1716 { | |
| 1717 l_uint8 *data; | |
| 1718 l_int32 ret; | |
| 1719 size_t nbytes; | |
| 1720 | |
| 1721 if (!pixs) | |
| 1722 return ERROR_INT("pixs not defined", __func__, 1); | |
| 1723 if (!fileout) | |
| 1724 return ERROR_INT("fileout not defined", __func__, 1); | |
| 1725 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && | |
| 1726 type != L_FLATE_ENCODE) | |
| 1727 return ERROR_INT("invalid conversion type", __func__, 1); | |
| 1728 if (boxa && scalefactor > 1.0) { | |
| 1729 L_WARNING("setting scalefactor to 1.0\n", __func__); | |
| 1730 scalefactor = 1.0; | |
| 1731 } | |
| 1732 | |
| 1733 ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, quality, | |
| 1734 scalefactor, title, &data, &nbytes); | |
| 1735 if (ret) | |
| 1736 return ERROR_INT("pdf generation failure", __func__, 1); | |
| 1737 | |
| 1738 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 1739 if (data) LEPT_FREE(data); | |
| 1740 return ret; | |
| 1741 } | |
| 1742 | |
| 1743 | |
| 1744 /*! | |
| 1745 * \brief convertToPdfDataSegmented() | |
| 1746 * | |
| 1747 * \param[in] filein input image file -- any format | |
| 1748 * \param[in] res input image resolution; typ. 300 ppi; | |
| 1749 * use 0 for default | |
| 1750 * \param[in] type compression type for non-image regions; image | |
| 1751 * regions are always compressed with L_JPEG_ENCODE | |
| 1752 * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE | |
| 1753 * \param[in] boxa [optional] image regions; can be null | |
| 1754 * \param[in] quality used for jpeg image regions; 0 for default | |
| 1755 * \param[in] scalefactor used for jpeg regions; must be <= 1.0 | |
| 1756 * \param[in] title [optional] pdf title; can be null | |
| 1757 * \param[out] pdata pdf data in memory | |
| 1758 * \param[out] pnbytes number of bytes in pdf data | |
| 1759 * \return 0 if OK, 1 on error | |
| 1760 * | |
| 1761 * <pre> | |
| 1762 * Notes: | |
| 1763 * (1) If there are no image regions, set %boxa == NULL; | |
| 1764 * %quality and %scalefactor are ignored. | |
| 1765 * (2) Typically, %scalefactor is < 1.0. The image regions are | |
| 1766 * </pre> | |
| 1767 */ | |
| 1768 l_ok | |
| 1769 convertToPdfDataSegmented(const char *filein, | |
| 1770 l_int32 res, | |
| 1771 l_int32 type, | |
| 1772 l_int32 thresh, | |
| 1773 BOXA *boxa, | |
| 1774 l_int32 quality, | |
| 1775 l_float32 scalefactor, | |
| 1776 const char *title, | |
| 1777 l_uint8 **pdata, | |
| 1778 size_t *pnbytes) | |
| 1779 { | |
| 1780 l_int32 ret; | |
| 1781 PIX *pixs; | |
| 1782 | |
| 1783 if (!pdata) | |
| 1784 return ERROR_INT("&data not defined", __func__, 1); | |
| 1785 *pdata = NULL; | |
| 1786 if (!pnbytes) | |
| 1787 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 1788 *pnbytes = 0; | |
| 1789 if (!filein) | |
| 1790 return ERROR_INT("filein not defined", __func__, 1); | |
| 1791 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && | |
| 1792 type != L_FLATE_ENCODE) | |
| 1793 return ERROR_INT("invalid conversion type", __func__, 1); | |
| 1794 if (boxa && scalefactor > 1.0) { | |
| 1795 L_WARNING("setting scalefactor to 1.0\n", __func__); | |
| 1796 scalefactor = 1.0; | |
| 1797 } | |
| 1798 | |
| 1799 if ((pixs = pixRead(filein)) == NULL) | |
| 1800 return ERROR_INT("pixs not made", __func__, 1); | |
| 1801 | |
| 1802 ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, | |
| 1803 quality, scalefactor, title, | |
| 1804 pdata, pnbytes); | |
| 1805 pixDestroy(&pixs); | |
| 1806 return ret; | |
| 1807 } | |
| 1808 | |
| 1809 | |
| 1810 /*! | |
| 1811 * \brief pixConvertToPdfDataSegmented() | |
| 1812 * | |
| 1813 * \param[in] pixs any depth, cmap OK | |
| 1814 * \param[in] res input image resolution; typ. 300 ppi; | |
| 1815 * use 0 for default | |
| 1816 * \param[in] type compression type for non-image regions; image | |
| 1817 * regions are always compressed with L_JPEG_ENCODE | |
| 1818 * \param[in] thresh for converting gray --> 1 bpp with L_G4_ENCODE | |
| 1819 * \param[in] boxa [optional] of image regions; can be null | |
| 1820 * \param[in] quality used for jpeg image regions; 0 for default | |
| 1821 * \param[in] scalefactor used for jpeg regions; must be <= 1.0 | |
| 1822 * \param[in] title [optional] pdf title; can be null | |
| 1823 * \param[out] pdata pdf data in memory | |
| 1824 * \param[out] pnbytes number of bytes in pdf data | |
| 1825 * \return 0 if OK, 1 on error | |
| 1826 * | |
| 1827 * <pre> | |
| 1828 * Notes: | |
| 1829 * (1) See convertToPdfSegmented() for details. | |
| 1830 * </pre> | |
| 1831 */ | |
| 1832 l_ok | |
| 1833 pixConvertToPdfDataSegmented(PIX *pixs, | |
| 1834 l_int32 res, | |
| 1835 l_int32 type, | |
| 1836 l_int32 thresh, | |
| 1837 BOXA *boxa, | |
| 1838 l_int32 quality, | |
| 1839 l_float32 scalefactor, | |
| 1840 const char *title, | |
| 1841 l_uint8 **pdata, | |
| 1842 size_t *pnbytes) | |
| 1843 { | |
| 1844 l_int32 i, nbox, seq, bx, by, bw, bh, upscale; | |
| 1845 l_float32 scale; | |
| 1846 BOX *box, *boxc, *box2; | |
| 1847 PIX *pix, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6; | |
| 1848 PIXCMAP *cmap; | |
| 1849 L_PDF_DATA *lpd; | |
| 1850 | |
| 1851 if (!pdata) | |
| 1852 return ERROR_INT("&data not defined", __func__, 1); | |
| 1853 *pdata = NULL; | |
| 1854 if (!pnbytes) | |
| 1855 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 1856 *pnbytes = 0; | |
| 1857 if (!pixs) | |
| 1858 return ERROR_INT("pixs not defined", __func__, 1); | |
| 1859 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE && | |
| 1860 type != L_FLATE_ENCODE) | |
| 1861 return ERROR_INT("invalid conversion type", __func__, 1); | |
| 1862 if (boxa && (scalefactor <= 0.0 || scalefactor > 1.0)) { | |
| 1863 L_WARNING("setting scalefactor to 1.0\n", __func__); | |
| 1864 scalefactor = 1.0; | |
| 1865 } | |
| 1866 | |
| 1867 /* Adjust scalefactor so that the product with res gives an integer */ | |
| 1868 if (res <= 0) | |
| 1869 res = DefaultInputRes; | |
| 1870 scale = (l_float32)((l_int32)(scalefactor * res + 0.5)) / (l_float32)res; | |
| 1871 cmap = pixGetColormap(pixs); | |
| 1872 | |
| 1873 /* Simple case: single image to be encoded */ | |
| 1874 if (!boxa || boxaGetCount(boxa) == 0) { | |
| 1875 if (pixGetDepth(pixs) > 1 && type == L_G4_ENCODE) { | |
| 1876 if (cmap) | |
| 1877 pixt1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); | |
| 1878 else | |
| 1879 pixt1 = pixConvertTo8(pixs, FALSE); | |
| 1880 pixt2 = pixScaleGray2xLIThresh(pixt1, thresh); | |
| 1881 pixConvertToPdfData(pixt2, type, quality, pdata, pnbytes, | |
| 1882 0, 0, 2 * res, title, NULL, 0); | |
| 1883 pixDestroy(&pixt1); | |
| 1884 pixDestroy(&pixt2); | |
| 1885 } else { | |
| 1886 pixConvertToPdfData(pixs, type, quality, pdata, pnbytes, | |
| 1887 0, 0, res, title, NULL, 0); | |
| 1888 } | |
| 1889 return 0; | |
| 1890 } | |
| 1891 | |
| 1892 /* Multiple images to be encoded. If %type == L_G4_ENCODE, | |
| 1893 * jpeg encode a version of pixs that is blanked in the non-image | |
| 1894 * regions, and paint the scaled non-image part onto it through a mask. | |
| 1895 * Otherwise, we must put the non-image part down first and | |
| 1896 * then render all the image regions separately on top of it, | |
| 1897 * at their own resolution. */ | |
| 1898 pixt1 = pixSetBlackOrWhiteBoxa(pixs, boxa, L_SET_WHITE); /* non-image */ | |
| 1899 nbox = boxaGetCount(boxa); | |
| 1900 if (type == L_G4_ENCODE) { | |
| 1901 pixt2 = pixCreateTemplate(pixs); /* only image regions */ | |
| 1902 pixSetBlackOrWhite(pixt2, L_SET_WHITE); | |
| 1903 for (i = 0; i < nbox; i++) { | |
| 1904 box = boxaGetBox(boxa, i, L_CLONE); | |
| 1905 pix = pixClipRectangle(pixs, box, &boxc); | |
| 1906 boxGetGeometry(boxc, &bx, &by, &bw, &bh); | |
| 1907 pixRasterop(pixt2, bx, by, bw, bh, PIX_SRC, pix, 0, 0); | |
| 1908 pixDestroy(&pix); | |
| 1909 boxDestroy(&box); | |
| 1910 boxDestroy(&boxc); | |
| 1911 } | |
| 1912 pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC); | |
| 1913 if (pixGetDepth(pixt3) == 1) | |
| 1914 pixt4 = pixScaleToGray(pixt3, scale); | |
| 1915 else | |
| 1916 pixt4 = pixScale(pixt3, scale, scale); | |
| 1917 pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes, | |
| 1918 0, 0, (l_int32)(scale * res), title, | |
| 1919 &lpd, L_FIRST_IMAGE); | |
| 1920 | |
| 1921 if (pixGetDepth(pixt1) == 1) { | |
| 1922 pixt5 = pixClone(pixt1); | |
| 1923 upscale = 1; | |
| 1924 } else { | |
| 1925 pixt6 = pixConvertTo8(pixt1, 0); | |
| 1926 pixt5 = pixScaleGray2xLIThresh(pixt6, thresh); | |
| 1927 pixDestroy(&pixt6); | |
| 1928 upscale = 2; | |
| 1929 } | |
| 1930 pixConvertToPdfData(pixt5, L_G4_ENCODE, quality, pdata, pnbytes, | |
| 1931 0, 0, upscale * res, title, &lpd, L_LAST_IMAGE); | |
| 1932 pixDestroy(&pixt2); | |
| 1933 pixDestroy(&pixt3); | |
| 1934 pixDestroy(&pixt4); | |
| 1935 pixDestroy(&pixt5); | |
| 1936 } else { | |
| 1937 /* Put the non-image part down first. This is the full | |
| 1938 size of the page, so we can use it to find the page | |
| 1939 height in pixels, which is required for determining | |
| 1940 the LL corner of the image relative to the LL corner | |
| 1941 of the page. */ | |
| 1942 pixConvertToPdfData(pixt1, type, quality, pdata, pnbytes, 0, 0, | |
| 1943 res, title, &lpd, L_FIRST_IMAGE); | |
| 1944 for (i = 0; i < nbox; i++) { | |
| 1945 box = boxaGetBox(boxa, i, L_CLONE); | |
| 1946 pixt2 = pixClipRectangle(pixs, box, &boxc); | |
| 1947 pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_BASED_ON_SRC); | |
| 1948 if (pixGetDepth(pixt3) == 1) | |
| 1949 pixt4 = pixScaleToGray(pixt3, scale); | |
| 1950 else | |
| 1951 pixt4 = pixScale(pixt3, scale, scale); | |
| 1952 box2 = boxTransform(boxc, 0, 0, scale, scale); | |
| 1953 boxGetGeometry(box2, &bx, &by, NULL, &bh); | |
| 1954 seq = (i == nbox - 1) ? L_LAST_IMAGE : L_NEXT_IMAGE; | |
| 1955 pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes, | |
| 1956 bx, by, (l_int32)(scale * res), title, | |
| 1957 &lpd, seq); | |
| 1958 pixDestroy(&pixt2); | |
| 1959 pixDestroy(&pixt3); | |
| 1960 pixDestroy(&pixt4); | |
| 1961 boxDestroy(&box); | |
| 1962 boxDestroy(&boxc); | |
| 1963 boxDestroy(&box2); | |
| 1964 } | |
| 1965 } | |
| 1966 | |
| 1967 pixDestroy(&pixt1); | |
| 1968 return 0; | |
| 1969 } | |
| 1970 | |
| 1971 | |
| 1972 /*---------------------------------------------------------------------* | |
| 1973 * Multi-page concatenation * | |
| 1974 *---------------------------------------------------------------------*/ | |
| 1975 /*! | |
| 1976 * \brief concatenatePdf() | |
| 1977 * | |
| 1978 * \param[in] dirname directory name containing single-page pdf files | |
| 1979 * \param[in] substr [optional] substring filter on filenames; | |
| 1980 * can be null | |
| 1981 * \param[in] fileout concatenated pdf file | |
| 1982 * \return 0 if OK, 1 on error | |
| 1983 * | |
| 1984 * <pre> | |
| 1985 * Notes: | |
| 1986 * (1) This only works with leptonica-formatted single-page pdf files. | |
| 1987 * (2) If %substr is not NULL, only filenames that contain | |
| 1988 * the substring can be returned. If %substr == NULL, | |
| 1989 * none of the filenames are filtered out. | |
| 1990 * (3) The files in the directory, after optional filtering by | |
| 1991 * the substring, are lexically sorted in increasing order | |
| 1992 * before concatenation. | |
| 1993 * </pre> | |
| 1994 */ | |
| 1995 l_ok | |
| 1996 concatenatePdf(const char *dirname, | |
| 1997 const char *substr, | |
| 1998 const char *fileout) | |
| 1999 { | |
| 2000 l_int32 ret; | |
| 2001 SARRAY *sa; | |
| 2002 | |
| 2003 if (!dirname) | |
| 2004 return ERROR_INT("dirname not defined", __func__, 1); | |
| 2005 if (!fileout) | |
| 2006 return ERROR_INT("fileout not defined", __func__, 1); | |
| 2007 | |
| 2008 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) | |
| 2009 return ERROR_INT("sa not made", __func__, 1); | |
| 2010 ret = saConcatenatePdf(sa, fileout); | |
| 2011 sarrayDestroy(&sa); | |
| 2012 return ret; | |
| 2013 } | |
| 2014 | |
| 2015 | |
| 2016 /*! | |
| 2017 * \brief saConcatenatePdf() | |
| 2018 * | |
| 2019 * \param[in] sa string array of pathnames for single-page pdf files | |
| 2020 * \param[in] fileout concatenated pdf file | |
| 2021 * \return 0 if OK, 1 on error | |
| 2022 * | |
| 2023 * <pre> | |
| 2024 * Notes: | |
| 2025 * (1) This only works with leptonica-formatted single-page pdf files. | |
| 2026 * </pre> | |
| 2027 */ | |
| 2028 l_ok | |
| 2029 saConcatenatePdf(SARRAY *sa, | |
| 2030 const char *fileout) | |
| 2031 { | |
| 2032 l_uint8 *data; | |
| 2033 l_int32 ret; | |
| 2034 size_t nbytes; | |
| 2035 | |
| 2036 if (!sa) | |
| 2037 return ERROR_INT("sa not defined", __func__, 1); | |
| 2038 if (!fileout) | |
| 2039 return ERROR_INT("fileout not defined", __func__, 1); | |
| 2040 | |
| 2041 ret = saConcatenatePdfToData(sa, &data, &nbytes); | |
| 2042 if (ret) | |
| 2043 return ERROR_INT("pdf data not made", __func__, 1); | |
| 2044 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 2045 LEPT_FREE(data); | |
| 2046 return ret; | |
| 2047 } | |
| 2048 | |
| 2049 | |
| 2050 /*! | |
| 2051 * \brief ptraConcatenatePdf() | |
| 2052 * | |
| 2053 * \param[in] pa array of pdf strings, each for a single-page pdf file | |
| 2054 * \param[in] fileout concatenated pdf file | |
| 2055 * \return 0 if OK, 1 on error | |
| 2056 * | |
| 2057 * <pre> | |
| 2058 * Notes: | |
| 2059 * (1) This only works with leptonica-formatted single-page pdf files. | |
| 2060 * </pre> | |
| 2061 */ | |
| 2062 l_ok | |
| 2063 ptraConcatenatePdf(L_PTRA *pa, | |
| 2064 const char *fileout) | |
| 2065 { | |
| 2066 l_uint8 *data; | |
| 2067 l_int32 ret; | |
| 2068 size_t nbytes; | |
| 2069 | |
| 2070 if (!pa) | |
| 2071 return ERROR_INT("pa not defined", __func__, 1); | |
| 2072 if (!fileout) | |
| 2073 return ERROR_INT("fileout not defined", __func__, 1); | |
| 2074 | |
| 2075 ret = ptraConcatenatePdfToData(pa, NULL, &data, &nbytes); | |
| 2076 if (ret) | |
| 2077 return ERROR_INT("pdf data not made", __func__, 1); | |
| 2078 ret = l_binaryWrite(fileout, "w", data, nbytes); | |
| 2079 LEPT_FREE(data); | |
| 2080 return ret; | |
| 2081 } | |
| 2082 | |
| 2083 | |
| 2084 /*! | |
| 2085 * \brief concatenatePdfToData() | |
| 2086 * | |
| 2087 * \param[in] dirname directory name containing single-page pdf files | |
| 2088 * \param[in] substr [optional] substring filter on filenames; | |
| 2089 * can be null | |
| 2090 * \param[out] pdata concatenated pdf data in memory | |
| 2091 * \param[out] pnbytes number of bytes in pdf data | |
| 2092 * \return 0 if OK, 1 on error | |
| 2093 * | |
| 2094 * <pre> | |
| 2095 * Notes: | |
| 2096 * (1) This only works with leptonica-formatted single-page pdf files. | |
| 2097 * (2) If %substr is not NULL, only filenames that contain | |
| 2098 * the substring can be returned. If %substr == NULL, | |
| 2099 * none of the filenames are filtered out. | |
| 2100 * (3) The files in the directory, after optional filtering by | |
| 2101 * the substring, are lexically sorted in increasing order | |
| 2102 * before concatenation. | |
| 2103 * </pre> | |
| 2104 */ | |
| 2105 l_ok | |
| 2106 concatenatePdfToData(const char *dirname, | |
| 2107 const char *substr, | |
| 2108 l_uint8 **pdata, | |
| 2109 size_t *pnbytes) | |
| 2110 { | |
| 2111 l_int32 ret; | |
| 2112 SARRAY *sa; | |
| 2113 | |
| 2114 if (!pdata) | |
| 2115 return ERROR_INT("&data not defined", __func__, 1); | |
| 2116 *pdata = NULL; | |
| 2117 if (!pnbytes) | |
| 2118 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 2119 *pnbytes = 0; | |
| 2120 if (!dirname) | |
| 2121 return ERROR_INT("dirname not defined", __func__, 1); | |
| 2122 | |
| 2123 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) | |
| 2124 return ERROR_INT("sa not made", __func__, 1); | |
| 2125 ret = saConcatenatePdfToData(sa, pdata, pnbytes); | |
| 2126 sarrayDestroy(&sa); | |
| 2127 return ret; | |
| 2128 } | |
| 2129 | |
| 2130 | |
| 2131 /*! | |
| 2132 * \brief saConcatenatePdfToData() | |
| 2133 * | |
| 2134 * \param[in] sa string array of pathnames for single-page pdf files | |
| 2135 * \param[out] pdata concatenated pdf data in memory | |
| 2136 * \param[out] pnbytes number of bytes in pdf data | |
| 2137 * \return 0 if OK, 1 on error | |
| 2138 * | |
| 2139 * <pre> | |
| 2140 * Notes: | |
| 2141 * (1) This only works with leptonica-formatted single-page pdf files. | |
| 2142 * </pre> | |
| 2143 */ | |
| 2144 l_ok | |
| 2145 saConcatenatePdfToData(SARRAY *sa, | |
| 2146 l_uint8 **pdata, | |
| 2147 size_t *pnbytes) | |
| 2148 { | |
| 2149 char *fname; | |
| 2150 l_int32 i, npages, ret; | |
| 2151 L_BYTEA *bas; | |
| 2152 L_PTRA *pa_data; /* input pdf data for each page */ | |
| 2153 | |
| 2154 if (!pdata) | |
| 2155 return ERROR_INT("&data not defined", __func__, 1); | |
| 2156 *pdata = NULL; | |
| 2157 if (!pnbytes) | |
| 2158 return ERROR_INT("&nbytes not defined", __func__, 1); | |
| 2159 *pnbytes = 0; | |
| 2160 if (!sa) | |
| 2161 return ERROR_INT("sa not defined", __func__, 1); | |
| 2162 | |
| 2163 /* Read the pdf files into memory */ | |
| 2164 if ((npages = sarrayGetCount(sa)) == 0) | |
| 2165 return ERROR_INT("no filenames found", __func__, 1); | |
| 2166 pa_data = ptraCreate(npages); | |
| 2167 for (i = 0; i < npages; i++) { | |
| 2168 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 2169 bas = l_byteaInitFromFile(fname); | |
| 2170 ptraAdd(pa_data, bas); | |
| 2171 } | |
| 2172 | |
| 2173 ret = ptraConcatenatePdfToData(pa_data, sa, pdata, pnbytes); | |
| 2174 | |
| 2175 /* Cleanup: some pages could have been removed */ | |
| 2176 ptraGetActualCount(pa_data, &npages); | |
| 2177 for (i = 0; i < npages; i++) { | |
| 2178 bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION); | |
| 2179 l_byteaDestroy(&bas); | |
| 2180 } | |
| 2181 ptraDestroy(&pa_data, FALSE, FALSE); | |
| 2182 return ret; | |
| 2183 } | |
| 2184 | |
| 2185 /* --------------------------------------------*/ | |
| 2186 #endif /* USE_PDFIO */ | |
| 2187 /* --------------------------------------------*/ |
