Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/renderpdf.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file renderpdf.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Rendering pdf files using an external library | |
| 32 * l_int32 l_pdfRenderFile() | |
| 33 * l_int32 l_pdfRenderFiles() | |
| 34 * | |
| 35 * Utility for rendering a set of pdf files as page images. | |
| 36 * The images are rendered for full page images at a specified | |
| 37 * resolution between 50 and 300 ppi, in the directory | |
| 38 * /tmp/lept/renderpdf/ | |
| 39 * | |
| 40 * An application like cleanpdf performs a sequence of: | |
| 41 * (1) rendering the pdfs into a set of images, | |
| 42 * (2) doing image processing on each image to generate new images, and | |
| 43 * (3) wrapping the new images up in a single pdf file. | |
| 44 * Typically, the processed images made by step (2) are stored compressed | |
| 45 * in memory in a PixaComp, before wrapping them up in step (3). | |
| 46 * | |
| 47 * This requires the Poppler package of pdf utilities, in particular | |
| 48 * the program pdftoppm. For non-unix systems, this requires | |
| 49 * installation of the cygwin Poppler package: | |
| 50 * https://cygwin.com/cgi-bin2/package-cat.cgi?file=x86/poppler/ | |
| 51 * poppler-0.26.5-1 | |
| 52 * | |
| 53 * For the rasterizer, use pdftoppm: | |
| 54 * pdftoppm -r res fname outroot ('-r res' renders output at res ppi) | |
| 55 * This works on all pdf pages, both wrapped images and pages that | |
| 56 * were made orthographically. The default output resolution for | |
| 57 * pdftoppm is 150 ppi, but we typically use 300 ppi. This makes large | |
| 58 * uncompressed RGB image files (e.g., a standard size RGB page image | |
| 59 * at 300 ppi is 25 MB), but it is very fast. | |
| 60 * | |
| 61 * The size of the resulting images does not depend on the resolution | |
| 62 * of the images stored in the input pdf. We compute the value of the | |
| 63 * resolution parameter (render_res) that when input to pdftoppm | |
| 64 * will generate a page-size image (612 x 792 pts) at the requested | |
| 65 * output resolution. | |
| 66 * | |
| 67 * We do NOT use pdfimages: | |
| 68 * pdfimages -j fname outroot (-j outputs jpeg if input is dct) | |
| 69 * pdfimages only works when all pages are pdf wrappers around images. | |
| 70 * Further, in some cases, it scrambles the order of the output pages | |
| 71 * and inserts extra images. | |
| 72 * | |
| 73 * By default, this function will not run, because it makes a call | |
| 74 * to system(1). To render pdfs as a set of images in a directory, | |
| 75 * three things are required: | |
| 76 * (1) To have poppler installed. | |
| 77 * (2) To enable debug operations using setLeptDebugOK(1). | |
| 78 * (3) To link the functions that generate pdf files in the library | |
| 79 * (in pdfio1.c, pdfio2.c). | |
| 80 * </pre> | |
| 81 */ | |
| 82 | |
| 83 #ifdef HAVE_CONFIG_H | |
| 84 #include <config_auto.h> | |
| 85 #endif /* HAVE_CONFIG_H */ | |
| 86 | |
| 87 #include "allheaders.h" | |
| 88 | |
| 89 /* --------------------------------------------*/ | |
| 90 #if USE_PDFIO /* defined in environ.h */ | |
| 91 /* --------------------------------------------*/ | |
| 92 | |
| 93 /*-----------------------------------------------------------------* | |
| 94 * Rendering pdf files using an external library * | |
| 95 *-----------------------------------------------------------------*/ | |
| 96 /*! | |
| 97 * \brief l_pdfRenderFile() | |
| 98 * | |
| 99 * \param[in] filename input pdf file | |
| 100 * \param[in] res output resolution (0, [50 ... 300]) ppi | |
| 101 * \param[out] psaout sarray of filenames of rasterized images | |
| 102 * \return 0 if OK, 1 on error | |
| 103 * | |
| 104 * <pre> | |
| 105 * Notes: | |
| 106 * (1) Wrapper to l_pdfRenderFiles() for a single input pdf file. | |
| 107 * </pre> | |
| 108 */ | |
| 109 l_ok | |
| 110 l_pdfRenderFile(const char *filename, | |
| 111 l_int32 res, | |
| 112 SARRAY **psaout) | |
| 113 { | |
| 114 l_int32 ret; | |
| 115 SARRAY *sain; | |
| 116 | |
| 117 if (!psaout) | |
| 118 return ERROR_INT("&saout not defined", __func__, 1); | |
| 119 *psaout = NULL; | |
| 120 if (!filename) | |
| 121 return ERROR_INT("filename not defined", __func__, 1); | |
| 122 | |
| 123 sain = sarrayCreate(1); | |
| 124 sarrayAddString(sain, filename, L_COPY); | |
| 125 ret = l_pdfRenderFiles(NULL, sain, res, psaout); | |
| 126 sarrayDestroy(&sain); | |
| 127 return ret; | |
| 128 } | |
| 129 | |
| 130 | |
| 131 /*! | |
| 132 * \brief l_pdfRenderFiles() | |
| 133 * | |
| 134 * \param[in] dir directory of input pdf files | |
| 135 * \param[in] sain sarray of input pdf filenames | |
| 136 * \param[in] res output resolution (0, [50 ... 300]) ppi | |
| 137 * \param[out] psaout sarray of output filenames of rendered images | |
| 138 * \return 0 if OK, 1 on error | |
| 139 * | |
| 140 * <pre> | |
| 141 * Notes: | |
| 142 * (1) Because this uses the "system" call, it is disabled by default | |
| 143 * on all platforms. It is not supported and therefore disabled | |
| 144 * on iOS 11. | |
| 145 * (2) Input pdf file(s) are specified either by an input directory | |
| 146 * or an sarray with the paths. Use the sarray if it is given; | |
| 147 * otherwise, use all files in the directory with extension "pdf", | |
| 148 * and name the rendered images in lexical order of the filenames. | |
| 149 * (3) The allowed output rendering resolutions are between 50 ppi | |
| 150 * and 300 ppi. Typical resolutions are 150 and 300 ppi. | |
| 151 * Default input value of 0 can be used for 300 ppi resolution. | |
| 152 * (4) Images are rendered in ppm format in directory /tmp/lept/renderpdf | |
| 153 * and named in lexical order of the input filenames. On invocation, | |
| 154 * any existing files in this directory are removed. | |
| 155 * (5) This requires pdftoppm from the Poppler package of pdf utilities. | |
| 156 * </pre> | |
| 157 */ | |
| 158 l_ok | |
| 159 l_pdfRenderFiles(const char *dir, | |
| 160 SARRAY *sain, | |
| 161 l_int32 res, | |
| 162 SARRAY **psaout) | |
| 163 { | |
| 164 char buf[256]; | |
| 165 char *imagedir, *firstfile, *fname, *basename, *tail; | |
| 166 l_int32 i, nfiles, render_res; | |
| 167 SARRAY *sa; | |
| 168 | |
| 169 if (!LeptDebugOK) { | |
| 170 L_INFO("running pdftoppm is disabled; " | |
| 171 "use setLeptDebugOK(1) to enable\n", __func__); | |
| 172 return 0; | |
| 173 } | |
| 174 | |
| 175 #ifdef OS_IOS /* iOS 11 does not support system() */ | |
| 176 return ERROR_INT("iOS 11 does not support system()", __func__, 0); | |
| 177 #endif /* OS_IOS */ | |
| 178 | |
| 179 if (!psaout) | |
| 180 return ERROR_INT("&saout not defined", __func__, 1); | |
| 181 *psaout = NULL; | |
| 182 if (res == 0) res = 300; | |
| 183 if (res < 50 || res > 300) | |
| 184 return ERROR_INT("res not in range [50 ... 300]", __func__, 1); | |
| 185 if (!dir && !sain) | |
| 186 return ERROR_INT("neither dir or sain are defined", __func__, 1); | |
| 187 if (sain) { | |
| 188 sa = sarrayCopy(sain); | |
| 189 } else { | |
| 190 sa = getSortedPathnamesInDirectory(dir, "pdf", 0, 0); | |
| 191 if (!sa) | |
| 192 return ERROR_INT("no files found in dir", __func__, 1); | |
| 193 } | |
| 194 nfiles = sarrayGetCount(sa); | |
| 195 | |
| 196 /* Set up directory for rendered page images. */ | |
| 197 lept_rmdir("lept/renderpdf"); | |
| 198 lept_mkdir("lept/renderpdf"); | |
| 199 imagedir = genPathname("/tmp/lept/renderpdf", NULL); | |
| 200 | |
| 201 /* Figure out the resolution to use with the image renderer. | |
| 202 This first checks the media box sizes, which give the output | |
| 203 image size in printer points (1/72 inch). The largest expected | |
| 204 output image has a max dimension of about 11 inches, corresponding | |
| 205 to 792 points. At a resolution of 300 ppi, the max image size | |
| 206 is then 3300. For robustness, use the median of media box sizes. | |
| 207 If the max dimension of this median is significantly larger than | |
| 208 792, reduce the input resolution to the renderer. Specifically: | |
| 209 * Calculate the median of the MediaBox widths and heights. | |
| 210 * If the max exceeds 850, reduce the resolution so that the max | |
| 211 dimension of the rendered image is 3300. The new resolution | |
| 212 input to the renderer is reduced from 300 by the factor: | |
| 213 (792 / medmax) | |
| 214 If the media boxes are not found, render a page using a small | |
| 215 given resolution (72) and use the max dimension to find the | |
| 216 resolution, render_res, that will produce an output with | |
| 217 3300 pixels in the largest dimension. */ | |
| 218 firstfile = sarrayGetString(sa, 0, L_NOCOPY); | |
| 219 getPdfRendererResolution(firstfile, imagedir, &render_res); | |
| 220 | |
| 221 /* The input %res gives the actual resolution at which the page is | |
| 222 to be rendered. If this is less than 300 ppi, reduce render_res, | |
| 223 the resolution input to pdftoppm, by the factor: | |
| 224 (res / 300) */ | |
| 225 render_res = (render_res * res) / 300; | |
| 226 | |
| 227 /* Rasterize: '-r res' renders output at res ppi | |
| 228 * pdftoppm -r res fname outroot */ | |
| 229 for (i = 0; i < nfiles; i++) { | |
| 230 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 231 splitPathAtDirectory(fname, NULL, &tail); | |
| 232 splitPathAtExtension(tail, &basename, NULL); | |
| 233 snprintf(buf, sizeof(buf), "pdftoppm -r %d %s %s/%s", | |
| 234 render_res, fname, imagedir, basename); | |
| 235 lept_free(tail); | |
| 236 lept_free(basename); | |
| 237 lept_stderr("%s\n", buf); | |
| 238 callSystemDebug(buf); /* pdftoppm */ | |
| 239 } | |
| 240 sarrayDestroy(&sa); | |
| 241 | |
| 242 /* Generate the output array of image file names */ | |
| 243 *psaout = getSortedPathnamesInDirectory(imagedir, NULL, 0, 0); | |
| 244 lept_free(imagedir); | |
| 245 return 0; | |
| 246 } | |
| 247 | |
| 248 | |
| 249 /* --------------------------------------------*/ | |
| 250 #endif /* USE_PDFIO */ | |
| 251 /* --------------------------------------------*/ | |
| 252 | |
| 253 | |
| 254 | |
| 255 /* ------------------------------------------------------------------------- * | |
| 256 * Stubs if pdf is not supported * | |
| 257 * ------------------------------------------------------------------------- */ | |
| 258 | |
| 259 /* -----------------------------------------------------------------*/ | |
| 260 #if !USE_PDFIO | |
| 261 /* -----------------------------------------------------------------*/ | |
| 262 | |
| 263 l_ok l_pdfRenderFile(const char *filename, l_int32 res, SARRAY **psaout) | |
| 264 { | |
| 265 return ERROR_INT("function not present", __func__, 1); | |
| 266 } | |
| 267 | |
| 268 /* -----------------------------------------------------------*/ | |
| 269 | |
| 270 l_ok l_pdfRenderFiles(const char *dir, SARRAY *sain, l_int32 res, | |
| 271 SARRAY **psaout) | |
| 272 { | |
| 273 return ERROR_INT("function not present", __func__, 1); | |
| 274 } | |
| 275 | |
| 276 /* -----------------------------------------------------------------*/ | |
| 277 #endif /* !USE_PDFIO */ | |
| 278 /* -----------------------------------------------------------------*/ | |
| 279 |
