Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/renderpdf.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/renderpdf.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,279 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file renderpdf.c + * <pre> + * + * Rendering pdf files using an external library + * l_int32 l_pdfRenderFile() + * l_int32 l_pdfRenderFiles() + * + * Utility for rendering a set of pdf files as page images. + * The images are rendered for full page images at a specified + * resolution between 50 and 300 ppi, in the directory + * /tmp/lept/renderpdf/ + * + * An application like cleanpdf performs a sequence of: + * (1) rendering the pdfs into a set of images, + * (2) doing image processing on each image to generate new images, and + * (3) wrapping the new images up in a single pdf file. + * Typically, the processed images made by step (2) are stored compressed + * in memory in a PixaComp, before wrapping them up in step (3). + * + * This requires the Poppler package of pdf utilities, in particular + * the program pdftoppm. For non-unix systems, this requires + * installation of the cygwin Poppler package: + * https://cygwin.com/cgi-bin2/package-cat.cgi?file=x86/poppler/ + * poppler-0.26.5-1 + * + * For the rasterizer, use pdftoppm: + * pdftoppm -r res fname outroot ('-r res' renders output at res ppi) + * This works on all pdf pages, both wrapped images and pages that + * were made orthographically. The default output resolution for + * pdftoppm is 150 ppi, but we typically use 300 ppi. This makes large + * uncompressed RGB image files (e.g., a standard size RGB page image + * at 300 ppi is 25 MB), but it is very fast. + * + * The size of the resulting images does not depend on the resolution + * of the images stored in the input pdf. We compute the value of the + * resolution parameter (render_res) that when input to pdftoppm + * will generate a page-size image (612 x 792 pts) at the requested + * output resolution. + * + * We do NOT use pdfimages: + * pdfimages -j fname outroot (-j outputs jpeg if input is dct) + * pdfimages only works when all pages are pdf wrappers around images. + * Further, in some cases, it scrambles the order of the output pages + * and inserts extra images. + + * By default, this function will not run, because it makes a call + * to system(1). To render pdfs as a set of images in a directory, + * three things are required: + * (1) To have poppler installed. + * (2) To enable debug operations using setLeptDebugOK(1). + * (3) To link the functions that generate pdf files in the library + * (in pdfio1.c, pdfio2.c). + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include "allheaders.h" + +/* --------------------------------------------*/ +#if USE_PDFIO /* defined in environ.h */ +/* --------------------------------------------*/ + +/*-----------------------------------------------------------------* + * Rendering pdf files using an external library * + *-----------------------------------------------------------------*/ +/*! + * \brief l_pdfRenderFile() + * + * \param[in] filename input pdf file + * \param[in] res output resolution (0, [50 ... 300]) ppi + * \param[out] psaout sarray of filenames of rasterized images + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Wrapper to l_padfRenderFiles() for a single input pdf file. + * </pre> + */ +l_ok +l_pdfRenderFile(const char *filename, + l_int32 res, + SARRAY **psaout) +{ +l_int32 ret; +SARRAY *sain; + + if (!psaout) + return ERROR_INT("&saout not defined", __func__, 1); + *psaout = NULL; + if (!filename) + return ERROR_INT("filename not defined", __func__, 1); + + sain = sarrayCreate(1); + sarrayAddString(sain, filename, L_COPY); + ret = l_pdfRenderFiles(NULL, sain, res, psaout); + sarrayDestroy(&sain); + return ret; +} + + +/*! + * \brief l_pdfRenderFiles() + * + * \param[in] dir directory of input pdf files + * \param[in] sain sarray of input pdf filenames + * \param[in] res output resolution (0, [50 ... 300]) ppi + * \param[out] psaout sarray of output filenames of rendered images + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Because this uses the "system" call, it is disabled by default + * on all platforms. It is not supported and therefor3 disabled + * on iOS 11. + * (2) Input pdf file(s) are specified either by an input directory + * or an sarray with the paths. Use the sarray if it is given; + * otherwise, use all files in the directory with extention "pdf", + * and name the rendered images in lexical order of the filenames. + * (3) The allowed output rendering resolutions are between 50 ppi + * and 300 ppi. Typical resolutions are 150 and 300 ppi. + * Default input value of 0 can be used for 300 ppi resolution. + * (4) Images are rendered in ppm format in directory /tmp/lept/renderpdf + * and named in lexical order of the input filenames. On invocation, + * any existing files in this directory are removed. + * (5) This requires pdftoppm from the Poppler package of pdf utilities. + * </pre> + */ +l_ok +l_pdfRenderFiles(const char *dir, + SARRAY *sain, + l_int32 res, + SARRAY **psaout) +{ +char buf[256]; +char *imagedir, *firstfile, *fname, *basename, *tail; +l_int32 i, nfiles, render_res; +SARRAY *sa; + + if (!LeptDebugOK) { + L_INFO("running pdftoppm is disabled; " + "use setLeptDebugOK(1) to enable\n", __func__); + return 0; + } + + #ifdef OS_IOS /* iOS 11 does not support system() */ + return ERROR_INT("iOS 11 does not support system()", __func__, 0); + #endif /* OS_IOS */ + + if (!psaout) + return ERROR_INT("&saout not defined", __func__, 1); + *psaout = NULL; + if (res == 0) res = 300; + if (res < 50 || res > 300) + return ERROR_INT("res not in range [50 ... 300]", __func__, 1); + if (!dir && !sain) + return ERROR_INT("neither dir or sain are defined", __func__, 1); + if (sain) { + sa = sarrayCopy(sain); + } else { + sa = getSortedPathnamesInDirectory(dir, "pdf", 0, 0); + if (!sa) + return ERROR_INT("no files found in dir", __func__, 1); + } + nfiles = sarrayGetCount(sa); + + /* Set up directory for rendered page images. */ + lept_rmdir("lept/renderpdf"); + lept_mkdir("lept/renderpdf"); + imagedir = genPathname("/tmp/lept/renderpdf", NULL); + + /* Figure out the resolution to use with the image renderer. + This first checks the media box sizes, which give the output + image size in printer points (1/72 inch). The largest expected + output image has a max dimension of about 11 inches, corresponding + to 792 points. At a resolution of 300 ppi, the max image size + is then 3300. For robustness, use the median of media box sizes. + If the max dimension of this median is significantly larger than + 792, reduce the input resolution to the renderer. Specifically: + * Calculate the median of the MediaBox widths and heights. + * If the max exceeds 850, reduce the resolution so that the max + dimension of the rendered image is 3300. The new resolution + input to the renderer is reduced from 300 by the factor: + (792 / medmax) + If the media boxes are not found, render a page using a small + given resolution (72) and use the max dimension to find the + resolution, render_res, that will produce an out with + 3300 pixels in the largest dimension. */ + firstfile = sarrayGetString(sa, 0, L_NOCOPY); + getPdfRendererResolution(firstfile, imagedir, &render_res); + + /* The input %res gives the actual resolution at which the page is + to be rendered. If this is less than 300 ppi, reduce render_res, + the resolution input to pdftoppm, by the factor: + (res / 300) */ + render_res = (render_res * res) / 300; + + /* Rasterize: '-r res' renders output at res ppi + * pdftoppm -r res fname outroot */ + for (i = 0; i < nfiles; i++) { + fname = sarrayGetString(sa, i, L_NOCOPY); + splitPathAtDirectory(fname, NULL, &tail); + splitPathAtExtension(tail, &basename, NULL); + snprintf(buf, sizeof(buf), "pdftoppm -r %d %s %s/%s", + render_res, fname, imagedir, basename); + lept_free(tail); + lept_free(basename); + lept_stderr("%s\n", buf); + callSystemDebug(buf); /* pdftoppm */ + } + sarrayDestroy(&sa); + + /* Generate the output array of image file names */ + *psaout = getSortedPathnamesInDirectory(imagedir, NULL, 0, 0); + lept_free(imagedir); + return 0; +} + + +/* --------------------------------------------*/ +#endif /* USE_PDFIO */ +/* --------------------------------------------*/ + + + +/* ------------------------------------------------------------------------- * + * Stubs if pdf is not supported * + * ------------------------------------------------------------------------- */ + +/* -----------------------------------------------------------------*/ +#if !USE_PDFIO +/* -----------------------------------------------------------------*/ + +l_ok l_pdfRenderFile(const char *filename, l_int32 res, SARRAY **psaout) +{ + return ERROR_INT("function not present", __func__, 1); +} + +/* -----------------------------------------------------------*/ + +l_ok l_pdfRenderFiles(const char *dir, SARRAY *sain, l_int32 res, + SARRAY **psaout) +{ + return ERROR_INT("function not present", __func__, 1); +} + +/* -----------------------------------------------------------------*/ +#endif /* !USE_PDFIO */ +/* -----------------------------------------------------------------*/ +
