Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/partify.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/partify.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,317 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! 
+ * \file partify.c + * <pre> + * + * Top level + * l_int32 partifyFiles() + * l_int32 partifyPixac() + * + * Helpers + * static BOXA *pixLocateStaveSets() + * static l_int32 boxaRemoveVGaps() + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include "allheaders.h" + + /* Static helplers */ +static BOXA *pixLocateStaveSets(PIX *pixs, l_int32 pageno, PIXA *pixadb); +static l_ok boxaRemoveVGaps(BOXA *boxa); + +/*---------------------------------------------------------------------* + * Top level * + *---------------------------------------------------------------------*/ +/*! + * \brief partifyFiles() + * + * \param[in] dirname directory of files + * \param[in] substr required filename substring; use NULL for all files + * \param[in] nparts number of parts to generate (counting from top) + * \param[in] outroot root name of output pdf files + * \param[in] debugfile [optional] set to NULL for no debug output + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) All page images are compressed in png format into a pixacomp. + * (2) Each page image is deskewed, binarized at 300 ppi, + * partified into %nparts, and saved in a set of pixacomps + * in tiff-g4 format. + * (3) Each partified pixacomp is rendered into a set of page images, + * and output as a pdf. + * </pre> + */ +l_ok +partifyFiles(const char *dirname, + const char *substr, + l_int32 nparts, + const char *outroot, + const char *debugfile) +{ +PIXA *pixadb; +PIXAC *pixac; + + if (!dirname) + return ERROR_INT("dirname not defined", __func__, 1); + if (nparts < 0 || nparts > 10) + return ERROR_INT("nparts not in [1 ... 10]", __func__, 1); + if (!outroot || outroot[0] == '\n') + return ERROR_INT("outroot undefined or empty", __func__, 1); + + pixadb = (debugfile) ? 
pixaCreate(0) : NULL; + pixac = pixacompCreateFromFiles(dirname, substr, IFF_PNG); + partifyPixac(pixac, nparts, outroot, pixadb); + if (pixadb) { + L_INFO("writing debug output to %s\n", __func__, debugfile); + pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, + "Partify Debug", debugfile); + } + pixacompDestroy(&pixac); + pixaDestroy(&pixadb); + return 0; +} + + +/*! + * \brief partifyPixac() + * + * \param[in] pixac with at least one image + * \param[in] nparts number of parts to generate (counting from top) + * \param[in] outroot root name of output pdf files + * \param[in] pixadb [optional] debug pixa; can be NULL + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) See partifyPixac(). + * (2) If the image files do not have a resolution, 300 ppi is assumed. + * </pre> + */ +l_ok +partifyPixac(PIXAC *pixac, + l_int32 nparts, + const char *outroot, + PIXA *pixadb) +{ +char buf[512]; +l_int32 i, j, pageno, res, npage, nbox, icount, line; +l_float32 factor; +L_BMF *bmf; +BOX *box1, *box2; +BOXA *boxa1, *boxa2, *boxa3; +PIX *pix1, *pix2, *pix3, *pix4, *pix5; +PIXAC **pixaca; + + if (!pixac) + return ERROR_INT("pixac not defined", __func__, 1); + if ((npage = pixacompGetCount(pixac)) == 0) + return ERROR_INT("pixac is empty", __func__, 1); + if (nparts < 1 || nparts > 10) + return ERROR_INT("nparts not in [1 ... 
10]", __func__, 1); + if (!outroot || outroot[0] == '\n') + return ERROR_INT("outroot undefined or empty", __func__, 1); + + /* Initialize the output array for each of the nparts */ + pixaca = (PIXAC **)LEPT_CALLOC(nparts, sizeof(PIXAC *)); + for (i = 0; i < nparts; i++) + pixaca[i] = pixacompCreate(0); + + /* Process each page */ + line = 1; + bmf = bmfCreate(NULL, 10); + for (pageno = 0; pageno < npage; pageno++) { + if ((pix1 = pixacompGetPix(pixac, pageno)) == NULL) { + L_ERROR("pix for page %d not found\n", __func__, pageno); + continue; + } + + /* Scale, binarize and deskew */ + res = pixGetXRes(pix1); + if (res == 0 || res == 300 || res > 600) { + pix2 = pixClone(pix1); + } else { + factor = 300.0f / (l_float32)res; + if (factor > 3) + L_WARNING("resolution is very low\n", __func__); + pix2 = pixScale(pix1, factor, factor); + } + pix3 = pixConvertTo1Adaptive(pix2); + pix4 = pixDeskew(pix3, 0); + pixDestroy(&pix1); + pixDestroy(&pix2); + pixDestroy(&pix3); + if (!pix4) { + L_ERROR("pix for page %d not deskewed\n", __func__, pageno); + continue; + } + pix1 = pixClone(pix4); /* rename */ + pixDestroy(&pix4); + + /* Find the stave sets at 4x reduction */ + boxa1 = pixLocateStaveSets(pix1, pageno, pixadb); + + /* Break each stave set into the separate staves (parts). + * A typical set will have more than one part, but if one of + * the parts is a keyboard, it will usually have two staves + * (also called a Grand Staff), composed of treble and + * bass staves. For example, a classical violin sonata + * could have a staff for the violin and two staves for + * the piano. We would set nparts == 2, and extract both + * of the piano staves as the piano part. 
*/ + nbox = boxaGetCount(boxa1); + lept_stderr("number of boxes in page %d: %d\n", pageno, nbox); + for (i = 0; i < nbox; i++, line++) { + snprintf(buf, sizeof(buf), "%d", line); + box1 = boxaGetBox(boxa1, i, L_COPY); + pix2 = pixClipRectangle(pix1, box1, NULL); + pix3 = pixMorphSequence(pix2, "d1.20 + o50.1 + o1.30", 0); + boxa2 = pixConnCompBB(pix3, 8); + boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); + boxaRemoveVGaps(boxa3); + icount = boxaGetCount(boxa3); + if (icount < nparts) + L_WARNING("nparts requested = %d, but only found %d\n", + __func__, nparts, icount); + for (j = 0; j < icount && j < nparts; j++) { + box2 = boxaGetBox(boxa3, j, L_COPY); + if (j == nparts - 1) /* extend the box to the bottom */ + boxSetSideLocations(box2, -1, -1, -1, + pixGetHeight(pix1) - 1); + pix4 = pixClipRectangle(pix2, box2, NULL); + pix5 = pixAddTextlines(pix4, bmf, buf, 1, L_ADD_LEFT); + pixacompAddPix(pixaca[j], pix5, IFF_TIFF_G4); + boxDestroy(&box2); + pixDestroy(&pix4); + pixDestroy(&pix5); + } + boxaDestroy(&boxa2); + boxaDestroy(&boxa3); + boxDestroy(&box1); + pixDestroy(&pix2); + pixDestroy(&pix3); + } + boxaDestroy(&boxa1); + pixDestroy(&pix1); + } + + /* Output separate pdfs for each part */ + for (i = 0; i < nparts; i++) { + snprintf(buf, sizeof(buf), "%s-%d.pdf", outroot, i); + L_INFO("writing part %d: %s\n", __func__, i, buf); + pixacompConvertToPdf(pixaca[i], 300, 1.0, L_G4_ENCODE, 0, NULL, buf); + pixacompDestroy(&pixaca[i]); + } + LEPT_FREE(pixaca); + bmfDestroy(&bmf); + return 0; +} + + +/* + * \brief pixLocateStaveSets() + * + * \param[in] pixs 1 bpp, 300 ppi, deskewed + * \param[in] pageno page number; used for debug output + * \param[in] pixadb [optional] debug pixa; can be NULL + * \return boxa containing the stave sets at full resolution + */ +static BOXA * +pixLocateStaveSets(PIX *pixs, + l_int32 pageno, + PIXA *pixadb) +{ +BOXA *boxa1, *boxa2, *boxa3, *boxa4; +PIX *pix1, *pix2; + + if (!pixs) + return (BOXA *)ERROR_PTR("pixs not 
defined", __func__, NULL); + + /* Find the stave sets at 4x reduction */ + pix1 = pixMorphSequence(pixs, "r11", 0); + boxa1 = pixConnCompBB(pix1, 8); + boxa2 = boxaSelectByArea(boxa1, 15000, L_SELECT_IF_GT, NULL); + boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); + if (pixadb) { + pix2 = pixConvertTo32(pix1); + pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); + pixaAddPix(pixadb, pix2, L_INSERT); + pixDisplay(pix2, 100 * pageno, 100); + } + boxaDestroy(&boxa1); + boxaDestroy(&boxa2); + + boxaRemoveVGaps(boxa3); + if (pixadb) { + pix2 = pixConvertTo32(pix1); + pixRenderBoxaArb(pix2, boxa3, 2, 0, 255, 0); + pixaAddPix(pixadb, pix2, L_INSERT); + pixDisplay(pix2, 100 * pageno, 600); + } + boxa4 = boxaTransform(boxa3, 0, 0, 4.0, 4.0); /* back to full res */ + boxaDestroy(&boxa3); + pixDestroy(&pix1); + return boxa4; +} + + +/* + * \brief boxaRemoveVGaps() + * + * \param[in] boxa + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) The boxes in %boxa are aligned vertically. Move the horizontal + * edges vertically to remove the gaps between boxes. + * </pre> + */ +static l_ok +boxaRemoveVGaps(BOXA *boxa) +{ +l_int32 nbox, i, y1, h1, y2, h2, delta; + + if (!boxa) + return ERROR_INT("boxa not defined", __func__, 1); + if ((nbox = boxaGetCount(boxa)) == 0) + return ERROR_INT("boxa is empty", __func__, 1); + for (i = 0; i < nbox - 1; i++) { + boxaGetBoxGeometry(boxa, i, NULL, &y1, NULL, &h1); + boxaGetBoxGeometry(boxa, i + 1, NULL, &y2, NULL, &h2); + delta = (y2 - y1 - h1) / 2; + boxaAdjustBoxSides(boxa, i, 0, 0, 0, delta); + boxaAdjustBoxSides(boxa, i + 1, 0, 0, -delta, 0); + } + boxaAdjustBoxSides(boxa, nbox - 1, 0, 0, 0, delta); /* bot of last */ + return 0; +}
