Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/partify.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file partify.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Top level | |
| 32 * l_int32 partifyFiles() | |
| 33 * l_int32 partifyPixac() | |
| 34 * | |
| 35 * Helpers | |
| 36 * static BOXA *pixLocateStaveSets() | |
| 37 * static l_int32 boxaRemoveVGaps() | |
| 38 * </pre> | |
| 39 */ | |
| 40 | |
| 41 #ifdef HAVE_CONFIG_H | |
| 42 #include <config_auto.h> | |
| 43 #endif /* HAVE_CONFIG_H */ | |
| 44 | |
| 45 #include "allheaders.h" | |
| 46 | |
| 47 /* Static helpers */ | |
| 48 static BOXA *pixLocateStaveSets(PIX *pixs, l_int32 pageno, PIXA *pixadb); | |
| 49 static l_ok boxaRemoveVGaps(BOXA *boxa); | |
| 50 | |
| 51 /*---------------------------------------------------------------------* | |
| 52 * Top level * | |
| 53 *---------------------------------------------------------------------*/ | |
| 54 /*! | |
| 55 * \brief partifyFiles() | |
| 56 * | |
| 57 * \param[in] dirname directory of files | |
| 58 * \param[in] substr required filename substring; use NULL for all files | |
| 59 * \param[in] nparts number of parts to generate (counting from top) | |
| 60 * \param[in] outroot root name of output pdf files | |
| 61 * \param[in] debugfile [optional] set to NULL for no debug output | |
| 62 * \return 0 if OK, 1 on error | |
| 63 * | |
| 64 * <pre> | |
| 65 * Notes: | |
| 66 * (1) All page images are compressed in png format into a pixacomp. | |
| 67 * (2) Each page image is deskewed, binarized at 300 ppi, | |
| 68 * partified into %nparts, and saved in a set of pixacomps | |
| 69 * in tiff-g4 format. | |
| 70 * (3) Each partified pixacomp is rendered into a set of page images, | |
| 71 * and output as a pdf. | |
| 72 * </pre> | |
| 73 */ | |
| 74 l_ok | |
| 75 partifyFiles(const char *dirname, | |
| 76 const char *substr, | |
| 77 l_int32 nparts, | |
| 78 const char *outroot, | |
| 79 const char *debugfile) | |
| 80 { | |
| 81 PIXA *pixadb; | |
| 82 PIXAC *pixac; | |
| 83 | |
| 84 if (!dirname) | |
| 85 return ERROR_INT("dirname not defined", __func__, 1); | |
| 86 if (nparts < 0 || nparts > 10) | |
| 87 return ERROR_INT("nparts not in [1 ... 10]", __func__, 1); | |
| 88 if (!outroot || outroot[0] == '\n') | |
| 89 return ERROR_INT("outroot undefined or empty", __func__, 1); | |
| 90 | |
| 91 pixadb = (debugfile) ? pixaCreate(0) : NULL; | |
| 92 pixac = pixacompCreateFromFiles(dirname, substr, IFF_PNG); | |
| 93 partifyPixac(pixac, nparts, outroot, pixadb); | |
| 94 if (pixadb) { | |
| 95 L_INFO("writing debug output to %s\n", __func__, debugfile); | |
| 96 pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, | |
| 97 "Partify Debug", debugfile); | |
| 98 } | |
| 99 pixacompDestroy(&pixac); | |
| 100 pixaDestroy(&pixadb); | |
| 101 return 0; | |
| 102 } | |
| 103 | |
| 104 | |
| 105 /*! | |
| 106 * \brief partifyPixac() | |
| 107 * | |
| 108 * \param[in] pixac with at least one image | |
| 109 * \param[in] nparts number of parts to generate (counting from top) | |
| 110 * \param[in] outroot root name of output pdf files | |
| 111 * \param[in] pixadb [optional] debug pixa; can be NULL | |
| 112 * \return 0 if OK, 1 on error | |
| 113 * | |
| 114 * <pre> | |
| 115 * Notes: | |
| 116 * (1) See partifyPixac(). | |
| 117 * (2) If the image files do not have a resolution, 300 ppi is assumed. | |
| 118 * </pre> | |
| 119 */ | |
| 120 l_ok | |
| 121 partifyPixac(PIXAC *pixac, | |
| 122 l_int32 nparts, | |
| 123 const char *outroot, | |
| 124 PIXA *pixadb) | |
| 125 { | |
| 126 char buf[512]; | |
| 127 l_int32 i, j, pageno, res, npage, nbox, icount, line; | |
| 128 l_float32 factor; | |
| 129 L_BMF *bmf; | |
| 130 BOX *box1, *box2; | |
| 131 BOXA *boxa1, *boxa2, *boxa3; | |
| 132 PIX *pix1, *pix2, *pix3, *pix4, *pix5; | |
| 133 PIXAC **pixaca; | |
| 134 | |
| 135 if (!pixac) | |
| 136 return ERROR_INT("pixac not defined", __func__, 1); | |
| 137 if ((npage = pixacompGetCount(pixac)) == 0) | |
| 138 return ERROR_INT("pixac is empty", __func__, 1); | |
| 139 if (nparts < 1 || nparts > 10) | |
| 140 return ERROR_INT("nparts not in [1 ... 10]", __func__, 1); | |
| 141 if (!outroot || outroot[0] == '\n') | |
| 142 return ERROR_INT("outroot undefined or empty", __func__, 1); | |
| 143 | |
| 144 /* Initialize the output array for each of the nparts */ | |
| 145 pixaca = (PIXAC **)LEPT_CALLOC(nparts, sizeof(PIXAC *)); | |
| 146 for (i = 0; i < nparts; i++) | |
| 147 pixaca[i] = pixacompCreate(0); | |
| 148 | |
| 149 /* Process each page */ | |
| 150 line = 1; | |
| 151 bmf = bmfCreate(NULL, 10); | |
| 152 for (pageno = 0; pageno < npage; pageno++) { | |
| 153 if ((pix1 = pixacompGetPix(pixac, pageno)) == NULL) { | |
| 154 L_ERROR("pix for page %d not found\n", __func__, pageno); | |
| 155 continue; | |
| 156 } | |
| 157 | |
| 158 /* Scale, binarize and deskew */ | |
| 159 res = pixGetXRes(pix1); | |
| 160 if (res == 0 || res == 300 || res > 600) { | |
| 161 pix2 = pixClone(pix1); | |
| 162 } else { | |
| 163 factor = 300.0f / (l_float32)res; | |
| 164 if (factor > 3) | |
| 165 L_WARNING("resolution is very low\n", __func__); | |
| 166 pix2 = pixScale(pix1, factor, factor); | |
| 167 } | |
| 168 pix3 = pixConvertTo1Adaptive(pix2); | |
| 169 pix4 = pixDeskew(pix3, 0); | |
| 170 pixDestroy(&pix1); | |
| 171 pixDestroy(&pix2); | |
| 172 pixDestroy(&pix3); | |
| 173 if (!pix4) { | |
| 174 L_ERROR("pix for page %d not deskewed\n", __func__, pageno); | |
| 175 continue; | |
| 176 } | |
| 177 pix1 = pixClone(pix4); /* rename */ | |
| 178 pixDestroy(&pix4); | |
| 179 | |
| 180 /* Find the stave sets at 4x reduction */ | |
| 181 boxa1 = pixLocateStaveSets(pix1, pageno, pixadb); | |
| 182 | |
| 183 /* Break each stave set into the separate staves (parts). | |
| 184 * A typical set will have more than one part, but if one of | |
| 185 * the parts is a keyboard, it will usually have two staves | |
| 186 * (also called a Grand Staff), composed of treble and | |
| 187 * bass staves. For example, a classical violin sonata | |
| 188 * could have a staff for the violin and two staves for | |
| 189 * the piano. We would set nparts == 2, and extract both | |
| 190 * of the piano staves as the piano part. */ | |
| 191 nbox = boxaGetCount(boxa1); | |
| 192 lept_stderr("number of boxes in page %d: %d\n", pageno, nbox); | |
| 193 for (i = 0; i < nbox; i++, line++) { | |
| 194 snprintf(buf, sizeof(buf), "%d", line); | |
| 195 box1 = boxaGetBox(boxa1, i, L_COPY); | |
| 196 pix2 = pixClipRectangle(pix1, box1, NULL); | |
| 197 pix3 = pixMorphSequence(pix2, "d1.20 + o50.1 + o1.30", 0); | |
| 198 boxa2 = pixConnCompBB(pix3, 8); | |
| 199 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); | |
| 200 boxaRemoveVGaps(boxa3); | |
| 201 icount = boxaGetCount(boxa3); | |
| 202 if (icount < nparts) | |
| 203 L_WARNING("nparts requested = %d, but only found %d\n", | |
| 204 __func__, nparts, icount); | |
| 205 for (j = 0; j < icount && j < nparts; j++) { | |
| 206 box2 = boxaGetBox(boxa3, j, L_COPY); | |
| 207 if (j == nparts - 1) /* extend the box to the bottom */ | |
| 208 boxSetSideLocations(box2, -1, -1, -1, | |
| 209 pixGetHeight(pix1) - 1); | |
| 210 pix4 = pixClipRectangle(pix2, box2, NULL); | |
| 211 pix5 = pixAddTextlines(pix4, bmf, buf, 1, L_ADD_LEFT); | |
| 212 pixacompAddPix(pixaca[j], pix5, IFF_TIFF_G4); | |
| 213 boxDestroy(&box2); | |
| 214 pixDestroy(&pix4); | |
| 215 pixDestroy(&pix5); | |
| 216 } | |
| 217 boxaDestroy(&boxa2); | |
| 218 boxaDestroy(&boxa3); | |
| 219 boxDestroy(&box1); | |
| 220 pixDestroy(&pix2); | |
| 221 pixDestroy(&pix3); | |
| 222 } | |
| 223 boxaDestroy(&boxa1); | |
| 224 pixDestroy(&pix1); | |
| 225 } | |
| 226 | |
| 227 /* Output separate pdfs for each part */ | |
| 228 for (i = 0; i < nparts; i++) { | |
| 229 snprintf(buf, sizeof(buf), "%s-%d.pdf", outroot, i); | |
| 230 L_INFO("writing part %d: %s\n", __func__, i, buf); | |
| 231 pixacompConvertToPdf(pixaca[i], 300, 1.0, L_G4_ENCODE, 0, NULL, buf); | |
| 232 pixacompDestroy(&pixaca[i]); | |
| 233 } | |
| 234 LEPT_FREE(pixaca); | |
| 235 bmfDestroy(&bmf); | |
| 236 return 0; | |
| 237 } | |
| 238 | |
| 239 | |
| 240 /* | |
| 241 * \brief pixLocateStaveSets() | |
| 242 * | |
| 243 * \param[in] pixs 1 bpp, 300 ppi, deskewed | |
| 244 * \param[in] pageno page number; used for debug output | |
| 245 * \param[in] pixadb [optional] debug pixa; can be NULL | |
| 246 * \return boxa containing the stave sets at full resolution | |
| 247 */ | |
| 248 static BOXA * | |
| 249 pixLocateStaveSets(PIX *pixs, | |
| 250 l_int32 pageno, | |
| 251 PIXA *pixadb) | |
| 252 { | |
| 253 BOXA *boxa1, *boxa2, *boxa3, *boxa4; | |
| 254 PIX *pix1, *pix2; | |
| 255 | |
| 256 if (!pixs) | |
| 257 return (BOXA *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 258 | |
| 259 /* Find the stave sets at 4x reduction */ | |
| 260 pix1 = pixMorphSequence(pixs, "r11", 0); | |
| 261 boxa1 = pixConnCompBB(pix1, 8); | |
| 262 boxa2 = boxaSelectByArea(boxa1, 15000, L_SELECT_IF_GT, NULL); | |
| 263 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); | |
| 264 if (pixadb) { | |
| 265 pix2 = pixConvertTo32(pix1); | |
| 266 pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); | |
| 267 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 268 pixDisplay(pix2, 100 * pageno, 100); | |
| 269 } | |
| 270 boxaDestroy(&boxa1); | |
| 271 boxaDestroy(&boxa2); | |
| 272 | |
| 273 boxaRemoveVGaps(boxa3); | |
| 274 if (pixadb) { | |
| 275 pix2 = pixConvertTo32(pix1); | |
| 276 pixRenderBoxaArb(pix2, boxa3, 2, 0, 255, 0); | |
| 277 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 278 pixDisplay(pix2, 100 * pageno, 600); | |
| 279 } | |
| 280 boxa4 = boxaTransform(boxa3, 0, 0, 4.0, 4.0); /* back to full res */ | |
| 281 boxaDestroy(&boxa3); | |
| 282 pixDestroy(&pix1); | |
| 283 return boxa4; | |
| 284 } | |
| 285 | |
| 286 | |
| 287 /* | |
| 288 * \brief boxaRemoveVGaps() | |
| 289 * | |
| 290 * \param[in] boxa | |
| 291 * \return 0 if OK, 1 on error | |
| 292 * | |
| 293 * <pre> | |
| 294 * Notes: | |
| 295 * (1) The boxes in %boxa are aligned vertically. Move the horizontal | |
| 296 * edges vertically to remove the gaps between boxes. | |
| 297 * </pre> | |
| 298 */ | |
| 299 static l_ok | |
| 300 boxaRemoveVGaps(BOXA *boxa) | |
| 301 { | |
| 302 l_int32 nbox, i, y1, h1, y2, h2, delta; | |
| 303 | |
| 304 if (!boxa) | |
| 305 return ERROR_INT("boxa not defined", __func__, 1); | |
| 306 if ((nbox = boxaGetCount(boxa)) == 0) | |
| 307 return ERROR_INT("boxa is empty", __func__, 1); | |
| 308 for (i = 0; i < nbox - 1; i++) { | |
| 309 boxaGetBoxGeometry(boxa, i, NULL, &y1, NULL, &h1); | |
| 310 boxaGetBoxGeometry(boxa, i + 1, NULL, &y2, NULL, &h2); | |
| 311 delta = (y2 - y1 - h1) / 2; | |
| 312 boxaAdjustBoxSides(boxa, i, 0, 0, 0, delta); | |
| 313 boxaAdjustBoxSides(boxa, i + 1, 0, 0, -delta, 0); | |
| 314 } | |
| 315 boxaAdjustBoxSides(boxa, nbox - 1, 0, 0, 0, delta); /* bot of last */ | |
| 316 return 0; | |
| 317 } |
