comparison mupdf-source/thirdparty/leptonica/src/partify.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file partify.c
29 * <pre>
30 *
31 * Top level
32 * l_int32 partifyFiles()
33 * l_int32 partifyPixac()
34 *
35 * Helpers
36 * static BOXA *pixLocateStaveSets()
37 * static l_int32 boxaRemoveVGaps()
38 * </pre>
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include <config_auto.h>
43 #endif /* HAVE_CONFIG_H */
44
45 #include "allheaders.h"
46
47 /* Static helpers */
48 static BOXA *pixLocateStaveSets(PIX *pixs, l_int32 pageno, PIXA *pixadb);
49 static l_ok boxaRemoveVGaps(BOXA *boxa);
50
51 /*---------------------------------------------------------------------*
52 * Top level *
53 *---------------------------------------------------------------------*/
54 /*!
55 * \brief partifyFiles()
56 *
57 * \param[in] dirname directory of files
58 * \param[in] substr required filename substring; use NULL for all files
59 * \param[in] nparts number of parts to generate (counting from top)
60 * \param[in] outroot root name of output pdf files
61 * \param[in] debugfile [optional] set to NULL for no debug output
62 * \return 0 if OK, 1 on error
63 *
64 * <pre>
65 * Notes:
66 * (1) All page images are compressed in png format into a pixacomp.
67 * (2) Each page image is deskewed, binarized at 300 ppi,
68 * partified into %nparts, and saved in a set of pixacomps
69 * in tiff-g4 format.
70 * (3) Each partified pixacomp is rendered into a set of page images,
71 * and output as a pdf.
72 * </pre>
73 */
74 l_ok
75 partifyFiles(const char *dirname,
76 const char *substr,
77 l_int32 nparts,
78 const char *outroot,
79 const char *debugfile)
80 {
81 PIXA *pixadb;
82 PIXAC *pixac;
83
84 if (!dirname)
85 return ERROR_INT("dirname not defined", __func__, 1);
86 if (nparts < 0 || nparts > 10)
87 return ERROR_INT("nparts not in [1 ... 10]", __func__, 1);
88 if (!outroot || outroot[0] == '\n')
89 return ERROR_INT("outroot undefined or empty", __func__, 1);
90
91 pixadb = (debugfile) ? pixaCreate(0) : NULL;
92 pixac = pixacompCreateFromFiles(dirname, substr, IFF_PNG);
93 partifyPixac(pixac, nparts, outroot, pixadb);
94 if (pixadb) {
95 L_INFO("writing debug output to %s\n", __func__, debugfile);
96 pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0,
97 "Partify Debug", debugfile);
98 }
99 pixacompDestroy(&pixac);
100 pixaDestroy(&pixadb);
101 return 0;
102 }
103
104
105 /*!
106 * \brief partifyPixac()
107 *
108 * \param[in] pixac with at least one image
109 * \param[in] nparts number of parts to generate (counting from top)
110 * \param[in] outroot root name of output pdf files
111 * \param[in] pixadb [optional] debug pixa; can be NULL
112 * \return 0 if OK, 1 on error
113 *
114 * <pre>
115 * Notes:
116 * (1) See partifyPixac().
117 * (2) If the image files do not have a resolution, 300 ppi is assumed.
118 * </pre>
119 */
120 l_ok
121 partifyPixac(PIXAC *pixac,
122 l_int32 nparts,
123 const char *outroot,
124 PIXA *pixadb)
125 {
126 char buf[512];
127 l_int32 i, j, pageno, res, npage, nbox, icount, line;
128 l_float32 factor;
129 L_BMF *bmf;
130 BOX *box1, *box2;
131 BOXA *boxa1, *boxa2, *boxa3;
132 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
133 PIXAC **pixaca;
134
135 if (!pixac)
136 return ERROR_INT("pixac not defined", __func__, 1);
137 if ((npage = pixacompGetCount(pixac)) == 0)
138 return ERROR_INT("pixac is empty", __func__, 1);
139 if (nparts < 1 || nparts > 10)
140 return ERROR_INT("nparts not in [1 ... 10]", __func__, 1);
141 if (!outroot || outroot[0] == '\n')
142 return ERROR_INT("outroot undefined or empty", __func__, 1);
143
144 /* Initialize the output array for each of the nparts */
145 pixaca = (PIXAC **)LEPT_CALLOC(nparts, sizeof(PIXAC *));
146 for (i = 0; i < nparts; i++)
147 pixaca[i] = pixacompCreate(0);
148
149 /* Process each page */
150 line = 1;
151 bmf = bmfCreate(NULL, 10);
152 for (pageno = 0; pageno < npage; pageno++) {
153 if ((pix1 = pixacompGetPix(pixac, pageno)) == NULL) {
154 L_ERROR("pix for page %d not found\n", __func__, pageno);
155 continue;
156 }
157
158 /* Scale, binarize and deskew */
159 res = pixGetXRes(pix1);
160 if (res == 0 || res == 300 || res > 600) {
161 pix2 = pixClone(pix1);
162 } else {
163 factor = 300.0f / (l_float32)res;
164 if (factor > 3)
165 L_WARNING("resolution is very low\n", __func__);
166 pix2 = pixScale(pix1, factor, factor);
167 }
168 pix3 = pixConvertTo1Adaptive(pix2);
169 pix4 = pixDeskew(pix3, 0);
170 pixDestroy(&pix1);
171 pixDestroy(&pix2);
172 pixDestroy(&pix3);
173 if (!pix4) {
174 L_ERROR("pix for page %d not deskewed\n", __func__, pageno);
175 continue;
176 }
177 pix1 = pixClone(pix4); /* rename */
178 pixDestroy(&pix4);
179
180 /* Find the stave sets at 4x reduction */
181 boxa1 = pixLocateStaveSets(pix1, pageno, pixadb);
182
183 /* Break each stave set into the separate staves (parts).
184 * A typical set will have more than one part, but if one of
185 * the parts is a keyboard, it will usually have two staves
186 * (also called a Grand Staff), composed of treble and
187 * bass staves. For example, a classical violin sonata
188 * could have a staff for the violin and two staves for
189 * the piano. We would set nparts == 2, and extract both
190 * of the piano staves as the piano part. */
191 nbox = boxaGetCount(boxa1);
192 lept_stderr("number of boxes in page %d: %d\n", pageno, nbox);
193 for (i = 0; i < nbox; i++, line++) {
194 snprintf(buf, sizeof(buf), "%d", line);
195 box1 = boxaGetBox(boxa1, i, L_COPY);
196 pix2 = pixClipRectangle(pix1, box1, NULL);
197 pix3 = pixMorphSequence(pix2, "d1.20 + o50.1 + o1.30", 0);
198 boxa2 = pixConnCompBB(pix3, 8);
199 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
200 boxaRemoveVGaps(boxa3);
201 icount = boxaGetCount(boxa3);
202 if (icount < nparts)
203 L_WARNING("nparts requested = %d, but only found %d\n",
204 __func__, nparts, icount);
205 for (j = 0; j < icount && j < nparts; j++) {
206 box2 = boxaGetBox(boxa3, j, L_COPY);
207 if (j == nparts - 1) /* extend the box to the bottom */
208 boxSetSideLocations(box2, -1, -1, -1,
209 pixGetHeight(pix1) - 1);
210 pix4 = pixClipRectangle(pix2, box2, NULL);
211 pix5 = pixAddTextlines(pix4, bmf, buf, 1, L_ADD_LEFT);
212 pixacompAddPix(pixaca[j], pix5, IFF_TIFF_G4);
213 boxDestroy(&box2);
214 pixDestroy(&pix4);
215 pixDestroy(&pix5);
216 }
217 boxaDestroy(&boxa2);
218 boxaDestroy(&boxa3);
219 boxDestroy(&box1);
220 pixDestroy(&pix2);
221 pixDestroy(&pix3);
222 }
223 boxaDestroy(&boxa1);
224 pixDestroy(&pix1);
225 }
226
227 /* Output separate pdfs for each part */
228 for (i = 0; i < nparts; i++) {
229 snprintf(buf, sizeof(buf), "%s-%d.pdf", outroot, i);
230 L_INFO("writing part %d: %s\n", __func__, i, buf);
231 pixacompConvertToPdf(pixaca[i], 300, 1.0, L_G4_ENCODE, 0, NULL, buf);
232 pixacompDestroy(&pixaca[i]);
233 }
234 LEPT_FREE(pixaca);
235 bmfDestroy(&bmf);
236 return 0;
237 }
238
239
240 /*
241 * \brief pixLocateStaveSets()
242 *
243 * \param[in] pixs 1 bpp, 300 ppi, deskewed
244 * \param[in] pageno page number; used for debug output
245 * \param[in] pixadb [optional] debug pixa; can be NULL
246 * \return boxa containing the stave sets at full resolution
247 */
248 static BOXA *
249 pixLocateStaveSets(PIX *pixs,
250 l_int32 pageno,
251 PIXA *pixadb)
252 {
253 BOXA *boxa1, *boxa2, *boxa3, *boxa4;
254 PIX *pix1, *pix2;
255
256 if (!pixs)
257 return (BOXA *)ERROR_PTR("pixs not defined", __func__, NULL);
258
259 /* Find the stave sets at 4x reduction */
260 pix1 = pixMorphSequence(pixs, "r11", 0);
261 boxa1 = pixConnCompBB(pix1, 8);
262 boxa2 = boxaSelectByArea(boxa1, 15000, L_SELECT_IF_GT, NULL);
263 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
264 if (pixadb) {
265 pix2 = pixConvertTo32(pix1);
266 pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0);
267 pixaAddPix(pixadb, pix2, L_INSERT);
268 pixDisplay(pix2, 100 * pageno, 100);
269 }
270 boxaDestroy(&boxa1);
271 boxaDestroy(&boxa2);
272
273 boxaRemoveVGaps(boxa3);
274 if (pixadb) {
275 pix2 = pixConvertTo32(pix1);
276 pixRenderBoxaArb(pix2, boxa3, 2, 0, 255, 0);
277 pixaAddPix(pixadb, pix2, L_INSERT);
278 pixDisplay(pix2, 100 * pageno, 600);
279 }
280 boxa4 = boxaTransform(boxa3, 0, 0, 4.0, 4.0); /* back to full res */
281 boxaDestroy(&boxa3);
282 pixDestroy(&pix1);
283 return boxa4;
284 }
285
286
287 /*
288 * \brief boxaRemoveVGaps()
289 *
290 * \param[in] boxa
291 * \return 0 if OK, 1 on error
292 *
293 * <pre>
294 * Notes:
295 * (1) The boxes in %boxa are aligned vertically. Move the horizontal
296 * edges vertically to remove the gaps between boxes.
297 * </pre>
298 */
299 static l_ok
300 boxaRemoveVGaps(BOXA *boxa)
301 {
302 l_int32 nbox, i, y1, h1, y2, h2, delta;
303
304 if (!boxa)
305 return ERROR_INT("boxa not defined", __func__, 1);
306 if ((nbox = boxaGetCount(boxa)) == 0)
307 return ERROR_INT("boxa is empty", __func__, 1);
308 for (i = 0; i < nbox - 1; i++) {
309 boxaGetBoxGeometry(boxa, i, NULL, &y1, NULL, &h1);
310 boxaGetBoxGeometry(boxa, i + 1, NULL, &y2, NULL, &h2);
311 delta = (y2 - y1 - h1) / 2;
312 boxaAdjustBoxSides(boxa, i, 0, 0, 0, delta);
313 boxaAdjustBoxSides(boxa, i + 1, 0, 0, -delta, 0);
314 }
315 boxaAdjustBoxSides(boxa, nbox - 1, 0, 0, 0, delta); /* bot of last */
316 return 0;
317 }