comparison mupdf-source/thirdparty/leptonica/src/jbclass.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*
28 * jbclass.c
29 *
30 * These are functions for unsupervised classification of
31 * collections of connected components -- either characters or
32 * words -- in binary images. They can be used as image
33 * processing steps in jbig2 compression.
34 *
35 * Initialization
36 *
37 * JBCLASSER *jbRankHausInit() [rank hausdorff encoder]
38 * JBCLASSER *jbCorrelationInit() [correlation encoder]
39 * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto]
40 * static JBCLASSER *jbCorrelationInitInternal()
41 *
42 * Classify the pages
43 *
44 * l_int32 jbAddPages()
45 * l_int32 jbAddPage()
46 * l_int32 jbAddPageComponents()
47 *
48 * Rank hausdorff classifier
49 *
50 * l_int32 jbClassifyRankHaus()
51 * l_int32 pixHaustest()
52 * l_int32 pixRankHaustest()
53 *
54 * Binary correlation classifier
55 *
56 * l_int32 jbClassifyCorrelation()
57 *
58 * Determine the image components we start with
59 *
60 * l_int32 jbGetComponents()
61 * l_int32 pixWordMaskByDilation()
62 * l_int32 pixWordBoxesByDilation()
63 *
64 * Build grayscale composites (templates)
65 *
66 * PIXA *jbAccumulateComposites
67 * PIXA *jbTemplatesFromComposites
68 *
69 * Utility functions for Classer
70 *
71 * JBCLASSER *jbClasserCreate()
72 * void jbClasserDestroy()
73 *
74 * Utility functions for Data
75 *
76 * JBDATA *jbDataSave()
77 * void jbDataDestroy()
78 * l_int32 jbDataWrite()
79 * JBDATA *jbDataRead()
80 * PIXA *jbDataRender()
81 * l_int32 jbGetULCorners()
82 * l_int32 jbGetLLCorners()
83 *
84 * Static helpers
85 *
86 * static JBFINDCTX *findSimilarSizedTemplatesInit()
87 * static l_int32 findSimilarSizedTemplatesNext()
88 * static void findSimilarSizedTemplatesDestroy()
89 * static l_int32 finalPositioningForAlignment()
90 *
91 * Note: this is NOT an implementation of the JPEG jbig2
92 * proposed standard encoder, the specifications for which
93 * can be found at http://www.jpeg.org/jbigpt2.html.
94 * (See below for a full implementation.)
95 * It is an implementation of the lower-level part of an encoder that:
96 *
97 * (1) identifies connected components that are going to be used
98 * (2) puts them in similarity classes (this is an unsupervised
99 * classifier), and
100 * (3) stores the result in a simple file format (2 files,
101 * one for templates and one for page/coordinate/template-index
102 * quartets).
103 *
104 * An actual implementation of the official jbig2 encoder could
105 * start with parts (1) and (2), and would then compress the quartets
106 * according to the standards requirements (e.g., Huffman or
107 * arithmetic coding of coordinate differences and image templates).
108 *
109 * The low-level part of the encoder provided here has the
110 * following useful features:
111 *
112 * ~ It is accurate in the identification of templates
113 * and classes because it uses a windowed hausdorff
114 * distance metric.
115 * ~ It is accurate in the placement of the connected
116 * components, doing a two step process of first aligning
117 * the the centroids of the template with those of each instance,
118 * and then making a further correction of up to +- 1 pixel
119 * in each direction to best align the templates.
120 * ~ It is fast because it uses a morphologically based
121 * matching algorithm to implement the hausdorff criterion,
122 * and it selects the patterns that are possible matches
123 * based on their size.
124 *
125 * We provide two different matching functions, one using Hausdorff
126 * distance and one using a simple image correlation.
127 * The Hausdorff method sometimes produces better results for the
128 * same number of classes, because it gives a relatively small
129 * effective weight to foreground pixels near the boundary,
130 * and a relatively large weight to foreground pixels that are
131 * not near the boundary. By effectively ignoring these boundary
132 * pixels, Hausdorff weighting corresponds better to the expected
133 * probabilities of the pixel values in a scanned image, where the
134 * variations in instances of the same printed character are much
135 * more likely to be in pixels near the boundary. By contrast,
136 * the correlation method gives equal weight to all foreground pixels.
137 *
138 * For best results, use the correlation method. Correlation takes
139 * the number of fg pixels in the AND of instance and template,
140 * divided by the product of the number of fg pixels in instance
141 * and template. It compares this with a threshold that, in
142 * general, depends on the fractional coverage of the template.
143 * For heavy text, the threshold is raised above that for light
144 * text, By using both these parameters (basic threshold and
145 * adjustment factor for text weight), one has more flexibility
146 * and can arrive at the fewest substitution errors, although
147 * this comes at the price of more templates.
148 *
149 * The strict Hausdorff scoring is not a rank weighting, because a
150 * single pixel beyond the given distance will cause a match
151 * failure. A rank Hausdorff is more robust to non-boundary noise,
152 * but it is also more susceptible to confusing components that
153 * should be in different classes. For implementing a jbig2
154 * application for visually lossless binary image compression,
155 * you have two choices:
156 *
157 * (1) use a 3x3 structuring element (size = 3) and a strict
158 * Hausdorff comparison (rank = 1.0 in the rank Hausdorff
159 * function). This will result in a minimal number of classes,
160 * but confusion of small characters, such as italic and
161 * non-italic lower-case 'o', can still occur.
162 * (2) use the correlation method with a threshold of 0.85
163 * and a weighting factor of about 0.7. This will result in
164 * a larger number of classes, but should not be confused
165 * either by similar small characters or by extremely
166 * thick sans serif characters, such as in prog/cootoots.png.
167 *
168 * As mentioned above, if visual substitution errors must be
169 * avoided, you should use the correlation method.
170 *
171 * We provide executables that show how to do the encoding:
172 * prog/jbrankhaus.c
173 * prog/jbcorrelation.c
174 *
175 * The basic flow for correlation classification goes as follows,
176 * where specific choices have been made for parameters (Hausdorff
177 * is the same except for initialization):
178 *
179 * // Initialize and save data in the classer
180 * JBCLASSER *classer =
181 * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7);
182 * SARRAY *safiles = getSortedPathnamesInDirectory(directory,
183 * NULL, 0, 0);
184 * jbAddPages(classer, safiles);
185 *
186 * // Save the data in a data structure for serialization,
187 * // and write it into two files.
188 * JBDATA *data = jbDataSave(classer);
189 * jbDataWrite(rootname, data);
190 *
191 * // Reconstruct (render) the pages from the encoded data.
192 * PIXA *pixa = jbDataRender(data, FALSE);
193 *
194 * Adam Langley has built a jbig2 standards-compliant encoder, the
195 * first one to appear in open source. You can get this encoder at:
196 * http://www.imperialviolet.org/jbig2.html
197 *
198 * It uses arithmetic encoding throughout. It encodes binary images
199 * losslessly with a single arithmetic coding over the full image.
200 * It also does both lossy and lossless encoding from connected
201 * components, using leptonica to generate the templates representing
202 * each cluster.
203 */
204
205 #ifdef HAVE_CONFIG_H
206 #include <config_auto.h>
207 #endif /* HAVE_CONFIG_H */
208
209 #include <string.h>
210 #include <math.h>
211 #include "allheaders.h"
212 #include "array_internal.h"
213
214 #define L_BUF_SIZE 512
215
216 /* For jbClassifyRankHaus(): size of border added around
217 * pix of each c.c., to allow further processing. This
218 * should be at least the sum of the MAX_DIFF_HEIGHT
219 * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */
220 static const l_int32 JB_ADDED_PIXELS = 6;
221
222 /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore():
223 * choose these to be 2 or greater */
224 static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */
225 static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */
226
227 /* In initialization, you have the option to discard components
228 * (cc, characters or words) that have either width or height larger
229 * than a given size. This is convenient for jbDataSave(), because
230 * the components are placed onto a regular lattice with cell
231 * dimension equal to the maximum component size. The default
232 * values are given here. If you want to save all components,
233 * use a sufficiently large set of dimensions. */
234 static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */
235 static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */
236 static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */
237 static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */
238
239 /* This stores the state of a state machine which fetches
240 * similar sized templates */
241 struct JbFindTemplatesState
242 {
243 JBCLASSER *classer; /* classer */
244 l_int32 w; /* desired width */
245 l_int32 h; /* desired height */
246 l_int32 i; /* index into two_by_two step array */
247 L_DNA *dna; /* current number array */
248 l_int32 n; /* current element of dna */
249 };
250 typedef struct JbFindTemplatesState JBFINDCTX;
251
252 /* Static initialization function */
253 static JBCLASSER * jbCorrelationInitInternal(l_int32 components,
254 l_int32 maxwidth, l_int32 maxheight, l_float32 thresh,
255 l_float32 weightfactor, l_int32 keep_components);
256
257 /* Static helper functions */
258 static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs);
259 static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context);
260 static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext);
261 static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y,
262 l_int32 idelx, l_int32 idely, PIX *pixt,
263 l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy);
264
265 #ifndef NO_CONSOLE_IO
266 #define DEBUG_CORRELATION_SCORE 0
267 #endif /* ~NO_CONSOLE_IO */
268
269 /*----------------------------------------------------------------------*
270 * Initialization *
271 *----------------------------------------------------------------------*/
272 /*!
273 * \brief jbRankHausInit()
274 *
275 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
276 * \param[in] maxwidth of component; use 0 for default
277 * \param[in] maxheight of component; use 0 for default
278 * \param[in] size of square structuring element; 2, representing
279 * 2x2 sel, is necessary for reasonable accuracy of
280 * small components; combine this with rank ~ 0.97
281 * to avoid undue class expansion
282 * \param[in] rank rank val of match, each way; in [0.5 - 1.0];
283 * when using size = 2, 0.97 is a reasonable value
284 * \return jbclasser if OK; NULL on error
285 */
286 JBCLASSER *
287 jbRankHausInit(l_int32 components,
288 l_int32 maxwidth,
289 l_int32 maxheight,
290 l_int32 size,
291 l_float32 rank)
292 {
293 JBCLASSER *classer;
294
295 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
296 components != JB_WORDS)
297 return (JBCLASSER *)ERROR_PTR("invalid components", __func__, NULL);
298 if (size < 1 || size > 10)
299 return (JBCLASSER *)ERROR_PTR("size not reasonable", __func__, NULL);
300 if (rank < 0.5 || rank > 1.0)
301 return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", __func__, NULL);
302 if (maxwidth == 0) {
303 if (components == JB_CONN_COMPS)
304 maxwidth = MAX_CONN_COMP_WIDTH;
305 else if (components == JB_CHARACTERS)
306 maxwidth = MAX_CHAR_COMP_WIDTH;
307 else /* JB_WORDS */
308 maxwidth = MAX_WORD_COMP_WIDTH;
309 }
310 if (maxheight == 0)
311 maxheight = MAX_COMP_HEIGHT;
312
313 if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL)
314 return (JBCLASSER *)ERROR_PTR("classer not made", __func__, NULL);
315 classer->maxwidth = maxwidth;
316 classer->maxheight = maxheight;
317 classer->sizehaus = size;
318 classer->rankhaus = rank;
319 classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */
320 classer->keep_pixaa = 1; /* keep all components in pixaa */
321 return classer;
322 }
323
324
325 /*!
326 * \brief jbCorrelationInit()
327 *
328 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
329 * \param[in] maxwidth of component; use 0 for default
330 * \param[in] maxheight of component; use 0 for default
331 * \param[in] thresh value for correlation score: in [0.4 - 0.98]
332 * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0]
333 * \return jbclasser if OK; NULL on error
334 *
335 * <pre>
336 * Notes:
337 * (1) For scanned text, suggested input values are:
338 * thresh ~ [0.8 - 0.85]
339 * weightfactor ~ [0.5 - 0.6]
340 * (2) For electronically generated fonts (e.g., rasterized pdf),
341 * a very high thresh (e.g., 0.95) will not cause a significant
342 * increase in the number of classes.
343 * </pre>
344 */
345 JBCLASSER *
346 jbCorrelationInit(l_int32 components,
347 l_int32 maxwidth,
348 l_int32 maxheight,
349 l_float32 thresh,
350 l_float32 weightfactor)
351 {
352 return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
353 weightfactor, 1);
354 }
355
356 /*!
357 * \brief jbCorrelationInitWithoutComponents()
358 *
359 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
360 * \param[in] maxwidth of component; use 0 for default
361 * \param[in] maxheight of component; use 0 for default
362 * \param[in] thresh value for correlation score: in [0.4 - 0.98]
363 * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0]
364 * \return jbclasser if OK; NULL on error
365 *
366 * <pre>
367 * Notes:
368 * Acts the same as jbCorrelationInit(), but the resulting
369 * object doesn't keep a list of all the components.
370 * </pre>
371 */
372 JBCLASSER *
373 jbCorrelationInitWithoutComponents(l_int32 components,
374 l_int32 maxwidth,
375 l_int32 maxheight,
376 l_float32 thresh,
377 l_float32 weightfactor)
378 {
379 return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
380 weightfactor, 0);
381 }
382
383
384 static JBCLASSER *
385 jbCorrelationInitInternal(l_int32 components,
386 l_int32 maxwidth,
387 l_int32 maxheight,
388 l_float32 thresh,
389 l_float32 weightfactor,
390 l_int32 keep_components)
391 {
392 JBCLASSER *classer;
393
394 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
395 components != JB_WORDS)
396 return (JBCLASSER *)ERROR_PTR("invalid components", __func__, NULL);
397 if (thresh < 0.4 || thresh > 0.98)
398 return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]",
399 __func__, NULL);
400 if (weightfactor < 0.0 || weightfactor > 1.0)
401 return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]",
402 __func__, NULL);
403 if (maxwidth == 0) {
404 if (components == JB_CONN_COMPS)
405 maxwidth = MAX_CONN_COMP_WIDTH;
406 else if (components == JB_CHARACTERS)
407 maxwidth = MAX_CHAR_COMP_WIDTH;
408 else /* JB_WORDS */
409 maxwidth = MAX_WORD_COMP_WIDTH;
410 }
411 if (maxheight == 0)
412 maxheight = MAX_COMP_HEIGHT;
413
414
415 if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL)
416 return (JBCLASSER *)ERROR_PTR("classer not made", __func__, NULL);
417 classer->maxwidth = maxwidth;
418 classer->maxheight = maxheight;
419 classer->thresh = thresh;
420 classer->weightfactor = weightfactor;
421 classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */
422 classer->keep_pixaa = keep_components;
423 return classer;
424 }
425
426
427 /*----------------------------------------------------------------------*
428 * Classify the pages *
429 *----------------------------------------------------------------------*/
430 /*!
431 * \brief jbAddPages()
432 *
433 * \param[in] jbclasser
434 * \param[in] safiles of page image file names
435 * \return 0 if OK; 1 on error
436 *
437 * <pre>
438 * Notes:
439 * (1) jbclasser makes a copy of the array of file names.
440 * (2) The caller is still responsible for destroying the input array.
441 * </pre>
442 */
443 l_ok
444 jbAddPages(JBCLASSER *classer,
445 SARRAY *safiles)
446 {
447 l_int32 i, nfiles;
448 char *fname;
449 PIX *pix;
450
451 if (!classer)
452 return ERROR_INT("classer not defined", __func__, 1);
453 if (!safiles)
454 return ERROR_INT("safiles not defined", __func__, 1);
455
456 classer->safiles = sarrayCopy(safiles);
457 nfiles = sarrayGetCount(safiles);
458 for (i = 0; i < nfiles; i++) {
459 fname = sarrayGetString(safiles, i, L_NOCOPY);
460 if ((pix = pixRead(fname)) == NULL) {
461 L_WARNING("image file %d not read\n", __func__, i);
462 continue;
463 }
464 if (pixGetDepth(pix) != 1) {
465 L_WARNING("image file %d not 1 bpp\n", __func__, i);
466 continue;
467 }
468 jbAddPage(classer, pix);
469 pixDestroy(&pix);
470 }
471
472 return 0;
473 }
474
475
476 /*!
477 * \brief jbAddPage()
478 *
479 * \param[in] jbclasser
480 * \param[in] pixs input page
481 * \return 0 if OK; 1 on error
482 */
483 l_ok
484 jbAddPage(JBCLASSER *classer,
485 PIX *pixs)
486 {
487 BOXA *boxas;
488 PIXA *pixas;
489
490 if (!classer)
491 return ERROR_INT("classer not defined", __func__, 1);
492 if (!pixs || pixGetDepth(pixs) != 1)
493 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
494
495 classer->w = pixGetWidth(pixs);
496 classer->h = pixGetHeight(pixs);
497
498 /* Get the appropriate components and their bounding boxes */
499 if (jbGetComponents(pixs, classer->components, classer->maxwidth,
500 classer->maxheight, &boxas, &pixas)) {
501 return ERROR_INT("components not made", __func__, 1);
502 }
503
504 jbAddPageComponents(classer, pixs, boxas, pixas);
505 boxaDestroy(&boxas);
506 pixaDestroy(&pixas);
507 return 0;
508 }
509
510
511 /*!
512 * \brief jbAddPageComponents()
513 *
514 * \param[in] jbclasser
515 * \param[in] pixs input page
516 * \param[in] boxas b.b. of components for this page
517 * \param[in] pixas components for this page
518 * \return 0 if OK; 1 on error
519 *
520 * <pre>
521 * Notes:
522 * (1) If there are no components on the page, we don't require input
523 * of empty boxas or pixas, although that's the typical situation.
524 * </pre>
525 */
526 l_ok
527 jbAddPageComponents(JBCLASSER *classer,
528 PIX *pixs,
529 BOXA *boxas,
530 PIXA *pixas)
531 {
532 l_int32 n;
533
534 if (!classer)
535 return ERROR_INT("classer not defined", __func__, 1);
536 if (!pixs)
537 return ERROR_INT("pix not defined", __func__, 1);
538
539 /* Test for no components on the current page. Always update the
540 * number of pages processed, even if nothing is on it. */
541 if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) {
542 classer->npages++;
543 return 0;
544 }
545
546 /* Get classes. For hausdorff, it uses a specified size of
547 * structuring element and specified rank. For correlation,
548 * it uses a specified threshold. */
549 if (classer->method == JB_RANKHAUS) {
550 if (jbClassifyRankHaus(classer, boxas, pixas))
551 return ERROR_INT("rankhaus classification failed", __func__, 1);
552 } else { /* classer->method == JB_CORRELATION */
553 if (jbClassifyCorrelation(classer, boxas, pixas))
554 return ERROR_INT("correlation classification failed", __func__, 1);
555 }
556
557 /* Find the global UL corners, adjusted for each instance so
558 * that the class template and instance will have their
559 * centroids in the same place. Then the template can be
560 * used to replace the instance. */
561 if (jbGetULCorners(classer, pixs, boxas))
562 return ERROR_INT("UL corners not found", __func__, 1);
563
564 /* Update total component counts and number of pages processed. */
565 n = boxaGetCount(boxas);
566 classer->baseindex += n;
567 numaAddNumber(classer->nacomps, (l_float32)n);
568 classer->npages++;
569 return 0;
570 }
571
572
573 /*----------------------------------------------------------------------*
574 * Classification using windowed rank hausdorff metric *
575 *----------------------------------------------------------------------*/
576 /*!
577 * \brief jbClassifyRankHaus()
578 *
579 * \param[in] jbclasser
580 * \param[in] boxa new components for classification
581 * \param[in] pixas new components for classification
582 * \return 0 if OK; 1 on error
583 */
584 l_ok
585 jbClassifyRankHaus(JBCLASSER *classer,
586 BOXA *boxa,
587 PIXA *pixas)
588 {
589 l_int32 n, nt, i, wt, ht, iclass, size, found, testval;
590 l_int32 npages, area1, area3;
591 l_int32 *tab8;
592 l_float32 rank, x1, y1, x2, y2;
593 BOX *box;
594 NUMA *naclass, *napage;
595 NUMA *nafg; /* fg area of all instances */
596 NUMA *nafgt; /* fg area of all templates */
597 JBFINDCTX *findcontext;
598 L_DNAHASH *dahash;
599 PIX *pix, *pix1, *pix2, *pix3, *pix4;
600 PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd;
601 PIXAA *pixaa;
602 PTA *pta, *ptac, *ptact;
603 SEL *sel;
604
605 if (!classer)
606 return ERROR_INT("classer not defined", __func__, 1);
607 if (!boxa)
608 return ERROR_INT("boxa not defined", __func__, 1);
609 if (!pixas)
610 return ERROR_INT("pixas not defined", __func__, 1);
611 if ((n = pixaGetCount(pixas)) == 0)
612 return ERROR_INT("pixas is empty", __func__, 1);
613 if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */
614 return ERROR_INT("fg counting failed", __func__, 1);
615
616 npages = classer->npages;
617 size = classer->sizehaus;
618 sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT);
619
620 /* Generate the bordered pixa, with and without dilation.
621 * pixa1 and pixa2 contain all the input components. */
622 pixa1 = pixaCreate(n);
623 pixa2 = pixaCreate(n);
624 for (i = 0; i < n; i++) {
625 pix = pixaGetPix(pixas, i, L_CLONE);
626 pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
627 JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
628 pix2 = pixDilate(NULL, pix1, sel);
629 pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */
630 pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */
631 pixDestroy(&pix);
632 }
633
634 /* Get the centroids of all the bordered images.
635 * These are relative to the UL corner of each (bordered) pix. */
636 pta = pixaCentroids(pixa1); /* centroids for this page; use here */
637 ptac = classer->ptac; /* holds centroids of components up to this page */
638 ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */
639 ptact = classer->ptact; /* holds centroids of templates */
640
641 /* Use these to save the class and page of each component. */
642 naclass = classer->naclass;
643 napage = classer->napage;
644
645 /* Store the unbordered pix in a pixaa, in a hierarchical
646 * set of arrays. There is one pixa for each class,
647 * and the pix in each pixa are all the instances found
648 * of that class. This is actually more than one would need
649 * for a jbig2 encoder, but there are two reasons to keep
650 * them around: (1) the set of instances for each class
651 * can be used to make an improved binary (or, better,
652 * a grayscale) template, rather than simply using the first
653 * one in the set; (2) we can investigate the failures
654 * of the classifier. This pixaa grows as we process
655 * successive pages. */
656 pixaa = classer->pixaa;
657
658 /* arrays to store class exemplars (templates) */
659 pixat = classer->pixat; /* un-dilated */
660 pixatd = classer->pixatd; /* dilated */
661
662 /* Fill up the pixaa tree with the template exemplars as
663 * the first pix in each pixa. As we add each pix,
664 * we also add the associated box to the pixa.
665 * We also keep track of the centroid of each pix,
666 * and use the difference between centroids (of the
667 * pix with the exemplar we are checking it with)
668 * to align the two when checking that the Hausdorff
669 * distance does not exceed a threshold.
670 * The threshold is set by the Sel used for dilating.
671 * For example, a 3x3 brick, sel_3, corresponds to a
672 * Hausdorff distance of 1. In general, for an NxN brick,
673 * with N odd, corresponds to a Hausdorff distance of (N - 1)/2.
674 * It turns out that we actually need to use a sel of size 2x2
675 * to avoid small bad components when there is a halftone image
676 * from which components can be chosen.
677 * The larger the Sel you use, the fewer the number of classes,
678 * and the greater the likelihood of putting semantically
679 * different objects in the same class. For simplicity,
680 * we do this separately for the case of rank == 1.0 (exact
681 * match within the Hausdorff distance) and rank < 1.0. */
682 rank = classer->rankhaus;
683 dahash = classer->dahash;
684 if (rank == 1.0) {
685 for (i = 0; i < n; i++) {
686 pix1 = pixaGetPix(pixa1, i, L_CLONE);
687 pix2 = pixaGetPix(pixa2, i, L_CLONE);
688 ptaGetPt(pta, i, &x1, &y1);
689 nt = pixaGetCount(pixat); /* number of templates */
690 found = FALSE;
691 findcontext = findSimilarSizedTemplatesInit(classer, pix1);
692 while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
693 /* Find score for this template */
694 pix3 = pixaGetPix(pixat, iclass, L_CLONE);
695 pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
696 ptaGetPt(ptact, iclass, &x2, &y2);
697 testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2,
698 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT);
699 pixDestroy(&pix3);
700 pixDestroy(&pix4);
701 if (testval == 1) {
702 found = TRUE;
703 numaAddNumber(naclass, (l_float32)iclass);
704 numaAddNumber(napage, (l_float32)npages);
705 if (classer->keep_pixaa) {
706 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
707 pix = pixaGetPix(pixas, i, L_CLONE);
708 pixaAddPix(pixa, pix, L_INSERT);
709 box = boxaGetBox(boxa, i, L_CLONE);
710 pixaAddBox(pixa, box, L_INSERT);
711 pixaDestroy(&pixa);
712 }
713 break;
714 }
715 }
716 findSimilarSizedTemplatesDestroy(&findcontext);
717 if (found == FALSE) { /* new class */
718 numaAddNumber(naclass, (l_float32)nt);
719 numaAddNumber(napage, (l_float32)npages);
720 pixa = pixaCreate(0);
721 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
722 pixaAddPix(pixa, pix, L_INSERT);
723 wt = pixGetWidth(pix);
724 ht = pixGetHeight(pix);
725 l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt);
726 box = boxaGetBox(boxa, i, L_CLONE);
727 pixaAddBox(pixa, box, L_INSERT);
728 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
729 ptaAddPt(ptact, x1, y1);
730 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
731 pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */
732 } else { /* don't save them */
733 pixDestroy(&pix1);
734 pixDestroy(&pix2);
735 }
736 }
737 } else { /* rank < 1.0 */
738 nafgt = classer->nafgt;
739 tab8 = makePixelSumTab8();
740 for (i = 0; i < n; i++) { /* all instances on this page */
741 pix1 = pixaGetPix(pixa1, i, L_CLONE);
742 numaGetIValue(nafg, i, &area1);
743 pix2 = pixaGetPix(pixa2, i, L_CLONE);
744 ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */
745 nt = pixaGetCount(pixat); /* number of templates */
746 found = FALSE;
747 findcontext = findSimilarSizedTemplatesInit(classer, pix1);
748 while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
749 /* Find score for this template */
750 pix3 = pixaGetPix(pixat, iclass, L_CLONE);
751 numaGetIValue(nafgt, iclass, &area3);
752 pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
753 ptaGetPt(ptact, iclass, &x2, &y2);
754 testval = pixRankHaustest(pix1, pix2, pix3, pix4,
755 x1 - x2, y1 - y2,
756 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
757 area1, area3, rank, tab8);
758 pixDestroy(&pix3);
759 pixDestroy(&pix4);
760 if (testval == 1) { /* greedy match; take the first */
761 found = TRUE;
762 numaAddNumber(naclass, (l_float32)iclass);
763 numaAddNumber(napage, (l_float32)npages);
764 if (classer->keep_pixaa) {
765 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
766 pix = pixaGetPix(pixas, i, L_CLONE);
767 pixaAddPix(pixa, pix, L_INSERT);
768 box = boxaGetBox(boxa, i, L_CLONE);
769 pixaAddBox(pixa, box, L_INSERT);
770 pixaDestroy(&pixa);
771 }
772 break;
773 }
774 }
775 findSimilarSizedTemplatesDestroy(&findcontext);
776 if (found == FALSE) { /* new class */
777 numaAddNumber(naclass, (l_float32)nt);
778 numaAddNumber(napage, (l_float32)npages);
779 pixa = pixaCreate(0);
780 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
781 pixaAddPix(pixa, pix, L_INSERT);
782 wt = pixGetWidth(pix);
783 ht = pixGetHeight(pix);
784 l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt);
785 box = boxaGetBox(boxa, i, L_CLONE);
786 pixaAddBox(pixa, box, L_INSERT);
787 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
788 ptaAddPt(ptact, x1, y1);
789 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
790 pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */
791 numaAddNumber(nafgt, (l_float32)area1);
792 } else { /* don't save them */
793 pixDestroy(&pix1);
794 pixDestroy(&pix2);
795 }
796 }
797 LEPT_FREE(tab8);
798 }
799 classer->nclass = pixaGetCount(pixat);
800
801 numaDestroy(&nafg);
802 ptaDestroy(&pta);
803 pixaDestroy(&pixa1);
804 pixaDestroy(&pixa2);
805 selDestroy(&sel);
806 return 0;
807 }
808
809
810 /*!
811 * \brief pixHaustest()
812 *
813 * \param[in] pix1 new pix, not dilated
814 * \param[in] pix2 new pix, dilated
815 * \param[in] pix3 exemplar pix, not dilated
816 * \param[in] pix4 exemplar pix, dilated
817 * \param[in] delx x comp of centroid difference
818 * \param[in] dely y comp of centroid difference
819 * \param[in] maxdiffw max width difference of pix1 and pix2
820 * \param[in] maxdiffh max height difference of pix1 and pix2
821 * \return 0 FALSE) if no match, 1 (TRUE if the new
822 * pix is in the same class as the exemplar.
823 *
824 * <pre>
825 * Notes:
826 * We check first that the two pix are roughly
827 * the same size. Only if they meet that criterion do
828 * we compare the bitmaps. The Hausdorff is a 2-way
829 * check. The centroid difference is used to align the two
830 * images to the nearest integer for each of the checks.
831 * These check that the dilated image of one contains
832 * ALL the pixels of the undilated image of the other.
833 * Checks are done in both direction. A single pixel not
834 * contained in either direction results in failure of the test.
835 * </pre>
836 */
837 l_int32
838 pixHaustest(PIX *pix1,
839 PIX *pix2,
840 PIX *pix3,
841 PIX *pix4,
842 l_float32 delx, /* x(1) - x(3) */
843 l_float32 dely, /* y(1) - y(3) */
844 l_int32 maxdiffw,
845 l_int32 maxdiffh)
846 {
847 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
848 PIX *pixt;
849
850 /* Eliminate possible matches based on size difference */
851 wi = pixGetWidth(pix1);
852 hi = pixGetHeight(pix1);
853 wt = pixGetWidth(pix3);
854 ht = pixGetHeight(pix3);
855 delw = L_ABS(wi - wt);
856 if (delw > maxdiffw)
857 return FALSE;
858 delh = L_ABS(hi - ht);
859 if (delh > maxdiffh)
860 return FALSE;
861
862 /* Round difference in centroid location to nearest integer;
863 * use this as a shift when doing the matching. */
864 if (delx >= 0)
865 idelx = (l_int32)(delx + 0.5);
866 else
867 idelx = (l_int32)(delx - 0.5);
868 if (dely >= 0)
869 idely = (l_int32)(dely + 0.5);
870 else
871 idely = (l_int32)(dely - 0.5);
872
873 /* Do 1-direction hausdorff, checking that every pixel in pix1
874 * is within a dilation distance of some pixel in pix3. Namely,
875 * that pix4 entirely covers pix1:
876 * pixt = pixSubtract(NULL, pix1, pix4), including shift
877 * where pixt has no ON pixels. */
878 pixt = pixCreateTemplate(pix1);
879 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
880 pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
881 pix4, 0, 0);
882 pixZero(pixt, &boolmatch);
883 if (boolmatch == 0) {
884 pixDestroy(&pixt);
885 return FALSE;
886 }
887
888 /* Do 1-direction hausdorff, checking that every pixel in pix3
889 * is within a dilation distance of some pixel in pix1. Namely,
890 * that pix2 entirely covers pix3:
891 * pixSubtract(pixt, pix3, pix2), including shift
892 * where pixt has no ON pixels. */
893 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
894 pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
895 pixZero(pixt, &boolmatch);
896 pixDestroy(&pixt);
897 return boolmatch;
898 }
899
900
901 /*!
902 * \brief pixRankHaustest()
903 *
904 * \param[in] pix1 new pix, not dilated
905 * \param[in] pix2 new pix, dilated
906 * \param[in] pix3 exemplar pix, not dilated
907 * \param[in] pix4 exemplar pix, dilated
908 * \param[in] delx x comp of centroid difference
909 * \param[in] dely y comp of centroid difference
910 * \param[in] maxdiffw max width difference of pix1 and pix2
911 * \param[in] maxdiffh max height difference of pix1 and pix2
912 * \param[in] area1 fg pixels in pix1
913 * \param[in] area3 fg pixels in pix3
914 * \param[in] rank rank value of test, each way
915 * \param[in] tab8 table of pixel sums for byte
916 * \return 0 FALSE) if no match, 1 (TRUE if the new
917 * pix is in the same class as the exemplar.
918 *
919 * <pre>
920 * Notes:
921 * We check first that the two pix are roughly
922 * the same size. Only if they meet that criterion do
923 * we compare the bitmaps. We convert the rank value to
924 * a number of pixels by multiplying the rank fraction by the number
925 * of pixels in the undilated image. The Hausdorff is a 2-way
926 * check. The centroid difference is used to align the two
927 * images to the nearest integer for each of the checks.
928 * The rank hausdorff checks that the dilated image of one
929 * contains the rank fraction of the pixels of the undilated
930 * image of the other. Checks are done in both direction.
931 * Failure of the test in either direction results in failure
932 * of the test.
933 * </pre>
934 */
935 l_int32
936 pixRankHaustest(PIX *pix1,
937 PIX *pix2,
938 PIX *pix3,
939 PIX *pix4,
940 l_float32 delx, /* x(1) - x(3) */
941 l_float32 dely, /* y(1) - y(3) */
942 l_int32 maxdiffw,
943 l_int32 maxdiffh,
944 l_int32 area1,
945 l_int32 area3,
946 l_float32 rank,
947 l_int32 *tab8)
948 {
949 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
950 l_int32 thresh1, thresh3;
951 PIX *pixt;
952
953 /* Eliminate possible matches based on size difference */
954 wi = pixGetWidth(pix1);
955 hi = pixGetHeight(pix1);
956 wt = pixGetWidth(pix3);
957 ht = pixGetHeight(pix3);
958 delw = L_ABS(wi - wt);
959 if (delw > maxdiffw)
960 return FALSE;
961 delh = L_ABS(hi - ht);
962 if (delh > maxdiffh)
963 return FALSE;
964
965 /* Upper bounds in remaining pixels for allowable match */
966 thresh1 = (l_int32)(area1 * (1. - rank) + 0.5);
967 thresh3 = (l_int32)(area3 * (1. - rank) + 0.5);
968
969 /* Round difference in centroid location to nearest integer;
970 * use this as a shift when doing the matching. */
971 if (delx >= 0)
972 idelx = (l_int32)(delx + 0.5);
973 else
974 idelx = (l_int32)(delx - 0.5);
975 if (dely >= 0)
976 idely = (l_int32)(dely + 0.5);
977 else
978 idely = (l_int32)(dely - 0.5);
979
980 /* Do 1-direction hausdorff, checking that every pixel in pix1
981 * is within a dilation distance of some pixel in pix3. Namely,
982 * that pix4 entirely covers pix1:
983 * pixt = pixSubtract(NULL, pix1, pix4), including shift
984 * where pixt has no ON pixels. */
985 pixt = pixCreateTemplate(pix1);
986 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
987 pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
988 pix4, 0, 0);
989 pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8);
990 if (boolmatch == 1) { /* above thresh1 */
991 pixDestroy(&pixt);
992 return FALSE;
993 }
994
995 /* Do 1-direction hausdorff, checking that every pixel in pix3
996 * is within a dilation distance of some pixel in pix1. Namely,
997 * that pix2 entirely covers pix3:
998 * pixSubtract(pixt, pix3, pix2), including shift
999 * where pixt has no ON pixels. */
1000 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
1001 pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
1002 pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8);
1003 pixDestroy(&pixt);
1004 if (boolmatch == 1) /* above thresh3 */
1005 return FALSE;
1006 else
1007 return TRUE;
1008 }
1009
1010
1011 /*----------------------------------------------------------------------*
1012 * Classification using windowed correlation score *
1013 *----------------------------------------------------------------------*/
1014 /*!
1015 * \brief jbClassifyCorrelation()
1016 *
1017 * \param[in] jbclasser
1018 * \param[in] boxa new components for classification
1019 * \param[in] pixas new components for classification
1020 * \return 0 if OK; 1 on error
1021 */
1022 l_ok
1023 jbClassifyCorrelation(JBCLASSER *classer,
1024 BOXA *boxa,
1025 PIXA *pixas)
1026 {
1027 l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages,
1028 overthreshold;
1029 l_int32 *sumtab, *centtab;
1030 l_uint32 *row, word;
1031 l_float32 x1, y1, x2, y2, xsum, ysum;
1032 l_float32 thresh, weight, threshold;
1033 BOX *box;
1034 NUMA *naclass, *napage;
1035 NUMA *nafgt; /* fg area of all templates */
1036 NUMA *naarea; /* w * h area of all templates */
1037 JBFINDCTX *findcontext;
1038 L_DNAHASH *dahash;
1039 PIX *pix, *pix1, *pix2;
1040 PIXA *pixa, *pixa1, *pixat;
1041 PIXAA *pixaa;
1042 PTA *pta, *ptac, *ptact;
1043 l_int32 *pixcts; /* pixel counts of each pixa */
1044 l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */
1045 l_int32 x, y, rowcount, downcount, wpl;
1046 l_uint8 byte;
1047
1048 if (!classer)
1049 return ERROR_INT("classer not found", __func__, 1);
1050 if (!boxa)
1051 return ERROR_INT("boxa not found", __func__, 1);
1052 if (!pixas)
1053 return ERROR_INT("pixas not found", __func__, 1);
1054
1055 npages = classer->npages;
1056
1057 /* Generate the bordered pixa, which contains all the the
1058 * input components. This will not be saved. */
1059 if ((n = pixaGetCount(pixas)) == 0) {
1060 L_WARNING("pixas is empty\n", __func__);
1061 return 0;
1062 }
1063 pixa1 = pixaCreate(n);
1064 for (i = 0; i < n; i++) {
1065 pix = pixaGetPix(pixas, i, L_CLONE);
1066 pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
1067 JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
1068 pixaAddPix(pixa1, pix1, L_INSERT);
1069 pixDestroy(&pix);
1070 }
1071
1072 /* Use these to save the class and page of each component. */
1073 naclass = classer->naclass;
1074 napage = classer->napage;
1075
1076 /* Get the number of fg pixels in each component. */
1077 nafgt = classer->nafgt; /* holds fg areas of the templates */
1078 sumtab = makePixelSumTab8();
1079
1080 pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts));
1081 pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts));
1082 centtab = makePixelCentroidTab8();
1083
1084 /* Count the "1" pixels in each row of the pix in pixa1; this
1085 * allows pixCorrelationScoreThresholded to abort early if a match
1086 * is impossible. This loop merges three calculations: the total
1087 * number of "1" pixels, the number of "1" pixels in each row, and
1088 * the centroid. The centroids are relative to the UL corner of
1089 * each (bordered) pix. The pixrowcts[i][y] are the total number
1090 * of fg pixels in pixa[i] below row y. */
1091 pta = ptaCreate(n);
1092 for (i = 0; i < n; i++) {
1093 pix = pixaGetPix(pixa1, i, L_CLONE);
1094 pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix),
1095 sizeof(**pixrowcts));
1096 xsum = 0;
1097 ysum = 0;
1098 wpl = pixGetWpl(pix);
1099 row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl;
1100 downcount = 0;
1101 for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) {
1102 pixrowcts[i][y] = downcount;
1103 rowcount = 0;
1104 for (x = 0; x < wpl; x++) {
1105 word = row[x];
1106 byte = word & 0xff;
1107 rowcount += sumtab[byte];
1108 xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte];
1109 byte = (word >> 8) & 0xff;
1110 rowcount += sumtab[byte];
1111 xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte];
1112 byte = (word >> 16) & 0xff;
1113 rowcount += sumtab[byte];
1114 xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte];
1115 byte = (word >> 24) & 0xff;
1116 rowcount += sumtab[byte];
1117 xsum += centtab[byte] + x * 32 * sumtab[byte];
1118 }
1119 downcount += rowcount;
1120 ysum += rowcount * y;
1121 }
1122 pixcts[i] = downcount;
1123 if (downcount > 0) {
1124 ptaAddPt(pta,
1125 xsum / (l_float32)downcount, ysum / (l_float32)downcount);
1126 } else { /* no pixels; shouldn't happen */
1127 L_ERROR("downcount == 0 !\n", __func__);
1128 ptaAddPt(pta, pixGetWidth(pix) / 2, pixGetHeight(pix) / 2);
1129 }
1130 pixDestroy(&pix);
1131 }
1132
1133 ptac = classer->ptac; /* holds centroids of components up to this page */
1134 ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */
1135 ptact = classer->ptact; /* holds centroids of templates */
1136
1137 /* Store the unbordered pix in a pixaa, in a hierarchical
1138 * set of arrays. There is one pixa for each class,
1139 * and the pix in each pixa are all the instances found
1140 * of that class. This is actually more than one would need
1141 * for a jbig2 encoder, but there are two reasons to keep
1142 * them around: (1) the set of instances for each class
1143 * can be used to make an improved binary (or, better,
1144 * a grayscale) template, rather than simply using the first
1145 * one in the set; (2) we can investigate the failures
1146 * of the classifier. This pixaa grows as we process
1147 * successive pages. */
1148 pixaa = classer->pixaa;
1149
1150 /* Array to store class exemplars */
1151 pixat = classer->pixat;
1152
1153 /* Fill up the pixaa tree with the template exemplars as
1154 * the first pix in each pixa. As we add each pix,
1155 * we also add the associated box to the pixa.
1156 * We also keep track of the centroid of each pix,
1157 * and use the difference between centroids (of the
1158 * pix with the exemplar we are checking it with)
1159 * to align the two when checking that the correlation
1160 * score exceeds a threshold. The correlation score
1161 * is given by the square of the area of the AND
1162 * between aligned instance and template, divided by
1163 * the product of areas of each image. For identical
1164 * template and instance, the score is 1.0.
1165 * If the threshold is too small, non-equivalent instances
1166 * will be placed in the same class; if too large, there will
1167 * be an unnecessary division of classes representing the
1168 * same character. The weightfactor adds in some of the
1169 * difference (1.0 - thresh), depending on the heaviness
1170 * of the template (measured as the fraction of fg pixels). */
1171 thresh = classer->thresh;
1172 weight = classer->weightfactor;
1173 naarea = classer->naarea;
1174 dahash = classer->dahash;
1175 for (i = 0; i < n; i++) {
1176 pix1 = pixaGetPix(pixa1, i, L_CLONE);
1177 area1 = pixcts[i];
1178 ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */
1179 nt = pixaGetCount(pixat);
1180 found = FALSE;
1181 findcontext = findSimilarSizedTemplatesInit(classer, pix1);
1182 while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
1183 /* Get the template */
1184 pix2 = pixaGetPix(pixat, iclass, L_CLONE);
1185 numaGetIValue(nafgt, iclass, &area2);
1186 ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */
1187
1188 /* Find threshold for this template */
1189 if (weight > 0.0) {
1190 numaGetIValue(naarea, iclass, &area);
1191 threshold = thresh + (1.f - thresh) * weight * area2 / area;
1192 } else {
1193 threshold = thresh;
1194 }
1195
1196 /* Find score for this template */
1197 overthreshold = pixCorrelationScoreThresholded(pix1, pix2,
1198 area1, area2, x1 - x2, y1 - y2,
1199 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
1200 sumtab, pixrowcts[i], threshold);
1201 #if DEBUG_CORRELATION_SCORE
1202 {
1203 l_float32 score, testscore;
1204 l_int32 count, testcount;
1205 pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2,
1206 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
1207 sumtab, &score);
1208
1209 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1210 x1 - x2, y1 - y2, MAX_DIFF_WIDTH,
1211 MAX_DIFF_HEIGHT, sumtab, &testscore);
1212 count = (l_int32)rint(sqrt(score * area1 * area2));
1213 testcount = (l_int32)rint(sqrt(testscore * area1 * area2));
1214 if ((score >= threshold) != (testscore >= threshold)) {
1215 lept_stderr("Correlation score mismatch: "
1216 "%d(%g,%d) vs %d(%g,%d) (%g)\n",
1217 count, score, score >= threshold,
1218 testcount, testscore, testscore >= threshold,
1219 score - testscore);
1220 }
1221
1222 if ((score >= threshold) != overthreshold) {
1223 lept_stderr("Mismatch between correlation/threshold "
1224 "comparison: %g(%g,%d) >= %g(%g) vs %s\n",
1225 score, score*area1*area2, count, threshold,
1226 threshold*area1*area2,
1227 (overthreshold ? "true" : "false"));
1228 }
1229 }
1230 #endif /* DEBUG_CORRELATION_SCORE */
1231 pixDestroy(&pix2);
1232
1233 if (overthreshold) { /* greedy match */
1234 found = TRUE;
1235 numaAddNumber(naclass, (l_float32)iclass);
1236 numaAddNumber(napage, (l_float32)npages);
1237 if (classer->keep_pixaa) {
1238 /* We are keeping a record of all components */
1239 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
1240 pix = pixaGetPix(pixas, i, L_CLONE);
1241 pixaAddPix(pixa, pix, L_INSERT);
1242 box = boxaGetBox(boxa, i, L_CLONE);
1243 pixaAddBox(pixa, box, L_INSERT);
1244 pixaDestroy(&pixa);
1245 }
1246 break;
1247 }
1248 }
1249 findSimilarSizedTemplatesDestroy(&findcontext);
1250 if (found == FALSE) { /* new class */
1251 numaAddNumber(naclass, (l_float32)nt);
1252 numaAddNumber(napage, (l_float32)npages);
1253 pixa = pixaCreate(0);
1254 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
1255 pixaAddPix(pixa, pix, L_INSERT);
1256 wt = pixGetWidth(pix);
1257 ht = pixGetHeight(pix);
1258 l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt);
1259 box = boxaGetBox(boxa, i, L_CLONE);
1260 pixaAddBox(pixa, box, L_INSERT);
1261 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
1262 ptaAddPt(ptact, x1, y1);
1263 numaAddNumber(nafgt, (l_float32)area1);
1264 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
1265 area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) *
1266 (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS);
1267 numaAddNumber(naarea, (l_float32)area);
1268 } else { /* don't save it */
1269 pixDestroy(&pix1);
1270 }
1271 }
1272 classer->nclass = pixaGetCount(pixat);
1273
1274 LEPT_FREE(pixcts);
1275 LEPT_FREE(centtab);
1276 for (i = 0; i < n; i++) {
1277 LEPT_FREE(pixrowcts[i]);
1278 }
1279 LEPT_FREE(pixrowcts);
1280
1281 LEPT_FREE(sumtab);
1282 ptaDestroy(&pta);
1283 pixaDestroy(&pixa1);
1284 return 0;
1285 }
1286
1287
1288 /*----------------------------------------------------------------------*
1289 * Determine the image components we start with *
1290 *----------------------------------------------------------------------*/
1291 /*!
1292 * \brief jbGetComponents()
1293 *
1294 * \param[in] pixs 1 bpp
1295 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
1296 * \param[in] maxwidth of saved components; larger are discarded
1297 * \param[in] maxheight of saved components; larger are discarded
1298 * \param[out] ppboxa b.b. of component items
1299 * \param[out] pppixa component items
1300 * \return 0 if OK, 1 on error
1301 */
1302 l_ok
1303 jbGetComponents(PIX *pixs,
1304 l_int32 components,
1305 l_int32 maxwidth,
1306 l_int32 maxheight,
1307 BOXA **pboxad,
1308 PIXA **ppixad)
1309 {
1310 l_int32 empty, res, redfactor;
1311 BOXA *boxa;
1312 PIX *pix1, *pix2, *pix3;
1313 PIXA *pixa, *pixat;
1314
1315 if (!pboxad)
1316 return ERROR_INT("&boxad not defined", __func__, 1);
1317 *pboxad = NULL;
1318 if (!ppixad)
1319 return ERROR_INT("&pixad not defined", __func__, 1);
1320 *ppixad = NULL;
1321 if (!pixs)
1322 return ERROR_INT("pixs not defined", __func__, 1);
1323 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
1324 components != JB_WORDS)
1325 return ERROR_INT("invalid components", __func__, 1);
1326
1327 pixZero(pixs, &empty);
1328 if (empty) {
1329 *pboxad = boxaCreate(0);
1330 *ppixad = pixaCreate(0);
1331 return 0;
1332 }
1333
1334 /* If required, preprocess input pixs. The method for both
1335 * characters and words is to generate a connected component
1336 * mask over the units that we want to aggregrate, which are,
1337 * in general, sets of related connected components in pixs.
1338 * For characters, we want to include the dots with
1339 * 'i', 'j' and '!', so we do a small vertical closing to
1340 * generate the mask. For words, we make a mask over all
1341 * characters in each word. This is a bit more tricky, because
1342 * the spacing between words is difficult to predict a priori,
1343 * and words can be typeset with variable spacing that can
1344 * in some cases be barely larger than the space between
1345 * characters. The first step is to generate the mask and
1346 * identify each of its connected components. */
1347 if (components == JB_CONN_COMPS) { /* no preprocessing */
1348 boxa = pixConnComp(pixs, &pixa, 8);
1349 } else if (components == JB_CHARACTERS) {
1350 pix1 = pixMorphSequence(pixs, "c1.6", 0);
1351 boxa = pixConnComp(pix1, &pixat, 8);
1352 pixa = pixaClipToPix(pixat, pixs);
1353 pixDestroy(&pix1);
1354 pixaDestroy(&pixat);
1355 } else { /* components == JB_WORDS */
1356
1357 /* Do the operations at about 150 ppi resolution.
1358 * It is much faster at 75 ppi, but the results are
1359 * more accurate at 150 ppi. This will segment the
1360 * words in body text. It can be expected that relatively
1361 * infrequent words in a larger font will be split. */
1362 res = pixGetXRes(pixs);
1363 if (res <= 200) {
1364 redfactor = 1;
1365 pix1 = pixClone(pixs);
1366 } else if (res <= 400) {
1367 redfactor = 2;
1368 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1369 } else {
1370 redfactor = 4;
1371 pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
1372 }
1373
1374 /* Estimate the word mask, at approximately 150 ppi.
1375 * This has both very large and very small components left in. */
1376 pixWordMaskByDilation(pix1, &pix2, NULL, NULL);
1377
1378 /* Expand the optimally dilated word mask to full res. */
1379 pix3 = pixExpandReplicate(pix2, redfactor);
1380
1381 /* Pull out the pixels in pixs corresponding to the mask
1382 * components in pix3. Note that above we used threshold
1383 * levels in the reduction of 1 to insure that the resulting
1384 * mask fully covers the input pixs. The downside of using
1385 * a threshold of 1 is that very close characters from adjacent
1386 * lines can be joined. But with a level of 2 or greater,
1387 * it is necessary to use a seedfill, followed by a pixOr():
1388 * pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8);
1389 * pixOr(pix3, pix3, pixt4);
1390 * to insure that the mask coverage is complete over pixs. */
1391 boxa = pixConnComp(pix3, &pixat, 4);
1392 pixa = pixaClipToPix(pixat, pixs);
1393 pixaDestroy(&pixat);
1394 pixDestroy(&pix1);
1395 pixDestroy(&pix2);
1396 pixDestroy(&pix3);
1397 }
1398
1399 /* Remove large components, and save the results. */
1400 *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH,
1401 L_SELECT_IF_LTE, NULL);
1402 *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH,
1403 L_SELECT_IF_LTE, NULL);
1404 pixaDestroy(&pixa);
1405 boxaDestroy(&boxa);
1406
1407 return 0;
1408 }
1409
1410
1411 /*!
1412 * \brief pixWordMaskByDilation()
1413 *
1414 * \param[in] pixs 1 bpp; typ. at 75 to 150 ppi
1415 * \param[out] pmask [optional] dilated word mask
1416 * \param[out] psize [optional] size of good horizontal dilation
1417 * \param[out] pixadb [optional] debug: pixa of intermediate steps
1418 * \return 0 if OK, 1 on error
1419 *
1420 * <pre>
1421 * Notes:
1422 * (1) This gives an estimate of the word masks. See
1423 * pixWordBoxesByDilation() for further filtering of the word boxes.
1424 * (2) The resolution should be between 75 and 150 ppi, and the optimal
1425 * dilation will be between 3 and 10.
1426 * (3) A good size for dilating to get word masks is optionally returned.
1427 * (4) Typically, the number of c.c. reduced with each successive
1428 * dilation (stored in nadiff) decreases quickly to a minimum
1429 * (where the characters in a word are joined), and then
1430 * increases again as the smaller number of words are joined.
1431 * For the typical case, you can then look for this minimum
1432 * and dilate to get the word mask. However, there are many
1433 * cases where the function is not so simple. For example, if the
1434 * pix has been upscaled 2x, the nadiff function oscillates, with
1435 * every other value being zero! And for some images it tails
1436 * off without a clear minimum to indicate where to break.
1437 * So a more simple and robust method is to find the dilation
1438 * where the initial number of c.c. has been reduced by some
1439 * fraction (we use a 70% reduction).
1440 * </pre>
1441 */
1442 l_ok
1443 pixWordMaskByDilation(PIX *pixs,
1444 PIX **ppixm,
1445 l_int32 *psize,
1446 PIXA *pixadb)
1447 {
1448 l_int32 i, n, ndil, maxdiff, diff, ibest;
1449 l_int32 check, count, total, xres;
1450 l_int32 ncc[13]; /* max dilation + 1 */
1451 l_int32 *diffa;
1452 BOXA *boxa;
1453 NUMA *nacc, *nadiff;
1454 PIX *pix1, *pix2;
1455
1456 if (ppixm) *ppixm = NULL;
1457 if (psize) *psize = 0;
1458 if (!pixs || pixGetDepth(pixs) != 1)
1459 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
1460 if (!ppixm && !psize)
1461 return ERROR_INT("no output requested", __func__, 1);
1462
1463 /* Find a good dilation to create the word mask, by successively
1464 * increasing dilation size and counting the connected components. */
1465 pix1 = pixCopy(NULL, pixs);
1466 ndil = 12; /* appropriate for 75 to 150 ppi */
1467 nacc = numaCreate(ndil + 1);
1468 nadiff = numaCreate(ndil + 1);
1469 for (i = 0; i <= ndil; i++) {
1470 if (i == 0) /* first one not dilated */
1471 pix2 = pixCopy(NULL, pix1);
1472 else /* successive dilation by sel_2h */
1473 pix2 = pixMorphSequence(pix1, "d2.1", 0);
1474 boxa = pixConnCompBB(pix2, 4);
1475 ncc[i] = boxaGetCount(boxa);
1476 numaAddNumber(nacc, (l_float32)ncc[i]);
1477 if (i == 0) total = ncc[0];
1478 if (i > 0) {
1479 diff = ncc[i - 1] - ncc[i];
1480 numaAddNumber(nadiff, (l_float32)diff);
1481 }
1482 pixDestroy(&pix1);
1483 pix1 = pix2;
1484 boxaDestroy(&boxa);
1485 }
1486 pixDestroy(&pix1);
1487
1488 /* Find the dilation at which the c.c. count has reduced
1489 * to 30% of the initial value. Although 30% seems high,
1490 * it seems better to use this but add one to ibest. */
1491 diffa = numaGetIArray(nadiff);
1492 n = numaGetCount(nadiff);
1493 maxdiff = 0;
1494 check = TRUE;
1495 ibest = 2;
1496 for (i = 1; i < n; i++) {
1497 numaGetIValue(nacc, i, &count);
1498 if (check && count < 0.3 * total) {
1499 ibest = i + 1;
1500 check = FALSE;
1501 }
1502 diff = diffa[i];
1503 if (diff > maxdiff)
1504 maxdiff = diff;
1505 }
1506 LEPT_FREE(diffa);
1507
1508 /* Add small compensation for higher resolution */
1509 xres = pixGetXRes(pixs);
1510 if (xres == 0) xres = 150;
1511 if (xres > 110) ibest++;
1512 if (ibest < 2) {
1513 L_INFO("setting ibest to minimum allowed value of 2\n", __func__);
1514 ibest = 2;
1515 }
1516
1517 if (pixadb) {
1518 lept_mkdir("lept/jb");
1519 {NUMA *naseq;
1520 PIX *pix3, *pix4;
1521 L_INFO("Best dilation: %d\n", __func__, L_MAX(3, ibest + 1));
1522 naseq = numaMakeSequence(1, 1, numaGetCount(nacc));
1523 pix3 = gplotGeneralPix2(naseq, nacc, GPLOT_LINES,
1524 "/tmp/lept/jb/numcc",
1525 "Number of cc vs. horizontal dilation",
1526 "Sel horiz", "Number of cc");
1527 pixaAddPix(pixadb, pix3, L_INSERT);
1528 numaDestroy(&naseq);
1529 naseq = numaMakeSequence(1, 1, numaGetCount(nadiff));
1530 pix3 = gplotGeneralPix2(naseq, nadiff, GPLOT_LINES,
1531 "/tmp/lept/jb/diffcc",
1532 "Diff count of cc vs. horizontal dilation",
1533 "Sel horiz", "Diff in cc");
1534 pixaAddPix(pixadb, pix3, L_INSERT);
1535 numaDestroy(&naseq);
1536 pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1);
1537 pix4 = pixScaleToSize(pix3, 600, 0);
1538 pixaAddPix(pixadb, pix4, L_INSERT);
1539 pixDestroy(&pix3);
1540 }
1541 }
1542
1543 if (psize) *psize = ibest + 1;
1544 if (ppixm)
1545 *ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1);
1546
1547 numaDestroy(&nacc);
1548 numaDestroy(&nadiff);
1549 return 0;
1550 }
1551
1552
1553 /*!
1554 * \brief pixWordBoxesByDilation()
1555 *
1556 * \param[in] pixs 1 bpp; typ. 75 - 200 ppi
1557 * \param[in] minwidth saved components; smaller are discarded
1558 * \param[in] minheight saved components; smaller are discarded
1559 * \param[in] maxwidth saved components; larger are discarded
1560 * \param[in] maxheight saved components; larger are discarded
1561 * \param[out] pboxa of dilated word mask
1562 * \param[out] psize [optional] size of good horizontal dilation
1563 * \param[out] pixadb [optional] debug: pixa of intermediate steps
1564 * \return 0 if OK, 1 on error
1565 *
1566 * <pre>
1567 * Notes:
1568 * (1) Returns a pruned set of word boxes.
1569 * (2) See pixWordMaskByDilation().
1570 * </pre>
1571 */
1572 l_ok
1573 pixWordBoxesByDilation(PIX *pixs,
1574 l_int32 minwidth,
1575 l_int32 minheight,
1576 l_int32 maxwidth,
1577 l_int32 maxheight,
1578 BOXA **pboxa,
1579 l_int32 *psize,
1580 PIXA *pixadb)
1581 {
1582 BOXA *boxa1, *boxa2;
1583 PIX *pix1, *pix2;
1584
1585 if (psize) *psize = 0;
1586 if (!pixs || pixGetDepth(pixs) != 1)
1587 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
1588 if (!pboxa)
1589 return ERROR_INT("&boxa not defined", __func__, 1);
1590 *pboxa = NULL;
1591
1592 /* Make a first estimate of the word mask */
1593 if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb))
1594 return ERROR_INT("pixWordMaskByDilation() failed", __func__, 1);
1595
1596 /* Prune the word mask. Get the bounding boxes of the words.
1597 * Remove the small ones, which can be due to punctuation
1598 * that was not joined to a word. Also remove the large ones,
1599 * which are not likely to be words. */
1600 boxa1 = pixConnComp(pix1, NULL, 8);
1601 boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH,
1602 L_SELECT_IF_GTE, NULL);
1603 *pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH,
1604 L_SELECT_IF_LTE, NULL);
1605 if (pixadb) {
1606 pix2 = pixUnpackBinary(pixs, 32, 1);
1607 pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0);
1608 pixaAddPix(pixadb, pix2, L_INSERT);
1609 pix2 = pixUnpackBinary(pixs, 32, 1);
1610 pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0);
1611 pixaAddPix(pixadb, pix2, L_INSERT);
1612 }
1613 boxaDestroy(&boxa1);
1614 boxaDestroy(&boxa2);
1615 pixDestroy(&pix1);
1616 return 0;
1617 }
1618
1619
1620 /*----------------------------------------------------------------------*
1621 * Build grayscale composites (templates) *
1622 *----------------------------------------------------------------------*/
1623 /*!
1624 * \brief jbAccumulateComposites()
1625 *
1626 * \param[in] pixaa one pixa for each class
1627 * \param[out] ppna number of samples used to build each composite
1628 * \param[out] pptat centroids of bordered composites
1629 * \return pixad accumulated sum of samples in each class, or NULL on error
1630 *
1631 */
1632 PIXA *
1633 jbAccumulateComposites(PIXAA *pixaa,
1634 NUMA **pna,
1635 PTA **pptat)
1636 {
1637 l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff;
1638 l_float32 x, y, xave, yave;
1639 NUMA *na;
1640 PIX *pix, *pixt1, *pixt2, *pixsum;
1641 PIXA *pixa, *pixad;
1642 PTA *ptat, *pta;
1643
1644 if (!pptat)
1645 return (PIXA *)ERROR_PTR("&ptat not defined", __func__, NULL);
1646 *pptat = NULL;
1647 if (!pna)
1648 return (PIXA *)ERROR_PTR("&na not defined", __func__, NULL);
1649 *pna = NULL;
1650 if (!pixaa)
1651 return (PIXA *)ERROR_PTR("pixaa not defined", __func__, NULL);
1652
1653 n = pixaaGetCount(pixaa, NULL);
1654 if ((ptat = ptaCreate(n)) == NULL)
1655 return (PIXA *)ERROR_PTR("ptat not made", __func__, NULL);
1656 *pptat = ptat;
1657 pixad = pixaCreate(n);
1658 na = numaCreate(n);
1659 *pna = na;
1660
1661 for (i = 0; i < n; i++) {
1662 pixa = pixaaGetPixa(pixaa, i, L_CLONE);
1663 nt = pixaGetCount(pixa);
1664 numaAddNumber(na, nt);
1665 if (nt == 0) {
1666 L_WARNING("empty pixa found!\n", __func__);
1667 pixaDestroy(&pixa);
1668 continue;
1669 }
1670 pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh);
1671 pix = pixaGetPix(pixa, 0, L_CLONE);
1672 d = pixGetDepth(pix);
1673 pixDestroy(&pix);
1674 pixt1 = pixCreate(maxw, maxh, d);
1675 pixsum = pixInitAccumulate(maxw, maxh, 0);
1676 pta = pixaCentroids(pixa);
1677
1678 /* Find the average value of the centroids ... */
1679 xave = yave = 0;
1680 for (j = 0; j < nt; j++) {
1681 ptaGetPt(pta, j, &x, &y);
1682 xave += x;
1683 yave += y;
1684 }
1685 xave = xave / (l_float32)nt;
1686 yave = yave / (l_float32)nt;
1687
1688 /* and place all centroids at their average value */
1689 for (j = 0; j < nt; j++) {
1690 pixt2 = pixaGetPix(pixa, j, L_CLONE);
1691 ptaGetPt(pta, j, &x, &y);
1692 xdiff = (l_int32)(x - xave);
1693 ydiff = (l_int32)(y - yave);
1694 pixClearAll(pixt1);
1695 pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC,
1696 pixt2, 0, 0);
1697 pixAccumulate(pixsum, pixt1, L_ARITH_ADD);
1698 pixDestroy(&pixt2);
1699 }
1700 pixaAddPix(pixad, pixsum, L_INSERT);
1701 ptaAddPt(ptat, xave, yave);
1702
1703 pixaDestroy(&pixa);
1704 pixDestroy(&pixt1);
1705 ptaDestroy(&pta);
1706 }
1707
1708 return pixad;
1709 }
1710
1711
1712 /*!
1713 * \brief jbTemplatesFromComposites()
1714 *
1715 * \param[in] pixac one pix of composites for each class
1716 * \param[in] na number of samples used for each class composite
1717 * \return pixad 8 bpp templates for each class, or NULL on error
1718 *
1719 */
1720 PIXA *
1721 jbTemplatesFromComposites(PIXA *pixac,
1722 NUMA *na)
1723 {
1724 l_int32 n, i;
1725 l_float32 nt; /* number of samples in the composite; always an integer */
1726 l_float32 factor;
1727 PIX *pixsum; /* accumulated composite */
1728 PIX *pixd;
1729 PIXA *pixad;
1730
1731 if (!pixac)
1732 return (PIXA *)ERROR_PTR("pixac not defined", __func__, NULL);
1733 if (!na)
1734 return (PIXA *)ERROR_PTR("na not defined", __func__, NULL);
1735
1736 n = pixaGetCount(pixac);
1737 pixad = pixaCreate(n);
1738 for (i = 0; i < n; i++) {
1739 pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */
1740 numaGetFValue(na, i, &nt);
1741 factor = 255.f / nt;
1742 pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */
1743 pixd = pixFinalAccumulate(pixsum, 0, 8);
1744 pixaAddPix(pixad, pixd, L_INSERT);
1745 pixDestroy(&pixsum);
1746 }
1747
1748 return pixad;
1749 }
1750
1751
1752
1753 /*----------------------------------------------------------------------*
1754 * jbig2 utility routines *
1755 *----------------------------------------------------------------------*/
1756 /*!
1757 * \brief jbClasserCreate()
1758 *
1759 * \param[in] method JB_RANKHAUS, JB_CORRELATION
1760 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
1761 * \return jbclasser, or NULL on error
1762 */
1763 JBCLASSER *
1764 jbClasserCreate(l_int32 method,
1765 l_int32 components)
1766 {
1767 JBCLASSER *classer;
1768
1769 if (method != JB_RANKHAUS && method != JB_CORRELATION)
1770 return (JBCLASSER *)ERROR_PTR("invalid method", __func__, NULL);
1771 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
1772 components != JB_WORDS)
1773 return (JBCLASSER *)ERROR_PTR("invalid component", __func__, NULL);
1774
1775 classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER));
1776 classer->method = method;
1777 classer->components = components;
1778 classer->nacomps = numaCreate(0);
1779 classer->pixaa = pixaaCreate(0);
1780 classer->pixat = pixaCreate(0);
1781 classer->pixatd = pixaCreate(0);
1782 classer->nafgt = numaCreate(0);
1783 classer->naarea = numaCreate(0);
1784 classer->ptac = ptaCreate(0);
1785 classer->ptact = ptaCreate(0);
1786 classer->naclass = numaCreate(0);
1787 classer->napage = numaCreate(0);
1788 classer->ptaul = ptaCreate(0);
1789 return classer;
1790 }
1791
1792
1793 /*
1794 * \brief jbClasserDestroy()
1795 *
1796 * \param[in,out] pclasser will be set to null before returning
1797 * \return void
1798 */
1799 void
1800 jbClasserDestroy(JBCLASSER **pclasser)
1801 {
1802 JBCLASSER *classer;
1803
1804 if (!pclasser)
1805 return;
1806 if ((classer = *pclasser) == NULL)
1807 return;
1808
1809 sarrayDestroy(&classer->safiles);
1810 numaDestroy(&classer->nacomps);
1811 pixaaDestroy(&classer->pixaa);
1812 pixaDestroy(&classer->pixat);
1813 pixaDestroy(&classer->pixatd);
1814 l_dnaHashDestroy(&classer->dahash);
1815 numaDestroy(&classer->nafgt);
1816 numaDestroy(&classer->naarea);
1817 ptaDestroy(&classer->ptac);
1818 ptaDestroy(&classer->ptact);
1819 numaDestroy(&classer->naclass);
1820 numaDestroy(&classer->napage);
1821 ptaDestroy(&classer->ptaul);
1822 ptaDestroy(&classer->ptall);
1823 LEPT_FREE(classer);
1824 *pclasser = NULL;
1825 }
1826
1827
1828 /*!
1829 * \brief jbDataSave()
1830 *
1831 * \param[in] jbclasser
1832 * \param[in] latticew cell width used to store each connected
1833 * component in the composite
1834 * \param[in] latticeh ditto for cell height
1835 * \return jbdata, or NULL on error
1836 *
1837 * <pre>
1838 * Notes:
1839 * (1) This routine stores the jbig2-type data required for
1840 * generating a lossy jbig2 version of the image.
1841 * It can be losslessly written to (and read from) two files.
1842 * (2) It generates and stores the mosaic of templates.
1843 * (3) It clones the Numa and Pta arrays, so these must all
1844 * be destroyed by the caller.
1845 * (4) Input 0 to use the default values for latticew and/or latticeh,
1846 * </pre>
1847 */
1848 JBDATA *
1849 jbDataSave(JBCLASSER *classer)
1850 {
1851 l_int32 maxw, maxh;
1852 JBDATA *data;
1853 PIX *pix;
1854
1855 if (!classer)
1856 return (JBDATA *)ERROR_PTR("classer not defined", __func__, NULL);
1857
1858 /* Write the templates into an array. */
1859 pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh);
1860 pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1,
1861 NULL, NULL);
1862 if (!pix)
1863 return (JBDATA *)ERROR_PTR("data not made", __func__, NULL);
1864
1865 data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
1866 data->pix = pix;
1867 data->npages = classer->npages;
1868 data->w = classer->w;
1869 data->h = classer->h;
1870 data->nclass = classer->nclass;
1871 data->latticew = maxw + 1;
1872 data->latticeh = maxh + 1;
1873 data->naclass = numaClone(classer->naclass);
1874 data->napage = numaClone(classer->napage);
1875 data->ptaul = ptaClone(classer->ptaul);
1876 return data;
1877 }
1878
1879
1880 /*
1881 * \brief jbDataDestroy()
1882 *
1883 * \param[in,out] pdata will be set to null before returning
1884 * \return void
1885 */
1886 void
1887 jbDataDestroy(JBDATA **pdata)
1888 {
1889 JBDATA *data;
1890
1891 if (!pdata)
1892 return;
1893 if ((data = *pdata) == NULL)
1894 return;
1895
1896 pixDestroy(&data->pix);
1897 numaDestroy(&data->naclass);
1898 numaDestroy(&data->napage);
1899 ptaDestroy(&data->ptaul);
1900 LEPT_FREE(data);
1901 *pdata = NULL;
1902 }
1903
1904
1905 /*!
1906 * \brief jbDataWrite()
1907 *
1908 * \param[in] rootname for output files; everything but the extension
1909 * \param[in] jbdata
1910 * \return 0 if OK, 1 on error
1911 *
1912 * <pre>
1913 * Notes:
1914 * (1) Serialization function that writes data in jbdata to file.
1915 * </pre>
1916 */
1917 l_ok
1918 jbDataWrite(const char *rootout,
1919 JBDATA *jbdata)
1920 {
1921 char buf[L_BUF_SIZE];
1922 l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage;
1923 NUMA *naclass, *napage;
1924 PTA *ptaul;
1925 PIX *pixt;
1926 FILE *fp;
1927
1928 if (!rootout)
1929 return ERROR_INT("no rootout", __func__, 1);
1930 if (!jbdata)
1931 return ERROR_INT("no jbdata", __func__, 1);
1932
1933 npages = jbdata->npages;
1934 w = jbdata->w;
1935 h = jbdata->h;
1936 pixt = jbdata->pix;
1937 nclass = jbdata->nclass;
1938 cellw = jbdata->latticew;
1939 cellh = jbdata->latticeh;
1940 naclass = jbdata->naclass;
1941 napage = jbdata->napage;
1942 ptaul = jbdata->ptaul;
1943
1944 snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT);
1945 pixWrite(buf, pixt, IFF_PNG);
1946
1947 snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT);
1948 if ((fp = fopenWriteStream(buf, "wb")) == NULL)
1949 return ERROR_INT_1("stream not opened", buf, __func__, 1);
1950 ncomp = ptaGetCount(ptaul);
1951 fprintf(fp, "jb data file\n");
1952 fprintf(fp, "num pages = %d\n", npages);
1953 fprintf(fp, "page size: w = %d, h = %d\n", w, h);
1954 fprintf(fp, "num components = %d\n", ncomp);
1955 fprintf(fp, "num classes = %d\n", nclass);
1956 fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh);
1957 for (i = 0; i < ncomp; i++) {
1958 numaGetIValue(napage, i, &ipage);
1959 numaGetIValue(naclass, i, &iclass);
1960 ptaGetIPt(ptaul, i, &x, &y);
1961 fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y);
1962 }
1963 fclose(fp);
1964
1965 return 0;
1966 }
1967
1968
1969 /*!
1970 * \brief jbDataRead()
1971 *
1972 * \param[in] rootname for template and data files
1973 * \return jbdata, or NULL on error
1974 */
1975 JBDATA *
1976 jbDataRead(const char *rootname)
1977 {
1978 char fname[L_BUF_SIZE];
1979 char *linestr;
1980 l_uint8 *data;
1981 l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage;
1982 l_int32 npages, nclass, ncomp, ninit;
1983 size_t size;
1984 JBDATA *jbdata;
1985 NUMA *naclass, *napage;
1986 PIX *pixs;
1987 PTA *ptaul;
1988 SARRAY *sa;
1989
1990 if (!rootname)
1991 return (JBDATA *)ERROR_PTR("rootname not defined", __func__, NULL);
1992
1993 snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT);
1994 if ((pixs = pixRead(fname)) == NULL)
1995 return (JBDATA *)ERROR_PTR("pix not read", __func__, NULL);
1996
1997 snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT);
1998 if ((data = l_binaryRead(fname, &size)) == NULL) {
1999 pixDestroy(&pixs);
2000 return (JBDATA *)ERROR_PTR("data not read", __func__, NULL);
2001 }
2002
2003 if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) {
2004 pixDestroy(&pixs);
2005 LEPT_FREE(data);
2006 return (JBDATA *)ERROR_PTR("sa not made", __func__, NULL);
2007 }
2008 nsa = sarrayGetCount(sa); /* number of cc + 6 */
2009 linestr = sarrayGetString(sa, 0, L_NOCOPY);
2010 if (strcmp(linestr, "jb data file") != 0) {
2011 pixDestroy(&pixs);
2012 LEPT_FREE(data);
2013 sarrayDestroy(&sa);
2014 return (JBDATA *)ERROR_PTR("invalid jb data file", __func__, NULL);
2015 }
2016 linestr = sarrayGetString(sa, 1, L_NOCOPY);
2017 sscanf(linestr, "num pages = %d", &npages);
2018 linestr = sarrayGetString(sa, 2, L_NOCOPY);
2019 sscanf(linestr, "page size: w = %d, h = %d", &w, &h);
2020 linestr = sarrayGetString(sa, 3, L_NOCOPY);
2021 sscanf(linestr, "num components = %d", &ncomp);
2022 linestr = sarrayGetString(sa, 4, L_NOCOPY);
2023 sscanf(linestr, "num classes = %d\n", &nclass);
2024 linestr = sarrayGetString(sa, 5, L_NOCOPY);
2025 sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh);
2026
2027 #if 1
2028 lept_stderr("num pages = %d\n", npages);
2029 lept_stderr("page size: w = %d, h = %d\n", w, h);
2030 lept_stderr("num components = %d\n", ncomp);
2031 lept_stderr("num classes = %d\n", nclass);
2032 lept_stderr("template lattice size: w = %d, h = %d\n", cellw, cellh);
2033 #endif
2034
2035 ninit = ncomp;
2036 if (ncomp > 1000000) { /* fuzz protection */
2037 L_WARNING("ncomp > 1M\n", __func__);
2038 ninit = 1000000;
2039 }
2040 naclass = numaCreate(ninit);
2041 napage = numaCreate(ninit);
2042 ptaul = ptaCreate(ninit);
2043 for (i = 6; i < nsa; i++) {
2044 linestr = sarrayGetString(sa, i, L_NOCOPY);
2045 sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y);
2046 numaAddNumber(napage, (l_float32)ipage);
2047 numaAddNumber(naclass, (l_float32)iclass);
2048 ptaAddPt(ptaul, (l_float32)x, (l_float32)y);
2049 }
2050
2051 jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
2052 jbdata->pix = pixs;
2053 jbdata->npages = npages;
2054 jbdata->w = w;
2055 jbdata->h = h;
2056 jbdata->nclass = nclass;
2057 jbdata->latticew = cellw;
2058 jbdata->latticeh = cellh;
2059 jbdata->naclass = naclass;
2060 jbdata->napage = napage;
2061 jbdata->ptaul = ptaul;
2062
2063 LEPT_FREE(data);
2064 sarrayDestroy(&sa);
2065 return jbdata;
2066 }
2067
2068
2069 /*!
2070 * \brief jbDataRender()
2071 *
2072 * \param[in] jbdata
2073 * \param[in] debugflag if TRUE, writes into 2 bpp pix and adds
2074 * component outlines in color
2075 * \return pixa reconstruction of original images, using templates or
2076 * NULL on error
2077 */
2078 PIXA *
2079 jbDataRender(JBDATA *data,
2080 l_int32 debugflag)
2081 {
2082 l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage;
2083 l_int32 npages, nclass, ncomp, wp, hp;
2084 BOX *box;
2085 BOXA *boxa;
2086 BOXAA *baa;
2087 NUMA *naclass, *napage;
2088 PIX *pixt, *pix1, *pix2, *pixd;
2089 PIXA *pixat; /* pixa of templates */
2090 PIXA *pixad; /* pixa of output images */
2091 PIXCMAP *cmap;
2092 PTA *ptaul;
2093
2094 if (!data)
2095 return (PIXA *)ERROR_PTR("data not defined", __func__, NULL);
2096
2097 npages = data->npages;
2098 w = data->w;
2099 h = data->h;
2100 pixt = data->pix;
2101 nclass = data->nclass;
2102 cellw = data->latticew;
2103 cellh = data->latticeh;
2104 naclass = data->naclass;
2105 napage = data->napage;
2106 ptaul = data->ptaul;
2107 ncomp = numaGetCount(naclass);
2108
2109 /* Reconstruct the original set of images from the templates
2110 * and the data associated with each component. First,
2111 * generate the output pixa as a set of empty pix. For debug,
2112 * where the bounding boxes of each component will be displayed
2113 * in red, use 2 bpp colormapped output pix. */
2114 if ((pixad = pixaCreate(npages)) == NULL)
2115 return (PIXA *)ERROR_PTR("pixad not made", __func__, NULL);
2116 for (i = 0; i < npages; i++) {
2117 if (debugflag == FALSE) {
2118 pix1 = pixCreate(w, h, 1);
2119 } else {
2120 pix1 = pixCreate(w, h, 2);
2121 cmap = pixcmapCreate(2);
2122 pixcmapAddColor(cmap, 255, 255, 255);
2123 pixcmapAddColor(cmap, 0, 0, 0);
2124 pixSetColormap(pix1, cmap);
2125 }
2126 pixaAddPix(pixad, pix1, L_INSERT);
2127 }
2128
2129 /* Put the class templates into a pixa. */
2130 if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) {
2131 pixaDestroy(&pixad);
2132 return (PIXA *)ERROR_PTR("pixat not made", __func__, NULL);
2133 }
2134
2135 /* Place each component in the right location on its page.
2136 * For debug, first generate the boxa of component bounding
2137 * boxes for each page, and save the results in a boxaa.
2138 * Nota bene. In general we cannot use rasterop on colormap
2139 * indices with operations like PIX_SRC | PIX_DST. So we must
2140 * do the rasterop of image components first, and then paint
2141 * the component bounding boxes later. We can use rasterop
2142 * on the 2 bpp pix here because the colormap has only two
2143 * index values, 0 and 1, * so doing a bit-or between pixels
2144 * only affects the lower-order bit and does not generate
2145 * spurious colormap indices. */
2146 if (debugflag == TRUE) {
2147 baa = boxaaCreate(npages);
2148 boxa = boxaCreate(0);
2149 boxaaInitFull(baa, boxa);
2150 boxaDestroy(&boxa);
2151 }
2152 for (i = 0; i < ncomp; i++) {
2153 numaGetIValue(napage, i, &ipage);
2154 numaGetIValue(naclass, i, &iclass);
2155 pix1 = pixaGetPix(pixat, iclass, L_CLONE); /* the template */
2156 wp = pixGetWidth(pix1);
2157 hp = pixGetHeight(pix1);
2158 ptaGetIPt(ptaul, i, &x, &y);
2159 pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */
2160 if (debugflag == FALSE) {
2161 pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix1, 0, 0);
2162 } else {
2163 pix2 = pixConvert1To2Cmap(pix1);
2164 pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix2, 0, 0);
2165 boxa = boxaaGetBoxa(baa, ipage, L_CLONE);
2166 box = boxCreate(x, y, wp, hp);
2167 boxaAddBox(boxa, box, L_INSERT);
2168 boxaDestroy(&boxa); /* clone */
2169 pixDestroy(&pix2); /* clone */
2170 }
2171 pixDestroy(&pix1); /* clone */
2172 pixDestroy(&pixd); /* clone */
2173 }
2174
2175 /* For debug, for each page image, render the box outlines in red.
2176 * This adds a red colormap entry to each page. */
2177 if (debugflag == TRUE) {
2178 for (i = 0; i < npages; i++) {
2179 pixd = pixaGetPix(pixad, i, L_CLONE);
2180 boxa = boxaaGetBoxa(baa, i, L_CLONE);
2181 pixRenderBoxaArb(pixd, boxa, 1, 255, 0, 0);
2182 pixDestroy(&pixd); /* clone */
2183 boxaDestroy(&boxa); /* clone */
2184 }
2185 boxaaDestroy(&baa);
2186 }
2187
2188 pixaDestroy(&pixat);
2189 return pixad;
2190 }
2191
2192
2193 /*!
2194 * \brief jbGetULCorners()
2195 *
2196 * \param[in] jbclasser
2197 * \param[in] pixs full res image
2198 * \param[in] boxa of c.c. bounding rectangles for this page
2199 * \return 0 if OK, 1 on error
2200 *
2201 * <pre>
2202 * Notes:
2203 * (1) This computes the ptaul field, which has the global UL corners,
2204 * adjusted for each specific component, so that each component
2205 * can be replaced by the template for its class and have the
2206 * centroid in the template in the same position as the
2207 * centroid of the original connected component. It is important
2208 * that this be done properly to avoid a wavy baseline in the
2209 * result.
2210 * (2) The array fields ptac and ptact give the centroids of
2211 * those components relative to the UL corner of each component.
2212 * Here, we compute the difference in each component, round to
2213 * nearest integer, and correct the box->x and box->y by
2214 * the appropriate integral difference.
2215 * (3) The templates and stored instances are all bordered.
2216 * </pre>
2217 */
2218 l_ok
2219 jbGetULCorners(JBCLASSER *classer,
2220 PIX *pixs,
2221 BOXA *boxa)
2222 {
2223 l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy;
2224 l_int32 *sumtab;
2225 l_float32 x1, x2, y1, y2, delx, dely;
2226 BOX *box;
2227 NUMA *naclass;
2228 PIX *pixt;
2229 PTA *ptac, *ptact, *ptaul;
2230
2231 if (!classer)
2232 return ERROR_INT("classer not defined", __func__, 1);
2233 if (!pixs)
2234 return ERROR_INT("pixs not defined", __func__, 1);
2235 if (!boxa)
2236 return ERROR_INT("boxa not defined", __func__, 1);
2237
2238 n = boxaGetCount(boxa);
2239 ptaul = classer->ptaul;
2240 naclass = classer->naclass;
2241 ptac = classer->ptac;
2242 ptact = classer->ptact;
2243 baseindex = classer->baseindex; /* num components before this page */
2244 sumtab = makePixelSumTab8();
2245 for (i = 0; i < n; i++) {
2246 index = baseindex + i;
2247 ptaGetPt(ptac, index, &x1, &y1);
2248 numaGetIValue(naclass, index, &iclass);
2249 ptaGetPt(ptact, iclass, &x2, &y2);
2250 delx = x2 - x1;
2251 dely = y2 - y1;
2252 if (delx >= 0)
2253 idelx = (l_int32)(delx + 0.5);
2254 else
2255 idelx = (l_int32)(delx - 0.5);
2256 if (dely >= 0)
2257 idely = (l_int32)(dely + 0.5);
2258 else
2259 idely = (l_int32)(dely - 0.5);
2260 if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) {
2261 LEPT_FREE(sumtab);
2262 return ERROR_INT("box not found", __func__, 1);
2263 }
2264 boxGetGeometry(box, &x, &y, NULL, NULL);
2265
2266 /* Get final increments dx and dy for best alignment */
2267 pixt = pixaGetPix(classer->pixat, iclass, L_CLONE);
2268 finalPositioningForAlignment(pixs, x, y, idelx, idely,
2269 pixt, sumtab, &dx, &dy);
2270 /* if (i % 20 == 0)
2271 lept_stderr("dx = %d, dy = %d\n", dx, dy); */
2272 ptaAddPt(ptaul, (l_float32)(x - idelx + dx), (l_float32)(y - idely + dy));
2273 boxDestroy(&box);
2274 pixDestroy(&pixt);
2275 }
2276
2277 LEPT_FREE(sumtab);
2278 return 0;
2279 }
2280
2281
2282 /*!
2283 * \brief jbGetLLCorners()
2284 *
2285 * \param[in] jbclasser
2286 * \return 0 if OK, 1 on error
2287 *
2288 * <pre>
2289 * Notes:
2290 * (1) This computes the ptall field, which has the global LL corners,
2291 * adjusted for each specific component, so that each component
2292 * can be replaced by the template for its class and have the
2293 * centroid in the template in the same position as the
2294 * centroid of the original connected component. It is important
2295 * that this be done properly to avoid a wavy baseline in the result.
2296 * (2) It is computed here from the corresponding UL corners, where
2297 * the input templates and stored instances are all bordered.
2298 * This should be done after all pages have been processed.
2299 * (3) For proper substitution, the templates whose LL corners are
2300 * placed in these locations must be UN-bordered.
2301 * This is available for a realistic jbig2 encoder, which would
2302 * (1) encode each template without a border, and (2) encode
2303 * the position using the LL corner (rather than the UL
2304 * corner) because the difference between y-values
2305 * of successive instances is typically close to zero.
2306 * </pre>
2307 */
2308 l_ok
2309 jbGetLLCorners(JBCLASSER *classer)
2310 {
2311 l_int32 i, iclass, n, x1, y1, h;
2312 NUMA *naclass;
2313 PIX *pix;
2314 PIXA *pixat;
2315 PTA *ptaul, *ptall;
2316
2317 if (!classer)
2318 return ERROR_INT("classer not defined", __func__, 1);
2319
2320 ptaul = classer->ptaul;
2321 naclass = classer->naclass;
2322 pixat = classer->pixat;
2323
2324 ptaDestroy(&classer->ptall);
2325 n = ptaGetCount(ptaul);
2326 ptall = ptaCreate(n);
2327 classer->ptall = ptall;
2328
2329 /* If the templates were bordered, we would add h - 1 to the UL
2330 * corner y-value. However, because the templates to be used
2331 * here have their borders removed, and the borders are
2332 * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS
2333 * to the UL corner y-value. */
2334 for (i = 0; i < n; i++) {
2335 ptaGetIPt(ptaul, i, &x1, &y1);
2336 numaGetIValue(naclass, i, &iclass);
2337 pix = pixaGetPix(pixat, iclass, L_CLONE);
2338 h = pixGetHeight(pix);
2339 ptaAddPt(ptall, (l_float32)x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS);
2340 pixDestroy(&pix);
2341 }
2342
2343 return 0;
2344 }
2345
2346
2347 /*----------------------------------------------------------------------*
2348 * Static helpers *
2349 *----------------------------------------------------------------------*/
2350 /* When looking for similar matches we check templates whose size is +/- 2 in
2351 * each direction. This involves 25 possible sizes. This array contains the
2352 * offsets for each of those positions in a spiral pattern. There are 25 pairs
2353 * of numbers in this array: even positions are x values. */
2354 static int two_by_two_walk[50] = {
2355 0, 0,
2356 0, 1,
2357 -1, 0,
2358 0, -1,
2359 1, 0,
2360 -1, 1,
2361 1, 1,
2362 -1, -1,
2363 1, -1,
2364 0, -2,
2365 2, 0,
2366 0, 2,
2367 -2, 0,
2368 -1, -2,
2369 1, -2,
2370 2, -1,
2371 2, 1,
2372 1, 2,
2373 -1, 2,
2374 -2, 1,
2375 -2, -1,
2376 -2, -2,
2377 2, -2,
2378 2, 2,
2379 -2, 2};
2380
2381
2382 /*!
2383 * \brief findSimilarSizedTemplatesInit()
2384 *
2385 * \param[in] classer
2386 * \param[in] pixs instance to be matched
2387 * \return Allocated context to be used with findSimilar*
2388 */
2389 static JBFINDCTX *
2390 findSimilarSizedTemplatesInit(JBCLASSER *classer,
2391 PIX *pixs)
2392 {
2393 JBFINDCTX *state;
2394
2395 state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX));
2396 state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS;
2397 state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS;
2398 state->classer = classer;
2399 return state;
2400 }
2401
2402
2403 static void
2404 findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate)
2405 {
2406 JBFINDCTX *state;
2407
2408 if (pstate == NULL) {
2409 L_WARNING("ptr address is null\n", __func__);
2410 return;
2411 }
2412 if ((state = *pstate) == NULL)
2413 return;
2414
2415 l_dnaDestroy(&state->dna);
2416 LEPT_FREE(state);
2417 *pstate = NULL;
2418 return;
2419 }
2420
2421
2422 /*!
2423 * \brief findSimilarSizedTemplatesNext()
2424 *
2425 * \param[in] state from findSimilarSizedTemplatesInit
2426 * \return next template number, or -1 when finished
2427 *
2428 * We have a dna hash table that maps template area to a list of template
2429 * numbers with that area. We wish to find similar sized templates,
2430 * so we first look for templates with the same width and height, and
2431 * then with width + 1, etc. This walk is guided by the
2432 * two_by_two_walk array, above.
2433 *
2434 * We don't want to have to collect the whole list of templates first,
2435 * because we hope to find a well-matching template quickly. So we
2436 * keep the context for this walk in an explictit state structure,
2437 * and this function acts like a generator.
2438 */
2439 static l_int32
2440 findSimilarSizedTemplatesNext(JBFINDCTX *state)
2441 {
2442 l_int32 desiredh, desiredw, size, templ;
2443 PIX *pixt;
2444
2445 while(1) { /* Continue the walk over step 'i' */
2446 if (state->i >= 25) { /* all done; didn't find a good match */
2447 return -1;
2448 }
2449
2450 desiredw = state->w + two_by_two_walk[2 * state->i];
2451 desiredh = state->h + two_by_two_walk[2 * state->i + 1];
2452 if (desiredh < 1 || desiredw < 1) { /* invalid size */
2453 state->i++;
2454 continue;
2455 }
2456
2457 if (!state->dna) {
2458 /* We have yet to start walking the array for the step 'i' */
2459 state->dna = l_dnaHashGetDna(state->classer->dahash,
2460 (l_uint64)desiredh * desiredw, L_CLONE);
2461 if (!state->dna) { /* nothing there */
2462 state->i++;
2463 continue;
2464 }
2465
2466 state->n = 0; /* OK, we got a dna. */
2467 }
2468
2469 /* Continue working on this dna */
2470 size = l_dnaGetCount(state->dna);
2471 for ( ; state->n < size; ) {
2472 templ = (l_int32)(state->dna->array[state->n++] + 0.5);
2473 pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE);
2474 if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw &&
2475 pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) {
2476 pixDestroy(&pixt);
2477 return templ;
2478 }
2479 pixDestroy(&pixt);
2480 }
2481
2482 /* Exhausted the dna (no match found); take another step and
2483 * try again. */
2484 state->i++;
2485 l_dnaDestroy(&state->dna);
2486 continue;
2487 }
2488 }
2489
2490
2491 /*!
2492 * \brief finalPositioningForAlignment()
2493 *
2494 * \param[in] pixs input page image
2495 * \param[in] x, y location of UL corner of bb of component in pixs
2496 * \param[in] idelx, idely compensation to match centroids of component
2497 * and template
2498 * \param[in] pixt template, with JB_ADDED_PIXELS of padding
2499 * on all sides
2500 * \param[in] sumtab for summing fg pixels in an image
2501 * \param[in] pdx, pdy return delta on position for best match; each
2502 * one is in the set {-1, 0, 1}
2503 * \return 0 if OK, 1 on error
2504 *
2505 */
2506 static l_int32
2507 finalPositioningForAlignment(PIX *pixs,
2508 l_int32 x,
2509 l_int32 y,
2510 l_int32 idelx,
2511 l_int32 idely,
2512 PIX *pixt,
2513 l_int32 *sumtab,
2514 l_int32 *pdx,
2515 l_int32 *pdy)
2516 {
2517 l_int32 w, h, i, j, minx, miny, count, mincount;
2518 PIX *pixi; /* clipped from source pixs */
2519 PIX *pixr; /* temporary storage */
2520 BOX *box;
2521
2522 if (!pixs)
2523 return ERROR_INT("pixs not defined", __func__, 1);
2524 if (!pixt)
2525 return ERROR_INT("pixt not defined", __func__, 1);
2526 if (!pdx || !pdy)
2527 return ERROR_INT("&dx and &dy not both defined", __func__, 1);
2528 if (!sumtab)
2529 return ERROR_INT("sumtab not defined", __func__, 1);
2530 *pdx = *pdy = 0;
2531
2532 /* Use JB_ADDED_PIXELS pixels padding on each side */
2533 pixGetDimensions(pixt, &w, &h, NULL);
2534 box = boxCreate(x - idelx - JB_ADDED_PIXELS,
2535 y - idely - JB_ADDED_PIXELS, w, h);
2536 pixi = pixClipRectangle(pixs, box, NULL);
2537 boxDestroy(&box);
2538 if (!pixi)
2539 return ERROR_INT("pixi not made", __func__, 1);
2540
2541 pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1);
2542 mincount = 0x7fffffff;
2543 for (i = -1; i <= 1; i++) {
2544 for (j = -1; j <= 1; j++) {
2545 pixCopy(pixr, pixi);
2546 pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0);
2547 pixCountPixels(pixr, &count, sumtab);
2548 if (count < mincount) {
2549 minx = j;
2550 miny = i;
2551 mincount = count;
2552 }
2553 }
2554 }
2555 pixDestroy(&pixi);
2556 pixDestroy(&pixr);
2557
2558 *pdx = minx;
2559 *pdy = miny;
2560 return 0;
2561 }