comparison mupdf-source/thirdparty/leptonica/src/compare.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file compare.c
29 * <pre>
30 *
31 * Test for pix equality
32 * l_int32 pixEqual()
33 * l_int32 pixEqualWithAlpha()
34 * l_int32 pixEqualWithCmap()
35 * l_int32 cmapEqual()
36 * l_int32 pixUsesCmapColor()
37 *
38 * Binary correlation
39 * l_int32 pixCorrelationBinary()
40 *
41 * Difference of two images of same size
42 * l_int32 pixDisplayDiff()
43 * l_int32 pixDisplayDiffBinary()
44 * l_int32 pixCompareBinary()
45 * l_int32 pixCompareGrayOrRGB()
46 * l_int32 pixCompareGray()
47 * l_int32 pixCompareRGB()
48 * l_int32 pixCompareTiled()
49 *
50 * Other measures of the difference of two images of the same size
51 * NUMA *pixCompareRankDifference()
52 * l_int32 pixTestForSimilarity()
53 * l_int32 pixGetDifferenceStats()
54 * NUMA *pixGetDifferenceHistogram()
55 * l_int32 pixGetPerceptualDiff()
56 * l_int32 pixGetPSNR()
57 *
58 * Comparison of photo regions by histogram
59 * l_int32 pixaComparePhotoRegionsByHisto() -- top-level
60 * l_int32 pixComparePhotoRegionsByHisto() -- top-level for 2
61 * l_int32 pixGenPhotoHistos()
62 * PIX *pixPadToCenterCentroid()
63 * l_int32 pixCentroid8()
64 * l_int32 pixDecideIfPhotoImage()
65 * static l_int32 findHistoGridDimensions()
66 * l_int32 compareTilesByHisto()
67 *
68 * l_int32 pixCompareGrayByHisto() -- top-level for 2
69 * static l_int32 pixCompareTilesByHisto()
70 * l_int32 pixCropAlignedToCentroid()
71 *
72 * l_uint8 *l_compressGrayHistograms()
73 * NUMAA *l_uncompressGrayHistograms()
74 *
75 * Translated images at the same resolution
76 * l_int32 pixCompareWithTranslation()
77 * l_int32 pixBestCorrelation()
78 *
79 * For comparing images using tiled histograms, essentially all the
80 * computation goes into deciding if a region of an image is a photo,
81 * whether that photo region is amenable to similarity measurements
82 * using histograms, and finally the calculation of the gray histograms
83 * for each of the tiled regions. The actual comparison is essentially
84 * instantaneous. Therefore, with a large number of images to compare
85 * with each other, it is important to first calculate the histograms
86 * for each image. Then the comparisons, which go as the square of the
87 * number of images, actually takes no time.
88 *
89 * A high level function that takes a pixa of images and does
90 * all comparisons, pixaComparePhotosByHisto(), uses this split
91 * approach. It pads the images so that the centroid is in the center,
92 * which will allow the tiles to be better aligned.
93 *
94 * For testing purposes, two functions are given that do all the work
95 * to compare just two photo regions:
96 * * pixComparePhotoRegionsByHisto() uses the split approach, qualifying
97 * the images first with pixGenPhotoHistos(), and then comparing
98 * with compareTilesByHisto().
99 * * pixCompareGrayByHisto() aligns the two images by centroid
100 * and calls pixCompareTilesByHisto() to generate the histograms
101 * and do the comparison.
102 *
103 * </pre>
104 */
105
106 #ifdef HAVE_CONFIG_H
107 #include <config_auto.h>
108 #endif /* HAVE_CONFIG_H */
109
110 #include <string.h>
111 #include <math.h>
112 #include "allheaders.h"
113
114 /* Small enough to consider equal to 0.0, for plot output */
115 static const l_float32 TINY = 0.00001f;
116
117 static l_ok findHistoGridDimensions(l_int32 n, l_int32 w, l_int32 h,
118 l_int32 *pnx, l_int32 *pny, l_int32 debug);
119 static l_ok pixCompareTilesByHisto(PIX *pix1, PIX *pix2, l_int32 maxgray,
120 l_int32 factor, l_int32 n,
121 l_float32 *pscore, PIXA *pixadebug);
122
123 /*------------------------------------------------------------------*
124 * Test for pix equality *
125 *------------------------------------------------------------------*/
126 /*!
127 * \brief pixEqual()
128 *
129 * \param[in] pix1
130 * \param[in] pix2
131 * \param[out] psame 1 if same; 0 if different
132 * \return 0 if OK; 1 on error
133 *
134 * <pre>
135 * Notes:
136 * (1) Equality is defined as having the same pixel values for
137 * each respective image pixel.
138 * (2) This works on two pix of any depth. If one or both pix
139 * have a colormap, the depths can be different and the
140 * two pix can still be equal.
141 * (3) This ignores the alpha component for 32 bpp images.
142 * (4) If both pix have colormaps and the depths are equal,
143 * use the pixEqualWithCmap() function, which does a fast
144 * comparison if the colormaps are identical and a relatively
145 * slow comparison otherwise.
146 * (5) In all other cases, any existing colormaps must first be
147 * removed before doing pixel comparison. After the colormaps
148 * are removed, the resulting two images must have the same depth.
149 * The "lowest common denominator" is RGB, but this is only
150 * chosen when necessary, or when both have colormaps but
151 * different depths.
152 * (6) For images without colormaps that are not 32 bpp, all bits
153 * in the image part of the data array must be identical.
154 * </pre>
155 */
156 l_ok
157 pixEqual(PIX *pix1,
158 PIX *pix2,
159 l_int32 *psame)
160 {
161 return pixEqualWithAlpha(pix1, pix2, 0, psame);
162 }
163
164
165 /*!
166 * \brief pixEqualWithAlpha()
167 *
168 * \param[in] pix1
169 * \param[in] pix2
170 * \param[in] use_alpha 1 to compare alpha in RGBA; 0 to ignore
171 * \param[out] psame 1 if same; 0 if different
172 * \return 0 if OK; 1 on error
173 *
174 * <pre>
175 * Notes:
176 * (1) See notes in pixEqual().
177 * (2) This is more general than pixEqual(), in that for 32 bpp
178 * RGBA images, where spp = 4, you can optionally include
179 * the alpha component in the comparison.
180 * </pre>
181 */
182 l_ok
183 pixEqualWithAlpha(PIX *pix1,
184 PIX *pix2,
185 l_int32 use_alpha,
186 l_int32 *psame)
187 {
188 l_int32 w1, h1, d1, w2, h2, d2, wpl1, wpl2;
189 l_int32 spp1, spp2, i, j, color, mismatch, opaque;
190 l_int32 fullwords, linebits, endbits;
191 l_uint32 endmask, wordmask;
192 l_uint32 *data1, *data2, *line1, *line2;
193 PIX *pixs1, *pixs2, *pixt1, *pixt2, *pixalpha;
194 PIXCMAP *cmap1, *cmap2;
195
196 if (!psame)
197 return ERROR_INT("psame not defined", __func__, 1);
198 *psame = 0; /* init to not equal */
199 if (!pix1 || !pix2)
200 return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
201 pixGetDimensions(pix1, &w1, &h1, &d1);
202 pixGetDimensions(pix2, &w2, &h2, &d2);
203 if (w1 != w2 || h1 != h2) {
204 L_INFO("pix sizes differ\n", __func__);
205 return 0;
206 }
207
208 /* Suppose the use_alpha flag is true.
209 * If only one of two 32 bpp images has spp == 4, we call that
210 * a "mismatch" of the alpha component. In the case of a mismatch,
211 * if the 4 bpp pix does not have all alpha components opaque (255),
212 * the images are not-equal. However if they are all opaque,
213 * this image is equivalent to spp == 3, so we allow the
214 * comparison to go forward, testing only for the RGB equality. */
215 spp1 = pixGetSpp(pix1);
216 spp2 = pixGetSpp(pix2);
217 mismatch = 0;
218 if (use_alpha && d1 == 32 && d2 == 32) {
219 mismatch = ((spp1 == 4 && spp2 != 4) || (spp1 != 4 && spp2 == 4));
220 if (mismatch) {
221 pixalpha = (spp1 == 4) ? pix1 : pix2;
222 pixAlphaIsOpaque(pixalpha, &opaque);
223 if (!opaque) {
224 L_INFO("just one pix has a non-opaque alpha layer\n", __func__);
225 return 0;
226 }
227 }
228 }
229
230 cmap1 = pixGetColormap(pix1);
231 cmap2 = pixGetColormap(pix2);
232 if (!cmap1 && !cmap2 && (d1 != d2) && (d1 == 32 || d2 == 32)) {
233 L_INFO("no colormaps, pix depths unequal, and one of them is RGB\n",
234 __func__);
235 return 0;
236 }
237
238 if (cmap1 && cmap2 && (d1 == d2)) /* use special function */
239 return pixEqualWithCmap(pix1, pix2, psame);
240
241 /* Must remove colormaps if they exist, and in the process
242 * end up with the resulting images having the same depth. */
243 if (cmap1 && !cmap2) {
244 pixUsesCmapColor(pix1, &color);
245 if (color && d2 <= 8) /* can't be equal */
246 return 0;
247 if (d2 < 8)
248 pixs2 = pixConvertTo8(pix2, FALSE);
249 else
250 pixs2 = pixClone(pix2);
251 if (d2 <= 8)
252 pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_GRAYSCALE);
253 else
254 pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR);
255 } else if (!cmap1 && cmap2) {
256 pixUsesCmapColor(pix2, &color);
257 if (color && d1 <= 8) /* can't be equal */
258 return 0;
259 if (d1 < 8)
260 pixs1 = pixConvertTo8(pix1, FALSE);
261 else
262 pixs1 = pixClone(pix1);
263 if (d1 <= 8)
264 pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_GRAYSCALE);
265 else
266 pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR);
267 } else if (cmap1 && cmap2) { /* depths not equal; use rgb */
268 pixs1 = pixRemoveColormap(pix1, REMOVE_CMAP_TO_FULL_COLOR);
269 pixs2 = pixRemoveColormap(pix2, REMOVE_CMAP_TO_FULL_COLOR);
270 } else { /* no colormaps */
271 pixs1 = pixClone(pix1);
272 pixs2 = pixClone(pix2);
273 }
274
275 /* OK, we have no colormaps, but the depths may still be different */
276 d1 = pixGetDepth(pixs1);
277 d2 = pixGetDepth(pixs2);
278 if (d1 != d2) {
279 if (d1 == 16 || d2 == 16) {
280 L_INFO("one pix is 16 bpp\n", __func__);
281 pixDestroy(&pixs1);
282 pixDestroy(&pixs2);
283 return 0;
284 }
285 pixt1 = pixConvertLossless(pixs1, 8);
286 pixt2 = pixConvertLossless(pixs2, 8);
287 if (!pixt1 || !pixt2) {
288 L_INFO("failure to convert to 8 bpp\n", __func__);
289 pixDestroy(&pixs1);
290 pixDestroy(&pixs2);
291 pixDestroy(&pixt1);
292 pixDestroy(&pixt2);
293 return 0;
294 }
295 } else {
296 pixt1 = pixClone(pixs1);
297 pixt2 = pixClone(pixs2);
298 }
299 pixDestroy(&pixs1);
300 pixDestroy(&pixs2);
301
302 /* No colormaps, equal depths; do pixel comparisons */
303 d1 = pixGetDepth(pixt1);
304 d2 = pixGetDepth(pixt2);
305 wpl1 = pixGetWpl(pixt1);
306 wpl2 = pixGetWpl(pixt2);
307 data1 = pixGetData(pixt1);
308 data2 = pixGetData(pixt2);
309
310 if (d1 == 32) { /* test either RGB or RGBA pixels */
311 if (use_alpha && !mismatch)
312 wordmask = (spp1 == 3) ? 0xffffff00 : 0xffffffff;
313 else
314 wordmask = 0xffffff00;
315 for (i = 0; i < h1; i++) {
316 line1 = data1 + wpl1 * i;
317 line2 = data2 + wpl2 * i;
318 for (j = 0; j < wpl1; j++) {
319 if ((*line1 ^ *line2) & wordmask) {
320 pixDestroy(&pixt1);
321 pixDestroy(&pixt2);
322 return 0;
323 }
324 line1++;
325 line2++;
326 }
327 }
328 } else { /* all bits count */
329 linebits = d1 * w1;
330 fullwords = linebits / 32;
331 endbits = linebits & 31;
332 endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits));
333 for (i = 0; i < h1; i++) {
334 line1 = data1 + wpl1 * i;
335 line2 = data2 + wpl2 * i;
336 for (j = 0; j < fullwords; j++) {
337 if (*line1 ^ *line2) {
338 pixDestroy(&pixt1);
339 pixDestroy(&pixt2);
340 return 0;
341 }
342 line1++;
343 line2++;
344 }
345 if (endbits) {
346 if ((*line1 ^ *line2) & endmask) {
347 pixDestroy(&pixt1);
348 pixDestroy(&pixt2);
349 return 0;
350 }
351 }
352 }
353 }
354
355 pixDestroy(&pixt1);
356 pixDestroy(&pixt2);
357 *psame = 1;
358 return 0;
359 }
360
361
362 /*!
363 * \brief pixEqualWithCmap()
364 *
365 * \param[in] pix1
366 * \param[in] pix2
367 * \param[out] psame
368 * \return 0 if OK, 1 on error
369 *
370 * <pre>
371 * Notes:
372 * (1) This returns same = TRUE if the images have identical content.
373 * (2) Both pix must have a colormap, and be of equal size and depth.
374 * If these conditions are not satisfied, it is not an error;
375 * the returned result is same = FALSE.
376 * (3) We then check whether the colormaps are the same; if so,
377 * the comparison proceeds 32 bits at a time.
378 * (4) If the colormaps are different, the comparison is done by
379 * slow brute force.
380 * </pre>
381 */
382 l_ok
383 pixEqualWithCmap(PIX *pix1,
384 PIX *pix2,
385 l_int32 *psame)
386 {
387 l_int32 d, w, h, wpl1, wpl2, i, j, linebits, fullwords, endbits;
388 l_int32 rval1, rval2, gval1, gval2, bval1, bval2, samecmaps;
389 l_uint32 endmask, val1, val2;
390 l_uint32 *data1, *data2, *line1, *line2;
391 PIXCMAP *cmap1, *cmap2;
392
393 if (!psame)
394 return ERROR_INT("&same not defined", __func__, 1);
395 *psame = 0;
396 if (!pix1)
397 return ERROR_INT("pix1 not defined", __func__, 1);
398 if (!pix2)
399 return ERROR_INT("pix2 not defined", __func__, 1);
400
401 if (pixSizesEqual(pix1, pix2) == 0)
402 return 0;
403 cmap1 = pixGetColormap(pix1);
404 cmap2 = pixGetColormap(pix2);
405 if (!cmap1 || !cmap2) {
406 L_INFO("both images don't have colormap\n", __func__);
407 return 0;
408 }
409 pixGetDimensions(pix1, &w, &h, &d);
410 if (d != 1 && d != 2 && d != 4 && d != 8) {
411 L_INFO("pix depth not in {1, 2, 4, 8}\n", __func__);
412 return 0;
413 }
414
415 cmapEqual(cmap1, cmap2, 3, &samecmaps);
416 if (samecmaps == TRUE) { /* colormaps are identical; compare by words */
417 linebits = d * w;
418 wpl1 = pixGetWpl(pix1);
419 wpl2 = pixGetWpl(pix2);
420 data1 = pixGetData(pix1);
421 data2 = pixGetData(pix2);
422 fullwords = linebits / 32;
423 endbits = linebits & 31;
424 endmask = (endbits == 0) ? 0 : (0xffffffff << (32 - endbits));
425 for (i = 0; i < h; i++) {
426 line1 = data1 + wpl1 * i;
427 line2 = data2 + wpl2 * i;
428 for (j = 0; j < fullwords; j++) {
429 if (*line1 ^ *line2)
430 return 0;
431 line1++;
432 line2++;
433 }
434 if (endbits) {
435 if ((*line1 ^ *line2) & endmask)
436 return 0;
437 }
438 }
439 *psame = 1;
440 return 0;
441 }
442
443 /* Colormaps aren't identical; compare pixel by pixel */
444 for (i = 0; i < h; i++) {
445 for (j = 0; j < w; j++) {
446 pixGetPixel(pix1, j, i, &val1);
447 pixGetPixel(pix2, j, i, &val2);
448 pixcmapGetColor(cmap1, val1, &rval1, &gval1, &bval1);
449 pixcmapGetColor(cmap2, val2, &rval2, &gval2, &bval2);
450 if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2)
451 return 0;
452 }
453 }
454
455 *psame = 1;
456 return 0;
457 }
458
459
460 /*!
461 * \brief cmapEqual()
462 *
463 * \param[in] cmap1
464 * \param[in] cmap2
465 * \param[in] ncomps 3 for RGB, 4 for RGBA
466 * \param[out] psame
467 * \return 0 if OK, 1 on error
468 *
469 * <pre>
470 * Notes:
471 * (1) This returns %same = TRUE if the colormaps have identical entries.
472 * (2) If %ncomps == 4, the alpha components of the colormaps are also
473 * compared.
474 * </pre>
475 */
476 l_ok
477 cmapEqual(PIXCMAP *cmap1,
478 PIXCMAP *cmap2,
479 l_int32 ncomps,
480 l_int32 *psame)
481 {
482 l_int32 n1, n2, i, rval1, rval2, gval1, gval2, bval1, bval2, aval1, aval2;
483
484 if (!psame)
485 return ERROR_INT("&same not defined", __func__, 1);
486 *psame = FALSE;
487 if (!cmap1)
488 return ERROR_INT("cmap1 not defined", __func__, 1);
489 if (!cmap2)
490 return ERROR_INT("cmap2 not defined", __func__, 1);
491 if (ncomps != 3 && ncomps != 4)
492 return ERROR_INT("ncomps not 3 or 4", __func__, 1);
493
494 n1 = pixcmapGetCount(cmap1);
495 n2 = pixcmapGetCount(cmap2);
496 if (n1 != n2) {
497 L_INFO("colormap sizes are different\n", __func__);
498 return 0;
499 }
500
501 for (i = 0; i < n1; i++) {
502 pixcmapGetRGBA(cmap1, i, &rval1, &gval1, &bval1, &aval1);
503 pixcmapGetRGBA(cmap2, i, &rval2, &gval2, &bval2, &aval2);
504 if (rval1 != rval2 || gval1 != gval2 || bval1 != bval2)
505 return 0;
506 if (ncomps == 4 && aval1 != aval2)
507 return 0;
508 }
509 *psame = TRUE;
510 return 0;
511 }
512
513
514 /*!
515 * \brief pixUsesCmapColor()
516 *
517 * \param[in] pixs any depth, colormap
518 * \param[out] pcolor TRUE if color found
519 * \return 0 if OK, 1 on error
520 *
521 * <pre>
522 * Notes:
523 * (1) This returns color = TRUE if three things are obtained:
524 * (a) the pix has a colormap
525 * (b) the colormap has at least one color entry
526 * (c) a color entry is actually used
527 * (2) It is used in pixEqual() for comparing two images, in a
528 * situation where it is required to know if the colormap
529 * has color entries that are actually used in the image.
530 * </pre>
531 */
532 l_ok
533 pixUsesCmapColor(PIX *pixs,
534 l_int32 *pcolor)
535 {
536 l_int32 n, i, rval, gval, bval, numpix;
537 NUMA *na;
538 PIXCMAP *cmap;
539
540 if (!pcolor)
541 return ERROR_INT("&color not defined", __func__, 1);
542 *pcolor = 0;
543 if (!pixs)
544 return ERROR_INT("pixs not defined", __func__, 1);
545
546 if ((cmap = pixGetColormap(pixs)) == NULL)
547 return 0;
548
549 pixcmapHasColor(cmap, pcolor);
550 if (*pcolor == 0) /* no color */
551 return 0;
552
553 /* The cmap has color entries. Are they used? */
554 na = pixGetGrayHistogram(pixs, 1);
555 n = pixcmapGetCount(cmap);
556 for (i = 0; i < n; i++) {
557 pixcmapGetColor(cmap, i, &rval, &gval, &bval);
558 numaGetIValue(na, i, &numpix);
559 if ((rval != gval || rval != bval) && numpix) { /* color found! */
560 *pcolor = 1;
561 break;
562 }
563 }
564 numaDestroy(&na);
565
566 return 0;
567 }
568
569
570 /*------------------------------------------------------------------*
571 * Binary correlation *
572 *------------------------------------------------------------------*/
573 /*!
574 * \brief pixCorrelationBinary()
575 *
576 * \param[in] pix1 1 bpp
577 * \param[in] pix2 1 bpp
578 * \param[out] pval correlation
579 * \return 0 if OK; 1 on error
580 *
581 * <pre>
582 * Notes:
583 * (1) The correlation is a number between 0.0 and 1.0,
584 * based on foreground similarity:
585 * (|1 AND 2|)**2
586 * correlation = --------------
587 * |1| * |2|
588 * where |x| is the count of foreground pixels in image x.
589 * If the images are identical, this is 1.0.
590 * If they have no fg pixels in common, this is 0.0.
591 * If one or both images have no fg pixels, the correlation is 0.0.
592 * (2) Typically the two images are of equal size, but this
593 * is not enforced. Instead, the UL corners are aligned.
594 * </pre>
595 */
596 l_ok
597 pixCorrelationBinary(PIX *pix1,
598 PIX *pix2,
599 l_float32 *pval)
600 {
601 l_int32 count1, count2, countn;
602 l_int32 *tab8;
603 PIX *pixn;
604
605 if (!pval)
606 return ERROR_INT("&pval not defined", __func__, 1);
607 *pval = 0.0;
608 if (!pix1)
609 return ERROR_INT("pix1 not defined", __func__, 1);
610 if (!pix2)
611 return ERROR_INT("pix2 not defined", __func__, 1);
612
613 tab8 = makePixelSumTab8();
614 pixCountPixels(pix1, &count1, tab8);
615 pixCountPixels(pix2, &count2, tab8);
616 if (count1 == 0 || count2 == 0) {
617 LEPT_FREE(tab8);
618 return 0;
619 }
620 pixn = pixAnd(NULL, pix1, pix2);
621 pixCountPixels(pixn, &countn, tab8);
622 *pval = (l_float32)countn * (l_float32)countn /
623 ((l_float32)count1 * (l_float32)count2);
624 LEPT_FREE(tab8);
625 pixDestroy(&pixn);
626 return 0;
627 }
628
629
630 /*------------------------------------------------------------------*
631 * Difference of two images *
632 *------------------------------------------------------------------*/
633 /*!
634 * \brief pixDisplayDiff()
635 *
636 * \param[in] pix1 any depth
637 * \param[in] pix2 any depth
638 * \param[in] showall 1 to display input images; 0 to only display result
639 * \param[in] mindiff min difference to identify pixel
640 * \param[in] diffcolor color of pixel indicating difference >= mindiff
641 * \return pixd 32 bpp rgb, or NULL on error
642 *
643 * <pre>
644 * Notes:
645 * (1) This aligns the UL corners of pix1 and pix2, crops to the
646 * overlapping pixels, and shows which pixels have a significant
647 * difference in value.
648 * (2) Requires %pix1 and %pix2 to have the same depth.
649 * (3) If rgb, a pixel is identified as different if any component
650 * values of the corresponding pixels equals or exceeds %mindiff.
651 * (4) %diffcolor is in format 0xrrggbbaa.
652 * (5) If %pix1 and %pix2 are 1 bpp, ignores %mindiff and %diffcolor,
653 * and uses the result of pixDisplayDiffBinary().
654 * </pre>
655 */
656 PIX *
657 pixDisplayDiff(PIX *pix1,
658 PIX *pix2,
659 l_int32 showall,
660 l_int32 mindiff,
661 l_uint32 diffcolor)
662 {
663 l_int32 i, j, w1, h1, d1, w2, h2, d2, minw, minh, wpl1, wpl2, wpl3;
664 l_int32 rval1, gval1, bval1, rval2, gval2, bval2;
665 l_uint32 val1, val2;
666 l_uint32 *data1, *data2, *data3, *line1, *line2, *line3;
667 PIX *pix3 = NULL, *pix4 = NULL, *pixd;
668 PIXA *pixa1;
669
670 if (!pix1 || !pix2)
671 return (PIX *)ERROR_PTR("pix1, pix2 not both defined", __func__, NULL);
672 pixGetDimensions(pix1, &w1, &h1, &d1);
673 pixGetDimensions(pix2, &w2, &h2, &d2);
674 if (d1 != d2)
675 return (PIX *)ERROR_PTR("unequal depths", __func__, NULL);
676 if (mindiff <= 0)
677 return (PIX *)ERROR_PTR("mindiff must be > 0", __func__, NULL);
678
679 if (d1 == 1) {
680 pix3 = pixDisplayDiffBinary(pix1, pix2);
681 pixd = pixConvertTo32(pix3);
682 pixDestroy(&pix3);
683 } else {
684 minw = L_MIN(w1, w2);
685 minh = L_MIN(h1, h2);
686 pix3 = pixConvertTo32(pix1);
687 pix4 = pixConvertTo32(pix2);
688 pixd = pixCreate(minw, minh, 32);
689 pixRasterop(pixd, 0, 0, minw, minh, PIX_SRC, pix3, 0, 0);
690 data1 = pixGetData(pix3);
691 wpl1 = pixGetWpl(pix3);
692 data2 = pixGetData(pix4);
693 wpl2 = pixGetWpl(pix4);
694 data3 = pixGetData(pixd);
695 wpl3 = pixGetWpl(pixd);
696 for (i = 0; i < minh; i++) {
697 line1 = data1 + i * wpl1;
698 line2 = data2 + i * wpl2;
699 line3 = data3 + i * wpl3;
700 for (j = 0; j < minw; j++) {
701 val1 = GET_DATA_FOUR_BYTES(line1, j);
702 val2 = GET_DATA_FOUR_BYTES(line2, j);
703 extractRGBValues(val1, &rval1, &gval1, &bval1);
704 extractRGBValues(val2, &rval2, &gval2, &bval2);
705 if (L_ABS(rval1 - rval2) >= mindiff ||
706 L_ABS(gval1 - gval2) >= mindiff ||
707 L_ABS(bval1 - bval2) >= mindiff)
708 SET_DATA_FOUR_BYTES(line3, j, diffcolor);
709 }
710 }
711 }
712
713 if (showall) {
714 pixa1 = pixaCreate(3);
715 if (d1 == 1) {
716 pixaAddPix(pixa1, pix1, L_COPY);
717 pixaAddPix(pixa1, pix2, L_COPY);
718 } else {
719 pixaAddPix(pixa1, pix3, L_INSERT);
720 pixaAddPix(pixa1, pix4, L_INSERT);
721 }
722 pixaAddPix(pixa1, pixd, L_INSERT); /* save diff image */
723 pixd = pixaDisplayTiledInColumns(pixa1, 2, 1.0, 30, 2); /* all 3 */
724 pixaDestroy(&pixa1);
725 } else if (d1 != 1) {
726 pixDestroy(&pix3);
727 pixDestroy(&pix4);
728 }
729 return pixd;
730 }
731
732
733 /*!
734 * \brief pixDisplayDiffBinary()
735 *
736 * \param[in] pix1 1 bpp
737 * \param[in] pix2 1 bpp
738 * \return pixd 4 bpp cmapped, or NULL on error
739 *
740 * <pre>
741 * Notes:
742 * (1) This gives a color representation of the difference between
743 * pix1 and pix2. The color difference depends on the order.
744 * The pixels in pixd have 4 colors:
745 * * unchanged: black (on), white (off)
746 * * on in pix1, off in pix2: red
747 * * on in pix2, off in pix1: green
748 * (2) This aligns the UL corners of pix1 and pix2, and crops
749 * to the overlapping pixels.
750 * </pre>
751 */
752 PIX *
753 pixDisplayDiffBinary(PIX *pix1,
754 PIX *pix2)
755 {
756 l_int32 w1, h1, d1, w2, h2, d2, minw, minh;
757 PIX *pixt, *pixd;
758 PIXCMAP *cmap;
759
760 if (!pix1 || !pix2)
761 return (PIX *)ERROR_PTR("pix1, pix2 not both defined", __func__, NULL);
762 pixGetDimensions(pix1, &w1, &h1, &d1);
763 pixGetDimensions(pix2, &w2, &h2, &d2);
764 if (d1 != 1 || d2 != 1)
765 return (PIX *)ERROR_PTR("pix1 and pix2 not 1 bpp", __func__, NULL);
766 minw = L_MIN(w1, w2);
767 minh = L_MIN(h1, h2);
768
769 pixd = pixCreate(minw, minh, 4);
770 cmap = pixcmapCreate(4);
771 pixcmapAddColor(cmap, 255, 255, 255); /* initialized to white */
772 pixcmapAddColor(cmap, 0, 0, 0);
773 pixcmapAddColor(cmap, 255, 0, 0);
774 pixcmapAddColor(cmap, 0, 255, 0);
775 pixSetColormap(pixd, cmap);
776
777 pixt = pixAnd(NULL, pix1, pix2);
778 pixPaintThroughMask(pixd, pixt, 0, 0, 0x0); /* black */
779 pixSubtract(pixt, pix1, pix2);
780 pixPaintThroughMask(pixd, pixt, 0, 0, 0xff000000); /* red */
781 pixSubtract(pixt, pix2, pix1);
782 pixPaintThroughMask(pixd, pixt, 0, 0, 0x00ff0000); /* green */
783 pixDestroy(&pixt);
784 return pixd;
785 }
786
787
788 /*!
789 * \brief pixCompareBinary()
790 *
791 * \param[in] pix1 1 bpp
792 * \param[in] pix2 1 bpp
793 * \param[in] comptype L_COMPARE_XOR, L_COMPARE_SUBTRACT
794 * \param[out] pfract fraction of pixels that are different
795 * \param[out] ppixdiff [optional] pix of difference
796 * \return 0 if OK; 1 on error
797 *
798 * <pre>
799 * Notes:
800 * (1) The two images are aligned at the UL corner, and do not
801 * need to be the same size.
802 * (2) If using L_COMPARE_SUBTRACT, pix2 is subtracted from pix1.
803 * (3) The total number of pixels is determined by pix1.
804 * (4) On error, the returned fraction is 1.0.
805 * </pre>
806 */
807 l_ok
808 pixCompareBinary(PIX *pix1,
809 PIX *pix2,
810 l_int32 comptype,
811 l_float32 *pfract,
812 PIX **ppixdiff)
813 {
814 l_int32 w, h, count;
815 PIX *pixt;
816
817 if (ppixdiff) *ppixdiff = NULL;
818 if (!pfract)
819 return ERROR_INT("&pfract not defined", __func__, 1);
820 *pfract = 1.0; /* initialize to max difference */
821 if (!pix1 || pixGetDepth(pix1) != 1)
822 return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1);
823 if (!pix2 || pixGetDepth(pix2) != 1)
824 return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1);
825 if (comptype != L_COMPARE_XOR && comptype != L_COMPARE_SUBTRACT)
826 return ERROR_INT("invalid comptype", __func__, 1);
827
828 if (comptype == L_COMPARE_XOR)
829 pixt = pixXor(NULL, pix1, pix2);
830 else /* comptype == L_COMPARE_SUBTRACT) */
831 pixt = pixSubtract(NULL, pix1, pix2);
832 pixCountPixels(pixt, &count, NULL);
833 pixGetDimensions(pix1, &w, &h, NULL);
834 *pfract = (l_float32)(count) / (l_float32)(w * h);
835
836 if (ppixdiff)
837 *ppixdiff = pixt;
838 else
839 pixDestroy(&pixt);
840 return 0;
841 }
842
843
844 /*!
845 * \brief pixCompareGrayOrRGB()
846 *
847 * \param[in] pix1 2,4,8,16 bpp gray, 32 bpp rgb, or colormapped
848 * \param[in] pix2 2,4,8,16 bpp gray, 32 bpp rgb, or colormapped
849 * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF
850 * \param[in] plottype gplot plot output type, or 0 for no plot
851 * \param[out] psame [optional] 1 if pixel values are identical
852 * \param[out] pdiff [optional] average difference
853 * \param[out] prmsdiff [optional] rms of difference
854 * \param[out] ppixdiff [optional] pix of difference
855 * \return 0 if OK; 1 on error
856 *
857 * <pre>
858 * Notes:
859 * (1) The two images are aligned at the UL corner, and do not
860 * need to be the same size. If they are not the same size,
861 * the comparison will be made over overlapping pixels.
862 * (2) If there is a colormap, it is removed and the result
863 * is either gray or RGB depending on the colormap.
864 * (3) If RGB, each component is compared separately.
865 * (4) If type is L_COMPARE_ABS_DIFF, pix2 is subtracted from pix1
866 * and the absolute value is taken.
867 * (5) If type is L_COMPARE_SUBTRACT, pix2 is subtracted from pix1
868 * and the result is clipped to 0.
869 * (6) The plot output types are specified in gplot.h.
870 * Use 0 if no difference plot is to be made.
871 * (7) If the images are pixelwise identical, no difference
872 * plot is made, even if requested. The result (TRUE or FALSE)
873 * is optionally returned in the parameter 'same'.
874 * (8) The average difference (either subtracting or absolute value)
875 * is optionally returned in the parameter 'diff'.
876 * (9) The RMS difference is optionally returned in the
877 * parameter 'rmsdiff'. For RGB, we return the average of
878 * the RMS differences for each of the components.
879 * (10) Because pixel values are compared, pix1 and pix2 can be equal when:
880 * * they are both gray with different depth
881 * * one is colormapped and the other is not
882 * * they are both colormapped and have different size colormaps
883 * </pre>
884 */
885 l_ok
886 pixCompareGrayOrRGB(PIX *pix1,
887 PIX *pix2,
888 l_int32 comptype,
889 l_int32 plottype,
890 l_int32 *psame,
891 l_float32 *pdiff,
892 l_float32 *prmsdiff,
893 PIX **ppixdiff)
894 {
895 l_int32 retval, d1, d2;
896 PIX *pixt1, *pixt2, *pixs1, *pixs2;
897
898 if (psame) *psame = 0;
899 if (pdiff) *pdiff = 255.0;
900 if (prmsdiff) *prmsdiff = 255.0;
901 if (ppixdiff) *ppixdiff = NULL;
902 if (!pix1 || pixGetDepth(pix1) == 1)
903 return ERROR_INT("pix1 not defined or 1 bpp", __func__, 1);
904 if (!pix2 || pixGetDepth(pix2) == 1)
905 return ERROR_INT("pix2 not defined or 1 bpp", __func__, 1);
906 if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF)
907 return ERROR_INT("invalid comptype", __func__, 1);
908 if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS)
909 return ERROR_INT("invalid plottype", __func__, 1);
910
911 pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
912 pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC);
913 d1 = pixGetDepth(pixt1);
914 d2 = pixGetDepth(pixt2);
915 if (d1 < 8)
916 pixs1 = pixConvertTo8(pixt1, FALSE);
917 else
918 pixs1 = pixClone(pixt1);
919 if (d2 < 8)
920 pixs2 = pixConvertTo8(pixt2, FALSE);
921 else
922 pixs2 = pixClone(pixt2);
923 pixDestroy(&pixt1);
924 pixDestroy(&pixt2);
925 d1 = pixGetDepth(pixs1);
926 d2 = pixGetDepth(pixs2);
927 if (d1 != d2) {
928 pixDestroy(&pixs1);
929 pixDestroy(&pixs2);
930 return ERROR_INT("intrinsic depths are not equal", __func__, 1);
931 }
932
933 if (d1 == 8 || d1 == 16)
934 retval = pixCompareGray(pixs1, pixs2, comptype, plottype, psame,
935 pdiff, prmsdiff, ppixdiff);
936 else /* d1 == 32 */
937 retval = pixCompareRGB(pixs1, pixs2, comptype, plottype, psame,
938 pdiff, prmsdiff, ppixdiff);
939 pixDestroy(&pixs1);
940 pixDestroy(&pixs2);
941 return retval;
942 }
943
944
945 /*!
946 * \brief pixCompareGray()
947 *
948 * \param[in] pix1 8 or 16 bpp, not cmapped
949 * \param[in] pix2 8 or 16 bpp, not cmapped
950 * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF
951 * \param[in] plottype gplot plot output type, or 0 for no plot
952 * \param[out] psame [optional] 1 if pixel values are identical
953 * \param[out] pdiff [optional] average difference
954 * \param[out] prmsdiff [optional] rms of difference
955 * \param[out] ppixdiff [optional] pix of difference
956 * \return 0 if OK; 1 on error
957 *
958 * <pre>
959 * Notes:
960 * (1) See pixCompareGrayOrRGB() for details.
961 * (2) Use pixCompareGrayOrRGB() if the input pix are colormapped.
962 * (3) Note: setting %plottype > 0 can result in writing named
963 * output files.
964 * </pre>
965 */
966 l_ok
967 pixCompareGray(PIX *pix1,
968 PIX *pix2,
969 l_int32 comptype,
970 l_int32 plottype,
971 l_int32 *psame,
972 l_float32 *pdiff,
973 l_float32 *prmsdiff,
974 PIX **ppixdiff)
975 {
976 char buf[64];
977 static l_atomic index = 0;
978 l_int32 d1, d2, same, first, last;
979 GPLOT *gplot;
980 NUMA *na, *nac;
981 PIX *pixt;
982
983 if (psame) *psame = 0;
984 if (pdiff) *pdiff = 255.0;
985 if (prmsdiff) *prmsdiff = 255.0;
986 if (ppixdiff) *ppixdiff = NULL;
987 if (!pix1)
988 return ERROR_INT("pix1 not defined", __func__, 1);
989 if (!pix2)
990 return ERROR_INT("pix2 not defined", __func__, 1);
991 d1 = pixGetDepth(pix1);
992 d2 = pixGetDepth(pix2);
993 if ((d1 != d2) || (d1 != 8 && d1 != 16))
994 return ERROR_INT("depths unequal or not 8 or 16 bpp", __func__, 1);
995 if (pixGetColormap(pix1) || pixGetColormap(pix2))
996 return ERROR_INT("pix1 and/or pix2 are colormapped", __func__, 1);
997 if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF)
998 return ERROR_INT("invalid comptype", __func__, 1);
999 if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS)
1000 return ERROR_INT("invalid plottype", __func__, 1);
1001
1002 lept_mkdir("lept/comp");
1003
1004 if (comptype == L_COMPARE_SUBTRACT)
1005 pixt = pixSubtractGray(NULL, pix1, pix2);
1006 else /* comptype == L_COMPARE_ABS_DIFF) */
1007 pixt = pixAbsDifference(pix1, pix2);
1008
1009 pixZero(pixt, &same);
1010 if (same)
1011 L_INFO("Images are pixel-wise identical\n", __func__);
1012 if (psame) *psame = same;
1013
1014 if (pdiff)
1015 pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_MEAN_ABSVAL, pdiff);
1016
1017 /* Don't bother to plot if the images are the same */
1018 if (plottype && !same) {
1019 L_INFO("Images differ: output plots will be generated\n", __func__);
1020 na = pixGetGrayHistogram(pixt, 1);
1021 numaGetNonzeroRange(na, TINY, &first, &last);
1022 nac = numaClipToInterval(na, 0, last);
1023 snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d", index);
1024 gplot = gplotCreate(buf, plottype,
1025 "Pixel Difference Histogram", "diff val",
1026 "number of pixels");
1027 gplotAddPlot(gplot, NULL, nac, GPLOT_LINES, "gray");
1028 gplotMakeOutput(gplot);
1029 gplotDestroy(&gplot);
1030 snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_gray%d.png",
1031 index++);
1032 l_fileDisplay(buf, 100, 100, 1.0);
1033 numaDestroy(&na);
1034 numaDestroy(&nac);
1035 }
1036
1037 if (ppixdiff)
1038 *ppixdiff = pixCopy(NULL, pixt);
1039
1040 if (prmsdiff) {
1041 if (comptype == L_COMPARE_SUBTRACT) { /* wrong type for rms diff */
1042 pixDestroy(&pixt);
1043 pixt = pixAbsDifference(pix1, pix2);
1044 }
1045 pixGetAverageMasked(pixt, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, prmsdiff);
1046 }
1047
1048 pixDestroy(&pixt);
1049 return 0;
1050 }
1051
1052
1053 /*!
1054 * \brief pixCompareRGB()
1055 *
1056 * \param[in] pix1 32 bpp rgb
1057 * \param[in] pix2 32 bpp rgb
1058 * \param[in] comptype L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF
1059 * \param[in] plottype gplot plot output type, or 0 for no plot
1060 * \param[out] psame [optional] 1 if pixel values are identical
1061 * \param[out] pdiff [optional] average difference
1062 * \param[out] prmsdiff [optional] rms of difference
1063 * \param[out] ppixdiff [optional] pix of difference
1064 * \return 0 if OK; 1 on error
1065 *
1066 * <pre>
1067 * Notes:
1068 * (1) See pixCompareGrayOrRGB() for details.
1069 * (2) Note: setting %plottype > 0 can result in writing named
1070 * output files.
1071 * </pre>
1072 */
1073 l_ok
1074 pixCompareRGB(PIX *pix1,
1075 PIX *pix2,
1076 l_int32 comptype,
1077 l_int32 plottype,
1078 l_int32 *psame,
1079 l_float32 *pdiff,
1080 l_float32 *prmsdiff,
1081 PIX **ppixdiff)
1082 {
1083 char buf[64];
1084 static l_atomic index = 0;
1085 l_int32 rsame, gsame, bsame, same, first, rlast, glast, blast, last;
1086 l_float32 rdiff, gdiff, bdiff;
1087 GPLOT *gplot;
1088 NUMA *nar, *nag, *nab, *narc, *nagc, *nabc;
1089 PIX *pixr1, *pixr2, *pixg1, *pixg2, *pixb1, *pixb2;
1090 PIX *pixr, *pixg, *pixb;
1091
1092 if (psame) *psame = 0;
1093 if (pdiff) *pdiff = 0.0;
1094 if (prmsdiff) *prmsdiff = 0.0;
1095 if (ppixdiff) *ppixdiff = NULL;
1096 if (!pix1 || pixGetDepth(pix1) != 32)
1097 return ERROR_INT("pix1 not defined or not 32 bpp", __func__, 1);
1098 if (!pix2 || pixGetDepth(pix2) != 32)
1099 return ERROR_INT("pix2 not defined or not ew bpp", __func__, 1);
1100 if (comptype != L_COMPARE_SUBTRACT && comptype != L_COMPARE_ABS_DIFF)
1101 return ERROR_INT("invalid comptype", __func__, 1);
1102 if (plottype < 0 || plottype >= NUM_GPLOT_OUTPUTS)
1103 return ERROR_INT("invalid plottype", __func__, 1);
1104
1105 lept_mkdir("lept/comp");
1106
1107 pixr1 = pixGetRGBComponent(pix1, COLOR_RED);
1108 pixr2 = pixGetRGBComponent(pix2, COLOR_RED);
1109 pixg1 = pixGetRGBComponent(pix1, COLOR_GREEN);
1110 pixg2 = pixGetRGBComponent(pix2, COLOR_GREEN);
1111 pixb1 = pixGetRGBComponent(pix1, COLOR_BLUE);
1112 pixb2 = pixGetRGBComponent(pix2, COLOR_BLUE);
1113 if (comptype == L_COMPARE_SUBTRACT) {
1114 pixr = pixSubtractGray(NULL, pixr1, pixr2);
1115 pixg = pixSubtractGray(NULL, pixg1, pixg2);
1116 pixb = pixSubtractGray(NULL, pixb1, pixb2);
1117 } else { /* comptype == L_COMPARE_ABS_DIFF) */
1118 pixr = pixAbsDifference(pixr1, pixr2);
1119 pixg = pixAbsDifference(pixg1, pixg2);
1120 pixb = pixAbsDifference(pixb1, pixb2);
1121 }
1122
1123 pixZero(pixr, &rsame);
1124 pixZero(pixg, &gsame);
1125 pixZero(pixb, &bsame);
1126 same = rsame && gsame && bsame;
1127 if (same)
1128 L_INFO("Images are pixel-wise identical\n", __func__);
1129 if (psame) *psame = same;
1130
1131 if (pdiff) {
1132 pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_MEAN_ABSVAL, &rdiff);
1133 pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_MEAN_ABSVAL, &gdiff);
1134 pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_MEAN_ABSVAL, &bdiff);
1135 *pdiff = (rdiff + gdiff + bdiff) / 3.0;
1136 }
1137
1138 /* Don't bother to plot if the images are the same */
1139 if (plottype && !same) {
1140 L_INFO("Images differ: output plots will be generated\n", __func__);
1141 nar = pixGetGrayHistogram(pixr, 1);
1142 nag = pixGetGrayHistogram(pixg, 1);
1143 nab = pixGetGrayHistogram(pixb, 1);
1144 numaGetNonzeroRange(nar, TINY, &first, &rlast);
1145 numaGetNonzeroRange(nag, TINY, &first, &glast);
1146 numaGetNonzeroRange(nab, TINY, &first, &blast);
1147 last = L_MAX(rlast, glast);
1148 last = L_MAX(last, blast);
1149 narc = numaClipToInterval(nar, 0, last);
1150 nagc = numaClipToInterval(nag, 0, last);
1151 nabc = numaClipToInterval(nab, 0, last);
1152 snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d", index);
1153 gplot = gplotCreate(buf, plottype,
1154 "Pixel Difference Histogram", "diff val",
1155 "number of pixels");
1156 gplotAddPlot(gplot, NULL, narc, GPLOT_LINES, "red");
1157 gplotAddPlot(gplot, NULL, nagc, GPLOT_LINES, "green");
1158 gplotAddPlot(gplot, NULL, nabc, GPLOT_LINES, "blue");
1159 gplotMakeOutput(gplot);
1160 gplotDestroy(&gplot);
1161 snprintf(buf, sizeof(buf), "/tmp/lept/comp/compare_rgb%d.png",
1162 index++);
1163 l_fileDisplay(buf, 100, 100, 1.0);
1164 numaDestroy(&nar);
1165 numaDestroy(&nag);
1166 numaDestroy(&nab);
1167 numaDestroy(&narc);
1168 numaDestroy(&nagc);
1169 numaDestroy(&nabc);
1170 }
1171
1172 if (ppixdiff)
1173 *ppixdiff = pixCreateRGBImage(pixr, pixg, pixb);
1174
1175 if (prmsdiff) {
1176 if (comptype == L_COMPARE_SUBTRACT) {
1177 pixDestroy(&pixr);
1178 pixDestroy(&pixg);
1179 pixDestroy(&pixb);
1180 pixr = pixAbsDifference(pixr1, pixr2);
1181 pixg = pixAbsDifference(pixg1, pixg2);
1182 pixb = pixAbsDifference(pixb1, pixb2);
1183 }
1184 pixGetAverageMasked(pixr, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &rdiff);
1185 pixGetAverageMasked(pixg, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &gdiff);
1186 pixGetAverageMasked(pixb, NULL, 0, 0, 1, L_ROOT_MEAN_SQUARE, &bdiff);
1187 *prmsdiff = (rdiff + gdiff + bdiff) / 3.0;
1188 }
1189
1190 pixDestroy(&pixr1);
1191 pixDestroy(&pixr2);
1192 pixDestroy(&pixg1);
1193 pixDestroy(&pixg2);
1194 pixDestroy(&pixb1);
1195 pixDestroy(&pixb2);
1196 pixDestroy(&pixr);
1197 pixDestroy(&pixg);
1198 pixDestroy(&pixb);
1199 return 0;
1200 }
1201
1202
1203 /*!
1204 * \brief pixCompareTiled()
1205 *
1206 * \param[in] pix1 8 bpp or 32 bpp rgb
1207 * \param[in] pix2 8 bpp 32 bpp rgb
1208 * \param[in] sx, sy tile size; must be > 1 in each dimension
1209 * \param[in] type L_MEAN_ABSVAL or L_ROOT_MEAN_SQUARE
1210 * \param[out] ppixdiff pix of difference
1211 * \return 0 if OK; 1 on error
1212 *
1213 * <pre>
1214 * Notes:
1215 * (1) With L_MEAN_ABSVAL, we compute for each tile the
1216 * average abs value of the pixel component difference between
1217 * the two (aligned) images. With L_ROOT_MEAN_SQUARE, we
1218 * compute instead the rms difference over all components.
1219 * (2) The two input pix must be the same depth. Comparison is made
1220 * using UL corner alignment.
1221 * (3) For 32 bpp, the distance between corresponding tiles
1222 * is found by averaging the measured difference over all three
1223 * components of each pixel in the tile.
1224 * (4) The result, pixdiff, contains one pixel for each source tile.
1225 * </pre>
1226 */
1227 l_ok
1228 pixCompareTiled(PIX *pix1,
1229 PIX *pix2,
1230 l_int32 sx,
1231 l_int32 sy,
1232 l_int32 type,
1233 PIX **ppixdiff)
1234 {
1235 l_int32 d1, d2, w, h;
1236 PIX *pixt, *pixr, *pixg, *pixb;
1237 PIX *pixrdiff, *pixgdiff, *pixbdiff;
1238 PIXACC *pixacc;
1239
1240 if (!ppixdiff)
1241 return ERROR_INT("&pixdiff not defined", __func__, 1);
1242 *ppixdiff = NULL;
1243 if (!pix1)
1244 return ERROR_INT("pix1 not defined", __func__, 1);
1245 if (!pix2)
1246 return ERROR_INT("pix2 not defined", __func__, 1);
1247 d1 = pixGetDepth(pix1);
1248 d2 = pixGetDepth(pix2);
1249 if (d1 != d2)
1250 return ERROR_INT("depths not equal", __func__, 1);
1251 if (d1 != 8 && d1 != 32)
1252 return ERROR_INT("pix1 not 8 or 32 bpp", __func__, 1);
1253 if (d2 != 8 && d2 != 32)
1254 return ERROR_INT("pix2 not 8 or 32 bpp", __func__, 1);
1255 if (sx < 2 || sy < 2)
1256 return ERROR_INT("sx and sy not both > 1", __func__, 1);
1257 if (type != L_MEAN_ABSVAL && type != L_ROOT_MEAN_SQUARE)
1258 return ERROR_INT("invalid type", __func__, 1);
1259
1260 pixt = pixAbsDifference(pix1, pix2);
1261 if (d1 == 8) {
1262 *ppixdiff = pixGetAverageTiled(pixt, sx, sy, type);
1263 } else { /* d1 == 32 */
1264 pixr = pixGetRGBComponent(pixt, COLOR_RED);
1265 pixg = pixGetRGBComponent(pixt, COLOR_GREEN);
1266 pixb = pixGetRGBComponent(pixt, COLOR_BLUE);
1267 pixrdiff = pixGetAverageTiled(pixr, sx, sy, type);
1268 pixgdiff = pixGetAverageTiled(pixg, sx, sy, type);
1269 pixbdiff = pixGetAverageTiled(pixb, sx, sy, type);
1270 pixGetDimensions(pixrdiff, &w, &h, NULL);
1271 pixacc = pixaccCreate(w, h, 0);
1272 pixaccAdd(pixacc, pixrdiff);
1273 pixaccAdd(pixacc, pixgdiff);
1274 pixaccAdd(pixacc, pixbdiff);
1275 pixaccMultConst(pixacc, 1.f / 3.f);
1276 *ppixdiff = pixaccFinal(pixacc, 8);
1277 pixDestroy(&pixr);
1278 pixDestroy(&pixg);
1279 pixDestroy(&pixb);
1280 pixDestroy(&pixrdiff);
1281 pixDestroy(&pixgdiff);
1282 pixDestroy(&pixbdiff);
1283 pixaccDestroy(&pixacc);
1284 }
1285 pixDestroy(&pixt);
1286 return 0;
1287 }
1288
1289
1290 /*------------------------------------------------------------------*
1291 * Other measures of the difference of two images *
1292 *------------------------------------------------------------------*/
1293 /*!
1294 * \brief pixCompareRankDifference()
1295 *
1296 * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped
1297 * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped
1298 * \param[in] factor subsampling factor; use 0 or 1 for no subsampling
1299 * \return narank numa of rank difference, or NULL on error
1300 *
1301 * <pre>
1302 * Notes:
1303 * (1) This answers the question: if the pixel values in each
1304 * component are compared by absolute difference, for
1305 * any value of difference, what is the fraction of
1306 * pixel pairs that have a difference of this magnitude
1307 * or greater. For a difference of 0, the fraction is 1.0.
1308 * In this sense, it is a mapping from pixel difference to
1309 * rank order of difference.
1310 * (2) The two images are aligned at the UL corner, and do not
1311 * need to be the same size. If they are not the same size,
1312 * the comparison will be made over overlapping pixels.
1313 * (3) If there is a colormap, it is removed and the result
1314 * is either gray or RGB depending on the colormap.
1315 * (4) If RGB, pixel differences for each component are aggregated
1316 * into a single histogram.
1317 * </pre>
1318 */
1319 NUMA *
1320 pixCompareRankDifference(PIX *pix1,
1321 PIX *pix2,
1322 l_int32 factor)
1323 {
1324 l_int32 i;
1325 l_float32 *array1, *array2;
1326 NUMA *nah, *nan, *nad;
1327
1328 if (!pix1)
1329 return (NUMA *)ERROR_PTR("pix1 not defined", __func__, NULL);
1330 if (!pix2)
1331 return (NUMA *)ERROR_PTR("pix2 not defined", __func__, NULL);
1332
1333 if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL)
1334 return (NUMA *)ERROR_PTR("na not made", __func__, NULL);
1335
1336 nan = numaNormalizeHistogram(nah, 1.0);
1337 array1 = numaGetFArray(nan, L_NOCOPY);
1338
1339 nad = numaCreate(256);
1340 numaSetCount(nad, 256); /* all initialized to 0.0 */
1341 array2 = numaGetFArray(nad, L_NOCOPY);
1342
1343 /* Do rank accumulation on normalized histo of diffs */
1344 array2[0] = 1.0;
1345 for (i = 1; i < 256; i++)
1346 array2[i] = array2[i - 1] - array1[i - 1];
1347
1348 numaDestroy(&nah);
1349 numaDestroy(&nan);
1350 return nad;
1351 }
1352
1353
1354 /*!
1355 * \brief pixTestForSimilarity()
1356 *
1357 * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped
1358 * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped
1359 * \param[in] factor subsampling factor; use 0 or 1 for no subsampling
1360 * \param[in] mindiff minimum pixel difference to be counted; > 0
1361 * \param[in] maxfract maximum fraction of pixels allowed to have
1362 * diff greater than or equal to mindiff
1363 * \param[in] maxave maximum average difference of pixels allowed for
1364 * pixels with diff greater than or equal to
1365 * mindiff, after subtracting mindiff
1366 * \param[out] psimilar 1 if similar, 0 otherwise
1367 * \param[in] details use 1 to give normalized histogram and other data
1368 * \return 0 if OK, 1 on error
1369 *
1370 * <pre>
1371 * Notes:
1372 * (1) This takes 2 pix that are the same size and determines using
1373 * 3 input parameters if they are "similar". The first parameter
1374 * %mindiff establishes a criterion of pixel-to-pixel similarity:
1375 * two pixels are not similar if their difference in value is
1376 * at least mindiff. Then %maxfract and %maxave are thresholds
1377 * on the number and distribution of dissimilar pixels
1378 * allowed for the two pix to be similar. If the pix are
1379 * to be similar, neither threshold can be exceeded.
1380 * (2) In setting the %maxfract and %maxave thresholds, you have
1381 * these options:
1382 * (a) Base the comparison only on %maxfract. Then set
1383 * %maxave = 0.0 or 256.0. (If 0, we always ignore it.)
1384 * (b) Base the comparison only on %maxave. Then set
1385 * %maxfract = 1.0.
1386 * (c) Base the comparison on both thresholds.
1387 * (3) Example of values that can be expected at mindiff = 15 when
1388 * comparing lossless png encoding with jpeg encoding, q=75:
1389 * (smoothish bg) fractdiff = 0.01, avediff = 2.5
1390 * (natural scene) fractdiff = 0.13, avediff = 3.5
1391 * To identify these images as 'similar', select maxfract
1392 * and maxave to be upper bounds of what you expect.
1393 * (4) See pixGetDifferenceStats() for a discussion of why we subtract
1394 * mindiff from the computed average diff of the nonsimilar pixels
1395 * to get the 'avediff' returned by that function.
1396 * (5) If there is a colormap, it is removed and the result
1397 * is either gray or RGB depending on the colormap.
1398 * (6) If RGB, the maximum difference between pixel components is
1399 * saved in the histogram.
1400 * </pre>
1401 */
1402 l_ok
1403 pixTestForSimilarity(PIX *pix1,
1404 PIX *pix2,
1405 l_int32 factor,
1406 l_int32 mindiff,
1407 l_float32 maxfract,
1408 l_float32 maxave,
1409 l_int32 *psimilar,
1410 l_int32 details)
1411 {
1412 l_float32 fractdiff, avediff;
1413
1414 if (!psimilar)
1415 return ERROR_INT("&similar not defined", __func__, 1);
1416 *psimilar = 0;
1417 if (!pix1)
1418 return ERROR_INT("pix1 not defined", __func__, 1);
1419 if (!pix2)
1420 return ERROR_INT("pix2 not defined", __func__, 1);
1421 if (pixSizesEqual(pix1, pix2) == 0)
1422 return ERROR_INT("pix sizes not equal", __func__, 1);
1423 if (mindiff <= 0)
1424 return ERROR_INT("mindiff must be > 0", __func__, 1);
1425
1426 if (pixGetDifferenceStats(pix1, pix2, factor, mindiff,
1427 &fractdiff, &avediff, details))
1428 return ERROR_INT("diff stats not found", __func__, 1);
1429
1430 if (maxave <= 0.0) maxave = 256.0;
1431 if (fractdiff <= maxfract && avediff <= maxave)
1432 *psimilar = 1;
1433 return 0;
1434 }
1435
1436
1437 /*!
1438 * \brief pixGetDifferenceStats()
1439 *
1440 * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped
1441 * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped
1442 * \param[in] factor subsampling factor; use 0 or 1 for no subsampling
1443 * \param[in] mindiff minimum pixel difference to be counted; > 0
1444 * \param[out] pfractdiff fraction of pixels with diff greater than or
1445 * equal to mindiff
1446 * \param[out] pavediff average difference of pixels with diff greater
1447 * than or equal to mindiff, less mindiff
1448 * \param[in] details use 1 to give normalized histogram and other data
1449 * \return 0 if OK, 1 on error
1450 *
1451 * <pre>
1452 * Notes:
1453 * (1) This takes a threshold %mindiff and describes the difference
1454 * between two images in terms of two numbers:
1455 * (a) the fraction of pixels, %fractdiff, whose difference
1456 * equals or exceeds the threshold %mindiff, and
1457 * (b) the average value %avediff of the difference in pixel value
1458 * for the pixels in the set given by (a), after you subtract
1459 * %mindiff. The reason for subtracting %mindiff is that
1460 * you then get a useful measure for the rate of falloff
1461 * of the distribution for larger differences. For example,
1462 * if %mindiff = 10 and you find that %avediff = 2.5, it
1463 * says that of the pixels with diff > 10, the average of
1464 * their diffs is just mindiff + 2.5 = 12.5. This is a
1465 * fast falloff in the histogram with increasing difference.
1466 * (2) The two images are aligned at the UL corner, and do not
1467 * need to be the same size. If they are not the same size,
1468 * the comparison will be made over overlapping pixels.
1469 * (3) If there is a colormap, it is removed and the result
1470 * is either gray or RGB depending on the colormap.
1471 * (4) If RGB, the maximum difference between pixel components is
1472 * saved in the histogram.
1473 * (5) Set %details == 1 to see the difference histogram and get
1474 * an output that shows for each value of %mindiff, what are the
1475 * minimum values required for fractdiff and avediff in order
1476 * that the two pix will be considered similar.
1477 * </pre>
1478 */
1479 l_ok
1480 pixGetDifferenceStats(PIX *pix1,
1481 PIX *pix2,
1482 l_int32 factor,
1483 l_int32 mindiff,
1484 l_float32 *pfractdiff,
1485 l_float32 *pavediff,
1486 l_int32 details)
1487 {
1488 l_int32 i, first, last, diff;
1489 l_float32 fract, ave;
1490 l_float32 *array;
1491 NUMA *nah, *nan, *nac;
1492
1493 if (pfractdiff) *pfractdiff = 0.0;
1494 if (pavediff) *pavediff = 0.0;
1495 if (!pfractdiff)
1496 return ERROR_INT("&fractdiff not defined", __func__, 1);
1497 if (!pavediff)
1498 return ERROR_INT("&avediff not defined", __func__, 1);
1499 if (!pix1)
1500 return ERROR_INT("pix1 not defined", __func__, 1);
1501 if (!pix2)
1502 return ERROR_INT("pix2 not defined", __func__, 1);
1503 if (mindiff <= 0)
1504 return ERROR_INT("mindiff must be > 0", __func__, 1);
1505
1506 if ((nah = pixGetDifferenceHistogram(pix1, pix2, factor)) == NULL)
1507 return ERROR_INT("na not made", __func__, 1);
1508
1509 if ((nan = numaNormalizeHistogram(nah, 1.0)) == NULL) {
1510 numaDestroy(&nah);
1511 return ERROR_INT("nan not made", __func__, 1);
1512 }
1513 array = numaGetFArray(nan, L_NOCOPY);
1514
1515 if (details) {
1516 lept_mkdir("lept/comp");
1517 numaGetNonzeroRange(nan, 0.0, &first, &last);
1518 nac = numaClipToInterval(nan, first, last);
1519 gplotSimple1(nac, GPLOT_PNG, "/tmp/lept/comp/histo",
1520 "Difference histogram");
1521 l_fileDisplay("/tmp/lept/comp/histo.png", 500, 0, 1.0);
1522 lept_stderr("\nNonzero values in normalized histogram:");
1523 numaWriteStderr(nac);
1524 numaDestroy(&nac);
1525 lept_stderr(" Mindiff fractdiff avediff\n");
1526 lept_stderr(" -----------------------------------\n");
1527 for (diff = 1; diff < L_MIN(2 * mindiff, last); diff++) {
1528 fract = 0.0;
1529 ave = 0.0;
1530 for (i = diff; i <= last; i++) {
1531 fract += array[i];
1532 ave += (l_float32)i * array[i];
1533 }
1534 ave = (fract == 0.0) ? 0.0 : ave / fract;
1535 ave -= diff;
1536 lept_stderr("%5d %7.4f %7.4f\n",
1537 diff, fract, ave);
1538 }
1539 lept_stderr(" -----------------------------------\n");
1540 }
1541
1542 fract = 0.0;
1543 ave = 0.0;
1544 for (i = mindiff; i < 256; i++) {
1545 fract += array[i];
1546 ave += (l_float32)i * array[i];
1547 }
1548 ave = (fract == 0.0) ? 0.0 : ave / fract;
1549 ave -= mindiff;
1550
1551 *pfractdiff = fract;
1552 *pavediff = ave;
1553
1554 numaDestroy(&nah);
1555 numaDestroy(&nan);
1556 return 0;
1557 }
1558
1559
1560 /*!
1561 * \brief pixGetDifferenceHistogram()
1562 *
1563 * \param[in] pix1 8 bpp gray or 32 bpp rgb, or colormapped
1564 * \param[in] pix2 8 bpp gray or 32 bpp rgb, or colormapped
1565 * \param[in] factor subsampling factor; use 0 or 1 for no subsampling
1566 * \return na Numa of histogram of differences, or NULL on error
1567 *
1568 * <pre>
1569 * Notes:
1570 * (1) The two images are aligned at the UL corner, and do not
1571 * need to be the same size. If they are not the same size,
1572 * the comparison will be made over overlapping pixels.
1573 * (2) If there is a colormap, it is removed and the result
1574 * is either gray or RGB depending on the colormap.
1575 * (3) If RGB, the maximum difference between pixel components is
1576 * saved in the histogram.
1577 * </pre>
1578 */
1579 NUMA *
1580 pixGetDifferenceHistogram(PIX *pix1,
1581 PIX *pix2,
1582 l_int32 factor)
1583 {
1584 l_int32 w1, h1, d1, w2, h2, d2, w, h, wpl1, wpl2;
1585 l_int32 i, j, val, val1, val2;
1586 l_int32 rval1, rval2, gval1, gval2, bval1, bval2;
1587 l_int32 rdiff, gdiff, bdiff, maxdiff;
1588 l_uint32 *data1, *data2, *line1, *line2;
1589 l_float32 *array;
1590 NUMA *na;
1591 PIX *pixt1, *pixt2;
1592
1593 if (!pix1)
1594 return (NUMA *)ERROR_PTR("pix1 not defined", __func__, NULL);
1595 if (!pix2)
1596 return (NUMA *)ERROR_PTR("pix2 not defined", __func__, NULL);
1597 d1 = pixGetDepth(pix1);
1598 d2 = pixGetDepth(pix2);
1599 if (d1 == 16 || d2 == 16)
1600 return (NUMA *)ERROR_PTR("d == 16 not supported", __func__, NULL);
1601 if (d1 < 8 && !pixGetColormap(pix1))
1602 return (NUMA *)ERROR_PTR("pix1 depth < 8 bpp and not cmapped",
1603 __func__, NULL);
1604 if (d2 < 8 && !pixGetColormap(pix2))
1605 return (NUMA *)ERROR_PTR("pix2 depth < 8 bpp and not cmapped",
1606 __func__, NULL);
1607 pixt1 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
1608 pixt2 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC);
1609 pixGetDimensions(pixt1, &w1, &h1, &d1);
1610 pixGetDimensions(pixt2, &w2, &h2, &d2);
1611 if (d1 != d2) {
1612 pixDestroy(&pixt1);
1613 pixDestroy(&pixt2);
1614 return (NUMA *)ERROR_PTR("pix depths not equal", __func__, NULL);
1615 }
1616 if (factor < 1) factor = 1;
1617
1618 na = numaCreate(256);
1619 numaSetCount(na, 256); /* all initialized to 0.0 */
1620 array = numaGetFArray(na, L_NOCOPY);
1621 w = L_MIN(w1, w2);
1622 h = L_MIN(h1, h2);
1623 data1 = pixGetData(pixt1);
1624 data2 = pixGetData(pixt2);
1625 wpl1 = pixGetWpl(pixt1);
1626 wpl2 = pixGetWpl(pixt2);
1627 if (d1 == 8) {
1628 for (i = 0; i < h; i += factor) {
1629 line1 = data1 + i * wpl1;
1630 line2 = data2 + i * wpl2;
1631 for (j = 0; j < w; j += factor) {
1632 val1 = GET_DATA_BYTE(line1, j);
1633 val2 = GET_DATA_BYTE(line2, j);
1634 val = L_ABS(val1 - val2);
1635 array[val]++;
1636 }
1637 }
1638 } else { /* d1 == 32 */
1639 for (i = 0; i < h; i += factor) {
1640 line1 = data1 + i * wpl1;
1641 line2 = data2 + i * wpl2;
1642 for (j = 0; j < w; j += factor) {
1643 extractRGBValues(line1[j], &rval1, &gval1, &bval1);
1644 extractRGBValues(line2[j], &rval2, &gval2, &bval2);
1645 rdiff = L_ABS(rval1 - rval2);
1646 gdiff = L_ABS(gval1 - gval2);
1647 bdiff = L_ABS(bval1 - bval2);
1648 maxdiff = L_MAX(rdiff, gdiff);
1649 maxdiff = L_MAX(maxdiff, bdiff);
1650 array[maxdiff]++;
1651 }
1652 }
1653 }
1654
1655 pixDestroy(&pixt1);
1656 pixDestroy(&pixt2);
1657 return na;
1658 }
1659
1660
1661 /*!
1662 * \brief pixGetPerceptualDiff()
1663 *
1664 * \param[in] pixs1 8 bpp gray or 32 bpp rgb, or colormapped
1665 * \param[in] pixs2 8 bpp gray or 32 bpp rgb, or colormapped
1666 * \param[in] sampling subsampling factor; use 0 or 1 for no subsampling
1667 * \param[in] dilation size of grayscale or color Sel; odd
1668 * \param[in] mindiff minimum pixel difference to be counted; > 0
1669 * \param[out] pfract fraction of pixels with diff greater than mindiff
1670 * \param[out] ppixdiff1 [optional] showing difference (gray or color)
1671 * \param[out] ppixdiff2 [optional] showing pixels of sufficient diff
1672 * \return 0 if OK, 1 on error
1673 *
1674 * <pre>
1675 * Notes:
1676 * (1) This takes 2 pix and determines, using 2 input parameters:
1677 * * %dilation specifies the amount of grayscale or color
1678 * dilation to apply to the images, to compensate for
1679 * a small amount of misregistration. A typical number might
1680 * be 5, which uses a 5x5 Sel. Grayscale dilation expands
1681 * lighter pixels into darker pixel regions.
1682 * * %mindiff determines the threshold on the difference in
1683 * pixel values to be counted -- two pixels are not similar
1684 * if their difference in value is at least %mindiff. For
1685 * color pixels, we use the maximum component difference.
1686 * (2) The pixelwise comparison is always done with the UL corners
1687 * aligned. The sizes of pix1 and pix2 need not be the same,
1688 * although in practice it can be useful to scale to the same size.
1689 * (3) If there is a colormap, it is removed and the result
1690 * is either gray or RGB depending on the colormap.
1691 * (4) Two optional diff images can be retrieved (typ. for debugging):
1692 * pixdiff1: the gray or color difference
1693 * pixdiff2: thresholded to 1 bpp for pixels exceeding %mindiff
1694 * (5) The returned value of fract can be compared to some threshold,
1695 * which is application dependent.
1696 * (6) This method is in analogy to the two-sided hausdorff transform,
1697 * except here it is for d > 1. For d == 1 (see pixRankHaustest()),
1698 * we verify that when one pix1 is dilated, it covers at least a
1699 * given fraction of the pixels in pix2, and v.v.; in that
1700 * case, the two pix are sufficiently similar. Here, we
1701 * do an analogous thing: subtract the dilated pix1 from pix2 to
1702 * get a 1-sided hausdorff-like transform. Then do it the
1703 * other way. Take the component-wise max of the two results,
1704 * and threshold to get the fraction of pixels with a difference
1705 * below the threshold.
1706 * </pre>
1707 */
1708 l_ok
1709 pixGetPerceptualDiff(PIX *pixs1,
1710 PIX *pixs2,
1711 l_int32 sampling,
1712 l_int32 dilation,
1713 l_int32 mindiff,
1714 l_float32 *pfract,
1715 PIX **ppixdiff1,
1716 PIX **ppixdiff2)
1717 {
1718 l_int32 d1, d2, w, h, count;
1719 PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
1720 PIX *pix10, *pix11;
1721
1722 if (ppixdiff1) *ppixdiff1 = NULL;
1723 if (ppixdiff2) *ppixdiff2 = NULL;
1724 if (!pfract)
1725 return ERROR_INT("&fract not defined", __func__, 1);
1726 *pfract = 1.0; /* init to completely different */
1727 if ((dilation & 1) == 0)
1728 return ERROR_INT("dilation must be odd", __func__, 1);
1729 if (!pixs1)
1730 return ERROR_INT("pixs1 not defined", __func__, 1);
1731 if (!pixs2)
1732 return ERROR_INT("pixs2 not defined", __func__, 1);
1733 d1 = pixGetDepth(pixs1);
1734 d2 = pixGetDepth(pixs2);
1735 if (!pixGetColormap(pixs1) && d1 < 8)
1736 return ERROR_INT("pixs1 not cmapped and < 8 bpp", __func__, 1);
1737 if (!pixGetColormap(pixs2) && d2 < 8)
1738 return ERROR_INT("pixs2 not cmapped and < 8 bpp", __func__, 1);
1739
1740 /* Integer downsample if requested */
1741 if (sampling > 1) {
1742 pix1 = pixScaleByIntSampling(pixs1, sampling);
1743 pix2 = pixScaleByIntSampling(pixs2, sampling);
1744 } else {
1745 pix1 = pixClone(pixs1);
1746 pix2 = pixClone(pixs2);
1747 }
1748
1749 /* Remove colormaps */
1750 if (pixGetColormap(pix1)) {
1751 pix3 = pixRemoveColormap(pix1, REMOVE_CMAP_BASED_ON_SRC);
1752 d1 = pixGetDepth(pix3);
1753 } else {
1754 pix3 = pixClone(pix1);
1755 }
1756 if (pixGetColormap(pix2)) {
1757 pix4 = pixRemoveColormap(pix2, REMOVE_CMAP_BASED_ON_SRC);
1758 d2 = pixGetDepth(pix4);
1759 } else {
1760 pix4 = pixClone(pix2);
1761 }
1762 pixDestroy(&pix1);
1763 pixDestroy(&pix2);
1764 if (d1 != d2 || (d1 != 8 && d1 != 32)) {
1765 pixDestroy(&pix3);
1766 pixDestroy(&pix4);
1767 L_INFO("depths unequal or not in {8,32}: d1 = %d, d2 = %d\n",
1768 __func__, d1, d2);
1769 return 1;
1770 }
1771
1772 /* In each direction, do a small dilation and subtract the dilated
1773 * image from the other image to get a one-sided difference.
1774 * Then take the max of the differences for each direction
1775 * and clipping each component to 255 if necessary. Note that
1776 * for RGB images, the dilations and max selection are done
1777 * component-wise, and the conversion to grayscale also uses the
1778 * maximum component. The resulting grayscale images are
1779 * thresholded using %mindiff. */
1780 if (d1 == 8) {
1781 pix5 = pixDilateGray(pix3, dilation, dilation);
1782 pixCompareGray(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1783 &pix7);
1784 pix6 = pixDilateGray(pix4, dilation, dilation);
1785 pixCompareGray(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1786 &pix8);
1787 pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX);
1788 pix10 = pixThresholdToBinary(pix9, mindiff);
1789 pixInvert(pix10, pix10);
1790 pixCountPixels(pix10, &count, NULL);
1791 pixGetDimensions(pix10, &w, &h, NULL);
1792 *pfract = (w <= 0 || h <= 0) ? 0.0 :
1793 (l_float32)count / (l_float32)(w * h);
1794 pixDestroy(&pix5);
1795 pixDestroy(&pix6);
1796 pixDestroy(&pix7);
1797 pixDestroy(&pix8);
1798 if (ppixdiff1)
1799 *ppixdiff1 = pix9;
1800 else
1801 pixDestroy(&pix9);
1802 if (ppixdiff2)
1803 *ppixdiff2 = pix10;
1804 else
1805 pixDestroy(&pix10);
1806 } else { /* d1 == 32 */
1807 pix5 = pixColorMorph(pix3, L_MORPH_DILATE, dilation, dilation);
1808 pixCompareRGB(pix4, pix5, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1809 &pix7);
1810 pix6 = pixColorMorph(pix4, L_MORPH_DILATE, dilation, dilation);
1811 pixCompareRGB(pix3, pix6, L_COMPARE_SUBTRACT, 0, NULL, NULL, NULL,
1812 &pix8);
1813 pix9 = pixMinOrMax(NULL, pix7, pix8, L_CHOOSE_MAX);
1814 pix10 = pixConvertRGBToGrayMinMax(pix9, L_CHOOSE_MAX);
1815 pix11 = pixThresholdToBinary(pix10, mindiff);
1816 pixInvert(pix11, pix11);
1817 pixCountPixels(pix11, &count, NULL);
1818 pixGetDimensions(pix11, &w, &h, NULL);
1819 *pfract = (w <= 0 || h <= 0) ? 0.0 :
1820 (l_float32)count / (l_float32)(w * h);
1821 pixDestroy(&pix5);
1822 pixDestroy(&pix6);
1823 pixDestroy(&pix7);
1824 pixDestroy(&pix8);
1825 pixDestroy(&pix10);
1826 if (ppixdiff1)
1827 *ppixdiff1 = pix9;
1828 else
1829 pixDestroy(&pix9);
1830 if (ppixdiff2)
1831 *ppixdiff2 = pix11;
1832 else
1833 pixDestroy(&pix11);
1834
1835 }
1836 pixDestroy(&pix3);
1837 pixDestroy(&pix4);
1838 return 0;
1839 }
1840
1841
1842 /*!
1843 * \brief pixGetPSNR()
1844 *
1845 * \param[in] pix1, pix2 8 or 32 bpp; no colormap
1846 * \param[in] factor sampling factor; >= 1
1847 * \param[out] ppsnr power signal/noise ratio difference
1848 * \return 0 if OK, 1 on error
1849 *
1850 * <pre>
1851 * Notes:
1852 * (1) This computes the power S/N ratio, in dB, for the difference
1853 * between two images. By convention, the power S/N
1854 * for a grayscale image is ('log' == log base 10,
1855 * and 'ln == log base e):
1856 * PSNR = 10 * log((255/MSE)^2)
1857 * = 4.3429 * ln((255/MSE)^2)
1858 * = -4.3429 * ln((MSE/255)^2)
1859 * where MSE is the mean squared error.
1860 * Here are some examples:
1861 * MSE PSNR
1862 * --- ----
1863 * 10 28.1
1864 * 3 38.6
1865 * 1 48.1
1866 * 0.1 68.1
1867 * (2) If pix1 and pix2 have the same pixel values, the MSE = 0.0
1868 * and the PSNR is infinity. For that case, this returns
1869 * PSNR = 1000, which corresponds to the very small MSE of
1870 * about 10^(-48).
1871 * </pre>
1872 */
1873 l_ok
1874 pixGetPSNR(PIX *pix1,
1875 PIX *pix2,
1876 l_int32 factor,
1877 l_float32 *ppsnr)
1878 {
1879 l_int32 same, i, j, w, h, d, wpl1, wpl2, v1, v2, r1, g1, b1, r2, g2, b2;
1880 l_uint32 *data1, *data2, *line1, *line2;
1881 l_float32 mse; /* mean squared error */
1882
1883 if (!ppsnr)
1884 return ERROR_INT("&psnr not defined", __func__, 1);
1885 *ppsnr = 0.0;
1886 if (!pix1 || !pix2)
1887 return ERROR_INT("empty input pix", __func__, 1);
1888 if (!pixSizesEqual(pix1, pix2))
1889 return ERROR_INT("pix sizes unequal", __func__, 1);
1890 if (pixGetColormap(pix1))
1891 return ERROR_INT("pix1 has colormap", __func__, 1);
1892 if (pixGetColormap(pix2))
1893 return ERROR_INT("pix2 has colormap", __func__, 1);
1894 pixGetDimensions(pix1, &w, &h, &d);
1895 if (d != 8 && d != 32)
1896 return ERROR_INT("pix not 8 or 32 bpp", __func__, 1);
1897 if (factor < 1)
1898 return ERROR_INT("invalid sampling factor", __func__, 1);
1899
1900 pixEqual(pix1, pix2, &same);
1901 if (same) {
1902 *ppsnr = 1000.0; /* crazy big exponent */
1903 return 0;
1904 }
1905
1906 data1 = pixGetData(pix1);
1907 data2 = pixGetData(pix2);
1908 wpl1 = pixGetWpl(pix1);
1909 wpl2 = pixGetWpl(pix2);
1910 mse = 0.0;
1911 if (d == 8) {
1912 for (i = 0; i < h; i += factor) {
1913 line1 = data1 + i * wpl1;
1914 line2 = data2 + i * wpl2;
1915 for (j = 0; j < w; j += factor) {
1916 v1 = GET_DATA_BYTE(line1, j);
1917 v2 = GET_DATA_BYTE(line2, j);
1918 mse += (l_float32)(v1 - v2) * (v1 - v2);
1919 }
1920 }
1921 } else { /* d == 32 */
1922 for (i = 0; i < h; i += factor) {
1923 line1 = data1 + i * wpl1;
1924 line2 = data2 + i * wpl2;
1925 for (j = 0; j < w; j += factor) {
1926 extractRGBValues(line1[j], &r1, &g1, &b1);
1927 extractRGBValues(line2[j], &r2, &g2, &b2);
1928 mse += ((l_float32)(r1 - r2) * (r1 - r2) +
1929 (g1 - g2) * (g1 - g2) +
1930 (b1 - b2) * (b1 - b2)) / 3.0;
1931 }
1932 }
1933 }
1934 mse = mse / ((l_float32)(w) * h);
1935
1936 *ppsnr = -4.3429448 * log(mse / (255 * 255));
1937 return 0;
1938 }
1939
1940
1941 /*------------------------------------------------------------------*
1942 * Comparison of photo regions by histogram *
1943 *------------------------------------------------------------------*/
1944 /*!
1945 * \brief pixaComparePhotoRegionsByHisto()
1946 *
1947 * \param[in] pixa any depth; colormap OK
1948 * \param[in] minratio requiring sizes be compatible; < 1.0
1949 * \param[in] textthresh threshold for text/photo; use 0 for default
1950 * \param[in] factor subsampling; >= 1
1951 * \param[in] n in range {1, ... 7}. n^2 is the maximum number
1952 * of subregions for histograms; typ. n = 3.
1953 * \param[in] simthresh threshold for similarity; use 0 for default
1954 * \param[out] pnai array giving similarity class indices
1955 * \param[out] pscores [optional] score matrix as 1-D array of size N^2
1956 * \param[out] ppixd [optional] pix of similarity classes
1957 * \param[in] debug 1 to output histograms; 0 otherwise
1958 * \return 0 if OK, 1 on error
1959 *
1960 * <pre>
1961 * Notes:
1962 * (1) This function takes a pixa of cropped photo images and
1963 * compares each one to the others for similarity.
1964 * Each image is first tested to see if it is a photo that can
1965 * be compared by tiled histograms. If so, it is padded to put
1966 * the centroid in the center of the image, and the histograms
1967 * are generated. The final step of comparing each histogram
1968 * with all the others is very fast.
1969 * (2) To make the histograms, each image is subdivided in a maximum
1970 * of n^2 subimages. The parameter %n specifies the "side" of
1971 * an n x n grid of such subimages. If the subimages have an
1972 * aspect ratio larger than 2, the grid will change, again using n^2
1973 * as a maximum for the number of subimages. For example,
1974 * if n == 3, but the image is 600 x 200 pixels, a 3x3 grid
1975 * would have subimages of 200 x 67 pixels, which is more
1976 * than 2:1, so we change to a 4x2 grid where each subimage
1977 * has 150 x 100 pixels.
1978 * (3) An initial filter gives %score = 0 if the ratio of widths
1979 * and heights (smallest / largest) does not exceed a
1980 * threshold %minratio. If set at 1.0, both images must be
1981 * exactly the same size. A typical value for %minratio is 0.9.
1982 * (4) The comparison score between two images is a value in [0.0 .. 1.0].
1983 * If the comparison score >= %simthresh, the images are placed in
1984 * the same similarity class. Default value for %simthresh is 0.25.
1985 * (5) An array %nai of similarity class indices for pix in the
1986 * input pixa is returned.
1987 * (6) There are two debugging options:
1988 * * An optional 2D matrix of scores is returned as a 1D array.
1989 * A visualization of this is written to a temp file.
1990 * * An optional pix showing the similarity classes can be
1991 * returned. Text in each input pix is reproduced.
1992 * (7) See the notes in pixComparePhotoRegionsByHisto() for details
1993 * on the implementation.
1994 * </pre>
1995 */
1996 l_ok
1997 pixaComparePhotoRegionsByHisto(PIXA *pixa,
1998 l_float32 minratio,
1999 l_float32 textthresh,
2000 l_int32 factor,
2001 l_int32 n,
2002 l_float32 simthresh,
2003 NUMA **pnai,
2004 l_float32 **pscores,
2005 PIX **ppixd,
2006 l_int32 debug)
2007 {
2008 char *text;
2009 l_int32 i, j, nim, w, h, w1, h1, w2, h2, ival, index, classid;
2010 l_float32 score;
2011 l_float32 *scores;
2012 NUMA *nai, *naw, *nah;
2013 NUMAA *naa;
2014 NUMAA **n3a; /* array of naa */
2015 PIX *pix;
2016
2017 if (pscores) *pscores = NULL;
2018 if (ppixd) *ppixd = NULL;
2019 if (!pnai)
2020 return ERROR_INT("&na not defined", __func__, 1);
2021 *pnai = NULL;
2022 if (!pixa)
2023 return ERROR_INT("pixa not defined", __func__, 1);
2024 if (minratio < 0.0 || minratio > 1.0)
2025 return ERROR_INT("minratio not in [0.0 ... 1.0]", __func__, 1);
2026 if (textthresh <= 0.0) textthresh = 1.3f;
2027 if (factor < 1)
2028 return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2029 if (n < 1 || n > 7) {
2030 L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2031 n = 4;
2032 }
2033 if (simthresh <= 0.0) simthresh = 0.25;
2034 if (simthresh > 1.0)
2035 return ERROR_INT("simthresh invalid; should be near 0.25", __func__, 1);
2036
2037 /* Prepare the histograms */
2038 nim = pixaGetCount(pixa);
2039 if ((n3a = (NUMAA **)LEPT_CALLOC(nim, sizeof(NUMAA *))) == NULL)
2040 return ERROR_INT("calloc fail for n3a", __func__, 1);
2041 naw = numaCreate(0);
2042 nah = numaCreate(0);
2043 for (i = 0; i < nim; i++) {
2044 pix = pixaGetPix(pixa, i, L_CLONE);
2045 text = pixGetText(pix);
2046 pixSetResolution(pix, 150, 150);
2047 index = (debug) ? i : 0;
2048 pixGenPhotoHistos(pix, NULL, factor, textthresh, n,
2049 &naa, &w, &h, index);
2050 n3a[i] = naa;
2051 numaAddNumber(naw, w);
2052 numaAddNumber(nah, h);
2053 if (naa)
2054 lept_stderr("Image %s is photo\n", text);
2055 else
2056 lept_stderr("Image %s is NOT photo\n", text);
2057 pixDestroy(&pix);
2058 }
2059
2060 /* Do the comparisons. We are making a set of classes, where
2061 * all similar images are placed in the same class. There are
2062 * 'nim' input images. The classes are labeled by 'classid' (all
2063 * similar images get the same 'classid' value), and 'nai' maps
2064 * the classid of the image in the input array to the classid
2065 * of the similarity class. */
2066 if ((scores =
2067 (l_float32 *)LEPT_CALLOC((size_t)nim * nim, sizeof(l_float32)))
2068 == NULL) {
2069 L_ERROR("calloc fail for scores\n", __func__);
2070 goto cleanup;
2071 }
2072 nai = numaMakeConstant(-1, nim); /* classid array */
2073 for (i = 0, classid = 0; i < nim; i++) {
2074 scores[nim * i + i] = 1.0;
2075 numaGetIValue(nai, i, &ival);
2076 if (ival != -1) /* already set */
2077 continue;
2078 numaSetValue(nai, i, classid);
2079 if (n3a[i] == NULL) { /* not a photo */
2080 classid++;
2081 continue;
2082 }
2083 numaGetIValue(naw, i, &w1);
2084 numaGetIValue(nah, i, &h1);
2085 for (j = i + 1; j < nim; j++) {
2086 numaGetIValue(nai, j, &ival);
2087 if (ival != -1) /* already set */
2088 continue;
2089 if (n3a[j] == NULL) /* not a photo */
2090 continue;
2091 numaGetIValue(naw, j, &w2);
2092 numaGetIValue(nah, j, &h2);
2093 compareTilesByHisto(n3a[i], n3a[j], minratio, w1, h1, w2, h2,
2094 &score, NULL);
2095 scores[nim * i + j] = score;
2096 scores[nim * j + i] = score; /* the score array is symmetric */
2097 /* lept_stderr("score = %5.3f\n", score); */
2098 if (score > simthresh) {
2099 numaSetValue(nai, j, classid);
2100 lept_stderr(
2101 "Setting %d similar to %d, in class %d; score %5.3f\n",
2102 j, i, classid, score);
2103 }
2104 }
2105 classid++;
2106 }
2107 *pnai = nai;
2108
2109 /* Debug: optionally save and display the score array.
2110 * All images that are photos are represented by a point on
2111 * the diagonal. Other images in the same similarity class
2112 * are on the same horizontal raster line to the right.
2113 * The array has been symmetrized, so images in the same
2114 * same similarity class also appear on the same column below. */
2115 if (pscores) {
2116 l_int32 wpl, fact;
2117 l_uint32 *line, *data;
2118 PIX *pix2, *pix3;
2119 pix2 = pixCreate(nim, nim, 8);
2120 data = pixGetData(pix2);
2121 wpl = pixGetWpl(pix2);
2122 for (i = 0; i < nim; i++) {
2123 line = data + i * wpl;
2124 for (j = 0; j < nim; j++) {
2125 SET_DATA_BYTE(line, j,
2126 L_MIN(255, 4.0 * 255 * scores[nim * i + j]));
2127 }
2128 }
2129 fact = L_MAX(2, 1000 / nim);
2130 pix3 = pixExpandReplicate(pix2, fact);
2131 lept_stderr("Writing to /tmp/lept/comp/scorearray.png\n");
2132 lept_mkdir("lept/comp");
2133 pixWrite("/tmp/lept/comp/scorearray.png", pix3, IFF_PNG);
2134 pixDestroy(&pix2);
2135 pixDestroy(&pix3);
2136 *pscores = scores;
2137 } else {
2138 LEPT_FREE(scores);
2139 }
2140
2141 /* Debug: optionally display and save the image comparisons.
2142 * Image similarity classes are displayed by column; similar
2143 * images are displayed in the same column. */
2144 if (ppixd)
2145 *ppixd = pixaDisplayTiledByIndex(pixa, nai, 200, 20, 2, 6, 0x0000ff00);
2146
2147 cleanup:
2148 numaDestroy(&naw);
2149 numaDestroy(&nah);
2150 for (i = 0; i < nim; i++)
2151 numaaDestroy(&n3a[i]);
2152 LEPT_FREE(n3a);
2153 return 0;
2154 }
2155
2156
2157 /*!
2158 * \brief pixComparePhotoRegionsByHisto()
2159 *
2160 * \param[in] pix1, pix2 any depth; colormap OK
2161 * \param[in] box1, box2 [optional] photo regions from each; can be null
2162 * \param[in] minratio requiring sizes be compatible; < 1.0
2163 * \param[in] factor subsampling factor; >= 1
2164 * \param[in] n in range {1, ... 7}. n^2 is the maximum number
2165 * of subregions for histograms; typ. n = 3.
2166 * \param[out] pscore similarity score of histograms
2167 * \param[in] debugflag 1 for debug output; 0 for no debugging
2168 * \return 0 if OK, 1 on error
2169 *
2170 * <pre>
2171 * Notes:
2172 * (1) This function compares two grayscale photo regions. If a
2173 * box is given, the region is clipped; otherwise assume
2174 * the entire images are photo regions. This is done with a
2175 * set of not more than n^2 spatially aligned histograms, which are
2176 * aligned using the centroid of the inverse image.
2177 * (2) The parameter %n specifies the "side" of an n x n grid
2178 * of subimages. If the subimages have an aspect ratio larger
2179 * than 2, the grid will change, using n^2 as a maximum for
2180 * the number of subimages. For example, if n == 3, but the
2181 * image is 600 x 200 pixels, a 3x3 grid would have subimages
2182 * of 200 x 67 pixels, which is more than 2:1, so we change
2183 * to a 4x2 grid where each subimage has 150 x 100 pixels.
2184 * (3) An initial filter gives %score = 0 if the ratio of widths
2185 * and heights (smallest / largest) does not exceed a
2186 * threshold %minratio. This must be between 0.5 and 1.0.
2187 * If set at 1.0, both images must be exactly the same size.
2188 * A typical value for %minratio is 0.9.
2189 * (4) Because this function should not be used on text or
2190 * line graphics, which can give false positive results
2191 * (i.e., high scores for different images), filter the images
2192 * using pixGenPhotoHistos(), which returns tiled histograms
2193 * only if an image is not text and comparison is expected
2194 * to work with histograms. If either image fails the test,
2195 * the comparison returns a score of 0.0.
2196 * (5) The white value counts in the histograms are removed; they
2197 * are typically pixels that were padded to achieve alignment.
2198 * (6) For an efficient representation of the histogram, normalize
2199 * using a multiplicative factor so that the number in the
2200 * maximum bucket is 255. It then takes 256 bytes to store.
2201 * (7) When comparing the histograms of two regions, use the
2202 * Earth Mover distance (EMD), with the histograms normalized
2203 * so that the sum over bins is the same. Further normalize
2204 * by dividing by 255, so that the result is in [0.0 ... 1.0].
2205 * (8) Get a similarity score S = 1.0 - k * D, where
2206 * k is a constant, say in the range 5-10
2207 * D = normalized EMD
2208 * and for multiple tiles, take the Min(S) to be the final score.
2209 * Using aligned tiles gives protection against accidental
2210 * similarity of the overall grayscale histograms.
2211 * A small number of aligned tiles works well.
2212 * (9) With debug on, you get a pdf that shows, for each tile,
2213 * the images, histograms and score.
2214 * </pre>
2215 */
2216 l_ok
2217 pixComparePhotoRegionsByHisto(PIX *pix1,
2218 PIX *pix2,
2219 BOX *box1,
2220 BOX *box2,
2221 l_float32 minratio,
2222 l_int32 factor,
2223 l_int32 n,
2224 l_float32 *pscore,
2225 l_int32 debugflag)
2226 {
2227 l_int32 w1, h1, w2, h2, w1c, h1c, w2c, h2c, debugindex;
2228 l_float32 wratio, hratio;
2229 NUMAA *naa1, *naa2;
2230 PIX *pix3, *pix4;
2231 PIXA *pixa;
2232
2233 if (!pscore)
2234 return ERROR_INT("&score not defined", __func__, 1);
2235 *pscore = 0.0;
2236 if (!pix1 || !pix2)
2237 return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
2238 if (minratio < 0.5 || minratio > 1.0)
2239 return ERROR_INT("minratio not in [0.5 ... 1.0]", __func__, 1);
2240 if (factor < 1)
2241 return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2242 if (n < 1 || n > 7) {
2243 L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2244 n = 4;
2245 }
2246
2247 debugindex = 0;
2248 if (debugflag) {
2249 lept_mkdir("lept/comp");
2250 debugindex = 666; /* arbitrary number used for naming output */
2251 }
2252
2253 /* Initial filter by size */
2254 if (box1)
2255 boxGetGeometry(box1, NULL, NULL, &w1, &h1);
2256 else
2257 pixGetDimensions(pix1, &w1, &h1, NULL);
2258 if (box2)
2259 boxGetGeometry(box2, NULL, NULL, &w2, &h2);
2260 else
2261 pixGetDimensions(pix1, &w2, &h2, NULL);
2262 wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 :
2263 (l_float32)w2 / (l_float32)w1;
2264 hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 :
2265 (l_float32)h2 / (l_float32)h1;
2266 if (wratio < minratio || hratio < minratio)
2267 return 0;
2268
2269 /* Initial crop, if necessary, and make histos */
2270 if (box1)
2271 pix3 = pixClipRectangle(pix1, box1, NULL);
2272 else
2273 pix3 = pixClone(pix1);
2274 pixGenPhotoHistos(pix3, NULL, factor, 0, n, &naa1, &w1c, &h1c, debugindex);
2275 pixDestroy(&pix3);
2276 if (!naa1) return 0;
2277 if (box2)
2278 pix4 = pixClipRectangle(pix2, box2, NULL);
2279 else
2280 pix4 = pixClone(pix2);
2281 pixGenPhotoHistos(pix4, NULL, factor, 0, n, &naa2, &w2c, &h2c, debugindex);
2282 pixDestroy(&pix4);
2283 if (!naa2) return 0;
2284
2285 /* Compare histograms */
2286 pixa = (debugflag) ? pixaCreate(0) : NULL;
2287 compareTilesByHisto(naa1, naa2, minratio, w1c, h1c, w2c, h2c, pscore, pixa);
2288 pixaDestroy(&pixa);
2289 return 0;
2290 }
2291
2292
2293 /*!
2294 * \brief pixGenPhotoHistos()
2295 *
2296 * \param[in] pixs depth > 1 bpp; colormap OK
2297 * \param[in] box [optional] region to be selected; can be null
2298 * \param[in] factor subsampling; >= 1
2299 * \param[in] thresh threshold for photo/text; use 0 for default
2300 * \param[in] n in range {1, ... 7}. n^2 is the maximum number
2301 * of subregions for histograms; typ. n = 3.
2302 * \param[out] pnaa nx * ny 256-entry gray histograms
2303 * \param[out] pw width of image used to make histograms
2304 * \param[out] ph height of image used to make histograms
2305 * \param[in] debugindex 0 for no debugging; positive integer otherwise
2306 * \return 0 if OK, 1 on error
2307 *
2308 * <pre>
2309 * Notes:
2310 * (1) This crops and converts to 8 bpp if necessary. It adds a
2311 * minimal white boundary such that the centroid of the
2312 * photo-inverted image is in the center. This allows
2313 * automatic alignment with histograms of other image regions.
2314 * (2) The parameter %n specifies the "side" of the n x n grid
2315 * of subimages. If the subimages have an aspect ratio larger
2316 * than 2, the grid will change, using n^2 as a maximum for
2317 * the number of subimages. For example, if n == 3, but the
2318 * image is 600 x 200 pixels, a 3x3 grid would have subimages
2319 * of 200 x 67 pixels, which is more than 2:1, so we change
2320 * to a 4x2 grid where each subimage has 150 x 100 pixels.
2321 * (3) The white value in the histogram is removed, because of
2322 * the padding.
2323 * (4) Use 0 for conservative default (1.3) for thresh.
2324 * (5) For an efficient representation of the histogram, normalize
2325 * using a multiplicative factor so that the number in the
2326 * maximum bucket is 255. It then takes 256 bytes to store.
2327 * (6) With %debugindex > 0, this makes a pdf that shows, for each tile,
2328 * the images and histograms.
2329 * </pre>
2330 */
2331 l_ok
2332 pixGenPhotoHistos(PIX *pixs,
2333 BOX *box,
2334 l_int32 factor,
2335 l_float32 thresh,
2336 l_int32 n,
2337 NUMAA **pnaa,
2338 l_int32 *pw,
2339 l_int32 *ph,
2340 l_int32 debugindex)
2341 {
2342 char buf[64];
2343 NUMAA *naa;
2344 PIX *pix1, *pix2, *pix3, *pixm;
2345 PIXA *pixa;
2346
2347 if (pnaa) *pnaa = NULL;
2348 if (pw) *pw = 0;
2349 if (ph) *ph = 0;
2350 if (!pnaa)
2351 return ERROR_INT("&naa not defined", __func__, 1);
2352 if (!pw || !ph)
2353 return ERROR_INT("&w and &h not both defined", __func__, 1);
2354 if (!pixs || pixGetDepth(pixs) == 1)
2355 return ERROR_INT("pixs not defined or 1 bpp", __func__, 1);
2356 if (factor < 1)
2357 return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2358 if (thresh <= 0.0) thresh = 1.3f; /* default */
2359 if (n < 1 || n > 7) {
2360 L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2361 n = 4;
2362 }
2363
2364 pixa = NULL;
2365 if (debugindex > 0) {
2366 pixa = pixaCreate(0);
2367 lept_mkdir("lept/comp");
2368 }
2369
2370 /* Initial crop, if necessary */
2371 if (box)
2372 pix1 = pixClipRectangle(pixs, box, NULL);
2373 else
2374 pix1 = pixClone(pixs);
2375
2376 /* Convert to 8 bpp and pad to center the centroid */
2377 pix2 = pixConvertTo8(pix1, FALSE);
2378 pix3 = pixPadToCenterCentroid(pix2, factor);
2379
2380 /* Set to 255 all pixels above 230. Do this so that light gray
2381 * pixels do not enter into the comparison. */
2382 pixm = pixThresholdToBinary(pix3, 230);
2383 pixInvert(pixm, pixm);
2384 pixSetMaskedGeneral(pix3, pixm, 255, 0, 0);
2385 pixDestroy(&pixm);
2386
2387 if (debugindex > 0) {
2388 PIX *pix4, *pix5, *pix6, *pix7, *pix8;
2389 PIXA *pixa2;
2390 pix4 = pixConvertTo32(pix2);
2391 pix5 = pixConvertTo32(pix3);
2392 pix6 = pixScaleToSize(pix4, 400, 0);
2393 pix7 = pixScaleToSize(pix5, 400, 0);
2394 pixa2 = pixaCreate(2);
2395 pixaAddPix(pixa2, pix6, L_INSERT);
2396 pixaAddPix(pixa2, pix7, L_INSERT);
2397 pix8 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 3);
2398 pixaAddPix(pixa, pix8, L_INSERT);
2399 pixDestroy(&pix4);
2400 pixDestroy(&pix5);
2401 pixaDestroy(&pixa2);
2402 }
2403 pixDestroy(&pix1);
2404 pixDestroy(&pix2);
2405
2406 /* Test if this is a photoimage */
2407 pixDecideIfPhotoImage(pix3, factor, thresh, n, &naa, pixa);
2408 if (naa) {
2409 *pnaa = naa;
2410 *pw = pixGetWidth(pix3);
2411 *ph = pixGetHeight(pix3);
2412 }
2413
2414 if (pixa) {
2415 snprintf(buf, sizeof(buf), "/tmp/lept/comp/tiledhistos.%d.pdf",
2416 debugindex);
2417 lept_stderr("Writing to %s\n", buf);
2418 pixaConvertToPdf(pixa, 300, 1.0, L_FLATE_ENCODE, 0, NULL, buf);
2419 pixaDestroy(&pixa);
2420 }
2421
2422 pixDestroy(&pix3);
2423 return 0;
2424 }
2425
2426
2427 /*!
2428 * \brief pixPadToCenterCentroid()
2429 *
2430 * \param[in] pixs any depth, colormap OK
2431 * \param[in] factor subsampling for centroid; >= 1
2432 * \return pixd padded with white pixels, or NULL on error.
2433 *
2434 * <pre>
2435 * Notes:
2436 * (1) This add minimum white padding to an 8 bpp pix, such that
2437 * the centroid of the photometric inverse is in the center of
2438 * the resulting image. Thus in computing the centroid,
2439 * black pixels have weight 255, and white pixels have weight 0.
2440 * </pre>
2441 */
2442 PIX *
2443 pixPadToCenterCentroid(PIX *pixs,
2444 l_int32 factor)
2445
2446 {
2447 l_float32 cx, cy;
2448 l_int32 xs, ys, delx, dely, icx, icy, ws, hs, wd, hd;
2449 PIX *pix1, *pixd;
2450
2451 if (!pixs)
2452 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2453 if (factor < 1)
2454 return (PIX *)ERROR_PTR("invalid sampling factor", __func__, NULL);
2455
2456 pix1 = pixConvertTo8(pixs, FALSE);
2457 pixCentroid8(pix1, factor, &cx, &cy);
2458 icx = (l_int32)(cx + 0.5);
2459 icy = (l_int32)(cy + 0.5);
2460 pixGetDimensions(pix1, &ws, &hs, NULL);
2461 delx = ws - 2 * icx;
2462 dely = hs - 2 * icy;
2463 xs = L_MAX(0, delx);
2464 ys = L_MAX(0, dely);
2465 wd = 2 * L_MAX(icx, ws - icx);
2466 hd = 2 * L_MAX(icy, hs - icy);
2467 pixd = pixCreate(wd, hd, 8);
2468 pixSetAll(pixd); /* to white */
2469 pixCopyResolution(pixd, pixs);
2470 pixRasterop(pixd, xs, ys, ws, hs, PIX_SRC, pix1, 0, 0);
2471 pixDestroy(&pix1);
2472 return pixd;
2473 }
2474
2475
2476 /*!
2477 * \brief pixCentroid8()
2478 *
2479 * \param[in] pixs 8 bpp
2480 * \param[in] factor subsampling factor; >= 1
2481 * \param[out] pcx x value of centroid
2482 * \param[out] pcy y value of centroid
2483 * \return 0 if OK, 1 on error
2484 *
2485 * <pre>
2486 * Notes:
2487 * (1) This first does a photometric inversion (black = 255, white = 0).
2488 * It then finds the centroid of the result. The inversion is
2489 * done because white is usually background, so the centroid
2490 * is computed based on the "foreground" gray pixels, and the
2491 * darker the pixel, the more weight it is given.
2492 * </pre>
2493 */
2494 l_ok
2495 pixCentroid8(PIX *pixs,
2496 l_int32 factor,
2497 l_float32 *pcx,
2498 l_float32 *pcy)
2499 {
2500 l_int32 i, j, w, h, wpl, val;
2501 l_float32 sumx, sumy, sumv;
2502 l_uint32 *data, *line;
2503 PIX *pix1;
2504
2505 if (pcx) *pcx = 0.0;
2506 if (pcy) *pcy = 0.0;
2507 if (!pixs || pixGetDepth(pixs) != 8)
2508 return ERROR_INT("pixs undefined or not 8 bpp", __func__, 1);
2509 if (factor < 1)
2510 return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2511 if (!pcx || !pcy)
2512 return ERROR_INT("&cx and &cy not both defined", __func__, 1);
2513
2514 pix1 = pixInvert(NULL, pixs);
2515 pixGetDimensions(pix1, &w, &h, NULL);
2516 data = pixGetData(pix1);
2517 wpl = pixGetWpl(pix1);
2518 sumx = sumy = sumv = 0.0;
2519 for (i = 0; i < h; i++) {
2520 line = data + i * wpl;
2521 for (j = 0; j < w; j++) {
2522 val = GET_DATA_BYTE(line, j);
2523 sumx += val * j;
2524 sumy += val * i;
2525 sumv += val;
2526 }
2527 }
2528 pixDestroy(&pix1);
2529
2530 if (sumv == 0) {
2531 L_INFO("input image is white\n", __func__);
2532 *pcx = (l_float32)(w) / 2;
2533 *pcy = (l_float32)(h) / 2;
2534 } else {
2535 *pcx = sumx / sumv;
2536 *pcy = sumy / sumv;
2537 }
2538
2539 return 0;
2540 }
2541
2542
2543 /*!
2544 * \brief pixDecideIfPhotoImage()
2545 *
2546 * \param[in] pix 8 bpp, centroid in center
2547 * \param[in] factor subsampling for histograms; >= 1
2548 * \param[in] thresh threshold for photo/text; use 0 for default
2549 * \param[in] n in range {1, ... 7}. n^2 is the maximum number
2550 * of subregions for histograms; typ. n = 3.
2551 * \param[out] pnaa array of normalized histograms
2552 * \param[in] pixadebug [optional] use only for debug output
2553 * \return 0 if OK, 1 on error
2554 *
2555 * <pre>
2556 * Notes:
2557 * (1) The input image must be 8 bpp (no colormap), and padded with
2558 * white pixels so the centroid of photo-inverted pixels is at
2559 * the center of the image.
2560 * (2) The parameter %n specifies the "side" of the n x n grid
2561 * of subimages. If the subimages have an aspect ratio larger
2562 * than 2, the grid will change, using n^2 as a maximum for
2563 * the number of subimages. For example, if n == 3, but the
2564 * image is 600 x 200 pixels, a 3x3 grid would have subimages
2565 * of 200 x 67 pixels, which is more than 2:1, so we change
2566 * to a 4x2 grid where each subimage has 150 x 100 pixels.
2567 * (3) If the pix is not almost certainly a photoimage, the returned
2568 * histograms (%naa) are null.
2569 * (4) If histograms are generated, the white (255) count is set
2570 * to 0. This removes all pixels values above 230, including
2571 * white padding from the centroid matching operation, from
2572 * consideration. The resulting histograms are then normalized
2573 * so the maximum count is 255.
2574 * (5) Default for %thresh is 1.3; this seems sufficiently conservative.
2575 * (6) Use %pixadebug == NULL unless debug output is requested.
2576 * </pre>
2577 */
2578 l_ok
2579 pixDecideIfPhotoImage(PIX *pix,
2580 l_int32 factor,
2581 l_float32 thresh,
2582 l_int32 n,
2583 NUMAA **pnaa,
2584 PIXA *pixadebug)
2585 {
2586 char buf[64];
2587 l_int32 i, w, h, nx, ny, ngrids, istext, isphoto;
2588 l_float32 maxval, sum1, sum2, ratio;
2589 L_BMF *bmf;
2590 NUMA *na1, *na2, *na3, *narv;
2591 NUMAA *naa;
2592 PIX *pix1;
2593 PIXA *pixa1, *pixa2, *pixa3;
2594
2595 if (!pnaa)
2596 return ERROR_INT("&naa not defined", __func__, 1);
2597 *pnaa = NULL;
2598 if (!pix || pixGetDepth(pix) != 8 || pixGetColormap(pix))
2599 return ERROR_INT("pix undefined or invalid", __func__, 1);
2600 if (n < 1 || n > 7) {
2601 L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2602 n = 4;
2603 }
2604 if (thresh <= 0.0) thresh = 1.3f; /* default */
2605
2606 /* Look for text lines */
2607 pixDecideIfText(pix, NULL, &istext, pixadebug);
2608 if (istext) {
2609 L_INFO("Image is text\n", __func__);
2610 return 0;
2611 }
2612
2613 /* Determine grid from n */
2614 pixGetDimensions(pix, &w, &h, NULL);
2615 if (w == 0 || h == 0)
2616 return ERROR_INT("invalid pix dimension", __func__, 1);
2617 findHistoGridDimensions(n, w, h, &nx, &ny, 1);
2618
2619 /* Evaluate histograms in each tile */
2620 pixa1 = pixaSplitPix(pix, nx, ny, 0, 0);
2621 ngrids = nx * ny;
2622 bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL;
2623 naa = numaaCreate(ngrids);
2624 if (pixadebug) {
2625 lept_rmdir("lept/compplot");
2626 lept_mkdir("lept/compplot");
2627 }
2628 for (i = 0; i < ngrids; i++) {
2629 pix1 = pixaGetPix(pixa1, i, L_CLONE);
2630
2631 /* Get histograms, set white count to 0, normalize max to 255 */
2632 na1 = pixGetGrayHistogram(pix1, factor);
2633 numaSetValue(na1, 255, 0);
2634 na2 = numaWindowedMean(na1, 5); /* do some smoothing */
2635 numaGetMax(na2, &maxval, NULL);
2636 na3 = numaTransform(na2, 0, 255.0 / maxval);
2637 if (pixadebug) {
2638 snprintf(buf, sizeof(buf), "/tmp/lept/compplot/plot.%d", i);
2639 gplotSimple1(na3, GPLOT_PNG, buf, "Histos");
2640 }
2641
2642 numaaAddNuma(naa, na3, L_INSERT);
2643 numaDestroy(&na1);
2644 numaDestroy(&na2);
2645 pixDestroy(&pix1);
2646 }
2647 if (pixadebug) {
2648 pix1 = pixaDisplayTiledInColumns(pixa1, nx, 1.0, 30, 2);
2649 pixaAddPix(pixadebug, pix1, L_INSERT);
2650 pixa2 = pixaReadFiles("/tmp/lept/compplot", ".png");
2651 pixa3 = pixaScale(pixa2, 0.4f, 0.4f);
2652 pix1 = pixaDisplayTiledInColumns(pixa3, nx, 1.0, 30, 2);
2653 pixaAddPix(pixadebug, pix1, L_INSERT);
2654 pixaDestroy(&pixa2);
2655 pixaDestroy(&pixa3);
2656 }
2657
2658 /* Compute the standard deviation between these histos to decide
2659 * if the image is photo or something more like line art,
2660 * which does not support good comparison by tiled histograms. */
2661 grayInterHistogramStats(naa, 5, NULL, NULL, NULL, &narv);
2662
2663 /* For photos, the root variance has a larger weight of
2664 * values in the range [50 ... 150] compared to [200 ... 230],
2665 * than text or line art. For the latter, most of the variance
2666 * between tiles is in the lightest parts of the image, well
2667 * above 150. */
2668 numaGetSumOnInterval(narv, 50, 150, &sum1);
2669 numaGetSumOnInterval(narv, 200, 230, &sum2);
2670 if (sum2 == 0.0) { /* shouldn't happen */
2671 ratio = 0.001f; /* anything very small for debug output */
2672 isphoto = 0; /* be conservative */
2673 } else {
2674 ratio = sum1 / sum2;
2675 isphoto = (ratio > thresh) ? 1 : 0;
2676 }
2677 if (pixadebug) {
2678 if (isphoto)
2679 L_INFO("ratio %f > %f; isphoto is true\n",
2680 __func__, ratio, thresh);
2681 else
2682 L_INFO("ratio %f < %f; isphoto is false\n",
2683 __func__, ratio, thresh);
2684 }
2685 if (isphoto)
2686 *pnaa = naa;
2687 else
2688 numaaDestroy(&naa);
2689 bmfDestroy(&bmf);
2690 numaDestroy(&narv);
2691 pixaDestroy(&pixa1);
2692 return 0;
2693 }
2694
2695
2696 /*!
2697 * \brief findHistoGridDimensions()
2698 *
2699 * \param[in] n max number of grid elements is n^2; typ. n = 3
2700 * \param[in] w width of image to be subdivided
2701 * \param[in] h height of image to be subdivided
2702 * \param[out] pnx number of grid elements in x direction
2703 * \param[out] pny number of grid elements in y direction
2704 * \param[in] debug 1 for debug output to stderr
2705 * \return 0 if OK, 1 on error
2706 *
2707 * <pre>
2708 * Notes:
2709 * (1) This determines the number of subdivisions to be used on
2710 * the image in each direction. A histogram will be built
2711 * for each subimage.
2712 * (2) The parameter %n specifies the "side" of the n x n grid
2713 * of subimages. If the subimages have an aspect ratio larger
2714 * than 2, the grid will change, using n^2 as a maximum for
2715 * the number of subimages. For example, if n == 3, but the
2716 * image is 600 x 200 pixels, a 3x3 grid would have subimages
2717 * of 200 x 67 pixels, which is more than 2:1, so we change
2718 * to a 4x2 grid where each subimage has 150 x 100 pixels.
2719 * </pre>
2720 */
2721 static l_ok
2722 findHistoGridDimensions(l_int32 n,
2723 l_int32 w,
2724 l_int32 h,
2725 l_int32 *pnx,
2726 l_int32 *pny,
2727 l_int32 debug)
2728 {
2729 l_int32 nx, ny, max;
2730 l_float32 ratio;
2731
2732 ratio = (l_float32)w / (l_float32)h;
2733 max = n * n;
2734 nx = ny = n;
2735 while (nx > 1 && ny > 1) {
2736 if (ratio > 2.0) { /* reduce ny */
2737 ny--;
2738 nx = max / ny;
2739 if (debug)
2740 lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n",
2741 nx, ny, ratio);
2742 } else if (ratio < 0.5) { /* reduce nx */
2743 nx--;
2744 ny = max / nx;
2745 if (debug)
2746 lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n",
2747 nx, ny, ratio);
2748 } else { /* we're ok */
2749 if (debug)
2750 lept_stderr("nx = %d, ny = %d, ratio w/h = %4.2f\n",
2751 nx, ny, ratio);
2752 break;
2753 }
2754 ratio = (l_float32)(ny * w) / (l_float32)(nx * h);
2755 }
2756 *pnx = nx;
2757 *pny = ny;
2758 return 0;
2759 }
2760
2761
2762 /*!
2763 * \brief compareTilesByHisto()
2764 *
2765 * \param[in] naa1, naa2 each is a set of 256 entry histograms
2766 * \param[in] minratio requiring image sizes be compatible; < 1.0
2767 * \param[in] w1, h1, w2, h2 image sizes from which histograms were made
2768 * \param[out] pscore similarity score of histograms
2769 * \param[in] pixadebug [optional] use only for debug output
2770 * \return 0 if OK, 1 on error
2771 *
2772 * <pre>
2773 * Notes:
2774 * (1) naa1 and naa2 must be generated using pixGenPhotoHistos(),
2775 * using the same tile sizes.
2776 * (2) The image dimensions must be similar. The score is 0.0
2777 * if the ratio of widths and heights (smallest / largest)
2778 * exceeds a threshold %minratio, which must be between
2779 * 0.5 and 1.0. If set at 1.0, both images must be exactly
2780 * the same size. A typical value for %minratio is 0.9.
2781 * (3) The input pixadebug is null unless debug output is requested.
2782 * </pre>
2783 */
2784 l_ok
2785 compareTilesByHisto(NUMAA *naa1,
2786 NUMAA *naa2,
2787 l_float32 minratio,
2788 l_int32 w1,
2789 l_int32 h1,
2790 l_int32 w2,
2791 l_int32 h2,
2792 l_float32 *pscore,
2793 PIXA *pixadebug)
2794 {
2795 char buf1[128], buf2[128];
2796 l_int32 i, n;
2797 l_float32 wratio, hratio, score, minscore, dist;
2798 L_BMF *bmf;
2799 NUMA *na1, *na2, *nadist, *nascore;
2800
2801 if (!pscore)
2802 return ERROR_INT("&score not defined", __func__, 1);
2803 *pscore = 0.0;
2804 if (!naa1 || !naa2)
2805 return ERROR_INT("naa1 and naa2 not both defined", __func__, 1);
2806
2807 /* Filter for different sizes */
2808 wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 :
2809 (l_float32)w2 / (l_float32)w1;
2810 hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 :
2811 (l_float32)h2 / (l_float32)h1;
2812 if (wratio < minratio || hratio < minratio) {
2813 if (pixadebug)
2814 L_INFO("Sizes differ: wratio = %f, hratio = %f\n",
2815 __func__, wratio, hratio);
2816 return 0;
2817 }
2818 n = numaaGetCount(naa1);
2819 if (n != numaaGetCount(naa2)) { /* due to differing w/h ratio */
2820 L_INFO("naa1 and naa2 sizes are different\n", __func__);
2821 return 0;
2822 }
2823
2824 if (pixadebug) {
2825 lept_rmdir("lept/comptile");
2826 lept_mkdir("lept/comptile");
2827 }
2828
2829
2830 /* Evaluate histograms in each tile. Remove white before
2831 * computing EMD, because there are may be a lot of white
2832 * pixels due to padding, and we don't want to include them.
2833 * This also makes the debug histo plots more informative. */
2834 minscore = 1.0;
2835 nadist = numaCreate(n);
2836 nascore = numaCreate(n);
2837 bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL;
2838 for (i = 0; i < n; i++) {
2839 na1 = numaaGetNuma(naa1, i, L_CLONE);
2840 na2 = numaaGetNuma(naa2, i, L_CLONE);
2841 numaSetValue(na1, 255, 0.0);
2842 numaSetValue(na2, 255, 0.0);
2843
2844 /* To compare histograms, use the normalized earthmover distance.
2845 * Further normalize to get the EM distance as a fraction of the
2846 * maximum distance in the histogram (255). Finally, scale this
2847 * up by 10.0, and subtract from 1.0 to get a similarity score. */
2848 numaEarthMoverDistance(na1, na2, &dist);
2849 score = L_MAX(0.0, 1.0 - 10.0 * (dist / 255.));
2850 numaAddNumber(nadist, dist);
2851 numaAddNumber(nascore, score);
2852 minscore = L_MIN(minscore, score);
2853 if (pixadebug) {
2854 snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d", i);
2855 gplotSimple2(na1, na2, GPLOT_PNG, buf1, "Histos");
2856 }
2857 numaDestroy(&na1);
2858 numaDestroy(&na2);
2859 }
2860 *pscore = minscore;
2861
2862 if (pixadebug) {
2863 for (i = 0; i < n; i++) {
2864 PIX *pix1, *pix2;
2865 snprintf(buf1, sizeof(buf1), "/tmp/lept/comptile/plot.%d.png", i);
2866 pix1 = pixRead(buf1);
2867 numaGetFValue(nadist, i, &dist);
2868 numaGetFValue(nascore, i, &score);
2869 snprintf(buf2, sizeof(buf2),
2870 "Image %d\ndist = %5.3f, score = %5.3f", i, dist, score);
2871 pix2 = pixAddTextlines(pix1, bmf, buf2, 0x0000ff00, L_ADD_BELOW);
2872 pixaAddPix(pixadebug, pix2, L_INSERT);
2873 pixDestroy(&pix1);
2874 }
2875 lept_stderr("Writing to /tmp/lept/comptile/comparegray.pdf\n");
2876 pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL,
2877 "/tmp/lept/comptile/comparegray.pdf");
2878 numaWriteDebug("/tmp/lept/comptile/scores.na", nascore);
2879 numaWriteDebug("/tmp/lept/comptile/dists.na", nadist);
2880 }
2881
2882 bmfDestroy(&bmf);
2883 numaDestroy(&nadist);
2884 numaDestroy(&nascore);
2885 return 0;
2886 }
2887
2888
2889 /*!
2890 * \brief pixCompareGrayByHisto()
2891 *
2892 * \param[in] pix1, pix2 any depth; colormap OK
2893 * \param[in] box1, box2 [optional] region selected from each; can be null
2894 * \param[in] minratio requiring sizes be compatible; < 1.0
2895 * \param[in] maxgray max value to keep in histo; >= 200, 255 to keep all
2896 * \param[in] factor subsampling factor; >= 1
2897 * \param[in] n in range {1, ... 7}. n^2 is the maximum number
2898 * of subregions for histograms; typ. n = 3.
2899 * \param[out] pscore similarity score of histograms
2900 * \param[in] debugflag 1 for debug output; 0 for no debugging
2901 * \return 0 if OK, 1 on error
2902 *
2903 * <pre>
2904 * Notes:
2905 * (1) This function compares two grayscale photo regions. It can
2906 * do it with a single histogram from each region, or with a
2907 * set of spatially aligned histograms. For both cases,
2908 * align the regions using the centroid of the inverse image,
2909 * and crop to the smallest of the two.
2910 * (2) The parameter %n specifies the "side" of an n x n grid
2911 * of subimages. If the subimages have an aspect ratio larger
2912 * than 2, the grid will change, using n^2 as a maximum for
2913 * the number of subimages. For example, if n == 3, but the
2914 * image is 600 x 200 pixels, a 3x3 grid would have subimages
2915 * of 200 x 67 pixels, which is more than 2:1, so we change
2916 * to a 4x2 grid where each subimage has 150 x 100 pixels.
2917 * (3) An initial filter gives %score = 0 if the ratio of widths
2918 * and heights (smallest / largest) does not exceed a
2919 * threshold %minratio. This must be between 0.5 and 1.0.
2920 * If set at 1.0, both images must be exactly the same size.
2921 * A typical value for %minratio is 0.9.
2922 * (4) The lightest values in the histogram can be disregarded.
2923 * Set %maxgray to the lightest value to be kept. For example,
2924 * to eliminate white (255), set %maxgray = 254. %maxgray must
2925 * be >= 200.
2926 * (5) For an efficient representation of the histogram, normalize
2927 * using a multiplicative factor so that the number in the
2928 * maximum bucket is 255. It then takes 256 bytes to store.
2929 * (6) When comparing the histograms of two regions:
2930 * ~ Use %maxgray = 254 to ignore the white pixels, the number
2931 * of which may be sensitive to the crop region if the pixels
2932 * outside that region are white.
2933 * ~ Use the Earth Mover distance (EMD), with the histograms
2934 * normalized so that the sum over bins is the same.
2935 * Further normalize by dividing by 255, so that the result
2936 * is in [0.0 ... 1.0].
2937 * (7) Get a similarity score S = 1.0 - k * D, where
2938 * k is a constant, say in the range 5-10
2939 * D = normalized EMD
2940 * and for multiple tiles, take the Min(S) to be the final score.
2941 * Using aligned tiles gives protection against accidental
2942 * similarity of the overall grayscale histograms.
2943 * A small number of aligned tiles works well.
2944 * (8) With debug on, you get a pdf that shows, for each tile,
2945 * the images, histograms and score.
2946 * (9) When to use:
2947 * (a) Because this function should not be used on text or
2948 * line graphics, which can give false positive results
2949 * (i.e., high scores for different images), the input
2950 * images should be filtered.
2951 * (b) To filter, first use pixDecideIfText(). If that function
2952 * says the image is text, do not use it. If the function
2953 * says it is not text, it still may be line graphics, and
2954 * in that case, use:
2955 * pixGetGrayHistogramTiled()
2956 * grayInterHistogramStats()
2957 * to determine whether it is photo or line graphics.
2958 * </pre>
2959 */
2960 l_ok
2961 pixCompareGrayByHisto(PIX *pix1,
2962 PIX *pix2,
2963 BOX *box1,
2964 BOX *box2,
2965 l_float32 minratio,
2966 l_int32 maxgray,
2967 l_int32 factor,
2968 l_int32 n,
2969 l_float32 *pscore,
2970 l_int32 debugflag)
2971 {
2972 l_int32 w1, h1, w2, h2;
2973 l_float32 wratio, hratio;
2974 BOX *box3, *box4;
2975 PIX *pix3, *pix4, *pix5, *pix6, *pix7, *pix8;
2976 PIXA *pixa;
2977
2978 if (!pscore)
2979 return ERROR_INT("&score not defined", __func__, 1);
2980 *pscore = 0.0;
2981 if (!pix1 || !pix2)
2982 return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
2983 if (minratio < 0.5 || minratio > 1.0)
2984 return ERROR_INT("minratio not in [0.5 ... 1.0]", __func__, 1);
2985 if (maxgray < 200)
2986 return ERROR_INT("invalid maxgray; should be >= 200", __func__, 1);
2987 maxgray = L_MIN(255, maxgray);
2988 if (factor < 1)
2989 return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
2990 if (n < 1 || n > 7) {
2991 L_WARNING("n = %d is invalid; setting to 4\n", __func__, n);
2992 n = 4;
2993 }
2994
2995 if (debugflag)
2996 lept_mkdir("lept/comp");
2997
2998 /* Initial filter by size */
2999 if (box1)
3000 boxGetGeometry(box1, NULL, NULL, &w1, &h1);
3001 else
3002 pixGetDimensions(pix1, &w1, &h1, NULL);
3003 if (box2)
3004 boxGetGeometry(box2, NULL, NULL, &w2, &h2);
3005 else
3006 pixGetDimensions(pix1, &w2, &h2, NULL);
3007 wratio = (w1 < w2) ? (l_float32)w1 / (l_float32)w2 :
3008 (l_float32)w2 / (l_float32)w1;
3009 hratio = (h1 < h2) ? (l_float32)h1 / (l_float32)h2 :
3010 (l_float32)h2 / (l_float32)h1;
3011 if (wratio < minratio || hratio < minratio)
3012 return 0;
3013
3014 /* Initial crop, if necessary */
3015 if (box1)
3016 pix3 = pixClipRectangle(pix1, box1, NULL);
3017 else
3018 pix3 = pixClone(pix1);
3019 if (box2)
3020 pix4 = pixClipRectangle(pix2, box2, NULL);
3021 else
3022 pix4 = pixClone(pix2);
3023
3024 /* Convert to 8 bpp, align centroids and do maximal crop */
3025 pix5 = pixConvertTo8(pix3, FALSE);
3026 pix6 = pixConvertTo8(pix4, FALSE);
3027 pixCropAlignedToCentroid(pix5, pix6, factor, &box3, &box4);
3028 pix7 = pixClipRectangle(pix5, box3, NULL);
3029 pix8 = pixClipRectangle(pix6, box4, NULL);
3030 pixa = (debugflag) ? pixaCreate(0) : NULL;
3031 if (debugflag) {
3032 PIX *pix9, *pix10, *pix11, *pix12, *pix13;
3033 PIXA *pixa2;
3034 pix9 = pixConvertTo32(pix5);
3035 pix10 = pixConvertTo32(pix6);
3036 pixRenderBoxArb(pix9, box3, 2, 255, 0, 0);
3037 pixRenderBoxArb(pix10, box4, 2, 255, 0, 0);
3038 pix11 = pixScaleToSize(pix9, 400, 0);
3039 pix12 = pixScaleToSize(pix10, 400, 0);
3040 pixa2 = pixaCreate(2);
3041 pixaAddPix(pixa2, pix11, L_INSERT);
3042 pixaAddPix(pixa2, pix12, L_INSERT);
3043 pix13 = pixaDisplayTiledInRows(pixa2, 32, 1000, 1.0, 0, 50, 0);
3044 pixaAddPix(pixa, pix13, L_INSERT);
3045 pixDestroy(&pix9);
3046 pixDestroy(&pix10);
3047 pixaDestroy(&pixa2);
3048 }
3049 pixDestroy(&pix3);
3050 pixDestroy(&pix4);
3051 pixDestroy(&pix5);
3052 pixDestroy(&pix6);
3053 boxDestroy(&box3);
3054 boxDestroy(&box4);
3055
3056 /* Tile and compare histograms */
3057 pixCompareTilesByHisto(pix7, pix8, maxgray, factor, n, pscore, pixa);
3058 pixaDestroy(&pixa);
3059 pixDestroy(&pix7);
3060 pixDestroy(&pix8);
3061 return 0;
3062 }
3063
3064
3065 /*!
3066 * \brief pixCompareTilesByHisto()
3067 *
3068 * \param[in] pix1, pix2 8 bpp
3069 * \param[in] maxgray max value to keep in histo; 255 to keep all
3070 * \param[in] factor subsampling factor; >= 1
3071 * \param[in] n see pixCompareGrayByHisto()
3072 * \param[out] pscore similarity score of histograms
3073 * \param[in] pixadebug [optional] use only for debug output
3074 * \return 0 if OK, 1 on error
3075 *
3076 * <pre>
3077 * Notes:
3078 * (1) This static function is only called from pixCompareGrayByHisto().
3079 * The input images have been converted to 8 bpp if necessary,
3080 * aligned and cropped.
3081 * (2) The input pixadebug is null unless debug output is requested.
3082 * (3) See pixCompareGrayByHisto() for details.
3083 * </pre>
3084 */
3085 static l_ok
3086 pixCompareTilesByHisto(PIX *pix1,
3087 PIX *pix2,
3088 l_int32 maxgray,
3089 l_int32 factor,
3090 l_int32 n,
3091 l_float32 *pscore,
3092 PIXA *pixadebug)
3093 {
3094 char buf[64];
3095 l_int32 w, h, i, j, nx, ny, ngr;
3096 l_float32 score, minscore, maxval1, maxval2, dist;
3097 L_BMF *bmf;
3098 NUMA *na1, *na2, *na3, *na4, *na5, *na6, *na7;
3099 PIX *pix3, *pix4;
3100 PIXA *pixa1, *pixa2;
3101
3102 if (!pscore)
3103 return ERROR_INT("&score not defined", __func__, 1);
3104 *pscore = 0.0;
3105 if (!pix1 || !pix2)
3106 return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
3107
3108 /* Determine grid from n */
3109 pixGetDimensions(pix1, &w, &h, NULL);
3110 findHistoGridDimensions(n, w, h, &nx, &ny, 1);
3111 ngr = nx * ny;
3112
3113 /* Evaluate histograms in each tile */
3114 pixa1 = pixaSplitPix(pix1, nx, ny, 0, 0);
3115 pixa2 = pixaSplitPix(pix2, nx, ny, 0, 0);
3116 na7 = (pixadebug) ? numaCreate(ngr) : NULL;
3117 bmf = (pixadebug) ? bmfCreate(NULL, 6) : NULL;
3118 minscore = 1.0;
3119 for (i = 0; i < ngr; i++) {
3120 pix3 = pixaGetPix(pixa1, i, L_CLONE);
3121 pix4 = pixaGetPix(pixa2, i, L_CLONE);
3122
3123 /* Get histograms, set white count to 0, normalize max to 255 */
3124 na1 = pixGetGrayHistogram(pix3, factor);
3125 na2 = pixGetGrayHistogram(pix4, factor);
3126 if (maxgray < 255) {
3127 for (j = maxgray + 1; j <= 255; j++) {
3128 numaSetValue(na1, j, 0);
3129 numaSetValue(na2, j, 0);
3130 }
3131 }
3132 na3 = numaWindowedMean(na1, 5);
3133 na4 = numaWindowedMean(na2, 5);
3134 numaGetMax(na3, &maxval1, NULL);
3135 numaGetMax(na4, &maxval2, NULL);
3136 na5 = numaTransform(na3, 0, 255.0 / maxval1);
3137 na6 = numaTransform(na4, 0, 255.0 / maxval2);
3138 if (pixadebug) {
3139 gplotSimple2(na5, na6, GPLOT_PNG, "/tmp/lept/comp/plot1", "Histos");
3140 }
3141
3142 /* To compare histograms, use the normalized earthmover distance.
3143 * Further normalize to get the EM distance as a fraction of the
3144 * maximum distance in the histogram (255). Finally, scale this
3145 * up by 10.0, and subtract from 1.0 to get a similarity score. */
3146 numaEarthMoverDistance(na5, na6, &dist);
3147 score = L_MAX(0.0, 1.0 - 8.0 * (dist / 255.));
3148 if (pixadebug) numaAddNumber(na7, score);
3149 minscore = L_MIN(minscore, score);
3150 if (pixadebug) {
3151 PIX *pix5, *pix6, *pix7, *pix8, *pix9, *pix10;
3152 PIXA *pixa3;
3153 l_int32 w, h, wscale;
3154 pixa3 = pixaCreate(3);
3155 pixGetDimensions(pix3, &w, &h, NULL);
3156 wscale = (w > h) ? 700 : 400;
3157 pix5 = pixScaleToSize(pix3, wscale, 0);
3158 pix6 = pixScaleToSize(pix4, wscale, 0);
3159 pixaAddPix(pixa3, pix5, L_INSERT);
3160 pixaAddPix(pixa3, pix6, L_INSERT);
3161 pix7 = pixRead("/tmp/lept/comp/plot1.png");
3162 pix8 = pixScaleToSize(pix7, 700, 0);
3163 snprintf(buf, sizeof(buf), "%5.3f", score);
3164 pix9 = pixAddTextlines(pix8, bmf, buf, 0x0000ff00, L_ADD_RIGHT);
3165 pixaAddPix(pixa3, pix9, L_INSERT);
3166 pix10 = pixaDisplayTiledInRows(pixa3, 32, 1000, 1.0, 0, 50, 0);
3167 pixaAddPix(pixadebug, pix10, L_INSERT);
3168 pixDestroy(&pix7);
3169 pixDestroy(&pix8);
3170 pixaDestroy(&pixa3);
3171 }
3172 numaDestroy(&na1);
3173 numaDestroy(&na2);
3174 numaDestroy(&na3);
3175 numaDestroy(&na4);
3176 numaDestroy(&na5);
3177 numaDestroy(&na6);
3178 pixDestroy(&pix3);
3179 pixDestroy(&pix4);
3180 }
3181 *pscore = minscore;
3182
3183 if (pixadebug) {
3184 pixaConvertToPdf(pixadebug, 300, 1.0, L_FLATE_ENCODE, 0, NULL,
3185 "/tmp/lept/comp/comparegray.pdf");
3186 numaWriteDebug("/tmp/lept/comp/tilescores.na", na7);
3187 }
3188
3189 bmfDestroy(&bmf);
3190 numaDestroy(&na7);
3191 pixaDestroy(&pixa1);
3192 pixaDestroy(&pixa2);
3193 return 0;
3194 }
3195
3196
3197 /*!
3198 * \brief pixCropAlignedToCentroid()
3199 *
3200 * \param[in] pix1, pix2 any depth; colormap OK
3201 * \param[in] factor subsampling; >= 1
3202 * \param[out] pbox1 crop box for pix1
3203 * \param[out] pbox2 crop box for pix2
3204 * \return 0 if OK, 1 on error
3205 *
3206 * <pre>
3207 * Notes:
3208 * (1) This finds the maximum crop boxes for two 8 bpp images when
3209 * their centroids of their photometric inverses are aligned.
3210 * Black pixels have weight 255; white pixels have weight 0.
3211 * </pre>
3212 */
3213 l_ok
3214 pixCropAlignedToCentroid(PIX *pix1,
3215 PIX *pix2,
3216 l_int32 factor,
3217 BOX **pbox1,
3218 BOX **pbox2)
3219 {
3220 l_float32 cx1, cy1, cx2, cy2;
3221 l_int32 w1, h1, w2, h2, icx1, icy1, icx2, icy2;
3222 l_int32 xm, xm1, xm2, xp, xp1, xp2, ym, ym1, ym2, yp, yp1, yp2;
3223 PIX *pix3, *pix4;
3224
3225 if (pbox1) *pbox1 = NULL;
3226 if (pbox2) *pbox2 = NULL;
3227 if (!pix1 || !pix2)
3228 return ERROR_INT("pix1 and pix2 not both defined", __func__, 1);
3229 if (factor < 1)
3230 return ERROR_INT("subsampling factor must be >= 1", __func__, 1);
3231 if (!pbox1 || !pbox2)
3232 return ERROR_INT("&box1 and &box2 not both defined", __func__, 1);
3233
3234 pix3 = pixConvertTo8(pix1, FALSE);
3235 pix4 = pixConvertTo8(pix2, FALSE);
3236 pixCentroid8(pix3, factor, &cx1, &cy1);
3237 pixCentroid8(pix4, factor, &cx2, &cy2);
3238 pixGetDimensions(pix3, &w1, &h1, NULL);
3239 pixGetDimensions(pix4, &w2, &h2, NULL);
3240 pixDestroy(&pix3);
3241 pixDestroy(&pix4);
3242
3243 icx1 = (l_int32)(cx1 + 0.5);
3244 icy1 = (l_int32)(cy1 + 0.5);
3245 icx2 = (l_int32)(cx2 + 0.5);
3246 icy2 = (l_int32)(cy2 + 0.5);
3247 xm = L_MIN(icx1, icx2);
3248 xm1 = icx1 - xm;
3249 xm2 = icx2 - xm;
3250 xp = L_MIN(w1 - icx1, w2 - icx2); /* one pixel beyond to the right */
3251 xp1 = icx1 + xp;
3252 xp2 = icx2 + xp;
3253 ym = L_MIN(icy1, icy2);
3254 ym1 = icy1 - ym;
3255 ym2 = icy2 - ym;
3256 yp = L_MIN(h1 - icy1, h2 - icy2); /* one pixel below the bottom */
3257 yp1 = icy1 + yp;
3258 yp2 = icy2 + yp;
3259 *pbox1 = boxCreate(xm1, ym1, xp1 - xm1, yp1 - ym1);
3260 *pbox2 = boxCreate(xm2, ym2, xp2 - xm2, yp2 - ym2);
3261 return 0;
3262 }
3263
3264
3265 /*!
3266 * \brief l_compressGrayHistograms()
3267 *
3268 * \param[in] naa set of 256-entry histograms
3269 * \param[in] w, h size of image
3270 * \param[out] psize size of byte array
3271 * \return 0 if OK, 1 on error
3272 *
3273 * <pre>
3274 * Notes:
3275 * (1) This first writes w and h to the byte array as 4 byte ints.
3276 * (2) Then it normalizes each histogram to a max value of 255,
3277 * and saves each value as a byte. If there are
3278 * N histograms, the output bytearray has 8 + 256 * N bytes.
3279 * (3) Further compression of the array with zlib yields only about
3280 * a 25% decrease in size, so we don't bother. If size reduction
3281 * were important, a lossy transform using a 1-dimensional DCT
3282 * would be effective, because we don't care about the fine
3283 * details of these histograms.
3284 * </pre>
3285 */
3286 l_uint8 *
3287 l_compressGrayHistograms(NUMAA *naa,
3288 l_int32 w,
3289 l_int32 h,
3290 size_t *psize)
3291 {
3292 l_uint8 *bytea;
3293 l_int32 i, j, n, nn, ival;
3294 l_float32 maxval;
3295 NUMA *na1, *na2;
3296
3297 if (!psize)
3298 return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL);
3299 *psize = 0;
3300 if (!naa)
3301 return (l_uint8 *)ERROR_PTR("naa not defined", __func__, NULL);
3302 n = numaaGetCount(naa);
3303 for (i = 0; i < n; i++) {
3304 nn = numaaGetNumaCount(naa, i);
3305 if (nn != 256) {
3306 L_ERROR("%d numbers in numa[%d]\n", __func__, nn, i);
3307 return NULL;
3308 }
3309 }
3310
3311 if ((bytea = (l_uint8 *)LEPT_CALLOC(8 + 256 * n, sizeof(l_uint8))) == NULL)
3312 return (l_uint8 *)ERROR_PTR("bytea not made", __func__, NULL);
3313 *psize = 8 + 256 * n;
3314 l_setDataFourBytes(bytea, 0, w);
3315 l_setDataFourBytes(bytea, 1, h);
3316 for (i = 0; i < n; i++) {
3317 na1 = numaaGetNuma(naa, i, L_COPY);
3318 numaGetMax(na1, &maxval, NULL);
3319 na2 = numaTransform(na1, 0, 255.0 / maxval);
3320 for (j = 0; j < 256; j++) {
3321 numaGetIValue(na2, j, &ival);
3322 bytea[8 + 256 * i + j] = ival;
3323 }
3324 numaDestroy(&na1);
3325 numaDestroy(&na2);
3326 }
3327
3328 return bytea;
3329 }
3330
3331
3332 /*!
3333 * \brief l_uncompressGrayHistograms()
3334 *
3335 * \param[in] bytea byte array of size 8 + 256 * N, N an integer
3336 * \param[in] size size of byte array
3337 * \param[out] pw width of the image that generated the histograms
3338 * \param[out] ph height of the image
3339 * \return numaa representing N histograms, each with 256 bins,
3340 * or NULL on error.
3341 *
3342 * <pre>
3343 * Notes:
3344 * (1) The first 8 bytes are read as two 32-bit ints.
3345 * (2) Then this constructs a numaa representing some number of
3346 * gray histograms that are normalized such that the max value
3347 * in each histogram is 255. The data is stored as a byte
3348 * array, with 256 bytes holding the data for each histogram.
3349 * Each gray histogram was computed from a tile of a grayscale image.
3350 * </pre>
3351 */
3352 NUMAA *
3353 l_uncompressGrayHistograms(l_uint8 *bytea,
3354 size_t size,
3355 l_int32 *pw,
3356 l_int32 *ph)
3357 {
3358 l_int32 i, j, n;
3359 NUMA *na;
3360 NUMAA *naa;
3361
3362 if (pw) *pw = 0;
3363 if (ph) *ph = 0;
3364 if (!pw || !ph)
3365 return (NUMAA *)ERROR_PTR("&w and &h not both defined", __func__, NULL);
3366 if (!bytea)
3367 return (NUMAA *)ERROR_PTR("bytea not defined", __func__, NULL);
3368 n = (size - 8) / 256;
3369 if ((size - 8) % 256 != 0)
3370 return (NUMAA *)ERROR_PTR("bytea size is invalid", __func__, NULL);
3371
3372 *pw = l_getDataFourBytes(bytea, 0);
3373 *ph = l_getDataFourBytes(bytea, 1);
3374 naa = numaaCreate(n);
3375 for (i = 0; i < n; i++) {
3376 na = numaCreate(256);
3377 for (j = 0; j < 256; j++)
3378 numaAddNumber(na, bytea[8 + 256 * i + j]);
3379 numaaAddNuma(naa, na, L_INSERT);
3380 }
3381
3382 return naa;
3383 }
3384
3385
3386 /*------------------------------------------------------------------*
3387 * Translated images at the same resolution *
3388 *------------------------------------------------------------------*/
3389 /*!
3390 * \brief pixCompareWithTranslation()
3391 *
3392 * \param[in] pix1, pix2 any depth; colormap OK
3393 * \param[in] thresh threshold for converting to 1 bpp
3394 * \param[out] pdelx x translation on pix2 to align with pix1
3395 * \param[out] pdely y translation on pix2 to align with pix1
3396 * \param[out] pscore correlation score at best alignment
3397 * \param[in] debugflag 1 for debug output; 0 for no debugging
3398 * \return 0 if OK, 1 on error
3399 *
3400 * <pre>
3401 * Notes:
3402 * (1) This does a coarse-to-fine search for best translational
3403 * alignment of two images, measured by a scoring function
3404 * that is the correlation between the fg pixels.
3405 * (2) The threshold is used if the images aren't 1 bpp.
3406 * (3) With debug on, you get a pdf that shows, as a grayscale
3407 * image, the score as a function of shift from the initial
3408 * estimate, for each of the four levels. The shift is 0 at
3409 * the center of the image.
3410 * (4) With debug on, you also get a pdf that shows the
3411 * difference at the best alignment between the two images,
3412 * at each of the four levels. The red and green pixels
3413 * show locations where one image has a fg pixel and the
3414 * other doesn't. The black pixels are where both images
3415 * have fg pixels, and white pixels are where neither image
3416 * has fg pixels.
3417 * </pre>
3418 */
3419 l_ok
3420 pixCompareWithTranslation(PIX *pix1,
3421 PIX *pix2,
3422 l_int32 thresh,
3423 l_int32 *pdelx,
3424 l_int32 *pdely,
3425 l_float32 *pscore,
3426 l_int32 debugflag)
3427 {
3428 l_uint8 *subtab;
3429 l_int32 i, level, area1, area2, delx, dely;
3430 l_int32 etransx, etransy, maxshift, dbint;
3431 l_int32 *stab, *ctab;
3432 l_float32 cx1, cx2, cy1, cy2, score;
3433 PIX *pixb1, *pixb2, *pixt1, *pixt2, *pixt3, *pixt4;
3434 PIXA *pixa1, *pixa2, *pixadb = NULL;
3435
3436 if (pdelx) *pdelx = 0;
3437 if (pdely) *pdely = 0;
3438 if (pscore) *pscore = 0.0;
3439 if (!pdelx || !pdely)
3440 return ERROR_INT("&delx and &dely not defined", __func__, 1);
3441 if (!pscore)
3442 return ERROR_INT("&score not defined", __func__, 1);
3443 if (!pix1)
3444 return ERROR_INT("pix1 not defined", __func__, 1);
3445 if (!pix2)
3446 return ERROR_INT("pix2 not defined", __func__, 1);
3447
3448 /* Make tables */
3449 subtab = makeSubsampleTab2x();
3450 stab = makePixelSumTab8();
3451 ctab = makePixelCentroidTab8();
3452
3453 /* Binarize each image */
3454 pixb1 = pixConvertTo1(pix1, thresh);
3455 pixb2 = pixConvertTo1(pix2, thresh);
3456
3457 /* Make a cascade of 2x reduced images for each, thresholding
3458 * with level 2 (neutral), down to 8x reduction */
3459 pixa1 = pixaCreate(4);
3460 pixa2 = pixaCreate(4);
3461 if (debugflag)
3462 pixadb = pixaCreate(4);
3463 pixaAddPix(pixa1, pixb1, L_INSERT);
3464 pixaAddPix(pixa2, pixb2, L_INSERT);
3465 for (i = 0; i < 3; i++) {
3466 pixt1 = pixReduceRankBinary2(pixb1, 2, subtab);
3467 pixt2 = pixReduceRankBinary2(pixb2, 2, subtab);
3468 pixaAddPix(pixa1, pixt1, L_INSERT);
3469 pixaAddPix(pixa2, pixt2, L_INSERT);
3470 pixb1 = pixt1;
3471 pixb2 = pixt2;
3472 }
3473
3474 /* At the lowest level, use the centroids with a maxshift of 6
3475 * to search for the best alignment. Then at higher levels,
3476 * use the result from the level below as the initial approximation
3477 * for the alignment, and search with a maxshift of 2. */
3478 for (level = 3; level >= 0; level--) {
3479 pixt1 = pixaGetPix(pixa1, level, L_CLONE);
3480 pixt2 = pixaGetPix(pixa2, level, L_CLONE);
3481 pixCountPixels(pixt1, &area1, stab);
3482 pixCountPixels(pixt2, &area2, stab);
3483 if (level == 3) {
3484 pixCentroid(pixt1, ctab, stab, &cx1, &cy1);
3485 pixCentroid(pixt2, ctab, stab, &cx2, &cy2);
3486 etransx = lept_roundftoi(cx1 - cx2);
3487 etransy = lept_roundftoi(cy1 - cy2);
3488 maxshift = 6;
3489 } else {
3490 etransx = 2 * delx;
3491 etransy = 2 * dely;
3492 maxshift = 2;
3493 }
3494 dbint = (debugflag) ? level + 1 : 0;
3495 pixBestCorrelation(pixt1, pixt2, area1, area2, etransx, etransy,
3496 maxshift, stab, &delx, &dely, &score, dbint);
3497 if (debugflag) {
3498 lept_stderr("Level %d: delx = %d, dely = %d, score = %7.4f\n",
3499 level, delx, dely, score);
3500 pixRasteropIP(pixt2, delx, dely, L_BRING_IN_WHITE);
3501 pixt3 = pixDisplayDiffBinary(pixt1, pixt2);
3502 pixt4 = pixExpandReplicate(pixt3, 8 / (1 << (3 - level)));
3503 pixaAddPix(pixadb, pixt4, L_INSERT);
3504 pixDestroy(&pixt3);
3505 }
3506 pixDestroy(&pixt1);
3507 pixDestroy(&pixt2);
3508 }
3509
3510 if (debugflag) {
3511 pixaConvertToPdf(pixadb, 300, 1.0, L_FLATE_ENCODE, 0, NULL,
3512 "/tmp/lept/comp/compare.pdf");
3513 convertFilesToPdf("/tmp/lept/comp", "correl_", 30, 1.0, L_FLATE_ENCODE,
3514 0, "Correlation scores at levels 1 through 5",
3515 "/tmp/lept/comp/correl.pdf");
3516 pixaDestroy(&pixadb);
3517 }
3518
3519 *pdelx = delx;
3520 *pdely = dely;
3521 *pscore = score;
3522 pixaDestroy(&pixa1);
3523 pixaDestroy(&pixa2);
3524 LEPT_FREE(subtab);
3525 LEPT_FREE(stab);
3526 LEPT_FREE(ctab);
3527 return 0;
3528 }
3529
3530
3531 /*!
3532 * \brief pixBestCorrelation()
3533 *
3534 * \param[in] pix1 1 bpp
3535 * \param[in] pix2 1 bpp
3536 * \param[in] area1 number of on pixels in pix1
3537 * \param[in] area2 number of on pixels in pix2
3538 * \param[in] etransx estimated x translation of pix2 to align with pix1
3539 * \param[in] etransy estimated y translation of pix2 to align with pix1
3540 * \param[in] maxshift max x and y shift of pix2, around the estimated
3541 * alignment location, relative to pix1
3542 * \param[in] tab8 [optional] sum tab for ON pixels in byte; can be NULL
3543 * \param[out] pdelx [optional] best x shift of pix2 relative to pix1
3544 * \param[out] pdely [optional] best y shift of pix2 relative to pix1
3545 * \param[out] pscore [optional] maximum score found; can be NULL
3546 * \param[in] debugflag <= 0 to skip; positive to generate output.
3547 * The integer is used to label the debug image.
3548 * \return 0 if OK, 1 on error
3549 *
3550 * <pre>
3551 * Notes:
3552 * (1) This maximizes the correlation score between two 1 bpp images,
3553 * by starting with an estimate of the alignment
3554 * (%etransx, %etransy) and computing the correlation around this.
3555 * It optionally returns the shift (%delx, %dely) that maximizes
3556 * the correlation score when pix2 is shifted by this amount
3557 * relative to pix1.
3558 * (2) Get the centroids of pix1 and pix2, using pixCentroid(),
3559 * to compute (%etransx, %etransy). Get the areas using
3560 * pixCountPixels().
3561 * (3) The centroid of pix2 is shifted with respect to the centroid
3562 * of pix1 by all values between -maxshiftx and maxshiftx,
3563 * and likewise for the y shifts. Therefore, the number of
3564 * correlations computed is:
3565 * (2 * maxshiftx + 1) * (2 * maxshifty + 1)
3566 * Consequently, if pix1 and pix2 are large, you should do this
3567 * in a coarse-to-fine sequence. See the use of this function
3568 * in pixCompareWithTranslation().
3569 * </pre>
3570 */
3571 l_ok
3572 pixBestCorrelation(PIX *pix1,
3573 PIX *pix2,
3574 l_int32 area1,
3575 l_int32 area2,
3576 l_int32 etransx,
3577 l_int32 etransy,
3578 l_int32 maxshift,
3579 l_int32 *tab8,
3580 l_int32 *pdelx,
3581 l_int32 *pdely,
3582 l_float32 *pscore,
3583 l_int32 debugflag)
3584 {
3585 l_int32 shiftx, shifty, delx, dely;
3586 l_int32 *tab;
3587 l_float32 maxscore, score;
3588 FPIX *fpix = NULL;
3589 PIX *pix3, *pix4;
3590
3591 if (pdelx) *pdelx = 0;
3592 if (pdely) *pdely = 0;
3593 if (pscore) *pscore = 0.0;
3594 if (!pix1 || pixGetDepth(pix1) != 1)
3595 return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1);
3596 if (!pix2 || pixGetDepth(pix2) != 1)
3597 return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1);
3598 if (!area1 || !area2)
3599 return ERROR_INT("areas must be > 0", __func__, 1);
3600
3601 if (debugflag > 0)
3602 fpix = fpixCreate(2 * maxshift + 1, 2 * maxshift + 1);
3603
3604 if (!tab8)
3605 tab = makePixelSumTab8();
3606 else
3607 tab = tab8;
3608
3609 /* Search over a set of {shiftx, shifty} for the max */
3610 maxscore = 0;
3611 delx = etransx;
3612 dely = etransy;
3613 for (shifty = -maxshift; shifty <= maxshift; shifty++) {
3614 for (shiftx = -maxshift; shiftx <= maxshift; shiftx++) {
3615 pixCorrelationScoreShifted(pix1, pix2, area1, area2,
3616 etransx + shiftx,
3617 etransy + shifty, tab, &score);
3618 if (debugflag > 0) {
3619 fpixSetPixel(fpix, maxshift + shiftx, maxshift + shifty,
3620 1000.0 * score);
3621 /* lept_stderr("(sx, sy) = (%d, %d): score = %6.4f\n",
3622 shiftx, shifty, score); */
3623 }
3624 if (score > maxscore) {
3625 maxscore = score;
3626 delx = etransx + shiftx;
3627 dely = etransy + shifty;
3628 }
3629 }
3630 }
3631
3632 if (debugflag > 0) {
3633 char buf[128];
3634 lept_mkdir("lept/comp");
3635 pix3 = fpixDisplayMaxDynamicRange(fpix);
3636 pix4 = pixExpandReplicate(pix3, 20);
3637 snprintf(buf, sizeof(buf), "/tmp/lept/comp/correl_%d.png",
3638 debugflag);
3639 pixWrite(buf, pix4, IFF_PNG);
3640 pixDestroy(&pix3);
3641 pixDestroy(&pix4);
3642 fpixDestroy(&fpix);
3643 }
3644
3645 if (pdelx) *pdelx = delx;
3646 if (pdely) *pdely = dely;
3647 if (pscore) *pscore = maxscore;
3648 if (!tab8) LEPT_FREE(tab);
3649 return 0;
3650 }