Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/correlscore.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /* | |
| 28 * correlscore.c | |
| 29 * | |
| 30 * These are functions for computing correlation between | |
| 31 * pairs of 1 bpp images. | |
| 32 * | |
| 33 * Optimized 2 pix correlators (for jbig2 clustering) | |
| 34 * l_int32 pixCorrelationScore() | |
| 35 * l_int32 pixCorrelationScoreThresholded() | |
| 36 * | |
| 37 * Simple 2 pix correlators | |
| 38 * l_int32 pixCorrelationScoreSimple() | |
| 39 * l_int32 pixCorrelationScoreShifted() | |
| 40 * | |
| 41 * There are other, more application-oriented functions, that | |
| 42 * compute the correlation between two binary images, taking into | |
| 43 * account small translational shifts, between two binary images. | |
| 44 * These are: | |
| 45 * compare.c: pixBestCorrelation() | |
| 46 * Uses coarse-to-fine translations of full image | |
| 47 * recogident.c: pixCorrelationBestShift() | |
| 48 * Uses small shifts between c.c. centroids. | |
| 49 */ | |
| 50 | |
| 51 #ifdef HAVE_CONFIG_H | |
| 52 #include <config_auto.h> | |
| 53 #endif /* HAVE_CONFIG_H */ | |
| 54 | |
| 55 #include <math.h> | |
| 56 #include "allheaders.h" | |
| 57 | |
| 58 /* -------------------------------------------------------------------- * | |
| 59 * Optimized 2 pix correlators (for jbig2 clustering) * | |
| 60 * -------------------------------------------------------------------- */ | |
| 61 /*! | |
| 62 * \brief pixCorrelationScore() | |
| 63 * | |
| 64 * \param[in] pix1 test pix, 1 bpp | |
| 65 * \param[in] pix2 exemplar pix, 1 bpp | |
| 66 * \param[in] area1 number of on pixels in pix1 | |
| 67 * \param[in] area2 number of on pixels in pix2 | |
| 68 * \param[in] delx x comp of centroid difference | |
| 69 * \param[in] dely y comp of centroid difference | |
| 70 * \param[in] maxdiffw max width difference of pix1 and pix2 | |
| 71 * \param[in] maxdiffh max height difference of pix1 and pix2 | |
| 72 * \param[in] tab sum tab for byte | |
| 73 * \param[out] pscore correlation score | |
| 74 * \return 0 if OK, 1 on error | |
| 75 * | |
| 76 * <pre> | |
| 77 * Notes: | |
| 78 * We check first that the two pix are roughly the same size. | |
| 79 * For jbclass (jbig2) applications at roughly 300 ppi, maxdiffw and | |
| 80 * maxdiffh should be at least 2. | |
| 81 * | |
| 82 * Only if they meet that criterion do we compare the bitmaps. | |
| 83 * The centroid difference is used to align the two images to the | |
| 84 * nearest integer for the correlation. | |
| 85 * | |
| 86 * The correlation score is the ratio of the square of the number of | |
| 87 * pixels in the AND of the two bitmaps to the product of the number | |
| 88 * of ON pixels in each. Denote the number of ON pixels in pix1 | |
| 89 * by |1|, the number in pix2 by |2|, and the number in the AND | |
| 90 * of pix1 and pix2 by |1 & 2|. The correlation score is then | |
| 91 * (|1 & 2|)**2 / (|1|*|2|). | |
| 92 * | |
| 93 * This score is compared with an input threshold, which can | |
| 94 * be modified depending on the weight of the template. | |
| 95 * The modified threshold is | |
| 96 * thresh + (1.0 - thresh) * weight * R | |
| 97 * where | |
| 98 * weight is a fixed input factor between 0.0 and 1.0 | |
| 99 * R = |2| / area(2) | |
| 100 * and area(2) is the total number of pixels in 2 (i.e., width x height). | |
| 101 * | |
| 102 * To understand why a weight factor is useful, consider what happens | |
| 103 * with thick, sans-serif characters that look similar and have a value | |
| 104 * of R near 1. Different characters can have a high correlation value, | |
| 105 * and the classifier will make incorrect substitutions. The weight | |
| 106 * factor raises the threshold for these characters. | |
| 107 * | |
| 108 * Yet another approach to reduce such substitutions is to run the classifier | |
| 109 * in a non-greedy way, matching to the template with the highest | |
| 110 * score, not the first template with a score satisfying the matching | |
| 111 * constraint. However, this is not particularly effective. | |
| 112 * | |
| 113 * The implementation here gives the same result as in | |
| 114 * pixCorrelationScoreSimple(), where a temporary Pix is made to hold | |
| 115 * the AND and implementation uses rasterop: | |
| 116 * pixt = pixCreateTemplate(pix1); | |
| 117 * pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0); | |
| 118 * pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0); | |
| 119 * pixCountPixels(pixt, &count, tab); | |
| 120 * pixDestroy(&pixt); | |
| 121 * However, here it is done in a streaming fashion, counting as it goes, | |
| 122 * and touching memory exactly once, giving a 3-4x speedup over the | |
| 123 * simple implementation. This very fast correlation matcher was | |
| 124 * contributed by William Rucklidge. | |
| 125 * </pre> | |
| 126 */ | |
| 127 l_ok | |
| 128 pixCorrelationScore(PIX *pix1, | |
| 129 PIX *pix2, | |
| 130 l_int32 area1, | |
| 131 l_int32 area2, | |
| 132 l_float32 delx, /* x(1) - x(3) */ | |
| 133 l_float32 dely, /* y(1) - y(3) */ | |
| 134 l_int32 maxdiffw, | |
| 135 l_int32 maxdiffh, | |
| 136 l_int32 *tab, | |
| 137 l_float32 *pscore) | |
| 138 { | |
| 139 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; | |
| 140 l_int32 wpl1, wpl2, lorow, hirow, locol, hicol; | |
| 141 l_int32 x, y, pix1lskip, pix2lskip, rowwords1, rowwords2; | |
| 142 l_uint32 word1, word2, andw; | |
| 143 l_uint32 *row1, *row2; | |
| 144 | |
| 145 if (!pscore) | |
| 146 return ERROR_INT("&score not defined", __func__, 1); | |
| 147 *pscore = 0.0; | |
| 148 if (!pix1 || pixGetDepth(pix1) != 1) | |
| 149 return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 1); | |
| 150 if (!pix2 || pixGetDepth(pix2) != 1) | |
| 151 return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 1); | |
| 152 if (!tab) | |
| 153 return ERROR_INT("tab not defined", __func__, 1); | |
| 154 if (area1 <= 0 || area2 <= 0) | |
| 155 return ERROR_INT("areas must be > 0", __func__, 1); | |
| 156 | |
| 157 /* Eliminate based on size difference */ | |
| 158 pixGetDimensions(pix1, &wi, &hi, NULL); | |
| 159 pixGetDimensions(pix2, &wt, &ht, NULL); | |
| 160 delw = L_ABS(wi - wt); | |
| 161 if (delw > maxdiffw) | |
| 162 return 0; | |
| 163 delh = L_ABS(hi - ht); | |
| 164 if (delh > maxdiffh) | |
| 165 return 0; | |
| 166 | |
| 167 /* Round difference to nearest integer */ | |
| 168 if (delx >= 0) | |
| 169 idelx = (l_int32)(delx + 0.5); | |
| 170 else | |
| 171 idelx = (l_int32)(delx - 0.5); | |
| 172 if (dely >= 0) | |
| 173 idely = (l_int32)(dely + 0.5); | |
| 174 else | |
| 175 idely = (l_int32)(dely - 0.5); | |
| 176 | |
| 177 count = 0; | |
| 178 wpl1 = pixGetWpl(pix1); | |
| 179 wpl2 = pixGetWpl(pix2); | |
| 180 rowwords2 = wpl2; | |
| 181 | |
| 182 /* What rows of pix1 need to be considered? Only those underlying the | |
| 183 * shifted pix2. */ | |
| 184 lorow = L_MAX(idely, 0); | |
| 185 hirow = L_MIN(ht + idely, hi); | |
| 186 | |
| 187 /* Get the pointer to the first row of each image that will be | |
| 188 * considered. */ | |
| 189 row1 = pixGetData(pix1) + wpl1 * lorow; | |
| 190 row2 = pixGetData(pix2) + wpl2 * (lorow - idely); | |
| 191 | |
| 192 /* Similarly, figure out which columns of pix1 will be considered. */ | |
| 193 locol = L_MAX(idelx, 0); | |
| 194 hicol = L_MIN(wt + idelx, wi); | |
| 195 | |
| 196 if (idelx >= 32) { | |
| 197 /* pix2 is shifted far enough to the right that pix1's first | |
| 198 * word(s) won't contribute to the count. Increment its | |
| 199 * pointer to point to the first word that will contribute, | |
| 200 * and adjust other values accordingly. */ | |
| 201 pix1lskip = idelx >> 5; /* # of words to skip on left */ | |
| 202 row1 += pix1lskip; | |
| 203 locol -= pix1lskip << 5; | |
| 204 hicol -= pix1lskip << 5; | |
| 205 idelx &= 31; | |
| 206 } else if (idelx <= -32) { | |
| 207 /* pix2 is shifted far enough to the left that its first word(s) | |
| 208 * won't contribute to the count. Increment its pointer | |
| 209 * to point to the first word that will contribute, | |
| 210 * and adjust other values accordingly. */ | |
| 211 pix2lskip = -((idelx + 31) >> 5); /* # of words to skip on left */ | |
| 212 row2 += pix2lskip; | |
| 213 rowwords2 -= pix2lskip; | |
| 214 idelx += pix2lskip << 5; | |
| 215 } | |
| 216 | |
| 217 if ((locol >= hicol) || (lorow >= hirow)) { /* there is no overlap */ | |
| 218 count = 0; | |
| 219 } else { | |
| 220 /* How many words of each row of pix1 need to be considered? */ | |
| 221 rowwords1 = (hicol + 31) >> 5; | |
| 222 | |
| 223 if (idelx == 0) { | |
| 224 /* There's no lateral offset; simple case. */ | |
| 225 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 226 for (x = 0; x < rowwords1; x++) { | |
| 227 andw = row1[x] & row2[x]; | |
| 228 count += tab[andw & 0xff] + | |
| 229 tab[(andw >> 8) & 0xff] + | |
| 230 tab[(andw >> 16) & 0xff] + | |
| 231 tab[andw >> 24]; | |
| 232 } | |
| 233 } | |
| 234 } else if (idelx > 0) { | |
| 235 /* pix2 is shifted to the right. word 0 of pix1 is touched by | |
| 236 * word 0 of pix2; word 1 of pix1 is touched by word 0 and word | |
| 237 * 1 of pix2, and so on up to the last word of pix1 (word N), | |
| 238 * which is touched by words N-1 and N of pix1... if there is a | |
| 239 * word N. Handle the two cases (pix2 has N-1 words and pix2 | |
| 240 * has at least N words) separately. | |
| 241 * | |
| 242 * Note: we know that pix2 has at least N-1 words (i.e., | |
| 243 * rowwords2 >= rowwords1 - 1) by the following logic. | |
| 244 * We can pretend that idelx <= 31 because the >= 32 logic | |
| 245 * above adjusted everything appropriately. Then | |
| 246 * hicol <= wt + idelx <= wt + 31, so | |
| 247 * hicol + 31 <= wt + 62 | |
| 248 * rowwords1 = (hicol + 31) >> 5 <= (wt + 62) >> 5 | |
| 249 * rowwords2 == (wt + 31) >> 5, so | |
| 250 * rowwords1 <= rowwords2 + 1 */ | |
| 251 if (rowwords2 < rowwords1) { | |
| 252 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 253 /* Do the first iteration so the loop can be | |
| 254 * branch-free. */ | |
| 255 word1 = row1[0]; | |
| 256 word2 = row2[0] >> idelx; | |
| 257 andw = word1 & word2; | |
| 258 count += tab[andw & 0xff] + | |
| 259 tab[(andw >> 8) & 0xff] + | |
| 260 tab[(andw >> 16) & 0xff] + | |
| 261 tab[andw >> 24]; | |
| 262 | |
| 263 for (x = 1; x < rowwords2; x++) { | |
| 264 word1 = row1[x]; | |
| 265 word2 = (row2[x] >> idelx) | | |
| 266 (row2[x - 1] << (32 - idelx)); | |
| 267 andw = word1 & word2; | |
| 268 count += tab[andw & 0xff] + | |
| 269 tab[(andw >> 8) & 0xff] + | |
| 270 tab[(andw >> 16) & 0xff] + | |
| 271 tab[andw >> 24]; | |
| 272 } | |
| 273 | |
| 274 /* Now the last iteration - we know that this is safe | |
| 275 * (i.e. rowwords1 >= 2) because rowwords1 > rowwords2 | |
| 276 * > 0 (if it was 0, we'd have taken the "count = 0" | |
| 277 * fast-path out of here). */ | |
| 278 word1 = row1[x]; | |
| 279 word2 = row2[x - 1] << (32 - idelx); | |
| 280 andw = word1 & word2; | |
| 281 count += tab[andw & 0xff] + | |
| 282 tab[(andw >> 8) & 0xff] + | |
| 283 tab[(andw >> 16) & 0xff] + | |
| 284 tab[andw >> 24]; | |
| 285 } | |
| 286 } else { | |
| 287 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 288 /* Do the first iteration so the loop can be | |
| 289 * branch-free. This section is the same as above | |
| 290 * except for the different limit on the loop, since | |
| 291 * the last iteration is the same as all the other | |
| 292 * iterations (beyond the first). */ | |
| 293 word1 = row1[0]; | |
| 294 word2 = row2[0] >> idelx; | |
| 295 andw = word1 & word2; | |
| 296 count += tab[andw & 0xff] + | |
| 297 tab[(andw >> 8) & 0xff] + | |
| 298 tab[(andw >> 16) & 0xff] + | |
| 299 tab[andw >> 24]; | |
| 300 | |
| 301 for (x = 1; x < rowwords1; x++) { | |
| 302 word1 = row1[x]; | |
| 303 word2 = (row2[x] >> idelx) | | |
| 304 (row2[x - 1] << (32 - idelx)); | |
| 305 andw = word1 & word2; | |
| 306 count += tab[andw & 0xff] + | |
| 307 tab[(andw >> 8) & 0xff] + | |
| 308 tab[(andw >> 16) & 0xff] + | |
| 309 tab[andw >> 24]; | |
| 310 } | |
| 311 } | |
| 312 } | |
| 313 } else { | |
| 314 /* pix2 is shifted to the left. word 0 of pix1 is touched by | |
| 315 * word 0 and word 1 of pix2, and so on up to the last word of | |
| 316 * pix1 (word N), which is touched by words N and N+1 of | |
| 317 * pix2... if there is a word N+1. Handle the two cases (pix2 | |
| 318 * has N words and pix2 has at least N+1 words) separately. */ | |
| 319 if (rowwords1 < rowwords2) { | |
| 320 /* pix2 has at least N+1 words, so every iteration through | |
| 321 * the loop can be the same. */ | |
| 322 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 323 for (x = 0; x < rowwords1; x++) { | |
| 324 word1 = row1[x]; | |
| 325 word2 = row2[x] << -idelx; | |
| 326 word2 |= row2[x + 1] >> (32 + idelx); | |
| 327 andw = word1 & word2; | |
| 328 count += tab[andw & 0xff] + | |
| 329 tab[(andw >> 8) & 0xff] + | |
| 330 tab[(andw >> 16) & 0xff] + | |
| 331 tab[andw >> 24]; | |
| 332 } | |
| 333 } | |
| 334 } else { | |
| 335 /* pix2 has only N words, so the last iteration is broken | |
| 336 * out. */ | |
| 337 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 338 for (x = 0; x < rowwords1 - 1; x++) { | |
| 339 word1 = row1[x]; | |
| 340 word2 = row2[x] << -idelx; | |
| 341 word2 |= row2[x + 1] >> (32 + idelx); | |
| 342 andw = word1 & word2; | |
| 343 count += tab[andw & 0xff] + | |
| 344 tab[(andw >> 8) & 0xff] + | |
| 345 tab[(andw >> 16) & 0xff] + | |
| 346 tab[andw >> 24]; | |
| 347 } | |
| 348 | |
| 349 word1 = row1[x]; | |
| 350 word2 = row2[x] << -idelx; | |
| 351 andw = word1 & word2; | |
| 352 count += tab[andw & 0xff] + | |
| 353 tab[(andw >> 8) & 0xff] + | |
| 354 tab[(andw >> 16) & 0xff] + | |
| 355 tab[andw >> 24]; | |
| 356 } | |
| 357 } | |
| 358 } | |
| 359 } | |
| 360 | |
| 361 *pscore = (l_float32)count * (l_float32)count / | |
| 362 ((l_float32)area1 * (l_float32)area2); | |
| 363 /* lept_stderr("score = %5.3f, count = %d, area1 = %d, area2 = %d\n", | |
| 364 *pscore, count, area1, area2); */ | |
| 365 return 0; | |
| 366 } | |
| 367 | |
| 368 | |
| 369 /*! | |
| 370 * \brief pixCorrelationScoreThresholded() | |
| 371 * | |
| 372 * \param[in] pix1 test pix, 1 bpp | |
| 373 * \param[in] pix2 exemplar pix, 1 bpp | |
| 374 * \param[in] area1 number of on pixels in pix1 | |
| 375 * \param[in] area2 number of on pixels in pix2 | |
| 376 * \param[in] delx x comp of centroid difference | |
| 377 * \param[in] dely y comp of centroid difference | |
| 378 * \param[in] maxdiffw max width difference of pix1 and pix2 | |
| 379 * \param[in] maxdiffh max height difference of pix1 and pix2 | |
| 380 * \param[in] tab sum tab for byte | |
| 381 * \param[in] downcount count of 1 pixels below each row of pix1 | |
| 382 * \param[in] score_threshold | |
| 383 * \return whether the correlation score is >= score_threshold | |
| 384 * | |
| 385 * | |
| 386 * <pre> | |
| 387 * Notes: | |
| 388 * We check first that the two pix are roughly the same size. | |
| 389 * Only if they meet that criterion do we compare the bitmaps. | |
| 390 * The centroid difference is used to align the two images to the | |
| 391 * nearest integer for the correlation. | |
| 392 * | |
| 393 * The correlation score is the ratio of the square of the number of | |
| 394 * pixels in the AND of the two bitmaps to the product of the number | |
| 395 * of ON pixels in each. Denote the number of ON pixels in pix1 | |
| 396 * by |1|, the number in pix2 by |2|, and the number in the AND | |
| 397 * of pix1 and pix2 by |1 & 2|. The correlation score is then | |
| 398 * (|1 & 2|)**2 / (|1|*|2|). | |
| 399 * | |
| 400 * This score is compared with an input threshold, which can | |
| 401 * be modified depending on the weight of the template. | |
| 402 * The modified threshold is | |
| 403 * thresh + (1.0 - thresh) * weight * R | |
| 404 * where | |
| 405 * weight is a fixed input factor between 0.0 and 1.0 | |
| 406 * R = |2| / area(2) | |
| 407 * and area(2) is the total number of pixels in 2 (i.e., width x height). | |
| 408 * | |
| 409 * To understand why a weight factor is useful, consider what happens | |
| 410 * with thick, sans-serif characters that look similar and have a value | |
| 411 * of R near 1. Different characters can have a high correlation value, | |
| 412 * and the classifier will make incorrect substitutions. The weight | |
| 413 * factor raises the threshold for these characters. | |
| 414 * | |
| 415 * Yet another approach to reduce such substitutions is to run the classifier | |
| 416 * in a non-greedy way, matching to the template with the highest | |
| 417 * score, not the first template with a score satisfying the matching | |
| 418 * constraint. However, this is not particularly effective. | |
| 419 * | |
| 420 * This very fast correlation matcher was contributed by William Rucklidge. | |
| 421 * </pre> | |
| 422 */ | |
| 423 l_int32 | |
| 424 pixCorrelationScoreThresholded(PIX *pix1, | |
| 425 PIX *pix2, | |
| 426 l_int32 area1, | |
| 427 l_int32 area2, | |
| 428 l_float32 delx, /* x(1) - x(3) */ | |
| 429 l_float32 dely, /* y(1) - y(3) */ | |
| 430 l_int32 maxdiffw, | |
| 431 l_int32 maxdiffh, | |
| 432 l_int32 *tab, | |
| 433 l_int32 *downcount, | |
| 434 l_float32 score_threshold) | |
| 435 { | |
| 436 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; | |
| 437 l_int32 wpl1, wpl2, lorow, hirow, locol, hicol, untouchable; | |
| 438 l_int32 x, y, pix1lskip, pix2lskip, rowwords1, rowwords2; | |
| 439 l_uint32 word1, word2, andw; | |
| 440 l_uint32 *row1, *row2; | |
| 441 l_float32 score; | |
| 442 l_int32 threshold; | |
| 443 | |
| 444 if (!pix1 || pixGetDepth(pix1) != 1) | |
| 445 return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 0); | |
| 446 if (!pix2 || pixGetDepth(pix2) != 1) | |
| 447 return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 0); | |
| 448 if (!tab) | |
| 449 return ERROR_INT("tab not defined", __func__, 0); | |
| 450 if (area1 <= 0 || area2 <= 0) | |
| 451 return ERROR_INT("areas must be > 0", __func__, 0); | |
| 452 | |
| 453 /* Eliminate based on size difference */ | |
| 454 pixGetDimensions(pix1, &wi, &hi, NULL); | |
| 455 pixGetDimensions(pix2, &wt, &ht, NULL); | |
| 456 delw = L_ABS(wi - wt); | |
| 457 if (delw > maxdiffw) | |
| 458 return FALSE; | |
| 459 delh = L_ABS(hi - ht); | |
| 460 if (delh > maxdiffh) | |
| 461 return FALSE; | |
| 462 | |
| 463 /* Round difference to nearest integer */ | |
| 464 if (delx >= 0) | |
| 465 idelx = (l_int32)(delx + 0.5); | |
| 466 else | |
| 467 idelx = (l_int32)(delx - 0.5); | |
| 468 if (dely >= 0) | |
| 469 idely = (l_int32)(dely + 0.5); | |
| 470 else | |
| 471 idely = (l_int32)(dely - 0.5); | |
| 472 | |
| 473 /* Compute the correlation count that is needed so that | |
| 474 * count * count / (area1 * area2) >= score_threshold */ | |
| 475 threshold = (l_int32)ceil(sqrt((l_float64)score_threshold * area1 * area2)); | |
| 476 | |
| 477 count = 0; | |
| 478 wpl1 = pixGetWpl(pix1); | |
| 479 wpl2 = pixGetWpl(pix2); | |
| 480 rowwords2 = wpl2; | |
| 481 | |
| 482 /* What rows of pix1 need to be considered? Only those underlying the | |
| 483 * shifted pix2. */ | |
| 484 lorow = L_MAX(idely, 0); | |
| 485 hirow = L_MIN(ht + idely, hi); | |
| 486 | |
| 487 /* Get the pointer to the first row of each image that will be | |
| 488 * considered. */ | |
| 489 row1 = pixGetData(pix1) + wpl1 * lorow; | |
| 490 row2 = pixGetData(pix2) + wpl2 * (lorow - idely); | |
| 491 if (hirow <= hi) { | |
| 492 /* Some rows of pix1 will never contribute to count */ | |
| 493 untouchable = downcount[hirow - 1]; | |
| 494 } | |
| 495 | |
| 496 /* Similarly, figure out which columns of pix1 will be considered. */ | |
| 497 locol = L_MAX(idelx, 0); | |
| 498 hicol = L_MIN(wt + idelx, wi); | |
| 499 | |
| 500 if (idelx >= 32) { | |
| 501 /* pix2 is shifted far enough to the right that pix1's first | |
| 502 * word(s) won't contribute to the count. Increment its | |
| 503 * pointer to point to the first word that will contribute, | |
| 504 * and adjust other values accordingly. */ | |
| 505 pix1lskip = idelx >> 5; /* # of words to skip on left */ | |
| 506 row1 += pix1lskip; | |
| 507 locol -= pix1lskip << 5; | |
| 508 hicol -= pix1lskip << 5; | |
| 509 idelx &= 31; | |
| 510 } else if (idelx <= -32) { | |
| 511 /* pix2 is shifted far enough to the left that its first word(s) | |
| 512 * won't contribute to the count. Increment its pointer | |
| 513 * to point to the first word that will contribute, | |
| 514 * and adjust other values accordingly. */ | |
| 515 pix2lskip = -((idelx + 31) >> 5); /* # of words to skip on left */ | |
| 516 row2 += pix2lskip; | |
| 517 rowwords2 -= pix2lskip; | |
| 518 idelx += pix2lskip << 5; | |
| 519 } | |
| 520 | |
| 521 if ((locol >= hicol) || (lorow >= hirow)) { /* there is no overlap */ | |
| 522 count = 0; | |
| 523 } else { | |
| 524 /* How many words of each row of pix1 need to be considered? */ | |
| 525 rowwords1 = (hicol + 31) >> 5; | |
| 526 | |
| 527 if (idelx == 0) { | |
| 528 /* There's no lateral offset; simple case. */ | |
| 529 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 530 for (x = 0; x < rowwords1; x++) { | |
| 531 andw = row1[x] & row2[x]; | |
| 532 count += tab[andw & 0xff] + | |
| 533 tab[(andw >> 8) & 0xff] + | |
| 534 tab[(andw >> 16) & 0xff] + | |
| 535 tab[andw >> 24]; | |
| 536 } | |
| 537 | |
| 538 /* If the count is over the threshold, no need to | |
| 539 * calculate any further. Likewise, return early if the | |
| 540 * count plus the maximum count attainable from further | |
| 541 * rows is below the threshold. */ | |
| 542 if (count >= threshold) return TRUE; | |
| 543 if (count + downcount[y] - untouchable < threshold) { | |
| 544 return FALSE; | |
| 545 } | |
| 546 } | |
| 547 } else if (idelx > 0) { | |
| 548 /* pix2 is shifted to the right. word 0 of pix1 is touched by | |
| 549 * word 0 of pix2; word 1 of pix1 is touched by word 0 and word | |
| 550 * 1 of pix2, and so on up to the last word of pix1 (word N), | |
| 551 * which is touched by words N-1 and N of pix1... if there is a | |
| 552 * word N. Handle the two cases (pix2 has N-1 words and pix2 | |
| 553 * has at least N words) separately. | |
| 554 * | |
| 555 * Note: we know that pix2 has at least N-1 words (i.e., | |
| 556 * rowwords2 >= rowwords1 - 1) by the following logic. | |
| 557 * We can pretend that idelx <= 31 because the >= 32 logic | |
| 558 * above adjusted everything appropriately. Then | |
| 559 * hicol <= wt + idelx <= wt + 31, so | |
| 560 * hicol + 31 <= wt + 62 | |
| 561 * rowwords1 = (hicol + 31) >> 5 <= (wt + 62) >> 5 | |
| 562 * rowwords2 == (wt + 31) >> 5, so | |
| 563 * rowwords1 <= rowwords2 + 1 */ | |
| 564 if (rowwords2 < rowwords1) { | |
| 565 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 566 /* Do the first iteration so the loop can be | |
| 567 * branch-free. */ | |
| 568 word1 = row1[0]; | |
| 569 word2 = row2[0] >> idelx; | |
| 570 andw = word1 & word2; | |
| 571 count += tab[andw & 0xff] + | |
| 572 tab[(andw >> 8) & 0xff] + | |
| 573 tab[(andw >> 16) & 0xff] + | |
| 574 tab[andw >> 24]; | |
| 575 | |
| 576 for (x = 1; x < rowwords2; x++) { | |
| 577 word1 = row1[x]; | |
| 578 word2 = (row2[x] >> idelx) | | |
| 579 (row2[x - 1] << (32 - idelx)); | |
| 580 andw = word1 & word2; | |
| 581 count += tab[andw & 0xff] + | |
| 582 tab[(andw >> 8) & 0xff] + | |
| 583 tab[(andw >> 16) & 0xff] + | |
| 584 tab[andw >> 24]; | |
| 585 } | |
| 586 | |
| 587 /* Now the last iteration - we know that this is safe | |
| 588 * (i.e. rowwords1 >= 2) because rowwords1 > rowwords2 | |
| 589 * > 0 (if it was 0, we'd have taken the "count = 0" | |
| 590 * fast-path out of here). */ | |
| 591 word1 = row1[x]; | |
| 592 word2 = row2[x - 1] << (32 - idelx); | |
| 593 andw = word1 & word2; | |
| 594 count += tab[andw & 0xff] + | |
| 595 tab[(andw >> 8) & 0xff] + | |
| 596 tab[(andw >> 16) & 0xff] + | |
| 597 tab[andw >> 24]; | |
| 598 | |
| 599 if (count >= threshold) return TRUE; | |
| 600 if (count + downcount[y] - untouchable < threshold) { | |
| 601 return FALSE; | |
| 602 } | |
| 603 } | |
| 604 } else { | |
| 605 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 606 /* Do the first iteration so the loop can be | |
| 607 * branch-free. This section is the same as above | |
| 608 * except for the different limit on the loop, since | |
| 609 * the last iteration is the same as all the other | |
| 610 * iterations (beyond the first). */ | |
| 611 word1 = row1[0]; | |
| 612 word2 = row2[0] >> idelx; | |
| 613 andw = word1 & word2; | |
| 614 count += tab[andw & 0xff] + | |
| 615 tab[(andw >> 8) & 0xff] + | |
| 616 tab[(andw >> 16) & 0xff] + | |
| 617 tab[andw >> 24]; | |
| 618 | |
| 619 for (x = 1; x < rowwords1; x++) { | |
| 620 word1 = row1[x]; | |
| 621 word2 = (row2[x] >> idelx) | | |
| 622 (row2[x - 1] << (32 - idelx)); | |
| 623 andw = word1 & word2; | |
| 624 count += tab[andw & 0xff] + | |
| 625 tab[(andw >> 8) & 0xff] + | |
| 626 tab[(andw >> 16) & 0xff] + | |
| 627 tab[andw >> 24]; | |
| 628 } | |
| 629 | |
| 630 if (count >= threshold) return TRUE; | |
| 631 if (count + downcount[y] - untouchable < threshold) { | |
| 632 return FALSE; | |
| 633 } | |
| 634 } | |
| 635 } | |
| 636 } else { | |
| 637 /* pix2 is shifted to the left. word 0 of pix1 is touched by | |
| 638 * word 0 and word 1 of pix2, and so on up to the last word of | |
| 639 * pix1 (word N), which is touched by words N and N+1 of | |
| 640 * pix2... if there is a word N+1. Handle the two cases (pix2 | |
| 641 * has N words and pix2 has at least N+1 words) separately. */ | |
| 642 if (rowwords1 < rowwords2) { | |
| 643 /* pix2 has at least N+1 words, so every iteration through | |
| 644 * the loop can be the same. */ | |
| 645 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 646 for (x = 0; x < rowwords1; x++) { | |
| 647 word1 = row1[x]; | |
| 648 word2 = row2[x] << -idelx; | |
| 649 word2 |= row2[x + 1] >> (32 + idelx); | |
| 650 andw = word1 & word2; | |
| 651 count += tab[andw & 0xff] + | |
| 652 tab[(andw >> 8) & 0xff] + | |
| 653 tab[(andw >> 16) & 0xff] + | |
| 654 tab[andw >> 24]; | |
| 655 } | |
| 656 | |
| 657 if (count >= threshold) return TRUE; | |
| 658 if (count + downcount[y] - untouchable < threshold) { | |
| 659 return FALSE; | |
| 660 } | |
| 661 } | |
| 662 } else { | |
| 663 /* pix2 has only N words, so the last iteration is broken | |
| 664 * out. */ | |
| 665 for (y = lorow; y < hirow; y++, row1 += wpl1, row2 += wpl2) { | |
| 666 for (x = 0; x < rowwords1 - 1; x++) { | |
| 667 word1 = row1[x]; | |
| 668 word2 = row2[x] << -idelx; | |
| 669 word2 |= row2[x + 1] >> (32 + idelx); | |
| 670 andw = word1 & word2; | |
| 671 count += tab[andw & 0xff] + | |
| 672 tab[(andw >> 8) & 0xff] + | |
| 673 tab[(andw >> 16) & 0xff] + | |
| 674 tab[andw >> 24]; | |
| 675 } | |
| 676 | |
| 677 word1 = row1[x]; | |
| 678 word2 = row2[x] << -idelx; | |
| 679 andw = word1 & word2; | |
| 680 count += tab[andw & 0xff] + | |
| 681 tab[(andw >> 8) & 0xff] + | |
| 682 tab[(andw >> 16) & 0xff] + | |
| 683 tab[andw >> 24]; | |
| 684 | |
| 685 if (count >= threshold) return TRUE; | |
| 686 if (count + downcount[y] - untouchable < threshold) { | |
| 687 return FALSE; | |
| 688 } | |
| 689 } | |
| 690 } | |
| 691 } | |
| 692 } | |
| 693 | |
| 694 score = (l_float32)count * (l_float32)count / | |
| 695 ((l_float32)area1 * (l_float32)area2); | |
| 696 if (score >= score_threshold) { | |
| 697 lept_stderr( | |
| 698 "count %d < threshold %d but score %g >= score_threshold %g\n", | |
| 699 count, threshold, score, score_threshold); | |
| 700 } | |
| 701 return FALSE; | |
| 702 } | |
| 703 | |
| 704 | |
| 705 /* -------------------------------------------------------------------- * | |
| 706 * Simple 2 pix correlators (for jbig2 clustering) * | |
| 707 * -------------------------------------------------------------------- */ | |
| 708 /*! | |
| 709 * \brief pixCorrelationScoreSimple() | |
| 710 * | |
| 711 * \param[in] pix1 test pix, 1 bpp | |
| 712 * \param[in] pix2 exemplar pix, 1 bpp | |
| 713 * \param[in] area1 number of on pixels in pix1 | |
| 714 * \param[in] area2 number of on pixels in pix2 | |
| 715 * \param[in] delx x comp of centroid difference | |
| 716 * \param[in] dely y comp of centroid difference | |
| 717 * \param[in] maxdiffw max width difference of pix1 and pix2 | |
| 718 * \param[in] maxdiffh max height difference of pix1 and pix2 | |
| 719 * \param[in] tab sum tab for byte | |
| 720 * \param[out] pscore correlation score, in range [0.0 ... 1.0] | |
| 721 * \return 0 if OK, 1 on error | |
| 722 * | |
| 723 * <pre> | |
| 724 * Notes: | |
| 725 * (1) This calculates exactly the same value as pixCorrelationScore(). | |
| 726 * It is 2-3x slower, but much simpler to understand. | |
| 727 * (2) The returned correlation score is 0.0 if the width or height | |
| 728 * exceed %maxdiffw or %maxdiffh. | |
| 729 * </pre> | |
| 730 */ | |
| 731 l_ok | |
| 732 pixCorrelationScoreSimple(PIX *pix1, | |
| 733 PIX *pix2, | |
| 734 l_int32 area1, | |
| 735 l_int32 area2, | |
| 736 l_float32 delx, /* x(1) - x(3) */ | |
| 737 l_float32 dely, /* y(1) - y(3) */ | |
| 738 l_int32 maxdiffw, | |
| 739 l_int32 maxdiffh, | |
| 740 l_int32 *tab, | |
| 741 l_float32 *pscore) | |
| 742 { | |
| 743 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, count; | |
| 744 PIX *pixt; | |
| 745 | |
| 746 if (!pscore) | |
| 747 return ERROR_INT("&score not defined", __func__, 1); | |
| 748 *pscore = 0.0; | |
| 749 if (!pix1 || pixGetDepth(pix1) != 1) | |
| 750 return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 1); | |
| 751 if (!pix2 || pixGetDepth(pix2) != 1) | |
| 752 return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 1); | |
| 753 if (!tab) | |
| 754 return ERROR_INT("tab not defined", __func__, 1); | |
| 755 if (!area1 || !area2) | |
| 756 return ERROR_INT("areas must be > 0", __func__, 1); | |
| 757 | |
| 758 /* Eliminate based on size difference */ | |
| 759 pixGetDimensions(pix1, &wi, &hi, NULL); | |
| 760 pixGetDimensions(pix2, &wt, &ht, NULL); | |
| 761 delw = L_ABS(wi - wt); | |
| 762 if (delw > maxdiffw) | |
| 763 return 0; | |
| 764 delh = L_ABS(hi - ht); | |
| 765 if (delh > maxdiffh) | |
| 766 return 0; | |
| 767 | |
| 768 /* Round difference to nearest integer */ | |
| 769 if (delx >= 0) | |
| 770 idelx = (l_int32)(delx + 0.5); | |
| 771 else | |
| 772 idelx = (l_int32)(delx - 0.5); | |
| 773 if (dely >= 0) | |
| 774 idely = (l_int32)(dely + 0.5); | |
| 775 else | |
| 776 idely = (l_int32)(dely - 0.5); | |
| 777 | |
| 778 /* pixt = pixAnd(NULL, pix1, pix2), including shift. | |
| 779 * To insure that pixels are ON only within the | |
| 780 * intersection of pix1 and the shifted pix2: | |
| 781 * (1) Start with pixt cleared and equal in size to pix1. | |
| 782 * (2) Blit the shifted pix2 onto pixt. Then all ON pixels | |
| 783 * are within the intersection of pix1 and the shifted pix2. | |
| 784 * (3) AND pix1 with pixt. */ | |
| 785 pixt = pixCreateTemplate(pix1); | |
| 786 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0); | |
| 787 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0); | |
| 788 pixCountPixels(pixt, &count, tab); | |
| 789 pixDestroy(&pixt); | |
| 790 | |
| 791 *pscore = (l_float32)count * (l_float32)count / | |
| 792 ((l_float32)area1 * (l_float32)area2); | |
| 793 /* lept_stderr("score = %5.3f, count = %d, area1 = %d, area2 = %d\n", | |
| 794 *pscore, count, area1, area2); */ | |
| 795 return 0; | |
| 796 } | |
| 797 | |
| 798 | |
| 799 /*! | |
| 800 * \brief pixCorrelationScoreShifted() | |
| 801 * | |
| 802 * \param[in] pix1 1 bpp | |
| 803 * \param[in] pix2 1 bpp | |
| 804 * \param[in] area1 number of on pixels in pix1 | |
| 805 * \param[in] area2 number of on pixels in pix2 | |
| 806 * \param[in] delx x translation of pix2 relative to pix1 | |
| 807 * \param[in] dely y translation of pix2 relative to pix1 | |
| 808 * \param[in] tab sum tab for byte | |
| 809 * \param[out] pscore correlation score | |
| 810 * \return 0 if OK, 1 on error | |
| 811 * | |
| 812 * <pre> | |
| 813 * Notes: | |
| 814 * (1) This finds the correlation between two 1 bpp images, | |
| 815 * when pix2 is shifted by (delx, dely) with respect | |
| 816 * to each other. | |
| 817 * (2) This is implemented by starting with a copy of pix1 and | |
| 818 * ANDing its pixels with those of a shifted pix2. | |
| 819 * (3) Get the pixel counts for area1 and area2 using piCountPixels(). | |
| 820 * (4) A good estimate for a shift that would maximize the correlation | |
| 821 * is to align the centroids (cx1, cy1; cx2, cy2), giving the | |
| 822 * relative translations etransx and etransy: | |
| 823 * etransx = cx1 - cx2 | |
| 824 * etransy = cy1 - cy2 | |
| 825 * Typically delx is chosen to be near etransx; ditto for dely. | |
| 826 * This function is used in pixBestCorrelation(), where the | |
| 827 * translations delx and dely are varied to find the best alignment. | |
| 828 * (5) We do not check the sizes of pix1 and pix2, because they should | |
| 829 * be comparable. | |
| 830 * </pre> | |
| 831 */ | |
| 832 l_ok | |
| 833 pixCorrelationScoreShifted(PIX *pix1, | |
| 834 PIX *pix2, | |
| 835 l_int32 area1, | |
| 836 l_int32 area2, | |
| 837 l_int32 delx, | |
| 838 l_int32 dely, | |
| 839 l_int32 *tab, | |
| 840 l_float32 *pscore) | |
| 841 { | |
| 842 l_int32 w1, h1, w2, h2, count; | |
| 843 PIX *pixt; | |
| 844 | |
| 845 if (!pscore) | |
| 846 return ERROR_INT("&score not defined", __func__, 1); | |
| 847 *pscore = 0.0; | |
| 848 if (!pix1 || pixGetDepth(pix1) != 1) | |
| 849 return ERROR_INT("pix1 undefined or not 1 bpp", __func__, 1); | |
| 850 if (!pix2 || pixGetDepth(pix2) != 1) | |
| 851 return ERROR_INT("pix2 undefined or not 1 bpp", __func__, 1); | |
| 852 if (!tab) | |
| 853 return ERROR_INT("tab not defined", __func__, 1); | |
| 854 if (!area1 || !area2) | |
| 855 return ERROR_INT("areas must be > 0", __func__, 1); | |
| 856 | |
| 857 pixGetDimensions(pix1, &w1, &h1, NULL); | |
| 858 pixGetDimensions(pix2, &w2, &h2, NULL); | |
| 859 | |
| 860 /* To insure that pixels are ON only within the | |
| 861 * intersection of pix1 and the shifted pix2: | |
| 862 * (1) Start with pixt cleared and equal in size to pix1. | |
| 863 * (2) Blit the shifted pix2 onto pixt. Then all ON pixels | |
| 864 * are within the intersection of pix1 and the shifted pix2. | |
| 865 * (3) AND pix1 with pixt. */ | |
| 866 pixt = pixCreateTemplate(pix1); | |
| 867 pixRasterop(pixt, delx, dely, w2, h2, PIX_SRC, pix2, 0, 0); | |
| 868 pixRasterop(pixt, 0, 0, w1, h1, PIX_SRC & PIX_DST, pix1, 0, 0); | |
| 869 pixCountPixels(pixt, &count, tab); | |
| 870 pixDestroy(&pixt); | |
| 871 | |
| 872 *pscore = (l_float32)count * (l_float32)count / | |
| 873 ((l_float32)area1 * (l_float32)area2); | |
| 874 return 0; | |
| 875 } |
