comparison mupdf-source/thirdparty/leptonica/src/numafunc2.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file numafunc2.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * -------------------------------------- | |
| 32 * This file has these Numa utilities: | |
| 33 * - morphological operations | |
| 34 * - arithmetic transforms | |
| 35 * - windowed statistical operations | |
| 36 * - histogram extraction | |
| 37 * - histogram comparison | |
| 38 * - extrema finding | |
| 39 * - frequency and crossing analysis | |
| 40 * -------------------------------------- | |
| 41 | |
| 42 * Morphological (min/max) operations | |
| 43 * NUMA *numaErode() | |
| 44 * NUMA *numaDilate() | |
| 45 * NUMA *numaOpen() | |
| 46 * NUMA *numaClose() | |
| 47 * | |
| 48 * Other transforms | |
| 49 * NUMA *numaTransform() | |
| 50 * l_int32 numaSimpleStats() | |
| 51 * l_int32 numaWindowedStats() | |
| 52 * NUMA *numaWindowedMean() | |
| 53 * NUMA *numaWindowedMeanSquare() | |
| 54 * l_int32 numaWindowedVariance() | |
| 55 * NUMA *numaWindowedMedian() | |
| 56 * NUMA *numaConvertToInt() | |
| 57 * | |
| 58 * Histogram generation and statistics | |
| 59 * NUMA *numaMakeHistogram() | |
| 60 * NUMA *numaMakeHistogramAuto() | |
| 61 * NUMA *numaMakeHistogramClipped() | |
| 62 * NUMA *numaRebinHistogram() | |
| 63 * NUMA *numaNormalizeHistogram() | |
| 64 * l_int32 numaGetStatsUsingHistogram() | |
| 65 * l_int32 numaGetHistogramStats() | |
| 66 * l_int32 numaGetHistogramStatsOnInterval() | |
| 67 * l_int32 numaMakeRankFromHistogram() | |
| 68 * l_int32 numaHistogramGetRankFromVal() | |
| 69 * l_int32 numaHistogramGetValFromRank() | |
| 70 * l_int32 numaDiscretizeSortedInBins() | |
| 71 * l_int32 numaDiscretizeHistoInBins() | |
| 72 * l_int32 numaGetRankBinValues() | |
| 73 * NUMA *numaGetUniformBinSizes() | |
| 74 * | |
| 75 * Splitting a distribution | |
| 76 * l_int32 numaSplitDistribution() | |
| 77 * | |
| 78 * Comparing histograms | |
| 79 * l_int32 grayHistogramsToEMD() | |
| 80 * l_int32 numaEarthMoverDistance() | |
| 81 * l_int32 grayInterHistogramStats() | |
| 82 * | |
| 83 * Extrema finding | |
| 84 * NUMA *numaFindPeaks() | |
| 85 * NUMA *numaFindExtrema() | |
| 86 * NUMA *numaFindLocForThreshold() | |
| 87 * l_int32 numaCountReversals() | |
| 88 * | |
| 89 * Threshold crossings and frequency analysis | |
| 90 * l_int32 numaSelectCrossingThreshold() | |
| 91 * NUMA *numaCrossingsByThreshold() | |
| 92 * NUMA *numaCrossingsByPeaks() | |
| 93 * NUMA *numaEvalBestHaarParameters() | |
| 94 * l_int32 numaEvalHaarSum() | |
| 95 * | |
| 96 * Generating numbers in a range under constraints | |
| 97 * NUMA *genConstrainedNumaInRange() | |
| 98 * | |
| 99 * Things to remember when using the Numa: | |
| 100 * | |
| 101 * (1) The numa is a struct, not an array. Always use accessors | |
| 102 * (see numabasic.c), never the fields directly. | |
| 103 * | |
| 104 * (2) The number array holds l_float32 values. It can also | |
| 105 * be used to store l_int32 values. See numabasic.c for | |
| 106 * details on using the accessors. Integers larger than | |
| 107 * about 10M will lose accuracy on retrieval due to round-off. | |
| 108 * For large integers, use the dna (array of l_float64) instead. | |
| 109 * | |
| 110 * (3) Occasionally, in the comments we denote the i-th element of a | |
| 111 * numa by na[i]. This is conceptual only -- the numa is not an array! | |
| 112 * | |
| 113 * Some general comments on histograms: | |
| 114 * | |
| 115 * (1) Histograms are the generic statistical representation of | |
| 116 * the data about some attribute. Typically they're not | |
| 117 * normalized -- they simply give the number of occurrences | |
| 118 * within each range of values of the attribute. This range | |
| 119 * of values is referred to as a 'bucket'. For example, | |
| 120 * the histogram could specify how many connected components | |
| 121 * are found for each value of their width; in that case, | |
| 122 * the bucket size is 1. | |
| 123 * | |
| 124 * (2) In leptonica, all buckets have the same size. Histograms | |
| 125 * are therefore specified by a numa of occurrences, along | |
| 126 * with two other numbers: the 'value' associated with the | |
| 127 * occupants of the first bucket and the size (i.e., 'width') | |
| 128 * of each bucket. These two numbers then allow us to calculate | |
| 129 * the value associated with the occupants of each bucket. | |
| 130 * These numbers are fields in the numa, initialized to | |
| 131 * a startx value of 0.0 and a binsize of 1.0. Accessors for | |
| 132 * these fields are functions numa*Parameters(). All histograms | |
| 133 * must have these two numbers properly set. | |
| 134 * </pre> | |
| 135 */ | |
| 136 | |
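
The accessor and histogram-parameter conventions described in the comment above can be exercised with a minimal sketch. It assumes only the numabasic.c accessors (numaCreate, numaAddNumber, numaGetParameters, numaGetIValue, numaDestroy) declared in allheaders.h; the data values are hypothetical.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i, binsize, binstart, count;
    l_float32  startx, delx;
    NUMA      *na, *nah;

        /* Build a small numa of integer-valued data (hypothetical) */
    na = numaCreate(0);
    for (i = 0; i < 100; i++)
        numaAddNumber(na, (l_float32)(i % 12));

        /* Histogram with integer bins; binsize and binstart are returned */
    nah = numaMakeHistogram(na, 50, &binsize, &binstart);

        /* The two histogram fields: value of the first bin and bin width */
    numaGetParameters(nah, &startx, &delx);
    numaGetIValue(nah, 3, &count);   /* occupancy of bin 3, via accessor */
    lept_stderr("startx = %5.1f, binsize = %5.1f, bin3 = %d\n",
                startx, delx, count);

    numaDestroy(&na);
    numaDestroy(&nah);
    return 0;
}
```
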
| 137 #ifdef HAVE_CONFIG_H | |
| 138 #include <config_auto.h> | |
| 139 #endif /* HAVE_CONFIG_H */ | |
| 140 | |
| 141 #include <math.h> | |
| 142 #include "allheaders.h" | |
| 143 | |
| 144 /* bin sizes in numaMakeHistogram() */ | |
| 145 static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\ | |
| 146 2000, 5000, 10000, 20000, 50000, 100000, 200000,\ | |
| 147 500000, 1000000, 2000000, 5000000, 10000000,\ | |
| 148 20000000, 50000000, 100000000}; | |
| 149 static const l_int32 NBinSizes = 24; | |
| 150 | |
| 151 | |
| 152 #ifndef NO_CONSOLE_IO | |
| 153 #define DEBUG_HISTO 0 | |
| 154 #define DEBUG_CROSSINGS 0 | |
| 155 #define DEBUG_FREQUENCY 0 | |
| 156 #endif /* ~NO_CONSOLE_IO */ | |
| 157 | |
| 158 /*----------------------------------------------------------------------* | |
| 159 * Morphological operations * | |
| 160 *----------------------------------------------------------------------*/ | |
| 161 /*! | |
| 162 * \brief numaErode() | |
| 163 * | |
| 164 * \param[in] nas | |
| 165 * \param[in] size of sel; greater than 0, odd. The origin | |
| 166 * is implicitly in the center. | |
| 167 * \return nad eroded, or NULL on error | |
| 168 * | |
| 169 * <pre> | |
| 170 * Notes: | |
| 171 * (1) The structuring element (sel) is linear, all "hits" | |
| 172 * (2) If size == 1, this returns a copy | |
| 173 * (3) General comment. The morphological operations are equivalent | |
| 174 * to those that would be performed on a 1-dimensional fpix. | |
| 175 * However, because we have not implemented morphological | |
| 176 * operations on fpix, we do this here. Because it is only | |
| 177 * 1 dimensional, there is no reason to use the more | |
| 178 * complicated van Herk/Gil-Werman algorithm, and we do it | |
| 179 * by brute force. | |
| 180 * </pre> | |
| 181 */ | |
| 182 NUMA * | |
| 183 numaErode(NUMA *nas, | |
| 184 l_int32 size) | |
| 185 { | |
| 186 l_int32 i, j, n, hsize, len; | |
| 187 l_float32 minval; | |
| 188 l_float32 *fa, *fas, *fad; | |
| 189 NUMA *nad; | |
| 190 | |
| 191 if (!nas) | |
| 192 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 193 if (size <= 0) | |
| 194 return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); | |
| 195 if ((size & 1) == 0 ) { | |
| 196 L_WARNING("sel size must be odd; increasing by 1\n", __func__); | |
| 197 size++; | |
| 198 } | |
| 199 | |
| 200 if (size == 1) | |
| 201 return numaCopy(nas); | |
| 202 | |
| 203 /* Make a source fa (fas) that has an added (size / 2) boundary | |
| 204 * on left and right, contains a copy of nas in the interior region | |
| 205 * (between 'hsize' and 'hsize + n'), and has large values | |
| 206 * inserted in the boundary (because it is an erosion). */ | |
| 207 n = numaGetCount(nas); | |
| 208 hsize = size / 2; | |
| 209 len = n + 2 * hsize; | |
| 210 if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL) | |
| 211 return (NUMA *)ERROR_PTR("fas not made", __func__, NULL); | |
| 212 for (i = 0; i < hsize; i++) | |
| 213 fas[i] = 1.0e37f; | |
| 214 for (i = hsize + n; i < len; i++) | |
| 215 fas[i] = 1.0e37f; | |
| 216 fa = numaGetFArray(nas, L_NOCOPY); | |
| 217 for (i = 0; i < n; i++) | |
| 218 fas[hsize + i] = fa[i]; | |
| 219 | |
| 220 nad = numaMakeConstant(0, n); | |
| 221 numaCopyParameters(nad, nas); | |
| 222 fad = numaGetFArray(nad, L_NOCOPY); | |
| 223 for (i = 0; i < n; i++) { | |
| 224 minval = 1.0e37f; /* start big */ | |
| 225 for (j = 0; j < size; j++) | |
| 226 minval = L_MIN(minval, fas[i + j]); | |
| 227 fad[i] = minval; | |
| 228 } | |
| 229 | |
| 230 LEPT_FREE(fas); | |
| 231 return nad; | |
| 232 } | |
| 233 | |
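
A small sketch of numaErode() on hypothetical data: with size = 3, only the position whose full window lies inside the plateau keeps the plateau value, because the brute-force loop takes the minimum over each window. The numabasic.c accessors are assumed.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  val;
    l_float32  data[5] = {0.0f, 5.0f, 5.0f, 5.0f, 0.0f};
    NUMA      *nas, *nad;

    nas = numaCreate(5);
    for (i = 0; i < 5; i++)
        numaAddNumber(nas, data[i]);

    nad = numaErode(nas, 3);   /* windowed min over width 3 */
    for (i = 0; i < numaGetCount(nad); i++) {
        numaGetFValue(nad, i, &val);
        lept_stderr("%4.1f ", val);
    }
    lept_stderr("\n");

    numaDestroy(&nas);
    numaDestroy(&nad);
    return 0;
}
```
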
| 234 | |
| 235 /*! | |
| 236 * \brief numaDilate() | |
| 237 * | |
| 238 * \param[in] nas | |
| 239 * \param[in] size of sel; greater than 0, odd. The origin | |
| 240 * is implicitly in the center. | |
| 241 * \return nad dilated, or NULL on error | |
| 242 * | |
| 243 * <pre> | |
| 244 * Notes: | |
| 245 * (1) The structuring element (sel) is linear, all "hits" | |
| 246 * (2) If size == 1, this returns a copy | |
| 247 * </pre> | |
| 248 */ | |
| 249 NUMA * | |
| 250 numaDilate(NUMA *nas, | |
| 251 l_int32 size) | |
| 252 { | |
| 253 l_int32 i, j, n, hsize, len; | |
| 254 l_float32 maxval; | |
| 255 l_float32 *fa, *fas, *fad; | |
| 256 NUMA *nad; | |
| 257 | |
| 258 if (!nas) | |
| 259 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 260 if (size <= 0) | |
| 261 return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); | |
| 262 if ((size & 1) == 0 ) { | |
| 263 L_WARNING("sel size must be odd; increasing by 1\n", __func__); | |
| 264 size++; | |
| 265 } | |
| 266 | |
| 267 if (size == 1) | |
| 268 return numaCopy(nas); | |
| 269 | |
| 270 /* Make a source fa (fas) that has an added (size / 2) boundary | |
| 271 * on left and right, contains a copy of nas in the interior region | |
| 272 * (between 'hsize' and 'hsize + n'), and has small values | |
| 273 * inserted in the boundary (because it is a dilation). */ | |
| 274 n = numaGetCount(nas); | |
| 275 hsize = size / 2; | |
| 276 len = n + 2 * hsize; | |
| 277 if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL) | |
| 278 return (NUMA *)ERROR_PTR("fas not made", __func__, NULL); | |
| 279 for (i = 0; i < hsize; i++) | |
| 280 fas[i] = -1.0e37f; | |
| 281 for (i = hsize + n; i < len; i++) | |
| 282 fas[i] = -1.0e37f; | |
| 283 fa = numaGetFArray(nas, L_NOCOPY); | |
| 284 for (i = 0; i < n; i++) | |
| 285 fas[hsize + i] = fa[i]; | |
| 286 | |
| 287 nad = numaMakeConstant(0, n); | |
| 288 numaCopyParameters(nad, nas); | |
| 289 fad = numaGetFArray(nad, L_NOCOPY); | |
| 290 for (i = 0; i < n; i++) { | |
| 291 maxval = -1.0e37f; /* start small */ | |
| 292 for (j = 0; j < size; j++) | |
| 293 maxval = L_MAX(maxval, fas[i + j]); | |
| 294 fad[i] = maxval; | |
| 295 } | |
| 296 | |
| 297 LEPT_FREE(fas); | |
| 298 return nad; | |
| 299 } | |
| 300 | |
| 301 | |
| 302 /*! | |
| 303 * \brief numaOpen() | |
| 304 * | |
| 305 * \param[in] nas | |
| 306 * \param[in] size of sel; greater than 0, odd. The origin | |
| 307 * is implicitly in the center. | |
| 308 * \return nad opened, or NULL on error | |
| 309 * | |
| 310 * <pre> | |
| 311 * Notes: | |
| 312 * (1) The structuring element (sel) is linear, all "hits" | |
| 313 * (2) If size == 1, this returns a copy | |
| 314 * </pre> | |
| 315 */ | |
| 316 NUMA * | |
| 317 numaOpen(NUMA *nas, | |
| 318 l_int32 size) | |
| 319 { | |
| 320 NUMA *nat, *nad; | |
| 321 | |
| 322 if (!nas) | |
| 323 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 324 if (size <= 0) | |
| 325 return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); | |
| 326 if ((size & 1) == 0 ) { | |
| 327 L_WARNING("sel size must be odd; increasing by 1\n", __func__); | |
| 328 size++; | |
| 329 } | |
| 330 | |
| 331 if (size == 1) | |
| 332 return numaCopy(nas); | |
| 333 | |
| 334 nat = numaErode(nas, size); | |
| 335 nad = numaDilate(nat, size); | |
| 336 numaDestroy(&nat); | |
| 337 return nad; | |
| 338 } | |
| 339 | |
| 340 | |
| 341 /*! | |
| 342 * \brief numaClose() | |
| 343 * | |
| 344 * \param[in] nas | |
| 345 * \param[in] size of sel; greater than 0, odd. The origin | |
| 346 * is implicitly in the center. | |
| 347 * \return nad closed, or NULL on error | |
| 348 * | |
| 349 * <pre> | |
| 350 * Notes: | |
| 351 * (1) The structuring element (sel) is linear, all "hits" | |
| 352 * (2) If size == 1, this returns a copy | |
| 353 * (3) We add a border before doing this operation, for the same | |
| 354 * reason that we add a border to a pix before doing a safe closing. | |
| 355 * Without the border, a small component near the border gets | |
| 356 * clipped at the border on dilation, and can be entirely removed | |
| 357 * by the following erosion, violating the basic extensivity | |
| 358 * property of closing. | |
| 359 * </pre> | |
| 360 */ | |
| 361 NUMA * | |
| 362 numaClose(NUMA *nas, | |
| 363 l_int32 size) | |
| 364 { | |
| 365 NUMA *nab, *nat1, *nat2, *nad; | |
| 366 | |
| 367 if (!nas) | |
| 368 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 369 if (size <= 0) | |
| 370 return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); | |
| 371 if ((size & 1) == 0 ) { | |
| 372 L_WARNING("sel size must be odd; increasing by 1\n", __func__); | |
| 373 size++; | |
| 374 } | |
| 375 | |
| 376 if (size == 1) | |
| 377 return numaCopy(nas); | |
| 378 | |
| 379 nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */ | |
| 380 nat1 = numaDilate(nab, size); | |
| 381 nat2 = numaErode(nat1, size); | |
| 382 nad = numaRemoveBorder(nat2, size, size); | |
| 383 numaDestroy(&nab); | |
| 384 numaDestroy(&nat1); | |
| 385 numaDestroy(&nat2); | |
| 386 return nad; | |
| 387 } | |
| 388 | |
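
The extensivity note above can be checked directly. This sketch (hypothetical data, numabasic.c accessors assumed) closes a short sequence and prints input against output, which should satisfy out >= in at every index.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i, n;
    l_float32  in, out;
    l_float32  data[7] = {6.0f, 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, 0.0f};
    NUMA      *nas, *nac;

    nas = numaCreate(7);
    for (i = 0; i < 7; i++)
        numaAddNumber(nas, data[i]);

        /* Closing with a 3-wide sel; the temporary border is what
         * guarantees out >= in near the array ends */
    nac = numaClose(nas, 3);
    n = numaGetCount(nac);
    for (i = 0; i < n; i++) {
        numaGetFValue(nas, i, &in);
        numaGetFValue(nac, i, &out);
        lept_stderr("i = %d: in = %4.1f, out = %4.1f\n", i, in, out);
    }

    numaDestroy(&nas);
    numaDestroy(&nac);
    return 0;
}
```
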
| 389 | |
| 390 /*----------------------------------------------------------------------* | |
| 391 * Other transforms * | |
| 392 *----------------------------------------------------------------------*/ | |
| 393 /*! | |
| 394 * \brief numaTransform() | |
| 395 * | |
| 396 * \param[in] nas | |
| 397 * \param[in] shift add this to each number | |
| 398 * \param[in] scale multiply each number by this | |
| 399 * \return nad with all values shifted and scaled, or NULL on error | |
| 400 * | |
| 401 * <pre> | |
| 402 * Notes: | |
| 403 * (1) Each number is shifted before scaling. | |
| 404 * </pre> | |
| 405 */ | |
| 406 NUMA * | |
| 407 numaTransform(NUMA *nas, | |
| 408 l_float32 shift, | |
| 409 l_float32 scale) | |
| 410 { | |
| 411 l_int32 i, n; | |
| 412 l_float32 val; | |
| 413 NUMA *nad; | |
| 414 | |
| 415 if (!nas) | |
| 416 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 417 n = numaGetCount(nas); | |
| 418 if ((nad = numaCreate(n)) == NULL) | |
| 419 return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); | |
| 420 numaCopyParameters(nad, nas); | |
| 421 for (i = 0; i < n; i++) { | |
| 422 numaGetFValue(nas, i, &val); | |
| 423 val = scale * (val + shift); | |
| 424 numaAddNumber(nad, val); | |
| 425 } | |
| 426 return nad; | |
| 427 } | |
| 428 | |
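
Since each value is shifted before it is scaled, one natural use of numaTransform() is standardization: subtract the mean, then divide by the rms deviation from numaSimpleStats() below. A sketch with made-up numbers:

```c
#include "allheaders.h"

int main(void)
{
    l_float32  mean, rvar, val;
    NUMA      *na, *nat;

    na = numaCreate(4);
    numaAddNumber(na, 2.0f);
    numaAddNumber(na, 4.0f);
    numaAddNumber(na, 6.0f);
    numaAddNumber(na, 8.0f);

        /* val -> scale * (val + shift): subtract the mean, then
         * divide by the rms deviation from the mean */
    numaSimpleStats(na, 0, -1, &mean, NULL, &rvar);
    nat = numaTransform(na, -mean, 1.0f / rvar);
    numaGetFValue(nat, 0, &val);
    lept_stderr("mean = %5.2f, rms dev = %5.2f, first z = %5.2f\n",
                mean, rvar, val);

    numaDestroy(&na);
    numaDestroy(&nat);
    return 0;
}
```
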
| 429 | |
| 430 /*! | |
| 431 * \brief numaSimpleStats() | |
| 432 * | |
| 433 * \param[in] na input numa | |
| 434 * \param[in] first first element to use | |
| 435 * \param[in] last last element to use; -1 to go to the end | |
| 436 * \param[out] pmean [optional] mean value | |
| 437 * \param[out] pvar [optional] variance | |
| 438 * \param[out] prvar [optional] rms deviation from the mean | |
| 439 * \return 0 if OK, 1 on error | |
| 440 */ | |
| 441 l_ok | |
| 442 numaSimpleStats(NUMA *na, | |
| 443 l_int32 first, | |
| 444 l_int32 last, | |
| 445 l_float32 *pmean, | |
| 446 l_float32 *pvar, | |
| 447 l_float32 *prvar) | |
| 448 { | |
| 449 l_int32 i, n, ni; | |
| 450 l_float32 sum, sumsq, val, mean, var; | |
| 451 | |
| 452 if (pmean) *pmean = 0.0; | |
| 453 if (pvar) *pvar = 0.0; | |
| 454 if (prvar) *prvar = 0.0; | |
| 455 if (!pmean && !pvar && !prvar) | |
| 456 return ERROR_INT("nothing requested", __func__, 1); | |
| 457 if (!na) | |
| 458 return ERROR_INT("na not defined", __func__, 1); | |
| 459 if ((n = numaGetCount(na)) == 0) | |
| 460 return ERROR_INT("na is empty", __func__, 1); | |
| 461 first = L_MAX(0, first); | |
| 462 if (last < 0) last = n - 1; | |
| 463 if (first >= n) | |
| 464 return ERROR_INT("invalid first", __func__, 1); | |
| 465 if (last >= n) { | |
| 466 L_WARNING("last = %d is beyond max index = %d; adjusting\n", | |
| 467 __func__, last, n - 1); | |
| 468 last = n - 1; | |
| 469 } | |
| 470 if (first > last) | |
| 471 return ERROR_INT("first > last\n", __func__, 1); | |
| 472 ni = last - first + 1; | |
| 473 sum = sumsq = 0.0; | |
| 474 for (i = first; i <= last; i++) { | |
| 475 numaGetFValue(na, i, &val); | |
| 476 sum += val; | |
| 477 sumsq += val * val; | |
| 478 } | |
| 479 | |
| 480 mean = sum / ni; | |
| 481 if (pmean) | |
| 482 *pmean = mean; | |
| 483 if (pvar || prvar) { | |
| 484 var = sumsq / ni - mean * mean; | |
| 485 if (pvar) *pvar = var; | |
| 486 if (prvar) *prvar = sqrtf(var); | |
| 487 } | |
| 488 | |
| 489 return 0; | |
| 490 } | |
| 491 | |
| 492 | |
| 493 /*! | |
| 494 * \brief numaWindowedStats() | |
| 495 * | |
| 496 * \param[in] nas input numa | |
| 497 * \param[in] wc half width of the window | |
| 498 * \param[out] pnam [optional] mean value in window | |
| 499 * \param[out] pnams [optional] mean square value in window | |
| 500 * \param[out] pnav [optional] variance in window | |
| 501 * \param[out] pnarv [optional] rms deviation from the mean | |
| 502 * \return 0 if OK, 1 on error | |
| 503 * | |
| 504 * <pre> | |
| 505 * Notes: | |
| 506 * (1) This is a high-level convenience function for calculating | |
| 507 * any or all of these derived arrays. | |
| 508 * (2) These statistical measures over the values in the | |
| 509 * rectangular window are: | |
| 510 * ~ average value: [x] (nam) | |
| 511 * ~ average squared value: [x*x] (nams) | |
| 512 * ~ variance: [(x - [x])*(x - [x])] = [x*x] - [x]*[x] (nav) | |
| 513 * ~ square-root of variance: (narv) | |
| 514 * where the brackets [ .. ] indicate that the average value is | |
| 515 * to be taken over the window. | |
| 516 * (3) Note that the variance is just the mean square difference from | |
| 517 * the mean value; and the square root of the variance is the | |
| 518 * root mean square difference from the mean, sometimes also | |
| 519 * called the 'standard deviation'. | |
| 520 * (4) Internally, use mirrored borders to handle values near the | |
| 521 * end of each array. | |
| 522 * </pre> | |
| 523 */ | |
| 524 l_ok | |
| 525 numaWindowedStats(NUMA *nas, | |
| 526 l_int32 wc, | |
| 527 NUMA **pnam, | |
| 528 NUMA **pnams, | |
| 529 NUMA **pnav, | |
| 530 NUMA **pnarv) | |
| 531 { | |
| 532 NUMA *nam, *nams; | |
| 533 | |
| 534 if (!nas) | |
| 535 return ERROR_INT("nas not defined", __func__, 1); | |
| 536 if (2 * wc + 1 > numaGetCount(nas)) | |
| 537 L_WARNING("filter wider than input array!\n", __func__); | |
| 538 | |
| 539 if (!pnav && !pnarv) { | |
| 540 if (pnam) *pnam = numaWindowedMean(nas, wc); | |
| 541 if (pnams) *pnams = numaWindowedMeanSquare(nas, wc); | |
| 542 return 0; | |
| 543 } | |
| 544 | |
| 545 nam = numaWindowedMean(nas, wc); | |
| 546 nams = numaWindowedMeanSquare(nas, wc); | |
| 547 numaWindowedVariance(nam, nams, pnav, pnarv); | |
| 548 if (pnam) | |
| 549 *pnam = nam; | |
| 550 else | |
| 551 numaDestroy(&nam); | |
| 552 if (pnams) | |
| 553 *pnams = nams; | |
| 554 else | |
| 555 numaDestroy(&nams); | |
| 556 return 0; | |
| 557 } | |
| 558 | |
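
A sketch of numaWindowedStats() on a hypothetical noisy ramp; it also prints [x*x] - [x]*[x] next to the returned variance to illustrate the identity quoted in the notes. The numabasic.c accessors are assumed.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  m, ms, v, rv;
    NUMA      *nas, *nam, *nams, *nav, *narv;

    nas = numaCreate(20);
    for (i = 0; i < 20; i++)   /* hypothetical noisy ramp */
        numaAddNumber(nas, 0.5f * i + ((i % 3) - 1));

        /* window of width 2 * wc + 1 = 5 */
    numaWindowedStats(nas, 2, &nam, &nams, &nav, &narv);
    numaGetFValue(nam, 10, &m);
    numaGetFValue(nams, 10, &ms);
    numaGetFValue(nav, 10, &v);
    numaGetFValue(narv, 10, &rv);
        /* variance identity from the notes: [x*x] - [x]*[x] */
    lept_stderr("i = 10: mean = %6.3f, var = %6.3f (= %6.3f), rms = %6.3f\n",
                m, v, ms - m * m, rv);

    numaDestroy(&nas);
    numaDestroy(&nam);
    numaDestroy(&nams);
    numaDestroy(&nav);
    numaDestroy(&narv);
    return 0;
}
```
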
| 559 | |
| 560 /*! | |
| 561 * \brief numaWindowedMean() | |
| 562 * | |
| 563 * \param[in] nas | |
| 564 * \param[in] wc half width of the convolution window | |
| 565 * \return nad after low-pass filtering, or NULL on error | |
| 566 * | |
| 567 * <pre> | |
| 568 * Notes: | |
| 569 * (1) This is a convolution. The window has width = 2 * %wc + 1. | |
| 570 * (2) We add a mirrored border of size %wc to each end of the array. | |
| 571 * </pre> | |
| 572 */ | |
| 573 NUMA * | |
| 574 numaWindowedMean(NUMA *nas, | |
| 575 l_int32 wc) | |
| 576 { | |
| 577 l_int32 i, n, n1, width; | |
| 578 l_float32 sum, norm; | |
| 579 l_float32 *fa1, *fad, *suma; | |
| 580 NUMA *na1, *nad; | |
| 581 | |
| 582 if (!nas) | |
| 583 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 584 n = numaGetCount(nas); | |
| 585 width = 2 * wc + 1; /* filter width */ | |
| 586 if (width > n) | |
| 587 L_WARNING("filter wider than input array!\n", __func__); | |
| 588 | |
| 589 na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER); | |
| 590 n1 = n + 2 * wc; | |
| 591 fa1 = numaGetFArray(na1, L_NOCOPY); | |
| 592 nad = numaMakeConstant(0, n); | |
| 593 fad = numaGetFArray(nad, L_NOCOPY); | |
| 594 | |
| 595 /* Make sum array; note the indexing */ | |
| 596 if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) { | |
| 597 numaDestroy(&na1); | |
| 598 numaDestroy(&nad); | |
| 599 return (NUMA *)ERROR_PTR("suma not made", __func__, NULL); | |
| 600 } | |
| 601 sum = 0.0; | |
| 602 suma[0] = 0.0; | |
| 603 for (i = 0; i < n1; i++) { | |
| 604 sum += fa1[i]; | |
| 605 suma[i + 1] = sum; | |
| 606 } | |
| 607 | |
| 608 norm = 1.f / (2 * wc + 1); | |
| 609 for (i = 0; i < n; i++) | |
| 610 fad[i] = norm * (suma[width + i] - suma[i]); | |
| 611 | |
| 612 LEPT_FREE(suma); | |
| 613 numaDestroy(&na1); | |
| 614 return nad; | |
| 615 } | |
| 616 | |
| 617 | |
| 618 /*! | |
| 619 * \brief numaWindowedMeanSquare() | |
| 620 * | |
| 621 * \param[in] nas | |
| 622 * \param[in] wc half width of the window | |
| 623 * \return nad containing windowed mean square values, or NULL on error | |
| 624 * | |
| 625 * <pre> | |
| 626 * Notes: | |
| 627 * (1) The window has width = 2 * %wc + 1. | |
| 628 * (2) We add a mirrored border of size %wc to each end of the array. | |
| 629 * </pre> | |
| 630 */ | |
| 631 NUMA * | |
| 632 numaWindowedMeanSquare(NUMA *nas, | |
| 633 l_int32 wc) | |
| 634 { | |
| 635 l_int32 i, n, n1, width; | |
| 636 l_float32 sum, norm; | |
| 637 l_float32 *fa1, *fad, *suma; | |
| 638 NUMA *na1, *nad; | |
| 639 | |
| 640 if (!nas) | |
| 641 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 642 n = numaGetCount(nas); | |
| 643 width = 2 * wc + 1; /* filter width */ | |
| 644 if (width > n) | |
| 645 L_WARNING("filter wider than input array!\n", __func__); | |
| 646 | |
| 647 na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER); | |
| 648 n1 = n + 2 * wc; | |
| 649 fa1 = numaGetFArray(na1, L_NOCOPY); | |
| 650 nad = numaMakeConstant(0, n); | |
| 651 fad = numaGetFArray(nad, L_NOCOPY); | |
| 652 | |
| 653 /* Make sum array; note the indexing */ | |
| 654 if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) { | |
| 655 numaDestroy(&na1); | |
| 656 numaDestroy(&nad); | |
| 657 return (NUMA *)ERROR_PTR("suma not made", __func__, NULL); | |
| 658 } | |
| 659 sum = 0.0; | |
| 660 suma[0] = 0.0; | |
| 661 for (i = 0; i < n1; i++) { | |
| 662 sum += fa1[i] * fa1[i]; | |
| 663 suma[i + 1] = sum; | |
| 664 } | |
| 665 | |
| 666 norm = 1.f / (2 * wc + 1); | |
| 667 for (i = 0; i < n; i++) | |
| 668 fad[i] = norm * (suma[width + i] - suma[i]); | |
| 669 | |
| 670 LEPT_FREE(suma); | |
| 671 numaDestroy(&na1); | |
| 672 return nad; | |
| 673 } | |
| 674 | |
| 675 | |
| 676 /*! | |
| 677 * \brief numaWindowedVariance() | |
| 678 * | |
| 679 * \param[in] nam windowed mean values | |
| 680 * \param[in] nams windowed mean square values | |
| 681 * \param[out] pnav [optional] numa of variance -- the ms deviation | |
| 682 * from the mean | |
| 683 * \param[out] pnarv [optional] numa of rms deviation from the mean | |
| 684 * \return 0 if OK, 1 on error | |
| 685 * | |
| 686 * <pre> | |
| 687 * Notes: | |
| 688 * (1) The numas of windowed mean and mean square are precomputed, | |
| 689 * using numaWindowedMean() and numaWindowedMeanSquare(). | |
| 690 * (2) Either or both of the variance and square-root of variance | |
| 691 * are returned, where the variance is the average over the | |
| 692 * window of the mean square difference of the pixel value | |
| 693 * from the mean: | |
| 694 * [(x - [x])*(x - [x])] = [x*x] - [x]*[x] | |
| 695 * </pre> | |
| 696 */ | |
| 697 l_ok | |
| 698 numaWindowedVariance(NUMA *nam, | |
| 699 NUMA *nams, | |
| 700 NUMA **pnav, | |
| 701 NUMA **pnarv) | |
| 702 { | |
| 703 l_int32 i, nm, nms; | |
| 704 l_float32 var; | |
| 705 l_float32 *fam, *fams, *fav = NULL, *farv = NULL; | |
| 706 NUMA *nav, *narv; /* variance and square root of variance */ | |
| 707 | |
| 708 if (pnav) *pnav = NULL; | |
| 709 if (pnarv) *pnarv = NULL; | |
| 710 if (!pnav && !pnarv) | |
| 711 return ERROR_INT("neither &nav nor &narv are defined", __func__, 1); | |
| 712 if (!nam) | |
| 713 return ERROR_INT("nam not defined", __func__, 1); | |
| 714 if (!nams) | |
| 715 return ERROR_INT("nams not defined", __func__, 1); | |
| 716 nm = numaGetCount(nam); | |
| 717 nms = numaGetCount(nams); | |
| 718 if (nm != nms) | |
| 719 return ERROR_INT("sizes of nam and nams differ", __func__, 1); | |
| 720 | |
| 721 if (pnav) { | |
| 722 nav = numaMakeConstant(0, nm); | |
| 723 *pnav = nav; | |
| 724 fav = numaGetFArray(nav, L_NOCOPY); | |
| 725 } | |
| 726 if (pnarv) { | |
| 727 narv = numaMakeConstant(0, nm); | |
| 728 *pnarv = narv; | |
| 729 farv = numaGetFArray(narv, L_NOCOPY); | |
| 730 } | |
| 731 fam = numaGetFArray(nam, L_NOCOPY); | |
| 732 fams = numaGetFArray(nams, L_NOCOPY); | |
| 733 | |
| 734 for (i = 0; i < nm; i++) { | |
| 735 var = fams[i] - fam[i] * fam[i]; | |
| 736 if (pnav) | |
| 737 fav[i] = var; | |
| 738 if (pnarv) | |
| 739 farv[i] = sqrtf(var); | |
| 740 } | |
| 741 | |
| 742 return 0; | |
| 743 } | |
| 744 | |
| 745 | |
| 746 /*! | |
| 747 * \brief numaWindowedMedian() | |
| 748 * | |
| 749 * \param[in] nas | |
| 750 * \param[in] halfwin half width of window over which the median is found | |
| 751 * \return nad after windowed median filtering, or NULL on error | |
| 752 * | |
| 753 * <pre> | |
| 754 * Notes: | |
| 755 * (1) The requested window has width = 2 * %halfwin + 1. | |
| 756 * (2) If the input nas has fewer than 3 elements, return a copy. | |
| 757 * (3) If the filter is too small (%halfwin <= 0), return a copy. | |
| 758 * (4) If the filter is too large, it is reduced in size. | |
| 759 * (5) We add a mirrored border of size %halfwin to each end of | |
| 760 * the array to simplify the calculation by avoiding end-effects. | |
| 761 * </pre> | |
| 762 */ | |
| 763 NUMA * | |
| 764 numaWindowedMedian(NUMA *nas, | |
| 765 l_int32 halfwin) | |
| 766 { | |
| 767 l_int32 i, n; | |
| 768 l_float32 medval; | |
| 769 NUMA *na1, *na2, *nad; | |
| 770 | |
| 771 if (!nas) | |
| 772 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 773 if ((n = numaGetCount(nas)) < 3) | |
| 774 return numaCopy(nas); | |
| 775 if (halfwin <= 0) { | |
| 776 L_ERROR("filter too small; returning a copy\n", __func__); | |
| 777 return numaCopy(nas); | |
| 778 } | |
| 779 | |
| 780 if (halfwin > (n - 1) / 2) { | |
| 781 halfwin = (n - 1) / 2; | |
| 782 L_INFO("reducing filter to halfwin = %d\n", __func__, halfwin); | |
| 783 } | |
| 784 | |
| 785 /* Add a border to both ends */ | |
| 786 na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER); | |
| 787 | |
| 788 /* Get the median value at the center of each window, corresponding | |
| 789 * to locations in the input nas. */ | |
| 790 nad = numaCreate(n); | |
| 791 for (i = 0; i < n; i++) { | |
| 792 na2 = numaClipToInterval(na1, i, i + 2 * halfwin); | |
| 793 numaGetMedian(na2, &medval); | |
| 794 numaAddNumber(nad, medval); | |
| 795 numaDestroy(&na2); | |
| 796 } | |
| 797 | |
| 798 numaDestroy(&na1); | |
| 799 return nad; | |
| 800 } | |
| 801 | |
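
A sketch contrasting the windowed median with the windowed mean on hypothetical data containing a single outlier; the median filter rejects the spike while the mean smears it.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  mval, medval;
    NUMA      *nas, *nam, *named;

        /* flat signal with one outlier at i = 5 (hypothetical data) */
    nas = numaCreate(11);
    for (i = 0; i < 11; i++)
        numaAddNumber(nas, (i == 5) ? 100.0f : 10.0f);

    nam = numaWindowedMean(nas, 2);      /* width 5: spike is smeared */
    named = numaWindowedMedian(nas, 2);  /* width 5: spike is rejected */
    numaGetFValue(nam, 5, &mval);
    numaGetFValue(named, 5, &medval);
    lept_stderr("at the spike: mean = %6.2f, median = %6.2f\n", mval, medval);

    numaDestroy(&nas);
    numaDestroy(&nam);
    numaDestroy(&named);
    return 0;
}
```
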
| 802 | |
| 803 /*! | |
| 804 * \brief numaConvertToInt() | |
| 805 * | |
| 806 * \param[in] nas source numa | |
| 807 * \return na with all values rounded to nearest integer, or | |
| 808 * NULL on error | |
| 809 */ | |
| 810 NUMA * | |
| 811 numaConvertToInt(NUMA *nas) | |
| 812 { | |
| 813 l_int32 i, n, ival; | |
| 814 NUMA *nad; | |
| 815 | |
| 816 if (!nas) | |
| 817 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 818 | |
| 819 n = numaGetCount(nas); | |
| 820 if ((nad = numaCreate(n)) == NULL) | |
| 821 return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); | |
| 822 numaCopyParameters(nad, nas); | |
| 823 for (i = 0; i < n; i++) { | |
| 824 numaGetIValue(nas, i, &ival); | |
| 825 numaAddNumber(nad, ival); | |
| 826 } | |
| 827 return nad; | |
| 828 } | |
| 829 | |
| 830 | |
| 831 /*----------------------------------------------------------------------* | |
| 832 * Histogram generation and statistics * | |
| 833 *----------------------------------------------------------------------*/ | |
| 834 /*! | |
| 835 * \brief numaMakeHistogram() | |
| 836 * | |
| 837 * \param[in] na | |
| 838 * \param[in] maxbins max number of histogram bins | |
| 839 * \param[out] pbinsize [optional] size of histogram bins | |
| 840 * \param[out] pbinstart [optional] start val of minimum bin; | |
| 841 * input NULL to force start at 0 | |
| 842 * \return na consisting of a histogram of integerized values, | |
| 843 * or NULL on error. | |
| 844 * | |
| 845 * <pre> | |
| 846 * Notes: | |
| 847 * (1) This simple interface is designed for integer data. | |
| 848 * The bins are of integer width and start on integer boundaries, | |
| 849 * so the results on float data will not have high precision. | |
| 850 * (2) Specify the max number of input bins. Then %binsize, | |
| 851 * the size of bins necessary to accommodate the input data, | |
| 852 * is optionally returned. It is one of the sequence: | |
| 853 * {1, 2, 5, 10, 20, 50, ...}. | |
| 854 * (3) If &binstart is given, all values are accommodated, | |
| 855 * and the min value of the starting bin is returned. | |
| 856 * Otherwise, all negative values are discarded and | |
| 857 * the histogram bins start at 0. | |
| 858 * </pre> | |
| 859 */ | |
| 860 NUMA * | |
| 861 numaMakeHistogram(NUMA *na, | |
| 862 l_int32 maxbins, | |
| 863 l_int32 *pbinsize, | |
| 864 l_int32 *pbinstart) | |
| 865 { | |
| 866 l_int32 i, n, ival, hval; | |
| 867 l_int32 iminval, imaxval, range, binsize, nbins, ibin; | |
| 868 l_float32 val, ratio; | |
| 869 NUMA *nai, *nahist; | |
| 870 | |
| 871 if (pbinsize) *pbinsize = 0; | |
| 872 if (pbinstart) *pbinstart = 0; | |
| 873 if (!na) | |
| 874 return (NUMA *)ERROR_PTR("na not defined", __func__, NULL); | |
| 875 if (maxbins < 1) | |
| 876 return (NUMA *)ERROR_PTR("maxbins < 1", __func__, NULL); | |
| 877 | |
| 878 /* Determine input range */ | |
| 879 numaGetMin(na, &val, NULL); | |
| 880 iminval = (l_int32)(val + 0.5); | |
| 881 numaGetMax(na, &val, NULL); | |
| 882 imaxval = (l_int32)(val + 0.5); | |
| 883 if (pbinstart == NULL) { /* clip negative vals; start from 0 */ | |
| 884 iminval = 0; | |
| 885 if (imaxval < 0) | |
| 886 return (NUMA *)ERROR_PTR("all values < 0", __func__, NULL); | |
| 887 } | |
| 888 | |
| 889 /* Determine binsize */ | |
| 890 range = imaxval - iminval + 1; | |
| 891 if (range > maxbins - 1) { | |
| 892 ratio = (l_float32)range / (l_float32)maxbins; | |
| 893 binsize = 0; | |
| 894 for (i = 0; i < NBinSizes; i++) { | |
| 895 if (ratio < BinSizeArray[i]) { | |
| 896 binsize = BinSizeArray[i]; | |
| 897 break; | |
| 898 } | |
| 899 } | |
| 900 if (binsize == 0) | |
| 901 return (NUMA *)ERROR_PTR("numbers too large", __func__, NULL); | |
| 902 } else { | |
| 903 binsize = 1; | |
| 904 } | |
| 905 if (pbinsize) *pbinsize = binsize; | |
| 906 nbins = 1 + range / binsize; /* +1 seems to be sufficient */ | |
| 907 | |
| 908 /* Redetermine iminval */ | |
| 909 if (pbinstart && binsize > 1) { | |
| 910 if (iminval >= 0) | |
| 911 iminval = binsize * (iminval / binsize); | |
| 912 else | |
| 913 iminval = binsize * ((iminval - binsize + 1) / binsize); | |
| 914 } | |
| 915 if (pbinstart) *pbinstart = iminval; | |
| 916 | |
| 917 #if DEBUG_HISTO | |
| 918 lept_stderr(" imaxval = %d, range = %d, nbins = %d\n", | |
| 919 imaxval, range, nbins); | |
| 920 #endif /* DEBUG_HISTO */ | |
| 921 | |
| 922 /* Use integerized data for input */ | |
| 923 if ((nai = numaConvertToInt(na)) == NULL) | |
| 924 return (NUMA *)ERROR_PTR("nai not made", __func__, NULL); | |
| 925 n = numaGetCount(nai); | |
| 926 | |
| 927 /* Make histogram, converting value in input array | |
| 928 * into a bin number for this histogram array. */ | |
| 929 if ((nahist = numaCreate(nbins)) == NULL) { | |
| 930 numaDestroy(&nai); | |
| 931 return (NUMA *)ERROR_PTR("nahist not made", __func__, NULL); | |
| 932 } | |
| 933 numaSetCount(nahist, nbins); | |
| 934 numaSetParameters(nahist, iminval, binsize); | |
| 935 for (i = 0; i < n; i++) { | |
| 936 numaGetIValue(nai, i, &ival); | |
| 937 ibin = (ival - iminval) / binsize; | |
| 938 if (ibin >= 0 && ibin < nbins) { | |
| 939 numaGetIValue(nahist, ibin, &hval); | |
| 940 numaSetValue(nahist, ibin, hval + 1.0f); | |
| 941 } | |
| 942 } | |
| 943 | |
| 944 numaDestroy(&nai); | |
| 945 return nahist; | |
| 946 } | |
| 947 | |
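
A usage sketch for numaMakeHistogram() with hypothetical integer data; the returned bin size is drawn from the {1, 2, 5, 10, 20, 50, ...} sequence described above. The numabasic.c accessors are assumed.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i, binsize, binstart;
    NUMA      *na, *nah;

    na = numaCreate(1000);
    for (i = 0; i < 1000; i++)        /* hypothetical integer data 0..999 */
        numaAddNumber(na, i);

        /* Ask for at most 64 bins; the bin size returned is taken from
         * the sequence {1, 2, 5, 10, 20, 50, ...} (20 for this data) */
    nah = numaMakeHistogram(na, 64, &binsize, &binstart);
    lept_stderr("binsize = %d, binstart = %d, nbins = %d\n",
                binsize, binstart, numaGetCount(nah));

    numaDestroy(&na);
    numaDestroy(&nah);
    return 0;
}
```
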
| 948 | |
| 949 /*! | |
| 950 * \brief numaMakeHistogramAuto() | |
| 951 * | |
| 952 * \param[in] na numa of floats; these may be integers | |
| 953 * \param[in] maxbins max number of histogram bins; >= 1 | |
| 954 * \return na consisting of a histogram of quantized float values, | |
| 955 * or NULL on error. | |
| 956 * | |
| 957 * <pre> | |
| 958 * Notes: | |
| 959 * (1) This simple interface is designed for accurate binning | |
| 960 * of both integer and float data. | |
| 961 * (2) If the array data is integers, and the range of integers | |
| 962 * is smaller than %maxbins, they are binned as they fall, | |
| 963 * with binsize = 1. | |
| 964 * (3) If the range of data, (maxval - minval), is larger than | |
| 965 * %maxbins, or if the data is floats, they are binned into | |
| 966 * exactly %maxbins bins. | |
| 967 * (4) Unlike numaMakeHistogram(), these bins in general have | |
| 968 * non-integer location and width, even for integer data. | |
| 969 * </pre> | |
| 970 */ | |
| 971 NUMA * | |
| 972 numaMakeHistogramAuto(NUMA *na, | |
| 973 l_int32 maxbins) | |
| 974 { | |
| 975 l_int32 i, n, imin, imax, irange, ibin, ival, allints; | |
| 976 l_float32 minval, maxval, range, binsize, fval; | |
| 977 NUMA *nah; | |
| 978 | |
| 979 if (!na) | |
| 980 return (NUMA *)ERROR_PTR("na not defined", __func__, NULL); | |
| 981 maxbins = L_MAX(1, maxbins); | |
| 982 | |
| 983 /* Determine input range */ | |
| 984 numaGetMin(na, &minval, NULL); | |
| 985 numaGetMax(na, &maxval, NULL); | |
| 986 | |
| 987 /* Determine if values are all integers */ | |
| 988 n = numaGetCount(na); | |
| 989 numaHasOnlyIntegers(na, &allints); | |
| 990 | |
| 991 /* Do simple integer binning if possible */ | |
| 992 if (allints && (maxval - minval < maxbins)) { | |
| 993 imin = (l_int32)minval; | |
| 994 imax = (l_int32)maxval; | |
| 995 irange = imax - imin + 1; | |
| 996 nah = numaCreate(irange); | |
| 997 numaSetCount(nah, irange); /* init */ | |
| 998 numaSetParameters(nah, minval, 1.0); | |
| 999 for (i = 0; i < n; i++) { | |
| 1000 numaGetIValue(na, i, &ival); | |
| 1001 ibin = ival - imin; | |
| 1002 numaGetIValue(nah, ibin, &ival); | |
| 1003 numaSetValue(nah, ibin, ival + 1.0f); | |
| 1004 } | |
| 1005 | |
| 1006 return nah; | |
| 1007 } | |
| 1008 | |
| 1009 /* Do float binning, even if the data is integers. */ | |
| 1010 range = maxval - minval; | |
| 1011 binsize = range / (l_float32)maxbins; | |
| 1012 if (range == 0.0) { | |
| 1013 nah = numaCreate(1); | |
| 1014 numaSetParameters(nah, minval, binsize); | |
| 1015 numaAddNumber(nah, n); | |
| 1016 return nah; | |
| 1017 } | |
| 1018 nah = numaCreate(maxbins); | |
| 1019 numaSetCount(nah, maxbins); | |
| 1020 numaSetParameters(nah, minval, binsize); | |
| 1021 for (i = 0; i < n; i++) { | |
| 1022 numaGetFValue(na, i, &fval); | |
| 1023 ibin = (l_int32)((fval - minval) / binsize); | |
| 1024 ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */ | |
| 1025 numaGetIValue(nah, ibin, &ival); | |
| 1026 numaSetValue(nah, ibin, ival + 1.0f); | |
| 1027 } | |
| 1028 | |
| 1029 return nah; | |
| 1030 } | |
| 1031 | |
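
A sketch of the float-binning path of numaMakeHistogramAuto(): non-integer data is placed into exactly %maxbins bins, and the resulting non-integer startx and binsize are read back with numaGetParameters(). Data values are hypothetical.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  startx, binsize;
    NUMA      *na, *nah;

    na = numaCreate(500);
    for (i = 0; i < 500; i++)            /* hypothetical float data */
        numaAddNumber(na, 0.37f * i);

        /* Float data: binned into exactly maxbins bins with a
         * non-integer bin width; read it back from the histogram */
    nah = numaMakeHistogramAuto(na, 100);
    numaGetParameters(nah, &startx, &binsize);
    lept_stderr("nbins = %d, startx = %7.3f, binsize = %7.3f\n",
                numaGetCount(nah), startx, binsize);

    numaDestroy(&na);
    numaDestroy(&nah);
    return 0;
}
```
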
| 1032 | |
| 1033 /*! | |
| 1034 * \brief numaMakeHistogramClipped() | |
| 1035 * | |
| 1036 * \param[in] na | |
| 1037 * \param[in] binsize typically 1.0 | |
| 1038 * \param[in] maxsize of histogram ordinate | |
| 1039 * \return na histogram of bins of size %binsize, starting with | |
| 1040 * the na[0] (x = 0.0 and going up to a maximum of | |
| 1041 * x = %maxsize, by increments of %binsize), or NULL on error | |
| 1042 * | |
| 1043 * <pre> | |
| 1044 * Notes: | |
| 1045 * (1) This simple function generates a histogram of values | |
| 1046 * from na, discarding all values < 0.0 or greater than | |
| 1047 * min(%maxsize, maxval), where maxval is the maximum value in na. | |
| 1048 * The histogram data is put in bins of size delx = %binsize, | |
| 1049 * starting at x = 0.0. We use as many bins as are | |
| 1050 * needed to hold the data. | |
| 1051 * </pre> | |
| 1052 */ | |
| 1053 NUMA * | |
| 1054 numaMakeHistogramClipped(NUMA *na, | |
| 1055 l_float32 binsize, | |
| 1056 l_float32 maxsize) | |
| 1057 { | |
| 1058 l_int32 i, n, nbins, ival, ibin; | |
| 1059 l_float32 val, maxval; | |
| 1060 NUMA *nad; | |
| 1061 | |
| 1062 if (!na) | |
| 1063 return (NUMA *)ERROR_PTR("na not defined", __func__, NULL); | |
| 1064 if (binsize <= 0.0) | |
| 1065 return (NUMA *)ERROR_PTR("binsize must be > 0.0", __func__, NULL); | |
| 1066 if (binsize > maxsize) | |
| 1067 binsize = maxsize; /* just one bin */ | |
| 1068 | |
| 1069 numaGetMax(na, &maxval, NULL); | |
| 1070 n = numaGetCount(na); | |
| 1071 maxsize = L_MIN(maxsize, maxval); | |
| 1072 nbins = (l_int32)(maxsize / binsize) + 1; | |
| 1073 | |
| 1074 /* lept_stderr("maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */ | |
| 1075 | |
| 1076 if ((nad = numaCreate(nbins)) == NULL) | |
| 1077 return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); | |
| 1078 numaSetParameters(nad, 0.0, binsize); | |
| 1079 numaSetCount(nad, nbins); /* interpret zeroes in bins as data */ | |
| 1080 for (i = 0; i < n; i++) { | |
| 1081 numaGetFValue(na, i, &val); | |
| 1082 ibin = (l_int32)(val / binsize); | |
| 1083 if (ibin >= 0 && ibin < nbins) { | |
| 1084 numaGetIValue(nad, ibin, &ival); | |
| 1085 numaSetValue(nad, ibin, ival + 1.0f); | |
| 1086 } | |
| 1087 } | |
| 1088 | |
| 1089 return nad; | |
| 1090 } | |
| 1091 | |
| 1092 | |
| 1093 /*! | |
| 1094 * \brief numaRebinHistogram() | |
| 1095 * | |
| 1096 * \param[in] nas input histogram | |
| 1097 * \param[in] newsize number of old bins contained in each new bin | |
| 1098 * \return nad more coarsely re-binned histogram, or NULL on error | |
| 1099 */ | |
| 1100 NUMA * | |
| 1101 numaRebinHistogram(NUMA *nas, | |
| 1102 l_int32 newsize) | |
| 1103 { | |
| 1104 l_int32 i, j, ns, nd, index, count, val; | |
| 1105 l_float32 start, oldsize; | |
| 1106 NUMA *nad; | |
| 1107 | |
| 1108 if (!nas) | |
| 1109 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 1110 if (newsize <= 1) | |
| 1111 return (NUMA *)ERROR_PTR("newsize must be > 1", __func__, NULL); | |
| 1112 if ((ns = numaGetCount(nas)) == 0) | |
| 1113 return (NUMA *)ERROR_PTR("no bins in nas", __func__, NULL); | |
| 1114 | |
| 1115 nd = (ns + newsize - 1) / newsize; | |
| 1116 if ((nad = numaCreate(nd)) == NULL) | |
| 1117 return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); | |
| 1118 numaGetParameters(nad, &start, &oldsize); | |
| 1119 numaSetParameters(nad, start, oldsize * newsize); | |
| 1120 | |
| 1121 for (i = 0; i < nd; i++) { /* new bins */ | |
| 1122 count = 0; | |
| 1123 index = i * newsize; | |
| 1124 for (j = 0; j < newsize; j++) { | |
| 1125 if (index < ns) { | |
| 1126 numaGetIValue(nas, index, &val); | |
| 1127 count += val; | |
| 1128 index++; | |
| 1129 } | |
| 1130 } | |
| 1131 numaAddNumber(nad, count); | |
| 1132 } | |
| 1133 | |
| 1134 return nad; | |
| 1135 } | |
| 1136 | |
| 1137 | |
| 1138 /*! | |
| 1139 * \brief numaNormalizeHistogram() | |
| 1140 * | |
| 1141 * \param[in] nas input histogram | |
| 1142 * \param[in] tsum target sum of all numbers in dest histogram; e.g., use | |
| 1143 * %tsum= 1.0 if this represents a probability distribution | |
| 1144 * \return nad normalized histogram, or NULL on error | |
| 1145 */ | |
| 1146 NUMA * | |
| 1147 numaNormalizeHistogram(NUMA *nas, | |
| 1148 l_float32 tsum) | |
| 1149 { | |
| 1150 l_int32 i, ns; | |
| 1151 l_float32 sum, factor, fval; | |
| 1152 NUMA *nad; | |
| 1153 | |
| 1154 if (!nas) | |
| 1155 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 1156 if (tsum <= 0.0) | |
| 1157 return (NUMA *)ERROR_PTR("tsum must be > 0.0", __func__, NULL); | |
| 1158 if ((ns = numaGetCount(nas)) == 0) | |
| 1159 return (NUMA *)ERROR_PTR("no bins in nas", __func__, NULL); | |
| 1160 | |
| 1161 numaGetSum(nas, &sum); | |
| 1162 factor = tsum / sum; | |
| 1163 | |
| 1164 if ((nad = numaCreate(ns)) == NULL) | |
| 1165 return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); | |
| 1166 numaCopyParameters(nad, nas); | |
| 1167 | |
| 1168 for (i = 0; i < ns; i++) { | |
| 1169 numaGetFValue(nas, i, &fval); | |
| 1170 fval *= factor; | |
| 1171 numaAddNumber(nad, fval); | |
| 1172 } | |
| 1173 | |
| 1174 return nad; | |
| 1175 } | |
| 1176 | |
| 1177 | |
| 1178 /*! | |
| 1179 * \brief numaGetStatsUsingHistogram() | |
| 1180 * | |
| 1181 * \param[in] na an arbitrary set of numbers; not ordered and not | |
| 1182 * a histogram | |
| 1183 * \param[in] maxbins the maximum number of bins to be allowed in | |
| 1184 * the histogram; use an integer larger than the | |
| 1185 * largest number in %na for consecutive integer bins | |
| 1186 * \param[out] pmin [optional] min value of set | |
| 1187 * \param[out] pmax [optional] max value of set | |
| 1188 * \param[out] pmean [optional] mean value of set | |
| 1189 * \param[out] pvariance [optional] variance | |
| 1190 * \param[out] pmedian [optional] median value of set | |
| 1191 * \param[in] rank in [0.0 ... 1.0]; median has a rank 0.5; | |
| 1192 * ignored if &rval == NULL | |
| 1193 * \param[out] prval [optional] value in na corresponding to %rank | |
| 1194 * \param[out] phisto [optional] Numa histogram; use NULL to prevent | |
| 1195 * \return 0 if OK, 1 on error | |
| 1196 * | |
| 1197 * <pre> | |
| 1198 * Notes: | |
| 1199 * (1) This is a simple interface for gathering statistics | |
| 1200 * from a numa, where a histogram is used 'under the covers' | |
| 1201 * to avoid sorting if a rank value is requested. In that case, | |
| 1202 * by using a histogram we are trading speed for accuracy, because | |
| 1203 * the values in %na are quantized to the center of a set of bins. | |
| 1204 * (2) If the median, other rank value, or histogram are not requested, | |
| 1205 * the calculation is all performed on the input Numa. | |
| 1206 * (3) The variance is the average of the square of the | |
| 1207 * difference from the mean. The median is the value in na | |
| 1208 * with rank 0.5. | |
| 1209 * (4) There are two situations where this gives rank results with | |
| 1210 * accuracy comparable to computing statistics directly on the input | |
| 1211 * data, without binning into a histogram: | |
| 1212 * (a) the data is integers and the range of data is less than | |
| 1213 * %maxbins, and | |
| 1214 * (b) the data is floats and the range is small compared to | |
| 1215 * %maxbins, so that the binsize is much less than 1. | |
| 1216 * (5) If a histogram is used and the numbers in the Numa extend | |
| 1217 * over a large range, you can limit the required storage by | |
| 1218 * specifying the maximum number of bins in the histogram. | |
| 1219 * Use %maxbins == 0 to force the bin size to be 1. | |
| 1220 * (6) This optionally returns the median and one arbitrary rank value. | |
| 1221 * If you need several rank values, return the histogram and use | |
| 1222 * numaHistogramGetValFromRank(nah, rank, &rval) | |
| 1223 * multiple times. | |
| 1224 * </pre> | |
| 1225 */ | |
| 1226 l_ok | |
| 1227 numaGetStatsUsingHistogram(NUMA *na, | |
| 1228 l_int32 maxbins, | |
| 1229 l_float32 *pmin, | |
| 1230 l_float32 *pmax, | |
| 1231 l_float32 *pmean, | |
| 1232 l_float32 *pvariance, | |
| 1233 l_float32 *pmedian, | |
| 1234 l_float32 rank, | |
| 1235 l_float32 *prval, | |
| 1236 NUMA **phisto) | |
| 1237 { | |
| 1238 l_int32 i, n; | |
| 1239 l_float32 minval, maxval, fval, mean, sum; | |
| 1240 NUMA *nah; | |
| 1241 | |
| 1242 if (pmin) *pmin = 0.0; | |
| 1243 if (pmax) *pmax = 0.0; | |
| 1244 if (pmean) *pmean = 0.0; | |
| 1245 if (pvariance) *pvariance = 0.0; | |
| 1246 if (pmedian) *pmedian = 0.0; | |
| 1247 if (prval) *prval = 0.0; | |
| 1248 if (phisto) *phisto = NULL; | |
| 1249 if (!na) | |
| 1250 return ERROR_INT("na not defined", __func__, 1); | |
| 1251 if ((n = numaGetCount(na)) == 0) | |
| 1252 return ERROR_INT("numa is empty", __func__, 1); | |
| 1253 | |
| 1254 numaGetMin(na, &minval, NULL); | |
| 1255 numaGetMax(na, &maxval, NULL); | |
| 1256 if (pmin) *pmin = minval; | |
| 1257 if (pmax) *pmax = maxval; | |
| 1258 if (pmean || pvariance) { | |
| 1259 sum = 0.0; | |
| 1260 for (i = 0; i < n; i++) { | |
| 1261 numaGetFValue(na, i, &fval); | |
| 1262 sum += fval; | |
| 1263 } | |
| 1264 mean = sum / (l_float32)n; | |
| 1265 if (pmean) *pmean = mean; | |
| 1266 } | |
| 1267 if (pvariance) { | |
| 1268 sum = 0.0; | |
| 1269 for (i = 0; i < n; i++) { | |
| 1270 numaGetFValue(na, i, &fval); | |
| 1271 sum += fval * fval; | |
| 1272 } | |
| 1273 *pvariance = sum / (l_float32)n - mean * mean; | |
| 1274 } | |
| 1275 | |
| 1276 if (!pmedian && !prval && !phisto) | |
| 1277 return 0; | |
| 1278 | |
| 1279 nah = numaMakeHistogramAuto(na, maxbins); | |
| 1280 if (pmedian) | |
| 1281 numaHistogramGetValFromRank(nah, 0.5, pmedian); | |
| 1282 if (prval) | |
| 1283 numaHistogramGetValFromRank(nah, rank, prval); | |
| 1284 if (phisto) | |
| 1285 *phisto = nah; | |
| 1286 else | |
| 1287 numaDestroy(&nah); | |
| 1288 return 0; | |
| 1289 } | |
| 1290 | |
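
A single-call sketch of numaGetStatsUsingHistogram() on hypothetical data: min, max, mean, variance, the median, and one extra rank value are returned together, with the histogram built internally.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  minval, maxval, mean, var, median, rval;
    NUMA      *na;

    na = numaCreate(256);
    for (i = 0; i < 256; i++)          /* hypothetical pixel-like values */
        numaAddNumber(na, (17 * i) % 256);

        /* One call for min, max, mean, variance, median and the
         * 0.9 rank value; maxbins = 256 gives integer bins here */
    numaGetStatsUsingHistogram(na, 256, &minval, &maxval, &mean, &var,
                               &median, 0.9f, &rval, NULL);
    lept_stderr("min = %5.1f, max = %5.1f, mean = %6.2f\n",
                minval, maxval, mean);
    lept_stderr("median = %6.2f, 90%% rank value = %6.2f\n", median, rval);

    numaDestroy(&na);
    return 0;
}
```
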
| 1291 | |
| 1292 /*! | |
| 1293 * \brief numaGetHistogramStats() | |
| 1294 * | |
| 1295 * \param[in] nahisto histogram: y(x(i)), i = 0 ... nbins - 1 | |
| 1296 * \param[in] startx x value of first bin: x(0) | |
| 1297 * \param[in] deltax x increment between bins; the bin size; x(1) - x(0) | |
| 1298 * \param[out] pxmean [optional] mean value of histogram | |
| 1299 * \param[out] pxmedian [optional] median value of histogram | |
| 1300 * \param[out] pxmode [optional] mode value of histogram: | |
| 1301 * xmode = x(imode), where y(xmode) >= y(x(i)) for | |
| 1302 * all i != imode | |
| 1303 * \param[out] pxvariance [optional] variance of x | |
| 1304 * \return 0 if OK, 1 on error | |
| 1305 * | |
| 1306 * <pre> | |
| 1307 * Notes: | |
| 1308 * (1) If the histogram represents the relation y(x), the | |
| 1309 * computed values that are returned are the x values. | |
| 1310 * These are NOT the bucket indices i; they are related to the | |
| 1311 * bucket indices by | |
| 1312 * x(i) = startx + i * deltax | |
| 1313 * </pre> | |
| 1314 */ | |
| 1315 l_ok | |
| 1316 numaGetHistogramStats(NUMA *nahisto, | |
| 1317 l_float32 startx, | |
| 1318 l_float32 deltax, | |
| 1319 l_float32 *pxmean, | |
| 1320 l_float32 *pxmedian, | |
| 1321 l_float32 *pxmode, | |
| 1322 l_float32 *pxvariance) | |
| 1323 { | |
| 1324 if (pxmean) *pxmean = 0.0; | |
| 1325 if (pxmedian) *pxmedian = 0.0; | |
| 1326 if (pxmode) *pxmode = 0.0; | |
| 1327 if (pxvariance) *pxvariance = 0.0; | |
| 1328 if (!nahisto) | |
| 1329 return ERROR_INT("nahisto not defined", __func__, 1); | |
| 1330 | |
| 1331 return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1, | |
| 1332 pxmean, pxmedian, pxmode, | |
| 1333 pxvariance); | |
| 1334 } | |
| 1335 | |
| 1336 | |
| 1337 /*! | |
| 1338 * \brief numaGetHistogramStatsOnInterval() | |
| 1339 * | |
| 1340 * \param[in] nahisto histogram: y(x(i)), i = 0 ... nbins - 1 | |
| 1341 * \param[in] startx x value of first bin: x(0) | |
| 1342 * \param[in] deltax x increment between bins; the bin size; x(1) - x(0) | |
| 1343 * \param[in] ifirst first bin to use for collecting stats | |
| 1344 * \param[in] ilast last bin for collecting stats; -1 to go to the end | |
| 1345 * \param[out] pxmean [optional] mean value of histogram | |
| 1346 * \param[out] pxmedian [optional] median value of histogram | |
| 1347 * \param[out] pxmode [optional] mode value of histogram: | |
| 1348 * xmode = x(imode), where y(xmode) >= y(x(i)) for | |
| 1349 * all i != imode | |
| 1350 * \param[out] pxvariance [optional] variance of x | |
| 1351 * \return 0 if OK, 1 on error | |
| 1352 * | |
| 1353 * <pre> | |
| 1354 * Notes: | |
| 1355 * (1) If the histogram represents the relation y(x), the | |
| 1356 * computed values that are returned are the x values. | |
| 1357 * These are NOT the bucket indices i; they are related to the | |
| 1358 * bucket indices by | |
| 1359 * x(i) = startx + i * deltax | |
| 1360 * </pre> | |
| 1361 */ | |
| 1362 l_ok | |
| 1363 numaGetHistogramStatsOnInterval(NUMA *nahisto, | |
| 1364 l_float32 startx, | |
| 1365 l_float32 deltax, | |
| 1366 l_int32 ifirst, | |
| 1367 l_int32 ilast, | |
| 1368 l_float32 *pxmean, | |
| 1369 l_float32 *pxmedian, | |
| 1370 l_float32 *pxmode, | |
| 1371 l_float32 *pxvariance) | |
| 1372 { | |
| 1373 l_int32 i, n, imax; | |
| 1374 l_float32 sum, sumval, halfsum, moment, var, x, y, ymax; | |
| 1375 | |
| 1376 if (pxmean) *pxmean = 0.0; | |
| 1377 if (pxmedian) *pxmedian = 0.0; | |
| 1378 if (pxmode) *pxmode = 0.0; | |
| 1379 if (pxvariance) *pxvariance = 0.0; | |
| 1380 if (!nahisto) | |
| 1381 return ERROR_INT("nahisto not defined", __func__, 1); | |
| 1382 if (!pxmean && !pxmedian && !pxmode && !pxvariance) | |
| 1383 return ERROR_INT("nothing to compute", __func__, 1); | |
| 1384 | |
| 1385 n = numaGetCount(nahisto); | |
| 1386 ifirst = L_MAX(0, ifirst); | |
| 1387 if (ilast < 0) ilast = n - 1; | |
| 1388 if (ifirst >= n) | |
| 1389 return ERROR_INT("invalid ifirst", __func__, 1); | |
| 1390 if (ilast >= n) { | |
| 1391 L_WARNING("ilast = %d is beyond max index = %d; adjusting\n", | |
| 1392 __func__, ilast, n - 1); | |
| 1393 ilast = n - 1; | |
| 1394 } | |
| 1395 if (ifirst > ilast) | |
| 1396 return ERROR_INT("ifirst > ilast", __func__, 1); | |
| 1397 for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) { | |
| 1398 x = startx + i * deltax; | |
| 1399 numaGetFValue(nahisto, i, &y); | |
| 1400 sum += y; | |
| 1401 moment += x * y; | |
| 1402 var += x * x * y; | |
| 1403 } | |
| 1404 if (sum == 0.0) { | |
| 1405 L_INFO("sum is 0\n", __func__); | |
| 1406 return 0; | |
| 1407 } | |
| 1408 | |
| 1409 if (pxmean) | |
| 1410 *pxmean = moment / sum; | |
| 1411 if (pxvariance) | |
| 1412 *pxvariance = var / sum - moment * moment / (sum * sum); | |
| 1413 | |
| 1414 if (pxmedian) { | |
| 1415 halfsum = sum / 2.0f; | |
| 1416 for (sumval = 0.0, i = ifirst; i <= ilast; i++) { | |
| 1417 numaGetFValue(nahisto, i, &y); | |
| 1418 sumval += y; | |
| 1419 if (sumval >= halfsum) { | |
| 1420 *pxmedian = startx + i * deltax; | |
| 1421 break; | |
| 1422 } | |
| 1423 } | |
| 1424 } | |
| 1425 | |
| 1426 if (pxmode) { | |
| 1427 imax = -1; | |
| 1428 ymax = -1.0e10; | |
| 1429 for (i = ifirst; i <= ilast; i++) { | |
| 1430 numaGetFValue(nahisto, i, &y); | |
| 1431 if (y > ymax) { | |
| 1432 ymax = y; | |
| 1433 imax = i; | |
| 1434 } | |
| 1435 } | |
| 1436 *pxmode = startx + imax * deltax; | |
| 1437 } | |
| 1438 | |
| 1439 return 0; | |
| 1440 } | |
| 1441 | |
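
A sketch restricting the statistics to the upper half of the bins of a hypothetical bimodal histogram; the returned mean and mode are x values, computed as x(i) = startx + i * deltax.

```c
#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  startx, deltax, xmean, xmode;
    NUMA      *na, *nah;

    na = numaCreate(400);
    for (i = 0; i < 400; i++)      /* hypothetical bimodal integer data */
        numaAddNumber(na, (i % 2) ? 40.0f + (i % 25) : 200.0f + (i % 30));

    nah = numaMakeHistogramAuto(na, 256);
    numaGetParameters(nah, &startx, &deltax);

        /* Stats over the upper half of the bins only; the returned
         * values are x values, x(i) = startx + i * deltax */
    numaGetHistogramStatsOnInterval(nah, startx, deltax,
                                    numaGetCount(nah) / 2, -1,
                                    &xmean, NULL, &xmode, NULL);
    lept_stderr("upper-half mean = %7.2f, mode = %7.2f\n", xmean, xmode);

    numaDestroy(&na);
    numaDestroy(&nah);
    return 0;
}
```
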
| 1442 | |
| 1443 /*! | |
| 1444 * \brief numaMakeRankFromHistogram() | |
| 1445 * | |
| 1446 * \param[in] startx xval corresponding to first element in nay | |
| 1447 * \param[in] deltax x increment between array elements in nay | |
| 1448 * \param[in] nasy input histogram, assumed equally spaced | |
| 1449 * \param[in] npts number of points to evaluate rank function | |
| 1450 * \param[out] pnax [optional] array of x values in range | |
| 1451 * \param[out] pnay rank array of specified npts | |
| 1452 * \return 0 if OK, 1 on error | |
| 1453 */ | |
| 1454 l_ok | |
| 1455 numaMakeRankFromHistogram(l_float32 startx, | |
| 1456 l_float32 deltax, | |
| 1457 NUMA *nasy, | |
| 1458 l_int32 npts, | |
| 1459 NUMA **pnax, | |
| 1460 NUMA **pnay) | |
| 1461 { | |
| 1462 l_int32 i, n; | |
| 1463 l_float32 sum, fval; | |
| 1464 NUMA *nan, *nar; | |
| 1465 | |
| 1466 if (pnax) *pnax = NULL; | |
| 1467 if (!pnay) | |
| 1468 return ERROR_INT("&nay not defined", __func__, 1); | |
| 1469 *pnay = NULL; | |
| 1470 if (!nasy) | |
| 1471 return ERROR_INT("nasy not defined", __func__, 1); | |
| 1472 if ((n = numaGetCount(nasy)) == 0) | |
| 1473 return ERROR_INT("no bins in nas", __func__, 1); | |
| 1474 | |
| 1475 /* Normalize and generate the rank array corresponding to | |
| 1476 * the binned histogram. */ | |
| 1477 nan = numaNormalizeHistogram(nasy, 1.0); | |
| 1478 nar = numaCreate(n + 1); /* rank numa corresponding to nan */ | |
| 1479 sum = 0.0; | |
| 1480 numaAddNumber(nar, sum); /* first element is 0.0 */ | |
| 1481 for (i = 0; i < n; i++) { | |
| 1482 numaGetFValue(nan, i, &fval); | |
| 1483 sum += fval; | |
| 1484 numaAddNumber(nar, sum); | |
| 1485 } | |
| 1486 | |
| 1487 /* Compute rank array on full range with specified | |
| 1488 * number of points and correspondence to x-values. */ | |
| 1489 numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP, | |
| 1490 startx, startx + n * deltax, npts, | |
| 1491 pnax, pnay); | |
| 1492 numaDestroy(&nan); | |
| 1493 numaDestroy(&nar); | |
| 1494 return 0; | |
| 1495 } | |
| 1496 | |
| 1497 | |
| 1498 /*! | |
| 1499 * \brief numaHistogramGetRankFromVal() | |
| 1500 * | |
| 1501 * \param[in] na histogram | |
| 1502 * \param[in] rval value of input sample for which we want the rank | |
| 1503 * \param[out] prank fraction of total samples below rval | |
| 1504 * \return 0 if OK, 1 on error | |
| 1505 * | |
| 1506 * <pre> | |
| 1507 * Notes: | |
| 1508 * (1) If we think of the histogram as a function y(x), normalized | |
| 1509 * to 1, for a given input value of x, this computes the | |
| 1510 * rank of x, which is the integral of y(x) from the start | |
| 1511 * value of x to the input value. | |
| 1512 * (2) This function only makes sense when applied to a Numa that | |
| 1513 * is a histogram. The values in the histogram can be ints and | |
| 1514 * floats, and are computed as floats. The rank is returned | |
| 1515 * as a float between 0.0 and 1.0. | |
| 1516 * (3) The numa parameters startx and binsize are used to | |
| 1517 * compute x from the Numa index i. | |
| 1518 * </pre> | |
| 1519 */ | |
| 1520 l_ok | |
| 1521 numaHistogramGetRankFromVal(NUMA *na, | |
| 1522 l_float32 rval, | |
| 1523 l_float32 *prank) | |
| 1524 { | |
| 1525 l_int32 i, ibinval, n; | |
| 1526 l_float32 startval, binsize, binval, maxval, fractval, total, sum, val; | |
| 1527 | |
| 1528 if (!prank) | |
| 1529 return ERROR_INT("prank not defined", __func__, 1); | |
| 1530 *prank = 0.0; | |
| 1531 if (!na) | |
| 1532 return ERROR_INT("na not defined", __func__, 1); | |
| 1533 numaGetParameters(na, &startval, &binsize); | |
| 1534 n = numaGetCount(na); | |
| 1535 if (rval < startval) | |
| 1536 return 0; | |
| 1537 maxval = startval + n * binsize; | |
| 1538 if (rval > maxval) { | |
| 1539 *prank = 1.0; | |
| 1540 return 0; | |
| 1541 } | |
| 1542 | |
| 1543 binval = (rval - startval) / binsize; | |
| 1544 ibinval = (l_int32)binval; | |
| 1545 if (ibinval >= n) { | |
| 1546 *prank = 1.0; | |
| 1547 return 0; | |
| 1548 } | |
| 1549 fractval = binval - (l_float32)ibinval; | |
| 1550 | |
| 1551 sum = 0.0; | |
| 1552 for (i = 0; i < ibinval; i++) { | |
| 1553 numaGetFValue(na, i, &val); | |
| 1554 sum += val; | |
| 1555 } | |
| 1556 numaGetFValue(na, ibinval, &val); | |
| 1557 sum += fractval * val; | |
| 1558 numaGetSum(na, &total); | |
| 1559 *prank = sum / total; | |
| 1560 | |
| 1561 /* lept_stderr("binval = %7.3f, rank = %7.3f\n", binval, *prank); */ | |
| 1562 | |
| 1563 return 0; | |
| 1564 } | |
| 1565 | |
| 1566 | |
| 1567 /*! | |
| 1568 * \brief numaHistogramGetValFromRank() | |
| 1569 * | |
| 1570 * \param[in] na histogram | |
| 1571 * \param[in] rank fraction of total samples | |
| 1572 * \param[out] prval approx. to the bin value | |
| 1573 * \return 0 if OK, 1 on error | |
| 1574 * | |
| 1575 * <pre> | |
| 1576 * Notes: | |
| 1577 * (1) If we think of the histogram as a function y(x), this returns | |
| 1578 * the value x such that the integral of y(x) from the start | |
| 1579 * value to x gives the fraction 'rank' of the integral | |
| 1580 * of y(x) over all bins. | |
| 1581 * (2) This function only makes sense when applied to a Numa that | |
| 1582 * is a histogram. The values in the histogram can be ints and | |
| 1583 * floats, and are computed as floats. The val is returned | |
| 1584 * as a float, even though the buckets are of integer width. | |
| 1585 * (3) The numa parameters startx and binsize are used to | |
| 1586 * compute x from the Numa index i. | |
| 1587 * </pre> | |
| 1588 */ | |
| 1589 l_ok | |
| 1590 numaHistogramGetValFromRank(NUMA *na, | |
| 1591 l_float32 rank, | |
| 1592 l_float32 *prval) | |
| 1593 { | |
| 1594 l_int32 i, n; | |
| 1595 l_float32 startval, binsize, rankcount, total, sum, fract, val; | |
| 1596 | |
| 1597 if (!prval) | |
| 1598 return ERROR_INT("prval not defined", __func__, 1); | |
| 1599 *prval = 0.0; | |
| 1600 if (!na) | |
| 1601 return ERROR_INT("na not defined", __func__, 1); | |
| 1602 if (rank < 0.0) { | |
| 1603 L_WARNING("rank < 0; setting to 0.0\n", __func__); | |
| 1604 rank = 0.0; | |
| 1605 } | |
| 1606 if (rank > 1.0) { | |
| 1607 L_WARNING("rank > 1.0; setting to 1.0\n", __func__); | |
| 1608 rank = 1.0; | |
| 1609 } | |
| 1610 | |
| 1611 n = numaGetCount(na); | |
| 1612 numaGetParameters(na, &startval, &binsize); | |
| 1613 numaGetSum(na, &total); | |
| 1614 rankcount = rank * total; /* count that corresponds to rank */ | |
| 1615 sum = 0.0; | |
| 1616 val = 0.0; | |
| 1617 for (i = 0; i < n; i++) { | |
| 1618 numaGetFValue(na, i, &val); | |
| 1619 if (sum + val >= rankcount) | |
| 1620 break; | |
| 1621 sum += val; | |
| 1622 } | |
| 1623 if (val <= 0.0) /* can == 0 if rank == 0.0 */ | |
| 1624 fract = 0.0; | |
| 1625 else /* sum + fract * val = rankcount */ | |
| 1626 fract = (rankcount - sum) / val; | |
| 1627 | |
| 1628 /* The use of the fraction of a bin allows a simple calculation | |
| 1629 * for the histogram value at the given rank. */ | |
| 1630 *prval = startval + binsize * ((l_float32)i + fract); | |
| 1631 | |
| 1632 /* lept_stderr("rank = %7.3f, val = %7.3f\n", rank, *prval); */ | |
| 1633 | |
| 1634 return 0; | |
| 1635 } | |
| 1636 | |
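| /* Illustrative sketch (not in the upstream source; compiled out): | |
| * numaHistogramGetRankFromVal() and numaHistogramGetValFromRank() are | |
| * approximate inverses. 'nahisto' and the helper name are hypothetical. */ | |
| #if 0 | |
| static void | |
| exampleRankValRoundTrip(NUMA *nahisto) | |
| { | |
| l_float32 median, rank; | |
| | |
| /* Gray value at rank 0.5 (the median of the distribution) ... */ | |
| numaHistogramGetValFromRank(nahisto, 0.5, &median); | |
| /* ... and the rank of that value, which should be close to 0.5 */ | |
| numaHistogramGetRankFromVal(nahisto, median, &rank); | |
| lept_stderr("median = %7.3f, rank(median) = %5.3f\n", median, rank); | |
| } | |
| #endif | |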
| 1637 | |
| 1638 /*! | |
| 1639 * \brief numaDiscretizeSortedInBins() | |
| 1640 * | |
| 1641 * \param[in] na sorted | |
| 1642 * \param[in] nbins number of equal population bins (> 1) | |
| 1643 * \param[out] pnabinval average "gray" values in each bin | |
| 1644 * \return 0 if OK, 1 on error | |
| 1645 * | |
| 1646 * <pre> | |
| 1647 * Notes: | |
| 1648 * (1) The input %na is sorted in increasing value. | |
| 1649 * (2) The output array has the following mapping: | |
| 1650 * bin number --> average array value in bin (nabinval) | |
| 1651 * (3) With %nbins == 100, nabinval is the average gray value in | |
| 1652 * each of the 100 equally populated bins. It is the function | |
| 1653 * gray[100 * rank]. | |
| 1654 * Thus it is the inverse of | |
| 1655 * rank[gray] | |
| 1656 * (4) Contrast with numaDiscretizeHistoInBins(), where the input %na | |
| 1657 * is a histogram. | |
| 1658 * </pre> | |
| 1659 */ | |
| 1660 l_ok | |
| 1661 numaDiscretizeSortedInBins(NUMA *na, | |
| 1662 l_int32 nbins, | |
| 1663 NUMA **pnabinval) | |
| 1664 { | |
| 1665 NUMA *nabinval; /* average gray value in the bins */ | |
| 1666 NUMA *naeach; | |
| 1667 l_int32 i, ntot, bincount, binindex, binsize; | |
| 1668 l_float32 sum, val, ave; | |
| 1669 | |
| 1670 if (!pnabinval) | |
| 1671 return ERROR_INT("&nabinval not defined", __func__, 1); | |
| 1672 *pnabinval = NULL; | |
| 1673 if (!na) | |
| 1674 return ERROR_INT("na not defined", __func__, 1); | |
| 1675 if (nbins < 2) | |
| 1676 return ERROR_INT("nbins must be > 1", __func__, 1); | |
| 1677 | |
| 1678 /* Get the number of items in each bin */ | |
| 1679 ntot = numaGetCount(na); | |
| 1680 if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL) | |
| 1681 return ERROR_INT("naeach not made", __func__, 1); | |
| 1682 | |
| 1683 /* Get the average value in each bin */ | |
| 1684 sum = 0.0; | |
| 1685 bincount = 0; | |
| 1686 binindex = 0; | |
| 1687 numaGetIValue(naeach, 0, &binsize); | |
| 1688 nabinval = numaCreate(nbins); | |
| 1689 for (i = 0; i < ntot; i++) { | |
| 1690 numaGetFValue(na, i, &val); | |
| 1691 bincount++; | |
| 1692 sum += val; | |
| 1693 if (bincount == binsize) { /* add bin entry */ | |
| 1694 ave = sum / binsize; | |
| 1695 numaAddNumber(nabinval, ave); | |
| 1696 sum = 0.0; | |
| 1697 bincount = 0; | |
| 1698 binindex++; | |
| 1699 if (binindex == nbins) break; | |
| 1700 numaGetIValue(naeach, binindex, &binsize); | |
| 1701 } | |
| 1702 } | |
| 1703 *pnabinval = nabinval; | |
| 1704 | |
| 1705 numaDestroy(&naeach); | |
| 1706 return 0; | |
| 1707 } | |
| 1708 | |
| 1709 | |
| 1710 /*! | |
| 1711 * \brief numaDiscretizeHistoInBins() | |
| 1712 * | |
| 1713 * \param[in] na histogram | |
| 1714 * \param[in] nbins number of equal population bins (> 1) | |
| 1715 * \param[out] pnabinval average "gray" values in each bin | |
| 1716 * \param[out] pnarank [optional] rank value of input histogram; | |
| 1717 * this is a cumulative norm histogram. | |
| 1718 * \return 0 if OK, 1 on error | |
| 1719 * | |
| 1720 * <pre> | |
| 1721 * Notes: | |
| 1722 * (1) With %nbins == 100, nabinval is the average gray value in | |
| 1723 * each of the 100 equally populated bins. It is the function | |
| 1724 * gray[100 * rank]. | |
| 1725 * Thus it is the inverse of | |
| 1726 * rank[gray] | |
| 1727 * which is optionally returned in narank. | |
| 1728 * (2) The "gray value" is the index into the input histogram. | |
| 1729 * (3) The two output arrays give the following mappings, where the | |
| 1730 * input is an un-normalized histogram of array values: | |
| 1731 * bin number --> average array value in bin (nabinval) | |
| 1732 * array values --> cumulative normalized histogram (narank) | |
| 1733 * </pre> | |
| 1734 */ | |
| 1735 l_ok | |
| 1736 numaDiscretizeHistoInBins(NUMA *na, | |
| 1737 l_int32 nbins, | |
| 1738 NUMA **pnabinval, | |
| 1739 NUMA **pnarank) | |
| 1740 { | |
| 1741 NUMA *nabinval; /* average gray value in the bins */ | |
| 1742 NUMA *naeach, *nan; | |
| 1743 l_int32 i, j, nxvals, occup, count, bincount, binindex, binsize; | |
| 1744 l_float32 sum, ave, ntot; | |
| 1745 | |
| 1746 if (pnarank) *pnarank = NULL; | |
| 1747 if (!pnabinval) | |
| 1748 return ERROR_INT("&nabinval not defined", __func__, 1); | |
| 1749 *pnabinval = NULL; | |
| 1750 if (!na) | |
| 1751 return ERROR_INT("na not defined", __func__, 1); | |
| 1752 if (nbins < 2) | |
| 1753 return ERROR_INT("nbins must be > 1", __func__, 1); | |
| 1754 | |
| 1755 nxvals = numaGetCount(na); | |
| 1756 numaGetSum(na, &ntot); | |
| 1757 occup = ntot / nxvals; | |
| 1758 if (occup < 1) L_INFO("average occupancy %d < 1\n", __func__, occup); | |
| 1759 | |
| 1760 /* Get the number of items in each bin */ | |
| 1761 if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL) | |
| 1762 return ERROR_INT("naeach not made", __func__, 1); | |
| 1763 | |
| 1764 /* Get the average value in each bin */ | |
| 1765 sum = 0.0; | |
| 1766 bincount = 0; | |
| 1767 binindex = 0; | |
| 1768 numaGetIValue(naeach, 0, &binsize); | |
| 1769 nabinval = numaCreate(nbins); | |
| 1770 for (i = 0; i < nxvals; i++) { | |
| 1771 numaGetIValue(na, i, &count); | |
| 1772 for (j = 0; j < count; j++) { | |
| 1773 bincount++; | |
| 1774 sum += i; | |
| 1775 if (bincount == binsize) { /* add bin entry */ | |
| 1776 ave = sum / binsize; | |
| 1777 numaAddNumber(nabinval, ave); | |
| 1778 sum = 0.0; | |
| 1779 bincount = 0; | |
| 1780 binindex++; | |
| 1781 if (binindex == nbins) break; | |
| 1782 numaGetIValue(naeach, binindex, &binsize); | |
| 1783 } | |
| 1784 } | |
| 1785 if (binindex == nbins) break; | |
| 1786 } | |
| 1787 *pnabinval = nabinval; | |
| 1788 if (binindex != nbins) | |
| 1789 L_ERROR("binindex = %d != nbins = %d\n", __func__, binindex, nbins); | |
| 1790 | |
| 1791 /* Get cumulative normalized histogram (rank[gray value]). | |
| 1792 * This is the partial sum operating on the normalized histogram. */ | |
| 1793 if (pnarank) { | |
| 1794 nan = numaNormalizeHistogram(na, 1.0); | |
| 1795 *pnarank = numaGetPartialSums(nan); | |
| 1796 numaDestroy(&nan); | |
| 1797 } | |
| 1798 numaDestroy(&naeach); | |
| 1799 return 0; | |
| 1800 } | |
| 1801 | |
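| /* Illustrative sketch (not in the upstream source; compiled out): | |
| * rank-bin a histogram into 10 equally populated bins and also get the | |
| * cumulative (rank) array. 'nahisto' and the helper name are | |
| * hypothetical. */ | |
| #if 0 | |
| static void | |
| exampleHistoRankBins(NUMA *nahisto) | |
| { | |
| l_int32 i, n; | |
| l_float32 ave; | |
| NUMA *nabinval, *narank; | |
| | |
| if (numaDiscretizeHistoInBins(nahisto, 10, &nabinval, &narank)) | |
| return; | |
| n = numaGetCount(nabinval); | |
| for (i = 0; i < n; i++) { /* average gray value in each rank bin */ | |
| numaGetFValue(nabinval, i, &ave); | |
| lept_stderr("bin %d: ave = %7.2f\n", i, ave); | |
| } | |
| numaDestroy(&nabinval); | |
| numaDestroy(&narank); | |
| } | |
| #endif | |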
| 1802 | |
| 1803 /*! | |
| 1804 * \brief numaGetRankBinValues() | |
| 1805 * | |
| 1806 * \param[in] na an array of values | |
| 1807 * \param[in] nbins number of bins at which the rank is divided | |
| 1808 * \param[out] pnam mean intensity in a bin vs rank bin value, | |
| 1809 * with %nbins of discretized rank values | |
| 1810 * \return 0 if OK, 1 on error | |
| 1811 * | |
| 1812 * <pre> | |
| 1813 * Notes: | |
| 1814 * (1) Simple interface for getting a binned rank representation | |
| 1815 * of an input array of values. This returns: | |
| 1816 * rank bin number --> average array value in each rank bin (nam) | |
| 1817 * (2) Uses either a sorted array or a histogram, depending on | |
| 1818 * the values in the array and the size of the array. | |
| 1819 * </pre> | |
| 1820 */ | |
| 1821 l_ok | |
| 1822 numaGetRankBinValues(NUMA *na, | |
| 1823 l_int32 nbins, | |
| 1824 NUMA **pnam) | |
| 1825 { | |
| 1826 NUMA *na1; | |
| 1827 l_int32 maxbins, type; | |
| 1828 l_float32 maxval, delx; | |
| 1829 | |
| 1830 if (!pnam) | |
| 1831 return ERROR_INT("&pnam not defined", __func__, 1); | |
| 1832 *pnam = NULL; | |
| 1833 if (!na) | |
| 1834 return ERROR_INT("na not defined", __func__, 1); | |
| 1835 if (numaGetCount(na) == 0) | |
| 1836 return ERROR_INT("na is empty", __func__, 1); | |
| 1837 if (nbins < 2) | |
| 1838 return ERROR_INT("nbins must be > 1", __func__, 1); | |
| 1839 | |
| 1840 /* Choose between sorted array and a histogram. | |
| 1841 * If the input array has a small number of values with | |
| 1842 * a large maximum, we sort it. At the other extreme, if | |
| 1843 * the array has many numbers with a small maximum, such as the | |
| 1844 * values of pixels in an 8 bpp grayscale image, generate a histogram. | |
| 1845 * If type comes back as L_BIN_SORT, use a histogram. */ | |
| 1846 type = numaChooseSortType(na); | |
| 1847 if (type == L_SHELL_SORT) { /* sort the array */ | |
| 1848 L_INFO("sort the array: input size = %d\n", __func__, numaGetCount(na)); | |
| 1849 na1 = numaSort(NULL, na, L_SORT_INCREASING); | |
| 1850 numaDiscretizeSortedInBins(na1, nbins, pnam); | |
| 1851 numaDestroy(&na1); | |
| 1852 return 0; | |
| 1853 } | |
| 1854 | |
| 1855 /* Make the histogram. Assuming there are no negative values | |
| 1856 * in the array, if the max value in the array does not exceed | |
| 1857 * about 100000, the bin size for generating the histogram will | |
| 1858 * be 1; maxbins refers to the number of entries in the histogram. */ | |
| 1859 L_INFO("use a histogram: input size = %d\n", __func__, numaGetCount(na)); | |
| 1860 numaGetMax(na, &maxval, NULL); | |
| 1861 maxbins = L_MIN(100002, (l_int32)maxval + 2); | |
| 1862 na1 = numaMakeHistogram(na, maxbins, NULL, NULL); | |
| 1863 | |
| 1864 /* Warn if there is a scale change. This shouldn't happen | |
| 1865 * unless the max value is above 100000. */ | |
| 1866 numaGetParameters(na1, NULL, &delx); | |
| 1867 if (delx > 1.0) | |
| 1868 L_WARNING("scale change: delx = %6.2f\n", __func__, delx); | |
| 1869 | |
| 1870 /* Rank bin the results */ | |
| 1871 numaDiscretizeHistoInBins(na1, nbins, pnam, NULL); | |
| 1872 numaDestroy(&na1); | |
| 1873 return 0; | |
| 1874 } | |
| 1875 | |
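| /* Illustrative sketch (not in the upstream source; compiled out): the | |
| * simplest use of numaGetRankBinValues(), here with 4 rank quartiles. | |
| * The helper name is hypothetical; 'na' holds non-negative values. */ | |
| #if 0 | |
| static NUMA * | |
| exampleRankQuartileMeans(NUMA *na) | |
| { | |
| NUMA *nam; /* mean value in each of the 4 rank quartiles */ | |
| | |
| if (numaGetRankBinValues(na, 4, &nam)) | |
| return NULL; | |
| return nam; /* caller destroys with numaDestroy(&nam) */ | |
| } | |
| #endif | |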
| 1876 | |
| 1877 /*! | |
| 1878 * \brief numaGetUniformBinSizes() | |
| 1879 * | |
| 1880 * \param[in] ntotal number of values to be split up | |
| 1881 * \param[in] nbins number of bins | |
| 1882 * \return naeach number of values to go in each bin, or NULL on error | |
| 1883 * | |
| 1884 * <pre> | |
| 1885 * Notes: | |
| 1886 * (1) The numbers in the bins can differ by 1. The sum of | |
| 1887 * bin numbers in %naeach is %ntotal. | |
| 1888 * </pre> | |
| 1889 */ | |
| 1890 NUMA * | |
| 1891 numaGetUniformBinSizes(l_int32 ntotal, | |
| 1892 l_int32 nbins) | |
| 1893 { | |
| 1894 l_int32 i, start, end; | |
| 1895 NUMA *naeach; | |
| 1896 | |
| 1897 if (ntotal <= 0) | |
| 1898 return (NUMA *)ERROR_PTR("ntotal <= 0", __func__, NULL); | |
| 1899 if (nbins <= 0) | |
| 1900 return (NUMA *)ERROR_PTR("nbins <= 0", __func__, NULL); | |
| 1901 | |
| 1902 if ((naeach = numaCreate(nbins)) == NULL) | |
| 1903 return (NUMA *)ERROR_PTR("naeach not made", __func__, NULL); | |
| 1904 | |
| 1905 if (ntotal < nbins) { /* put 1 in each of %ntotal bins */ | |
| 1906 for (i = 0; i < ntotal; i++) | |
| 1907 numaAddNumber(naeach, 1); | |
| 1908 return naeach; | |
| 1909 } | |
| 1910 | |
| 1911 start = 0; | |
| 1912 for (i = 0; i < nbins; i++) { | |
| 1913 end = ntotal * (i + 1) / nbins; | |
| 1914 numaAddNumber(naeach, end - start); | |
| 1915 start = end; | |
| 1916 } | |
| 1917 return naeach; | |
| 1918 } | |
| 1919 | |
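| /* Illustrative sketch (not in the upstream source; compiled out): a | |
| * small worked example. numaGetUniformBinSizes(10, 4) yields the bin | |
| * sizes {2, 3, 2, 3}, which differ by at most 1 and sum to 10. The | |
| * helper name is hypothetical. */ | |
| #if 0 | |
| static void | |
| exampleUniformBins(void) | |
| { | |
| l_int32 i, size; | |
| NUMA *naeach; | |
| | |
| if ((naeach = numaGetUniformBinSizes(10, 4)) == NULL) | |
| return; | |
| for (i = 0; i < 4; i++) { | |
| numaGetIValue(naeach, i, &size); | |
| lept_stderr("bin %d holds %d values\n", i, size); | |
| } | |
| numaDestroy(&naeach); | |
| } | |
| #endif | |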
| 1920 | |
| 1921 /*----------------------------------------------------------------------* | |
| 1922 * Splitting a distribution * | |
| 1923 *----------------------------------------------------------------------*/ | |
| 1924 /*! | |
| 1925 * \brief numaSplitDistribution() | |
| 1926 * | |
| 1927 * \param[in] na histogram | |
| 1928 * \param[in] scorefract fraction of the max score, used to determine | |
| 1929 * range over which the histogram min is searched | |
| 1930 * \param[out] psplitindex [optional] index for splitting | |
| 1931 * \param[out] pave1 [optional] average of lower distribution | |
| 1932 * \param[out] pave2 [optional] average of upper distribution | |
| 1933 * \param[out] pnum1 [optional] population of lower distribution | |
| 1934 * \param[out] pnum2 [optional] population of upper distribution | |
| 1935 * \param[out] pnascore [optional] for debugging; otherwise use NULL | |
| 1936 * \return 0 if OK, 1 on error | |
| 1937 * | |
| 1938 * <pre> | |
| 1939 * Notes: | |
| 1940 * (1) This function is intended to be used on a distribution of | |
| 1941 * values that represent two sets, such as a histogram of | |
| 1942 * pixel values for an image with a fg and bg, and the goal | |
| 1943 * is to determine the averages of the two sets and the | |
| 1944 * best splitting point. | |
| 1945 * (2) The Otsu method finds a split point that divides the distribution | |
| 1946 * into two parts by maximizing a score function that is the | |
| 1947 * product of two terms: | |
| 1948 * (a) the square of the difference of centroids, (ave1 - ave2)^2 | |
| 1949 * (b) fract1 * (1 - fract1) | |
| 1950 * where fract1 is the fraction in the lower distribution. | |
| 1951 * (3) This works well for images where the fg and bg are | |
| 1952 * each relatively homogeneous and well-separated in color. | |
| 1953 * However, if the actual fg and bg sets are very different | |
| 1954 * in size, and the bg is highly varied, as can occur in some | |
| 1955 * scanned document images, this will bias the split point | |
| 1956 * into the larger "bump" (i.e., toward the point where the | |
| 1957 * (b) term reaches its maximum of 0.25 at fract1 = 0.5). | |
| 1958 * To avoid this, we define a range of values near the | |
| 1959 * maximum of the score function, and choose the value within | |
| 1960 * this range such that the histogram itself has a minimum value. | |
| 1961 * The range is determined by scorefract: we include all abscissa | |
| 1962 * values to the left and right of the value that maximizes the | |
| 1963 * score, such that the score stays above (1 - scorefract) * maxscore. | |
| 1964 * The intuition behind this modification is to try to find | |
| 1965 * a split point that both has a high variance score and is | |
| 1966 * at or near a minimum in the histogram, so that the histogram | |
| 1967 * slope is small at the split point. | |
| 1968 * (4) We normalize the score so that if the two distributions | |
| 1969 * were of equal size and at opposite ends of the numa, the | |
| 1970 * score would be 1.0. | |
| 1971 * </pre> | |
| 1972 */ | |
| 1973 l_ok | |
| 1974 numaSplitDistribution(NUMA *na, | |
| 1975 l_float32 scorefract, | |
| 1976 l_int32 *psplitindex, | |
| 1977 l_float32 *pave1, | |
| 1978 l_float32 *pave2, | |
| 1979 l_float32 *pnum1, | |
| 1980 l_float32 *pnum2, | |
| 1981 NUMA **pnascore) | |
| 1982 { | |
| 1983 l_int32 i, n, bestsplit, minrange, maxrange, maxindex; | |
| 1984 l_float32 ave1, ave2, ave1prev, ave2prev; | |
| 1985 l_float32 num1, num2, num1prev, num2prev; | |
| 1986 l_float32 val, minval, sum, fract1; | |
| 1987 l_float32 norm, score, minscore, maxscore; | |
| 1988 NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2; | |
| 1989 | |
| 1990 if (psplitindex) *psplitindex = 0; | |
| 1991 if (pave1) *pave1 = 0.0; | |
| 1992 if (pave2) *pave2 = 0.0; | |
| 1993 if (pnum1) *pnum1 = 0.0; | |
| 1994 if (pnum2) *pnum2 = 0.0; | |
| 1995 if (pnascore) *pnascore = NULL; | |
| 1996 if (!na) | |
| 1997 return ERROR_INT("na not defined", __func__, 1); | |
| 1998 | |
| 1999 n = numaGetCount(na); | |
| 2000 if (n <= 1) | |
| 2001 return ERROR_INT("n = 1 in histogram", __func__, 1); | |
| 2002 numaGetSum(na, &sum); | |
| 2003 if (sum <= 0.0) | |
| 2004 return ERROR_INT("sum <= 0.0", __func__, 1); | |
| 2005 norm = 4.0f / ((l_float32)(n - 1) * (n - 1)); | |
| 2006 ave1prev = 0.0; | |
| 2007 numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL); | |
| 2008 num1prev = 0.0; | |
| 2009 num2prev = sum; | |
| 2010 maxindex = n / 2; /* initialize with something */ | |
| 2011 | |
| 2012 /* Split the histogram with [0 ... i] in the lower part | |
| 2013 * and [i+1 ... n-1] in upper part. First, compute an otsu | |
| 2014 * score for each possible splitting. */ | |
| 2015 if ((nascore = numaCreate(n)) == NULL) | |
| 2016 return ERROR_INT("nascore not made", __func__, 1); | |
| 2017 naave1 = (pave1) ? numaCreate(n) : NULL; | |
| 2018 naave2 = (pave2) ? numaCreate(n) : NULL; | |
| 2019 nanum1 = (pnum1) ? numaCreate(n) : NULL; | |
| 2020 nanum2 = (pnum2) ? numaCreate(n) : NULL; | |
| 2021 maxscore = 0.0; | |
| 2022 for (i = 0; i < n; i++) { | |
| 2023 numaGetFValue(na, i, &val); | |
| 2024 num1 = num1prev + val; | |
| 2025 if (num1 == 0) | |
| 2026 ave1 = ave1prev; | |
| 2027 else | |
| 2028 ave1 = (num1prev * ave1prev + i * val) / num1; | |
| 2029 num2 = num2prev - val; | |
| 2030 if (num2 == 0) | |
| 2031 ave2 = ave2prev; | |
| 2032 else | |
| 2033 ave2 = (num2prev * ave2prev - i * val) / num2; | |
| 2034 fract1 = num1 / sum; | |
| 2035 score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1); | |
| 2036 numaAddNumber(nascore, score); | |
| 2037 if (pave1) numaAddNumber(naave1, ave1); | |
| 2038 if (pave2) numaAddNumber(naave2, ave2); | |
| 2039 if (pnum1) numaAddNumber(nanum1, num1); | |
| 2040 if (pnum2) numaAddNumber(nanum2, num2); | |
| 2041 if (score > maxscore) { | |
| 2042 maxscore = score; | |
| 2043 maxindex = i; | |
| 2044 } | |
| 2045 num1prev = num1; | |
| 2046 num2prev = num2; | |
| 2047 ave1prev = ave1; | |
| 2048 ave2prev = ave2; | |
| 2049 } | |
| 2050 | |
| 2051 /* Next, for all contiguous scores within a specified fraction | |
| 2052 * of the max, choose the split point as the value with the | |
| 2053 * minimum in the histogram. */ | |
| 2054 minscore = (1.f - scorefract) * maxscore; | |
| 2055 for (i = maxindex - 1; i >= 0; i--) { | |
| 2056 numaGetFValue(nascore, i, &val); | |
| 2057 if (val < minscore) | |
| 2058 break; | |
| 2059 } | |
| 2060 minrange = i + 1; | |
| 2061 for (i = maxindex + 1; i < n; i++) { | |
| 2062 numaGetFValue(nascore, i, &val); | |
| 2063 if (val < minscore) | |
| 2064 break; | |
| 2065 } | |
| 2066 maxrange = i - 1; | |
| 2067 numaGetFValue(na, minrange, &minval); | |
| 2068 bestsplit = minrange; | |
| 2069 for (i = minrange + 1; i <= maxrange; i++) { | |
| 2070 numaGetFValue(na, i, &val); | |
| 2071 if (val < minval) { | |
| 2072 minval = val; | |
| 2073 bestsplit = i; | |
| 2074 } | |
| 2075 } | |
| 2076 | |
| 2077 /* Add one to the bestsplit value to get the threshold value, | |
| 2078 * because when we take a threshold, as in pixThresholdToBinary(), | |
| 2079 * we always choose the set with values below the threshold. */ | |
| 2080 bestsplit = L_MIN(255, bestsplit + 1); | |
| 2081 | |
| 2082 if (psplitindex) *psplitindex = bestsplit; | |
| 2083 if (pave1) numaGetFValue(naave1, bestsplit, pave1); | |
| 2084 if (pave2) numaGetFValue(naave2, bestsplit, pave2); | |
| 2085 if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1); | |
| 2086 if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2); | |
| 2087 | |
| 2088 if (pnascore) { /* debug mode */ | |
| 2089 lept_stderr("minrange = %d, maxrange = %d\n", minrange, maxrange); | |
| 2090 lept_stderr("minval = %10.0f\n", minval); | |
| 2091 gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore", | |
| 2092 "Score for split distribution"); | |
| 2093 *pnascore = nascore; | |
| 2094 } else { | |
| 2095 numaDestroy(&nascore); | |
| 2096 } | |
| 2097 | |
| 2098 if (pave1) numaDestroy(&naave1); | |
| 2099 if (pave2) numaDestroy(&naave2); | |
| 2100 if (pnum1) numaDestroy(&nanum1); | |
| 2101 if (pnum2) numaDestroy(&nanum2); | |
| 2102 return 0; | |
| 2103 } | |
| 2104 | |
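| /* Illustrative sketch (not in the upstream source; compiled out): split | |
| * a bimodal gray-level histogram into lower and upper distributions in | |
| * the manner of the modified Otsu method described above. 'nahisto' and | |
| * the helper name are hypothetical; scorefract = 0.1 is an arbitrary | |
| * example value. */ | |
| #if 0 | |
| static void | |
| exampleSplitHisto(NUMA *nahisto) | |
| { | |
| l_int32 thresh; | |
| l_float32 ave1, ave2; | |
| | |
| if (numaSplitDistribution(nahisto, 0.1, &thresh, &ave1, &ave2, | |
| NULL, NULL, NULL)) | |
| return; | |
| lept_stderr("split at %d: lower ave = %6.1f, upper ave = %6.1f\n", | |
| thresh, ave1, ave2); | |
| } | |
| #endif | |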
| 2105 | |
| 2106 /*----------------------------------------------------------------------* | |
| 2107 * Comparing histograms * | |
| 2108 *----------------------------------------------------------------------*/ | |
| 2109 /*! | |
| 2110 * \brief grayHistogramsToEMD() | |
| 2111 * | |
| 2112 * \param[in] naa1, naa2 two numaa, each with one or more 256-element | |
| 2113 * histograms | |
| 2114 * \param[out] pnad nad of EM distances for each histogram | |
| 2115 * \return 0 if OK, 1 on error | |
| 2116 * | |
| 2117 * <pre> | |
| 2118 * Notes: | |
| 2119 * (1) The two numaas must be the same size and have corresponding | |
| 2120 * 256-element histograms. Pairs do not need to be normalized | |
| 2121 * to the same sum. | |
| 2122 * (2) This is typically used on two sets of histograms from | |
| 2123 * corresponding tiles of two images. The similarity of two | |
| 2124 * images can be found with the scoring function used in | |
| 2125 * pixCompareGrayByHisto(): | |
| 2126 * score S = 1.0 - k * D, where | |
| 2127 * k is a constant, say in the range 5-10 | |
| 2128 * D = EMD | |
| 2129 * for each tile; for multiple tiles, take the Min(S) over | |
| 2130 * the set of tiles to be the final score. | |
| 2131 * </pre> | |
| 2132 */ | |
| 2133 l_ok | |
| 2134 grayHistogramsToEMD(NUMAA *naa1, | |
| 2135 NUMAA *naa2, | |
| 2136 NUMA **pnad) | |
| 2137 { | |
| 2138 l_int32 i, n, nt; | |
| 2139 l_float32 dist; | |
| 2140 NUMA *na1, *na2, *nad; | |
| 2141 | |
| 2142 if (!pnad) | |
| 2143 return ERROR_INT("&nad not defined", __func__, 1); | |
| 2144 *pnad = NULL; | |
| 2145 if (!naa1 || !naa2) | |
| 2146 return ERROR_INT("na1 and na2 not both defined", __func__, 1); | |
| 2147 n = numaaGetCount(naa1); | |
| 2148 if (n != numaaGetCount(naa2)) | |
| 2149 return ERROR_INT("naa1 and naa2 numa counts differ", __func__, 1); | |
| 2150 nt = numaaGetNumberCount(naa1); | |
| 2151 if (nt != numaaGetNumberCount(naa2)) | |
| 2152 return ERROR_INT("naa1 and naa2 number counts differ", __func__, 1); | |
| 2153 if (256 * n != nt) /* good enough check */ | |
| 2154 return ERROR_INT("na sizes must be 256", __func__, 1); | |
| 2155 | |
| 2156 nad = numaCreate(n); | |
| 2157 *pnad = nad; | |
| 2158 for (i = 0; i < n; i++) { | |
| 2159 na1 = numaaGetNuma(naa1, i, L_CLONE); | |
| 2160 na2 = numaaGetNuma(naa2, i, L_CLONE); | |
| 2161 numaEarthMoverDistance(na1, na2, &dist); | |
| 2162 numaAddNumber(nad, dist / 255.f); /* normalize to [0.0 - 1.0] */ | |
| 2163 numaDestroy(&na1); | |
| 2164 numaDestroy(&na2); | |
| 2165 } | |
| 2166 return 0; | |
| 2167 } | |
| 2168 | |
| 2169 | |
| 2170 /*! | |
| 2171 * \brief numaEarthMoverDistance() | |
| 2172 * | |
| 2173 * \param[in] na1, na2 two numas of the same size, typically histograms | |
| 2174 * \param[out] pdist earthmover distance | |
| 2175 * \return 0 if OK, 1 on error | |
| 2176 * | |
| 2177 * <pre> | |
| 2178 * Notes: | |
| 2179 * (1) The two numas must have the same size. They do not need to be | |
| 2180 * normalized to the same sum before applying the function. | |
| 2181 * (2) For a 1D discrete function, the implementation of the EMD | |
| 2182 * is trivial. Just keep filling or emptying buckets in one numa | |
| 2183 * to match the amount in the other, moving sequentially along | |
| 2184 * both arrays. | |
| 2185 * (3) We divide the sum of the absolute value of everything moved | |
| 2186 * (by 1 unit at a time) by the sum of the numa (amount of "earth") | |
| 2187 * to get the average distance that the "earth" was moved. | |
| 2188 * This is the value returned here. | |
| 2189 * (4) The caller can do a further normalization, by the number of | |
| 2190 * buckets (minus 1), to get the EM distance as a fraction of | |
| 2191 * the maximum possible distance, which is n-1. This fraction | |
| 2192 * is 1.0 for the situation where all the 'earth' in the first | |
| 2193 * array is at one end, and all in the second array is at the | |
| 2194 * other end. | |
| 2195 * </pre> | |
| 2196 */ | |
| 2197 l_ok | |
| 2198 numaEarthMoverDistance(NUMA *na1, | |
| 2199 NUMA *na2, | |
| 2200 l_float32 *pdist) | |
| 2201 { | |
| 2202 l_int32 n, norm, i; | |
| 2203 l_float32 sum1, sum2, diff, total; | |
| 2204 l_float32 *array1, *array3; | |
| 2205 NUMA *na3; | |
| 2206 | |
| 2207 if (!pdist) | |
| 2208 return ERROR_INT("&dist not defined", __func__, 1); | |
| 2209 *pdist = 0.0; | |
| 2210 if (!na1 || !na2) | |
| 2211 return ERROR_INT("na1 and na2 not both defined", __func__, 1); | |
| 2212 n = numaGetCount(na1); | |
| 2213 if (n != numaGetCount(na2)) | |
| 2214 return ERROR_INT("na1 and na2 have different size", __func__, 1); | |
| 2215 | |
| 2216 /* Generate na3; normalize to na1 if necessary */ | |
| 2217 numaGetSum(na1, &sum1); | |
| 2218 numaGetSum(na2, &sum2); | |
| 2219 norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0; | |
| 2220 if (!norm) | |
| 2221 na3 = numaTransform(na2, 0, sum1 / sum2); | |
| 2222 else | |
| 2223 na3 = numaCopy(na2); | |
| 2224 array1 = numaGetFArray(na1, L_NOCOPY); | |
| 2225 array3 = numaGetFArray(na3, L_NOCOPY); | |
| 2226 | |
| 2227 /* Move earth between buckets in na3 so that it matches na1 */ | |
| 2228 total = 0; | |
| 2229 for (i = 1; i < n; i++) { | |
| 2230 diff = array1[i - 1] - array3[i - 1]; | |
| 2231 array3[i] -= diff; | |
| 2232 total += L_ABS(diff); | |
| 2233 } | |
| 2234 *pdist = total / sum1; | |
| 2235 | |
| 2236 numaDestroy(&na3); | |
| 2237 return 0; | |
| 2238 } | |
| 2239 | |
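| /* Illustrative sketch (not in the upstream source; compiled out): a | |
| * tiny worked example of the 1D EMD. Moving one unit of "earth" from | |
| * bucket 0 to bucket 2 gives a distance of 2.0; dividing by the maximum | |
| * possible distance (n - 1 = 2) gives 1.0. The helper name is | |
| * hypothetical. */ | |
| #if 0 | |
| static void | |
| exampleEMD(void) | |
| { | |
| l_float32 dist; | |
| NUMA *na1, *na2; | |
| | |
| na1 = numaCreate(3); | |
| na2 = numaCreate(3); | |
| numaAddNumber(na1, 1.0); numaAddNumber(na1, 0.0); numaAddNumber(na1, 0.0); | |
| numaAddNumber(na2, 0.0); numaAddNumber(na2, 0.0); numaAddNumber(na2, 1.0); | |
| numaEarthMoverDistance(na1, na2, &dist); | |
| lept_stderr("EMD = %4.1f, normalized = %4.1f\n", dist, dist / 2.0); | |
| numaDestroy(&na1); | |
| numaDestroy(&na2); | |
| } | |
| #endif | |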
| 2240 | |
| 2241 /*! | |
| 2242 * \brief grayInterHistogramStats() | |
| 2243 * | |
| 2244 * \param[in] naa numaa with two or more 256-element histograms | |
| 2245 * \param[in] wc half-width of the smoothing window | |
| 2246 * \param[out] pnam [optional] mean values | |
| 2247 * \param[out] pnams [optional] mean square values | |
| 2248 * \param[out] pnav [optional] variances | |
| 2249 * \param[out] pnarv [optional] rms deviations from the mean | |
| 2250 * \return 0 if OK, 1 on error | |
| 2251 * | |
| 2252 * <pre> | |
| 2253 * Notes: | |
| 2254 * (1) The %naa has two or more 256-element numa histograms, which | |
| 2255 * are to be compared value-wise at each of the 256 gray levels. | |
| 2256 * The result are stats (mean, mean square, variance, root variance) | |
| 2257 * aggregated across the set of histograms, and each is output | |
| 2258 * as a 256 entry numa. Think of these histograms as a matrix, | |
| 2259 * where each histogram is one row of the array. The stats are | |
| 2260 * then aggregated column-wise, between the histograms. | |
| 2261 * (2) These stats are: | |
| 2262 * ~ average value: <v> (nam) | |
| 2263 * ~ average squared value: <v*v> (nams) | |
| 2264 * ~ variance: <(v - <v>)*(v - <v>)> = <v*v> - <v>*<v> (nav) | |
| 2265 * ~ square-root of variance: (narv) | |
| 2266 * where the brackets < .. > indicate that the average value is | |
| 2267 * to be taken over each column of the array. | |
| 2268 * (3) The input histograms are optionally smoothed before these | |
| 2269 * statistical operations. | |
| 2270 * (4) The input histograms are normalized to a sum of 10000. By | |
| 2271 * doing this, the resulting numbers are independent of the | |
| 2272 * number of samples used in building the individual histograms. | |
| 2273 * (5) A typical application is on a set of histograms from tiles | |
| 2274 * of an image, to distinguish between text/tables and photo | |
| 2275 * regions. If the tiles are much larger than the text line | |
| 2276 * spacing, text/table regions typically have smaller variance | |
| 2277 * across tiles than photo regions. For this application, it | |
| 2278 * may be useful to ignore values near white, which are large for | |
| 2279 * text and would magnify the variance due to variations in | |
| 2280 * illumination. However, because the variance of a drawing or | |
| 2281 * a light photo can be similar to that of grayscale text, this | |
| 2282 * function is only a discriminator between darker photos/drawings | |
| 2283 * and light photos/text/line-graphics. | |
| 2284 * </pre> | |
| 2285 */ | |
| 2286 l_ok | |
| 2287 grayInterHistogramStats(NUMAA *naa, | |
| 2288 l_int32 wc, | |
| 2289 NUMA **pnam, | |
| 2290 NUMA **pnams, | |
| 2291 NUMA **pnav, | |
| 2292 NUMA **pnarv) | |
| 2293 { | |
| 2294 l_int32 i, j, n, nn; | |
| 2295 l_float32 **arrays; | |
| 2296 l_float32 mean, var, rvar; | |
| 2297 NUMA *na1, *na2, *na3, *na4; | |
| 2298 | |
| 2299 if (pnam) *pnam = NULL; | |
| 2300 if (pnams) *pnams = NULL; | |
| 2301 if (pnav) *pnav = NULL; | |
| 2302 if (pnarv) *pnarv = NULL; | |
| 2303 if (!pnam && !pnams && !pnav && !pnarv) | |
| 2304 return ERROR_INT("nothing requested", __func__, 1); | |
| 2305 if (!naa) | |
| 2306 return ERROR_INT("naa not defined", __func__, 1); | |
| 2307 n = numaaGetCount(naa); | |
| 2308 for (i = 0; i < n; i++) { | |
| 2309 nn = numaaGetNumaCount(naa, i); | |
| 2310 if (nn != 256) { | |
| 2311 L_ERROR("%d numbers in numa[%d]\n", __func__, nn, i); | |
| 2312 return 1; | |
| 2313 } | |
| 2314 } | |
| 2315 | |
| 2316 if (pnam) *pnam = numaCreate(256); | |
| 2317 if (pnams) *pnams = numaCreate(256); | |
| 2318 if (pnav) *pnav = numaCreate(256); | |
| 2319 if (pnarv) *pnarv = numaCreate(256); | |
| 2320 | |
| 2321 /* First, use mean smoothing, normalize each histogram, | |
| 2322 * and save all results in a 2D matrix. */ | |
| 2323 arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *)); | |
| 2324 for (i = 0; i < n; i++) { | |
| 2325 na1 = numaaGetNuma(naa, i, L_CLONE); | |
| 2326 na2 = numaWindowedMean(na1, wc); | |
| 2327 na3 = numaNormalizeHistogram(na2, 10000.); | |
| 2328 arrays[i] = numaGetFArray(na3, L_COPY); | |
| 2329 numaDestroy(&na1); | |
| 2330 numaDestroy(&na2); | |
| 2331 numaDestroy(&na3); | |
| 2332 } | |
| 2333 | |
| 2334 /* Get stats between histograms */ | |
| 2335 for (j = 0; j < 256; j++) { | |
| 2336 na4 = numaCreate(n); | |
| 2337 for (i = 0; i < n; i++) { | |
| 2338 numaAddNumber(na4, arrays[i][j]); | |
| 2339 } | |
| 2340 numaSimpleStats(na4, 0, -1, &mean, &var, &rvar); | |
| 2341 if (pnam) numaAddNumber(*pnam, mean); | |
| 2342 if (pnams) numaAddNumber(*pnams, mean * mean); | |
| 2343 if (pnav) numaAddNumber(*pnav, var); | |
| 2344 if (pnarv) numaAddNumber(*pnarv, rvar); | |
| 2345 numaDestroy(&na4); | |
| 2346 } | |
| 2347 | |
| 2348 for (i = 0; i < n; i++) | |
| 2349 LEPT_FREE(arrays[i]); | |
| 2350 LEPT_FREE(arrays); | |
| 2351 return 0; | |
| 2352 } | |
| 2353 | |
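| /* Illustrative sketch (not in the upstream source; compiled out): | |
| * compare a set of 256-bin tile histograms already collected in a NUMAA | |
| * and inspect the rms deviation between them at each gray level. | |
| * 'naahistos' and the helper name are hypothetical; wc = 5 gives an | |
| * 11-point smoothing window. */ | |
| #if 0 | |
| static void | |
| exampleInterHistoStats(NUMAA *naahistos) | |
| { | |
| l_float32 maxrv; | |
| NUMA *narv; | |
| | |
| if (grayInterHistogramStats(naahistos, 5, NULL, NULL, NULL, &narv)) | |
| return; | |
| numaGetMax(narv, &maxrv, NULL); /* larger values suggest photo regions */ | |
| lept_stderr("max rms deviation between histograms = %8.2f\n", maxrv); | |
| numaDestroy(&narv); | |
| } | |
| #endif | |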
| 2354 | |
| 2355 /*----------------------------------------------------------------------* | |
| 2356 * Extrema finding * | |
| 2357 *----------------------------------------------------------------------*/ | |
| 2358 /*! | |
| 2359 * \brief numaFindPeaks() | |
| 2360 * | |
| 2361 * \param[in] nas source numa | |
| 2362 * \param[in] nmax max number of peaks to be found | |
| 2363 * \param[in] fract1 min fraction of peak value | |
| 2364 * \param[in] fract2 min slope | |
| 2365 * \return peak na, or NULL on error. | |
| 2366 * | |
| 2367 * <pre> | |
| 2368 * Notes: | |
| 2369 * (1) The returned na consists of sets of four numbers representing | |
| 2370 * the peak, in the following order: | |
| 2371 * left edge; peak center; right edge; normalized peak area | |
| 2372 * </pre> | |
| 2373 */ | |
| 2374 NUMA * | |
| 2375 numaFindPeaks(NUMA *nas, | |
| 2376 l_int32 nmax, | |
| 2377 l_float32 fract1, | |
| 2378 l_float32 fract2) | |
| 2379 { | |
| 2380 l_int32 i, k, n, maxloc, lloc, rloc; | |
| 2381 l_float32 fmaxval, sum, total, newtotal, val, lastval; | |
| 2382 l_float32 peakfract; | |
| 2383 NUMA *na, *napeak; | |
| 2384 | |
| 2385 if (!nas) | |
| 2386 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 2387 n = numaGetCount(nas); | |
| 2388 numaGetSum(nas, &total); | |
| 2389 | |
| 2390 /* We munge this copy */ | |
| 2391 if ((na = numaCopy(nas)) == NULL) | |
| 2392 return (NUMA *)ERROR_PTR("na not made", __func__, NULL); | |
| 2393 if ((napeak = numaCreate(4 * nmax)) == NULL) { | |
| 2394 numaDestroy(&na); | |
| 2395 return (NUMA *)ERROR_PTR("napeak not made", __func__, NULL); | |
| 2396 } | |
| 2397 | |
| 2398 for (k = 0; k < nmax; k++) { | |
| 2399 numaGetSum(na, &newtotal); | |
| 2400 if (newtotal == 0.0) /* sanity check */ | |
| 2401 break; | |
| 2402 numaGetMax(na, &fmaxval, &maxloc); | |
| 2403 sum = fmaxval; | |
| 2404 lastval = fmaxval; | |
| 2405 lloc = 0; | |
| 2406 for (i = maxloc - 1; i >= 0; --i) { | |
| 2407 numaGetFValue(na, i, &val); | |
| 2408 if (val == 0.0) { | |
| 2409 lloc = i + 1; | |
| 2410 break; | |
| 2411 } | |
| 2412 if (val > fract1 * fmaxval) { | |
| 2413 sum += val; | |
| 2414 lastval = val; | |
| 2415 continue; | |
| 2416 } | |
| 2417 if (lastval - val > fract2 * lastval) { | |
| 2418 sum += val; | |
| 2419 lastval = val; | |
| 2420 continue; | |
| 2421 } | |
| 2422 lloc = i; | |
| 2423 break; | |
| 2424 } | |
| 2425 lastval = fmaxval; | |
| 2426 rloc = n - 1; | |
| 2427 for (i = maxloc + 1; i < n; ++i) { | |
| 2428 numaGetFValue(na, i, &val); | |
| 2429 if (val == 0.0) { | |
| 2430 rloc = i - 1; | |
| 2431 break; | |
| 2432 } | |
| 2433 if (val > fract1 * fmaxval) { | |
| 2434 sum += val; | |
| 2435 lastval = val; | |
| 2436 continue; | |
| 2437 } | |
| 2438 if (lastval - val > fract2 * lastval) { | |
| 2439 sum += val; | |
| 2440 lastval = val; | |
| 2441 continue; | |
| 2442 } | |
| 2443 rloc = i; | |
| 2444 break; | |
| 2445 } | |
| 2446 peakfract = sum / total; | |
| 2447 numaAddNumber(napeak, lloc); | |
| 2448 numaAddNumber(napeak, maxloc); | |
| 2449 numaAddNumber(napeak, rloc); | |
| 2450 numaAddNumber(napeak, peakfract); | |
| 2451 | |
| 2452 for (i = lloc; i <= rloc; i++) | |
| 2453 numaSetValue(na, i, 0.0); | |
| 2454 } | |
| 2455 | |
| 2456 numaDestroy(&na); | |
| 2457 return napeak; | |
| 2458 } | |
| 2459 | |
| 2460 | |
| 2461 /*! | |
| 2462 * \brief numaFindExtrema() | |
| 2463 * | |
| 2464 * \param[in] nas input values | |
| 2465 * \param[in] delta relative amount to resolve peaks and valleys | |
| 2466 * \param[out] pnav [optional] values of extrema | |
| 2467 * \return nad locations of extrema, or NULL on error | |
| 2468 * | |
| 2469 * <pre> | |
| 2470 * Notes: | |
| 2471 * (1) This returns a sequence of extrema (peaks and valleys). | |
| 2472 * (2) The algorithm is analogous to that for determining | |
| 2473 * mountain peaks. Suppose we have a local peak, with | |
| 2474 * bumps on the side. Under what conditions can we consider | |
| 2475 * those 'bumps' to be actual peaks? The answer: if the | |
| 2476 * bump is separated from the peak by a saddle that is at | |
| 2477 * least 500 feet below the bump. | |
| 2478 * (3) Operationally, suppose we are trying to identify a peak. | |
| 2479 * We have a previous valley, and also the largest value that | |
| 2480 * we have seen since that valley. We can identify this as | |
| 2481 * a peak if we find a value that is delta BELOW it. When | |
| 2482 * we find such a value, label the peak, use the current | |
| 2483 * value to label the starting point for the search for | |
| 2484 * a valley, and do the same operation in reverse. Namely, | |
| 2485 * keep track of the lowest point seen, and look for a value | |
| 2486 * that is delta ABOVE it. Once found, the lowest point is | |
| 2487 * labeled the valley, and continue, looking for the next peak. | |
| 2488 * </pre> | |
| 2489 */ | |
| 2490 NUMA * | |
| 2491 numaFindExtrema(NUMA *nas, | |
| 2492 l_float32 delta, | |
| 2493 NUMA **pnav) | |
| 2494 { | |
| 2495 l_int32 i, n, found, loc, direction; | |
| 2496 l_float32 startval, val, maxval, minval; | |
| 2497 NUMA *nav, *nad; | |
| 2498 | |
| 2499 if (pnav) *pnav = NULL; | |
| 2500 if (!nas) | |
| 2501 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 2502 if (delta < 0.0) | |
| 2503 return (NUMA *)ERROR_PTR("delta < 0", __func__, NULL); | |
| 2504 | |
| 2505 n = numaGetCount(nas); | |
| 2506 nad = numaCreate(0); | |
| 2507 nav = NULL; | |
| 2508 if (pnav) { | |
| 2509 nav = numaCreate(0); | |
| 2510 *pnav = nav; | |
| 2511 } | |
| 2512 | |
| 2513 /* We don't know if we'll find a peak or valley first, | |
| 2514 * but use the first element of nas as the reference point. | |
| 2515 * Break when we deviate by 'delta' from the first point. */ | |
| 2516 numaGetFValue(nas, 0, &startval); | |
| 2517 found = FALSE; | |
| 2518 for (i = 1; i < n; i++) { | |
| 2519 numaGetFValue(nas, i, &val); | |
| 2520 if (L_ABS(val - startval) >= delta) { | |
| 2521 found = TRUE; | |
| 2522 break; | |
| 2523 } | |
| 2524 } | |
| 2525 | |
| 2526 if (!found) | |
| 2527 return nad; /* it's empty */ | |
| 2528 | |
| 2529 /* Are we looking for a peak or a valley? */ | |
| 2530 if (val > startval) { /* peak */ | |
| 2531 direction = 1; | |
| 2532 maxval = val; | |
| 2533 } else { | |
| 2534 direction = -1; | |
| 2535 minval = val; | |
| 2536 } | |
| 2537 loc = i; | |
| 2538 | |
| 2539 /* Sweep through the rest of the array, recording alternating | |
| 2540 * peak/valley extrema. */ | |
| 2541 for (i = i + 1; i < n; i++) { | |
| 2542 numaGetFValue(nas, i, &val); | |
| 2543 if (direction == 1 && val > maxval ) { /* new local max */ | |
| 2544 maxval = val; | |
| 2545 loc = i; | |
| 2546 } else if (direction == -1 && val < minval ) { /* new local min */ | |
| 2547 minval = val; | |
| 2548 loc = i; | |
| 2549 } else if (direction == 1 && (maxval - val >= delta)) { | |
| 2550 numaAddNumber(nad, loc); /* save the current max location */ | |
| 2551 if (nav) numaAddNumber(nav, maxval); | |
| 2552 direction = -1; /* reverse: start looking for a min */ | |
| 2553 minval = val; | |
| 2554 loc = i; /* current min location */ | |
| 2555 } else if (direction == -1 && (val - minval >= delta)) { | |
| 2556 numaAddNumber(nad, loc); /* save the current min location */ | |
| 2557 if (nav) numaAddNumber(nav, minval); | |
| 2558 direction = 1; /* reverse: start looking for a max */ | |
| 2559 maxval = val; | |
| 2560 loc = i; /* current max location */ | |
| 2561 } | |
| 2562 } | |
| 2563 | |
| 2564 /* Save the final extremum */ | |
| 2565 /* numaAddNumber(nad, loc); */ | |
| 2566 return nad; | |
| 2567 } | |
| 2568 | |
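| /* Illustrative sketch (not in the upstream source; compiled out): find | |
| * alternating peaks and valleys in a small synthetic signal. With | |
| * delta = 2.0 the dip of size 1 between the first two local maxima is | |
| * not resolved, so three extrema are reported: a peak at index 1, a | |
| * valley at index 4, and a peak at index 5. The helper name is | |
| * hypothetical. */ | |
| #if 0 | |
| static void | |
| exampleFindExtrema(void) | |
| { | |
| static const l_float32 sig[7] = {0, 4, 3, 4, 0, 5, 1}; | |
| l_int32 i, n, loc; | |
| l_float32 val; | |
| NUMA *nas, *nav, *nad; | |
| | |
| nas = numaCreate(7); | |
| for (i = 0; i < 7; i++) | |
| numaAddNumber(nas, sig[i]); | |
| nad = numaFindExtrema(nas, 2.0, &nav); /* locations and values */ | |
| n = numaGetCount(nad); | |
| for (i = 0; i < n; i++) { | |
| numaGetIValue(nad, i, &loc); | |
| numaGetFValue(nav, i, &val); | |
| lept_stderr("extremum at index %d, value %3.0f\n", loc, val); | |
| } | |
| numaDestroy(&nas); | |
| numaDestroy(&nav); | |
| numaDestroy(&nad); | |
| } | |
| #endif | |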
| 2569 | |
| 2570 /*! | |
| 2571 * \brief numaFindLocForThreshold() | |
| 2572 * | |
| 2573 * \param[in] na input histogram | |
| 2574 * \param[in] skip look-ahead distance to avoid false minima; | |
| 2575 * use 0 for default | |
| 2576 * \param[out] pthresh threshold value | |
| 2577 * \param[out] pfract [optional] fraction below or at threshold | |
| 2578 * \return 0 if OK, 1 on error or if no threshold can be found | |
| 2579 * | |
| 2580 * <pre> | |
| 2581 * Notes: | |
| 2582 * (1) This finds a good place to set a threshold for a histogram | |
| 2583 * of values that has two peaks. The peaks can differ greatly | |
| 2584 * in area underneath them. The number of buckets in the | |
| 2585 * histogram is expected to be 256 (e.g., from an 8 bpp gray image). | |
| 2586 * (2) The input histogram should have been smoothed with a window | |
| 2587 * to avoid false peak and valley detection due to noise. For | |
| 2588 * example, see pixThresholdByHisto(). | |
| 2589 * (3) A skip value can be input to determine the look-ahead distance | |
| 2590 * to ignore a false peak on the rise or descent from the first peak. | |
| 2591 * Input 0 to use the default value (it assumes a histo size of 256). | |
| 2592 * (4) Optionally, the fractional area under the first peak can | |
| 2593 * be returned. | |
| 2594 * </pre> | |
| 2595 */ | |
| 2596 l_ok | |
| 2597 numaFindLocForThreshold(NUMA *na, | |
| 2598 l_int32 skip, | |
| 2599 l_int32 *pthresh, | |
| 2600 l_float32 *pfract) | |
| 2601 { | |
| 2602 l_int32 i, n, start, index, minloc, found; | |
| 2603 l_float32 val, pval, jval, minval, maxval, sum, partsum; | |
| 2604 l_float32 *fa; | |
| 2605 | |
| 2606 if (pfract) *pfract = 0.0; | |
| 2607 if (!pthresh) | |
| 2608 return ERROR_INT("&thresh not defined", __func__, 1); | |
| 2609 *pthresh = 0; | |
| 2610 if (!na) | |
| 2611 return ERROR_INT("na not defined", __func__, 1); | |
| 2612 if (skip <= 0) skip = 20; | |
| 2613 | |
| 2614 /* Test for constant value */ | |
| 2615 numaGetMin(na, &minval, NULL); | |
| 2616 numaGetMax(na, &maxval, NULL); | |
| 2617 if (minval == maxval) | |
| 2618 return ERROR_INT("all array values are the same", __func__, 1); | |
| 2619 | |
| 2620 /* Look for the top of the first peak */ | |
| 2621 n = numaGetCount(na); | |
| 2622 if (n < 256) | |
| 2623 L_WARNING("array size %d < 256\n", __func__, n); | |
| 2624 fa = numaGetFArray(na, L_NOCOPY); | |
| 2625 pval = fa[0]; | |
| 2626 for (i = 1; i < n; i++) { | |
| 2627 val = fa[i]; | |
| 2628 index = L_MIN(i + skip, n - 1); | |
| 2629 jval = fa[index]; | |
| 2630 if (val < pval && jval < pval) /* at or just past the top */ | |
| 2631 break; | |
| 2632 pval = val; | |
| 2633 } | |
| 2634 | |
| 2635 if (i > n - 5) /* just an increasing function */ | |
| 2636 return ERROR_INT("top of first peak not found", __func__, 1); | |
| 2637 | |
| 2638 /* Look for the low point in the valley */ | |
| 2639 found = FALSE; | |
| 2640 start = i; | |
| 2641 pval = fa[start]; | |
| 2642 for (i = start + 1; i < n; i++) { | |
| 2643 val = fa[i]; | |
| 2644 if (val <= pval) { /* appears to be going down */ | |
| 2645 pval = val; | |
| 2646 } else { /* appears to be going up */ | |
| 2647 index = L_MIN(i + skip, n - 1); | |
| 2648 jval = fa[index]; /* jump ahead by 'skip' */ | |
| 2649 if (val > jval) { /* still going down; jump ahead */ | |
| 2650 pval = jval; | |
| 2651 i = index; | |
| 2652 } else { /* really going up; passed the min */ | |
| 2653 found = TRUE; | |
| 2654 break; | |
| 2655 } | |
| 2656 } | |
| 2657 } | |
| 2658 if (!found) | |
| 2659 return ERROR_INT("no minimum found", __func__, 1); | |
| 2660 | |
| 2661 /* Find the location of the minimum in the interval */ | |
| 2662 minloc = index; /* likely passed the min; look backward */ | |
| 2663 minval = fa[index]; | |
| 2664 for (i = index - 1; i > index - skip; i--) { | |
| 2665 if (fa[i] < minval) { | |
| 2666 minval = fa[i]; | |
| 2667 minloc = i; | |
| 2668 } | |
| 2669 } | |
| 2670 | |
| 2671 /* Is the minimum very near the end of the array? */ | |
| 2672 if (minloc > n - 10) | |
| 2673 return ERROR_INT("minimum at end of array; invalid", __func__, 1); | |
| 2674 *pthresh = minloc; | |
| 2675 | |
| 2676 /* Find the fraction under the first peak */ | |
| 2677 if (pfract) { | |
| 2678 numaGetSumOnInterval(na, 0, minloc, &partsum); | |
| 2679 numaGetSum(na, &sum); | |
| 2680 if (sum > 0.0) | |
| 2681 *pfract = partsum / sum; | |
| 2682 } | |
| 2683 return 0; | |
| 2684 } | |
| 2685 | |
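| /* Illustrative sketch (not in the upstream source; compiled out): pick | |
| * a threshold from a smoothed, bimodal 256-bin gray histogram; see also | |
| * pixThresholdByHisto(), referenced in the notes above. 'nasmoothed' | |
| * and the helper name are hypothetical. */ | |
| #if 0 | |
| static void | |
| exampleThresholdFromHisto(NUMA *nasmoothed) | |
| { | |
| l_int32 thresh; | |
| l_float32 fract; | |
| | |
| if (numaFindLocForThreshold(nasmoothed, 0, &thresh, &fract)) | |
| return; /* no valley found */ | |
| lept_stderr("threshold = %d; fraction below it = %5.3f\n", thresh, fract); | |
| } | |
| #endif | |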
| 2686 | |
| 2687 /*! | |
| 2688 * \brief numaCountReversals() | |
| 2689 * | |
| 2690 * \param[in] nas input values | |
| 2691 * \param[in] minreversal relative amount to resolve peaks and valleys | |
| 2692 * \param[out] pnr [optional] number of reversals | |
| 2693 * \param[out] prd [optional] reversal density: reversals/length | |
| 2694 * \return 0 if OK, 1 on error | |
| 2695 * | |
| 2696 * <pre> | |
| 2697 * Notes: | |
| 2698 * (1) The input numa can be generated from pixExtractAlongLine(). | |
| 2699 * If so, the x parameters can be used to find the reversal | |
| 2700 * frequency along a line. | |
| 2701 * (2) If the input numa was generated from a 1 bpp pix, the | |
| 2702 * values will be 0 and 1. Use %minreversal == 1 to get | |
| 2703 * the number of pixel flips. If the only values are 0 and 1, | |
| 2704 * but %minreversal > 1, set the reversal count to 0 and | |
| 2705 * issue a warning. | |
| 2706 * </pre> | |
| 2707 */ | |
| 2708 l_ok | |
| 2709 numaCountReversals(NUMA *nas, | |
| 2710 l_float32 minreversal, | |
| 2711 l_int32 *pnr, | |
| 2712 l_float32 *prd) | |
| 2713 { | |
| 2714 l_int32 i, n, nr, ival, binvals; | |
| 2715 l_int32 *ia; | |
| 2716 l_float32 fval, delx, len; | |
| 2717 NUMA *nat; | |
| 2718 | |
| 2719 if (pnr) *pnr = 0; | |
| 2720 if (prd) *prd = 0.0; | |
| 2721 if (!pnr && !prd) | |
| 2722 return ERROR_INT("neither &nr nor &rd are defined", __func__, 1); | |
| 2723 if (!nas) | |
| 2724 return ERROR_INT("nas not defined", __func__, 1); | |
| 2725 if ((n = numaGetCount(nas)) == 0) { | |
| 2726 L_INFO("nas is empty\n", __func__); | |
| 2727 return 0; | |
| 2728 } | |
| 2729 if (minreversal < 0.0) | |
| 2730 return ERROR_INT("minreversal < 0", __func__, 1); | |
| 2731 | |
| 2732 /* Decide if the only values are 0 and 1 */ | |
| 2733 binvals = TRUE; | |
| 2734 for (i = 0; i < n; i++) { | |
| 2735 numaGetFValue(nas, i, &fval); | |
| 2736 if (fval != 0.0 && fval != 1.0) { | |
| 2737 binvals = FALSE; | |
| 2738 break; | |
| 2739 } | |
| 2740 } | |
| 2741 | |
| 2742 nr = 0; | |
| 2743 if (binvals) { | |
| 2744 if (minreversal > 1.0) { | |
| 2745 L_WARNING("binary values but minreversal > 1\n", __func__); | |
| 2746 } else { | |
| 2747 ia = numaGetIArray(nas); | |
| 2748 ival = ia[0]; | |
| 2749 for (i = 1; i < n; i++) { | |
| 2750 if (ia[i] != ival) { | |
| 2751 nr++; | |
| 2752 ival = ia[i]; | |
| 2753 } | |
| 2754 } | |
| 2755 LEPT_FREE(ia); | |
| 2756 } | |
| 2757 } else { | |
| 2758 nat = numaFindExtrema(nas, minreversal, NULL); | |
| 2759 nr = numaGetCount(nat); | |
| 2760 numaDestroy(&nat); | |
| 2761 } | |
| 2762 if (pnr) *pnr = nr; | |
| 2763 if (prd) { | |
| 2764 numaGetParameters(nas, NULL, &delx); | |
| 2765 len = delx * n; | |
| 2766 *prd = (l_float32)nr / len; | |
| 2767 } | |
| 2768 | |
| 2769 return 0; | |
| 2770 } | |
| 2771 | |
| 2772 | |
| 2773 /*----------------------------------------------------------------------* | |
| 2774 * Threshold crossings and frequency analysis * | |
| 2775 *----------------------------------------------------------------------*/ | |
| 2776 /*! | |
| 2777 * \brief numaSelectCrossingThreshold() | |
| 2778 * | |
| 2779 * \param[in] nax [optional] numa of abscissa values; can be NULL | |
| 2780 * \param[in] nay signal | |
| 2781 * \param[in] estthresh estimated pixel threshold for crossing: | |
| 2782 * e.g., for images, white <--> black; typ. ~120 | |
| 2783 * \param[out] pbestthresh robust estimate of threshold to use | |
| 2784 * \return 0 if OK, 1 on error or warning | |
| 2785 * | |
| 2786 * <pre> | |
| 2787 * Notes: | |
| 2788 * (1) When a valid threshold is used, the number of crossings is | |
| 2789 * a maximum, because none are missed. If no threshold intersects | |
| 2790 * all the crossings, the crossings must be determined with | |
| 2791 * numaCrossingsByPeaks(). | |
| 2792 * (2) %estthresh is an input estimate of the threshold that should | |
| 2793 * be used. We compute the crossings with 41 thresholds | |
| 2794 * (20 below and 20 above). There is a range in which the | |
| 2795 * number of crossings is a maximum. Return a threshold | |
| 2796 * in the center of this stable plateau of crossings. | |
| 2797 * This can then be used with numaCrossingsByThreshold() | |
| 2798 * to get a good estimate of crossing locations. | |
| 2799 * (3) If the count of nay is less than 2, a warning is issued. | |
| 2800 * </pre> | |
| 2801 */ | |
| 2802 l_ok | |
| 2803 numaSelectCrossingThreshold(NUMA *nax, | |
| 2804 NUMA *nay, | |
| 2805 l_float32 estthresh, | |
| 2806 l_float32 *pbestthresh) | |
| 2807 { | |
| 2808 l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen; | |
| 2809 l_int32 val, maxval, nmax, count; | |
| 2810 l_float32 thresh, fmaxval, fmodeval; | |
| 2811 NUMA *nat, *nac; | |
| 2812 | |
| 2813 if (!pbestthresh) | |
| 2814 return ERROR_INT("&bestthresh not defined", __func__, 1); | |
| 2815 *pbestthresh = 0.0; | |
| 2816 if (!nay) | |
| 2817 return ERROR_INT("nay not defined", __func__, 1); | |
| 2818 if (numaGetCount(nay) < 2) { | |
| 2819 L_WARNING("nay count < 2; no threshold crossing\n", __func__); | |
| 2820 return 1; | |
| 2821 } | |
| 2822 | |
| 2823 /* Compute the number of crossings for different thresholds */ | |
| 2824 nat = numaCreate(41); | |
| 2825 for (i = 0; i < 41; i++) { | |
| 2826 thresh = estthresh - 80.0f + 4.0f * i; | |
| 2827 nac = numaCrossingsByThreshold(nax, nay, thresh); | |
| 2828 numaAddNumber(nat, numaGetCount(nac)); | |
| 2829 numaDestroy(&nac); | |
| 2830 } | |
| 2831 | |
| 2832 /* Find the center of the plateau of max crossings, which | |
| 2833 * extends from thresh[istart] to thresh[iend]. */ | |
| 2834 numaGetMax(nat, &fmaxval, NULL); | |
| 2835 maxval = (l_int32)fmaxval; | |
| 2836 nmax = 0; | |
| 2837 for (i = 0; i < 41; i++) { | |
| 2838 numaGetIValue(nat, i, &val); | |
| 2839 if (val == maxval) | |
| 2840 nmax++; | |
| 2841 } | |
| 2842 if (nmax < 3) { /* likely accidental max; try the mode */ | |
| 2843 numaGetMode(nat, &fmodeval, &count); | |
| 2844 if (count > nmax && fmodeval > 0.5 * fmaxval) | |
| 2845 maxval = (l_int32)fmodeval; /* use the mode */ | |
| 2846 } | |
| 2847 | |
| 2848 inrun = FALSE; | |
| 2849 iend = 40; | |
| 2850 maxrunlen = 0, maxstart = 0, maxend = 0; | |
| 2851 for (i = 0; i < 41; i++) { | |
| 2852 numaGetIValue(nat, i, &val); | |
| 2853 if (val == maxval) { | |
| 2854 if (!inrun) { | |
| 2855 istart = i; | |
| 2856 inrun = TRUE; | |
| 2857 } | |
| 2858 continue; | |
| 2859 } | |
| 2860 if (inrun && (val != maxval)) { | |
| 2861 iend = i - 1; | |
| 2862 runlen = iend - istart + 1; | |
| 2863 inrun = FALSE; | |
| 2864 if (runlen > maxrunlen) { | |
| 2865 maxstart = istart; | |
| 2866 maxend = iend; | |
| 2867 maxrunlen = runlen; | |
| 2868 } | |
| 2869 } | |
| 2870 } | |
| 2871 if (inrun) { | |
| 2872 runlen = i - istart; | |
| 2873 if (runlen > maxrunlen) { | |
| 2874 maxstart = istart; | |
| 2875 maxend = i - 1; | |
| 2876 maxrunlen = runlen; | |
| 2877 } | |
| 2878 } | |
| 2879 | |
| 2880 *pbestthresh = estthresh - 80.0f + 2.0f * (l_float32)(maxstart + maxend); | |
| 2881 | |
| 2882 #if DEBUG_CROSSINGS | |
| 2883 lept_stderr("\nCrossings attain a maximum at %d thresholds, between:\n" | |
| 2884 " thresh[%d] = %5.1f and thresh[%d] = %5.1f\n", | |
| 2885 nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart, | |
| 2886 maxend, estthresh - 80.0 + 4.0 * maxend); | |
| 2887 lept_stderr("The best choice: %5.1f\n", *pbestthresh); | |
| 2888 lept_stderr("Number of crossings at the 41 thresholds:"); | |
| 2889 numaWriteStderr(nat); | |
| 2890 #endif /* DEBUG_CROSSINGS */ | |
| 2891 | |
| 2892 numaDestroy(&nat); | |
| 2893 return 0; | |
| 2894 } | |
| 2895 | |
| 2896 | |
| 2897 /*! | |
| 2898 * \brief numaCrossingsByThreshold() | |
| 2899 * | |
| 2900 * \param[in] nax [optional] numa of abscissa values; can be NULL | |
| 2901 * \param[in] nay numa of ordinate values, corresponding to nax | |
| 2902 * \param[in] thresh threshold value for nay | |
| 2903 * \return nad abscissa pts at threshold, or NULL on error | |
| 2904 * | |
| 2905 * <pre> | |
| 2906 * Notes: | |
| 2907 * (1) If nax == NULL, we use startx and delx from nay to compute | |
| 2908 * the crossing values in nad. | |
| 2909 * </pre> | |
| 2910 */ | |
| 2911 NUMA * | |
| 2912 numaCrossingsByThreshold(NUMA *nax, | |
| 2913 NUMA *nay, | |
| 2914 l_float32 thresh) | |
| 2915 { | |
| 2916 l_int32 i, n; | |
| 2917 l_float32 startx, delx; | |
| 2918 l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract; | |
| 2919 NUMA *nad; | |
| 2920 | |
| 2921 if (!nay) | |
| 2922 return (NUMA *)ERROR_PTR("nay not defined", __func__, NULL); | |
| 2923 n = numaGetCount(nay); | |
| 2924 | |
| 2925 if (nax && (numaGetCount(nax) != n)) | |
| 2926 return (NUMA *)ERROR_PTR("nax and nay sizes differ", __func__, NULL); | |
| 2927 | |
| 2928 nad = numaCreate(0); | |
| 2929 if (n < 2) return nad; | |
| 2930 numaGetFValue(nay, 0, &yval1); | |
| 2931 numaGetParameters(nay, &startx, &delx); | |
| 2932 if (nax) | |
| 2933 numaGetFValue(nax, 0, &xval1); | |
| 2934 else | |
| 2935 xval1 = startx; | |
| 2936 for (i = 1; i < n; i++) { | |
| 2937 numaGetFValue(nay, i, &yval2); | |
| 2938 if (nax) | |
| 2939 numaGetFValue(nax, i, &xval2); | |
| 2940 else | |
| 2941 xval2 = startx + i * delx; | |
| 2942 delta1 = yval1 - thresh; | |
| 2943 delta2 = yval2 - thresh; | |
| 2944 if (delta1 == 0.0) { | |
| 2945 numaAddNumber(nad, xval1); | |
| 2946 } else if (delta2 == 0.0) { | |
| 2947 numaAddNumber(nad, xval2); | |
| 2948 } else if (delta1 * delta2 < 0.0) { /* crossing */ | |
| 2949 fract = L_ABS(delta1) / L_ABS(yval1 - yval2); | |
| 2950 crossval = xval1 + fract * (xval2 - xval1); | |
| 2951 numaAddNumber(nad, crossval); | |
| 2952 } | |
| 2953 xval1 = xval2; | |
| 2954 yval1 = yval2; | |
| 2955 } | |
| 2956 | |
| 2957 return nad; | |
| 2958 } | |
| 2959 | |
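| /* Illustrative sketch (not in the upstream source; compiled out): | |
| * choose a robust threshold for a signal and then locate the crossings | |
| * at that threshold. 'nay' (the signal) and the helper name are | |
| * hypothetical; 120 is the typical estimate mentioned in the notes for | |
| * numaSelectCrossingThreshold(). */ | |
| #if 0 | |
| static NUMA * | |
| exampleCrossings(NUMA *nay) | |
| { | |
| l_float32 bestthresh; | |
| | |
| if (numaSelectCrossingThreshold(NULL, nay, 120.0, &bestthresh)) | |
| return NULL; | |
| /* Abscissa locations of the crossings; caller destroys the numa */ | |
| return numaCrossingsByThreshold(NULL, nay, bestthresh); | |
| } | |
| #endif | |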
| 2960 | |
| 2961 /*! | |
| 2962 * \brief numaCrossingsByPeaks() | |
| 2963 * | |
| 2964 * \param[in] nax [optional] numa of abscissa values | |
| 2965 * \param[in] nay numa of ordinate values, corresponding to nax | |
| 2966 * \param[in] delta parameter used to identify when a new peak can be found | |
| 2967 * \return nad abscissa pts at threshold, or NULL on error | |
| 2968 * | |
| 2969 * <pre> | |
| 2970 * Notes: | |
| 2971 * (1) If nax == NULL, we use startx and delx from nay to compute | |
| 2972 * the crossing values in nad. | |
| 2973 * </pre> | |
| 2974 */ | |
| 2975 NUMA * | |
| 2976 numaCrossingsByPeaks(NUMA *nax, | |
| 2977 NUMA *nay, | |
| 2978 l_float32 delta) | |
| 2979 { | |
| 2980 l_int32 i, j, n, np, previndex, curindex; | |
| 2981 l_float32 startx, delx; | |
| 2982 l_float32 xval1, xval2, yval1, yval2, delta1, delta2; | |
| 2983 l_float32 prevval, curval, thresh, crossval, fract; | |
| 2984 NUMA *nap, *nad; | |
| 2985 | |
| 2986 if (!nay) | |
| 2987 return (NUMA *)ERROR_PTR("nay not defined", __func__, NULL); | |
| 2988 | |
| 2989 n = numaGetCount(nay); | |
| 2990 if (nax && (numaGetCount(nax) != n)) | |
| 2991 return (NUMA *)ERROR_PTR("nax and nay sizes differ", __func__, NULL); | |
| 2992 | |
| 2993 /* Find the extrema. Also add last point in nay to get | |
| 2994 * the last transition (from the last peak to the end). | |
| 2995 * The number of crossings is 1 more than the number of extrema. */ | |
| 2996 nap = numaFindExtrema(nay, delta, NULL); | |
| 2997 numaAddNumber(nap, n - 1); | |
| 2998 np = numaGetCount(nap); | |
| 2999 L_INFO("Number of crossings: %d\n", __func__, np); | |
| 3000 | |
| 3001 /* Do all computation in index units of nax or the delx of nay */ | |
| 3002 nad = numaCreate(np); /* output crossing locations, in nax units */ | |
| 3003 previndex = 0; /* prime the search with 1st point */ | |
| 3004 numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */ | |
| 3005 numaGetParameters(nay, &startx, &delx); | |
| 3006 for (i = 0; i < np; i++) { | |
| 3007 numaGetIValue(nap, i, &curindex); | |
| 3008 numaGetFValue(nay, curindex, &curval); | |
| 3009 thresh = (prevval + curval) / 2.0f; | |
| 3010 if (nax) | |
| 3011 numaGetFValue(nax, previndex, &xval1); | |
| 3012 else | |
| 3013 xval1 = startx + previndex * delx; | |
| 3014 numaGetFValue(nay, previndex, &yval1); | |
| 3015 for (j = previndex + 1; j <= curindex; j++) { | |
| 3016 if (nax) | |
| 3017 numaGetFValue(nax, j, &xval2); | |
| 3018 else | |
| 3019 xval2 = startx + j * delx; | |
| 3020 numaGetFValue(nay, j, &yval2); | |
| 3021 delta1 = yval1 - thresh; | |
| 3022 delta2 = yval2 - thresh; | |
| 3023 if (delta1 == 0.0) { | |
| 3024 numaAddNumber(nad, xval1); | |
| 3025 break; | |
| 3026 } else if (delta2 == 0.0) { | |
| 3027 numaAddNumber(nad, xval2); | |
| 3028 break; | |
| 3029 } else if (delta1 * delta2 < 0.0) { /* crossing */ | |
| 3030 fract = L_ABS(delta1) / L_ABS(yval1 - yval2); | |
| 3031 crossval = xval1 + fract * (xval2 - xval1); | |
| 3032 numaAddNumber(nad, crossval); | |
| 3033 break; | |
| 3034 } | |
| 3035 xval1 = xval2; | |
| 3036 yval1 = yval2; | |
| 3037 } | |
| 3038 previndex = curindex; | |
| 3039 prevval = curval; | |
| 3040 } | |
| 3041 | |
| 3042 numaDestroy(&nap); | |
| 3043 return nad; | |
| 3044 } | |
| 3045 | |
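/* --------------------------------------------------------------------
 * Editorial note: an illustrative usage sketch for numaCrossingsByPeaks(),
 * not part of the upstream file.  The signal and the delta value are
 * invented; the block is compiled out with "#if 0".
 * -------------------------------------------------------------------- */
#if 0
static void
sketchCrossingsByPeaks(void)
{
l_int32    i, nc;
l_float32  xloc;
NUMA      *nay, *nad;

        /* The same triangle wave as above: valleys at x = 0, 10, 20, ...
         * and peaks at x = 5, 15, 25, ... */
    nay = numaCreate(0);
    for (i = 0; i < 40; i++)
        numaAddNumber(nay, 2.0f * (l_float32)(i % 10 < 5 ? i % 10 : 10 - i % 10));

        /* delta = 3.0 means an extremum is accepted only after the signal
         * has moved at least 3.0 away from the previous extremum.  One
         * crossing is reported roughly halfway between each adjacent
         * valley/peak pair (x = 2.5, 7.5, ...). */
    nad = numaCrossingsByPeaks(NULL, nay, 3.0f);
    nc = numaGetCount(nad);
    for (i = 0; i < nc; i++) {
        numaGetFValue(nad, i, &xloc);
        lept_stderr("transition %d at x = %5.2f\n", i, xloc);
    }
    numaDestroy(&nay);
    numaDestroy(&nad);
}
#endif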
| 3046 | |
| 3047 /*! | |
| 3048 * \brief numaEvalBestHaarParameters() | |
| 3049 * | |
| 3050 * \param[in] nas numa of non-negative signal values | |
| 3051 * \param[in] relweight relative weight of (-1 comb) / (+1 comb) | |
| 3052 * contributions to the 'convolution'. In effect, | |
| 3053 * the convolution kernel is a comb consisting of | |
| 3054 * alternating +1 and -relweight. | |
| 3055 * \param[in] nwidth number of widths to consider | |
| 3056 * \param[in] nshift number of shifts to consider for each width | |
| 3057 * \param[in] minwidth smallest width to consider | |
| 3058 * \param[in] maxwidth largest width to consider | |
| 3059 * \param[out] pbestwidth width giving largest score | |
| 3060 * \param[out] pbestshift shift giving largest score | |
| 3061 * \param[out] pbestscore [optional] convolution with "Haar"-like comb | |
| 3062 * \return 0 if OK, 1 on error | |
| 3063 * | |
| 3064 * <pre> | |
| 3065 * Notes: | |
| 3066 * (1) This does a linear sweep of widths, evaluating at %nshift | |
| 3067 * shifts for each width, computing the score from a convolution | |
| 3068 * with a long comb, and finding the (width, shift) pair that | |
| 3069 * gives the maximum score. The best width is the "half-wavelength" | |
| 3070 * of the signal. | |
| 3071 * (2) The convolving function is a comb of alternating values | |
| 3072 * +1 and -1 * relweight, separated by the width and phased by | |
| 3073 * the shift. This is similar to a Haar transform, except | |
| 3074 * there the convolution is performed with a square wave. | |
| 3075 * (3) The function is useful for finding the line spacing | |
| 3076 * and strength of line signal from pixel sum projections. | |
| 3077 * (4) The score is normalized to the size of nas divided by | |
| 3078 * the number of half-widths. For image applications, the input is | |
| 3079 * typically an array of pixel projections, so one should | |
| 3080 * normalize by dividing the score by the image width in the | |
| 3081 * pixel projection direction. | |
| 3082 * </pre> | |
| 3083 */ | |
| 3084 l_ok | |
| 3085 numaEvalBestHaarParameters(NUMA *nas, | |
| 3086 l_float32 relweight, | |
| 3087 l_int32 nwidth, | |
| 3088 l_int32 nshift, | |
| 3089 l_float32 minwidth, | |
| 3090 l_float32 maxwidth, | |
| 3091 l_float32 *pbestwidth, | |
| 3092 l_float32 *pbestshift, | |
| 3093 l_float32 *pbestscore) | |
| 3094 { | |
| 3095 l_int32 i, j; | |
| 3096 l_float32 delwidth, delshift, width, shift, score; | |
| 3097 l_float32 bestwidth, bestshift, bestscore; | |
| 3098 | |
| 3099 if (pbestscore) *pbestscore = 0.0; | |
| 3100 if (pbestwidth) *pbestwidth = 0.0; | |
| 3101 if (pbestshift) *pbestshift = 0.0; | |
| 3102 if (!pbestwidth || !pbestshift) | |
| 3103 return ERROR_INT("&bestwidth and &bestshift not defined", __func__, 1); | |
| 3104 if (!nas) | |
| 3105 return ERROR_INT("nas not defined", __func__, 1); | |
| 3106 | |
| 3107 bestscore = bestwidth = bestshift = 0.0; | |
| 3108 delwidth = (maxwidth - minwidth) / (nwidth - 1.0f); | |
| 3109 for (i = 0; i < nwidth; i++) { | |
| 3110 width = minwidth + delwidth * i; | |
| 3111 delshift = width / (l_float32)(nshift); | |
| 3112 for (j = 0; j < nshift; j++) { | |
| 3113 shift = j * delshift; | |
| 3114 numaEvalHaarSum(nas, width, shift, relweight, &score); | |
| 3115 if (score > bestscore) { | |
| 3116 bestscore = score; | |
| 3117 bestwidth = width; | |
| 3118 bestshift = shift; | |
| 3119 #if DEBUG_FREQUENCY | |
| 3120 lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n", | |
| 3121 width, shift, score); | |
| 3122 #endif /* DEBUG_FREQUENCY */ | |
| 3123 } | |
| 3124 } | |
| 3125 } | |
| 3126 | |
| 3127 *pbestwidth = bestwidth; | |
| 3128 *pbestshift = bestshift; | |
| 3129 if (pbestscore) | |
| 3130 *pbestscore = bestscore; | |
| 3131 return 0; | |
| 3132 } | |
| 3133 | |
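/* --------------------------------------------------------------------
 * Editorial note: an illustrative usage sketch for
 * numaEvalBestHaarParameters(), not part of the upstream file.  The
 * synthetic "projection" signal and all parameter values are invented;
 * the block is compiled out with "#if 0".
 * -------------------------------------------------------------------- */
#if 0
static void
sketchBestHaarParameters(void)
{
l_int32    i;
l_float32  bestwidth, bestshift, bestscore;
NUMA      *nas;

        /* A projection-like signal with period 12: four large samples
         * starting at index 3 of each period, small values elsewhere. */
    nas = numaCreate(0);
    for (i = 0; i < 240; i++)
        numaAddNumber(nas, (i % 12 >= 3 && i % 12 < 7) ? 80.0f : 2.0f);

        /* Sweep 29 widths in [3.0, 10.0] (step 0.25) and 20 shifts per
         * width, weighting the "off" samples by relweight = 5.0.  The
         * best width should come out at 6.0, half the 12-sample period,
         * with a shift near 3.0. */
    numaEvalBestHaarParameters(nas, 5.0f, 29, 20, 3.0f, 10.0f,
                               &bestwidth, &bestshift, &bestscore);
    lept_stderr("width = %5.2f, shift = %5.2f, score = %7.2f\n",
                bestwidth, bestshift, bestscore);
    numaDestroy(&nas);
}
#endif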
| 3134 | |
| 3135 /*! | |
| 3136 * \brief numaEvalHaarSum() | |
| 3137 * | |
| 3138 * \param[in] nas numa of non-negative signal values | |
| 3139 * \param[in] width distance between +1 and -1 in convolution comb | |
| 3140 * \param[in] shift phase of the comb: location of first +1 | |
| 3141 * \param[in] relweight relative weight of (-1 comb) / (+1 comb) | |
| 3142 * contributions to the 'convolution'. In effect, | |
| 3143 * the convolution kernel is a comb consisting of | |
| 3144 * alternating +1 and -relweight. | |
| 3145 * \param[out] pscore convolution with "Haar"-like comb | |
| 3146 * \return 0 if OK, 1 on error | |
| 3147 * | |
| 3148 * <pre> | |
| 3149 * Notes: | |
| 3150 * (1) This does a convolution with a comb of alternating values | |
| 3151 * +1 and -relweight, separated by the width and phased by the shift. | |
| 3152 * This is similar to a Haar transform, except that for Haar, | |
| 3153 * (a) the convolution kernel is symmetric about 0, so the | |
| 3154 * relweight is 1.0, and | |
| 3155 * (b) the convolution is performed with a square wave. | |
| 3156 * (2) The score is normalized to the size of nas divided by | |
| 3157 * twice the "width". For image applications, the input is | |
| 3158 * typically an array of pixel projections, so one should | |
| 3159 * normalize by dividing the score by the image width in the | |
| 3160 * pixel projection direction. | |
| 3161 * (3) To get a Haar-like result, use relweight = 1.0. For detecting | |
| 3162 * signals where you expect every other sample to be close to | |
| 3163 * zero, as with barcodes or filtered text lines, you can | |
| 3164 * use relweight > 1.0. | |
| 3165 * </pre> | |
| 3166 */ | |
| 3167 l_ok | |
| 3168 numaEvalHaarSum(NUMA *nas, | |
| 3169 l_float32 width, | |
| 3170 l_float32 shift, | |
| 3171 l_float32 relweight, | |
| 3172 l_float32 *pscore) | |
| 3173 { | |
| 3174 l_int32 i, n, nsamp, index; | |
| 3175 l_float32 score, weight, val; | |
| 3176 | |
| 3177 if (!pscore) | |
| 3178 return ERROR_INT("&score not defined", __func__, 1); | |
| 3179 *pscore = 0.0; | |
| 3180 if (!nas) | |
| 3181 return ERROR_INT("nas not defined", __func__, 1); | |
| 3182 if ((n = numaGetCount(nas)) < 2 * width) | |
| 3183 return ERROR_INT("nas size too small", __func__, 1); | |
| 3184 | |
| 3185 score = 0.0; | |
| 3186 nsamp = (l_int32)((n - shift) / width); | |
| 3187 for (i = 0; i < nsamp; i++) { | |
| 3188 index = (l_int32)(shift + i * width); | |
| 3189 weight = (i % 2) ? -1.0f * relweight : 1.0f; /* sample at the shift gets +1 */ | |
| 3190 numaGetFValue(nas, index, &val); | |
| 3191 score += weight * val; | |
| 3192 } | |
| 3193 | |
| 3194 *pscore = 2.0f * width * score / (l_float32)n; | |
| 3195 return 0; | |
| 3196 } | |
| 3197 | |
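/* --------------------------------------------------------------------
 * Editorial note: a small worked example for numaEvalHaarSum(), not part
 * of the upstream file.  The data are invented; the block is compiled
 * out with "#if 0".
 * -------------------------------------------------------------------- */
#if 0
static void
sketchEvalHaarSum(void)
{
l_int32    i;
l_float32  score;
NUMA      *nas;

        /* Eight samples: 10 at even indices, 0 at odd indices */
    nas = numaCreate(0);
    for (i = 0; i < 8; i++)
        numaAddNumber(nas, (i % 2 == 0) ? 10.0f : 0.0f);

        /* With width = 1, shift = 0 and relweight = 1.0 the comb is
         * +1, -1, +1, -1, ...; the raw sum is 10 - 0 + 10 - 0 + ... = 40
         * and the normalized score is 2 * 1 * 40 / 8 = 10. */
    numaEvalHaarSum(nas, 1.0f, 0.0f, 1.0f, &score);
    lept_stderr("score = %5.2f\n", score);   /* expect 10.00 */
    numaDestroy(&nas);
}
#endif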
| 3198 | |
| 3199 /*----------------------------------------------------------------------* | |
| 3200 * Generating numbers in a range under constraints * | |
| 3201 *----------------------------------------------------------------------*/ | |
| 3202 /*! | |
| 3203 * \brief genConstrainedNumaInRange() | |
| 3204 * | |
| 3205 * \param[in] first first number to choose; >= 0 | |
| 3206 * \param[in] last biggest possible number to reach; >= first | |
| 3207 * \param[in] nmax maximum number of numbers to select; > 0 | |
| 3208 * \param[in] use_pairs 1 = select pairs of adjacent numbers; | |
| 3209 * 0 = select individual numbers | |
| 3210 * \return na of the selected numbers, or NULL on error | |
| 3211 * | |
| 3212 * <pre> | |
| 3213 * Notes: | |
| 3214 * (1) Selection is made uniformly in the range. This can be used | |
| 3215 * to select pages distributed as uniformly as possible | |
| 3216 * through a book, where you are constrained to: | |
| 3217 * ~ choose within [first, ... last], | |
| 3218 * ~ choose no more than nmax numbers, and | |
| 3219 * ~ optionally require pairs of adjacent numbers. | |
| 3220 * </pre> | |
| 3221 */ | |
| 3222 NUMA * | |
| 3223 genConstrainedNumaInRange(l_int32 first, | |
| 3224 l_int32 last, | |
| 3225 l_int32 nmax, | |
| 3226 l_int32 use_pairs) | |
| 3227 { | |
| 3228 l_int32 i, nsets, val; | |
| 3229 l_float32 delta; | |
| 3230 NUMA *na; | |
| 3231 | |
| 3232 first = L_MAX(0, first); | |
| 3233 if (last < first) | |
| 3234 return (NUMA *)ERROR_PTR("last < first!", __func__, NULL); | |
| 3235 if (nmax < 1) | |
| 3236 return (NUMA *)ERROR_PTR("nmax < 1!", __func__, NULL); | |
| 3237 | |
| 3238 nsets = L_MIN(nmax, last - first + 1); | |
| 3239 if (use_pairs == 1) | |
| 3240 nsets = nsets / 2; | |
| 3241 if (nsets == 0) | |
| 3242 return (NUMA *)ERROR_PTR("nsets == 0", __func__, NULL); | |
| 3243 | |
| 3244 /* Select delta so that selection covers the full range if possible */ | |
| 3245 if (nsets == 1) { | |
| 3246 delta = 0.0; | |
| 3247 } else { | |
| 3248 if (use_pairs == 0) | |
| 3249 delta = (l_float32)(last - first) / (nsets - 1); | |
| 3250 else | |
| 3251 delta = (l_float32)(last - first - 1) / (nsets - 1); | |
| 3252 } | |
| 3253 | |
| 3254 na = numaCreate(nsets); | |
| 3255 for (i = 0; i < nsets; i++) { | |
| 3256 val = (l_int32)(first + i * delta + 0.5); | |
| 3257 numaAddNumber(na, val); | |
| 3258 if (use_pairs == 1) | |
| 3259 numaAddNumber(na, val + 1); | |
| 3260 } | |
| 3261 | |
| 3262 return na; | |
| 3263 } |
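/* --------------------------------------------------------------------
 * Editorial note: an illustrative usage sketch for
 * genConstrainedNumaInRange(), not part of the upstream file.  The page
 * numbers are invented; the block is compiled out with "#if 0".
 * -------------------------------------------------------------------- */
#if 0
static void
sketchConstrainedNumaInRange(void)
{
l_int32  i, n, val;
NUMA    *na;

        /* Select at most 10 page numbers from a book spanning pages
         * 10..299, as pairs of adjacent pages: with use_pairs = 1 this
         * yields 5 pairs spread uniformly through the range. */
    na = genConstrainedNumaInRange(10, 299, 10, 1);
    n = numaGetCount(na);
    for (i = 0; i < n; i++) {
        numaGetIValue(na, i, &val);
        lept_stderr("selected page %d\n", val);
    }
    numaDestroy(&na);
}
#endif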
