comparison mupdf-source/thirdparty/tesseract/src/ccstruct/statistc.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: the expanded directory no longer includes a version number.
| field | value |
|---|---|
| author | Franz Glasner <fzglas.hg@dom66.de> |
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
Changesets compared: 1:1d09e1dec1d9 and 2:b50eed0cc0ef. The file content follows:

```cpp
/**********************************************************************
 * File: statistc.cpp (Formerly stats.c)
 * Description: Simple statistical package for integer values.
 * Author: Ray Smith
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#  include "config_auto.h"
#endif

#include "statistc.h"

#include "errcode.h"
#include "scrollview.h"
#include "tprintf.h"

#include "helpers.h"

#include <cmath>
#include <cstdlib>
#include <cstring>

namespace tesseract {

/**********************************************************************
 * STATS::STATS
 *
 * Construct a new stats element by allocating and zeroing the memory.
 **********************************************************************/
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value) {
  if (max_bucket_value < min_bucket_value) {
    min_bucket_value = 0;
    max_bucket_value = 1;
  }
  rangemin_ = min_bucket_value; // setup
  rangemax_ = max_bucket_value;
  buckets_ = new int32_t[1 + rangemax_ - rangemin_];
  clear();
}

/**********************************************************************
 * STATS::set_range
 *
 * Alter the range on an existing stats element.
 **********************************************************************/
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value) {
  if (max_bucket_value < min_bucket_value) {
    return false;
  }
  if (rangemax_ - rangemin_ != max_bucket_value - min_bucket_value) {
    delete[] buckets_;
    buckets_ = new int32_t[1 + max_bucket_value - min_bucket_value];
  }
  rangemin_ = min_bucket_value; // setup
  rangemax_ = max_bucket_value;
  clear(); // zero it
  return true;
}

/**********************************************************************
 * STATS::clear
 *
 * Clear out the STATS class by zeroing all the buckets.
 **********************************************************************/
void STATS::clear() { // clear out buckets
  total_count_ = 0;
  if (buckets_ != nullptr) {
    memset(buckets_, 0, (1 + rangemax_ - rangemin_) * sizeof(buckets_[0]));
  }
}

/**********************************************************************
 * STATS::~STATS
 *
 * Destructor for a stats class.
 **********************************************************************/
STATS::~STATS() {
  delete[] buckets_;
}

/**********************************************************************
 * STATS::add
 *
 * Add a set of samples to (or delete from) a pile.
 **********************************************************************/
void STATS::add(int32_t value, int32_t count) {
  if (buckets_ != nullptr) {
    value = ClipToRange(value, rangemin_, rangemax_);
    buckets_[value - rangemin_] += count;
    total_count_ += count; // keep count of total
  }
}

/**********************************************************************
 * STATS::mode
 *
 * Find the mode of a stats class.
 **********************************************************************/
int32_t STATS::mode() const { // get mode of samples
  if (buckets_ == nullptr) {
    return rangemin_;
  }
  int32_t max = buckets_[0]; // max cell count
  int32_t maxindex = 0;      // index of max
  for (int index = rangemax_ - rangemin_; index > 0; --index) {
    if (buckets_[index] > max) {
      max = buckets_[index]; // find biggest
      maxindex = index;
    }
  }
  return maxindex + rangemin_; // index of biggest
}

/**********************************************************************
 * STATS::mean
 *
 * Find the mean of a stats class.
 **********************************************************************/
double STATS::mean() const { // get mean of samples
  if (buckets_ == nullptr || total_count_ <= 0) {
    return static_cast<double>(rangemin_);
  }
  int64_t sum = 0;
  for (int index = rangemax_ - rangemin_; index >= 0; --index) {
    sum += static_cast<int64_t>(index) * buckets_[index];
  }
  return static_cast<double>(sum) / total_count_ + rangemin_;
}

/**********************************************************************
 * STATS::sd
 *
 * Find the standard deviation of a stats class.
 **********************************************************************/
double STATS::sd() const { // standard deviation
  if (buckets_ == nullptr || total_count_ <= 0) {
    return 0.0;
  }
  int64_t sum = 0;
  double sqsum = 0.0;
  for (int index = rangemax_ - rangemin_; index >= 0; --index) {
    sum += static_cast<int64_t>(index) * buckets_[index];
    sqsum += static_cast<double>(index) * index * buckets_[index];
  }
  double variance = static_cast<double>(sum) / total_count_;
  variance = sqsum / total_count_ - variance * variance;
  if (variance > 0.0) {
    return sqrt(variance);
  }
  return 0.0;
}

/**********************************************************************
 * STATS::ile
 *
 * Returns the fractile value such that frac fraction (in [0,1]) of samples
 * has a value less than the return value.
 **********************************************************************/
double STATS::ile(double frac) const {
  if (buckets_ == nullptr || total_count_ == 0) {
    return static_cast<double>(rangemin_);
  }
#if 0
  // TODO(rays) The existing code doesn't seem to be doing the right thing
  // with target a double but this substitute crashes the code that uses it.
  // Investigate and fix properly.
  int target = IntCastRounded(frac * total_count_);
  target = ClipToRange(target, 1, total_count_);
#else
  double target = frac * total_count_;
  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
#endif
  int sum = 0;
  int index = 0;
  for (index = 0; index <= rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
    ;
  }
  if (index > 0) {
    ASSERT_HOST(buckets_[index - 1] > 0);
    return rangemin_ + index - static_cast<double>(sum - target) / buckets_[index - 1];
  } else {
    return static_cast<double>(rangemin_);
  }
}

/**********************************************************************
 * STATS::min_bucket
 *
 * Find REAL minimum bucket - ile(0.0) isn't necessarily correct
 **********************************************************************/
int32_t STATS::min_bucket() const { // Find min
  if (buckets_ == nullptr || total_count_ == 0) {
    return rangemin_;
  }
  int32_t min = 0;
  for (min = 0; (min <= rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
    ;
  }
  return rangemin_ + min;
}

/**********************************************************************
 * STATS::max_bucket
 *
 * Find REAL maximum bucket - ile(1.0) isn't necessarily correct
 **********************************************************************/

int32_t STATS::max_bucket() const { // Find max
  if (buckets_ == nullptr || total_count_ == 0) {
    return rangemin_;
  }
  int32_t max;
  for (max = rangemax_ - rangemin_; max > 0 && buckets_[max] == 0; max--) {
    ;
  }
  return rangemin_ + max;
}

/**********************************************************************
 * STATS::median
 *
 * Finds a more useful estimate of median than ile(0.5).
 *
 * Overcomes a problem with ile() - if the samples are, for example,
 * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
 * between 6 and 13 = 9.5
 **********************************************************************/
double STATS::median() const { // get median
  if (buckets_ == nullptr) {
    return static_cast<double>(rangemin_);
  }
  double median = ile(0.5);
  int median_pile = static_cast<int>(floor(median));
  if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
    int32_t min_pile;
    int32_t max_pile;
    /* Find preceding non zero pile */
    for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--) {
      ;
    }
    /* Find following non zero pile */
    for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++) {
      ;
    }
    median = (min_pile + max_pile) / 2.0;
  }
  return median;
}

/**********************************************************************
 * STATS::local_min
 *
 * Return true if this point is a local min.
 **********************************************************************/
bool STATS::local_min(int32_t x) const {
  if (buckets_ == nullptr) {
    return false;
  }
  x = ClipToRange(x, rangemin_, rangemax_) - rangemin_;
  if (buckets_[x] == 0) {
    return true;
  }
  int32_t index; // table index
  for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index) {
    ;
  }
  if (index >= 0 && buckets_[index] < buckets_[x]) {
    return false;
  }
  for (index = x + 1; index <= rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
    ;
  }
  if (index <= rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
    return false;
  } else {
    return true;
  }
}

/**********************************************************************
 * STATS::smooth
 *
 * Apply a triangular smoothing filter to the stats.
 * This makes the modes a bit more useful.
 * The factor gives the height of the triangle, i.e. the weight of the
 * centre.
 **********************************************************************/
void STATS::smooth(int32_t factor) {
  if (buckets_ == nullptr || factor < 2) {
    return;
  }
  STATS result(rangemin_, rangemax_);
  int entrycount = 1 + rangemax_ - rangemin_;
  for (int entry = 0; entry < entrycount; entry++) {
    // centre weight
    int count = buckets_[entry] * factor;
    for (int offset = 1; offset < factor; offset++) {
      if (entry - offset >= 0) {
        count += buckets_[entry - offset] * (factor - offset);
      }
      if (entry + offset < entrycount) {
        count += buckets_[entry + offset] * (factor - offset);
      }
    }
    result.add(entry + rangemin_, count);
  }
  total_count_ = result.total_count_;
  memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
}

/**********************************************************************
 * STATS::cluster
 *
 * Cluster the samples into max_cluster clusters.
 * Each call runs one iteration. The array of clusters must be
 * max_clusters+1 in size as cluster 0 is used to indicate which samples
 * have been used.
 * The return value is the current number of clusters.
 **********************************************************************/

int32_t STATS::cluster(float lower, // thresholds
                       float upper,
                       float multiple, // distance threshold
                       int32_t max_clusters, // max no to make
                       STATS *clusters) { // array of clusters
  bool new_cluster; // added one
  float *centres; // cluster centres
  int32_t entry; // bucket index
  int32_t cluster; // cluster index
  int32_t best_cluster; // one to assign to
  int32_t new_centre = 0; // residual mode
  int32_t new_mode; // pile count of new_centre
  int32_t count; // pile to place
  float dist; // from cluster
  float min_dist; // from best_cluster
  int32_t cluster_count; // no of clusters

  if (buckets_ == nullptr || max_clusters < 1) {
    return 0;
  }
  centres = new float[max_clusters + 1];
  for (cluster_count = 1;
       cluster_count <= max_clusters && clusters[cluster_count].buckets_ != nullptr &&
       clusters[cluster_count].total_count_ > 0;
       cluster_count++) {
    centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
    new_centre = clusters[cluster_count].mode();
    for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
                                 pile_count(entry) <= pile_count(entry + 1);
         entry--) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
      if (count > 0) {
        clusters[cluster_count].add(entry, count);
        clusters[0].add(entry, count);
      }
    }
    for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
                                 pile_count(entry) <= pile_count(entry - 1);
         entry++) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
      if (count > 0) {
        clusters[cluster_count].add(entry, count);
        clusters[0].add(entry, count);
      }
    }
  }
  cluster_count--;

  if (cluster_count == 0) {
    clusters[0].set_range(rangemin_, rangemax_);
  }
  do {
    new_cluster = false;
    new_mode = 0;
    for (entry = 0; entry <= rangemax_ - rangemin_; entry++) {
      count = buckets_[entry] - clusters[0].buckets_[entry];
      // remaining pile
      if (count > 0) { // any to handle
        min_dist = static_cast<float>(INT32_MAX);
        best_cluster = 0;
        for (cluster = 1; cluster <= cluster_count; cluster++) {
          dist = entry + rangemin_ - centres[cluster];
          // find distance
          if (dist < 0) {
            dist = -dist;
          }
          if (dist < min_dist) {
            min_dist = dist; // find least
            best_cluster = cluster;
          }
        }
        if (min_dist > upper // far enough for new
            && (best_cluster == 0 || entry + rangemin_ > centres[best_cluster] * multiple ||
                entry + rangemin_ < centres[best_cluster] / multiple)) {
          if (count > new_mode) {
            new_mode = count;
            new_centre = entry + rangemin_;
          }
        }
      }
    }
    // need new and room
    if (new_mode > 0 && cluster_count < max_clusters) {
      cluster_count++;
      new_cluster = true;
      if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
        delete[] centres;
        return 0;
      }
      centres[cluster_count] = static_cast<float>(new_centre);
      clusters[cluster_count].add(new_centre, new_mode);
      clusters[0].add(new_centre, new_mode);
      for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
                                   pile_count(entry) <= pile_count(entry + 1);
           entry--) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
        if (count > 0) {
          clusters[cluster_count].add(entry, count);
          clusters[0].add(entry, count);
        }
      }
      for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
                                   pile_count(entry) <= pile_count(entry - 1);
           entry++) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
        if (count > 0) {
          clusters[cluster_count].add(entry, count);
          clusters[0].add(entry, count);
        }
      }
      centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
    }
  } while (new_cluster && cluster_count < max_clusters);
  delete[] centres;
  return cluster_count;
}

// Helper tests that the current index is still part of the peak and gathers
// the data into the peak, returning false when the peak is ended.
// src_buckets[index] - used_buckets[index] is the unused part of the histogram.
// prev_count is the histogram count of the previous index on entry and is
// updated to the current index on return.
// total_count and total_value are accumulating the mean of the peak.
static bool GatherPeak(int index, const int *src_buckets, int *used_buckets, int *prev_count,
                       int *total_count, double *total_value) {
  int pile_count = src_buckets[index] - used_buckets[index];
  if (pile_count <= *prev_count && pile_count > 0) {
    // Accumulate count and index.count product.
    *total_count += pile_count;
    *total_value += index * pile_count;
    // Mark this index as used
    used_buckets[index] = src_buckets[index];
    *prev_count = pile_count;
    return true;
  } else {
    return false;
  }
}

// Finds (at most) the top max_modes modes, well actually the whole peak around
// each mode, returning them in the given modes vector as a <mean of peak,
// total count of peak> pair in order of decreasing total count.
// Since the mean is the key and the count the data in the pair, a single call
// to sort on the output will re-sort by increasing mean of peak if that is
// more useful than decreasing total count.
// Returns the actual number of modes found.
int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const {
  if (max_modes <= 0) {
    return 0;
  }
  int src_count = 1 + rangemax_ - rangemin_;
  // Used copies the counts in buckets_ as they get used.
  STATS used(rangemin_, rangemax_);
  modes.clear();
  // Total count of the smallest peak found so far.
  int least_count = 1;
  // Mode that is used as a seed for each peak
  int max_count = 0;
  do {
    // Find an unused mode.
    max_count = 0;
    int max_index = 0;
    for (int src_index = 0; src_index < src_count; src_index++) {
      int pile_count = buckets_[src_index] - used.buckets_[src_index];
      if (pile_count > max_count) {
        max_count = pile_count;
        max_index = src_index;
      }
    }
    if (max_count > 0) {
      // Copy the bucket count to used so it doesn't get found again.
      used.buckets_[max_index] = max_count;
      // Get the entire peak.
      double total_value = max_index * max_count;
      int total_count = max_count;
      int prev_pile = max_count;
      for (int offset = 1; max_index + offset < src_count; ++offset) {
        if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, &total_count,
                        &total_value)) {
          break;
        }
      }
      prev_pile = buckets_[max_index];
      for (int offset = 1; max_index - offset >= 0; ++offset) {
        if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, &total_count,
                        &total_value)) {
          break;
        }
      }
      if (total_count > least_count || modes.size() < static_cast<size_t>(max_modes)) {
        // We definitely want this mode, so if we have enough discard the least.
        if (modes.size() == static_cast<size_t>(max_modes)) {
          modes.resize(max_modes - 1);
        }
        size_t target_index = 0;
        // Linear search for the target insertion point.
        while (target_index < modes.size() && modes[target_index].data() >= total_count) {
          ++target_index;
        }
        auto peak_mean = static_cast<float>(total_value / total_count + rangemin_);
        modes.insert(modes.begin() + target_index, KDPairInc<float, int>(peak_mean, total_count));
        least_count = modes.back().data();
      }
    }
  } while (max_count > 0);
  return modes.size();
}

/**********************************************************************
 * STATS::print
 *
 * Prints a summary and table of the histogram.
 **********************************************************************/
void STATS::print() const {
  if (buckets_ == nullptr) {
    return;
  }
  int32_t min = min_bucket() - rangemin_;
  int32_t max = max_bucket() - rangemin_;

  int num_printed = 0;
  for (int index = min; index <= max; index++) {
    if (buckets_[index] != 0) {
      tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
      if (++num_printed % 8 == 0) {
        tprintf("\n");
      }
    }
  }
  tprintf("\n");
  print_summary();
}

/**********************************************************************
 * STATS::print_summary
 *
 * Print a summary of the stats.
 **********************************************************************/
void STATS::print_summary() const {
  if (buckets_ == nullptr) {
    return;
  }
  int32_t min = min_bucket();
  int32_t max = max_bucket();
  tprintf("Total count=%d\n", total_count_);
  tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
  tprintf("Lower quartile=%.2f\n", ile(0.25));
  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
  tprintf("Upper quartile=%.2f\n", ile(0.75));
  tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
  tprintf("Range=%d\n", max + 1 - min);
  tprintf("Mean= %.2f\n", mean());
  tprintf("SD= %.2f\n", sd());
}

/**********************************************************************
 * STATS::plot
 *
 * Draw a histogram of the stats table.
 **********************************************************************/

#ifndef GRAPHICS_DISABLED
void STATS::plot(ScrollView *window, // to draw in
                 float xorigin, // bottom left
                 float yorigin,
                 float xscale, // one x unit
                 float yscale, // one y unit
                 ScrollView::Color colour) const { // colour to draw in
  if (buckets_ == nullptr) {
    return;
  }
  window->Pen(colour);

  for (int index = 0; index <= rangemax_ - rangemin_; index++) {
    window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
                      yorigin + yscale * buckets_[index]);
  }
}
#endif

/**********************************************************************
 * STATS::plotline
 *
 * Draw a histogram of the stats table. (Line only)
 **********************************************************************/

#ifndef GRAPHICS_DISABLED
void STATS::plotline(ScrollView *window, // to draw in
                     float xorigin, // bottom left
                     float yorigin,
                     float xscale, // one x unit
                     float yscale, // one y unit
                     ScrollView::Color colour) const { // colour to draw in
  if (buckets_ == nullptr) {
    return;
  }
  window->Pen(colour);
  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
  for (int index = 0; index <= rangemax_ - rangemin_; index++) {
    window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
  }
}
#endif

} // namespace tesseract
```
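The header comment on STATS::median() above works through the sample set 6,6,13,14, for which ile(0.5) returns 7.0 while median() returns 9.5. Here is a minimal sketch of that case, exercising the STATS interface implemented in this file; the include path and the standalone main() harness are assumptions, not part of the repository.

```cpp
// Sketch only: drives the STATS histogram implemented in statistc.cpp.
// Assumes the Tesseract headers are on the include path.
#include "statistc.h"

#include <cstdio>

int main() {
  tesseract::STATS stats(0, 20); // buckets for integer values 0..20

  // The sample set from the STATS::median() header comment.
  stats.add(6, 2);  // two samples of value 6
  stats.add(13, 1);
  stats.add(14, 1);

  std::printf("mode     = %d\n", stats.mode());      // 6, the largest bucket
  std::printf("mean     = %.2f\n", stats.mean());    // 9.75
  std::printf("ile(0.5) = %.2f\n", stats.ile(0.5));  // 7.0, as the comment notes
  std::printf("median   = %.2f\n", stats.median());  // 9.5, midway between 6 and 13
  std::printf("sd       = %.2f\n", stats.sd());
  return 0;
}
```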
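A second sketch covers the triangular smoothing and peak finding described in the STATS::smooth() and STATS::top_n_modes() comments. It assumes KDPairInc lives in the tesseract namespace and exposes a key() accessor alongside the data() accessor used in this file (both presumed to come from Tesseract's kdpair.h via "statistc.h"); treat those details as assumptions rather than guarantees.

```cpp
// Sketch only: a small bimodal histogram, smoothed and then reduced to its
// two strongest peaks. KDPairInc and its key()/data() accessors are assumed
// to be declared by the time "statistc.h" has been included.
#include "statistc.h"

#include <cstdio>
#include <vector>

int main() {
  tesseract::STATS stats(0, 100);

  // Two clusters of samples, one around 20 and one around 70.
  stats.add(19, 1);
  stats.add(20, 3);
  stats.add(21, 1);
  stats.add(69, 1);
  stats.add(70, 2);
  stats.add(71, 1);

  // Triangular filter with centre weight 2 (neighbour weight 1), as described
  // in the STATS::smooth() header comment.
  stats.smooth(2);

  std::vector<tesseract::KDPairInc<float, int>> modes;
  int n = stats.top_n_modes(2, modes); // peaks in decreasing total-count order
  for (int i = 0; i < n; ++i) {
    // key() = mean of the peak, data() = total (smoothed) count in the peak.
    std::printf("peak %d: mean=%.1f count=%d\n", i, modes[i].key(), modes[i].data());
  }
  return 0;
}
```

As the top_n_modes() comment points out, a single std::sort over the returned vector re-orders the peaks by increasing mean instead of decreasing count.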
