Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/statistc.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: statistc.h (Formerly stats.h) | |
| 3 * Description: Class description for STATS class. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1991, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #ifndef TESSERACT_CCSTRUCT_STATISTC_H_ | |
| 20 #define TESSERACT_CCSTRUCT_STATISTC_H_ | |
| 21 | |
| 22 #include <cstdio> | |
| 23 #include "kdpair.h" | |
| 24 #include "scrollview.h" | |
| 25 | |
| 26 namespace tesseract { | |
| 27 | |
| 28 // Simple histogram-based statistics for integer values in a known | |
| 29 // range, such that the range is small compared to the number of samples. | |
| 30 class TESS_API STATS { | |
| 31 public: | |
| 32 // The histogram buckets are in the range | |
| 33 // [min_bucket_value, max_bucket_value]. | |
| 34 // Any data under min_bucket_value is silently mapped to min_bucket_value, | |
| 35 // and likewise, any data over max_bucket_value is silently mapped to | |
| 36 // max_bucket_value. | |
| 37 // In the internal array, min_bucket_value maps to 0 and | |
| 38 // 1 + max_bucket_value - min_bucket_value to the array size. | |
| 39 STATS(int32_t min_bucket_value, int32_t max_bucket_value); | |
| 40 STATS() = default; // empty for arrays | |
| 41 | |
| 42 ~STATS(); | |
| 43 | |
| 44 // (Re)Sets the range and clears the counts. | |
| 45 // See the constructor for info on max and min values. | |
| 46 bool set_range(int32_t min_bucket_value, int32_t max_bucket_value); | |
| 47 | |
| 48 void clear(); // empty buckets | |
| 49 | |
| 50 void add(int32_t value, int32_t count); | |
| 51 | |
| 52 // "Accessors" return various statistics on the data. | |
| 53 int32_t mode() const; // get mode of samples | |
| 54 double mean() const; // get mean of samples | |
| 55 double sd() const; // standard deviation | |
| 56 // Returns the fractile value such that frac fraction (in [0,1]) of samples | |
| 57 // has a value less than the return value. | |
| 58 double ile(double frac) const; | |
| 59 // Returns the minimum used entry in the histogram (ie the minimum of the | |
| 60 // data, NOT the minimum of the supplied range, nor is it an index.) | |
| 61 // Would normally be called min(), but that is a reserved word in VC++. | |
| 62 int32_t min_bucket() const; // Find min | |
| 63 // Returns the maximum used entry in the histogram (ie the maximum of the | |
| 64 // data, NOT the maximum of the supplied range, nor is it an index.) | |
| 65 int32_t max_bucket() const; // Find max | |
| 66 // Finds a more useful estimate of median than ile(0.5). | |
| 67 // Overcomes a problem with ile() - if the samples are, for example, | |
| 68 // 6,6,13,14 ile(0.5) returns 7.0 - when a more useful value would be midway | |
| 69 // between 6 and 13 = 9.5 | |
| 70 double median() const; // get median of samples | |
| 71 // Returns the count of the given value, clamping out-of-range values to the end buckets (0 if no buckets are allocated). | |
| 72 int32_t pile_count(int32_t value) const { | |
| 73 if (buckets_ == nullptr) { | |
| 74 return 0; | |
| 75 } | |
| 76 if (value <= rangemin_) { | |
| 77 return buckets_[0]; | |
| 78 } | |
| 79 if (value >= rangemax_) { | |
| 80 return buckets_[rangemax_ - rangemin_]; | |
| 81 } | |
| 82 return buckets_[value - rangemin_]; | |
| 83 } | |
| 84 // Returns the total count of all buckets. | |
| 85 int32_t get_total() const { | |
| 86 return total_count_; // total of all piles | |
| 87 } | |
| 88 // Returns true if x is a local min. | |
| 89 bool local_min(int32_t x) const; | |
| 90 | |
| 91 // Apply a triangular smoothing filter to the stats. | |
| 92 // This makes the modes a bit more useful. | |
| 93 // The factor gives the height of the triangle, i.e. the weight of the | |
| 94 // centre. | |
| 95 void smooth(int32_t factor); | |
| 96 | |
| 97 // Cluster the samples into max_clusters clusters. | |
| 98 // Each call runs one iteration. The array of clusters must be | |
| 99 // max_clusters+1 in size as cluster 0 is used to indicate which samples | |
| 100 // have been used. | |
| 101 // The return value is the current number of clusters. | |
| 102 int32_t cluster(float lower, // thresholds | |
| 103 float upper, | |
| 104 float multiple, // distance threshold | |
| 105 int32_t max_clusters, // max no to make | |
| 106 STATS *clusters); // array of clusters | |
| 107 | |
| 108 // Finds (at most) the top max_modes modes, well actually the whole peak | |
| 109 // around each mode, returning them in the given modes vector as a <mean of | |
| 110 // peak, total count of peak> pair in order of decreasing total count. Since | |
| 111 // the mean is the key and the count the data in the pair, a single call to | |
| 112 // sort on the output will re-sort by increasing mean of peak if that is more | |
| 113 // useful than decreasing total count. Returns the actual number of modes | |
| 114 // found. | |
| 115 int top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const; | |
| 116 | |
| 117 // Prints a summary and table of the histogram. | |
| 118 void print() const; | |
| 119 // Prints summary stats only of the histogram. | |
| 120 void print_summary() const; | |
| 121 | |
| 122 #ifndef GRAPHICS_DISABLED | |
| 123 // Draws the histogram as a series of rectangles. | |
| 124 void plot(ScrollView *window, // window to draw in | |
| 125 float xorigin, // x origin of histogram | |
| 126 float yorigin, // y origin of histogram | |
| 127 float xscale, // size of one unit | |
| 128 float yscale, // size of one unit | |
| 129 ScrollView::Color colour) const; // colour to draw in | |
| 130 | |
| 131 // Draws a line graph of the histogram. | |
| 132 void plotline(ScrollView *window, // window to draw in | |
| 133 float xorigin, // x origin of histogram | |
| 134 float yorigin, // y origin of histogram | |
| 135 float xscale, // size of one unit | |
| 136 float yscale, // size of one unit | |
| 137 ScrollView::Color colour) const; // colour to draw in | |
| 138 #endif // !GRAPHICS_DISABLED | |
| 139 | |
| 140 private: | |
| 141 int32_t rangemin_ = 0; // min of range | |
| 142 int32_t rangemax_ = 0; // max of range | |
| 143 int32_t total_count_ = 0; // number of samples | |
| 144 int32_t *buckets_ = nullptr; // array of cells; NOTE(review): no copy/move operations are declared for this raw pointer - confirm ownership in the implementation | |
| 145 }; | |
| 146 | |
| 147 } // namespace tesseract | |
| 148 | |
| 149 #endif // TESSERACT_CCSTRUCT_STATISTC_H_ |
