Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/trainingsample.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2010 Google Inc. All Rights Reserved. | |
| 2 // Author: rays@google.com (Ray Smith) | |
| 3 // | |
| 4 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 // you may not use this file except in compliance with the License. | |
| 6 // You may obtain a copy of the License at | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // Unless required by applicable law or agreed to in writing, software | |
| 9 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 11 // See the License for the specific language governing permissions and | |
| 12 // limitations under the License. | |
| 13 // | |
| 14 /////////////////////////////////////////////////////////////////////// | |
| 15 | |
| 16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_ | |
| 17 #define TESSERACT_TRAINING_TRAININGSAMPLE_H_ | |
| 18 | |
| 19 #include "elst.h" | |
| 20 #include "featdefs.h" | |
| 21 #include "intfx.h" | |
| 22 #include "intmatcher.h" | |
| 23 #include "matrix.h" | |
| 24 #include "mf.h" | |
| 25 #include "mfdefs.h" | |
| 26 #include "picofeat.h" | |
| 27 #include "shapetable.h" | |
| 28 #include "unicharset.h" | |
| 29 | |
| 30 struct Pix; | |
| 31 | |
| 32 namespace tesseract { | |
| 33 | |
| 34 class IntFeatureMap; | |
| 35 class IntFeatureSpace; | |
| 36 class ShapeTable; | |
| 37 | |
| 38 // Number of elements in the cn_feature_ array of TrainingSample. | |
| 39 static const int kNumCNParams = 4; | |
| 40 // Number of ways to shift the features when randomizing. | |
| 41 static const int kSampleYShiftSize = 5; | |
| 42 // Number of ways to scale the features when randomizing. | |
| 43 static const int kSampleScaleSize = 3; | |
| 44 // Total number of different ways to manipulate the features when randomizing. | |
| 45 // The first and last shift/scale combinations are removed to avoid an | |
| 46 // excessive top movement (first) and an identity transformation (last). | |
| 47 // WARNING: To avoid patterned duplication of samples, be sure to keep | |
| 48 // kSampleRandomSize prime! | |
| 49 // E.g. with the current values (kSampleYShiftSize = 5, kSampleScaleSize = 3) | |
| 50 // kSampleRandomSize is 5 * 3 - 2 = 13, which is prime. | |
| 51 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; | |
| 52 // ASSERT_IS_PRIME(kSampleRandomSize) !! | |
| 53 | |
| 54 class TESS_API TrainingSample : public ELIST_LINK { | |
| 55 public: | |
| 56 TrainingSample() | |
| 57 : class_id_(INVALID_UNICHAR_ID) | |
| 58 , font_id_(0) | |
| 59 , page_num_(0) | |
| 60 , num_features_(0) | |
| 61 , num_micro_features_(0) | |
| 62 , outline_length_(0) | |
| 63 , features_(nullptr) | |
| 64 , micro_features_(nullptr) | |
| 65 , weight_(1.0) | |
| 66 , max_dist_(0.0) | |
| 67 , sample_index_(0) | |
| 68 , features_are_indexed_(false) | |
| 69 , features_are_mapped_(false) | |
| 70 , is_error_(false) {} | |
| 71 ~TrainingSample(); | |
| 72 | |
| 73 // Saves the given features into a new TrainingSample. The features are | |
| 74 // copied, so may be deleted afterwards. Delete the return value after use. | |
| 75 static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, | |
| 76 const TBOX &bounding_box, | |
| 77 const INT_FEATURE_STRUCT *features, int num_features); | |
| 78 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. | |
| 79 FEATURE_STRUCT *GetCNFeature() const; | |
| 80 // Constructs and returns a copy "randomized" by the method given by | |
| 81 // the randomizer index. If index is outside [0, kSampleRandomSize) then | |
| 82 // an exact copy is returned. | |
| 83 TrainingSample *RandomizedCopy(int index) const; | |
| 84 // Constructs and returns an exact copy. | |
| 85 TrainingSample *Copy() const; | |
| 86 | |
| 87 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data | |
| 88 // members, which are mostly the mapped features and the weight. | |
| 89 // It is assumed these can all be reconstructed from what is saved. | |
| 90 // Writes to the given file. Returns false in case of error. | |
| 91 bool Serialize(FILE *fp) const; | |
| 92 // Creates from the given file. Returns nullptr in case of error. | |
| 93 // If swap is true, assumes a big/little-endian swap is needed. | |
| 94 static TrainingSample *DeSerializeCreate(bool swap, FILE *fp); | |
| 95 // Reads from the given file. Returns false in case of error. | |
| 96 // If swap is true, assumes a big/little-endian swap is needed. | |
| 97 bool DeSerialize(bool swap, FILE *fp); | |
| 98 | |
| 99 // Extracts the needed information from the CHAR_DESC_STRUCT. | |
| 100 void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, | |
| 101 CHAR_DESC_STRUCT *char_desc); | |
| 102 | |
| 103 // Sets mapped_features_ to the indexed versions of the features in | |
| 104 // features_, using the provided feature_space. | |
| 105 void IndexFeatures(const IntFeatureSpace &feature_space); | |
| 106 | |
| 107 // Returns an Image representing the sample. (Int features only.) | |
| 108 Image RenderToPix(const UNICHARSET *unicharset) const; | |
| 109 // Displays the features in the given window with the given color. | |
| 110 void DisplayFeatures(ScrollView::Color color, ScrollView *window) const; | |
| 111 | |
| 112 // Returns an Image of the original sample image. It is padded all round | |
| 113 // by padding wherever possible. | |
| 114 // The returned Pix must be pixDestroyed after use. | |
| 115 // If the input page_pix is nullptr, nullptr is returned. | |
| 116 Image GetSamplePix(int padding, Image page_pix) const; | |
| 117 | |
| 118 // Accessors and setters. | |
| 119 UNICHAR_ID class_id() const { | |
| 120 return class_id_; | |
| 121 } | |
| 122 void set_class_id(int id) { | |
| 123 class_id_ = id; | |
| 124 } | |
| 125 int font_id() const { | |
| 126 return font_id_; | |
| 127 } | |
| 128 void set_font_id(int id) { | |
| 129 font_id_ = id; | |
| 130 } | |
| 131 int page_num() const { | |
| 132 return page_num_; | |
| 133 } | |
| 134 void set_page_num(int page) { | |
| 135 page_num_ = page; | |
| 136 } | |
| 137 const TBOX &bounding_box() const { | |
| 138 return bounding_box_; | |
| 139 } | |
| 140 void set_bounding_box(const TBOX &box) { | |
| 141 bounding_box_ = box; | |
| 142 } | |
| 143 uint32_t num_features() const { | |
| 144 return num_features_; | |
| 145 } | |
| 146 const INT_FEATURE_STRUCT *features() const { | |
| 147 return features_; | |
| 148 } | |
| 149 uint32_t num_micro_features() const { | |
| 150 return num_micro_features_; | |
| 151 } | |
| 152 const MicroFeature *micro_features() const { | |
| 153 return micro_features_; | |
| 154 } | |
| 155 int outline_length() const { | |
| 156 return outline_length_; | |
| 157 } | |
| 158 float cn_feature(int index) const { | |
| 159 return cn_feature_[index]; | |
| 160 } | |
| 161 int geo_feature(int index) const { | |
| 162 return geo_feature_[index]; | |
| 163 } | |
| 164 double weight() const { | |
| 165 return weight_; | |
| 166 } | |
| 167 void set_weight(double value) { | |
| 168 weight_ = value; | |
| 169 } | |
| 170 double max_dist() const { | |
| 171 return max_dist_; | |
| 172 } | |
| 173 void set_max_dist(double value) { | |
| 174 max_dist_ = value; | |
| 175 } | |
| 176 int sample_index() const { | |
| 177 return sample_index_; | |
| 178 } | |
| 179 void set_sample_index(int value) { | |
| 180 sample_index_ = value; | |
| 181 } | |
| 182 bool features_are_mapped() const { | |
| 183 return features_are_mapped_; | |
| 184 } | |
| 185 const std::vector<int> &mapped_features() const { | |
| 186 ASSERT_HOST(features_are_mapped_); | |
| 187 return mapped_features_; | |
| 188 } | |
| 189 const std::vector<int> &indexed_features() const { | |
| 190 ASSERT_HOST(features_are_indexed_); | |
| 191 return mapped_features_; | |
| 192 } | |
| 193 bool is_error() const { | |
| 194 return is_error_; | |
| 195 } | |
| 196 void set_is_error(bool value) { | |
| 197 is_error_ = value; | |
| 198 } | |
| 199 | |
| 200 private: | |
| 201 // Unichar id that this sample represents. There obviously must be a | |
| 202 // reference UNICHARSET somewhere. Usually in TrainingSampleSet. | |
| 203 UNICHAR_ID class_id_; | |
| 204 // Font id in which this sample was printed. Refers to a fontinfo_table_ in | |
| 205 // MasterTrainer. | |
| 206 int font_id_; | |
| 207 // Number of the page that the sample came from. | |
| 208 int page_num_; | |
| 209 // Bounding box of sample in original image. | |
| 210 TBOX bounding_box_; | |
| 211 // Number of INT_FEATURE_STRUCT in features_ array. | |
| 212 uint32_t num_features_; | |
| 213 // Number of MicroFeature in micro_features_ array. | |
| 214 uint32_t num_micro_features_; | |
| 215 // Total length of outline in the baseline normalized coordinate space. | |
| 216 // See comment in WERD_RES class definition for a discussion of coordinate | |
| 217 // spaces. | |
| 218 int outline_length_; | |
| 219 // Array of int features; num_features_ holds its element count. | |
| 220 INT_FEATURE_STRUCT *features_; | |
| 221 // Array of micro-features; num_micro_features_ holds its element count. | |
| 222 MicroFeature *micro_features_; | |
| 223 // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. | |
| 224 float cn_feature_[kNumCNParams]; | |
| 225 // The one and only geometric feature. (Aims at replacing cn_feature_). | |
| 226 // Indexed by GeoParams enum in picofeat.h | |
| 227 int geo_feature_[GeoCount]; | |
| 228 | |
| 229 // Non-serialized cache data. | |
| 230 // Weight used for boosting training. | |
| 231 double weight_; | |
| 232 // Maximum distance to other samples of same class/font used in computing | |
| 233 // the canonical sample. | |
| 234 double max_dist_; | |
| 235 // Global index of this sample. | |
| 236 int sample_index_; | |
| 237 | |
| 238 public: | |
| 239 // The members below are public because they are used in training tools. | |
| 240 // TODO: hide (make private) after refactoring. | |
| 241 | |
| 242 // Indexed/mapped features, as indicated by the bools below. | |
| 243 std::vector<int> mapped_features_; | |
| 244 bool features_are_indexed_; | |
| 245 bool features_are_mapped_; | |
| 246 | |
| 247 private: | |
| 248 // True if the last classification was an error by the current definition. | |
| 249 bool is_error_; | |
| 250 | |
| 251 // Randomizing factors; only declared here, no initializers in this header. | |
| 252 static const int kYShiftValues[kSampleYShiftSize]; | |
| 253 static const double kScaleValues[kSampleScaleSize]; | |
| 254 }; | |
| 255 | |
| 256 ELISTIZEH(TrainingSample) | |
| 257 | |
| 258 } // namespace tesseract | |
| 259 | |
| 260 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_ |
