Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/common/commontraining.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2008 Google Inc. All Rights Reserved. | |
| 2 // Author: scharron@google.com (Samuel Charron) | |
| 3 // | |
| 4 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 // you may not use this file except in compliance with the License. | |
| 6 // You may obtain a copy of the License at | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // Unless required by applicable law or agreed to in writing, software | |
| 9 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 11 // See the License for the specific language governing permissions and | |
| 12 // limitations under the License. | |
| 13 | |
| 14 #ifndef TESSERACT_TRAINING_COMMONTRAINING_H_ | |
| 15 #define TESSERACT_TRAINING_COMMONTRAINING_H_ | |
| 16 | |
| 17 #ifdef HAVE_CONFIG_H | |
| 18 # include "config_auto.h" | |
| 19 #endif | |
| 20 | |
| 21 #include "commandlineflags.h" | |
| 22 #include "export.h" | |
| 23 #include "tprintf.h" | |
| 24 | |
| 25 #include <tesseract/baseapi.h> | |
| 26 | |
| 27 #include <memory> | |
| 28 | |
| 29 namespace tesseract { | |
| 30 | |
| 31 TESS_COMMON_TRAINING_API | |
| 32 void ParseArguments(int *argc, char ***argv); | |
| 33 | |
| 34 // Checks that the shared tesseract library in use reports the same version | |
| 35 // as TESSERACT_VERSION_STR; on mismatch prints an error and calls exit(1). | |
| 36 // Must be inline: as part of the shared library it could not compare versions. | |
| 37 static inline void CheckSharedLibraryVersion() { | |
| 38 #ifdef HAVE_CONFIG_H | |
| 39 if (strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version()) != 0) { | |
| 40 tprintf( | |
| 41 "ERROR: shared library version mismatch (was %s, expected %s)\n" | |
| 42 "Did you use a wrong shared tesseract library?\n", | |
| 43 TessBaseAPI::Version(), TESSERACT_VERSION_STR); | |
| 44 exit(1); | |
| 45 } | |
| 46 #endif | |
| 47 } | |
| 48 | |
| 49 } // namespace tesseract | |
| 50 | |
| 51 #ifndef DISABLED_LEGACY_ENGINE | |
| 52 | |
| 53 # include "cluster.h" | |
| 54 # include "featdefs.h" | |
| 55 # include "intproto.h" | |
| 56 # include "oldlist.h" | |
| 57 | |
| 58 namespace tesseract { | |
| 59 | |
| 60 class Classify; | |
| 61 class MasterTrainer; | |
| 62 class ShapeTable; | |
| 63 | |
| 64 ////////////////////////////////////////////////////////////////////////////// | |
| 65 // Globals /////////////////////////////////////////////////////////////////// | |
| 66 ////////////////////////////////////////////////////////////////////////////// | |
| 67 | |
| 68 TESS_COMMON_TRAINING_API | |
| 69 extern FEATURE_DEFS_STRUCT feature_defs; | |
| 70 | |
| 71 // Must be defined in the file that "implements" commonTraining facilities. | |
| 72 TESS_COMMON_TRAINING_API | |
| 73 extern CLUSTERCONFIG Config; | |
| 74 | |
| 75 ////////////////////////////////////////////////////////////////////////////// | |
| 76 // Structs /////////////////////////////////////////////////////////////////// | |
| 77 ////////////////////////////////////////////////////////////////////////////// | |
| 78 struct LABELEDLISTNODE { | |
| 79 /// Constructs an empty labeled list node: both sample counters start | |
| 80 /// at 0 and List starts as nullptr. | |
| 81 /// @param label label copied into the Label member | |
| 82 LABELEDLISTNODE(const char *label) : Label(label) { | |
| 83 } | |
| 84 std::string Label; | |
| 85 int SampleCount = 0; | |
| 86 int font_sample_count = 0; | |
| 87 LIST List = nullptr; | |
| 88 }; | |
| 89 using LABELEDLIST = LABELEDLISTNODE *; | |
| 90 | |
| 91 struct MERGE_CLASS_NODE { | |
| 92 MERGE_CLASS_NODE(const char * label) : Label(label), Class(NewClass(MAX_NUM_PROTOS, MAX_NUM_CONFIGS)) { | |
| 93 } | |
| 94 std::string Label; // copy of the label passed to the constructor | |
| 95 int NumMerged[MAX_NUM_PROTOS] = {}; // zero-initialized so no entry is ever read while indeterminate | |
| 96 tesseract::CLASS_TYPE Class; // result of NewClass(MAX_NUM_PROTOS, MAX_NUM_CONFIGS) | |
| 97 }; | |
| 98 using MERGE_CLASS = MERGE_CLASS_NODE *; | |
| 99 | |
| 100 ////////////////////////////////////////////////////////////////////////////// | |
| 101 // Functions ///////////////////////////////////////////////////////////////// | |
| 102 ////////////////////////////////////////////////////////////////////////////// | |
| 103 | |
| 104 // Helper loads shape table from the given file. | |
| 105 ShapeTable *LoadShapeTable(const std::string &file_prefix); | |
| 106 // Helper to write the shape_table. | |
| 107 TESS_COMMON_TRAINING_API | |
| 108 void WriteShapeTable(const std::string &file_prefix, const ShapeTable &shape_table); | |
| 109 | |
| 110 // Creates a MasterTrainer and loads the training data into it: | |
| 111 // Initializes feature_defs and IntegerFX. | |
| 112 // Loads the shape_table if shape_table != nullptr. | |
| 113 // Loads initial unicharset from -U command-line option. | |
| 114 // If FLAGS_input_trainer is set, loads the majority of data from there, else: | |
| 115 // Loads font info from -F option. | |
| 116 // Loads xheights from -X option. | |
| 117 // Loads samples from .tr files in remaining command-line args. | |
| 118 // Deletes outliers and computes canonical samples. | |
| 119 // If FLAGS_output_trainer is set, saves the trainer for future use. | |
| 120 // Computes canonical and cloud features. | |
| 121 // If shape_table is not nullptr, but failed to load, make a fake flat one, | |
| 122 // as shape clustering was not run. | |
| 123 TESS_COMMON_TRAINING_API | |
| 124 std::unique_ptr<MasterTrainer> LoadTrainingData(const char *const *filelist, bool replication, | |
| 125 ShapeTable **shape_table, std::string &file_prefix); | |
| 126 | |
| 127 LABELEDLIST FindList(tesseract::LIST List, const std::string &Label); | |
| 128 | |
| 129 TESS_COMMON_TRAINING_API | |
| 130 void ReadTrainingSamples(const tesseract::FEATURE_DEFS_STRUCT &feature_defs, | |
| 131 const char *feature_name, int max_samples, | |
| 132 tesseract::UNICHARSET *unicharset, FILE *file, | |
| 133 tesseract::LIST *training_samples); | |
| 134 | |
| 135 void WriteTrainingSamples(const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, | |
| 136 tesseract::LIST CharList, const char *program_feature_type); | |
| 137 | |
| 138 TESS_COMMON_TRAINING_API | |
| 139 void FreeTrainingSamples(tesseract::LIST CharList); | |
| 140 | |
| 141 TESS_COMMON_TRAINING_API | |
| 142 void FreeLabeledList(LABELEDLIST LabeledList); | |
| 143 | |
| 144 TESS_COMMON_TRAINING_API | |
| 145 void FreeLabeledClassList(tesseract::LIST ClassListList); | |
| 146 | |
| 147 TESS_COMMON_TRAINING_API | |
| 148 tesseract::CLUSTERER *SetUpForClustering(const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs, | |
| 149 LABELEDLIST CharSample, const char *program_feature_type); | |
| 150 | |
| 151 TESS_COMMON_TRAINING_API | |
| 152 tesseract::LIST RemoveInsignificantProtos(tesseract::LIST ProtoList, bool KeepSigProtos, | |
| 153 bool KeepInsigProtos, int N); | |
| 154 | |
| 155 TESS_COMMON_TRAINING_API | |
| 156 void CleanUpUnusedData(tesseract::LIST ProtoList); | |
| 157 | |
| 158 TESS_COMMON_TRAINING_API | |
| 159 void MergeInsignificantProtos(tesseract::LIST ProtoList, const char *label, | |
| 160 tesseract::CLUSTERER *Clusterer, tesseract::CLUSTERCONFIG *Config); | |
| 161 | |
| 162 TESS_COMMON_TRAINING_API | |
| 163 MERGE_CLASS FindClass(tesseract::LIST List, const std::string &Label); | |
| 164 | |
| 165 TESS_COMMON_TRAINING_API | |
| 166 tesseract::CLASS_STRUCT *SetUpForFloat2Int(const tesseract::UNICHARSET &unicharset, | |
| 167 tesseract::LIST LabeledClassList); | |
| 168 | |
| 169 void Normalize(float *Values); | |
| 170 | |
| 171 TESS_COMMON_TRAINING_API | |
| 172 void FreeNormProtoList(tesseract::LIST CharList); | |
| 173 | |
| 174 TESS_COMMON_TRAINING_API | |
| 175 void AddToNormProtosList(tesseract::LIST *NormProtoList, tesseract::LIST ProtoList, const std::string &CharName); | |
| 176 | |
| 177 TESS_COMMON_TRAINING_API | |
| 178 int NumberOfProtos(tesseract::LIST ProtoList, bool CountSigProtos, bool CountInsigProtos); | |
| 179 | |
| 180 void allocNormProtos(); | |
| 181 | |
| 182 } // namespace tesseract | |
| 183 | |
| 184 #endif // def DISABLED_LEGACY_ENGINE | |
| 185 | |
| 186 #endif // TESSERACT_TRAINING_COMMONTRAINING_H_ |
