Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/shapeclassifier.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2011 Google Inc. All Rights Reserved. | |
| 2 // Author: rays@google.com (Ray Smith) | |
| 3 /////////////////////////////////////////////////////////////////////// | |
| 4 // File: shapeclassifier.cpp | |
| 5 // Description: Base interface class for classifiers that return a | |
| 6 // shape index. | |
| 7 // Author: Ray Smith | |
| 8 // | |
| 9 // (C) Copyright 2011, Google Inc. | |
| 10 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 11 // you may not use this file except in compliance with the License. | |
| 12 // You may obtain a copy of the License at | |
| 13 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 14 // Unless required by applicable law or agreed to in writing, software | |
| 15 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 17 // See the License for the specific language governing permissions and | |
| 18 // limitations under the License. | |
| 19 // | |
| 20 /////////////////////////////////////////////////////////////////////// | |
| 21 | |
| 22 #ifdef HAVE_CONFIG_H | |
| 23 # include "config_auto.h" | |
| 24 #endif | |
| 25 | |
| 26 #include "shapeclassifier.h" | |
| 27 | |
| 28 #include "scrollview.h" | |
| 29 #include "shapetable.h" | |
| 30 #ifndef GRAPHICS_DISABLED | |
| 31 #include "svmnode.h" | |
| 32 #endif | |
| 33 #include "tprintf.h" | |
| 34 #include "trainingsample.h" | |
| 35 | |
| 36 namespace tesseract { | |
| 37 | |
| 38 // Classifies the given [training] sample, writing to results. | |
| 39 // See shapeclassifier.h for a full description. | |
| 40 // Default implementation calls the ShapeRating version. | |
| 41 int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, | |
| 42 UNICHAR_ID keep_this, | |
| 43 std::vector<UnicharRating> *results) { | |
| 44 results->clear(); | |
| 45 std::vector<ShapeRating> shape_results; | |
| 46 int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, &shape_results); | |
| 47 const ShapeTable *shapes = GetShapeTable(); | |
| 48 std::vector<int> unichar_map(shapes->unicharset().size(), -1); | |
| 49 for (int r = 0; r < num_shape_results; ++r) { | |
| 50 shapes->AddShapeToResults(shape_results[r], &unichar_map, results); | |
| 51 } | |
| 52 return results->size(); | |
| 53 } | |
| 54 | |
| 55 // Classifies the given [training] sample, writing to results. | |
| 56 // See shapeclassifier.h for a full description. | |
| 57 // Default implementation aborts. | |
| 58 int ShapeClassifier::ClassifySample(const TrainingSample &sample, Image page_pix, int debug, | |
| 59 int keep_this, std::vector<ShapeRating> *results) { | |
| 60 ASSERT_HOST("Must implement ClassifySample!" == nullptr); | |
| 61 return 0; | |
| 62 } | |
| 63 | |
| 64 // Returns the shape that contains unichar_id that has the best result. | |
| 65 // If result is not nullptr, it is set with the shape_id and rating. | |
| 66 // Does not need to be overridden if ClassifySample respects the keep_this | |
| 67 // rule. | |
| 68 int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Image page_pix, | |
| 69 UNICHAR_ID unichar_id, ShapeRating *result) { | |
| 70 std::vector<ShapeRating> results; | |
| 71 const ShapeTable *shapes = GetShapeTable(); | |
| 72 int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); | |
| 73 for (int r = 0; r < num_results; ++r) { | |
| 74 if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { | |
| 75 if (result != nullptr) { | |
| 76 *result = results[r]; | |
| 77 } | |
| 78 return results[r].shape_id; | |
| 79 } | |
| 80 } | |
| 81 return -1; | |
| 82 } | |
| 83 | |
| 84 // Provides access to the UNICHARSET that this classifier works with. | |
| 85 // Only needs to be overridden if GetShapeTable() can return nullptr. | |
| 86 const UNICHARSET &ShapeClassifier::GetUnicharset() const { | |
| 87 return GetShapeTable()->unicharset(); | |
| 88 } | |
| 89 | |
| 90 #ifndef GRAPHICS_DISABLED | |
| 91 | |
| 92 // Visual debugger classifies the given sample, displays the results and | |
| 93 // solicits user input to display other classifications. Returns when | |
| 94 // the user has finished with debugging the sample. | |
| 95 // Probably doesn't need to be overridden if the subclass provides | |
| 96 // DisplayClassifyAs. | |
| 97 void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Image page_pix, | |
| 98 UNICHAR_ID unichar_id) { | |
| 99 static ScrollView *terminator = nullptr; | |
| 100 if (terminator == nullptr) { | |
| 101 terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true); | |
| 102 } | |
| 103 ScrollView *debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0); | |
| 104 // Provide a right-click menu to choose the class. | |
| 105 auto *popup_menu = new SVMenuNode(); | |
| 106 popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug"); | |
| 107 popup_menu->BuildMenu(debug_win, false); | |
| 108 // Display the features in green. | |
| 109 const INT_FEATURE_STRUCT *features = sample.features(); | |
| 110 uint32_t num_features = sample.num_features(); | |
| 111 for (uint32_t f = 0; f < num_features; ++f) { | |
| 112 RenderIntFeature(debug_win, &features[f], ScrollView::GREEN); | |
| 113 } | |
| 114 debug_win->Update(); | |
| 115 std::vector<UnicharRating> results; | |
| 116 // Debug classification until the user quits. | |
| 117 const UNICHARSET &unicharset = GetUnicharset(); | |
| 118 SVEventType ev_type; | |
| 119 do { | |
| 120 std::vector<ScrollView *> windows; | |
| 121 if (unichar_id >= 0) { | |
| 122 tprintf("Debugging class %d = %s\n", unichar_id, unicharset.id_to_unichar(unichar_id)); | |
| 123 UnicharClassifySample(sample, page_pix, 1, unichar_id, &results); | |
| 124 DisplayClassifyAs(sample, page_pix, unichar_id, 1, windows); | |
| 125 } else { | |
| 126 tprintf("Invalid unichar_id: %d\n", unichar_id); | |
| 127 UnicharClassifySample(sample, page_pix, 1, -1, &results); | |
| 128 } | |
| 129 if (unichar_id >= 0) { | |
| 130 tprintf("Debugged class %d = %s\n", unichar_id, unicharset.id_to_unichar(unichar_id)); | |
| 131 } | |
| 132 tprintf("Right-click in ClassifierDebug window to choose debug class,"); | |
| 133 tprintf(" Left-click or close window to quit...\n"); | |
| 134 UNICHAR_ID old_unichar_id; | |
| 135 do { | |
| 136 old_unichar_id = unichar_id; | |
| 137 auto ev = debug_win->AwaitEvent(SVET_ANY); | |
| 138 ev_type = ev->type; | |
| 139 if (ev_type == SVET_POPUP) { | |
| 140 if (unicharset.contains_unichar(ev->parameter)) { | |
| 141 unichar_id = unicharset.unichar_to_id(ev->parameter); | |
| 142 } else { | |
| 143 tprintf("Char class '%s' not found in unicharset", ev->parameter); | |
| 144 } | |
| 145 } | |
| 146 } while (unichar_id == old_unichar_id && ev_type != SVET_CLICK && ev_type != SVET_DESTROY); | |
| 147 for (auto window : windows) { | |
| 148 delete window; | |
| 149 } | |
| 150 } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY); | |
| 151 delete debug_win; | |
| 152 } | |
| 153 | |
| 154 #endif // !GRAPHICS_DISABLED | |
| 155 | |
| 156 // Displays classification as the given shape_id. Creates as many windows | |
| 157 // as it feels fit, using index as a guide for placement. Adds any created | |
| 158 // windows to the windows output and returns a new index that may be used | |
| 159 // by any subsequent classifiers. Caller waits for the user to view and | |
| 160 // then destroys the windows by clearing the vector. | |
| 161 int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, | |
| 162 UNICHAR_ID unichar_id, int index, | |
| 163 std::vector<ScrollView *> &windows) { | |
| 164 // Does nothing in the default implementation. | |
| 165 return index; | |
| 166 } | |
| 167 | |
| 168 // Prints debug information on the results. | |
| 169 void ShapeClassifier::UnicharPrintResults(const char *context, | |
| 170 const std::vector<UnicharRating> &results) const { | |
| 171 tprintf("%s\n", context); | |
| 172 for (const auto &result : results) { | |
| 173 tprintf("%g: c_id=%d=%s", result.rating, result.unichar_id, | |
| 174 GetUnicharset().id_to_unichar(result.unichar_id)); | |
| 175 if (!result.fonts.empty()) { | |
| 176 tprintf(" Font Vector:"); | |
| 177 for (auto &&font : result.fonts) { | |
| 178 tprintf(" %d", font.fontinfo_id); | |
| 179 } | |
| 180 } | |
| 181 tprintf("\n"); | |
| 182 } | |
| 183 } | |
| 184 void ShapeClassifier::PrintResults(const char *context, | |
| 185 const std::vector<ShapeRating> &results) const { | |
| 186 tprintf("%s\n", context); | |
| 187 for (const auto &result : results) { | |
| 188 tprintf("%g:", result.rating); | |
| 189 if (result.joined) { | |
| 190 tprintf("[J]"); | |
| 191 } | |
| 192 if (result.broken) { | |
| 193 tprintf("[B]"); | |
| 194 } | |
| 195 tprintf(" %s\n", GetShapeTable()->DebugStr(result.shape_id).c_str()); | |
| 196 } | |
| 197 } | |
| 198 | |
| 199 // Removes any result that has all its unichars covered by a better choice, | |
| 200 // regardless of font. | |
| 201 void ShapeClassifier::FilterDuplicateUnichars(std::vector<ShapeRating> *results) const { | |
| 202 std::vector<ShapeRating> filtered_results; | |
| 203 // Copy results to filtered results and knock out duplicate unichars. | |
| 204 const ShapeTable *shapes = GetShapeTable(); | |
| 205 for (unsigned r = 0; r < results->size(); ++r) { | |
| 206 if (r > 0) { | |
| 207 const Shape &shape_r = shapes->GetShape((*results)[r].shape_id); | |
| 208 int c; | |
| 209 for (c = 0; c < shape_r.size(); ++c) { | |
| 210 int unichar_id = shape_r[c].unichar_id; | |
| 211 unsigned s; | |
| 212 for (s = 0; s < r; ++s) { | |
| 213 const Shape &shape_s = shapes->GetShape((*results)[s].shape_id); | |
| 214 if (shape_s.ContainsUnichar(unichar_id)) { | |
| 215 break; // We found unichar_id. | |
| 216 } | |
| 217 } | |
| 218 if (s == r) { | |
| 219 break; // We didn't find unichar_id. | |
| 220 } | |
| 221 } | |
| 222 if (c == shape_r.size()) { | |
| 223 continue; // We found all the unichar ids in previous answers. | |
| 224 } | |
| 225 } | |
| 226 filtered_results.push_back((*results)[r]); | |
| 227 } | |
| 228 *results = std::move(filtered_results); | |
| 229 } | |
| 230 | |
| 231 } // namespace tesseract. |
