Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/wordrec/findseam.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: findseam.cpp (Formerly findseam.c) | |
| 4 * Author: Mark Seaman, OCR Technology | |
| 5 * | |
| 6 * (c) Copyright 1987, Hewlett-Packard Company. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 *****************************************************************************/ | |
| 18 /*---------------------------------------------------------------------- | |
| 19 I n c l u d e s | |
| 20 ----------------------------------------------------------------------*/ | |
| 21 #include "findseam.h" | |
| 22 #include "outlines.h" | |
| 23 #include "plotedges.h" | |
| 24 #include "seam.h" | |
| 25 #include "wordrec.h" | |
| 26 | |
| 27 // Include automatically generated configuration file if running autoconf. | |
| 28 #ifdef HAVE_CONFIG_H | |
| 29 # include "config_auto.h" | |
| 30 #endif | |
| 31 | |
| 32 /********************************************************************** | |
| 33 * partial_split_priority | |
| 34 * | |
| 35 * Assign a priority to this split based on the features that it has. | |
| 36 * Grade it according to the different rating schemes and return the | |
| 37 * value of its goodness. | |
| 38 **********************************************************************/ | |
| 39 | |
| 40 #define partial_split_priority(split) (grade_split_length(split) + grade_sharpness(split)) | |
| 41 | |
| 42 /*---------------------------------------------------------------------- | |
| 43 T y p e s | |
| 44 ----------------------------------------------------------------------*/ | |
| 45 #define SPLIT_CLOSENESS 20 /* Difference in x value */ | |
| 46 /* How many to keep */ | |
| 47 #define MAX_NUM_SEAMS 150 | |
| 48 /* How many to keep */ | |
| 49 #define NO_FULL_PRIORITY (-1) // Special marker for pri. | |
| 50 /* Evaluate right away */ | |
| 51 #define BAD_PRIORITY 9999.0 | |
| 52 | |
| 53 /*---------------------------------------------------------------------- | |
| 54 F u n c t i o n s | |
| 55 ----------------------------------------------------------------------*/ | |
| 56 namespace tesseract { | |
| 57 | |
| 58 /********************************************************************** | |
| 59 * add_seam_to_queue | |
| 60 * | |
| 61 * Adds the given new_seam to the seams priority queue, unless it is full | |
| 62 * and the new seam is worse than the worst. | |
| 63 **********************************************************************/ | |
| 64 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams) { | |
| 65 if (new_seam == nullptr) { | |
| 66 return; | |
| 67 } | |
| 68 if (chop_debug) { | |
| 69 tprintf("Pushing new seam with priority %g :", new_priority); | |
| 70 new_seam->Print("seam: "); | |
| 71 } | |
| 72 if (seams->size() >= MAX_NUM_SEAMS) { | |
| 73 SeamPair old_pair(0, nullptr); | |
| 74 if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) { | |
| 75 if (chop_debug) { | |
| 76 tprintf("Old seam staying with priority %g\n", old_pair.key()); | |
| 77 } | |
| 78 delete new_seam; | |
| 79 seams->Push(&old_pair); | |
| 80 return; | |
| 81 } else if (chop_debug) { | |
| 82 tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority, | |
| 83 old_pair.key()); | |
| 84 } | |
| 85 } | |
| 86 SeamPair new_pair(new_priority, new_seam); | |
| 87 seams->Push(&new_pair); | |
| 88 } | |
| 89 | |
| 90 /********************************************************************** | |
| 91 * choose_best_seam | |
| 92 * | |
| 93 * Choose the best seam that can be created by assembling this a | |
| 94 * collection of splits. A queue of all the possible seams is | |
| 95 * maintained. Each new split received is placed in that queue with | |
| 96 * its partial priority value. These values in the seam queue are | |
| 97 * evaluated and combined until a good enough seam is found. If no | |
| 98 * further good seams are being found then this function returns to the | |
| 99 * caller, who will send more splits. If this function is called with | |
| 100 * a split of nullptr, then no further splits can be supplied by the | |
| 101 * caller. | |
| 102 **********************************************************************/ | |
| 103 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, | |
| 104 SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) { | |
| 105 SEAM *seam; | |
| 106 float my_priority; | |
| 107 /* Add seam of split */ | |
| 108 my_priority = priority; | |
| 109 if (split != nullptr) { | |
| 110 TPOINT split_point = split->point1->pos; | |
| 111 split_point += split->point2->pos; | |
| 112 split_point /= 2; | |
| 113 seam = new SEAM(my_priority, split_point, *split); | |
| 114 if (chop_debug > 1) { | |
| 115 seam->Print("Partial priority "); | |
| 116 } | |
| 117 add_seam_to_queue(my_priority, seam, seam_queue); | |
| 118 | |
| 119 if (my_priority > chop_good_split) { | |
| 120 return; | |
| 121 } | |
| 122 } | |
| 123 | |
| 124 TBOX bbox = blob->bounding_box(); | |
| 125 /* Queue loop */ | |
| 126 while (!seam_queue->empty()) { | |
| 127 SeamPair seam_pair; | |
| 128 seam_queue->Pop(&seam_pair); | |
| 129 seam = seam_pair.extract_data(); | |
| 130 /* Set full priority */ | |
| 131 my_priority = | |
| 132 seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth, | |
| 133 chop_center_knob, chop_width_change_knob); | |
| 134 if (chop_debug) { | |
| 135 char str[80]; | |
| 136 snprintf(str, sizeof(str), "Full my_priority %0.0f, ", my_priority); | |
| 137 seam->Print(str); | |
| 138 } | |
| 139 | |
| 140 if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) && | |
| 141 my_priority < chop_ok_split) { | |
| 142 /* No crossing */ | |
| 143 if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) { | |
| 144 delete *seam_result; | |
| 145 *seam_result = new SEAM(*seam); | |
| 146 (*seam_result)->set_priority(my_priority); | |
| 147 } else { | |
| 148 delete seam; | |
| 149 seam = nullptr; | |
| 150 my_priority = BAD_PRIORITY; | |
| 151 } | |
| 152 } | |
| 153 | |
| 154 if (my_priority < chop_good_split) { | |
| 155 delete seam; | |
| 156 return; /* Made good answer */ | |
| 157 } | |
| 158 | |
| 159 if (seam) { | |
| 160 /* Combine with others */ | |
| 161 if (seam_pile->size() < chop_seam_pile_size) { | |
| 162 combine_seam(*seam_pile, seam, seam_queue); | |
| 163 SeamDecPair pair(seam_pair.key(), seam); | |
| 164 seam_pile->Push(&pair); | |
| 165 } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size && | |
| 166 seam_pile->PeekTop().key() > seam_pair.key()) { | |
| 167 combine_seam(*seam_pile, seam, seam_queue); | |
| 168 SeamDecPair pair; | |
| 169 seam_pile->Pop(&pair); // pop the worst. | |
| 170 // Replace the seam in pair (deleting the old one) with | |
| 171 // the new seam and score, then push back into the heap. | |
| 172 pair.set_key(seam_pair.key()); | |
| 173 pair.set_data(seam); | |
| 174 seam_pile->Push(&pair); | |
| 175 } else { | |
| 176 delete seam; | |
| 177 } | |
| 178 } | |
| 179 | |
| 180 my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key(); | |
| 181 if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) { | |
| 182 return; | |
| 183 } | |
| 184 } | |
| 185 } | |
| 186 | |
| 187 /********************************************************************** | |
| 188 * combine_seam | |
| 189 * | |
| 190 * Find other seams to combine with this one. The new seams that result | |
| 191 * from this union should be added to the seam queue. The return value | |
| 192 * tells whether or not any additional seams were added to the queue. | |
| 193 **********************************************************************/ | |
| 194 void Wordrec::combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) { | |
| 195 for (int x = 0; x < seam_pile.size(); ++x) { | |
| 196 const SEAM *this_one = seam_pile.get(x).data(); | |
| 197 if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { | |
| 198 SEAM *new_one = new SEAM(*seam); | |
| 199 new_one->CombineWith(*this_one); | |
| 200 if (chop_debug > 1) { | |
| 201 new_one->Print("Combo priority "); | |
| 202 } | |
| 203 add_seam_to_queue(new_one->priority(), new_one, seam_queue); | |
| 204 } | |
| 205 } | |
| 206 } | |
| 207 | |
| 208 /********************************************************************** | |
| 209 * pick_good_seam | |
| 210 * | |
| 211 * Find and return a good seam that will split this blob into two pieces. | |
| 212 * Work from the outlines provided. | |
| 213 **********************************************************************/ | |
| 214 SEAM *Wordrec::pick_good_seam(TBLOB *blob) { | |
| 215 SeamPile seam_pile(chop_seam_pile_size); | |
| 216 EDGEPT *points[MAX_NUM_POINTS]; | |
| 217 EDGEPT_CLIST new_points; | |
| 218 SEAM *seam = nullptr; | |
| 219 TESSLINE *outline; | |
| 220 int16_t num_points = 0; | |
| 221 | |
| 222 #ifndef GRAPHICS_DISABLED | |
| 223 if (chop_debug > 2) { | |
| 224 wordrec_display_splits.set_value(true); | |
| 225 } | |
| 226 | |
| 227 draw_blob_edges(blob); | |
| 228 #endif | |
| 229 | |
| 230 PointHeap point_heap(MAX_NUM_POINTS); | |
| 231 for (outline = blob->outlines; outline; outline = outline->next) { | |
| 232 prioritize_points(outline, &point_heap); | |
| 233 } | |
| 234 | |
| 235 while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { | |
| 236 points[num_points++] = point_heap.PeekTop().data(); | |
| 237 point_heap.Pop(nullptr); | |
| 238 } | |
| 239 | |
| 240 /* Initialize queue */ | |
| 241 SeamQueue seam_queue(MAX_NUM_SEAMS); | |
| 242 | |
| 243 try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); | |
| 244 try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob); | |
| 245 | |
| 246 if (seam == nullptr) { | |
| 247 choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile); | |
| 248 } else if (seam->priority() > chop_good_split) { | |
| 249 choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile); | |
| 250 } | |
| 251 | |
| 252 EDGEPT_C_IT it(&new_points); | |
| 253 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 254 EDGEPT *inserted_point = it.data(); | |
| 255 if (seam == nullptr || !seam->UsesPoint(inserted_point)) { | |
| 256 for (outline = blob->outlines; outline; outline = outline->next) { | |
| 257 if (outline->loop == inserted_point) { | |
| 258 outline->loop = outline->loop->next; | |
| 259 } | |
| 260 } | |
| 261 remove_edgept(inserted_point); | |
| 262 } | |
| 263 } | |
| 264 | |
| 265 if (seam) { | |
| 266 if (seam->priority() > chop_ok_split) { | |
| 267 delete seam; | |
| 268 seam = nullptr; | |
| 269 } | |
| 270 #ifndef GRAPHICS_DISABLED | |
| 271 else if (wordrec_display_splits) { | |
| 272 seam->Mark(edge_window); | |
| 273 if (chop_debug > 2) { | |
| 274 edge_window->Update(); | |
| 275 edge_window->Wait(); | |
| 276 } | |
| 277 } | |
| 278 #endif | |
| 279 } | |
| 280 | |
| 281 if (chop_debug) { | |
| 282 wordrec_display_splits.set_value(false); | |
| 283 } | |
| 284 | |
| 285 return (seam); | |
| 286 } | |
| 287 | |
| 288 /********************************************************************** | |
| 289 * try_point_pairs | |
| 290 * | |
| 291 * Try all the splits that are produced by pairing critical points | |
| 292 * together. See if any of them are suitable for use. Use a seam | |
| 293 * queue and seam pile that have already been initialized and used. | |
| 294 **********************************************************************/ | |
| 295 void Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, | |
| 296 SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, | |
| 297 TBLOB *blob) { | |
| 298 int16_t x; | |
| 299 int16_t y; | |
| 300 PRIORITY priority; | |
| 301 | |
| 302 for (x = 0; x < num_points; x++) { | |
| 303 for (y = x + 1; y < num_points; y++) { | |
| 304 if (points[y] && | |
| 305 points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length && | |
| 306 points[x] != points[y]->next && points[y] != points[x]->next && | |
| 307 !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) { | |
| 308 SPLIT split(points[x], points[y]); | |
| 309 priority = partial_split_priority(&split); | |
| 310 | |
| 311 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); | |
| 312 } | |
| 313 } | |
| 314 } | |
| 315 } | |
| 316 | |
| 317 /********************************************************************** | |
| 318 * try_vertical_splits | |
| 319 * | |
| 320 * Try all the splits that are produced by vertical projection to see | |
| 321 * if any of them are suitable for use. Use a seam queue and seam pile | |
| 322 * that have already been initialized and used. | |
| 323 * Return in new_points a collection of points that were inserted into | |
| 324 * the blob while examining vertical splits and which may safely be | |
| 325 * removed once a seam is chosen if they are not part of the seam. | |
| 326 **********************************************************************/ | |
| 327 void Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, | |
| 328 EDGEPT_CLIST *new_points, SeamQueue *seam_queue, | |
| 329 SeamPile *seam_pile, SEAM **seam, TBLOB *blob) { | |
| 330 EDGEPT *vertical_point = nullptr; | |
| 331 int16_t x; | |
| 332 PRIORITY priority; | |
| 333 TESSLINE *outline; | |
| 334 | |
| 335 for (x = 0; x < num_points; x++) { | |
| 336 vertical_point = nullptr; | |
| 337 for (outline = blob->outlines; outline; outline = outline->next) { | |
| 338 vertical_projection_point(points[x], outline->loop, &vertical_point, new_points); | |
| 339 } | |
| 340 | |
| 341 if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next && | |
| 342 points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) { | |
| 343 SPLIT split(points[x], vertical_point); | |
| 344 priority = partial_split_priority(&split); | |
| 345 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); | |
| 346 } | |
| 347 } | |
| 348 } | |
| 349 | |
| 350 } // namespace tesseract |
