Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/split.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: split.cpp (Formerly split.c) | |
| 4 * Author: Mark Seaman, OCR Technology | |
| 5 * | |
| 6 * (c) Copyright 1987, Hewlett-Packard Company. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 *************************************************************************/ | |
| 18 | |
| 19 // Include automatically generated configuration file if running autoconf. | |
| 20 #ifdef HAVE_CONFIG_H | |
| 21 # include "config_auto.h" | |
| 22 #endif | |
| 23 | |
| 24 #include "split.h" | |
| 25 | |
| 26 #include "coutln.h" | |
| 27 #include "tprintf.h" | |
| 28 | |
| 29 #include <algorithm> | |
| 30 | |
| 31 namespace tesseract { | |
| 32 | |
| 33 /*---------------------------------------------------------------------- | |
| 34 V a r i a b l e s | |
| 35 ----------------------------------------------------------------------*/ | |
| 36 // Limit on the amount of penalty for the chop being off-center. | |
| 37 const int kCenterGradeCap = 25; | |
| 38 // Ridiculously large priority for splits that are no use. | |
| 39 const double kBadPriority = 999.0; | |
| 40 | |
| 41 BOOL_VAR(wordrec_display_splits, 0, "Display splits"); | |
| 42 | |
| 43 // Hides the SPLIT so the outlines appear not to be cut by it. | |
| 44 void SPLIT::Hide() const { | |
| 45 EDGEPT *edgept = point1; | |
| 46 do { | |
| 47 edgept->Hide(); | |
| 48 edgept = edgept->next; | |
| 49 } while (!edgept->EqualPos(*point2) && edgept != point1); | |
| 50 edgept = point2; | |
| 51 do { | |
| 52 edgept->Hide(); | |
| 53 edgept = edgept->next; | |
| 54 } while (!edgept->EqualPos(*point1) && edgept != point2); | |
| 55 } | |
| 56 | |
| 57 // Undoes hide, so the outlines are cut by the SPLIT. | |
| 58 void SPLIT::Reveal() const { | |
| 59 EDGEPT *edgept = point1; | |
| 60 do { | |
| 61 edgept->Reveal(); | |
| 62 edgept = edgept->next; | |
| 63 } while (!edgept->EqualPos(*point2) && edgept != point1); | |
| 64 edgept = point2; | |
| 65 do { | |
| 66 edgept->Reveal(); | |
| 67 edgept = edgept->next; | |
| 68 } while (!edgept->EqualPos(*point1) && edgept != point2); | |
| 69 } | |
| 70 | |
| 71 // Compute a split priority based on the bounding boxes of the parts. | |
| 72 // The arguments here are config parameters defined in Wordrec. Add chop_ | |
| 73 // to the beginning of the name. | |
| 74 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, | |
| 75 double center_knob, double width_change_knob) const { | |
| 76 TBOX box1 = Box12(); | |
| 77 TBOX box2 = Box21(); | |
| 78 int min_left = std::min(box1.left(), box2.left()); | |
| 79 int max_right = std::max(box1.right(), box2.right()); | |
| 80 if (xmin < min_left && xmax > max_right) { | |
| 81 return kBadPriority; | |
| 82 } | |
| 83 | |
| 84 float grade = 0.0f; | |
| 85 // grade_overlap. | |
| 86 int width1 = box1.width(); | |
| 87 int width2 = box2.width(); | |
| 88 int min_width = std::min(width1, width2); | |
| 89 int overlap = -box1.x_gap(box2); | |
| 90 if (overlap == min_width) { | |
| 91 grade += 100.0f; // Total overlap. | |
| 92 } else { | |
| 93 if (2 * overlap > min_width) { | |
| 94 overlap += 2 * overlap - min_width; | |
| 95 } | |
| 96 if (overlap > 0) { | |
| 97 grade += overlap_knob * overlap; | |
| 98 } | |
| 99 } | |
| 100 // grade_center_of_blob. | |
| 101 if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) { | |
| 102 grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2)); | |
| 103 } | |
| 104 // grade_width_change. | |
| 105 float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2)); | |
| 106 if (width_change_grade > 0.0f) { | |
| 107 grade += width_change_grade * width_change_knob; | |
| 108 } | |
| 109 return grade; | |
| 110 } | |
| 111 | |
| 112 // Returns true if *this SPLIT appears OK in the sense that it does not cross | |
| 113 // any outlines and does not chop off any ridiculously small pieces. | |
| 114 bool SPLIT::IsHealthy(const TBLOB &blob, int min_points, int min_area) const { | |
| 115 return !IsLittleChunk(min_points, min_area) && | |
| 116 !blob.SegmentCrossesOutline(point1->pos, point2->pos); | |
| 117 } | |
| 118 | |
| 119 // Returns true if the split generates a small chunk in terms of either area | |
| 120 // or number of points. | |
| 121 bool SPLIT::IsLittleChunk(int min_points, int min_area) const { | |
| 122 if (point1->ShortNonCircularSegment(min_points, point2) && | |
| 123 point1->SegmentArea(point2) < min_area) { | |
| 124 return true; | |
| 125 } | |
| 126 if (point2->ShortNonCircularSegment(min_points, point1) && | |
| 127 point2->SegmentArea(point1) < min_area) { | |
| 128 return true; | |
| 129 } | |
| 130 return false; | |
| 131 } | |
| 132 | |
| 133 /********************************************************************** | |
| 134 * make_edgept | |
| 135 * | |
| 136 * Create an EDGEPT and hook it into an existing list of edge points. | |
| 137 **********************************************************************/ | |
| 138 EDGEPT *make_edgept(TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev) { | |
| 139 EDGEPT *this_edgept; | |
| 140 /* Create point */ | |
| 141 this_edgept = new EDGEPT; | |
| 142 this_edgept->pos.x = x; | |
| 143 this_edgept->pos.y = y; | |
| 144 // Now deal with the src_outline steps. | |
| 145 C_OUTLINE *prev_ol = prev->src_outline; | |
| 146 if (prev_ol != nullptr && prev->next == next) { | |
| 147 // Compute the fraction of the segment that is being cut. | |
| 148 FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y); | |
| 149 FCOORD target_vec(x - prev->pos.x, y - prev->pos.y); | |
| 150 double cut_fraction = target_vec.length() / segment_vec.length(); | |
| 151 // Get the start and end at the step level. | |
| 152 ICOORD step_start = prev_ol->position_at_index(prev->start_step); | |
| 153 int end_step = prev->start_step + prev->step_count; | |
| 154 int step_length = prev_ol->pathlength(); | |
| 155 ICOORD step_end = prev_ol->position_at_index(end_step % step_length); | |
| 156 ICOORD step_vec = step_end - step_start; | |
| 157 double target_length = step_vec.length() * cut_fraction; | |
| 158 // Find the point on the segment that gives the length nearest to target. | |
| 159 int best_step = prev->start_step; | |
| 160 ICOORD total_step(0, 0); | |
| 161 double best_dist = target_length; | |
| 162 for (int s = prev->start_step; s < end_step; ++s) { | |
| 163 total_step += prev_ol->step(s % step_length); | |
| 164 double dist = fabs(target_length - total_step.length()); | |
| 165 if (dist < best_dist) { | |
| 166 best_dist = dist; | |
| 167 best_step = s + 1; | |
| 168 } | |
| 169 } | |
| 170 // The new point is an intermediate point. | |
| 171 this_edgept->src_outline = prev_ol; | |
| 172 this_edgept->step_count = end_step - best_step; | |
| 173 this_edgept->start_step = best_step % step_length; | |
| 174 prev->step_count = best_step - prev->start_step; | |
| 175 } else { | |
| 176 // The new point is poly only. | |
| 177 this_edgept->src_outline = nullptr; | |
| 178 this_edgept->step_count = 0; | |
| 179 this_edgept->start_step = 0; | |
| 180 } | |
| 181 /* Hook it up */ | |
| 182 this_edgept->next = next; | |
| 183 this_edgept->prev = prev; | |
| 184 prev->next = this_edgept; | |
| 185 next->prev = this_edgept; | |
| 186 /* Set up vec entries */ | |
| 187 this_edgept->vec.x = this_edgept->next->pos.x - x; | |
| 188 this_edgept->vec.y = this_edgept->next->pos.y - y; | |
| 189 this_edgept->prev->vec.x = x - this_edgept->prev->pos.x; | |
| 190 this_edgept->prev->vec.y = y - this_edgept->prev->pos.y; | |
| 191 return this_edgept; | |
| 192 } | |
| 193 | |
| 194 /********************************************************************** | |
| 195 * remove_edgept | |
| 196 * | |
| 197 * Remove a given EDGEPT from its list and delete it. | |
| 198 **********************************************************************/ | |
| 199 void remove_edgept(EDGEPT *point) { | |
| 200 EDGEPT *prev = point->prev; | |
| 201 EDGEPT *next = point->next; | |
| 202 // Add point's steps onto prev's steps if they are from the same outline. | |
| 203 if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) { | |
| 204 prev->step_count += point->step_count; | |
| 205 } | |
| 206 prev->next = next; | |
| 207 next->prev = prev; | |
| 208 prev->vec.x = next->pos.x - prev->pos.x; | |
| 209 prev->vec.y = next->pos.y - prev->pos.y; | |
| 210 delete point; | |
| 211 } | |
| 212 | |
| 213 /********************************************************************** | |
| 214 * Print | |
| 215 * | |
| 216 * Shows the coordinates of both points in a split. | |
| 217 **********************************************************************/ | |
| 218 void SPLIT::Print() const { | |
| 219 tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); | |
| 220 } | |
| 221 | |
| 222 #ifndef GRAPHICS_DISABLED | |
| 223 // Draws the split in the given window. | |
| 224 void SPLIT::Mark(ScrollView *window) const { | |
| 225 window->Pen(ScrollView::GREEN); | |
| 226 window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); | |
| 227 window->UpdateWindow(); | |
| 228 } | |
| 229 #endif | |
| 230 | |
| 231 // Creates two outlines out of one by splitting the original one in half. | |
| 232 // Inserts the resulting outlines into the given list. | |
| 233 void SPLIT::SplitOutlineList(TESSLINE *outlines) const { | |
| 234 SplitOutline(); | |
| 235 while (outlines->next != nullptr) { | |
| 236 outlines = outlines->next; | |
| 237 } | |
| 238 | |
| 239 outlines->next = new TESSLINE; | |
| 240 outlines->next->loop = point1; | |
| 241 outlines->next->ComputeBoundingBox(); | |
| 242 | |
| 243 outlines = outlines->next; | |
| 244 | |
| 245 outlines->next = new TESSLINE; | |
| 246 outlines->next->loop = point2; | |
| 247 outlines->next->ComputeBoundingBox(); | |
| 248 | |
| 249 outlines->next->next = nullptr; | |
| 250 } | |
| 251 | |
| 252 // Makes a split between these two edge points, but does not affect the | |
| 253 // outlines to which they belong. | |
| 254 void SPLIT::SplitOutline() const { | |
| 255 EDGEPT *temp2 = point2->next; | |
| 256 EDGEPT *temp1 = point1->next; | |
| 257 /* Create two new points */ | |
| 258 EDGEPT *new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2); | |
| 259 EDGEPT *new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1); | |
| 260 // point1 and 2 are now cross-over points, so they must have nullptr | |
| 261 // src_outlines and give their src_outline information their new | |
| 262 // replacements. | |
| 263 new_point1->src_outline = point1->src_outline; | |
| 264 new_point1->start_step = point1->start_step; | |
| 265 new_point1->step_count = point1->step_count; | |
| 266 new_point2->src_outline = point2->src_outline; | |
| 267 new_point2->start_step = point2->start_step; | |
| 268 new_point2->step_count = point2->step_count; | |
| 269 point1->src_outline = nullptr; | |
| 270 point1->start_step = 0; | |
| 271 point1->step_count = 0; | |
| 272 point2->src_outline = nullptr; | |
| 273 point2->start_step = 0; | |
| 274 point2->step_count = 0; | |
| 275 } | |
| 276 | |
| 277 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing | |
| 278 // the split, but possibly leaving some duplicate outlines. | |
| 279 void SPLIT::UnsplitOutlineList(TBLOB *blob) const { | |
| 280 /* Modify edge points */ | |
| 281 UnsplitOutlines(); | |
| 282 | |
| 283 auto *outline1 = new TESSLINE; | |
| 284 outline1->next = blob->outlines; | |
| 285 blob->outlines = outline1; | |
| 286 outline1->loop = point1; | |
| 287 | |
| 288 auto *outline2 = new TESSLINE; | |
| 289 outline2->next = blob->outlines; | |
| 290 blob->outlines = outline2; | |
| 291 outline2->loop = point2; | |
| 292 } | |
| 293 | |
| 294 // Removes the split that was put between these two points. | |
| 295 void SPLIT::UnsplitOutlines() const { | |
| 296 EDGEPT *tmp1 = point1->next; | |
| 297 EDGEPT *tmp2 = point2->next; | |
| 298 | |
| 299 tmp1->next->prev = point2; | |
| 300 tmp2->next->prev = point1; | |
| 301 | |
| 302 // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is | |
| 303 // deleted. | |
| 304 point1->next = tmp2->next; | |
| 305 point1->src_outline = tmp2->src_outline; | |
| 306 point1->start_step = tmp2->start_step; | |
| 307 point1->step_count = tmp2->step_count; | |
| 308 // Likewise point2 takes tmp1's place. | |
| 309 point2->next = tmp1->next; | |
| 310 point2->src_outline = tmp1->src_outline; | |
| 311 point2->start_step = tmp1->start_step; | |
| 312 point2->step_count = tmp1->step_count; | |
| 313 | |
| 314 delete tmp1; | |
| 315 delete tmp2; | |
| 316 | |
| 317 point1->vec.x = point1->next->pos.x - point1->pos.x; | |
| 318 point1->vec.y = point1->next->pos.y - point1->pos.y; | |
| 319 | |
| 320 point2->vec.x = point2->next->pos.x - point2->pos.x; | |
| 321 point2->vec.y = point2->next->pos.y - point2->pos.y; | |
| 322 } | |
| 323 | |
| 324 } // namespace tesseract |
