Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/wordrec/chop.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: chop.cpp (Formerly chop.c) | |
| 4 * Author: Mark Seaman, OCR Technology | |
| 5 * | |
| 6 * (c) Copyright 1987, Hewlett-Packard Company. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 *****************************************************************************/ | |
| 18 | |
| 19 /*---------------------------------------------------------------------- | |
| 20 I n c l u d e s | |
| 21 ----------------------------------------------------------------------*/ | |
| 22 | |
| 23 #define _USE_MATH_DEFINES // for M_PI | |
| 24 #include "chop.h" | |
| 25 #include <cmath> // for M_PI | |
| 26 #include "outlines.h" | |
| 27 #include "plotedges.h" | |
| 28 #include "wordrec.h" | |
| 29 | |
| 30 // Include automatically generated configuration file if running autoconf. | |
| 31 #ifdef HAVE_CONFIG_H | |
| 32 # include "config_auto.h" | |
| 33 #endif | |
| 34 | |
| 35 namespace tesseract { | |
| 36 | |
| 37 // Show if the line is going in the positive or negative X direction. | |
| 38 static int direction(const EDGEPT *point) { | |
| 39 //* direction to return | |
| 40 int dir = 0; | |
| 41 //* prev point | |
| 42 const EDGEPT *prev = point->prev; | |
| 43 //* next point | |
| 44 const EDGEPT *next = point->next; | |
| 45 | |
| 46 if (((prev->pos.x <= point->pos.x) && (point->pos.x < next->pos.x)) || | |
| 47 ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) { | |
| 48 dir = 1; | |
| 49 } | |
| 50 if (((prev->pos.x >= point->pos.x) && (point->pos.x > next->pos.x)) || | |
| 51 ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) { | |
| 52 dir = -1; | |
| 53 } | |
| 54 | |
| 55 return dir; | |
| 56 } | |
| 57 | |
| 58 /** | |
| 59 * @name point_priority | |
| 60 * | |
| 61 * Assign a priority to and edge point that might be used as part of a | |
| 62 * split. The argument should be of type EDGEPT. | |
| 63 */ | |
| 64 PRIORITY Wordrec::point_priority(EDGEPT *point) { | |
| 65 return static_cast<PRIORITY>(angle_change(point->prev, point, point->next)); | |
| 66 } | |
| 67 | |
| 68 /** | |
| 69 * @name add_point_to_list | |
| 70 * | |
| 71 * Add an edge point to a POINT_GROUP containing a list of other points. | |
| 72 */ | |
| 73 void Wordrec::add_point_to_list(PointHeap *point_heap, EDGEPT *point) { | |
| 74 if (point_heap->size() < MAX_NUM_POINTS - 2) { | |
| 75 PointPair pair(point_priority(point), point); | |
| 76 point_heap->Push(&pair); | |
| 77 } | |
| 78 | |
| 79 #ifndef GRAPHICS_DISABLED | |
| 80 if (chop_debug > 2) { | |
| 81 mark_outline(point); | |
| 82 } | |
| 83 #endif | |
| 84 } | |
| 85 | |
| 86 // Returns true if the edgept supplied as input is an inside angle. This | |
| 87 // is determined by the angular change of the vectors from point to point. | |
| 88 bool Wordrec::is_inside_angle(EDGEPT *pt) { | |
| 89 return angle_change(pt->prev, pt, pt->next) < chop_inside_angle; | |
| 90 } | |
| 91 | |
| 92 /** | |
| 93 * @name angle_change | |
| 94 * | |
| 95 * Return the change in angle (degrees) of the line segments between | |
| 96 * points one and two, and two and three. | |
| 97 */ | |
| 98 int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { | |
| 99 VECTOR vector1; | |
| 100 VECTOR vector2; | |
| 101 | |
| 102 int angle; | |
| 103 | |
| 104 /* Compute angle */ | |
| 105 vector1.x = point2->pos.x - point1->pos.x; | |
| 106 vector1.y = point2->pos.y - point1->pos.y; | |
| 107 vector2.x = point3->pos.x - point2->pos.x; | |
| 108 vector2.y = point3->pos.y - point2->pos.y; | |
| 109 /* Use cross product */ | |
| 110 float length = std::sqrt(static_cast<float>(vector1.length2()) * vector2.length2()); | |
| 111 if (static_cast<int>(length) == 0) { | |
| 112 return (0); | |
| 113 } | |
| 114 auto f = vector1.cross(vector2) / length; | |
| 115 // Avoid FP exception in std::asin caused by illegal values of f | |
| 116 // (caused by rounding errors). | |
| 117 if (f <= -1.0f) { | |
| 118 angle = -90; | |
| 119 } else if (f >= 1.0f) { | |
| 120 angle = 90; | |
| 121 } else { | |
| 122 angle = static_cast<int>(floor(std::asin(f) / M_PI * 180.0 + 0.5)); | |
| 123 // Use dot product. | |
| 124 if (vector1.dot(vector2) < 0) { | |
| 125 angle = 180 - angle; | |
| 126 } | |
| 127 // Adjust angle. | |
| 128 if (angle > 180) { | |
| 129 angle -= 360; | |
| 130 } else if (angle <= -180) { | |
| 131 angle += 360; | |
| 132 } | |
| 133 } | |
| 134 return angle; | |
| 135 } | |
| 136 | |
| 137 /** | |
| 138 * @name pick_close_point | |
| 139 * | |
| 140 * Choose the edge point that is closest to the critical point. This | |
| 141 * point may not be exactly vertical from the critical point. | |
| 142 */ | |
| 143 EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) { | |
| 144 EDGEPT *best_point = nullptr; | |
| 145 int this_distance; | |
| 146 bool found_better; | |
| 147 | |
| 148 do { | |
| 149 found_better = false; | |
| 150 | |
| 151 this_distance = edgept_dist(critical_point, vertical_point); | |
| 152 if (this_distance <= *best_dist) { | |
| 153 if (!(same_point(critical_point->pos, vertical_point->pos) || | |
| 154 same_point(critical_point->pos, vertical_point->next->pos) || | |
| 155 (best_point && same_point(best_point->pos, vertical_point->pos)) || | |
| 156 is_exterior_point(critical_point, vertical_point))) { | |
| 157 *best_dist = this_distance; | |
| 158 best_point = vertical_point; | |
| 159 if (chop_vertical_creep) { | |
| 160 found_better = true; | |
| 161 } | |
| 162 } | |
| 163 } | |
| 164 vertical_point = vertical_point->next; | |
| 165 } while (found_better == true); | |
| 166 | |
| 167 return (best_point); | |
| 168 } | |
| 169 | |
| 170 /** | |
| 171 * @name prioritize_points | |
| 172 * | |
| 173 * Find a list of edge points from the outer outline of this blob. For | |
| 174 * each of these points assign a priority. Sort these points using a | |
| 175 * heap structure so that they can be visited in order. | |
| 176 */ | |
| 177 void Wordrec::prioritize_points(TESSLINE *outline, PointHeap *points) { | |
| 178 EDGEPT *this_point; | |
| 179 EDGEPT *local_min = nullptr; | |
| 180 EDGEPT *local_max = nullptr; | |
| 181 | |
| 182 this_point = outline->loop; | |
| 183 local_min = this_point; | |
| 184 local_max = this_point; | |
| 185 do { | |
| 186 if (this_point->vec.y < 0) { | |
| 187 /* Look for minima */ | |
| 188 if (local_max != nullptr) { | |
| 189 new_max_point(local_max, points); | |
| 190 } else if (is_inside_angle(this_point)) { | |
| 191 add_point_to_list(points, this_point); | |
| 192 } | |
| 193 local_max = nullptr; | |
| 194 local_min = this_point->next; | |
| 195 } else if (this_point->vec.y > 0) { | |
| 196 /* Look for maxima */ | |
| 197 if (local_min != nullptr) { | |
| 198 new_min_point(local_min, points); | |
| 199 } else if (is_inside_angle(this_point)) { | |
| 200 add_point_to_list(points, this_point); | |
| 201 } | |
| 202 local_min = nullptr; | |
| 203 local_max = this_point->next; | |
| 204 } else { | |
| 205 /* Flat area */ | |
| 206 if (local_max != nullptr) { | |
| 207 if (local_max->prev->vec.y != 0) { | |
| 208 new_max_point(local_max, points); | |
| 209 } | |
| 210 local_max = this_point->next; | |
| 211 local_min = nullptr; | |
| 212 } else { | |
| 213 if (local_min->prev->vec.y != 0) { | |
| 214 new_min_point(local_min, points); | |
| 215 } | |
| 216 local_min = this_point->next; | |
| 217 local_max = nullptr; | |
| 218 } | |
| 219 } | |
| 220 | |
| 221 /* Next point */ | |
| 222 this_point = this_point->next; | |
| 223 } while (this_point != outline->loop); | |
| 224 } | |
| 225 | |
| 226 /** | |
| 227 * @name new_min_point | |
| 228 * | |
| 229 * Found a new minimum point try to decide whether to save it or not. | |
| 230 * Return the new value for the local minimum. If a point is saved then | |
| 231 * the local minimum is reset to nullptr. | |
| 232 */ | |
| 233 void Wordrec::new_min_point(EDGEPT *local_min, PointHeap *points) { | |
| 234 int16_t dir; | |
| 235 | |
| 236 dir = direction(local_min); | |
| 237 | |
| 238 if (dir < 0) { | |
| 239 add_point_to_list(points, local_min); | |
| 240 return; | |
| 241 } | |
| 242 | |
| 243 if (dir == 0 && point_priority(local_min) < 0) { | |
| 244 add_point_to_list(points, local_min); | |
| 245 return; | |
| 246 } | |
| 247 } | |
| 248 | |
| 249 /** | |
| 250 * @name new_max_point | |
| 251 * | |
| 252 * Found a new minimum point try to decide whether to save it or not. | |
| 253 * Return the new value for the local minimum. If a point is saved then | |
| 254 * the local minimum is reset to nullptr. | |
| 255 */ | |
| 256 void Wordrec::new_max_point(EDGEPT *local_max, PointHeap *points) { | |
| 257 int16_t dir; | |
| 258 | |
| 259 dir = direction(local_max); | |
| 260 | |
| 261 if (dir > 0) { | |
| 262 add_point_to_list(points, local_max); | |
| 263 return; | |
| 264 } | |
| 265 | |
| 266 if (dir == 0 && point_priority(local_max) < 0) { | |
| 267 add_point_to_list(points, local_max); | |
| 268 return; | |
| 269 } | |
| 270 } | |
| 271 | |
| 272 /** | |
| 273 * @name vertical_projection_point | |
| 274 * | |
| 275 * For one point on the outline, find the corresponding point on the | |
| 276 * other side of the outline that is a likely projection for a split | |
| 277 * point. This is done by iterating through the edge points until the | |
| 278 * X value of the point being looked at is greater than the X value of | |
| 279 * the split point. Ensure that the point being returned is not right | |
| 280 * next to the split point. Return the edge point in *best_point as | |
| 281 * a result, and any points that were newly created are also saved on | |
| 282 * the new_points list. | |
| 283 */ | |
| 284 void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, | |
| 285 EDGEPT **best_point, EDGEPT_CLIST *new_points) { | |
| 286 EDGEPT *p; /* Iterator */ | |
| 287 EDGEPT *this_edgept; /* Iterator */ | |
| 288 EDGEPT_C_IT new_point_it(new_points); | |
| 289 int x = split_point->pos.x; /* X value of vertical */ | |
| 290 int best_dist = LARGE_DISTANCE; /* Best point found */ | |
| 291 | |
| 292 if (*best_point != nullptr) { | |
| 293 best_dist = edgept_dist(split_point, *best_point); | |
| 294 } | |
| 295 | |
| 296 p = target_point; | |
| 297 /* Look at each edge point */ | |
| 298 do { | |
| 299 if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) && | |
| 300 !same_point(split_point->pos, p->pos) && !same_point(split_point->pos, p->next->pos) && | |
| 301 !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) { | |
| 302 if (near_point(split_point, p, p->next, &this_edgept)) { | |
| 303 new_point_it.add_before_then_move(this_edgept); | |
| 304 } | |
| 305 | |
| 306 if (*best_point == nullptr) { | |
| 307 best_dist = edgept_dist(split_point, this_edgept); | |
| 308 } | |
| 309 | |
| 310 this_edgept = pick_close_point(split_point, this_edgept, &best_dist); | |
| 311 if (this_edgept) { | |
| 312 *best_point = this_edgept; | |
| 313 } | |
| 314 } | |
| 315 | |
| 316 p = p->next; | |
| 317 } while (p != target_point); | |
| 318 } | |
| 319 | |
| 320 } // namespace tesseract |
