comparison mupdf-source/thirdparty/tesseract/src/wordrec/chop.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 *
3 * File: chop.cpp (Formerly chop.c)
4 * Author: Mark Seaman, OCR Technology
5 *
6 * (c) Copyright 1987, Hewlett-Packard Company.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 *****************************************************************************/
18
19 /*----------------------------------------------------------------------
20 I n c l u d e s
21 ----------------------------------------------------------------------*/
22
23 #define _USE_MATH_DEFINES // for M_PI
24 #include "chop.h"
25 #include <cmath> // for M_PI
26 #include "outlines.h"
27 #include "plotedges.h"
28 #include "wordrec.h"
29
30 // Include automatically generated configuration file if running autoconf.
31 #ifdef HAVE_CONFIG_H
32 # include "config_auto.h"
33 #endif
34
35 namespace tesseract {
36
37 // Show if the line is going in the positive or negative X direction.
38 static int direction(const EDGEPT *point) {
39 //* direction to return
40 int dir = 0;
41 //* prev point
42 const EDGEPT *prev = point->prev;
43 //* next point
44 const EDGEPT *next = point->next;
45
46 if (((prev->pos.x <= point->pos.x) && (point->pos.x < next->pos.x)) ||
47 ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) {
48 dir = 1;
49 }
50 if (((prev->pos.x >= point->pos.x) && (point->pos.x > next->pos.x)) ||
51 ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) {
52 dir = -1;
53 }
54
55 return dir;
56 }
57
58 /**
59 * @name point_priority
60 *
61 * Assign a priority to and edge point that might be used as part of a
62 * split. The argument should be of type EDGEPT.
63 */
64 PRIORITY Wordrec::point_priority(EDGEPT *point) {
65 return static_cast<PRIORITY>(angle_change(point->prev, point, point->next));
66 }
67
68 /**
69 * @name add_point_to_list
70 *
71 * Add an edge point to a POINT_GROUP containing a list of other points.
72 */
73 void Wordrec::add_point_to_list(PointHeap *point_heap, EDGEPT *point) {
74 if (point_heap->size() < MAX_NUM_POINTS - 2) {
75 PointPair pair(point_priority(point), point);
76 point_heap->Push(&pair);
77 }
78
79 #ifndef GRAPHICS_DISABLED
80 if (chop_debug > 2) {
81 mark_outline(point);
82 }
83 #endif
84 }
85
86 // Returns true if the edgept supplied as input is an inside angle. This
87 // is determined by the angular change of the vectors from point to point.
88 bool Wordrec::is_inside_angle(EDGEPT *pt) {
89 return angle_change(pt->prev, pt, pt->next) < chop_inside_angle;
90 }
91
92 /**
93 * @name angle_change
94 *
95 * Return the change in angle (degrees) of the line segments between
96 * points one and two, and two and three.
97 */
98 int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
99 VECTOR vector1;
100 VECTOR vector2;
101
102 int angle;
103
104 /* Compute angle */
105 vector1.x = point2->pos.x - point1->pos.x;
106 vector1.y = point2->pos.y - point1->pos.y;
107 vector2.x = point3->pos.x - point2->pos.x;
108 vector2.y = point3->pos.y - point2->pos.y;
109 /* Use cross product */
110 float length = std::sqrt(static_cast<float>(vector1.length2()) * vector2.length2());
111 if (static_cast<int>(length) == 0) {
112 return (0);
113 }
114 auto f = vector1.cross(vector2) / length;
115 // Avoid FP exception in std::asin caused by illegal values of f
116 // (caused by rounding errors).
117 if (f <= -1.0f) {
118 angle = -90;
119 } else if (f >= 1.0f) {
120 angle = 90;
121 } else {
122 angle = static_cast<int>(floor(std::asin(f) / M_PI * 180.0 + 0.5));
123 // Use dot product.
124 if (vector1.dot(vector2) < 0) {
125 angle = 180 - angle;
126 }
127 // Adjust angle.
128 if (angle > 180) {
129 angle -= 360;
130 } else if (angle <= -180) {
131 angle += 360;
132 }
133 }
134 return angle;
135 }
136
137 /**
138 * @name pick_close_point
139 *
140 * Choose the edge point that is closest to the critical point. This
141 * point may not be exactly vertical from the critical point.
142 */
143 EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) {
144 EDGEPT *best_point = nullptr;
145 int this_distance;
146 bool found_better;
147
148 do {
149 found_better = false;
150
151 this_distance = edgept_dist(critical_point, vertical_point);
152 if (this_distance <= *best_dist) {
153 if (!(same_point(critical_point->pos, vertical_point->pos) ||
154 same_point(critical_point->pos, vertical_point->next->pos) ||
155 (best_point && same_point(best_point->pos, vertical_point->pos)) ||
156 is_exterior_point(critical_point, vertical_point))) {
157 *best_dist = this_distance;
158 best_point = vertical_point;
159 if (chop_vertical_creep) {
160 found_better = true;
161 }
162 }
163 }
164 vertical_point = vertical_point->next;
165 } while (found_better == true);
166
167 return (best_point);
168 }
169
170 /**
171 * @name prioritize_points
172 *
173 * Find a list of edge points from the outer outline of this blob. For
174 * each of these points assign a priority. Sort these points using a
175 * heap structure so that they can be visited in order.
176 */
177 void Wordrec::prioritize_points(TESSLINE *outline, PointHeap *points) {
178 EDGEPT *this_point;
179 EDGEPT *local_min = nullptr;
180 EDGEPT *local_max = nullptr;
181
182 this_point = outline->loop;
183 local_min = this_point;
184 local_max = this_point;
185 do {
186 if (this_point->vec.y < 0) {
187 /* Look for minima */
188 if (local_max != nullptr) {
189 new_max_point(local_max, points);
190 } else if (is_inside_angle(this_point)) {
191 add_point_to_list(points, this_point);
192 }
193 local_max = nullptr;
194 local_min = this_point->next;
195 } else if (this_point->vec.y > 0) {
196 /* Look for maxima */
197 if (local_min != nullptr) {
198 new_min_point(local_min, points);
199 } else if (is_inside_angle(this_point)) {
200 add_point_to_list(points, this_point);
201 }
202 local_min = nullptr;
203 local_max = this_point->next;
204 } else {
205 /* Flat area */
206 if (local_max != nullptr) {
207 if (local_max->prev->vec.y != 0) {
208 new_max_point(local_max, points);
209 }
210 local_max = this_point->next;
211 local_min = nullptr;
212 } else {
213 if (local_min->prev->vec.y != 0) {
214 new_min_point(local_min, points);
215 }
216 local_min = this_point->next;
217 local_max = nullptr;
218 }
219 }
220
221 /* Next point */
222 this_point = this_point->next;
223 } while (this_point != outline->loop);
224 }
225
226 /**
227 * @name new_min_point
228 *
229 * Found a new minimum point try to decide whether to save it or not.
230 * Return the new value for the local minimum. If a point is saved then
231 * the local minimum is reset to nullptr.
232 */
233 void Wordrec::new_min_point(EDGEPT *local_min, PointHeap *points) {
234 int16_t dir;
235
236 dir = direction(local_min);
237
238 if (dir < 0) {
239 add_point_to_list(points, local_min);
240 return;
241 }
242
243 if (dir == 0 && point_priority(local_min) < 0) {
244 add_point_to_list(points, local_min);
245 return;
246 }
247 }
248
249 /**
250 * @name new_max_point
251 *
252 * Found a new minimum point try to decide whether to save it or not.
253 * Return the new value for the local minimum. If a point is saved then
254 * the local minimum is reset to nullptr.
255 */
256 void Wordrec::new_max_point(EDGEPT *local_max, PointHeap *points) {
257 int16_t dir;
258
259 dir = direction(local_max);
260
261 if (dir > 0) {
262 add_point_to_list(points, local_max);
263 return;
264 }
265
266 if (dir == 0 && point_priority(local_max) < 0) {
267 add_point_to_list(points, local_max);
268 return;
269 }
270 }
271
272 /**
273 * @name vertical_projection_point
274 *
275 * For one point on the outline, find the corresponding point on the
276 * other side of the outline that is a likely projection for a split
277 * point. This is done by iterating through the edge points until the
278 * X value of the point being looked at is greater than the X value of
279 * the split point. Ensure that the point being returned is not right
280 * next to the split point. Return the edge point in *best_point as
281 * a result, and any points that were newly created are also saved on
282 * the new_points list.
283 */
284 void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
285 EDGEPT **best_point, EDGEPT_CLIST *new_points) {
286 EDGEPT *p; /* Iterator */
287 EDGEPT *this_edgept; /* Iterator */
288 EDGEPT_C_IT new_point_it(new_points);
289 int x = split_point->pos.x; /* X value of vertical */
290 int best_dist = LARGE_DISTANCE; /* Best point found */
291
292 if (*best_point != nullptr) {
293 best_dist = edgept_dist(split_point, *best_point);
294 }
295
296 p = target_point;
297 /* Look at each edge point */
298 do {
299 if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) &&
300 !same_point(split_point->pos, p->pos) && !same_point(split_point->pos, p->next->pos) &&
301 !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) {
302 if (near_point(split_point, p, p->next, &this_edgept)) {
303 new_point_it.add_before_then_move(this_edgept);
304 }
305
306 if (*best_point == nullptr) {
307 best_dist = edgept_dist(split_point, this_edgept);
308 }
309
310 this_edgept = pick_close_point(split_point, this_edgept, &best_dist);
311 if (this_edgept) {
312 *best_point = this_edgept;
313 }
314 }
315
316 p = p->next;
317 } while (p != target_point);
318 }
319
320 } // namespace tesseract