comparison mupdf-source/thirdparty/tesseract/src/classify/mfoutline.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: mfoutline.c
3 ** Purpose: Interface to outline struct used for extracting features
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #include "mfoutline.h"
19
20 #include "blobs.h"
21 #include "classify.h"
22 #include "clusttool.h" //If remove you get caught in a loop somewhere
23 #include "mfx.h"
24 #include "params.h"
25
26 #include <cmath>
27 #include <cstdio>
28
29 namespace tesseract {
30
31 /*---------------------------------------------------------------------------*/
32 /** Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
33 */
34 LIST ConvertBlob(TBLOB *blob) {
35 LIST outlines = NIL_LIST;
36 return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer);
37 }
38
39 /*---------------------------------------------------------------------------*/
40 /** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */
41 MFOUTLINE ConvertOutline(TESSLINE *outline) {
42 auto MFOutline = NIL_LIST;
43
44 if (outline == nullptr || outline->loop == nullptr) {
45 return MFOutline;
46 }
47
48 auto StartPoint = outline->loop;
49 auto EdgePoint = StartPoint;
50 do {
51 auto NextPoint = EdgePoint->next;
52
53 /* filter out duplicate points */
54 if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) {
55 auto NewPoint = new MFEDGEPT;
56 NewPoint->ClearMark();
57 NewPoint->Hidden = EdgePoint->IsHidden();
58 NewPoint->Point.x = EdgePoint->pos.x;
59 NewPoint->Point.y = EdgePoint->pos.y;
60 MFOutline = push(MFOutline, NewPoint);
61 }
62 EdgePoint = NextPoint;
63 } while (EdgePoint != StartPoint);
64
65 if (MFOutline != nullptr) {
66 MakeOutlineCircular(MFOutline);
67 }
68 return MFOutline;
69 }
70
71 /*---------------------------------------------------------------------------*/
72 /**
73 * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
74 *
75 * @param outline first outline to be converted
76 * @param mf_outlines list to add converted outlines to
77 * @param outline_type are the outlines outer or holes?
78 */
79 LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type) {
80 MFOUTLINE mf_outline;
81
82 while (outline != nullptr) {
83 mf_outline = ConvertOutline(outline);
84 if (mf_outline != nullptr) {
85 mf_outlines = push(mf_outlines, mf_outline);
86 }
87 outline = outline->next;
88 }
89 return mf_outlines;
90 }
91
92 /*---------------------------------------------------------------------------*/
93 /**
94 * This routine searches through the specified outline, computes
95 * a slope for each vector in the outline, and marks each
96 * vector as having one of the following directions:
97 * N, S, E, W, NE, NW, SE, SW
98 * This information is then stored in the outline and the
99 * outline is returned.
100 * @param Outline micro-feature outline to analyze
101 * @param MinSlope controls "snapping" of segments to horizontal
102 * @param MaxSlope controls "snapping" of segments to vertical
103 */
104 void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope) {
105 MFEDGEPT *Current;
106 MFEDGEPT *Last;
107 MFOUTLINE EdgePoint;
108
109 if (DegenerateOutline(Outline)) {
110 return;
111 }
112
113 Last = PointAt(Outline);
114 Outline = NextPointAfter(Outline);
115 EdgePoint = Outline;
116 do {
117 Current = PointAt(EdgePoint);
118 ComputeDirection(Last, Current, MinSlope, MaxSlope);
119
120 Last = Current;
121 EdgePoint = NextPointAfter(EdgePoint);
122 } while (EdgePoint != Outline);
123
124 } /* FindDirectionChanges */
125
126 /*---------------------------------------------------------------------------*/
127 /**
128 * This routine deallocates all of the memory consumed by
129 * a micro-feature outline.
130 * @param arg micro-feature outline to be freed
131 */
132 void FreeMFOutline(void *arg) { // MFOUTLINE Outline)
133 auto Outline = static_cast<MFOUTLINE>(arg);
134
135 /* break the circular outline so we can use std. techniques to deallocate */
136 MFOUTLINE Start = Outline->list_rest();
137 set_rest(Outline, NIL_LIST);
138 while (Start != nullptr) {
139 delete reinterpret_cast<MFEDGEPT *>(Start->first_node());
140 Start = pop(Start);
141 }
142
143 } /* FreeMFOutline */
144
145 /*---------------------------------------------------------------------------*/
146 /**
147 * Release all memory consumed by the specified list
148 * of outlines.
149 * @param Outlines list of mf-outlines to be freed
150 */
151 void FreeOutlines(LIST Outlines) {
152 destroy_nodes(Outlines, FreeMFOutline);
153 } /* FreeOutlines */
154
155 /*---------------------------------------------------------------------------*/
156 /**
157 * This routine searches through the specified outline and finds
158 * the points at which the outline changes direction. These
159 * points are then marked as "extremities". This routine is
160 * used as an alternative to FindExtremities(). It forces the
161 * endpoints of the microfeatures to be at the direction
162 * changes rather than at the midpoint between direction
163 * changes.
164 * @param Outline micro-feature outline to analyze
165 */
166 void MarkDirectionChanges(MFOUTLINE Outline) {
167 MFOUTLINE Current;
168 MFOUTLINE Last;
169 MFOUTLINE First;
170
171 if (DegenerateOutline(Outline)) {
172 return;
173 }
174
175 First = NextDirectionChange(Outline);
176 Last = First;
177 do {
178 Current = NextDirectionChange(Last);
179 PointAt(Current)->MarkPoint();
180 Last = Current;
181 } while (Last != First);
182
183 } /* MarkDirectionChanges */
184
185 /*---------------------------------------------------------------------------*/
186 /**
187 * This routine returns the next point in the micro-feature
188 * outline that is an extremity. The search starts after
189 * EdgePoint. The routine assumes that the outline being
190 * searched is not a degenerate outline (i.e. it must have
191 * 2 or more edge points).
192 * @param EdgePoint start search from this point
193 * @return Next extremity in the outline after EdgePoint.
194 * @note Globals: none
195 */
196 MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
197 EdgePoint = NextPointAfter(EdgePoint);
198 while (!PointAt(EdgePoint)->ExtremityMark) {
199 EdgePoint = NextPointAfter(EdgePoint);
200 }
201
202 return (EdgePoint);
203
204 } /* NextExtremity */
205
206 /*---------------------------------------------------------------------------*/
207 /**
208 * This routine normalizes the coordinates of the specified
209 * outline so that the outline is deskewed down to the
210 * baseline, translated so that x=0 is at XOrigin, and scaled
211 * so that the height of a character cell from descender to
212 * ascender is 1. Of this height, 0.25 is for the descender,
213 * 0.25 for the ascender, and 0.5 for the x-height. The
214 * y coordinate of the baseline is 0.
215 * @param Outline outline to be normalized
216 * @param XOrigin x-origin of text
217 */
218 void NormalizeOutline(MFOUTLINE Outline, float XOrigin) {
219 if (Outline == NIL_LIST) {
220 return;
221 }
222
223 MFOUTLINE EdgePoint = Outline;
224 do {
225 MFEDGEPT *Current = PointAt(EdgePoint);
226 Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset);
227 Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
228 EdgePoint = NextPointAfter(EdgePoint);
229 } while (EdgePoint != Outline);
230 } /* NormalizeOutline */
231
232 /*---------------------------------------------------------------------------*/
233 /**
234 * This routine normalizes every outline in Outlines
235 * according to the currently selected normalization method.
236 * It also returns the scale factors that it used to do this
237 * scaling. The scale factors returned represent the x and
238 * y sizes in the normalized coordinate system that correspond
239 * to 1 pixel in the original coordinate system.
240 * Outlines are changed and XScale and YScale are updated.
241 *
242 * Globals:
243 * - classify_norm_method method being used for normalization
244 * - classify_char_norm_range map radius of gyration to this value
245 * @param Outlines list of outlines to be normalized
246 * @param XScale x-direction scale factor used by routine
247 * @param YScale y-direction scale factor used by routine
248 */
249 void Classify::NormalizeOutlines(LIST Outlines, float *XScale, float *YScale) {
250 MFOUTLINE Outline;
251
252 switch (classify_norm_method) {
253 case character:
254 ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
255 break;
256
257 case baseline:
258 iterate(Outlines) {
259 Outline = static_cast<MFOUTLINE>(Outlines->first_node());
260 NormalizeOutline(Outline, 0.0);
261 }
262 *XScale = *YScale = MF_SCALE_FACTOR;
263 break;
264 }
265 } /* NormalizeOutlines */
266
267 /*----------------------------------------------------------------------------
268 Private Code
269 ----------------------------------------------------------------------------*/
270 /**
271 * Change the direction of every vector in the specified
272 * outline segment to Direction. The segment to be changed
273 * starts at Start and ends at End. Note that the previous
274 * direction of End must also be changed to reflect the
275 * change in direction of the point before it.
276 * @param Start defines start of segment of outline to be modified
277 * @param End defines end of segment of outline to be modified
278 * @param Direction new direction to assign to segment
279 */
280 void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
281 MFOUTLINE Current;
282
283 for (Current = Start; Current != End; Current = NextPointAfter(Current)) {
284 PointAt(Current)->Direction = Direction;
285 }
286
287 PointAt(End)->PreviousDirection = Direction;
288
289 } /* ChangeDirection */
290
291 /**
292 * This routine normalizes each point in Outline by
293 * translating it to the specified center and scaling it
294 * anisotropically according to the given scale factors.
295 * @param Outline outline to be character normalized
296 * @param cn_denorm
297 */
298 void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm) {
299 MFOUTLINE First, Current;
300 MFEDGEPT *CurrentPoint;
301
302 if (Outline == NIL_LIST) {
303 return;
304 }
305
306 First = Outline;
307 Current = First;
308 do {
309 CurrentPoint = PointAt(Current);
310 FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y);
311 cn_denorm.LocalNormTransform(pos, &pos);
312 CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
313 CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR;
314
315 Current = NextPointAfter(Current);
316 } while (Current != First);
317
318 } /* CharNormalizeOutline */
319
320 /**
321 * This routine computes the slope from Start to Finish and
322 * and then computes the approximate direction of the line
323 * segment from Start to Finish. The direction is quantized
324 * into 8 buckets:
325 * N, S, E, W, NE, NW, SE, SW
326 * Both the slope and the direction are then stored into
327 * the appropriate fields of the Start edge point. The
328 * direction is also stored into the PreviousDirection field
329 * of the Finish edge point.
330 * @param Start starting point to compute direction from
331 * @param Finish finishing point to compute direction to
332 * @param MinSlope slope below which lines are horizontal
333 * @param MaxSlope slope above which lines are vertical
334 */
335 void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope) {
336 FVECTOR Delta;
337
338 Delta.x = Finish->Point.x - Start->Point.x;
339 Delta.y = Finish->Point.y - Start->Point.y;
340 if (Delta.x == 0) {
341 if (Delta.y < 0) {
342 Start->Slope = -FLT_MAX;
343 Start->Direction = south;
344 } else {
345 Start->Slope = FLT_MAX;
346 Start->Direction = north;
347 }
348 } else {
349 Start->Slope = Delta.y / Delta.x;
350 if (Delta.x > 0) {
351 if (Delta.y > 0) {
352 if (Start->Slope > MinSlope) {
353 if (Start->Slope < MaxSlope) {
354 Start->Direction = northeast;
355 } else {
356 Start->Direction = north;
357 }
358 } else {
359 Start->Direction = east;
360 }
361 } else if (Start->Slope < -MinSlope) {
362 if (Start->Slope > -MaxSlope) {
363 Start->Direction = southeast;
364 } else {
365 Start->Direction = south;
366 }
367 } else {
368 Start->Direction = east;
369 }
370 } else if (Delta.y > 0) {
371 if (Start->Slope < -MinSlope) {
372 if (Start->Slope > -MaxSlope) {
373 Start->Direction = northwest;
374 } else {
375 Start->Direction = north;
376 }
377 } else {
378 Start->Direction = west;
379 }
380 } else if (Start->Slope > MinSlope) {
381 if (Start->Slope < MaxSlope) {
382 Start->Direction = southwest;
383 } else {
384 Start->Direction = south;
385 }
386 } else {
387 Start->Direction = west;
388 }
389 }
390 Finish->PreviousDirection = Start->Direction;
391 }
392
393 /**
394 * This routine returns the next point in the micro-feature
395 * outline that has a direction different than EdgePoint. The
396 * routine assumes that the outline being searched is not a
397 * degenerate outline (i.e. it must have 2 or more edge points).
398 * @param EdgePoint start search from this point
399 * @return Point of next direction change in micro-feature outline.
400 * @note Globals: none
401 */
402 MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
403 DIRECTION InitialDirection;
404
405 InitialDirection = PointAt(EdgePoint)->Direction;
406
407 MFOUTLINE next_pt = nullptr;
408 do {
409 EdgePoint = NextPointAfter(EdgePoint);
410 next_pt = NextPointAfter(EdgePoint);
411 } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden &&
412 next_pt != nullptr && !PointAt(next_pt)->Hidden);
413
414 return (EdgePoint);
415 }
416
417 } // namespace tesseract