comparison mupdf-source/thirdparty/tesseract/src/classify/outfeat.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: outfeat.c
3 ** Purpose: Definition of outline-features.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #include "outfeat.h"
19
20 #include "classify.h"
21 #include "featdefs.h"
22 #include "mfoutline.h"
23 #include "ocrfeatures.h"
24
25 #include <cstdio>
26
27 namespace tesseract {
28
29 /*----------------------------------------------------------------------------
30 Public Code
31 ----------------------------------------------------------------------------*/
32
33 /**
34 * Convert each segment in the outline to a feature
35 * and return the features.
36 * @param Blob blob to extract pico-features from
37 * @return Outline-features for Blob.
38 * @note Globals: none
39 */
40 FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
41 auto FeatureSet = new FEATURE_SET_STRUCT(MAX_OUTLINE_FEATURES);
42 if (Blob == nullptr) {
43 return (FeatureSet);
44 }
45
46 auto Outlines = ConvertBlob(Blob);
47
48 float XScale, YScale;
49 NormalizeOutlines(Outlines, &XScale, &YScale);
50 auto RemainingOutlines = Outlines;
51 iterate(RemainingOutlines) {
52 auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
53 ConvertToOutlineFeatures(Outline, FeatureSet);
54 }
55 if (classify_norm_method == baseline) {
56 NormalizeOutlineX(FeatureSet);
57 }
58 FreeOutlines(Outlines);
59 return (FeatureSet);
60 } /* ExtractOutlineFeatures */
61
62 /*----------------------------------------------------------------------------
63 Private Code
64 ----------------------------------------------------------------------------*/
65 /*---------------------------------------------------------------------------*/
66 /**
67 * This routine computes the midpoint between Start and
68 * End to obtain the x,y position of the outline-feature. It
69 * also computes the direction from Start to End as the
70 * direction of the outline-feature and the distance from
71 * Start to End as the length of the outline-feature.
72 * This feature is then
73 * inserted into the next feature slot in FeatureSet.
74 * @param Start starting point of outline-feature
75 * @param End ending point of outline-feature
76 * @param FeatureSet set to add outline-feature to
77 */
78 void AddOutlineFeatureToSet(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) {
79 auto Feature = new FEATURE_STRUCT(&OutlineFeatDesc);
80 Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0);
81 Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x);
82 Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y);
83 Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End);
84 AddFeature(FeatureSet, Feature);
85
86 } /* AddOutlineFeatureToSet */
87
88 /*---------------------------------------------------------------------------*/
89 /**
90 * This routine steps converts each section in the specified
91 * outline to a feature described by its x,y position, length
92 * and angle.
93 * Results are returned in FeatureSet.
94 * @param Outline outline to extract outline-features from
95 * @param FeatureSet set of features to add outline-features to
96 */
97 void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
98 MFOUTLINE Next;
99 MFOUTLINE First;
100 FPOINT FeatureStart;
101 FPOINT FeatureEnd;
102
103 if (DegenerateOutline(Outline)) {
104 return;
105 }
106
107 First = Outline;
108 Next = First;
109 do {
110 FeatureStart = PointAt(Next)->Point;
111 Next = NextPointAfter(Next);
112
113 /* note that an edge is hidden if the ending point of the edge is
114 marked as hidden. This situation happens because the order of
115 the outlines is reversed when they are converted from the old
116 format. In the old format, a hidden edge is marked by the
117 starting point for that edge. */
118 if (!PointAt(Next)->Hidden) {
119 FeatureEnd = PointAt(Next)->Point;
120 AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet);
121 }
122 } while (Next != First);
123 } /* ConvertToOutlineFeatures */
124
125 /*---------------------------------------------------------------------------*/
126 /**
127 * This routine computes the weighted average x position
128 * over all of the outline-features in FeatureSet and then
129 * renormalizes the outline-features to force this average
130 * to be the x origin (i.e. x=0).
131 * FeatureSet is changed.
132 * @param FeatureSet outline-features to be normalized
133 */
134 void NormalizeOutlineX(FEATURE_SET FeatureSet) {
135 int i;
136 FEATURE Feature;
137 float Length;
138 float TotalX = 0.0;
139 float TotalWeight = 0.0;
140 float Origin;
141
142 if (FeatureSet->NumFeatures <= 0) {
143 return;
144 }
145
146 for (i = 0; i < FeatureSet->NumFeatures; i++) {
147 Feature = FeatureSet->Features[i];
148 Length = Feature->Params[OutlineFeatLength];
149 TotalX += Feature->Params[OutlineFeatX] * Length;
150 TotalWeight += Length;
151 }
152 Origin = TotalX / TotalWeight;
153
154 for (i = 0; i < FeatureSet->NumFeatures; i++) {
155 Feature = FeatureSet->Features[i];
156 Feature->Params[OutlineFeatX] -= Origin;
157 }
158 } /* NormalizeOutlineX */
159
160 } // namespace tesseract