comparison mupdf-source/thirdparty/tesseract/src/training/mergenf.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: MergeNF.c
3 ** Purpose: Program for merging similar nano-feature protos
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #define _USE_MATH_DEFINES // for M_PI
19 #include <algorithm>
20 #include <cfloat> // for FLT_MAX
21 #include <cmath> // for M_PI
22 #include <cstdio>
23 #include <cstring>
24
25 #include "cluster.h"
26 #include "clusttool.h"
27 #include "featdefs.h"
28 #include "intproto.h"
29 #include "mergenf.h"
30 #include "ocrfeatures.h"
31 #include "oldlist.h"
32 #include "params.h"
33 #include "protos.h"
34
35 using namespace tesseract;
36
37 /*-------------------once in subfeat---------------------------------*/
38 static double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ...");
39
40 static double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ...");
41
42 static double_VAR(training_similarity_curl, 2.0, "Similarity Curl ...");
43
44 /*-----------------------------once in
45 * fasttrain----------------------------------*/
46 static double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ...");
47
48 static double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ...");
49
50 static double_VAR(training_angle_pad, 45.0, "Angle pad ...");
51
52 /**
53 * Compare protos p1 and p2 and return an estimate of the
54 * worst evidence rating that will result for any part of p1
55 * that is compared to p2. In other words, if p1 were broken
56 * into pico-features and each pico-feature was matched to p2,
57 * what is the worst evidence rating that will be achieved for
58 * any pico-feature.
59 *
60 * @param p1, p2 protos to be compared
61 *
62 * Globals: none
63 *
64 * @return Worst possible result when matching p1 to p2.
65 */
66 float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
67 float WorstEvidence = WORST_EVIDENCE;
68 float Evidence;
69 float Angle, Length;
70
71 /* if p1 and p2 are not close in length, don't let them match */
72 Length = std::fabs(p1->Length - p2->Length);
73 if (Length > MAX_LENGTH_MISMATCH) {
74 return (0.0);
75 }
76
77 /* create a dummy pico-feature to be used for comparisons */
78 auto Feature = new FEATURE_STRUCT(&PicoFeatDesc);
79 Feature->Params[PicoFeatDir] = p1->Angle;
80
81 /* convert angle to radians */
82 Angle = p1->Angle * 2.0 * M_PI;
83
84 /* find distance from center of p1 to 1/2 picofeat from end */
85 Length = p1->Length / 2.0 - GetPicoFeatureLength() / 2.0;
86 if (Length < 0) {
87 Length = 0;
88 }
89
90 /* set the dummy pico-feature at one end of p1 and match it to p2 */
91 Feature->Params[PicoFeatX] = p1->X + std::cos(Angle) * Length;
92 Feature->Params[PicoFeatY] = p1->Y + std::sin(Angle) * Length;
93 if (DummyFastMatch(Feature, p2)) {
94 Evidence = SubfeatureEvidence(Feature, p2);
95 if (Evidence < WorstEvidence) {
96 WorstEvidence = Evidence;
97 }
98 } else {
99 delete Feature;
100 return 0.0;
101 }
102
103 /* set the dummy pico-feature at the other end of p1 and match it to p2 */
104 Feature->Params[PicoFeatX] = p1->X - std::cos(Angle) * Length;
105 Feature->Params[PicoFeatY] = p1->Y - std::sin(Angle) * Length;
106 if (DummyFastMatch(Feature, p2)) {
107 Evidence = SubfeatureEvidence(Feature, p2);
108 if (Evidence < WorstEvidence) {
109 WorstEvidence = Evidence;
110 }
111 } else {
112 delete Feature;
113 return 0.0;
114 }
115
116 delete Feature;
117 return (WorstEvidence);
118
119 } /* CompareProtos */
120
121 /**
122 * This routine computes a proto which is the weighted
123 * average of protos p1 and p2. The new proto is returned
124 * in MergedProto.
125 *
126 * @param p1, p2 protos to be merged
127 * @param w1, w2 weight of each proto
128 * @param MergedProto place to put resulting merged proto
129 */
130 void ComputeMergedProto(PROTO_STRUCT *p1, PROTO_STRUCT *p2, float w1, float w2, PROTO_STRUCT *MergedProto) {
131 float TotalWeight;
132
133 TotalWeight = w1 + w2;
134 w1 /= TotalWeight;
135 w2 /= TotalWeight;
136
137 MergedProto->X = p1->X * w1 + p2->X * w2;
138 MergedProto->Y = p1->Y * w1 + p2->Y * w2;
139 MergedProto->Length = p1->Length * w1 + p2->Length * w2;
140 MergedProto->Angle = p1->Angle * w1 + p2->Angle * w2;
141 FillABC(MergedProto);
142 } /* ComputeMergedProto */
143
144 /**
145 * This routine searches through all of the prototypes in
146 * Class and returns the id of the proto which would provide
147 * the best approximation of Prototype. If no close
148 * approximation can be found, NO_PROTO is returned.
149 *
150 * @param Class class to search for matching old proto in
151 * @param NumMerged # of protos merged into each proto of Class
152 * @param Prototype new proto to find match for
153 *
154 * Globals: none
155 *
156 * @return Id of closest proto in Class or NO_PROTO.
157 */
158 int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], PROTOTYPE *Prototype) {
159 PROTO_STRUCT NewProto;
160 PROTO_STRUCT MergedProto;
161 int Pid;
162 PROTO_STRUCT *Proto;
163 int BestProto;
164 float BestMatch;
165 float Match, OldMatch, NewMatch;
166
167 MakeNewFromOld(&NewProto, Prototype);
168
169 BestProto = NO_PROTO;
170 BestMatch = WORST_MATCH_ALLOWED;
171 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
172 Proto = ProtoIn(Class, Pid);
173 ComputeMergedProto(Proto, &NewProto, static_cast<float>(NumMerged[Pid]), 1.0, &MergedProto);
174 OldMatch = CompareProtos(Proto, &MergedProto);
175 NewMatch = CompareProtos(&NewProto, &MergedProto);
176 Match = std::min(OldMatch, NewMatch);
177 if (Match > BestMatch) {
178 BestProto = Pid;
179 BestMatch = Match;
180 }
181 }
182 return BestProto;
183 } /* FindClosestExistingProto */
184
185 /**
186 * This fills in the fields of the New proto based on the
187 * fields of the Old proto.
188 *
189 * @param New new proto to be filled in
190 * @param Old old proto to be converted
191 *
192 * Globals: none
193 */
194 void MakeNewFromOld(PROTO_STRUCT *New, PROTOTYPE *Old) {
195 New->X = CenterX(Old->Mean);
196 New->Y = CenterY(Old->Mean);
197 New->Length = LengthOf(Old->Mean);
198 New->Angle = OrientationOf(Old->Mean);
199 FillABC(New);
200 } /* MakeNewFromOld */
201
202 /*-------------------once in subfeat---------------------------------*/
203
204 /**
205 * @name SubfeatureEvidence
206 *
207 * Compare a feature to a prototype. Print the result.
208 */
209 float SubfeatureEvidence(FEATURE Feature, PROTO_STRUCT *Proto) {
210 float Distance;
211 float Dangle;
212
213 Dangle = Proto->Angle - Feature->Params[PicoFeatDir];
214 if (Dangle < -0.5) {
215 Dangle += 1.0;
216 }
217 if (Dangle > 0.5) {
218 Dangle -= 1.0;
219 }
220 Dangle *= training_angle_match_scale;
221
222 Distance =
223 Proto->A * Feature->Params[PicoFeatX] + Proto->B * Feature->Params[PicoFeatY] + Proto->C;
224
225 return (EvidenceOf(Distance * Distance + Dangle * Dangle));
226 }
227
228 /**
229 * @name EvidenceOf
230 *
231 * Return the new type of evidence number corresponding to this
232 * distance value. This number is no longer based on the chi squared
233 * approximation. The equation that represents the transform is:
234 * 1 / (1 + (sim / midpoint) ^ curl)
235 */
236 double EvidenceOf(double Similarity) {
237 Similarity /= training_similarity_midpoint;
238
239 if (training_similarity_curl == 3) {
240 Similarity = Similarity * Similarity * Similarity;
241 } else if (training_similarity_curl == 2) {
242 Similarity = Similarity * Similarity;
243 } else {
244 Similarity = pow(Similarity, training_similarity_curl);
245 }
246
247 return (1.0 / (1.0 + Similarity));
248 }
249
250 /**
251 * This routine returns true if Feature would be matched
252 * by a fast match table built from Proto.
253 *
254 * @param Feature feature to be "fast matched" to proto
255 * @param Proto proto being "fast matched" against
256 *
257 * Globals:
258 * - training_tangent_bbox_pad bounding box pad tangent to proto
259 * - training_orthogonal_bbox_pad bounding box pad orthogonal to proto
260 *
261 * @return true if feature could match Proto.
262 */
263 bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto) {
264 FRECT BoundingBox;
265 float MaxAngleError;
266 float AngleError;
267
268 MaxAngleError = training_angle_pad / 360.0;
269 AngleError = std::fabs(Proto->Angle - Feature->Params[PicoFeatDir]);
270 if (AngleError > 0.5) {
271 AngleError = 1.0 - AngleError;
272 }
273
274 if (AngleError > MaxAngleError) {
275 return false;
276 }
277
278 ComputePaddedBoundingBox(Proto, training_tangent_bbox_pad * GetPicoFeatureLength(),
279 training_orthogonal_bbox_pad * GetPicoFeatureLength(), &BoundingBox);
280
281 return PointInside(&BoundingBox, Feature->Params[PicoFeatX], Feature->Params[PicoFeatY]);
282 } /* DummyFastMatch */
283
284 /**
285 * This routine computes a bounding box that encloses the
286 * specified proto along with some padding. The
287 * amount of padding is specified as separate distances
288 * in the tangential and orthogonal directions.
289 *
290 * @param Proto proto to compute bounding box for
291 * @param TangentPad amount of pad to add in direction of segment
292 * @param OrthogonalPad amount of pad to add orthogonal to segment
293 * @param[out] BoundingBox place to put results
294 */
295 void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float OrthogonalPad,
296 FRECT *BoundingBox) {
297 float Length = Proto->Length / 2.0 + TangentPad;
298 float Angle = Proto->Angle * 2.0 * M_PI;
299 float CosOfAngle = fabs(std::cos(Angle));
300 float SinOfAngle = fabs(std::sin(Angle));
301
302 float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad);
303 BoundingBox->MinX = Proto->X - Pad;
304 BoundingBox->MaxX = Proto->X + Pad;
305
306 Pad = std::max(SinOfAngle * Length, CosOfAngle * OrthogonalPad);
307 BoundingBox->MinY = Proto->Y - Pad;
308 BoundingBox->MaxY = Proto->Y + Pad;
309
310 } /* ComputePaddedBoundingBox */
311
312 /**
313 * Return true if point (X,Y) is inside of Rectangle.
314 *
315 * Globals: none
316 *
317 * @return true if point (X,Y) is inside of Rectangle.
318 */
319 bool PointInside(FRECT *Rectangle, float X, float Y) {
320 return (X >= Rectangle->MinX) && (X <= Rectangle->MaxX) && (Y >= Rectangle->MinY) &&
321 (Y <= Rectangle->MaxY);
322 } /* PointInside */