Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/mergenf.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: MergeNF.c | |
| 3 ** Purpose: Program for merging similar nano-feature protos | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 ******************************************************************************/ | |
| 17 | |
| 18 #define _USE_MATH_DEFINES // for M_PI | |
| 19 #include <algorithm> | |
| 20 #include <cfloat> // for FLT_MAX | |
| 21 #include <cmath> // for M_PI | |
| 22 #include <cstdio> | |
| 23 #include <cstring> | |
| 24 | |
| 25 #include "cluster.h" | |
| 26 #include "clusttool.h" | |
| 27 #include "featdefs.h" | |
| 28 #include "intproto.h" | |
| 29 #include "mergenf.h" | |
| 30 #include "ocrfeatures.h" | |
| 31 #include "oldlist.h" | |
| 32 #include "params.h" | |
| 33 #include "protos.h" | |
| 34 | |
| 35 using namespace tesseract; | |
| 36 | |
| 37 /*-------------------once in subfeat---------------------------------*/ | |
| 38 static double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ..."); | |
| 39 | |
| 40 static double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ..."); | |
| 41 | |
| 42 static double_VAR(training_similarity_curl, 2.0, "Similarity Curl ..."); | |
| 43 | |
| 44 /*-----------------------------once in | |
| 45 * fasttrain----------------------------------*/ | |
| 46 static double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ..."); | |
| 47 | |
| 48 static double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ..."); | |
| 49 | |
| 50 static double_VAR(training_angle_pad, 45.0, "Angle pad ..."); | |
| 51 | |
| 52 /** | |
| 53 * Compare protos p1 and p2 and return an estimate of the | |
| 54 * worst evidence rating that will result for any part of p1 | |
| 55 * that is compared to p2. In other words, if p1 were broken | |
| 56 * into pico-features and each pico-feature was matched to p2, | |
| 57 * what is the worst evidence rating that will be achieved for | |
| 58 * any pico-feature. | |
| 59 * | |
| 60 * @param p1, p2 protos to be compared | |
| 61 * | |
| 62 * Globals: none | |
| 63 * | |
| 64 * @return Worst possible result when matching p1 to p2. | |
| 65 */ | |
| 66 float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) { | |
| 67 float WorstEvidence = WORST_EVIDENCE; | |
| 68 float Evidence; | |
| 69 float Angle, Length; | |
| 70 | |
| 71 /* if p1 and p2 are not close in length, don't let them match */ | |
| 72 Length = std::fabs(p1->Length - p2->Length); | |
| 73 if (Length > MAX_LENGTH_MISMATCH) { | |
| 74 return (0.0); | |
| 75 } | |
| 76 | |
| 77 /* create a dummy pico-feature to be used for comparisons */ | |
| 78 auto Feature = new FEATURE_STRUCT(&PicoFeatDesc); | |
| 79 Feature->Params[PicoFeatDir] = p1->Angle; | |
| 80 | |
| 81 /* convert angle to radians */ | |
| 82 Angle = p1->Angle * 2.0 * M_PI; | |
| 83 | |
| 84 /* find distance from center of p1 to 1/2 picofeat from end */ | |
| 85 Length = p1->Length / 2.0 - GetPicoFeatureLength() / 2.0; | |
| 86 if (Length < 0) { | |
| 87 Length = 0; | |
| 88 } | |
| 89 | |
| 90 /* set the dummy pico-feature at one end of p1 and match it to p2 */ | |
| 91 Feature->Params[PicoFeatX] = p1->X + std::cos(Angle) * Length; | |
| 92 Feature->Params[PicoFeatY] = p1->Y + std::sin(Angle) * Length; | |
| 93 if (DummyFastMatch(Feature, p2)) { | |
| 94 Evidence = SubfeatureEvidence(Feature, p2); | |
| 95 if (Evidence < WorstEvidence) { | |
| 96 WorstEvidence = Evidence; | |
| 97 } | |
| 98 } else { | |
| 99 delete Feature; | |
| 100 return 0.0; | |
| 101 } | |
| 102 | |
| 103 /* set the dummy pico-feature at the other end of p1 and match it to p2 */ | |
| 104 Feature->Params[PicoFeatX] = p1->X - std::cos(Angle) * Length; | |
| 105 Feature->Params[PicoFeatY] = p1->Y - std::sin(Angle) * Length; | |
| 106 if (DummyFastMatch(Feature, p2)) { | |
| 107 Evidence = SubfeatureEvidence(Feature, p2); | |
| 108 if (Evidence < WorstEvidence) { | |
| 109 WorstEvidence = Evidence; | |
| 110 } | |
| 111 } else { | |
| 112 delete Feature; | |
| 113 return 0.0; | |
| 114 } | |
| 115 | |
| 116 delete Feature; | |
| 117 return (WorstEvidence); | |
| 118 | |
| 119 } /* CompareProtos */ | |
| 120 | |
| 121 /** | |
| 122 * This routine computes a proto which is the weighted | |
| 123 * average of protos p1 and p2. The new proto is returned | |
| 124 * in MergedProto. | |
| 125 * | |
| 126 * @param p1, p2 protos to be merged | |
| 127 * @param w1, w2 weight of each proto | |
| 128 * @param MergedProto place to put resulting merged proto | |
| 129 */ | |
| 130 void ComputeMergedProto(PROTO_STRUCT *p1, PROTO_STRUCT *p2, float w1, float w2, PROTO_STRUCT *MergedProto) { | |
| 131 float TotalWeight; | |
| 132 | |
| 133 TotalWeight = w1 + w2; | |
| 134 w1 /= TotalWeight; | |
| 135 w2 /= TotalWeight; | |
| 136 | |
| 137 MergedProto->X = p1->X * w1 + p2->X * w2; | |
| 138 MergedProto->Y = p1->Y * w1 + p2->Y * w2; | |
| 139 MergedProto->Length = p1->Length * w1 + p2->Length * w2; | |
| 140 MergedProto->Angle = p1->Angle * w1 + p2->Angle * w2; | |
| 141 FillABC(MergedProto); | |
| 142 } /* ComputeMergedProto */ | |
| 143 | |
| 144 /** | |
| 145 * This routine searches through all of the prototypes in | |
| 146 * Class and returns the id of the proto which would provide | |
| 147 * the best approximation of Prototype. If no close | |
| 148 * approximation can be found, NO_PROTO is returned. | |
| 149 * | |
| 150 * @param Class class to search for matching old proto in | |
| 151 * @param NumMerged # of protos merged into each proto of Class | |
| 152 * @param Prototype new proto to find match for | |
| 153 * | |
| 154 * Globals: none | |
| 155 * | |
| 156 * @return Id of closest proto in Class or NO_PROTO. | |
| 157 */ | |
| 158 int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[], PROTOTYPE *Prototype) { | |
| 159 PROTO_STRUCT NewProto; | |
| 160 PROTO_STRUCT MergedProto; | |
| 161 int Pid; | |
| 162 PROTO_STRUCT *Proto; | |
| 163 int BestProto; | |
| 164 float BestMatch; | |
| 165 float Match, OldMatch, NewMatch; | |
| 166 | |
| 167 MakeNewFromOld(&NewProto, Prototype); | |
| 168 | |
| 169 BestProto = NO_PROTO; | |
| 170 BestMatch = WORST_MATCH_ALLOWED; | |
| 171 for (Pid = 0; Pid < Class->NumProtos; Pid++) { | |
| 172 Proto = ProtoIn(Class, Pid); | |
| 173 ComputeMergedProto(Proto, &NewProto, static_cast<float>(NumMerged[Pid]), 1.0, &MergedProto); | |
| 174 OldMatch = CompareProtos(Proto, &MergedProto); | |
| 175 NewMatch = CompareProtos(&NewProto, &MergedProto); | |
| 176 Match = std::min(OldMatch, NewMatch); | |
| 177 if (Match > BestMatch) { | |
| 178 BestProto = Pid; | |
| 179 BestMatch = Match; | |
| 180 } | |
| 181 } | |
| 182 return BestProto; | |
| 183 } /* FindClosestExistingProto */ | |
| 184 | |
| 185 /** | |
| 186 * This fills in the fields of the New proto based on the | |
| 187 * fields of the Old proto. | |
| 188 * | |
| 189 * @param New new proto to be filled in | |
| 190 * @param Old old proto to be converted | |
| 191 * | |
| 192 * Globals: none | |
| 193 */ | |
| 194 void MakeNewFromOld(PROTO_STRUCT *New, PROTOTYPE *Old) { | |
| 195 New->X = CenterX(Old->Mean); | |
| 196 New->Y = CenterY(Old->Mean); | |
| 197 New->Length = LengthOf(Old->Mean); | |
| 198 New->Angle = OrientationOf(Old->Mean); | |
| 199 FillABC(New); | |
| 200 } /* MakeNewFromOld */ | |
| 201 | |
| 202 /*-------------------once in subfeat---------------------------------*/ | |
| 203 | |
| 204 /** | |
| 205 * @name SubfeatureEvidence | |
| 206 * | |
| 207 * Compare a feature to a prototype. Print the result. | |
| 208 */ | |
| 209 float SubfeatureEvidence(FEATURE Feature, PROTO_STRUCT *Proto) { | |
| 210 float Distance; | |
| 211 float Dangle; | |
| 212 | |
| 213 Dangle = Proto->Angle - Feature->Params[PicoFeatDir]; | |
| 214 if (Dangle < -0.5) { | |
| 215 Dangle += 1.0; | |
| 216 } | |
| 217 if (Dangle > 0.5) { | |
| 218 Dangle -= 1.0; | |
| 219 } | |
| 220 Dangle *= training_angle_match_scale; | |
| 221 | |
| 222 Distance = | |
| 223 Proto->A * Feature->Params[PicoFeatX] + Proto->B * Feature->Params[PicoFeatY] + Proto->C; | |
| 224 | |
| 225 return (EvidenceOf(Distance * Distance + Dangle * Dangle)); | |
| 226 } | |
| 227 | |
| 228 /** | |
| 229 * @name EvidenceOf | |
| 230 * | |
| 231 * Return the new type of evidence number corresponding to this | |
| 232 * distance value. This number is no longer based on the chi squared | |
| 233 * approximation. The equation that represents the transform is: | |
| 234 * 1 / (1 + (sim / midpoint) ^ curl) | |
| 235 */ | |
| 236 double EvidenceOf(double Similarity) { | |
| 237 Similarity /= training_similarity_midpoint; | |
| 238 | |
| 239 if (training_similarity_curl == 3) { | |
| 240 Similarity = Similarity * Similarity * Similarity; | |
| 241 } else if (training_similarity_curl == 2) { | |
| 242 Similarity = Similarity * Similarity; | |
| 243 } else { | |
| 244 Similarity = pow(Similarity, training_similarity_curl); | |
| 245 } | |
| 246 | |
| 247 return (1.0 / (1.0 + Similarity)); | |
| 248 } | |
| 249 | |
| 250 /** | |
| 251 * This routine returns true if Feature would be matched | |
| 252 * by a fast match table built from Proto. | |
| 253 * | |
| 254 * @param Feature feature to be "fast matched" to proto | |
| 255 * @param Proto proto being "fast matched" against | |
| 256 * | |
| 257 * Globals: | |
| 258 * - training_tangent_bbox_pad bounding box pad tangent to proto | |
| 259 * - training_orthogonal_bbox_pad bounding box pad orthogonal to proto | |
| 260 * | |
| 261 * @return true if feature could match Proto. | |
| 262 */ | |
| 263 bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto) { | |
| 264 FRECT BoundingBox; | |
| 265 float MaxAngleError; | |
| 266 float AngleError; | |
| 267 | |
| 268 MaxAngleError = training_angle_pad / 360.0; | |
| 269 AngleError = std::fabs(Proto->Angle - Feature->Params[PicoFeatDir]); | |
| 270 if (AngleError > 0.5) { | |
| 271 AngleError = 1.0 - AngleError; | |
| 272 } | |
| 273 | |
| 274 if (AngleError > MaxAngleError) { | |
| 275 return false; | |
| 276 } | |
| 277 | |
| 278 ComputePaddedBoundingBox(Proto, training_tangent_bbox_pad * GetPicoFeatureLength(), | |
| 279 training_orthogonal_bbox_pad * GetPicoFeatureLength(), &BoundingBox); | |
| 280 | |
| 281 return PointInside(&BoundingBox, Feature->Params[PicoFeatX], Feature->Params[PicoFeatY]); | |
| 282 } /* DummyFastMatch */ | |
| 283 | |
| 284 /** | |
| 285 * This routine computes a bounding box that encloses the | |
| 286 * specified proto along with some padding. The | |
| 287 * amount of padding is specified as separate distances | |
| 288 * in the tangential and orthogonal directions. | |
| 289 * | |
| 290 * @param Proto proto to compute bounding box for | |
| 291 * @param TangentPad amount of pad to add in direction of segment | |
| 292 * @param OrthogonalPad amount of pad to add orthogonal to segment | |
| 293 * @param[out] BoundingBox place to put results | |
| 294 */ | |
| 295 void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float OrthogonalPad, | |
| 296 FRECT *BoundingBox) { | |
| 297 float Length = Proto->Length / 2.0 + TangentPad; | |
| 298 float Angle = Proto->Angle * 2.0 * M_PI; | |
| 299 float CosOfAngle = fabs(std::cos(Angle)); | |
| 300 float SinOfAngle = fabs(std::sin(Angle)); | |
| 301 | |
| 302 float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad); | |
| 303 BoundingBox->MinX = Proto->X - Pad; | |
| 304 BoundingBox->MaxX = Proto->X + Pad; | |
| 305 | |
| 306 Pad = std::max(SinOfAngle * Length, CosOfAngle * OrthogonalPad); | |
| 307 BoundingBox->MinY = Proto->Y - Pad; | |
| 308 BoundingBox->MaxY = Proto->Y + Pad; | |
| 309 | |
| 310 } /* ComputePaddedBoundingBox */ | |
| 311 | |
| 312 /** | |
| 313 * Return true if point (X,Y) is inside of Rectangle. | |
| 314 * | |
| 315 * Globals: none | |
| 316 * | |
| 317 * @return true if point (X,Y) is inside of Rectangle. | |
| 318 */ | |
| 319 bool PointInside(FRECT *Rectangle, float X, float Y) { | |
| 320 return (X >= Rectangle->MinX) && (X <= Rectangle->MaxX) && (Y >= Rectangle->MinY) && | |
| 321 (Y <= Rectangle->MaxY); | |
| 322 } /* PointInside */ |
