Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/mfoutline.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: mfoutline.c | |
| 3 ** Purpose: Interface to outline struct used for extracting features | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 ******************************************************************************/ | |
| 17 | |
| 18 #include "mfoutline.h" | |
| 19 | |
| 20 #include "blobs.h" | |
| 21 #include "classify.h" | |
| 22 #include "clusttool.h" //If remove you get caught in a loop somewhere | |
| 23 #include "mfx.h" | |
| 24 #include "params.h" | |
| 25 | |
| 26 #include <cmath> | |
| 27 #include <cstdio> | |
| 28 | |
| 29 namespace tesseract { | |
| 30 | |
| 31 /*---------------------------------------------------------------------------*/ | |
| 32 /** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). | |
| 33 */ | |
| 34 LIST ConvertBlob(TBLOB *blob) { | |
| 35 LIST outlines = NIL_LIST; | |
| 36 return (blob == nullptr) ? NIL_LIST : ConvertOutlines(blob->outlines, outlines, outer); | |
| 37 } | |
| 38 | |
| 39 /*---------------------------------------------------------------------------*/ | |
| 40 /** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */ | |
| 41 MFOUTLINE ConvertOutline(TESSLINE *outline) { | |
| 42 auto MFOutline = NIL_LIST; | |
| 43 | |
| 44 if (outline == nullptr || outline->loop == nullptr) { | |
| 45 return MFOutline; | |
| 46 } | |
| 47 | |
| 48 auto StartPoint = outline->loop; | |
| 49 auto EdgePoint = StartPoint; | |
| 50 do { | |
| 51 auto NextPoint = EdgePoint->next; | |
| 52 | |
| 53 /* filter out duplicate points */ | |
| 54 if (EdgePoint->pos.x != NextPoint->pos.x || EdgePoint->pos.y != NextPoint->pos.y) { | |
| 55 auto NewPoint = new MFEDGEPT; | |
| 56 NewPoint->ClearMark(); | |
| 57 NewPoint->Hidden = EdgePoint->IsHidden(); | |
| 58 NewPoint->Point.x = EdgePoint->pos.x; | |
| 59 NewPoint->Point.y = EdgePoint->pos.y; | |
| 60 MFOutline = push(MFOutline, NewPoint); | |
| 61 } | |
| 62 EdgePoint = NextPoint; | |
| 63 } while (EdgePoint != StartPoint); | |
| 64 | |
| 65 if (MFOutline != nullptr) { | |
| 66 MakeOutlineCircular(MFOutline); | |
| 67 } | |
| 68 return MFOutline; | |
| 69 } | |
| 70 | |
| 71 /*---------------------------------------------------------------------------*/ | |
| 72 /** | |
| 73 * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). | |
| 74 * | |
| 75 * @param outline first outline to be converted | |
| 76 * @param mf_outlines list to add converted outlines to | |
| 77 * @param outline_type are the outlines outer or holes? | |
| 78 */ | |
| 79 LIST ConvertOutlines(TESSLINE *outline, LIST mf_outlines, OUTLINETYPE outline_type) { | |
| 80 MFOUTLINE mf_outline; | |
| 81 | |
| 82 while (outline != nullptr) { | |
| 83 mf_outline = ConvertOutline(outline); | |
| 84 if (mf_outline != nullptr) { | |
| 85 mf_outlines = push(mf_outlines, mf_outline); | |
| 86 } | |
| 87 outline = outline->next; | |
| 88 } | |
| 89 return mf_outlines; | |
| 90 } | |
| 91 | |
| 92 /*---------------------------------------------------------------------------*/ | |
| 93 /** | |
| 94 * This routine searches through the specified outline, computes | |
| 95 * a slope for each vector in the outline, and marks each | |
| 96 * vector as having one of the following directions: | |
| 97 * N, S, E, W, NE, NW, SE, SW | |
| 98 * This information is then stored in the outline and the | |
| 99 * outline is returned. | |
| 100 * @param Outline micro-feature outline to analyze | |
| 101 * @param MinSlope controls "snapping" of segments to horizontal | |
| 102 * @param MaxSlope controls "snapping" of segments to vertical | |
| 103 */ | |
| 104 void FindDirectionChanges(MFOUTLINE Outline, float MinSlope, float MaxSlope) { | |
| 105 MFEDGEPT *Current; | |
| 106 MFEDGEPT *Last; | |
| 107 MFOUTLINE EdgePoint; | |
| 108 | |
| 109 if (DegenerateOutline(Outline)) { | |
| 110 return; | |
| 111 } | |
| 112 | |
| 113 Last = PointAt(Outline); | |
| 114 Outline = NextPointAfter(Outline); | |
| 115 EdgePoint = Outline; | |
| 116 do { | |
| 117 Current = PointAt(EdgePoint); | |
| 118 ComputeDirection(Last, Current, MinSlope, MaxSlope); | |
| 119 | |
| 120 Last = Current; | |
| 121 EdgePoint = NextPointAfter(EdgePoint); | |
| 122 } while (EdgePoint != Outline); | |
| 123 | |
| 124 } /* FindDirectionChanges */ | |
| 125 | |
| 126 /*---------------------------------------------------------------------------*/ | |
| 127 /** | |
| 128 * This routine deallocates all of the memory consumed by | |
| 129 * a micro-feature outline. | |
| 130 * @param arg micro-feature outline to be freed | |
| 131 */ | |
| 132 void FreeMFOutline(void *arg) { // MFOUTLINE Outline) | |
| 133 auto Outline = static_cast<MFOUTLINE>(arg); | |
| 134 | |
| 135 /* break the circular outline so we can use std. techniques to deallocate */ | |
| 136 MFOUTLINE Start = Outline->list_rest(); | |
| 137 set_rest(Outline, NIL_LIST); | |
| 138 while (Start != nullptr) { | |
| 139 delete reinterpret_cast<MFEDGEPT *>(Start->first_node()); | |
| 140 Start = pop(Start); | |
| 141 } | |
| 142 | |
| 143 } /* FreeMFOutline */ | |
| 144 | |
| 145 /*---------------------------------------------------------------------------*/ | |
| 146 /** | |
| 147 * Release all memory consumed by the specified list | |
| 148 * of outlines. | |
| 149 * @param Outlines list of mf-outlines to be freed | |
| 150 */ | |
| 151 void FreeOutlines(LIST Outlines) { | |
| 152 destroy_nodes(Outlines, FreeMFOutline); | |
| 153 } /* FreeOutlines */ | |
| 154 | |
| 155 /*---------------------------------------------------------------------------*/ | |
| 156 /** | |
| 157 * This routine searches through the specified outline and finds | |
| 158 * the points at which the outline changes direction. These | |
| 159 * points are then marked as "extremities". This routine is | |
| 160 * used as an alternative to FindExtremities(). It forces the | |
| 161 * endpoints of the microfeatures to be at the direction | |
| 162 * changes rather than at the midpoint between direction | |
| 163 * changes. | |
| 164 * @param Outline micro-feature outline to analyze | |
| 165 */ | |
| 166 void MarkDirectionChanges(MFOUTLINE Outline) { | |
| 167 MFOUTLINE Current; | |
| 168 MFOUTLINE Last; | |
| 169 MFOUTLINE First; | |
| 170 | |
| 171 if (DegenerateOutline(Outline)) { | |
| 172 return; | |
| 173 } | |
| 174 | |
| 175 First = NextDirectionChange(Outline); | |
| 176 Last = First; | |
| 177 do { | |
| 178 Current = NextDirectionChange(Last); | |
| 179 PointAt(Current)->MarkPoint(); | |
| 180 Last = Current; | |
| 181 } while (Last != First); | |
| 182 | |
| 183 } /* MarkDirectionChanges */ | |
| 184 | |
| 185 /*---------------------------------------------------------------------------*/ | |
| 186 /** | |
| 187 * This routine returns the next point in the micro-feature | |
| 188 * outline that is an extremity. The search starts after | |
| 189 * EdgePoint. The routine assumes that the outline being | |
| 190 * searched is not a degenerate outline (i.e. it must have | |
| 191 * 2 or more edge points). | |
| 192 * @param EdgePoint start search from this point | |
| 193 * @return Next extremity in the outline after EdgePoint. | |
| 194 * @note Globals: none | |
| 195 */ | |
| 196 MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { | |
| 197 EdgePoint = NextPointAfter(EdgePoint); | |
| 198 while (!PointAt(EdgePoint)->ExtremityMark) { | |
| 199 EdgePoint = NextPointAfter(EdgePoint); | |
| 200 } | |
| 201 | |
| 202 return (EdgePoint); | |
| 203 | |
| 204 } /* NextExtremity */ | |
| 205 | |
| 206 /*---------------------------------------------------------------------------*/ | |
| 207 /** | |
| 208 * This routine normalizes the coordinates of the specified | |
| 209 * outline so that the outline is deskewed down to the | |
| 210 * baseline, translated so that x=0 is at XOrigin, and scaled | |
| 211 * so that the height of a character cell from descender to | |
| 212 * ascender is 1. Of this height, 0.25 is for the descender, | |
| 213 * 0.25 for the ascender, and 0.5 for the x-height. The | |
| 214 * y coordinate of the baseline is 0. | |
| 215 * @param Outline outline to be normalized | |
| 216 * @param XOrigin x-origin of text | |
| 217 */ | |
| 218 void NormalizeOutline(MFOUTLINE Outline, float XOrigin) { | |
| 219 if (Outline == NIL_LIST) { | |
| 220 return; | |
| 221 } | |
| 222 | |
| 223 MFOUTLINE EdgePoint = Outline; | |
| 224 do { | |
| 225 MFEDGEPT *Current = PointAt(EdgePoint); | |
| 226 Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - kBlnBaselineOffset); | |
| 227 Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); | |
| 228 EdgePoint = NextPointAfter(EdgePoint); | |
| 229 } while (EdgePoint != Outline); | |
| 230 } /* NormalizeOutline */ | |
| 231 | |
| 232 /*---------------------------------------------------------------------------*/ | |
| 233 /** | |
| 234 * This routine normalizes every outline in Outlines | |
| 235 * according to the currently selected normalization method. | |
| 236 * It also returns the scale factors that it used to do this | |
| 237 * scaling. The scale factors returned represent the x and | |
| 238 * y sizes in the normalized coordinate system that correspond | |
| 239 * to 1 pixel in the original coordinate system. | |
| 240 * Outlines are changed and XScale and YScale are updated. | |
| 241 * | |
| 242 * Globals: | |
| 243 * - classify_norm_method method being used for normalization | |
| 244 * - classify_char_norm_range map radius of gyration to this value | |
| 245 * @param Outlines list of outlines to be normalized | |
| 246 * @param XScale x-direction scale factor used by routine | |
| 247 * @param YScale y-direction scale factor used by routine | |
| 248 */ | |
| 249 void Classify::NormalizeOutlines(LIST Outlines, float *XScale, float *YScale) { | |
| 250 MFOUTLINE Outline; | |
| 251 | |
| 252 switch (classify_norm_method) { | |
| 253 case character: | |
| 254 ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); | |
| 255 break; | |
| 256 | |
| 257 case baseline: | |
| 258 iterate(Outlines) { | |
| 259 Outline = static_cast<MFOUTLINE>(Outlines->first_node()); | |
| 260 NormalizeOutline(Outline, 0.0); | |
| 261 } | |
| 262 *XScale = *YScale = MF_SCALE_FACTOR; | |
| 263 break; | |
| 264 } | |
| 265 } /* NormalizeOutlines */ | |
| 266 | |
| 267 /*---------------------------------------------------------------------------- | |
| 268 Private Code | |
| 269 ----------------------------------------------------------------------------*/ | |
| 270 /** | |
| 271 * Change the direction of every vector in the specified | |
| 272 * outline segment to Direction. The segment to be changed | |
| 273 * starts at Start and ends at End. Note that the previous | |
| 274 * direction of End must also be changed to reflect the | |
| 275 * change in direction of the point before it. | |
| 276 * @param Start defines start of segment of outline to be modified | |
| 277 * @param End defines end of segment of outline to be modified | |
| 278 * @param Direction new direction to assign to segment | |
| 279 */ | |
| 280 void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { | |
| 281 MFOUTLINE Current; | |
| 282 | |
| 283 for (Current = Start; Current != End; Current = NextPointAfter(Current)) { | |
| 284 PointAt(Current)->Direction = Direction; | |
| 285 } | |
| 286 | |
| 287 PointAt(End)->PreviousDirection = Direction; | |
| 288 | |
| 289 } /* ChangeDirection */ | |
| 290 | |
| 291 /** | |
| 292 * This routine normalizes each point in Outline by | |
| 293 * translating it to the specified center and scaling it | |
| 294 * anisotropically according to the given scale factors. | |
| 295 * @param Outline outline to be character normalized | |
| 296 * @param cn_denorm | |
| 297 */ | |
| 298 void CharNormalizeOutline(MFOUTLINE Outline, const DENORM &cn_denorm) { | |
| 299 MFOUTLINE First, Current; | |
| 300 MFEDGEPT *CurrentPoint; | |
| 301 | |
| 302 if (Outline == NIL_LIST) { | |
| 303 return; | |
| 304 } | |
| 305 | |
| 306 First = Outline; | |
| 307 Current = First; | |
| 308 do { | |
| 309 CurrentPoint = PointAt(Current); | |
| 310 FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); | |
| 311 cn_denorm.LocalNormTransform(pos, &pos); | |
| 312 CurrentPoint->Point.x = (pos.x() - UINT8_MAX / 2) * MF_SCALE_FACTOR; | |
| 313 CurrentPoint->Point.y = (pos.y() - UINT8_MAX / 2) * MF_SCALE_FACTOR; | |
| 314 | |
| 315 Current = NextPointAfter(Current); | |
| 316 } while (Current != First); | |
| 317 | |
| 318 } /* CharNormalizeOutline */ | |
| 319 | |
| 320 /** | |
| 321 * This routine computes the slope from Start to Finish and | |
| 322 * and then computes the approximate direction of the line | |
| 323 * segment from Start to Finish. The direction is quantized | |
| 324 * into 8 buckets: | |
| 325 * N, S, E, W, NE, NW, SE, SW | |
| 326 * Both the slope and the direction are then stored into | |
| 327 * the appropriate fields of the Start edge point. The | |
| 328 * direction is also stored into the PreviousDirection field | |
| 329 * of the Finish edge point. | |
| 330 * @param Start starting point to compute direction from | |
| 331 * @param Finish finishing point to compute direction to | |
| 332 * @param MinSlope slope below which lines are horizontal | |
| 333 * @param MaxSlope slope above which lines are vertical | |
| 334 */ | |
| 335 void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, float MinSlope, float MaxSlope) { | |
| 336 FVECTOR Delta; | |
| 337 | |
| 338 Delta.x = Finish->Point.x - Start->Point.x; | |
| 339 Delta.y = Finish->Point.y - Start->Point.y; | |
| 340 if (Delta.x == 0) { | |
| 341 if (Delta.y < 0) { | |
| 342 Start->Slope = -FLT_MAX; | |
| 343 Start->Direction = south; | |
| 344 } else { | |
| 345 Start->Slope = FLT_MAX; | |
| 346 Start->Direction = north; | |
| 347 } | |
| 348 } else { | |
| 349 Start->Slope = Delta.y / Delta.x; | |
| 350 if (Delta.x > 0) { | |
| 351 if (Delta.y > 0) { | |
| 352 if (Start->Slope > MinSlope) { | |
| 353 if (Start->Slope < MaxSlope) { | |
| 354 Start->Direction = northeast; | |
| 355 } else { | |
| 356 Start->Direction = north; | |
| 357 } | |
| 358 } else { | |
| 359 Start->Direction = east; | |
| 360 } | |
| 361 } else if (Start->Slope < -MinSlope) { | |
| 362 if (Start->Slope > -MaxSlope) { | |
| 363 Start->Direction = southeast; | |
| 364 } else { | |
| 365 Start->Direction = south; | |
| 366 } | |
| 367 } else { | |
| 368 Start->Direction = east; | |
| 369 } | |
| 370 } else if (Delta.y > 0) { | |
| 371 if (Start->Slope < -MinSlope) { | |
| 372 if (Start->Slope > -MaxSlope) { | |
| 373 Start->Direction = northwest; | |
| 374 } else { | |
| 375 Start->Direction = north; | |
| 376 } | |
| 377 } else { | |
| 378 Start->Direction = west; | |
| 379 } | |
| 380 } else if (Start->Slope > MinSlope) { | |
| 381 if (Start->Slope < MaxSlope) { | |
| 382 Start->Direction = southwest; | |
| 383 } else { | |
| 384 Start->Direction = south; | |
| 385 } | |
| 386 } else { | |
| 387 Start->Direction = west; | |
| 388 } | |
| 389 } | |
| 390 Finish->PreviousDirection = Start->Direction; | |
| 391 } | |
| 392 | |
| 393 /** | |
| 394 * This routine returns the next point in the micro-feature | |
| 395 * outline that has a direction different than EdgePoint. The | |
| 396 * routine assumes that the outline being searched is not a | |
| 397 * degenerate outline (i.e. it must have 2 or more edge points). | |
| 398 * @param EdgePoint start search from this point | |
| 399 * @return Point of next direction change in micro-feature outline. | |
| 400 * @note Globals: none | |
| 401 */ | |
| 402 MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { | |
| 403 DIRECTION InitialDirection; | |
| 404 | |
| 405 InitialDirection = PointAt(EdgePoint)->Direction; | |
| 406 | |
| 407 MFOUTLINE next_pt = nullptr; | |
| 408 do { | |
| 409 EdgePoint = NextPointAfter(EdgePoint); | |
| 410 next_pt = NextPointAfter(EdgePoint); | |
| 411 } while (PointAt(EdgePoint)->Direction == InitialDirection && !PointAt(EdgePoint)->Hidden && | |
| 412 next_pt != nullptr && !PointAt(next_pt)->Hidden); | |
| 413 | |
| 414 return (EdgePoint); | |
| 415 } | |
| 416 | |
| 417 } // namespace tesseract |
