Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/outfeat.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: outfeat.c | |
| 3 ** Purpose: Definition of outline-features. | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 ******************************************************************************/ | |
| 17 | |
| 18 #include "outfeat.h" | |
| 19 | |
| 20 #include "classify.h" | |
| 21 #include "featdefs.h" | |
| 22 #include "mfoutline.h" | |
| 23 #include "ocrfeatures.h" | |
| 24 | |
| 25 #include <cstdio> | |
| 26 | |
| 27 namespace tesseract { | |
| 28 | |
| 29 /*---------------------------------------------------------------------------- | |
| 30 Public Code | |
| 31 ----------------------------------------------------------------------------*/ | |
| 32 | |
| 33 /** | |
| 34 * Convert each segment in the outline to a feature | |
| 35 * and return the features. | |
| 36 * @param Blob blob to extract pico-features from | |
| 37 * @return Outline-features for Blob. | |
| 38 * @note Globals: none | |
| 39 */ | |
| 40 FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { | |
| 41 auto FeatureSet = new FEATURE_SET_STRUCT(MAX_OUTLINE_FEATURES); | |
| 42 if (Blob == nullptr) { | |
| 43 return (FeatureSet); | |
| 44 } | |
| 45 | |
| 46 auto Outlines = ConvertBlob(Blob); | |
| 47 | |
| 48 float XScale, YScale; | |
| 49 NormalizeOutlines(Outlines, &XScale, &YScale); | |
| 50 auto RemainingOutlines = Outlines; | |
| 51 iterate(RemainingOutlines) { | |
| 52 auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node()); | |
| 53 ConvertToOutlineFeatures(Outline, FeatureSet); | |
| 54 } | |
| 55 if (classify_norm_method == baseline) { | |
| 56 NormalizeOutlineX(FeatureSet); | |
| 57 } | |
| 58 FreeOutlines(Outlines); | |
| 59 return (FeatureSet); | |
| 60 } /* ExtractOutlineFeatures */ | |
| 61 | |
| 62 /*---------------------------------------------------------------------------- | |
| 63 Private Code | |
| 64 ----------------------------------------------------------------------------*/ | |
| 65 /*---------------------------------------------------------------------------*/ | |
| 66 /** | |
| 67 * This routine computes the midpoint between Start and | |
| 68 * End to obtain the x,y position of the outline-feature. It | |
| 69 * also computes the direction from Start to End as the | |
| 70 * direction of the outline-feature and the distance from | |
| 71 * Start to End as the length of the outline-feature. | |
| 72 * This feature is then | |
| 73 * inserted into the next feature slot in FeatureSet. | |
| 74 * @param Start starting point of outline-feature | |
| 75 * @param End ending point of outline-feature | |
| 76 * @param FeatureSet set to add outline-feature to | |
| 77 */ | |
| 78 void AddOutlineFeatureToSet(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) { | |
| 79 auto Feature = new FEATURE_STRUCT(&OutlineFeatDesc); | |
| 80 Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); | |
| 81 Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); | |
| 82 Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); | |
| 83 Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); | |
| 84 AddFeature(FeatureSet, Feature); | |
| 85 | |
| 86 } /* AddOutlineFeatureToSet */ | |
| 87 | |
| 88 /*---------------------------------------------------------------------------*/ | |
| 89 /** | |
| 90 * This routine steps converts each section in the specified | |
| 91 * outline to a feature described by its x,y position, length | |
| 92 * and angle. | |
| 93 * Results are returned in FeatureSet. | |
| 94 * @param Outline outline to extract outline-features from | |
| 95 * @param FeatureSet set of features to add outline-features to | |
| 96 */ | |
| 97 void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { | |
| 98 MFOUTLINE Next; | |
| 99 MFOUTLINE First; | |
| 100 FPOINT FeatureStart; | |
| 101 FPOINT FeatureEnd; | |
| 102 | |
| 103 if (DegenerateOutline(Outline)) { | |
| 104 return; | |
| 105 } | |
| 106 | |
| 107 First = Outline; | |
| 108 Next = First; | |
| 109 do { | |
| 110 FeatureStart = PointAt(Next)->Point; | |
| 111 Next = NextPointAfter(Next); | |
| 112 | |
| 113 /* note that an edge is hidden if the ending point of the edge is | |
| 114 marked as hidden. This situation happens because the order of | |
| 115 the outlines is reversed when they are converted from the old | |
| 116 format. In the old format, a hidden edge is marked by the | |
| 117 starting point for that edge. */ | |
| 118 if (!PointAt(Next)->Hidden) { | |
| 119 FeatureEnd = PointAt(Next)->Point; | |
| 120 AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); | |
| 121 } | |
| 122 } while (Next != First); | |
| 123 } /* ConvertToOutlineFeatures */ | |
| 124 | |
| 125 /*---------------------------------------------------------------------------*/ | |
| 126 /** | |
| 127 * This routine computes the weighted average x position | |
| 128 * over all of the outline-features in FeatureSet and then | |
| 129 * renormalizes the outline-features to force this average | |
| 130 * to be the x origin (i.e. x=0). | |
| 131 * FeatureSet is changed. | |
| 132 * @param FeatureSet outline-features to be normalized | |
| 133 */ | |
| 134 void NormalizeOutlineX(FEATURE_SET FeatureSet) { | |
| 135 int i; | |
| 136 FEATURE Feature; | |
| 137 float Length; | |
| 138 float TotalX = 0.0; | |
| 139 float TotalWeight = 0.0; | |
| 140 float Origin; | |
| 141 | |
| 142 if (FeatureSet->NumFeatures <= 0) { | |
| 143 return; | |
| 144 } | |
| 145 | |
| 146 for (i = 0; i < FeatureSet->NumFeatures; i++) { | |
| 147 Feature = FeatureSet->Features[i]; | |
| 148 Length = Feature->Params[OutlineFeatLength]; | |
| 149 TotalX += Feature->Params[OutlineFeatX] * Length; | |
| 150 TotalWeight += Length; | |
| 151 } | |
| 152 Origin = TotalX / TotalWeight; | |
| 153 | |
| 154 for (i = 0; i < FeatureSet->NumFeatures; i++) { | |
| 155 Feature = FeatureSet->Features[i]; | |
| 156 Feature->Params[OutlineFeatX] -= Origin; | |
| 157 } | |
| 158 } /* NormalizeOutlineX */ | |
| 159 | |
| 160 } // namespace tesseract |
