Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/classify/outfeat.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/classify/outfeat.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,160 @@ +/****************************************************************************** + ** Filename: outfeat.c + ** Purpose: Definition of outline-features. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "outfeat.h" + +#include "classify.h" +#include "featdefs.h" +#include "mfoutline.h" +#include "ocrfeatures.h" + +#include <cstdio> + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ + +/** + * Convert each segment in the outline to a feature + * and return the features. + * @param Blob blob to extract pico-features from + * @return Outline-features for Blob. 
+ * @note Globals: none + */ +FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { + auto FeatureSet = new FEATURE_SET_STRUCT(MAX_OUTLINE_FEATURES); + if (Blob == nullptr) { + return (FeatureSet); + } + + auto Outlines = ConvertBlob(Blob); + + float XScale, YScale; + NormalizeOutlines(Outlines, &XScale, &YScale); + auto RemainingOutlines = Outlines; + iterate(RemainingOutlines) { + auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node()); + ConvertToOutlineFeatures(Outline, FeatureSet); + } + if (classify_norm_method == baseline) { + NormalizeOutlineX(FeatureSet); + } + FreeOutlines(Outlines); + return (FeatureSet); +} /* ExtractOutlineFeatures */ + +/*---------------------------------------------------------------------------- + Private Code +----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the midpoint between Start and + * End to obtain the x,y position of the outline-feature. It + * also computes the direction from Start to End as the + * direction of the outline-feature and the distance from + * Start to End as the length of the outline-feature. + * This feature is then + * inserted into the next feature slot in FeatureSet. 
+ * @param Start starting point of outline-feature + * @param End ending point of outline-feature + * @param FeatureSet set to add outline-feature to + */ +void AddOutlineFeatureToSet(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet) { + auto Feature = new FEATURE_STRUCT(&OutlineFeatDesc); + Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); + Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); + Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); + Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); + AddFeature(FeatureSet, Feature); + +} /* AddOutlineFeatureToSet */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine steps converts each section in the specified + * outline to a feature described by its x,y position, length + * and angle. + * Results are returned in FeatureSet. + * @param Outline outline to extract outline-features from + * @param FeatureSet set of features to add outline-features to + */ +void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { + MFOUTLINE Next; + MFOUTLINE First; + FPOINT FeatureStart; + FPOINT FeatureEnd; + + if (DegenerateOutline(Outline)) { + return; + } + + First = Outline; + Next = First; + do { + FeatureStart = PointAt(Next)->Point; + Next = NextPointAfter(Next); + + /* note that an edge is hidden if the ending point of the edge is + marked as hidden. This situation happens because the order of + the outlines is reversed when they are converted from the old + format. In the old format, a hidden edge is marked by the + starting point for that edge. 
*/ + if (!PointAt(Next)->Hidden) { + FeatureEnd = PointAt(Next)->Point; + AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); + } + } while (Next != First); +} /* ConvertToOutlineFeatures */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine computes the weighted average x position + * over all of the outline-features in FeatureSet and then + * renormalizes the outline-features to force this average + * to be the x origin (i.e. x=0). + * FeatureSet is changed. + * @param FeatureSet outline-features to be normalized + */ +void NormalizeOutlineX(FEATURE_SET FeatureSet) { + int i; + FEATURE Feature; + float Length; + float TotalX = 0.0; + float TotalWeight = 0.0; + float Origin; + + if (FeatureSet->NumFeatures <= 0) { + return; + } + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Length = Feature->Params[OutlineFeatLength]; + TotalX += Feature->Params[OutlineFeatX] * Length; + TotalWeight += Length; + } + Origin = TotalX / TotalWeight; + + for (i = 0; i < FeatureSet->NumFeatures; i++) { + Feature = FeatureSet->Features[i]; + Feature->Params[OutlineFeatX] -= Origin; + } +} /* NormalizeOutlineX */ + +} // namespace tesseract
