Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/classify/float2int.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/classify/float2int.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,105 @@ +/****************************************************************************** + ** Filename: float2int.cpp + ** Purpose: Routines for converting float features to int features + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "float2int.h" + +#include "classify.h" +#include "mfoutline.h" +#include "normmatch.h" +#include "picofeat.h" + +#include "helpers.h" + +#define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1) + +/*---------------------------------------------------------------------------*/ +namespace tesseract { + +/** + * For each class in the unicharset, clears the corresponding + * entry in char_norm_array. char_norm_array is indexed by unichar_id. + * + * Globals: + * - none + * + * @param char_norm_array array to be cleared + */ +void Classify::ClearCharNormArray(uint8_t *char_norm_array) { + memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size()); +} /* ClearCharNormArray */ + +/*---------------------------------------------------------------------------*/ +/** + * For each class in unicharset, computes the match between + * norm_feature and the normalization protos for that class. + * Converts this number to the range from 0 - 255 and stores it + * into char_norm_array. CharNormArray is indexed by unichar_id. + * + * Globals: + * - PreTrainedTemplates current set of built-in templates + * + * @param norm_feature character normalization feature + * @param[out] char_norm_array place to put results of size unicharset.size() + */ +void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, + uint8_t *char_norm_array) { + for (unsigned i = 0; i < unicharset.size(); i++) { + if (i < PreTrainedTemplates->NumClasses) { + int norm_adjust = + static_cast<int>(INT_CHAR_NORM_RANGE * ComputeNormMatch(i, norm_feature, false)); + char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM); + } else { + // Classes with no templates (eg. ambigs & ligatures) default + // to worst match. + char_norm_array[i] = MAX_INT_CHAR_NORM; + } + } +} /* ComputeIntCharNormArray */ + +/*---------------------------------------------------------------------------*/ +/** + * This routine converts each floating point pico-feature + * in Features into integer format and saves it into + * IntFeatures. + * + * Globals: + * - none + * + * @param Features floating point pico-features to be converted + * @param[out] IntFeatures array to put converted features into + */ +void Classify::ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) { + float YShift; + + if (classify_norm_method == baseline) { + YShift = BASELINE_Y_SHIFT; + } else { + YShift = Y_SHIFT; + } + + for (int Fid = 0; Fid < Features->NumFeatures; Fid++) { + FEATURE Feature = Features->Features[Fid]; + + IntFeatures[Fid].X = Bucket8For(Feature->Params[PicoFeatX], X_SHIFT, INT_FEAT_RANGE); + IntFeatures[Fid].Y = Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE); + IntFeatures[Fid].Theta = + CircBucketFor(Feature->Params[PicoFeatDir], ANGLE_SHIFT, INT_FEAT_RANGE); + IntFeatures[Fid].CP_misses = 0; + } +} /* ComputeIntFeatures */ + +} // namespace tesseract
