Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/classify/ocrfeatures.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/classify/ocrfeatures.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,139 @@ +/****************************************************************************** + ** Filename: ocrfeatures.cpp + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "ocrfeatures.h" + +#include "scanutils.h" + +#include <cassert> +#include <cmath> +#include <sstream> // for std::stringstream + +namespace tesseract { + +/*---------------------------------------------------------------------------- + Public Code +----------------------------------------------------------------------------*/ +/** + * Add a feature to a feature set. If the feature set is + * already full, false is returned to indicate that the + * feature could not be added to the set; otherwise, true is + * returned. + * @param FeatureSet set of features to add Feature to + * @param Feature feature to be added to FeatureSet + * @return true if feature added to set, false if set is already full. + */ +bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { + if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) { + delete Feature; + return false; + } + + FeatureSet->Features[FeatureSet->NumFeatures++] = Feature; + return true; +} /* AddFeature */ + +/** + * Create a new feature of the specified type and read in + * the value of its parameters from File. The extra penalty + * for the feature is also computed by calling the appropriate + * function for the specified feature type. The correct text + * representation for a feature is a list of N floats where + * N is the number of parameters in the feature. + * @param File open text file to read feature from + * @param FeatureDesc specifies type of feature to read from File + * @return New #FEATURE read from File. + */ +static FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc) { + auto Feature = new FEATURE_STRUCT(FeatureDesc); + for (int i = 0; i < Feature->Type->NumParams; i++) { + ASSERT_HOST(tfscanf(File, "%f", &(Feature->Params[i])) == 1); +#ifndef _WIN32 + assert(!std::isnan(Feature->Params[i])); +#endif + } + return Feature; +} + +/** + * Create a new feature set of the specified type and read in + * the features from File. The correct text representation + * for a feature set is an integer which specifies the number (N) + * of features in a set followed by a list of N feature + * descriptions. + * @param File open text file to read new feature set from + * @param FeatureDesc specifies type of feature to read from File + * @return New feature set read from File. + */ +FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc) { + int NumFeatures; + ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1); + ASSERT_HOST(NumFeatures >= 0); + + auto FeatureSet = new FEATURE_SET_STRUCT(NumFeatures); + for (int i = 0; i < NumFeatures; i++) { + AddFeature(FeatureSet, ReadFeature(File, FeatureDesc)); + } + + return FeatureSet; +} + +/** + * Appends a textual representation of Feature to str. + * This representation is simply a list of the N parameters + * of the feature, terminated with a newline. It is assumed + * that the ExtraPenalty field can be reconstructed from the + * parameters of the feature. It is also assumed that the + * feature type information is specified or assumed elsewhere. + * @param Feature feature to write out to str + * @param str string to write Feature to + */ +static void WriteFeature(FEATURE Feature, std::string &str) { + for (int i = 0; i < Feature->Type->NumParams; i++) { +#ifndef WIN32 + assert(!std::isnan(Feature->Params[i])); +#endif + std::stringstream stream; + // Use "C" locale (needed for double value). + stream.imbue(std::locale::classic()); + // Use 8 digits for double value. + stream.precision(8); + stream << Feature->Params[i]; + str += " " + stream.str(); + } + str += "\n"; +} /* WriteFeature */ + +/** + * Write a textual representation of FeatureSet to File. + * This representation is an integer specifying the number of + * features in the set, followed by a newline, followed by + * text representations for each feature in the set. + * @param FeatureSet feature set to write to File + * @param str string to write Feature to + */ +void WriteFeatureSet(FEATURE_SET FeatureSet, std::string &str) { + if (FeatureSet) { + str += "" + std::to_string(FeatureSet->NumFeatures); + str += "\n"; + for (int i = 0; i < FeatureSet->NumFeatures; i++) { + WriteFeature(FeatureSet->Features[i], str); + } + } +} /* WriteFeatureSet */ + +} // namespace tesseract
