Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/classify/featdefs.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/classify/featdefs.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,213 @@ +/****************************************************************************** + ** Filename: featdefs.cpp + ** Purpose: Definitions of currently defined feature types. + ** Author: Dan Johnson + ** + ** (c) Copyright Hewlett-Packard Company, 1988. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + ******************************************************************************/ + +#include "featdefs.h" + +#include "picofeat.h" // for PicoFeatureLength +#include "scanutils.h" + +#include <cstdio> +#include <cstring> + +namespace tesseract { + +#define PICO_FEATURE_LENGTH 0.05 + +/*----------------------------------------------------------------------------- + Global Data Definitions and Declarations +-----------------------------------------------------------------------------*/ +const char *const kMicroFeatureType = "mf"; +const char *const kCNFeatureType = "cn"; +const char *const kIntFeatureType = "if"; +const char *const kGeoFeatureType = "tb"; + +// Define all of the parameters for the MicroFeature type. +StartParamDesc(MicroFeatureParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75) + DefineParam(0, 1, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) DefineParam(0, 1, -0.5, 0.5) + DefineParam(0, 1, -0.5, 0.5) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams) + + // Define all of the parameters for the NormFeat type. + StartParamDesc(CharNormParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(0, 1, 0.0, 1.0) + DefineParam(0, 0, 0.0, 1.0) DefineParam(0, 0, 0.0, 1.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams) + + // Define all of the parameters for the IntFeature type + StartParamDesc(IntFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0) + DefineParam(1, 0, 0.0, 255.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams) + + // Define all of the parameters for the GeoFeature type + StartParamDesc(GeoFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0) + DefineParam(0, 0, 0.0, 255.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams) + + // Other features used for training the adaptive classifier, but not used + // during normal training, therefore not in the DescDefs array. + + // Define all of the parameters for the PicoFeature type + // define knob that can be used to adjust pico-feature length. + float PicoFeatureLength = PICO_FEATURE_LENGTH; +StartParamDesc(PicoFeatParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(1, 0, 0.0, 1.0) + DefineParam(0, 0, -0.5, 0.5) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams) + + // Define all of the parameters for the OutlineFeature type. + StartParamDesc(OutlineFeatParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75) + DefineParam(0, 0, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) EndParamDesc + // Now define the feature type itself (see features.h for parameters). + DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams) + + // MUST be kept in-sync with ExtractorDefs in fxdefs.cpp. + static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = { + &MicroFeatureDesc, &CharNormDesc, &IntFeatDesc, &GeoFeatDesc}; + +/*----------------------------------------------------------------------------- + Public Code +-----------------------------------------------------------------------------*/ +void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) { + featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES; + for (int i = 0; i < NUM_FEATURE_TYPES; ++i) { + featuredefs->FeatureDesc[i] = DescDefs[i]; + } +} + +/*---------------------------------------------------------------------------*/ +/** + * Appends a textual representation of CharDesc to str. + * The format used is to write out the number of feature + * sets which will be written followed by a representation of + * each feature set. + * + * Each set starts with the short name for that feature followed + * by a description of the feature set. Feature sets which are + * not present are not written. + * + * @param FeatureDefs definitions of feature types/extractors + * @param str string to append CharDesc to + * @param CharDesc character description to write to File + */ +void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str) { + int NumSetsToWrite = 0; + + for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { + if (CharDesc->FeatureSets[Type]) { + NumSetsToWrite++; + } + } + + str += " " + std::to_string(NumSetsToWrite); + str += "\n"; + for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { + if (CharDesc->FeatureSets[Type]) { + str += FeatureDefs.FeatureDesc[Type]->ShortName; + str += " "; + WriteFeatureSet(CharDesc->FeatureSets[Type], str); + } + } +} /* WriteCharDescription */ + +// Return whether all of the fields of the given feature set +// are well defined (not inf or nan). +bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc) { + bool anything_written = false; + bool well_formed = true; + for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) { + if (CharDesc->FeatureSets[Type]) { + for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) { + FEATURE feat = CharDesc->FeatureSets[Type]->Features[i]; + for (int p = 0; p < feat->Type->NumParams; p++) { + if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p])) { + well_formed = false; + } else { + anything_written = true; + } + } + } + } else { + return false; + } + } + return anything_written && well_formed; +} /* ValidCharDescription */ + +/*---------------------------------------------------------------------------*/ +/** + * Read a character description from File, and return + * a data structure containing this information. The data + * is formatted as follows: + * @verbatim + NumberOfSets + ShortNameForSet1 Set1 + ShortNameForSet2 Set2 + ... + @endverbatim + * + * Globals: + * - none + * + * @param FeatureDefs definitions of feature types/extractors + * @param File open text file to read character description from + * @return Character description read from File. + */ +CHAR_DESC_STRUCT *ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File) { + int NumSetsToRead; + char ShortName[FEAT_NAME_SIZE]; + int Type; + + ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1); + ASSERT_HOST(NumSetsToRead >= 0); + ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes); + + auto CharDesc = new CHAR_DESC_STRUCT(FeatureDefs); + for (; NumSetsToRead > 0; NumSetsToRead--) { + tfscanf(File, "%s", ShortName); + Type = ShortNameToFeatureType(FeatureDefs, ShortName); + CharDesc->FeatureSets[Type] = ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]); + } + return CharDesc; +} + +/*---------------------------------------------------------------------------*/ +/** + * Search through all features currently defined and return + * the feature type for the feature with the specified short + * name. Trap an error if the specified name is not found. + * + * Globals: + * - none + * + * @param FeatureDefs definitions of feature types/extractors + * @param ShortName short name of a feature type + * @return Feature type which corresponds to ShortName. + */ +uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName) { + for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++) { + if (!strcmp((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) { + return static_cast<uint32_t>(i); + } + } + ASSERT_HOST(!"Illegal short name for a feature"); + return 0; +} + +} // namespace tesseract
