Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/thirdparty/tesseract/src/classify/intmatcher.h @ 21:2f43e400f144
Provide an "all" target to build both the sdist and the wheel
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 19 Sep 2025 10:28:53 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
/****************************************************************************** ** Filename: intmatcher.h ** Purpose: Interface to high level generic classifier routines. ** Author: Robert Moss ** ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ #ifndef INTMATCHER_H #define INTMATCHER_H #include "intproto.h" #include "params.h" namespace tesseract { // Character fragments could be present in the trained templaes // but turned on/off on the language-by-language basis or depending // on particular properties of the corpus (e.g. when we expect the // images to have low exposure). extern BOOL_VAR_H(disable_character_fragments); extern INT_VAR_H(classify_integer_matcher_multiplier); struct UnicharRating; struct CP_RESULT_STRUCT { CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} float Rating; CLASS_ID Class; }; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ #define SE_TABLE_BITS 9 #define SE_TABLE_SIZE 512 struct ScratchEvidence { uint8_t feature_evidence_[MAX_NUM_CONFIGS]; int sum_feature_evidence_[MAX_NUM_CONFIGS]; uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; void Clear(const INT_CLASS_STRUCT *class_template); void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template); void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures); void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask); }; class IntegerMatcher { public: // Integer Matcher Theta Fudge (0-255). static const int kIntThetaFudge = 128; // Bits in Similarity to Evidence Lookup (8-9). static const int kEvidenceTableBits = 9; // Integer Evidence Truncation Bits (8-14). static const int kIntEvidenceTruncBits = 14; // Similarity to Evidence Table Exponential Multiplier. static const float kSEExponentialMultiplier; // Center of Similarity Curve. static const float kSimilarityCenter; IntegerMatcher(tesseract::IntParam *classify_debug_level); void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows); // Applies the CN normalization factor to the given rating and returns // the modified rating. float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier); int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug); int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug); private: int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int FeatureNum, const INT_FEATURE_STRUCT *Feature, ScratchEvidence *evidence, int Debug); int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables, tesseract::UnicharRating *Result); #ifndef GRAPHICS_DISABLED void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, const ScratchEvidence &tables, int16_t NumFeatures, int Debug); void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask, const ScratchEvidence &tables, bool SeparateDebugWindows); void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows); #endif private: tesseract::IntParam *classify_debug_level_; uint8_t similarity_evidence_table_[SE_TABLE_SIZE]; uint32_t evidence_table_mask_; uint32_t mult_trunc_shift_bits_; uint32_t table_trunc_shift_bits_; uint32_t evidence_mult_mask_; }; } // namespace tesseract #endif
