Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/intmatcher.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: intmatcher.h | |
| 3 ** Purpose: Interface to high level generic classifier routines. | |
| 4 ** Author: Robert Moss | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 ******************************************************************************/ | |
| 17 #ifndef INTMATCHER_H | |
| 18 #define INTMATCHER_H | |
| 19 | |
| 20 #include "intproto.h" | |
| 21 #include "params.h" | |
| 22 | |
| 23 namespace tesseract { | |
| 24 | |
| 25 // Character fragments could be present in the trained templates | |
| 26 // but turned on/off on a language-by-language basis or depending | |
| 27 // on particular properties of the corpus (e.g. when we expect the | |
| 28 // images to have low exposure). | |
| 29 extern BOOL_VAR_H(disable_character_fragments); | |
| 30 | |
| 31 extern INT_VAR_H(classify_integer_matcher_multiplier); | |
| 32 | |
| 33 struct UnicharRating; | |
| 34 | |
| 35 struct CP_RESULT_STRUCT { | |
| 36 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} | |
| 37 | |
| 38 float Rating; | |
| 39 CLASS_ID Class; | |
| 40 }; | |
| 41 | |
| 42 /**---------------------------------------------------------------------------- | |
| 43 Public Function Prototypes | |
| 44 ----------------------------------------------------------------------------**/ | |
| 45 | |
| 46 #define SE_TABLE_BITS 9 | |
| 47 #define SE_TABLE_SIZE 512 | |
| 48 | |
| 49 struct ScratchEvidence { | |
| 50 uint8_t feature_evidence_[MAX_NUM_CONFIGS]; | |
| 51 int sum_feature_evidence_[MAX_NUM_CONFIGS]; | |
| 52 uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; | |
| 53 | |
| 54 void Clear(const INT_CLASS_STRUCT *class_template); | |
| 55 void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template); | |
| 56 void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures); | |
| 57 void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask); | |
| 58 }; | |
| 59 | |
| 60 class IntegerMatcher { | |
| 61 public: | |
| 62 // Integer Matcher Theta Fudge (0-255). | |
| 63 static const int kIntThetaFudge = 128; | |
| 64 // Bits in Similarity to Evidence Lookup (8-9). | |
| 65 static const int kEvidenceTableBits = 9; | |
| 66 // Integer Evidence Truncation Bits (8-14). | |
| 67 static const int kIntEvidenceTruncBits = 14; | |
| 68 // Similarity to Evidence Table Exponential Multiplier. | |
| 69 static const float kSEExponentialMultiplier; | |
| 70 // Center of Similarity Curve. | |
| 71 static const float kSimilarityCenter; | |
| 72 | |
| 73 IntegerMatcher(tesseract::IntParam *classify_debug_level); | |
| 74 | |
| 75 void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, | |
| 76 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, | |
| 77 tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, | |
| 78 bool SeparateDebugWindows); | |
| 79 | |
| 80 // Applies the CN normalization factor to the given rating and returns | |
| 81 // the modified rating. | |
| 82 float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, | |
| 83 int matcher_multiplier); | |
| 84 | |
| 85 int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, | |
| 86 int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, | |
| 87 int AdaptProtoThreshold, int Debug); | |
| 88 | |
| 89 int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, | |
| 90 int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, | |
| 91 int AdaptFeatureThreshold, int Debug); | |
| 92 | |
| 93 private: | |
| 94 int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, | |
| 95 int FeatureNum, const INT_FEATURE_STRUCT *Feature, | |
| 96 ScratchEvidence *evidence, int Debug); | |
| 97 | |
| 98 int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables, | |
| 99 tesseract::UnicharRating *Result); | |
| 100 | |
| 101 #ifndef GRAPHICS_DISABLED | |
| 102 void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, | |
| 103 const ScratchEvidence &tables, int16_t NumFeatures, int Debug); | |
| 104 | |
| 105 void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask, | |
| 106 const ScratchEvidence &tables, bool SeparateDebugWindows); | |
| 107 | |
| 108 void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, | |
| 109 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, | |
| 110 int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows); | |
| 111 #endif | |
| 112 | |
| 113 private: | |
| 114 tesseract::IntParam *classify_debug_level_; | |
| 115 uint8_t similarity_evidence_table_[SE_TABLE_SIZE]; | |
| 116 uint32_t evidence_table_mask_; | |
| 117 uint32_t mult_trunc_shift_bits_; | |
| 118 uint32_t table_trunc_shift_bits_; | |
| 119 uint32_t evidence_mult_mask_; | |
| 120 }; | |
| 121 | |
| 122 } // namespace tesseract | |
| 123 | |
| 124 #endif |
