comparison mupdf-source/thirdparty/tesseract/src/classify/intmatcher.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: intmatcher.h
3 ** Purpose: Interface to high level generic classifier routines.
4 ** Author: Robert Moss
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17 #ifndef INTMATCHER_H
18 #define INTMATCHER_H
19
20 #include "intproto.h"
21 #include "params.h"
22
23 namespace tesseract {
24
25 // Character fragments could be present in the trained templates
26 // but turned on/off on a language-by-language basis or depending
27 // on particular properties of the corpus (e.g. when we expect the
28 // images to have low exposure).
29 extern BOOL_VAR_H(disable_character_fragments);  // declaration only (extern); NOTE(review): BOOL_VAR_H presumably comes from params.h - confirm
30
31 extern INT_VAR_H(classify_integer_matcher_multiplier);  // declaration only (extern); NOTE(review): INT_VAR_H presumably comes from params.h - confirm
32
33 struct UnicharRating;  // forward declaration: this header only refers to it through pointers (Match, FindBestMatch)
34
35 struct CP_RESULT_STRUCT {  // pairs a rating with a class id; NOTE(review): "CP" presumably means class pruner - confirm
36 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}  // default state: rating 0.0f, class id 0
37
38 float Rating;
39 CLASS_ID Class;  // CLASS_ID is not declared in this header; presumably supplied via intproto.h - confirm
40 };
41
42 /**----------------------------------------------------------------------------
43 Public Function Prototypes
44 ----------------------------------------------------------------------------**/
45
46 #define SE_TABLE_BITS 9  // same value as IntegerMatcher::kEvidenceTableBits
47 #define SE_TABLE_SIZE 512  // equals 1 << SE_TABLE_BITS (keep the two in sync); element count of IntegerMatcher::similarity_evidence_table_
48
49 struct ScratchEvidence {  // evidence buffers handed to IntegerMatcher's private helpers (UpdateTablesForFeature, FindBestMatch, debug routines)
50 uint8_t feature_evidence_[MAX_NUM_CONFIGS];  // one byte per config slot
51 int sum_feature_evidence_[MAX_NUM_CONFIGS];  // one int per config slot; NOTE(review): presumably a running total of feature_evidence_ - confirm in the .cpp
52 uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];  // one byte per (proto, index) pair
53
54 void Clear(const INT_CLASS_STRUCT *class_template);  // defined out of line; takes the class template read-only
55 void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template);  // defined out of line; takes the class template read-only
56 void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures);  // defined out of line; note the non-const template pointer
57 void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask);  // defined out of line; note the non-const template pointer
58 };
59
60 class IntegerMatcher {  // matcher interface; every member function is only declared here and defined elsewhere
61 public:
62 // Integer Matcher Theta Fudge (0-255).
63 static const int kIntThetaFudge = 128;
64 // Bits in Similarity to Evidence Lookup (8-9).
65 static const int kEvidenceTableBits = 9;  // same value as the SE_TABLE_BITS macro
66 // Integer Evidence Truncation Bits (8-14).
67 static const int kIntEvidenceTruncBits = 14;
68 // Similarity to Evidence Table Exponential Multiplier.
69 static const float kSEExponentialMultiplier;  // no in-class initializer: the value is defined out of line
70 // Center of Similarity Curve.
71 static const float kSimilarityCenter;  // no in-class initializer: the value is defined out of line
72
73 IntegerMatcher(tesseract::IntParam *classify_debug_level);  // NOTE(review): presumably stored in classify_debug_level_ - confirm in the .cpp
74
75 void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  // returns nothing; Result is the only non-const pointer besides the template
76 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,  // Features is read-only; NumFeatures presumably its element count - confirm
77 tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug,
78 bool SeparateDebugWindows);
79
80 // Applies the CN normalization factor to the given rating and returns
81 // the modified rating.
82 float ApplyCNCorrection(float rating, int blob_length, int normalization_factor,
83 int matcher_multiplier);
84
85 int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  // NOTE(review): int return presumably counts entries written to ProtoArray - confirm
86 int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray,
87 int AdaptProtoThreshold, int Debug);
88
89 int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  // NOTE(review): int return presumably counts entries written to FeatureArray - confirm
90 int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray,
91 int AdaptFeatureThreshold, int Debug);
92
93 private:  // internal helpers operating on a caller-supplied ScratchEvidence
94 int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,  // evidence is passed by non-const pointer, so it may be modified
95 int FeatureNum, const INT_FEATURE_STRUCT *Feature,
96 ScratchEvidence *evidence, int Debug);
97
98 int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables,  // tables is read-only here; Result is a non-const pointer
99 tesseract::UnicharRating *Result);
100
101 #ifndef GRAPHICS_DISABLED  // the three debug helpers below are declared only when GRAPHICS_DISABLED is not defined
102 void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
103 const ScratchEvidence &tables, int16_t NumFeatures, int Debug);
104
105 void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask,
106 const ScratchEvidence &tables, bool SeparateDebugWindows);
107
108 void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
109 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
110 int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows);
111 #endif  // !GRAPHICS_DISABLED
112
113 private:  // data members (access is already private at this point; the label only separates them visually)
114 tesseract::IntParam *classify_debug_level_;  // raw pointer; ownership is not visible from this header
115 uint8_t similarity_evidence_table_[SE_TABLE_SIZE];  // SE_TABLE_SIZE (512) one-byte entries
116 uint32_t evidence_table_mask_;  // NOTE(review): the four uint32_t fields below are presumably derived in the constructor - confirm in the .cpp
117 uint32_t mult_trunc_shift_bits_;
118 uint32_t table_trunc_shift_bits_;
119 uint32_t evidence_mult_mask_;
120 };
121
122 } // namespace tesseract
123
124 #endif