comparison mupdf-source/thirdparty/tesseract/src/classify/float2int.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: float2int.cpp
3 ** Purpose: Routines for converting float features to int features
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #include "float2int.h"
19
20 #include "classify.h"
21 #include "mfoutline.h"
22 #include "normmatch.h"
23 #include "picofeat.h"
24
25 #include "helpers.h"
26
27 #define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1)
28
29 /*---------------------------------------------------------------------------*/
30 namespace tesseract {
31
32 /**
33 * For each class in the unicharset, clears the corresponding
34 * entry in char_norm_array. char_norm_array is indexed by unichar_id.
35 *
36 * Globals:
37 * - none
38 *
39 * @param char_norm_array array to be cleared
40 */
41 void Classify::ClearCharNormArray(uint8_t *char_norm_array) {
42 memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
43 } /* ClearCharNormArray */
44
45 /*---------------------------------------------------------------------------*/
46 /**
47 * For each class in unicharset, computes the match between
48 * norm_feature and the normalization protos for that class.
49 * Converts this number to the range from 0 - 255 and stores it
50 * into char_norm_array. CharNormArray is indexed by unichar_id.
51 *
52 * Globals:
53 * - PreTrainedTemplates current set of built-in templates
54 *
55 * @param norm_feature character normalization feature
56 * @param[out] char_norm_array place to put results of size unicharset.size()
57 */
58 void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature,
59 uint8_t *char_norm_array) {
60 for (unsigned i = 0; i < unicharset.size(); i++) {
61 if (i < PreTrainedTemplates->NumClasses) {
62 int norm_adjust =
63 static_cast<int>(INT_CHAR_NORM_RANGE * ComputeNormMatch(i, norm_feature, false));
64 char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
65 } else {
66 // Classes with no templates (eg. ambigs & ligatures) default
67 // to worst match.
68 char_norm_array[i] = MAX_INT_CHAR_NORM;
69 }
70 }
71 } /* ComputeIntCharNormArray */
72
73 /*---------------------------------------------------------------------------*/
74 /**
75 * This routine converts each floating point pico-feature
76 * in Features into integer format and saves it into
77 * IntFeatures.
78 *
79 * Globals:
80 * - none
81 *
82 * @param Features floating point pico-features to be converted
83 * @param[out] IntFeatures array to put converted features into
84 */
85 void Classify::ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) {
86 float YShift;
87
88 if (classify_norm_method == baseline) {
89 YShift = BASELINE_Y_SHIFT;
90 } else {
91 YShift = Y_SHIFT;
92 }
93
94 for (int Fid = 0; Fid < Features->NumFeatures; Fid++) {
95 FEATURE Feature = Features->Features[Fid];
96
97 IntFeatures[Fid].X = Bucket8For(Feature->Params[PicoFeatX], X_SHIFT, INT_FEAT_RANGE);
98 IntFeatures[Fid].Y = Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
99 IntFeatures[Fid].Theta =
100 CircBucketFor(Feature->Params[PicoFeatDir], ANGLE_SHIFT, INT_FEAT_RANGE);
101 IntFeatures[Fid].CP_misses = 0;
102 }
103 } /* ComputeIntFeatures */
104
105 } // namespace tesseract