comparison mupdf-source/thirdparty/tesseract/src/classify/ocrfeatures.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: ocrfeatures.cpp
3 ** Purpose: Generic definition of a feature.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #include "ocrfeatures.h"
19
20 #include "scanutils.h"
21
22 #include <cassert>
23 #include <cmath>
24 #include <sstream> // for std::stringstream
25
26 namespace tesseract {
27
28 /*----------------------------------------------------------------------------
29 Public Code
30 ----------------------------------------------------------------------------*/
31 /**
32 * Add a feature to a feature set. If the feature set is
33 * already full, false is returned to indicate that the
34 * feature could not be added to the set; otherwise, true is
35 * returned.
36 * @param FeatureSet set of features to add Feature to
37 * @param Feature feature to be added to FeatureSet
38 * @return true if feature added to set, false if set is already full.
39 */
40 bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
41 if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) {
42 delete Feature;
43 return false;
44 }
45
46 FeatureSet->Features[FeatureSet->NumFeatures++] = Feature;
47 return true;
48 } /* AddFeature */
49
50 /**
51 * Create a new feature of the specified type and read in
52 * the value of its parameters from File. The extra penalty
53 * for the feature is also computed by calling the appropriate
54 * function for the specified feature type. The correct text
55 * representation for a feature is a list of N floats where
56 * N is the number of parameters in the feature.
57 * @param File open text file to read feature from
58 * @param FeatureDesc specifies type of feature to read from File
59 * @return New #FEATURE read from File.
60 */
61 static FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc) {
62 auto Feature = new FEATURE_STRUCT(FeatureDesc);
63 for (int i = 0; i < Feature->Type->NumParams; i++) {
64 ASSERT_HOST(tfscanf(File, "%f", &(Feature->Params[i])) == 1);
65 #ifndef _WIN32
66 assert(!std::isnan(Feature->Params[i]));
67 #endif
68 }
69 return Feature;
70 }
71
72 /**
73 * Create a new feature set of the specified type and read in
74 * the features from File. The correct text representation
75 * for a feature set is an integer which specifies the number (N)
76 * of features in a set followed by a list of N feature
77 * descriptions.
78 * @param File open text file to read new feature set from
79 * @param FeatureDesc specifies type of feature to read from File
80 * @return New feature set read from File.
81 */
82 FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc) {
83 int NumFeatures;
84 ASSERT_HOST(tfscanf(File, "%d", &NumFeatures) == 1);
85 ASSERT_HOST(NumFeatures >= 0);
86
87 auto FeatureSet = new FEATURE_SET_STRUCT(NumFeatures);
88 for (int i = 0; i < NumFeatures; i++) {
89 AddFeature(FeatureSet, ReadFeature(File, FeatureDesc));
90 }
91
92 return FeatureSet;
93 }
94
95 /**
96 * Appends a textual representation of Feature to str.
97 * This representation is simply a list of the N parameters
98 * of the feature, terminated with a newline. It is assumed
99 * that the ExtraPenalty field can be reconstructed from the
100 * parameters of the feature. It is also assumed that the
101 * feature type information is specified or assumed elsewhere.
102 * @param Feature feature to write out to str
103 * @param str string to write Feature to
104 */
105 static void WriteFeature(FEATURE Feature, std::string &str) {
106 for (int i = 0; i < Feature->Type->NumParams; i++) {
107 #ifndef WIN32
108 assert(!std::isnan(Feature->Params[i]));
109 #endif
110 std::stringstream stream;
111 // Use "C" locale (needed for double value).
112 stream.imbue(std::locale::classic());
113 // Use 8 digits for double value.
114 stream.precision(8);
115 stream << Feature->Params[i];
116 str += " " + stream.str();
117 }
118 str += "\n";
119 } /* WriteFeature */
120
121 /**
122 * Write a textual representation of FeatureSet to File.
123 * This representation is an integer specifying the number of
124 * features in the set, followed by a newline, followed by
125 * text representations for each feature in the set.
126 * @param FeatureSet feature set to write to File
127 * @param str string to write Feature to
128 */
129 void WriteFeatureSet(FEATURE_SET FeatureSet, std::string &str) {
130 if (FeatureSet) {
131 str += "" + std::to_string(FeatureSet->NumFeatures);
132 str += "\n";
133 for (int i = 0; i < FeatureSet->NumFeatures; i++) {
134 WriteFeature(FeatureSet->Features[i], str);
135 }
136 }
137 } /* WriteFeatureSet */
138
139 } // namespace tesseract