Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/clusttool.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: clusttool.cpp | |
| 3 ** Purpose: Misc. tools for use with the clustering routines | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 *****************************************************************************/ | |
| 17 | |
| 18 #define _USE_MATH_DEFINES // for M_PI | |
| 19 | |
| 20 #include "clusttool.h" | |
| 21 | |
| 22 #include <cmath> // for M_PI, std::isnan | |
| 23 #include <locale> // for std::locale::classic | |
| 24 #include <sstream> // for std::stringstream | |
| 25 | |
| 26 namespace tesseract { | |
| 27 | |
| 28 //---------------Global Data Definitions and Declarations-------------------- | |
| 29 #define TOKENSIZE 80 ///< max size of tokens read from an input file | |
| 30 #define QUOTED_TOKENSIZE "79" | |
| 31 #define MAXSAMPLESIZE 65535 ///< max num of dimensions in feature space | |
| 32 | |
| 33 /** | |
| 34 * This routine reads N floats from the specified text file | |
| 35 * and places them into Buffer. If Buffer is nullptr, a buffer | |
| 36 * is created and passed back to the caller. If EOF is | |
| 37 * encountered before any floats can be read, nullptr is | |
| 38 * returned. | |
| 39 * @param fp open text file to read floats from | |
| 40 * @param N number of floats to read | |
| 41 * @param Buffer pointer to buffer to place floats into | |
| 42 * @return Pointer to buffer holding floats or nullptr if EOF | |
| 43 * @note Globals: None | |
| 44 */ | |
| 45 static bool ReadNFloats(TFile *fp, uint16_t N, float Buffer[]) { | |
| 46 const int kMaxLineSize = 1024; | |
| 47 char line[kMaxLineSize]; | |
| 48 if (fp->FGets(line, kMaxLineSize) == nullptr) { | |
| 49 tprintf("Hit EOF in ReadNFloats!\n"); | |
| 50 return false; | |
| 51 } | |
| 52 | |
| 53 std::stringstream stream(line); | |
| 54 // Use "C" locale (needed for float values Buffer[i]). | |
| 55 stream.imbue(std::locale::classic()); | |
| 56 for (uint16_t i = 0; i < N; i++) { | |
| 57 float f = NAN; | |
| 58 stream >> f; | |
| 59 if (std::isnan(f)) { | |
| 60 tprintf("Read of %u floats failed!\n", N); | |
| 61 return false; | |
| 62 } | |
| 63 Buffer[i] = f; | |
| 64 } | |
| 65 return true; | |
| 66 } | |
| 67 | |
/**
 * Emits N floats from Array to File as text on a single line. Each value
 * is preceded by one blank and formatted with "%9.6f"; the line is
 * terminated with a newline.
 * @param File open text file to write the floats to
 * @param N number of floats to write
 * @param Array floats to write; elements 0..N-1 are read
 */
static void WriteNFloats(FILE *File, uint16_t N, float Array[]) {
  const float *const end = Array + N;
  for (const float *value = Array; value != end; ++value) {
    fprintf(File, " %9.6f", *value);
  }
  fprintf(File, "\n");
}
| 81 | |
| 82 /** | |
| 83 * This routine writes to the specified text file a word | |
| 84 * which represents the ProtoStyle. It does not append | |
| 85 * a carriage return to the end. | |
| 86 * @param File open text file to write prototype style to | |
| 87 * @param ProtoStyle prototype style to write | |
| 88 */ | |
| 89 static void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { | |
| 90 switch (ProtoStyle) { | |
| 91 case spherical: | |
| 92 fprintf(File, "spherical"); | |
| 93 break; | |
| 94 case elliptical: | |
| 95 fprintf(File, "elliptical"); | |
| 96 break; | |
| 97 case mixed: | |
| 98 fprintf(File, "mixed"); | |
| 99 break; | |
| 100 case automatic: | |
| 101 fprintf(File, "automatic"); | |
| 102 break; | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 /** | |
| 107 * This routine reads a single integer from the specified | |
| 108 * file and checks to ensure that it is between 0 and | |
| 109 * MAXSAMPLESIZE. | |
| 110 * @param fp open text file to read sample size from | |
| 111 * @return Sample size | |
| 112 * @note Globals: None | |
| 113 */ | |
| 114 uint16_t ReadSampleSize(TFile *fp) { | |
| 115 int SampleSize = 0; | |
| 116 | |
| 117 const int kMaxLineSize = 100; | |
| 118 char line[kMaxLineSize]; | |
| 119 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); | |
| 120 ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1); | |
| 121 ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE); | |
| 122 return SampleSize; | |
| 123 } | |
| 124 | |
| 125 /** | |
| 126 * This routine reads textual descriptions of sets of parameters | |
| 127 * which describe the characteristics of feature dimensions. | |
| 128 * | |
| 129 * @param fp open text file to read N parameter descriptions from | |
| 130 * @param N number of parameter descriptions to read | |
| 131 * @return Pointer to an array of parameter descriptors. | |
| 132 * @note Globals: None | |
| 133 */ | |
| 134 PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) { | |
| 135 auto ParamDesc = new PARAM_DESC[N]; | |
| 136 for (int i = 0; i < N; i++) { | |
| 137 const int kMaxLineSize = TOKENSIZE * 4; | |
| 138 char line[kMaxLineSize]; | |
| 139 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr); | |
| 140 std::istringstream stream(line); | |
| 141 // Use "C" locale (needed for float values Min, Max). | |
| 142 stream.imbue(std::locale::classic()); | |
| 143 std::string linear_token; | |
| 144 stream >> linear_token; | |
| 145 std::string essential_token; | |
| 146 stream >> essential_token; | |
| 147 stream >> ParamDesc[i].Min; | |
| 148 stream >> ParamDesc[i].Max; | |
| 149 ASSERT_HOST(!stream.fail()); | |
| 150 ParamDesc[i].Circular = (linear_token[0] == 'c'); | |
| 151 ParamDesc[i].NonEssential = (essential_token[0] != 'e'); | |
| 152 ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; | |
| 153 ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; | |
| 154 ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; | |
| 155 } | |
| 156 return (ParamDesc); | |
| 157 } | |
| 158 | |
| 159 /** | |
| 160 * This routine reads a textual description of a prototype from | |
| 161 * the specified file. | |
| 162 * | |
| 163 * @param fp open text file to read prototype from | |
| 164 * @param N number of dimensions used in prototype | |
| 165 * @return List of prototypes | |
| 166 * @note Globals: None | |
| 167 */ | |
| 168 PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { | |
| 169 char sig_token[TOKENSIZE], shape_token[TOKENSIZE]; | |
| 170 int SampleCount; | |
| 171 int i; | |
| 172 | |
| 173 const int kMaxLineSize = TOKENSIZE * 4; | |
| 174 char line[kMaxLineSize]; | |
| 175 if (fp->FGets(line, kMaxLineSize) == nullptr || | |
| 176 sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", sig_token, shape_token, | |
| 177 &SampleCount) != 3) { | |
| 178 tprintf("Invalid prototype: %s\n", line); | |
| 179 return nullptr; | |
| 180 } | |
| 181 auto Proto = new PROTOTYPE; | |
| 182 Proto->Cluster = nullptr; | |
| 183 Proto->Significant = (sig_token[0] == 's'); | |
| 184 | |
| 185 switch (shape_token[0]) { | |
| 186 case 's': | |
| 187 Proto->Style = spherical; | |
| 188 break; | |
| 189 case 'e': | |
| 190 Proto->Style = elliptical; | |
| 191 break; | |
| 192 case 'a': | |
| 193 Proto->Style = automatic; | |
| 194 break; | |
| 195 default: | |
| 196 tprintf("Invalid prototype style specification:%s\n", shape_token); | |
| 197 Proto->Style = elliptical; | |
| 198 } | |
| 199 | |
| 200 ASSERT_HOST(SampleCount >= 0); | |
| 201 Proto->NumSamples = SampleCount; | |
| 202 | |
| 203 Proto->Mean.resize(N); | |
| 204 ReadNFloats(fp, N, &Proto->Mean[0]); | |
| 205 | |
| 206 switch (Proto->Style) { | |
| 207 case spherical: | |
| 208 ReadNFloats(fp, 1, &(Proto->Variance.Spherical)); | |
| 209 Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); | |
| 210 Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast<float>(N)); | |
| 211 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude)); | |
| 212 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; | |
| 213 Proto->Distrib.clear(); | |
| 214 break; | |
| 215 case elliptical: | |
| 216 Proto->Variance.Elliptical = new float[N]; | |
| 217 ReadNFloats(fp, N, Proto->Variance.Elliptical); | |
| 218 Proto->Magnitude.Elliptical = new float[N]; | |
| 219 Proto->Weight.Elliptical = new float[N]; | |
| 220 Proto->TotalMagnitude = 1.0; | |
| 221 for (i = 0; i < N; i++) { | |
| 222 Proto->Magnitude.Elliptical[i] = 1.0f / sqrt(2.0f * M_PI * Proto->Variance.Elliptical[i]); | |
| 223 Proto->Weight.Elliptical[i] = 1.0f / Proto->Variance.Elliptical[i]; | |
| 224 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; | |
| 225 } | |
| 226 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude)); | |
| 227 Proto->Distrib.clear(); | |
| 228 break; | |
| 229 default: | |
| 230 delete Proto; | |
| 231 tprintf("Invalid prototype style\n"); | |
| 232 return nullptr; | |
| 233 } | |
| 234 return Proto; | |
| 235 } | |
| 236 | |
| 237 /** | |
| 238 * This routine writes an array of dimension descriptors to | |
| 239 * the specified text file. | |
| 240 * @param File open text file to write param descriptors to | |
| 241 * @param N number of param descriptors to write | |
| 242 * @param ParamDesc array of param descriptors to write | |
| 243 */ | |
| 244 void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) { | |
| 245 int i; | |
| 246 | |
| 247 for (i = 0; i < N; i++) { | |
| 248 if (ParamDesc[i].Circular) { | |
| 249 fprintf(File, "circular "); | |
| 250 } else { | |
| 251 fprintf(File, "linear "); | |
| 252 } | |
| 253 | |
| 254 if (ParamDesc[i].NonEssential) { | |
| 255 fprintf(File, "non-essential "); | |
| 256 } else { | |
| 257 fprintf(File, "essential "); | |
| 258 } | |
| 259 | |
| 260 fprintf(File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); | |
| 261 } | |
| 262 } | |
| 263 | |
| 264 /** | |
| 265 * This routine writes a textual description of a prototype | |
| 266 * to the specified text file. | |
| 267 * @param File open text file to write prototype to | |
| 268 * @param N number of dimensions in feature space | |
| 269 * @param Proto prototype to write out | |
| 270 */ | |
| 271 void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) { | |
| 272 int i; | |
| 273 | |
| 274 if (Proto->Significant) { | |
| 275 fprintf(File, "significant "); | |
| 276 } else { | |
| 277 fprintf(File, "insignificant "); | |
| 278 } | |
| 279 WriteProtoStyle(File, static_cast<PROTOSTYLE>(Proto->Style)); | |
| 280 fprintf(File, "%6u\n\t", Proto->NumSamples); | |
| 281 WriteNFloats(File, N, &Proto->Mean[0]); | |
| 282 fprintf(File, "\t"); | |
| 283 | |
| 284 switch (Proto->Style) { | |
| 285 case spherical: | |
| 286 WriteNFloats(File, 1, &(Proto->Variance.Spherical)); | |
| 287 break; | |
| 288 case elliptical: | |
| 289 WriteNFloats(File, N, Proto->Variance.Elliptical); | |
| 290 break; | |
| 291 case mixed: | |
| 292 for (i = 0; i < N; i++) { | |
| 293 switch (Proto->Distrib[i]) { | |
| 294 case normal: | |
| 295 fprintf(File, " %9s", "normal"); | |
| 296 break; | |
| 297 case uniform: | |
| 298 fprintf(File, " %9s", "uniform"); | |
| 299 break; | |
| 300 case D_random: | |
| 301 fprintf(File, " %9s", "random"); | |
| 302 break; | |
| 303 case DISTRIBUTION_COUNT: | |
| 304 ASSERT_HOST(!"Distribution count not allowed!"); | |
| 305 } | |
| 306 } | |
| 307 fprintf(File, "\n\t"); | |
| 308 WriteNFloats(File, N, Proto->Variance.Elliptical); | |
| 309 } | |
| 310 } | |
| 311 | |
| 312 } // namespace tesseract |
