Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/adaptive.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: adaptive.c | |
| 3 ** Purpose: Adaptive matcher. | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 ******************************************************************************/ | |
| 17 | |
| 18 #include "adaptive.h" | |
| 19 | |
| 20 #include "classify.h" | |
| 21 | |
| 22 #include <cassert> | |
| 23 #include <cstdio> | |
| 24 | |
| 25 namespace tesseract { | |
| 26 | |
| 27 /*---------------------------------------------------------------------------- | |
| 28 Public Code | |
| 29 ----------------------------------------------------------------------------*/ | |
| 30 /*---------------------------------------------------------------------------*/ | |
| 31 /** | |
| 32 * This routine adds a new adapted class to an existing | |
| 33 * set of adapted templates. | |
| 34 * | |
| 35 * @param Templates set of templates to add new class to | |
| 36 * @param Class new class to add to templates | |
| 37 * @param ClassId class id to associate with new class | |
| 38 * | |
| 39 * @note Globals: none | |
| 40 */ | |
| 41 void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId) { | |
| 42 assert(Templates != nullptr); | |
| 43 assert(Class != nullptr); | |
| 44 assert(LegalClassId(ClassId)); | |
| 45 assert(UnusedClassIdIn(Templates->Templates, ClassId)); | |
| 46 assert(Class->NumPermConfigs == 0); | |
| 47 | |
| 48 auto IntClass = new INT_CLASS_STRUCT(1, 1); | |
| 49 AddIntClass(Templates->Templates, ClassId, IntClass); | |
| 50 | |
| 51 assert(Templates->Class[ClassId] == nullptr); | |
| 52 Templates->Class[ClassId] = Class; | |
| 53 | |
| 54 } /* AddAdaptedClass */ | |
| 55 | |
| 56 /*---------------------------------------------------------------------------*/ | |
| 57 | |
| 58 PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() { | |
| 59 delete[] Ambigs; | |
| 60 } | |
| 61 | |
| 62 ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() : | |
| 63 NumPermConfigs(0), | |
| 64 MaxNumTimesSeen(0), | |
| 65 PermProtos(NewBitVector(MAX_NUM_PROTOS)), | |
| 66 PermConfigs(NewBitVector(MAX_NUM_CONFIGS)), | |
| 67 TempProtos(NIL_LIST) { | |
| 68 zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS)); | |
| 69 zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS)); | |
| 70 | |
| 71 for (int i = 0; i < MAX_NUM_CONFIGS; i++) { | |
| 72 TempConfigFor(this, i) = nullptr; | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 ADAPT_CLASS_STRUCT::~ADAPT_CLASS_STRUCT() { | |
| 77 for (int i = 0; i < MAX_NUM_CONFIGS; i++) { | |
| 78 if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) { | |
| 79 delete PermConfigFor(this, i); | |
| 80 } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) { | |
| 81 delete TempConfigFor(this, i); | |
| 82 } | |
| 83 } | |
| 84 FreeBitVector(PermProtos); | |
| 85 FreeBitVector(PermConfigs); | |
| 86 auto list = TempProtos; | |
| 87 while (list != nullptr) { | |
| 88 delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node); | |
| 89 list = pop(list); | |
| 90 } | |
| 91 } | |
| 92 | |
| 93 /// Constructor for adapted templates. | |
| 94 /// Add an empty class for each char in unicharset to the newly created templates. | |
| 95 ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) { | |
| 96 Templates = new INT_TEMPLATES_STRUCT; | |
| 97 NumPermClasses = 0; | |
| 98 NumNonEmptyClasses = 0; | |
| 99 | |
| 100 /* Insert an empty class for each unichar id in unicharset */ | |
| 101 for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) { | |
| 102 Class[i] = nullptr; | |
| 103 if (i < unicharset.size()) { | |
| 104 AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i); | |
| 105 } | |
| 106 } | |
| 107 } | |
| 108 | |
| 109 ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() { | |
| 110 for (unsigned i = 0; i < (Templates)->NumClasses; i++) { | |
| 111 delete Class[i]; | |
| 112 } | |
| 113 delete Templates; | |
| 114 } | |
| 115 | |
| 116 // Returns FontinfoId of the given config of the given adapted class. | |
| 117 int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) { | |
| 118 return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId | |
| 119 : TempConfigFor(Class, ConfigId)->FontinfoId); | |
| 120 } | |
| 121 | |
| 122 /// This constructor allocates and returns a new temporary config. | |
| 123 /// | |
| 124 /// @param MaxProtoId max id of any proto in new config | |
| 125 /// @param FontinfoId font information from pre-trained templates | |
| 126 TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) : | |
| 127 NumTimesSeen(1), | |
| 128 ProtoVectorSize(WordsInVectorOfSize(maxProtoId + 1)), | |
| 129 MaxProtoId(maxProtoId), | |
| 130 Protos(NewBitVector(maxProtoId + 1)), | |
| 131 FontinfoId(fontinfoId) { | |
| 132 zero_all_bits(Protos, ProtoVectorSize); | |
| 133 } | |
| 134 | |
| 135 TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() { | |
| 136 FreeBitVector(Protos); | |
| 137 } | |
| 138 | |
| 139 /*---------------------------------------------------------------------------*/ | |
| 140 /** | |
| 141 * This routine prints a summary of the adapted templates | |
| 142 * in Templates to File. | |
| 143 * | |
| 144 * @param File open text file to print Templates to | |
| 145 * @param Templates adapted templates to print to File | |
| 146 * | |
| 147 * @note Globals: none | |
| 148 */ | |
| 149 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) { | |
| 150 INT_CLASS_STRUCT *IClass; | |
| 151 ADAPT_CLASS_STRUCT *AClass; | |
| 152 | |
| 153 fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); | |
| 154 fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses, | |
| 155 Templates->NumPermClasses); | |
| 156 fprintf(File, " Id NC NPC NP NPP\n"); | |
| 157 fprintf(File, "------------------------\n"); | |
| 158 | |
| 159 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) { | |
| 160 IClass = Templates->Templates->Class[i]; | |
| 161 AClass = Templates->Class[i]; | |
| 162 if (!IsEmptyAdaptedClass(AClass)) { | |
| 163 fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs, | |
| 164 AClass->NumPermConfigs, IClass->NumProtos, | |
| 165 IClass->NumProtos - AClass->TempProtos->size()); | |
| 166 } | |
| 167 } | |
| 168 fprintf(File, "\n"); | |
| 169 | |
| 170 } /* PrintAdaptedTemplates */ | |
| 171 | |
| 172 /*---------------------------------------------------------------------------*/ | |
| 173 /** | |
| 174 * Read an adapted class description from file and return | |
| 175 * a ptr to the adapted class. | |
| 176 * | |
| 177 * @param fp open file to read adapted class from | |
| 178 * @return Ptr to new adapted class. | |
| 179 * | |
| 180 * @note Globals: none | |
| 181 */ | |
| 182 ADAPT_CLASS_STRUCT *ReadAdaptedClass(TFile *fp) { | |
| 183 int NumTempProtos; | |
| 184 int NumConfigs; | |
| 185 int i; | |
| 186 ADAPT_CLASS_STRUCT *Class; | |
| 187 | |
| 188 /* first read high level adapted class structure */ | |
| 189 Class = new ADAPT_CLASS_STRUCT; | |
| 190 fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1); | |
| 191 | |
| 192 /* then read in the definitions of the permanent protos and configs */ | |
| 193 Class->PermProtos = NewBitVector(MAX_NUM_PROTOS); | |
| 194 Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS); | |
| 195 fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS)); | |
| 196 fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS)); | |
| 197 | |
| 198 /* then read in the list of temporary protos */ | |
| 199 fp->FRead(&NumTempProtos, sizeof(int), 1); | |
| 200 Class->TempProtos = NIL_LIST; | |
| 201 for (i = 0; i < NumTempProtos; i++) { | |
| 202 auto TempProto = new TEMP_PROTO_STRUCT; | |
| 203 fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1); | |
| 204 Class->TempProtos = push_last(Class->TempProtos, TempProto); | |
| 205 } | |
| 206 | |
| 207 /* then read in the adapted configs */ | |
| 208 fp->FRead(&NumConfigs, sizeof(int), 1); | |
| 209 for (i = 0; i < NumConfigs; i++) { | |
| 210 if (test_bit(Class->PermConfigs, i)) { | |
| 211 Class->Config[i].Perm = ReadPermConfig(fp); | |
| 212 } else { | |
| 213 Class->Config[i].Temp = ReadTempConfig(fp); | |
| 214 } | |
| 215 } | |
| 216 | |
| 217 return (Class); | |
| 218 | |
| 219 } /* ReadAdaptedClass */ | |
| 220 | |
| 221 /*---------------------------------------------------------------------------*/ | |
| 222 /** | |
| 223 * Read a set of adapted templates from file and return | |
| 224 * a ptr to the templates. | |
| 225 * | |
| 226 * @param fp open text file to read adapted templates from | |
| 227 * @return Ptr to adapted templates read from file. | |
| 228 * | |
| 229 * @note Globals: none | |
| 230 */ | |
| 231 ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) { | |
| 232 auto Templates = new ADAPT_TEMPLATES_STRUCT; | |
| 233 | |
| 234 /* first read the high level adaptive template struct */ | |
| 235 fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); | |
| 236 | |
| 237 /* then read in the basic integer templates */ | |
| 238 Templates->Templates = ReadIntTemplates(fp); | |
| 239 | |
| 240 /* then read in the adaptive info for each class */ | |
| 241 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) { | |
| 242 Templates->Class[i] = ReadAdaptedClass(fp); | |
| 243 } | |
| 244 return (Templates); | |
| 245 | |
| 246 } /* ReadAdaptedTemplates */ | |
| 247 | |
| 248 /*---------------------------------------------------------------------------*/ | |
| 249 /** | |
| 250 * Read a permanent configuration description from file | |
| 251 * and return a ptr to it. | |
| 252 * | |
| 253 * @param fp open file to read permanent config from | |
| 254 * @return Ptr to new permanent configuration description. | |
| 255 * | |
| 256 * @note Globals: none | |
| 257 */ | |
| 258 PERM_CONFIG_STRUCT *ReadPermConfig(TFile *fp) { | |
| 259 auto Config = new PERM_CONFIG_STRUCT; | |
| 260 uint8_t NumAmbigs; | |
| 261 fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1); | |
| 262 Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1]; | |
| 263 fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs); | |
| 264 Config->Ambigs[NumAmbigs] = -1; | |
| 265 fp->FRead(&(Config->FontinfoId), sizeof(int), 1); | |
| 266 | |
| 267 return (Config); | |
| 268 | |
| 269 } /* ReadPermConfig */ | |
| 270 | |
| 271 /*---------------------------------------------------------------------------*/ | |
| 272 /** | |
| 273 * Read a temporary configuration description from file | |
| 274 * and return a ptr to it. | |
| 275 * | |
| 276 * @param fp open file to read temporary config from | |
| 277 * @return Ptr to new temporary configuration description. | |
| 278 * | |
| 279 * @note Globals: none | |
| 280 */ | |
| 281 TEMP_CONFIG_STRUCT *ReadTempConfig(TFile *fp) { | |
| 282 auto Config = new TEMP_CONFIG_STRUCT; | |
| 283 fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1); | |
| 284 | |
| 285 Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG); | |
| 286 fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize); | |
| 287 | |
| 288 return (Config); | |
| 289 | |
| 290 } /* ReadTempConfig */ | |
| 291 | |
| 292 /*---------------------------------------------------------------------------*/ | |
| 293 /** | |
| 294 * This routine writes a binary representation of Class | |
| 295 * to File. | |
| 296 * | |
| 297 * @param File open file to write Class to | |
| 298 * @param Class adapted class to write to File | |
| 299 * @param NumConfigs number of configs in Class | |
| 300 * | |
| 301 * @note Globals: none | |
| 302 */ | |
| 303 void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) { | |
| 304 /* first write high level adapted class structure */ | |
| 305 fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File); | |
| 306 | |
| 307 /* then write out the definitions of the permanent protos and configs */ | |
| 308 fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File); | |
| 309 fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File); | |
| 310 | |
| 311 /* then write out the list of temporary protos */ | |
| 312 uint32_t NumTempProtos = Class->TempProtos->size(); | |
| 313 fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File); | |
| 314 auto TempProtos = Class->TempProtos; | |
| 315 iterate(TempProtos) { | |
| 316 void *proto = TempProtos->node; | |
| 317 fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File); | |
| 318 } | |
| 319 | |
| 320 /* then write out the adapted configs */ | |
| 321 fwrite(&NumConfigs, sizeof(int), 1, File); | |
| 322 for (int i = 0; i < NumConfigs; i++) { | |
| 323 if (test_bit(Class->PermConfigs, i)) { | |
| 324 WritePermConfig(File, Class->Config[i].Perm); | |
| 325 } else { | |
| 326 WriteTempConfig(File, Class->Config[i].Temp); | |
| 327 } | |
| 328 } | |
| 329 | |
| 330 } /* WriteAdaptedClass */ | |
| 331 | |
| 332 /*---------------------------------------------------------------------------*/ | |
| 333 /** | |
| 334 * This routine saves Templates to File in a binary format. | |
| 335 * | |
| 336 * @param File open text file to write Templates to | |
| 337 * @param Templates set of adapted templates to write to File | |
| 338 * | |
| 339 * @note Globals: none | |
| 340 */ | |
| 341 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) { | |
| 342 /* first write the high level adaptive template struct */ | |
| 343 fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File); | |
| 344 | |
| 345 /* then write out the basic integer templates */ | |
| 346 WriteIntTemplates(File, Templates->Templates, unicharset); | |
| 347 | |
| 348 /* then write out the adaptive info for each class */ | |
| 349 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) { | |
| 350 WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs); | |
| 351 } | |
| 352 } /* WriteAdaptedTemplates */ | |
| 353 | |
| 354 /*---------------------------------------------------------------------------*/ | |
| 355 /** | |
| 356 * This routine writes a binary representation of a | |
| 357 * permanent configuration to File. | |
| 358 * | |
| 359 * @param File open file to write Config to | |
| 360 * @param Config permanent config to write to File | |
| 361 * | |
| 362 * @note Globals: none | |
| 363 */ | |
| 364 void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config) { | |
| 365 uint8_t NumAmbigs = 0; | |
| 366 | |
| 367 assert(Config != nullptr); | |
| 368 while (Config->Ambigs[NumAmbigs] > 0) { | |
| 369 ++NumAmbigs; | |
| 370 } | |
| 371 | |
| 372 fwrite(&NumAmbigs, sizeof(uint8_t), 1, File); | |
| 373 fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); | |
| 374 fwrite(&(Config->FontinfoId), sizeof(int), 1, File); | |
| 375 } /* WritePermConfig */ | |
| 376 | |
| 377 /*---------------------------------------------------------------------------*/ | |
| 378 /** | |
| 379 * This routine writes a binary representation of a | |
| 380 * temporary configuration to File. | |
| 381 * | |
| 382 * @param File open file to write Config to | |
| 383 * @param Config temporary config to write to File | |
| 384 * | |
| 385 * @note Globals: none | |
| 386 */ | |
| 387 void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config) { | |
| 388 assert(Config != nullptr); | |
| 389 | |
| 390 fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File); | |
| 391 fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File); | |
| 392 | |
| 393 } /* WriteTempConfig */ | |
| 394 | |
| 395 } // namespace tesseract |
