comparison mupdf-source/thirdparty/tesseract/src/classify/adaptive.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: adaptive.c
3 ** Purpose: Adaptive matcher.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #include "adaptive.h"
19
20 #include "classify.h"
21
22 #include <cassert>
23 #include <cstdio>
24
25 namespace tesseract {
26
27 /*----------------------------------------------------------------------------
28 Public Code
29 ----------------------------------------------------------------------------*/
30 /*---------------------------------------------------------------------------*/
31 /**
32 * This routine adds a new adapted class to an existing
33 * set of adapted templates.
34 *
35 * @param Templates set of templates to add new class to
36 * @param Class new class to add to templates
37 * @param ClassId class id to associate with new class
38 *
39 * @note Globals: none
40 */
41 void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId) {
42 assert(Templates != nullptr);
43 assert(Class != nullptr);
44 assert(LegalClassId(ClassId));
45 assert(UnusedClassIdIn(Templates->Templates, ClassId));
46 assert(Class->NumPermConfigs == 0);
47
48 auto IntClass = new INT_CLASS_STRUCT(1, 1);
49 AddIntClass(Templates->Templates, ClassId, IntClass);
50
51 assert(Templates->Class[ClassId] == nullptr);
52 Templates->Class[ClassId] = Class;
53
54 } /* AddAdaptedClass */
55
56 /*---------------------------------------------------------------------------*/
57
58 PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() {
59 delete[] Ambigs;
60 }
61
62 ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() :
63 NumPermConfigs(0),
64 MaxNumTimesSeen(0),
65 PermProtos(NewBitVector(MAX_NUM_PROTOS)),
66 PermConfigs(NewBitVector(MAX_NUM_CONFIGS)),
67 TempProtos(NIL_LIST) {
68 zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
69 zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS));
70
71 for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
72 TempConfigFor(this, i) = nullptr;
73 }
74 }
75
76 ADAPT_CLASS_STRUCT::~ADAPT_CLASS_STRUCT() {
77 for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
78 if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) {
79 delete PermConfigFor(this, i);
80 } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) {
81 delete TempConfigFor(this, i);
82 }
83 }
84 FreeBitVector(PermProtos);
85 FreeBitVector(PermConfigs);
86 auto list = TempProtos;
87 while (list != nullptr) {
88 delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node);
89 list = pop(list);
90 }
91 }
92
93 /// Constructor for adapted templates.
94 /// Add an empty class for each char in unicharset to the newly created templates.
95 ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) {
96 Templates = new INT_TEMPLATES_STRUCT;
97 NumPermClasses = 0;
98 NumNonEmptyClasses = 0;
99
100 /* Insert an empty class for each unichar id in unicharset */
101 for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
102 Class[i] = nullptr;
103 if (i < unicharset.size()) {
104 AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i);
105 }
106 }
107 }
108
109 ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() {
110 for (unsigned i = 0; i < (Templates)->NumClasses; i++) {
111 delete Class[i];
112 }
113 delete Templates;
114 }
115
116 // Returns FontinfoId of the given config of the given adapted class.
117 int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) {
118 return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
119 : TempConfigFor(Class, ConfigId)->FontinfoId);
120 }
121
122 /// This constructor allocates and returns a new temporary config.
123 ///
124 /// @param MaxProtoId max id of any proto in new config
125 /// @param FontinfoId font information from pre-trained templates
126 TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) :
127 NumTimesSeen(1),
128 ProtoVectorSize(WordsInVectorOfSize(maxProtoId + 1)),
129 MaxProtoId(maxProtoId),
130 Protos(NewBitVector(maxProtoId + 1)),
131 FontinfoId(fontinfoId) {
132 zero_all_bits(Protos, ProtoVectorSize);
133 }
134
135 TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() {
136 FreeBitVector(Protos);
137 }
138
139 /*---------------------------------------------------------------------------*/
140 /**
141 * This routine prints a summary of the adapted templates
142 * in Templates to File.
143 *
144 * @param File open text file to print Templates to
145 * @param Templates adapted templates to print to File
146 *
147 * @note Globals: none
148 */
149 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
150 INT_CLASS_STRUCT *IClass;
151 ADAPT_CLASS_STRUCT *AClass;
152
153 fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
154 fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
155 Templates->NumPermClasses);
156 fprintf(File, " Id NC NPC NP NPP\n");
157 fprintf(File, "------------------------\n");
158
159 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
160 IClass = Templates->Templates->Class[i];
161 AClass = Templates->Class[i];
162 if (!IsEmptyAdaptedClass(AClass)) {
163 fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
164 AClass->NumPermConfigs, IClass->NumProtos,
165 IClass->NumProtos - AClass->TempProtos->size());
166 }
167 }
168 fprintf(File, "\n");
169
170 } /* PrintAdaptedTemplates */
171
172 /*---------------------------------------------------------------------------*/
173 /**
174 * Read an adapted class description from file and return
175 * a ptr to the adapted class.
176 *
177 * @param fp open file to read adapted class from
178 * @return Ptr to new adapted class.
179 *
180 * @note Globals: none
181 */
182 ADAPT_CLASS_STRUCT *ReadAdaptedClass(TFile *fp) {
183 int NumTempProtos;
184 int NumConfigs;
185 int i;
186 ADAPT_CLASS_STRUCT *Class;
187
188 /* first read high level adapted class structure */
189 Class = new ADAPT_CLASS_STRUCT;
190 fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
191
192 /* then read in the definitions of the permanent protos and configs */
193 Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
194 Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
195 fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
196 fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
197
198 /* then read in the list of temporary protos */
199 fp->FRead(&NumTempProtos, sizeof(int), 1);
200 Class->TempProtos = NIL_LIST;
201 for (i = 0; i < NumTempProtos; i++) {
202 auto TempProto = new TEMP_PROTO_STRUCT;
203 fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
204 Class->TempProtos = push_last(Class->TempProtos, TempProto);
205 }
206
207 /* then read in the adapted configs */
208 fp->FRead(&NumConfigs, sizeof(int), 1);
209 for (i = 0; i < NumConfigs; i++) {
210 if (test_bit(Class->PermConfigs, i)) {
211 Class->Config[i].Perm = ReadPermConfig(fp);
212 } else {
213 Class->Config[i].Temp = ReadTempConfig(fp);
214 }
215 }
216
217 return (Class);
218
219 } /* ReadAdaptedClass */
220
221 /*---------------------------------------------------------------------------*/
222 /**
223 * Read a set of adapted templates from file and return
224 * a ptr to the templates.
225 *
226 * @param fp open text file to read adapted templates from
227 * @return Ptr to adapted templates read from file.
228 *
229 * @note Globals: none
230 */
231 ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) {
232 auto Templates = new ADAPT_TEMPLATES_STRUCT;
233
234 /* first read the high level adaptive template struct */
235 fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
236
237 /* then read in the basic integer templates */
238 Templates->Templates = ReadIntTemplates(fp);
239
240 /* then read in the adaptive info for each class */
241 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
242 Templates->Class[i] = ReadAdaptedClass(fp);
243 }
244 return (Templates);
245
246 } /* ReadAdaptedTemplates */
247
248 /*---------------------------------------------------------------------------*/
249 /**
250 * Read a permanent configuration description from file
251 * and return a ptr to it.
252 *
253 * @param fp open file to read permanent config from
254 * @return Ptr to new permanent configuration description.
255 *
256 * @note Globals: none
257 */
258 PERM_CONFIG_STRUCT *ReadPermConfig(TFile *fp) {
259 auto Config = new PERM_CONFIG_STRUCT;
260 uint8_t NumAmbigs;
261 fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
262 Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
263 fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
264 Config->Ambigs[NumAmbigs] = -1;
265 fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
266
267 return (Config);
268
269 } /* ReadPermConfig */
270
271 /*---------------------------------------------------------------------------*/
272 /**
273 * Read a temporary configuration description from file
274 * and return a ptr to it.
275 *
276 * @param fp open file to read temporary config from
277 * @return Ptr to new temporary configuration description.
278 *
279 * @note Globals: none
280 */
281 TEMP_CONFIG_STRUCT *ReadTempConfig(TFile *fp) {
282 auto Config = new TEMP_CONFIG_STRUCT;
283 fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
284
285 Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
286 fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
287
288 return (Config);
289
290 } /* ReadTempConfig */
291
292 /*---------------------------------------------------------------------------*/
293 /**
294 * This routine writes a binary representation of Class
295 * to File.
296 *
297 * @param File open file to write Class to
298 * @param Class adapted class to write to File
299 * @param NumConfigs number of configs in Class
300 *
301 * @note Globals: none
302 */
303 void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
304 /* first write high level adapted class structure */
305 fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
306
307 /* then write out the definitions of the permanent protos and configs */
308 fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
309 fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
310
311 /* then write out the list of temporary protos */
312 uint32_t NumTempProtos = Class->TempProtos->size();
313 fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
314 auto TempProtos = Class->TempProtos;
315 iterate(TempProtos) {
316 void *proto = TempProtos->node;
317 fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
318 }
319
320 /* then write out the adapted configs */
321 fwrite(&NumConfigs, sizeof(int), 1, File);
322 for (int i = 0; i < NumConfigs; i++) {
323 if (test_bit(Class->PermConfigs, i)) {
324 WritePermConfig(File, Class->Config[i].Perm);
325 } else {
326 WriteTempConfig(File, Class->Config[i].Temp);
327 }
328 }
329
330 } /* WriteAdaptedClass */
331
332 /*---------------------------------------------------------------------------*/
333 /**
334 * This routine saves Templates to File in a binary format.
335 *
336 * @param File open text file to write Templates to
337 * @param Templates set of adapted templates to write to File
338 *
339 * @note Globals: none
340 */
341 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
342 /* first write the high level adaptive template struct */
343 fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
344
345 /* then write out the basic integer templates */
346 WriteIntTemplates(File, Templates->Templates, unicharset);
347
348 /* then write out the adaptive info for each class */
349 for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
350 WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
351 }
352 } /* WriteAdaptedTemplates */
353
354 /*---------------------------------------------------------------------------*/
355 /**
356 * This routine writes a binary representation of a
357 * permanent configuration to File.
358 *
359 * @param File open file to write Config to
360 * @param Config permanent config to write to File
361 *
362 * @note Globals: none
363 */
364 void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config) {
365 uint8_t NumAmbigs = 0;
366
367 assert(Config != nullptr);
368 while (Config->Ambigs[NumAmbigs] > 0) {
369 ++NumAmbigs;
370 }
371
372 fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
373 fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
374 fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
375 } /* WritePermConfig */
376
377 /*---------------------------------------------------------------------------*/
378 /**
379 * This routine writes a binary representation of a
380 * temporary configuration to File.
381 *
382 * @param File open file to write Config to
383 * @param Config temporary config to write to File
384 *
385 * @note Globals: none
386 */
387 void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config) {
388 assert(Config != nullptr);
389
390 fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
391 fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
392
393 } /* WriteTempConfig */
394
395 } // namespace tesseract