comparison mupdf-source/thirdparty/tesseract/src/ccmain/adaptions.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /**********************************************************************
2 * File: adaptions.cpp (Formerly adaptions.c)
3 * Description: Functions used to adapt to blobs already confidently
4 * identified
5 * Author: Chris Newton
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #include <cctype>
21 #include <cstring>
22 #include "control.h"
23 #include "reject.h"
24 #include "stopper.h"
25 #include "tesseractclass.h"
26 #include "tessvars.h"
27
28 // Include automatically generated configuration file if running autoconf.
29 #ifdef HAVE_CONFIG_H
30 # include "config_auto.h"
31 #endif
32
33 namespace tesseract {
34 bool Tesseract::word_adaptable( // should we adapt?
35 WERD_RES *word, uint16_t mode) {
36 if (tessedit_adaption_debug) {
37 tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
38 word->best_choice->unichar_string().c_str(), word->best_choice->rating(),
39 word->best_choice->certainty());
40 }
41
42 bool status = false;
43 std::bitset<16> flags(mode);
44
45 enum MODES {
46 ADAPTABLE_WERD,
47 ACCEPTABLE_WERD,
48 CHECK_DAWGS,
49 CHECK_SPACES,
50 CHECK_ONE_ELL_CONFLICT,
51 CHECK_AMBIG_WERD
52 };
53
54 /*
55 0: NO adaption
56 */
57 if (mode == 0) {
58 if (tessedit_adaption_debug) {
59 tprintf("adaption disabled\n");
60 }
61 return false;
62 }
63
64 if (flags[ADAPTABLE_WERD]) {
65 status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
66 if (tessedit_adaption_debug && !status) {
67 tprintf("tess_would_adapt bit is false\n");
68 }
69 }
70
71 if (flags[ACCEPTABLE_WERD]) {
72 status |= word->tess_accepted;
73 if (tessedit_adaption_debug && !status) {
74 tprintf("tess_accepted bit is false\n");
75 }
76 }
77
78 if (!status) { // If not set then
79 return false; // ignore other checks
80 }
81
82 if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&
83 (word->best_choice->permuter() != FREQ_DAWG_PERM) &&
84 (word->best_choice->permuter() != USER_DAWG_PERM) &&
85 (word->best_choice->permuter() != NUMBER_PERM)) {
86 if (tessedit_adaption_debug) {
87 tprintf("word not in dawgs\n");
88 }
89 return false;
90 }
91
92 if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) {
93 if (tessedit_adaption_debug) {
94 tprintf("word has ell conflict\n");
95 }
96 return false;
97 }
98
99 if (flags[CHECK_SPACES] &&
100 (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
101 if (tessedit_adaption_debug) {
102 tprintf("word contains spaces\n");
103 }
104 return false;
105 }
106
107 if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) {
108 if (tessedit_adaption_debug) {
109 tprintf("word is ambiguous\n");
110 }
111 return false;
112 }
113
114 if (tessedit_adaption_debug) {
115 tprintf("returning status %d\n", status);
116 }
117 return status;
118 }
119
120 } // namespace tesseract