Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccmain/adaptions.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: adaptions.cpp (Formerly adaptions.c) | |
| 3 * Description: Functions used to adapt to blobs already confidently | |
| 4 * identified | |
| 5 * Author: Chris Newton | |
| 6 * | |
| 7 * (C) Copyright 1992, Hewlett-Packard Ltd. | |
| 8 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 ** you may not use this file except in compliance with the License. | |
| 10 ** You may obtain a copy of the License at | |
| 11 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 ** Unless required by applicable law or agreed to in writing, software | |
| 13 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 ** See the License for the specific language governing permissions and | |
| 16 ** limitations under the License. | |
| 17 * | |
| 18 **********************************************************************/ | |
| 19 | |
| 20 #include <cctype> | |
| 21 #include <cstring> | |
| 22 #include "control.h" | |
| 23 #include "reject.h" | |
| 24 #include "stopper.h" | |
| 25 #include "tesseractclass.h" | |
| 26 #include "tessvars.h" | |
| 27 | |
| 28 // Include automatically generated configuration file if running autoconf. | |
| 29 #ifdef HAVE_CONFIG_H | |
| 30 # include "config_auto.h" | |
| 31 #endif | |
| 32 | |
| 33 namespace tesseract { | |
| 34 bool Tesseract::word_adaptable( // should we adapt? Returns true only when an enabling bit of mode is satisfied by word and no enabled CHECK_* test rejects it. | |
| 35 WERD_RES *word, uint16_t mode) { /* word: the word result being tested; mode: bit field whose bit positions are named by MODES below, 0 means no adaption */ | |
| 36 if (tessedit_adaption_debug) { /* optional trace of the candidate's text, rating and certainty */ | |
| 37 tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", | |
| 38 word->best_choice->unichar_string().c_str(), word->best_choice->rating(), | |
| 39 word->best_choice->certainty()); | |
| 40 } | |
| 41 | |
| 42 bool status = false; /* only ever set (never cleared) by the two enabling bits below */ | |
| 43 std::bitset<16> flags(mode); /* lets each bit of mode be tested by index. NOTE(review): bitset is not among the headers included directly by this file; presumably it arrives via a project header - confirm */ | |
| 44 | |
| 45 enum MODES { /* bit indices into flags, numbered 0..5 in declaration order */ | |
| 46 ADAPTABLE_WERD, | |
| 47 ACCEPTABLE_WERD, | |
| 48 CHECK_DAWGS, | |
| 49 CHECK_SPACES, | |
| 50 CHECK_ONE_ELL_CONFLICT, | |
| 51 CHECK_AMBIG_WERD | |
| 52 }; | |
| 53 | |
| 54 /* | |
| 55 0: NO adaption | |
| 56 */ | |
| 57 if (mode == 0) { | |
| 58 if (tessedit_adaption_debug) { | |
| 59 tprintf("adaption disabled\n"); | |
| 60 } | |
| 61 return false; | |
| 62 } | |
| 63 | |
| 64 if (flags[ADAPTABLE_WERD]) { /* enabling bit 0: accept words flagged tess_would_adapt */ | |
| 65 status |= word->tess_would_adapt; // result of Classify::AdaptableWord() | |
| 66 if (tessedit_adaption_debug && !status) { | |
| 67 tprintf("tess_would_adapt bit is false\n"); | |
| 68 } | |
| 69 } | |
| 70 | |
| 71 if (flags[ACCEPTABLE_WERD]) { /* enabling bit 1: accept words flagged tess_accepted; OR-ed with the result above */ | |
| 72 status |= word->tess_accepted; | |
| 73 if (tessedit_adaption_debug && !status) { /* message is printed only if status is still false, i.e. the previous bit did not already enable the word */ | |
| 74 tprintf("tess_accepted bit is false\n"); | |
| 75 } | |
| 76 } | |
| 77 | |
| 78 if (!status) { // If not set then | |
| 79 return false; // ignore other checks | |
| 80 } | |
| 81 | |
| 82 if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) && /* veto: permuter must be one of the four values compared here */ | |
| 83 (word->best_choice->permuter() != FREQ_DAWG_PERM) && | |
| 84 (word->best_choice->permuter() != USER_DAWG_PERM) && | |
| 85 (word->best_choice->permuter() != NUMBER_PERM)) { | |
| 86 if (tessedit_adaption_debug) { | |
| 87 tprintf("word not in dawgs\n"); | |
| 88 } | |
| 89 return false; | |
| 90 } | |
| 91 | |
| 92 if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) { /* veto on a one/ell conflict. NOTE(review): meaning of the false argument is not visible here - confirm against the declaration of one_ell_conflict */ | |
| 93 if (tessedit_adaption_debug) { | |
| 94 tprintf("word has ell conflict\n"); | |
| 95 } | |
| 96 return false; | |
| 97 } | |
| 98 | |
| 99 if (flags[CHECK_SPACES] && /* veto: best choice text contains a space character */ | |
| 100 (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) { | |
| 101 if (tessedit_adaption_debug) { | |
| 102 tprintf("word contains spaces\n"); | |
| 103 } | |
| 104 return false; | |
| 105 } | |
| 106 | |
| 107 if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) { /* veto: best choice reports a dangerous ambiguity */ | |
| 108 if (tessedit_adaption_debug) { | |
| 109 tprintf("word is ambiguous\n"); | |
| 110 } | |
| 111 return false; | |
| 112 } | |
| 113 | |
| 114 if (tessedit_adaption_debug) { | |
| 115 tprintf("returning status %d\n", status); | |
| 116 } | |
| 117 return status; /* always true here: the !status case returned above and status is never cleared afterwards */ | |
| 118 } | |
| 119 | |
| 120 } // namespace tesseract |
