Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/rejctmap.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: rejctmap.cpp (Formerly rejmap.c) | |
| 3 * Description: REJ and REJMAP class functions. | |
| 4 * Author: Phil Cheatle | |
| 5 * | |
| 6 * (C) Copyright 1994, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #include "rejctmap.h" | |
| 20 | |
| 21 #include <memory> | |
| 22 | |
| 23 #include "params.h" | |
| 24 | |
| 25 namespace tesseract { | |
| 26 | |
| 27 void REJ::full_print(FILE *fp) const { | |
| 28 fprintf(fp, "R_TESS_FAILURE: %s\n", flag(R_TESS_FAILURE) ? "T" : "F"); | |
| 29 fprintf(fp, "R_SMALL_XHT: %s\n", flag(R_SMALL_XHT) ? "T" : "F"); | |
| 30 fprintf(fp, "R_EDGE_CHAR: %s\n", flag(R_EDGE_CHAR) ? "T" : "F"); | |
| 31 fprintf(fp, "R_1IL_CONFLICT: %s\n", flag(R_1IL_CONFLICT) ? "T" : "F"); | |
| 32 fprintf(fp, "R_POSTNN_1IL: %s\n", flag(R_POSTNN_1IL) ? "T" : "F"); | |
| 33 fprintf(fp, "R_REJ_CBLOB: %s\n", flag(R_REJ_CBLOB) ? "T" : "F"); | |
| 34 fprintf(fp, "R_MM_REJECT: %s\n", flag(R_MM_REJECT) ? "T" : "F"); | |
| 35 fprintf(fp, "R_BAD_REPETITION: %s\n", flag(R_BAD_REPETITION) ? "T" : "F"); | |
| 36 fprintf(fp, "R_POOR_MATCH: %s\n", flag(R_POOR_MATCH) ? "T" : "F"); | |
| 37 fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n", | |
| 38 flag(R_NOT_TESS_ACCEPTED) ? "T" : "F"); | |
| 39 fprintf(fp, "R_CONTAINS_BLANKS: %s\n", flag(R_CONTAINS_BLANKS) ? "T" : "F"); | |
| 40 fprintf(fp, "R_BAD_PERMUTER: %s\n", flag(R_BAD_PERMUTER) ? "T" : "F"); | |
| 41 fprintf(fp, "R_HYPHEN: %s\n", flag(R_HYPHEN) ? "T" : "F"); | |
| 42 fprintf(fp, "R_DUBIOUS: %s\n", flag(R_DUBIOUS) ? "T" : "F"); | |
| 43 fprintf(fp, "R_NO_ALPHANUMS: %s\n", flag(R_NO_ALPHANUMS) ? "T" : "F"); | |
| 44 fprintf(fp, "R_MOSTLY_REJ: %s\n", flag(R_MOSTLY_REJ) ? "T" : "F"); | |
| 45 fprintf(fp, "R_XHT_FIXUP: %s\n", flag(R_XHT_FIXUP) ? "T" : "F"); | |
| 46 fprintf(fp, "R_BAD_QUALITY: %s\n", flag(R_BAD_QUALITY) ? "T" : "F"); | |
| 47 fprintf(fp, "R_DOC_REJ: %s\n", flag(R_DOC_REJ) ? "T" : "F"); | |
| 48 fprintf(fp, "R_BLOCK_REJ: %s\n", flag(R_BLOCK_REJ) ? "T" : "F"); | |
| 49 fprintf(fp, "R_ROW_REJ: %s\n", flag(R_ROW_REJ) ? "T" : "F"); | |
| 50 fprintf(fp, "R_UNLV_REJ: %s\n", flag(R_UNLV_REJ) ? "T" : "F"); | |
| 51 fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", flag(R_HYPHEN_ACCEPT) ? "T" : "F"); | |
| 52 fprintf(fp, "R_NN_ACCEPT: %s\n", flag(R_NN_ACCEPT) ? "T" : "F"); | |
| 53 fprintf(fp, "R_MM_ACCEPT: %s\n", flag(R_MM_ACCEPT) ? "T" : "F"); | |
| 54 fprintf(fp, "R_QUALITY_ACCEPT: %s\n", flag(R_QUALITY_ACCEPT) ? "T" : "F"); | |
| 55 fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n", | |
| 56 flag(R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); | |
| 57 } | |
| 58 | |
| 59 REJMAP &REJMAP::operator=(const REJMAP &source) { | |
| 60 initialise(source.len); | |
| 61 for (unsigned i = 0; i < len; i++) { | |
| 62 ptr[i] = source.ptr[i]; | |
| 63 } | |
| 64 return *this; | |
| 65 } | |
| 66 | |
| 67 void REJMAP::initialise(uint16_t length) { | |
| 68 ptr = std::make_unique<REJ[]>(length); | |
| 69 len = length; | |
| 70 } | |
| 71 | |
| 72 int16_t REJMAP::accept_count() const { // How many accepted? | |
| 73 int16_t count = 0; | |
| 74 for (unsigned i = 0; i < len; i++) { | |
| 75 if (ptr[i].accepted()) { | |
| 76 count++; | |
| 77 } | |
| 78 } | |
| 79 return count; | |
| 80 } | |
| 81 | |
| 82 bool REJMAP::recoverable_rejects() const { // Any non perm rejs? | |
| 83 for (unsigned i = 0; i < len; i++) { | |
| 84 if (ptr[i].recoverable()) { | |
| 85 return true; | |
| 86 } | |
| 87 } | |
| 88 return false; | |
| 89 } | |
| 90 | |
| 91 bool REJMAP::quality_recoverable_rejects() const { // Any potential rejs? | |
| 92 for (unsigned i = 0; i < len; i++) { | |
| 93 if (ptr[i].accept_if_good_quality()) { | |
| 94 return true; | |
| 95 } | |
| 96 } | |
| 97 return false; | |
| 98 } | |
| 99 | |
| 100 void REJMAP::remove_pos( // Cut out an element | |
| 101 uint16_t pos // element to remove | |
| 102 ) { | |
| 103 ASSERT_HOST(pos < len); | |
| 104 ASSERT_HOST(len > 0); | |
| 105 | |
| 106 len--; | |
| 107 for (; pos < len; pos++) { | |
| 108 ptr[pos] = ptr[pos + 1]; | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 void REJMAP::print(FILE *fp) const { | |
| 113 fputc('"', fp); | |
| 114 for (unsigned i = 0; i < len; i++) { | |
| 115 fputc( ptr[i].display_char(), fp); | |
| 116 } | |
| 117 fputc('"', fp); | |
| 118 } | |
| 119 | |
| 120 void REJMAP::full_print(FILE *fp) const { | |
| 121 for (unsigned i = 0; i < len; i++) { | |
| 122 ptr[i].full_print(fp); | |
| 123 fprintf(fp, "\n"); | |
| 124 } | |
| 125 } | |
| 126 | |
| 127 void REJMAP::rej_word_small_xht() { // Reject whole word | |
| 128 for (unsigned i = 0; i < len; i++) { | |
| 129 ptr[i].setrej_small_xht(); | |
| 130 } | |
| 131 } | |
| 132 | |
| 133 void REJMAP::rej_word_tess_failure() { // Reject whole word | |
| 134 for (unsigned i = 0; i < len; i++) { | |
| 135 ptr[i].setrej_tess_failure(); | |
| 136 } | |
| 137 } | |
| 138 | |
| 139 void REJMAP::rej_word_not_tess_accepted() { // Reject whole word | |
| 140 for (unsigned i = 0; i < len; i++) { | |
| 141 if (ptr[i].accepted()) { | |
| 142 ptr[i].setrej_not_tess_accepted(); | |
| 143 } | |
| 144 } | |
| 145 } | |
| 146 | |
| 147 void REJMAP::rej_word_contains_blanks() { // Reject whole word | |
| 148 for (unsigned i = 0; i < len; i++) { | |
| 149 if (ptr[i].accepted()) { | |
| 150 ptr[i].setrej_contains_blanks(); | |
| 151 } | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 void REJMAP::rej_word_bad_permuter() { // Reject whole word | |
| 156 for (unsigned i = 0; i < len; i++) { | |
| 157 if (ptr[i].accepted()) { | |
| 158 ptr[i].setrej_bad_permuter(); | |
| 159 } | |
| 160 } | |
| 161 } | |
| 162 | |
| 163 void REJMAP::rej_word_xht_fixup() { // Reject whole word | |
| 164 for (unsigned i = 0; i < len; i++) { | |
| 165 if (ptr[i].accepted()) { | |
| 166 ptr[i].setrej_xht_fixup(); | |
| 167 } | |
| 168 } | |
| 169 } | |
| 170 | |
| 171 void REJMAP::rej_word_no_alphanums() { // Reject whole word | |
| 172 for (unsigned i = 0; i < len; i++) { | |
| 173 if (ptr[i].accepted()) { | |
| 174 ptr[i].setrej_no_alphanums(); | |
| 175 } | |
| 176 } | |
| 177 } | |
| 178 | |
| 179 void REJMAP::rej_word_mostly_rej() { // Reject whole word | |
| 180 for (unsigned i = 0; i < len; i++) { | |
| 181 if (ptr[i].accepted()) { | |
| 182 ptr[i].setrej_mostly_rej(); | |
| 183 } | |
| 184 } | |
| 185 } | |
| 186 | |
| 187 void REJMAP::rej_word_bad_quality() { // Reject whole word | |
| 188 for (unsigned i = 0; i < len; i++) { | |
| 189 if (ptr[i].accepted()) { | |
| 190 ptr[i].setrej_bad_quality(); | |
| 191 } | |
| 192 } | |
| 193 } | |
| 194 | |
| 195 void REJMAP::rej_word_doc_rej() { // Reject whole word | |
| 196 for (unsigned i = 0; i < len; i++) { | |
| 197 if (ptr[i].accepted()) { | |
| 198 ptr[i].setrej_doc_rej(); | |
| 199 } | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 void REJMAP::rej_word_block_rej() { // Reject whole word | |
| 204 for (unsigned i = 0; i < len; i++) { | |
| 205 if (ptr[i].accepted()) { | |
| 206 ptr[i].setrej_block_rej(); | |
| 207 } | |
| 208 } | |
| 209 } | |
| 210 | |
| 211 void REJMAP::rej_word_row_rej() { // Reject whole word | |
| 212 for (unsigned i = 0; i < len; i++) { | |
| 213 if (ptr[i].accepted()) { | |
| 214 ptr[i].setrej_row_rej(); | |
| 215 } | |
| 216 } | |
| 217 } | |
| 218 | |
| 219 } // namespace tesseract |
