Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccstruct/rejctmap.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/rejctmap.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,219 @@ +/********************************************************************** + * File: rejctmap.cpp (Formerly rejmap.c) + * Description: REJ and REJMAP class functions. + * Author: Phil Cheatle + * + * (C) Copyright 1994, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "rejctmap.h" + +#include <memory> + +#include "params.h" + +namespace tesseract { + +void REJ::full_print(FILE *fp) const { + fprintf(fp, "R_TESS_FAILURE: %s\n", flag(R_TESS_FAILURE) ? "T" : "F"); + fprintf(fp, "R_SMALL_XHT: %s\n", flag(R_SMALL_XHT) ? "T" : "F"); + fprintf(fp, "R_EDGE_CHAR: %s\n", flag(R_EDGE_CHAR) ? "T" : "F"); + fprintf(fp, "R_1IL_CONFLICT: %s\n", flag(R_1IL_CONFLICT) ? "T" : "F"); + fprintf(fp, "R_POSTNN_1IL: %s\n", flag(R_POSTNN_1IL) ? "T" : "F"); + fprintf(fp, "R_REJ_CBLOB: %s\n", flag(R_REJ_CBLOB) ? "T" : "F"); + fprintf(fp, "R_MM_REJECT: %s\n", flag(R_MM_REJECT) ? "T" : "F"); + fprintf(fp, "R_BAD_REPETITION: %s\n", flag(R_BAD_REPETITION) ? "T" : "F"); + fprintf(fp, "R_POOR_MATCH: %s\n", flag(R_POOR_MATCH) ? "T" : "F"); + fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n", + flag(R_NOT_TESS_ACCEPTED) ? "T" : "F"); + fprintf(fp, "R_CONTAINS_BLANKS: %s\n", flag(R_CONTAINS_BLANKS) ? "T" : "F"); + fprintf(fp, "R_BAD_PERMUTER: %s\n", flag(R_BAD_PERMUTER) ? "T" : "F"); + fprintf(fp, "R_HYPHEN: %s\n", flag(R_HYPHEN) ? "T" : "F"); + fprintf(fp, "R_DUBIOUS: %s\n", flag(R_DUBIOUS) ? "T" : "F"); + fprintf(fp, "R_NO_ALPHANUMS: %s\n", flag(R_NO_ALPHANUMS) ? "T" : "F"); + fprintf(fp, "R_MOSTLY_REJ: %s\n", flag(R_MOSTLY_REJ) ? "T" : "F"); + fprintf(fp, "R_XHT_FIXUP: %s\n", flag(R_XHT_FIXUP) ? "T" : "F"); + fprintf(fp, "R_BAD_QUALITY: %s\n", flag(R_BAD_QUALITY) ? "T" : "F"); + fprintf(fp, "R_DOC_REJ: %s\n", flag(R_DOC_REJ) ? "T" : "F"); + fprintf(fp, "R_BLOCK_REJ: %s\n", flag(R_BLOCK_REJ) ? "T" : "F"); + fprintf(fp, "R_ROW_REJ: %s\n", flag(R_ROW_REJ) ? "T" : "F"); + fprintf(fp, "R_UNLV_REJ: %s\n", flag(R_UNLV_REJ) ? "T" : "F"); + fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", flag(R_HYPHEN_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_NN_ACCEPT: %s\n", flag(R_NN_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_MM_ACCEPT: %s\n", flag(R_MM_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_QUALITY_ACCEPT: %s\n", flag(R_QUALITY_ACCEPT) ? "T" : "F"); + fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n", + flag(R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); +} + +REJMAP &REJMAP::operator=(const REJMAP &source) { + initialise(source.len); + for (unsigned i = 0; i < len; i++) { + ptr[i] = source.ptr[i]; + } + return *this; +} + +void REJMAP::initialise(uint16_t length) { + ptr = std::make_unique<REJ[]>(length); + len = length; +} + +int16_t REJMAP::accept_count() const { // How many accepted? + int16_t count = 0; + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + count++; + } + } + return count; +} + +bool REJMAP::recoverable_rejects() const { // Any non perm rejs? + for (unsigned i = 0; i < len; i++) { + if (ptr[i].recoverable()) { + return true; + } + } + return false; +} + +bool REJMAP::quality_recoverable_rejects() const { // Any potential rejs? + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accept_if_good_quality()) { + return true; + } + } + return false; +} + +void REJMAP::remove_pos( // Cut out an element + uint16_t pos // element to remove +) { + ASSERT_HOST(pos < len); + ASSERT_HOST(len > 0); + + len--; + for (; pos < len; pos++) { + ptr[pos] = ptr[pos + 1]; + } +} + +void REJMAP::print(FILE *fp) const { + fputc('"', fp); + for (unsigned i = 0; i < len; i++) { + fputc( ptr[i].display_char(), fp); + } + fputc('"', fp); +} + +void REJMAP::full_print(FILE *fp) const { + for (unsigned i = 0; i < len; i++) { + ptr[i].full_print(fp); + fprintf(fp, "\n"); + } +} + +void REJMAP::rej_word_small_xht() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + ptr[i].setrej_small_xht(); + } +} + +void REJMAP::rej_word_tess_failure() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + ptr[i].setrej_tess_failure(); + } +} + +void REJMAP::rej_word_not_tess_accepted() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_not_tess_accepted(); + } + } +} + +void REJMAP::rej_word_contains_blanks() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_contains_blanks(); + } + } +} + +void REJMAP::rej_word_bad_permuter() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_bad_permuter(); + } + } +} + +void REJMAP::rej_word_xht_fixup() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_xht_fixup(); + } + } +} + +void REJMAP::rej_word_no_alphanums() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_no_alphanums(); + } + } +} + +void REJMAP::rej_word_mostly_rej() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_mostly_rej(); + } + } +} + +void REJMAP::rej_word_bad_quality() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_bad_quality(); + } + } +} + +void REJMAP::rej_word_doc_rej() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_doc_rej(); + } + } +} + +void REJMAP::rej_word_block_rej() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_block_rej(); + } + } +} + +void REJMAP::rej_word_row_rej() { // Reject whole word + for (unsigned i = 0; i < len; i++) { + if (ptr[i].accepted()) { + ptr[i].setrej_row_rej(); + } + } +} + +} // namespace tesseract
