Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccstruct/blamer.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/blamer.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,576 @@ +/////////////////////////////////////////////////////////////////////// +// File: blamer.cpp +// Description: Module allowing precise error causes to be allocated. +// Author: Rike Antonova +// Refactored: Ray Smith +// +// (C) Copyright 2013, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "blamer.h" + +#include "blobs.h" // for TPOINT, TWERD, TBLOB +#include "errcode.h" // for ASSERT_HOST +#if !defined(DISABLED_LEGACY_ENGINE) +# include "lm_pain_points.h" // for LMPainPoints +#endif +#include "matrix.h" // for MATRIX +#include "normalis.h" // for DENORM +#include "pageres.h" // for WERD_RES +#include "unicharset.h" // for UNICHARSET + +#include <cmath> // for abs +#include <cstdlib> // for abs + +namespace tesseract { + +// Names for each value of IncorrectResultReason enum. Keep in sync. +const char kBlameCorrect[] = "corr"; +const char kBlameClassifier[] = "cl"; +const char kBlameChopper[] = "chop"; +const char kBlameClassLMTradeoff[] = "cl/LM"; +const char kBlamePageLayout[] = "pglt"; +const char kBlameSegsearchHeur[] = "ss_heur"; +const char kBlameSegsearchPP[] = "ss_pp"; +const char kBlameClassOldLMTradeoff[] = "cl/old_LM"; +const char kBlameAdaption[] = "adapt"; +const char kBlameNoTruthSplit[] = "no_tr_spl"; +const char kBlameNoTruth[] = "no_tr"; +const char kBlameUnknown[] = "unkn"; + +const char *const kIncorrectResultReasonNames[] = { + kBlameCorrect, kBlameClassifier, kBlameChopper, kBlameClassLMTradeoff, + kBlamePageLayout, kBlameSegsearchHeur, kBlameSegsearchPP, kBlameClassOldLMTradeoff, + kBlameAdaption, kBlameNoTruthSplit, kBlameNoTruth, kBlameUnknown}; + +const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) { + return kIncorrectResultReasonNames[irr]; +} + +const char *BlamerBundle::IncorrectReason() const { + return kIncorrectResultReasonNames[incorrect_result_reason_]; +} + +// Functions to setup the blamer. +// Whole word string, whole word bounding box. +void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, + const TBOX &word_box) { + truth_word_.InsertBox(0, word_box); + truth_has_char_boxes_ = false; + // Encode the string as UNICHAR_IDs. + std::vector<UNICHAR_ID> encoding; + std::vector<char> lengths; + unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr); + int total_length = 0; + for (size_t i = 0; i < encoding.size(); total_length += lengths[i++]) { + std::string uch(truth_str + total_length); + uch.resize(lengths[i] - total_length); + UNICHAR_ID id = encoding[i]; + if (id != INVALID_UNICHAR_ID) { + uch = unicharset.get_normed_unichar(id); + } + truth_text_.push_back(uch); + } +} + +// Single "character" string, "character" bounding box. +// May be called multiple times to indicate the characters in a word. +void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, + const TBOX &char_box) { + std::string symbol_str(char_str); + UNICHAR_ID id = unicharset.unichar_to_id(char_str); + if (id != INVALID_UNICHAR_ID) { + std::string normed_uch(unicharset.get_normed_unichar(id)); + if (normed_uch.length() > 0) { + symbol_str = std::move(normed_uch); + } + } + int length = truth_word_.length(); + truth_text_.push_back(symbol_str); + truth_word_.InsertBox(length, char_box); + if (length == 0) { + truth_has_char_boxes_ = true; + } else if (truth_word_.BlobBox(length - 1) == char_box) { + truth_has_char_boxes_ = false; + } +} + +// Marks that there is something wrong with the truth text, like it contains +// reject characters. +void BlamerBundle::SetRejectedTruth() { + incorrect_result_reason_ = IRR_NO_TRUTH; + truth_has_char_boxes_ = false; +} + +// Returns true if the provided word_choice is correct. +bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const { + if (word_choice == nullptr) { + return false; + } + const UNICHARSET *uni_set = word_choice->unicharset(); + std::string normed_choice_str; + for (unsigned i = 0; i < word_choice->length(); ++i) { + normed_choice_str += uni_set->get_normed_unichar(word_choice->unichar_id(i)); + } + std::string truth_str = TruthString(); + return truth_str == normed_choice_str; +} + +void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug) { + debug += "Truth "; + for (auto &text : this->truth_text_) { + debug += text; + } + if (!this->truth_has_char_boxes_) { + debug += " (no char boxes)"; + } + if (choice != nullptr) { + debug += " Choice "; + std::string choice_str; + choice->string_and_lengths(&choice_str, nullptr); + debug += choice_str; + } + if (msg.length() > 0) { + debug += "\n"; + debug += msg; + } + debug += "\n"; +} + +// Sets up the norm_truth_word from truth_word using the given DENORM. +void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) { + // TODO(rays) Is this the last use of denorm in WERD_RES and can it go? + norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale(); + TPOINT topleft; + TPOINT botright; + TPOINT norm_topleft; + TPOINT norm_botright; + for (unsigned b = 0; b < truth_word_.length(); ++b) { + const TBOX &box = truth_word_.BlobBox(b); + topleft.x = box.left(); + topleft.y = box.top(); + botright.x = box.right(); + botright.y = box.bottom(); + denorm.NormTransform(nullptr, topleft, &norm_topleft); + denorm.NormTransform(nullptr, botright, &norm_botright); + TBOX norm_box(norm_topleft.x, norm_botright.y, norm_botright.x, norm_topleft.y); + norm_truth_word_.InsertBox(b, norm_box); + } +} + +// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty +// bundles) where the right edge/ of the left-hand word is word1_right, +// and the left edge of the right-hand word is word2_left. +void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, + BlamerBundle *bundle2) const { + std::string debug_str; + // Find truth boxes that correspond to the split in the blobs. + unsigned begin2_truth_index = 0; + if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) { + debug_str = "Looking for truth split at"; + debug_str += " end1_x " + std::to_string(word1_right); + debug_str += " begin2_x " + std::to_string(word2_left); + debug_str += "\nnorm_truth_word boxes:\n"; + if (norm_truth_word_.length() > 1) { + norm_truth_word_.BlobBox(0).print_to_str(debug_str); + for (unsigned b = 1; b < norm_truth_word_.length(); ++b) { + norm_truth_word_.BlobBox(b).print_to_str(debug_str); + if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) && + (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) { + begin2_truth_index = b; + debug_str += "Split found"; + break; + } + } + debug_str += '\n'; + } + } + // Populate truth information in word and word2 with the first and second + // part of the original truth. + if (begin2_truth_index > 0) { + bundle1->truth_has_char_boxes_ = true; + bundle1->norm_box_tolerance_ = norm_box_tolerance_; + bundle2->truth_has_char_boxes_ = true; + bundle2->norm_box_tolerance_ = norm_box_tolerance_; + BlamerBundle *curr_bb = bundle1; + for (unsigned b = 0; b < norm_truth_word_.length(); ++b) { + if (b == begin2_truth_index) { + curr_bb = bundle2; + } + curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b)); + curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b)); + curr_bb->truth_text_.push_back(truth_text_[b]); + } + } else if (incorrect_result_reason_ == IRR_NO_TRUTH) { + bundle1->incorrect_result_reason_ = IRR_NO_TRUTH; + bundle2->incorrect_result_reason_ = IRR_NO_TRUTH; + } else { + debug_str += "Truth split not found"; + debug_str += truth_has_char_boxes_ ? "\n" : " (no truth char boxes)\n"; + bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug); + bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug); + } +} + +// "Joins" the blames from bundle1 and bundle2 into *this. +void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, + bool debug) { + std::string debug_str; + IncorrectResultReason irr = incorrect_result_reason_; + if (irr != IRR_NO_TRUTH_SPLIT) { + debug_str = ""; + } + if (bundle1.incorrect_result_reason_ != IRR_CORRECT && + bundle1.incorrect_result_reason_ != IRR_NO_TRUTH && + bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) { + debug_str += "Blame from part 1: "; + debug_str += bundle1.debug_; + irr = bundle1.incorrect_result_reason_; + } + if (bundle2.incorrect_result_reason_ != IRR_CORRECT && + bundle2.incorrect_result_reason_ != IRR_NO_TRUTH && + bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) { + debug_str += "Blame from part 2: "; + debug_str += bundle2.debug_; + if (irr == IRR_CORRECT) { + irr = bundle2.incorrect_result_reason_; + } else if (irr != bundle2.incorrect_result_reason_) { + irr = IRR_UNKNOWN; + } + } + incorrect_result_reason_ = irr; + if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) { + SetBlame(irr, debug_str, nullptr, debug); + } +} + +// If a blob with the same bounding box as one of the truth character +// bounding boxes is not classified as the corresponding truth character +// blames character classifier for incorrect answer. +void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, + const BLOB_CHOICE_LIST &choices, bool debug) { + if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) { + return; // Nothing to do here. + } + + for (unsigned b = 0; b < norm_truth_word_.length(); ++b) { + const TBOX &truth_box = norm_truth_word_.BlobBox(b); + // Note that we are more strict on the bounding box boundaries here + // than in other places (chopper, segmentation search), since we do + // not have the ability to check the previous and next bounding box. + if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) { + bool found = false; + bool incorrect_adapted = false; + UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID; + const char *truth_str = truth_text_[b].c_str(); + // We promise not to modify the list or its contents, using a + // const BLOB_CHOICE* below. + BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST *>(&choices)); + for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) { + const BLOB_CHOICE *choice = choices_it.data(); + if (strcmp(truth_str, unicharset.get_normed_unichar(choice->unichar_id())) == 0) { + found = true; + break; + } else if (choice->IsAdapted()) { + incorrect_adapted = true; + incorrect_adapted_id = choice->unichar_id(); + } + } // end choices_it for loop + if (!found) { + std::string debug_str = "unichar "; + debug_str += truth_str; + debug_str += " not found in classification list"; + SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug); + } else if (incorrect_adapted) { + std::string debug_str = "better rating for adapted "; + debug_str += unicharset.id_to_unichar(incorrect_adapted_id); + debug_str += " than for correct "; + debug_str += truth_str; + SetBlame(IRR_ADAPTION, debug_str, nullptr, debug); + } + break; + } + } // end iterating over blamer_bundle->norm_truth_word +} + +// Checks whether chops were made at all the character bounding box +// boundaries in word->truth_word. If not - blames the chopper for an +// incorrect answer. +void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) { + if (NoTruth() || !truth_has_char_boxes_ || word->chopped_word->blobs.empty()) { + return; + } + bool missing_chop = false; + int num_blobs = word->chopped_word->blobs.size(); + unsigned box_index = 0; + int blob_index = 0; + int16_t truth_x = -1; + while (box_index < truth_word_.length() && blob_index < num_blobs) { + truth_x = norm_truth_word_.BlobBox(box_index).right(); + TBLOB *curr_blob = word->chopped_word->blobs[blob_index]; + if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) { + ++blob_index; + continue; // encountered an extra chop, keep looking + } else if (curr_blob->bounding_box().right() > truth_x + norm_box_tolerance_) { + missing_chop = true; + break; + } else { + ++blob_index; + } + } + if (missing_chop || box_index < norm_truth_word_.length()) { + std::string debug_str; + if (missing_chop) { + debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_); + debug_str += ") at Bounding Box="; + TBLOB *curr_blob = word->chopped_word->blobs[blob_index]; + curr_blob->bounding_box().print_to_str(debug_str); + debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x); + } else { + debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index); + debug_str += " truth box(es)"; + } + debug_str += "\nMaximally chopped word boxes:\n"; + for (blob_index = 0; blob_index < num_blobs; ++blob_index) { + TBLOB *curr_blob = word->chopped_word->blobs[blob_index]; + curr_blob->bounding_box().print_to_str(debug_str); + debug_str += '\n'; + } + debug_str += "Truth bounding boxes:\n"; + for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) { + norm_truth_word_.BlobBox(box_index).print_to_str(debug_str); + debug_str += '\n'; + } + SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug); + } +} + +// Blames the classifier or the language model if, after running only the +// chopper, best_choice is incorrect and no blame has been yet set. +// Blames the classifier if best_choice is classifier's top choice and is a +// dictionary word (i.e. language model could not have helped). +// Otherwise, blames the language model (formerly permuter word adjustment). +void BlamerBundle::BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, + bool valid_permuter, bool debug) { + if (valid_permuter) { + // Find out whether best choice is a top choice. + best_choice_is_dict_and_top_choice_ = true; + for (unsigned i = 0; i < word->best_choice->length(); ++i) { + BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i)); + ASSERT_HOST(!blob_choice_it.empty()); + BLOB_CHOICE *first_choice = nullptr; + for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); + blob_choice_it.forward()) { // find first non-fragment choice + if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) { + first_choice = blob_choice_it.data(); + break; + } + } + ASSERT_HOST(first_choice != nullptr); + if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) { + best_choice_is_dict_and_top_choice_ = false; + break; + } + } + } + std::string debug_str; + if (best_choice_is_dict_and_top_choice_) { + debug_str = "Best choice is: incorrect, top choice, dictionary word"; + debug_str += " with permuter "; + debug_str += word->best_choice->permuter_name(); + } else { + debug_str = "Classifier/Old LM tradeoff is to blame"; + } + SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER : IRR_CLASS_OLD_LM_TRADEOFF, + debug_str, word->best_choice, debug); +} + +// Sets up the correct_segmentation_* to mark the correct bounding boxes. +void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) { +#ifndef DISABLED_LEGACY_ENGINE + params_training_bundle_.StartHypothesisList(); +#endif // ndef DISABLED_LEGACY_ENGINE + if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) { + return; // Nothing to do here. + } + + std::string debug_str = "Blamer computing correct_segmentation_cols\n"; + int curr_box_col = 0; + int next_box_col = 0; + int num_blobs = word->NumBlobs(); + if (num_blobs == 0) { + return; // No blobs to play with. + } + int blob_index = 0; + int16_t next_box_x = word->blobs[blob_index]->bounding_box().right(); + for (unsigned truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length(); + ++blob_index) { + ++next_box_col; + int16_t curr_box_x = next_box_x; + if (blob_index + 1 < num_blobs) { + next_box_x = word->blobs[blob_index + 1]->bounding_box().right(); + } + int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right(); + debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x); + debug_str += " " + std::to_string(truth_x); + debug_str += "\n"; + if (curr_box_x > (truth_x + norm_box_tolerance_)) { + break; // failed to find a matching box + } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched + (blob_index + 1 >= num_blobs || // next box can't be included + next_box_x > truth_x + norm_box_tolerance_)) { + correct_segmentation_cols_.push_back(curr_box_col); + correct_segmentation_rows_.push_back(next_box_col - 1); + ++truth_idx; + debug_str += "col=" + std::to_string(curr_box_col); + debug_str += " row=" + std::to_string(next_box_col - 1); + debug_str += "\n"; + curr_box_col = next_box_col; + } + } + if (blob_index < num_blobs || // trailing blobs + correct_segmentation_cols_.size() != norm_truth_word_.length()) { + debug_str += + "Blamer failed to find correct segmentation" + " (tolerance=" + + std::to_string(norm_box_tolerance_); + if (blob_index >= num_blobs) { + debug_str += " blob == nullptr"; + } + debug_str += ")\n"; + debug_str += " path length " + std::to_string(correct_segmentation_cols_.size()); + debug_str += " vs. truth " + std::to_string(norm_truth_word_.length()); + debug_str += "\n"; + SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug); + correct_segmentation_cols_.clear(); + correct_segmentation_rows_.clear(); + } +} + +// Returns true if a guided segmentation search is needed. +bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const { + return incorrect_result_reason_ == IRR_CORRECT && !segsearch_is_looking_for_blame_ && + truth_has_char_boxes_ && !ChoiceIsCorrect(best_choice); +} + +#if !defined(DISABLED_LEGACY_ENGINE) +// Setup ready to guide the segmentation search to the correct segmentation. +void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, + UNICHAR_ID wildcard_id, bool debug, std::string &debug_str, + tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, + WERD_RES *word_res) { + segsearch_is_looking_for_blame_ = true; + if (debug) { + tprintf("segsearch starting to look for blame\n"); + } + // Fill pain points for any unclassifed blob corresponding to the + // correct segmentation state. + debug_str += "Correct segmentation:\n"; + for (unsigned idx = 0; idx < correct_segmentation_cols_.size(); ++idx) { + debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]); + debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]); + debug_str += "\n"; + if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx], + wildcard_id) && + !pain_points->GeneratePainPoint( + correct_segmentation_cols_[idx], correct_segmentation_rows_[idx], + tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) { + segsearch_is_looking_for_blame_ = false; + debug_str += "\nFailed to insert pain point\n"; + SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug); + break; + } + } // end for blamer_bundle->correct_segmentation_cols/rows +} +#endif // !defined(DISABLED_LEGACY_ENGINE) + +// Returns true if the guided segsearch is in progress. +bool BlamerBundle::GuidedSegsearchStillGoing() const { + return segsearch_is_looking_for_blame_; +} + +// The segmentation search has ended. Sets the blame appropriately. +void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) { + // If we are still looking for blame (i.e. best_choice is incorrect, but a + // path representing the correct segmentation could be constructed), we can + // blame segmentation search pain point prioritization if the rating of the + // path corresponding to the correct segmentation is better than that of + // best_choice (i.e. language model would have done the correct thing, but + // because of poor pain point prioritization the correct segmentation was + // never explored). Otherwise we blame the tradeoff between the language model + // and the classifier, since even after exploring the path corresponding to + // the correct segmentation incorrect best_choice would have been chosen. + // One special case when we blame the classifier instead is when best choice + // is incorrect, but it is a dictionary word and it classifier's top choice. + if (segsearch_is_looking_for_blame_) { + segsearch_is_looking_for_blame_ = false; + if (best_choice_is_dict_and_top_choice_) { + debug_str = "Best choice is: incorrect, top choice, dictionary word"; + debug_str += " with permuter "; + debug_str += best_choice->permuter_name(); + SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug); + } else if (best_correctly_segmented_rating_ < best_choice->rating()) { + debug_str += "Correct segmentation state was not explored"; + SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug); + } else { + if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) { + debug_str += "Correct segmentation paths were pruned by LM\n"; + } else { + debug_str += "Best correct segmentation rating " + + std::to_string(best_correctly_segmented_rating_); + debug_str += " vs. best choice rating " + std::to_string(best_choice->rating()); + } + SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug); + } + } +} + +// If the bundle is null or still does not indicate the correct result, +// fix it and use some backup reason for the blame. +void BlamerBundle::LastChanceBlame(bool debug, WERD_RES *word) { + if (word->blamer_bundle == nullptr) { + word->blamer_bundle = new BlamerBundle(); + word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", word->best_choice, debug); + } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) { + word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", word->best_choice, debug); + } else { + bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice); + IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_; + if (irr == IRR_CORRECT && !correct) { + std::string debug_str = "Choice is incorrect after recognition"; + word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug); + } else if (irr != IRR_CORRECT && correct) { + if (debug) { + tprintf("Corrected %s\n", word->blamer_bundle->debug_.c_str()); + } + word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT; + word->blamer_bundle->debug_ = ""; + } + } +} + +// Sets the misadaption debug if this word is incorrect, as this word is +// being adapted to. +void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug) { + if (incorrect_result_reason_ != IRR_NO_TRUTH && !ChoiceIsCorrect(best_choice)) { + misadaption_debug_ = "misadapt to word ("; + misadaption_debug_ += best_choice->permuter_name(); + misadaption_debug_ += "): "; + FillDebugString("", best_choice, misadaption_debug_); + if (debug) { + tprintf("%s\n", misadaption_debug_.c_str()); + } + } +} + +} // namespace tesseract
