comparison mupdf-source/thirdparty/tesseract/src/wordrec/lm_state.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: lm_state.cpp
3 // Description: Structures and functionality for capturing the state of
4 // segmentation search guided by the language model.
5 // Author: Rika Antonova
6 //
7 // (C) Copyright 2012, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 #include "lm_state.h"
21
22 namespace tesseract {
23
24 void ViterbiStateEntry::Print(const char *msg) const {
25 tprintf("%s ViterbiStateEntry", msg);
26 if (updated) {
27 tprintf("(NEW)");
28 }
29 if (this->debug_str != nullptr) {
30 tprintf(" str=%s", this->debug_str->c_str());
31 }
32 tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, this->length,
33 this->cost);
34 if (this->top_choice_flags) {
35 tprintf(" top_choice_flags=0x%x", this->top_choice_flags);
36 }
37 if (!this->Consistent()) {
38 tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
39 this->consistency_info.NumInconsistentPunc(),
40 this->consistency_info.NumInconsistentCase(),
41 this->consistency_info.NumInconsistentChartype(),
42 this->consistency_info.inconsistent_script, this->consistency_info.inconsistent_font);
43 }
44 if (this->dawg_info) {
45 tprintf(" permuter=%d", this->dawg_info->permuter);
46 }
47 if (this->ngram_info) {
48 tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
49 this->ngram_info->ngram_and_classifier_cost, this->ngram_info->context.c_str(),
50 this->ngram_info->pruned);
51 }
52 if (this->associate_stats.shape_cost > 0.0f) {
53 tprintf(" shape_cost=%g", this->associate_stats.shape_cost);
54 }
55 tprintf(" %s", XHeightConsistencyEnumName[this->consistency_info.xht_decision]);
56
57 tprintf("\n");
58 }
59
60 /// Clears the viterbi search state back to its initial conditions.
61 void LanguageModelState::Clear() {
62 viterbi_state_entries.clear();
63 viterbi_state_entries_prunable_length = 0;
64 viterbi_state_entries_prunable_max_cost = FLT_MAX;
65 viterbi_state_entries_length = 0;
66 }
67
68 void LanguageModelState::Print(const char *msg) {
69 tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg,
70 viterbi_state_entries_prunable_max_cost, viterbi_state_entries_prunable_length,
71 viterbi_state_entries_length);
72 ViterbiStateEntry_IT vit(&viterbi_state_entries);
73 for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) {
74 vit.data()->Print("");
75 }
76 }
77
78 } // namespace tesseract