diff mupdf-source/thirdparty/tesseract/src/wordrec/lm_state.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/wordrec/lm_state.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,78 @@
+///////////////////////////////////////////////////////////////////////
+// File:        lm_state.cpp
+// Description: Structures and functionality for capturing the state of
+//              segmentation search guided by the language model.
+// Author:      Rika Antonova
+//
+// (C) Copyright 2012, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "lm_state.h"
+
+namespace tesseract {
+
+/// Writes a one-line debug dump of this entry via tprintf, prefixed by msg.
+/// Optional parts (the NEW marker, debug string, top-choice flags, inconsistency
+/// counts, permuter, n-gram info, shape cost) are printed only when they are set.
+void ViterbiStateEntry::Print(const char *msg) const {
+  const auto &consistency = consistency_info;
+  tprintf("%s ViterbiStateEntry", msg);
+  if (updated) {
+    tprintf("(NEW)");
+  }
+  if (debug_str != nullptr) {
+    tprintf(" str=%s", debug_str->c_str());
+  }
+  tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", ratings_sum, length, cost);
+  if (top_choice_flags != 0) {
+    tprintf(" top_choice_flags=0x%x", top_choice_flags);
+  }
+  if (!Consistent()) {
+    tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
+            consistency.NumInconsistentPunc(), consistency.NumInconsistentCase(),
+            consistency.NumInconsistentChartype(), consistency.inconsistent_script,
+            consistency.inconsistent_font);
+  }
+  if (dawg_info != nullptr) {
+    tprintf(" permuter=%d", dawg_info->permuter);
+  }
+  if (ngram_info != nullptr) {
+    tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
+            ngram_info->ngram_and_classifier_cost, ngram_info->context.c_str(), ngram_info->pruned);
+  }
+  if (associate_stats.shape_cost > 0.0f) {
+    tprintf(" shape_cost=%g", associate_stats.shape_cost);
+  }
+  tprintf(" %s", XHeightConsistencyEnumName[consistency.xht_decision]);
+  tprintf("\n");
+}
+
+/// Empties the Viterbi entry list, zeroes both length counters and sets the prunable max cost to FLT_MAX.
+void LanguageModelState::Clear() {
+  viterbi_state_entries.clear();
+  viterbi_state_entries_length = 0;
+  viterbi_state_entries_prunable_length = 0;
+  viterbi_state_entries_prunable_max_cost = FLT_MAX;
+}
+
+/// Prints a header line (msg, prunable max cost, prunable length, total length)
+/// and then calls Print("") on each entry of viterbi_state_entries in list order.
+void LanguageModelState::Print(const char *msg) {
+  tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg,
+          viterbi_state_entries_prunable_max_cost, viterbi_state_entries_prunable_length,
+          viterbi_state_entries_length);
+  ViterbiStateEntry_IT entry_it(&viterbi_state_entries);
+  entry_it.mark_cycle_pt();
+  while (!entry_it.cycled_list()) {
+    entry_it.data()->Print("");
+    entry_it.forward();
+  }
+}
+
+} // namespace tesseract