diff mupdf-source/thirdparty/tesseract/src/classify/tessclassifier.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/tessclassifier.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,84 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tessclassifier.cpp
+// Description: Tesseract implementation of a ShapeClassifier.
+// Author:      Ray Smith
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "tessclassifier.h"
+
+#include "classify.h"
+#include "trainingsample.h"
+
+namespace tesseract {
+
+// Runs the classifier on one [training] sample, storing the ratings in results
+// and returning how many there are. See ShapeClassifier for a full description.
+int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
+                                          UNICHAR_ID keep_this,
+                                          std::vector<UnicharRating> *results) {
+  // Snapshot the debug parameters so they can be put back after classifying.
+  const int saved_classify_level = classify_->classify_debug_level;
+  const int saved_matcher_flags = classify_->matcher_debug_flags;
+  const int saved_matcher_level = classify_->matcher_debug_level;
+  if (debug != 0) {
+    // Force the control parameters to values that generate debug output.
+    classify_->matcher_debug_level.set_value(2);
+    classify_->matcher_debug_flags.set_value(25);
+    classify_->classify_debug_level.set_value(3);
+  }
+  classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results);
+  if (debug != 0) {
+    classify_->matcher_debug_level.set_value(saved_matcher_level);
+    classify_->matcher_debug_flags.set_value(saved_matcher_flags);
+    classify_->classify_debug_level.set_value(saved_classify_level);
+  }
+  return static_cast<int>(results->size());
+}
+
+// Returns the ShapeTable this classifier works with: classify_->shape_table().
+const ShapeTable *TessClassifier::GetShapeTable() const {
+  return classify_->shape_table();
+}
+// Returns the UNICHARSET this classifier works with: classify_->unicharset.
+// Only needs to be overridden if GetShapeTable() can return nullptr.
+const UNICHARSET &TessClassifier::GetUnicharset() const {
+  return classify_->unicharset;
+}
+
+// Displays classification as the given unichar_id, which is used directly as
+// the shape/class id. The interface lets an implementation add created windows
+// to the windows output and return a new placement index; this implementation
+// leaves the windows vector untouched and always returns index unchanged.
+// Nothing is shown for a negative id, for a class without built-in templates
+// (that case is reported via tprintf), or when GRAPHICS_DISABLED is defined.
+int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id,
+                                      int index, std::vector<ScrollView *> &windows) {
+  int shape_id = unichar_id;
+  // TODO(rays) Fix this so it works with both flat and real shapetables.
+  //  if (GetShapeTable() != nullptr)
+  //  shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, nullptr);
+  if (shape_id >= 0) {
+    if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) {
+      tprintf("No built-in templates for class/shape %d\n", shape_id);
+    } else {
+#ifndef GRAPHICS_DISABLED
+      classify_->ShowBestMatchFor(shape_id, sample.features(), sample.num_features());
+#endif
+    }
+  }
+  return index;
+}
+
+} // namespace tesseract