diff mupdf-source/thirdparty/tesseract/src/classify/tessclassifier.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/tessclassifier.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,69 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+///////////////////////////////////////////////////////////////////////
+// File:        tessclassifier.h
+// Description: Tesseract implementation of a ShapeClassifier.
+// Author:      Ray Smith
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_
+#define THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_
+
+#include "shapeclassifier.h"
+
+namespace tesseract {
+
+class Classify;       // Only used via pointer in this header, so a forward declaration suffices.
+class TrainingSample; // Only used via const reference in this header.
+
+// Tesseract implementation of a ShapeClassifier, holding a borrowed Classify.
+// Due to limitations in the content of TrainingSample, this currently
+// only works for the static classifier and only works if the ShapeTable
+// in the supplied Classify object is not nullptr.
+class TESS_API TessClassifier : public ShapeClassifier {
+public:
+  TessClassifier(bool pruner_only, tesseract::Classify *classify)
+      : pruner_only_(pruner_only), classify_(classify) {} // classify is borrowed; see classify_.
+  ~TessClassifier() override = default; // Defaulted: the borrowed classify_ is not deleted here.
+
+  // Classifies the given [training] sample, writing to results.
+  // See ShapeClassifier for a full description of the parameters and result.
+  int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
+                            UNICHAR_ID keep_this, std::vector<UnicharRating> *results) override;
+  // Provides access to the ShapeTable that this classifier works with.
+  const ShapeTable *GetShapeTable() const override;
+  // Provides access to the UNICHARSET that this classifier works with.
+  // Only needs to be overridden if GetShapeTable() can return nullptr.
+  const UNICHARSET &GetUnicharset() const override;
+
+  // Displays classification as the given unichar_id. Creates as many windows
+  // as it feels fit, using index as a guide for placement. Adds any created
+  // windows to the windows output and returns a new index that may be used
+  // by any subsequent classifiers. Caller waits for the user to view and
+  // then destroys the windows by clearing the vector.
+  int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id, int index,
+                        std::vector<ScrollView *> &windows) override;
+
+private:
+  // If true, this classifier is to use just the ClassPruner; if false, it
+  // is to use the full classifier.
+  bool pruner_only_;
+  // Borrowed (non-owning) pointer to the actual Tesseract classifier.
+  tesseract::Classify *classify_;
+};
+
+} // namespace tesseract
+
+#endif /* THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ */