diff mupdf-source/thirdparty/tesseract/src/training/unicharset/unicharset_training_utils.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/training/unicharset/unicharset_training_utils.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,60 @@
+///////////////////////////////////////////////////////////////////////
+// File:        unicharset_training_utils.h
+// Description: Training utilities for UNICHARSET.
+// Author:      Ray Smith
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
+#define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
+
+#include "export.h"
+
+#include <tesseract/export.h>
+
+#include <string>
+
+namespace tesseract {
+
+class UNICHARSET; // Forward declaration; only used via pointer/reference in this header.
+
+// Helper sets the character attribute properties and sets up the script table (presumably
+// of *unicharset). Does not set tops and bottoms. The 2-argument overload passes decompose=false.
+TESS_UNICHARSET_TRAINING_API
+void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset);
+// Composes by default, until decomposed is proven to benefit at least one language.
+inline void SetupBasicProperties(bool report_errors, UNICHARSET *unicharset) {
+  const bool kDecompose = false;
+  SetupBasicProperties(report_errors, kDecompose, unicharset);
+}
+// Helper sets the properties from universal script unicharsets, if found (presumably in script_dir).
+TESS_UNICHARSET_TRAINING_API
+void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset);
+// Gets the combined x-heights string. NOTE(review): no TESS_UNICHARSET_TRAINING_API here; confirm.
+std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset);
+
+// Helper to set the properties for the unicharset in input_unicharset_file;
+// writes the result to output_unicharset_file. If an appropriate script
+// unicharset can be found in the script_dir directory, then the tops and
+// bottoms are expanded using that script unicharset. If non-empty (presumably
+// the file name; TODO confirm), xheight data for the fonts go to output_xheights_file.
+TESS_UNICHARSET_TRAINING_API
+void SetPropertiesForInputFile(const std::string &script_dir,
+                               const std::string &input_unicharset_file,
+                               const std::string &output_unicharset_file,
+                               const std::string &output_xheights_file);
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_