diff mupdf-source/thirdparty/tesseract/src/classify/cutoffs.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/cutoffs.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,74 @@
+/******************************************************************************
+ ** Filename:    cutoffs.cpp
+ ** Purpose:     Routines to manipulate an array of class cutoffs.
+ ** Author:      Dan Johnson
+ **
+ ** (c) Copyright Hewlett-Packard Company, 1988.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ ******************************************************************************/
+/*----------------------------------------------------------------------------
+          Include Files and Type Defines
+----------------------------------------------------------------------------*/
+
+#include <cstdio>
+#include <sstream> // for std::istringstream
+#include <string>  // for std::string
+
+#include <tesseract/unichar.h>
+#include "classify.h"
+#include "helpers.h"
+#include "serialis.h"
+
+#define MAX_CUTOFF 1000 // preset ReadNewCutoffs stores in every Cutoffs entry before parsing the file
+
+namespace tesseract {
+/**
+ * Read class/cutoff pairs from fp into the Cutoffs array, indexed by class id.
+ *
+ * If shape_table_ is set, shapetable_cutoffs_ is deserialized from fp first;
+ * a failure there is only reported through tprintf and reading continues.
+ * Every entry of Cutoffs is then preset to MAX_CUTOFF, after which each text
+ * line ("<class> <cutoff>") is parsed and its cutoff stored under the id that
+ * unicharset reports for the class. Parsing ends when FGets returns nullptr
+ * or at the first line that does not parse.
+ * @param fp file containing cutoff definitions
+ * @param Cutoffs array to fill; must hold at least MAX_NUM_CLASSES entries
+ */
+void Classify::ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs) {
+  if (shape_table_ != nullptr && !fp->DeSerialize(shapetable_cutoffs_)) {
+    tprintf("Error during read of shapetable pffmtable!\n");
+  }
+
+  // Classes that the file never mentions keep this preset.
+  for (int class_index = 0; class_index < MAX_NUM_CLASSES; ++class_index) {
+    Cutoffs[class_index] = MAX_CUTOFF;
+  }
+
+  const int kLineBufferSize = 100;
+  char text_line[kLineBufferSize];
+  while (fp->FGets(text_line, kLineBufferSize) != nullptr) {
+    std::istringstream parser(text_line);
+    // The classic "C" locale keeps number parsing independent of the global locale.
+    parser.imbue(std::locale::classic());
+    std::string class_name;
+    int cutoff_value;
+    if (!(parser >> class_name >> cutoff_value)) {
+      break; // stop at the first line that is not "<class> <cutoff>"
+    }
+    // The file spells the class of the unichar " " as "NULL".
+    const char *unichar = (class_name == "NULL") ? " " : class_name.c_str();
+    const CLASS_ID ClassId = unicharset.unichar_to_id(unichar);
+    ASSERT_HOST(ClassId >= 0 && ClassId < MAX_NUM_CLASSES);
+    Cutoffs[ClassId] = cutoff_value; // NOTE(review): unchecked int -> uint16_t narrowing
+  }
+}
+
+} // namespace tesseract