diff mupdf-source/thirdparty/tesseract/src/classify/adaptive.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/adaptive.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,395 @@
+/******************************************************************************
+ ** Filename:    adaptive.c
+ ** Purpose:     Adaptive matcher.
+ ** Author:      Dan Johnson
+ **
+ ** (c) Copyright Hewlett-Packard Company, 1988.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ ******************************************************************************/
+
+#include "adaptive.h"
+
+#include "classify.h"
+
+#include <cassert>
+#include <cstdio>
+
+namespace tesseract {
+
+/*----------------------------------------------------------------------------
+              Public Code
+----------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine adds a new adapted class to an existing
+ * set of adapted templates.
+ *
+ * @param Templates set of templates to add new class to
+ * @param Class new class to add to templates
+ * @param ClassId class id to associate with new class
+ *
+ * @note Globals: none
+ */
+void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId) {
+  assert(Templates != nullptr);
+  assert(Class != nullptr);
+  assert(LegalClassId(ClassId));
+  assert(UnusedClassIdIn(Templates->Templates, ClassId));
+  assert(Class->NumPermConfigs == 0);
+
+  auto IntClass = new INT_CLASS_STRUCT(1, 1);
+  AddIntClass(Templates->Templates, ClassId, IntClass);
+
+  assert(Templates->Class[ClassId] == nullptr);
+  Templates->Class[ClassId] = Class;
+
+} /* AddAdaptedClass */
+
+/*---------------------------------------------------------------------------*/
+
+PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() {
+  delete[] Ambigs;
+}
+
+ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() :
+  NumPermConfigs(0),
+  MaxNumTimesSeen(0),
+  PermProtos(NewBitVector(MAX_NUM_PROTOS)),
+  PermConfigs(NewBitVector(MAX_NUM_CONFIGS)),
+  TempProtos(NIL_LIST) {
+  zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
+  zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS));
+
+  for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
+    TempConfigFor(this, i) = nullptr;
+  }
+}
+
+ADAPT_CLASS_STRUCT::~ADAPT_CLASS_STRUCT() {
+  for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
+    if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) {
+      delete PermConfigFor(this, i);
+    } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) {
+      delete TempConfigFor(this, i);
+    }
+  }
+  FreeBitVector(PermProtos);
+  FreeBitVector(PermConfigs);
+  auto list = TempProtos;
+  while (list != nullptr) {
+    delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node);
+    list = pop(list);
+  }
+}
+
+/// Constructor for adapted templates.
+/// Add an empty class for each char in unicharset to the newly created templates.
+ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) {
+  Templates = new INT_TEMPLATES_STRUCT;
+  NumPermClasses = 0;
+  NumNonEmptyClasses = 0;
+
+  /* Insert an empty class for each unichar id in unicharset */
+  for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
+    Class[i] = nullptr;
+    if (i < unicharset.size()) {
+      AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i);
+    }
+  }
+}
+
+ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() {
+  for (unsigned i = 0; i < (Templates)->NumClasses; i++) {
+    delete Class[i];
+  }
+  delete Templates;
+}
+
+// Returns FontinfoId of the given config of the given adapted class.
+int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) {
+  return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
+                                             : TempConfigFor(Class, ConfigId)->FontinfoId);
+}
+
+/// This constructor allocates and returns a new temporary config.
+///
+/// @param MaxProtoId  max id of any proto in new config
+/// @param FontinfoId font information from pre-trained templates
+TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) :
+  NumTimesSeen(1),
+  ProtoVectorSize(WordsInVectorOfSize(maxProtoId + 1)),
+  MaxProtoId(maxProtoId),
+  Protos(NewBitVector(maxProtoId + 1)),
+  FontinfoId(fontinfoId) {
+  zero_all_bits(Protos, ProtoVectorSize);
+}
+
+TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() {
+  FreeBitVector(Protos);
+}
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine prints a summary of the adapted templates
+ *  in Templates to File.
+ *
+ * @param File    open text file to print Templates to
+ * @param Templates adapted templates to print to File
+ *
+ * @note Globals: none
+ */
+void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
+  INT_CLASS_STRUCT *IClass;
+  ADAPT_CLASS_STRUCT *AClass;
+
+  fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
+  fprintf(File, "Num classes = %d;  Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
+          Templates->NumPermClasses);
+  fprintf(File, "   Id  NC NPC  NP NPP\n");
+  fprintf(File, "------------------------\n");
+
+  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
+    IClass = Templates->Templates->Class[i];
+    AClass = Templates->Class[i];
+    if (!IsEmptyAdaptedClass(AClass)) {
+      fprintf(File, "%5u  %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
+              AClass->NumPermConfigs, IClass->NumProtos,
+              IClass->NumProtos - AClass->TempProtos->size());
+    }
+  }
+  fprintf(File, "\n");
+
+} /* PrintAdaptedTemplates */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * Read an adapted class description from file and return
+ * a ptr to the adapted class.
+ *
+ * @param fp open file to read adapted class from
+ * @return Ptr to new adapted class.
+ *
+ * @note Globals: none
+ */
+ADAPT_CLASS_STRUCT *ReadAdaptedClass(TFile *fp) {
+  int NumTempProtos;
+  int NumConfigs;
+  int i;
+  ADAPT_CLASS_STRUCT *Class;
+
+  /* first read high level adapted class structure */
+  Class = new ADAPT_CLASS_STRUCT;
+  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
+
+  /* then read in the definitions of the permanent protos and configs */
+  Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
+  Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
+  fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
+  fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
+
+  /* then read in the list of temporary protos */
+  fp->FRead(&NumTempProtos, sizeof(int), 1);
+  Class->TempProtos = NIL_LIST;
+  for (i = 0; i < NumTempProtos; i++) {
+    auto TempProto = new TEMP_PROTO_STRUCT;
+    fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
+    Class->TempProtos = push_last(Class->TempProtos, TempProto);
+  }
+
+  /* then read in the adapted configs */
+  fp->FRead(&NumConfigs, sizeof(int), 1);
+  for (i = 0; i < NumConfigs; i++) {
+    if (test_bit(Class->PermConfigs, i)) {
+      Class->Config[i].Perm = ReadPermConfig(fp);
+    } else {
+      Class->Config[i].Temp = ReadTempConfig(fp);
+    }
+  }
+
+  return (Class);
+
+} /* ReadAdaptedClass */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * Read a set of adapted templates from file and return
+ * a ptr to the templates.
+ *
+ * @param fp open text file to read adapted templates from
+ * @return Ptr to adapted templates read from file.
+ *
+ * @note Globals: none
+ */
+ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) {
+  auto Templates = new ADAPT_TEMPLATES_STRUCT;
+
+  /* first read the high level adaptive template struct */
+  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
+
+  /* then read in the basic integer templates */
+  Templates->Templates = ReadIntTemplates(fp);
+
+  /* then read in the adaptive info for each class */
+  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
+    Templates->Class[i] = ReadAdaptedClass(fp);
+  }
+  return (Templates);
+
+} /* ReadAdaptedTemplates */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * Read a permanent configuration description from file
+ * and return a ptr to it.
+ *
+ * @param fp open file to read permanent config from
+ * @return Ptr to new permanent configuration description.
+ *
+ * @note Globals: none
+ */
+PERM_CONFIG_STRUCT *ReadPermConfig(TFile *fp) {
+  auto Config = new PERM_CONFIG_STRUCT;
+  uint8_t NumAmbigs;
+  fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
+  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
+  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
+  Config->Ambigs[NumAmbigs] = -1;
+  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
+
+  return (Config);
+
+} /* ReadPermConfig */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * Read a temporary configuration description from file
+ * and return a ptr to it.
+ *
+ * @param fp open file to read temporary config from
+ * @return Ptr to new temporary configuration description.
+ *
+ * @note Globals: none
+ */
+TEMP_CONFIG_STRUCT *ReadTempConfig(TFile *fp) {
+  auto Config = new TEMP_CONFIG_STRUCT;
+  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
+
+  Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
+  fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
+
+  return (Config);
+
+} /* ReadTempConfig */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine writes a binary representation of Class
+ * to File.
+ *
+ * @param File    open file to write Class to
+ * @param Class   adapted class to write to File
+ * @param NumConfigs  number of configs in Class
+ *
+ * @note Globals: none
+ */
+void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
+  /* first write high level adapted class structure */
+  fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
+
+  /* then write out the definitions of the permanent protos and configs */
+  fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
+  fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
+
+  /* then write out the list of temporary protos */
+  uint32_t NumTempProtos = Class->TempProtos->size();
+  fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
+  auto TempProtos = Class->TempProtos;
+  iterate(TempProtos) {
+    void *proto = TempProtos->node;
+    fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
+  }
+
+  /* then write out the adapted configs */
+  fwrite(&NumConfigs, sizeof(int), 1, File);
+  for (int i = 0; i < NumConfigs; i++) {
+    if (test_bit(Class->PermConfigs, i)) {
+      WritePermConfig(File, Class->Config[i].Perm);
+    } else {
+      WriteTempConfig(File, Class->Config[i].Temp);
+    }
+  }
+
+} /* WriteAdaptedClass */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine saves Templates to File in a binary format.
+ *
+ * @param File    open text file to write Templates to
+ * @param Templates set of adapted templates to write to File
+ *
+ * @note Globals: none
+ */
+void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
+  /* first write the high level adaptive template struct */
+  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
+
+  /* then write out the basic integer templates */
+  WriteIntTemplates(File, Templates->Templates, unicharset);
+
+  /* then write out the adaptive info for each class */
+  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
+    WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
+  }
+} /* WriteAdaptedTemplates */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine writes a binary representation of a
+ * permanent configuration to File.
+ *
+ * @param File  open file to write Config to
+ * @param Config  permanent config to write to File
+ *
+ * @note Globals: none
+ */
+void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config) {
+  uint8_t NumAmbigs = 0;
+
+  assert(Config != nullptr);
+  while (Config->Ambigs[NumAmbigs] > 0) {
+    ++NumAmbigs;
+  }
+
+  fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
+  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
+  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
+} /* WritePermConfig */
+
+/*---------------------------------------------------------------------------*/
+/**
+ * This routine writes a binary representation of a
+ * temporary configuration to File.
+ *
+ * @param File  open file to write Config to
+ * @param Config  temporary config to write to File
+ *
+ * @note Globals: none
+ */
+void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config) {
+  assert(Config != nullptr);
+
+  fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
+  fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
+
+} /* WriteTempConfig */
+
+} // namespace tesseract