Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccmain/paramsd.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccmain/paramsd.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,350 @@ +/////////////////////////////////////////////////////////////////////// +// File: paramsd.cpp +// Description: Tesseract parameter Editor +// Author: Joern Wanke +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// +// +// The parameters editor is used to edit all the parameters used within +// tesseract from the ui. + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +# include "config_auto.h" +#endif + +#ifndef GRAPHICS_DISABLED + +# include "params.h" // for ParamsVectors, StringParam, BoolParam +# include "paramsd.h" +# include "scrollview.h" // for SVEvent, ScrollView, SVET_POPUP +# include "svmnode.h" // for SVMenuNode +# include "tesseractclass.h" // for Tesseract + +# include <cstdio> // for fclose, fopen, fprintf, FILE +# include <cstdlib> // for atoi +# include <cstring> // for strcmp, strcspn, strlen, strncpy +# include <locale> // for std::locale::classic +# include <map> // for map, _Rb_tree_iterator, map<>::iterator +# include <memory> // for unique_ptr +# include <sstream> // for std::stringstream +# include <utility> // for pair + +namespace tesseract { + +# define VARDIR "configs/" /*parameters files */ +# define MAX_ITEMS_IN_SUBMENU 30 + +// The following variables should remain static globals, since they +// are used by debug editor, which uses a single Tesseract instance. +// +// Contains the mappings from unique VC ids to their actual pointers. +static std::map<int, ParamContent *> vcMap; +static int nrParams = 0; +static int writeCommands[2]; + +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::StringParam *it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_STRING; + sIt = it; + vcMap[my_id_] = this; +} +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::IntParam *it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_INTEGER; + iIt = it; + vcMap[my_id_] = this; +} +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::BoolParam *it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_BOOLEAN; + bIt = it; + vcMap[my_id_] = this; +} +// Constructors for the various ParamTypes. +ParamContent::ParamContent(tesseract::DoubleParam *it) { + my_id_ = nrParams; + nrParams++; + param_type_ = VT_DOUBLE; + dIt = it; + vcMap[my_id_] = this; +} + +// Gets a VC object identified by its ID. +ParamContent *ParamContent::GetParamContentById(int id) { + return vcMap[id]; +} + +// Copy the first N words from the source string to the target string. +// Words are delimited by "_". +void ParamsEditor::GetFirstWords(const char *s, // source string + int n, // number of words + char *t // target string +) { + int full_length = strlen(s); + int reqd_len = 0; // No. of chars required + const char *next_word = s; + + while ((n > 0) && reqd_len < full_length) { + reqd_len += strcspn(next_word, "_") + 1; + next_word += reqd_len; + n--; + } + strncpy(t, s, reqd_len); + t[reqd_len] = '\0'; // ensure null terminal +} + +// Getter for the name. +const char *ParamContent::GetName() const { + if (param_type_ == VT_INTEGER) { + return iIt->name_str(); + } else if (param_type_ == VT_BOOLEAN) { + return bIt->name_str(); + } else if (param_type_ == VT_DOUBLE) { + return dIt->name_str(); + } else if (param_type_ == VT_STRING) { + return sIt->name_str(); + } else { + return "ERROR: ParamContent::GetName()"; + } +} + +// Getter for the description. +const char *ParamContent::GetDescription() const { + if (param_type_ == VT_INTEGER) { + return iIt->info_str(); + } else if (param_type_ == VT_BOOLEAN) { + return bIt->info_str(); + } else if (param_type_ == VT_DOUBLE) { + return dIt->info_str(); + } else if (param_type_ == VT_STRING) { + return sIt->info_str(); + } else { + return nullptr; + } +} + +// Getter for the value. +std::string ParamContent::GetValue() const { + std::string result; + if (param_type_ == VT_INTEGER) { + result += std::to_string(*iIt); + } else if (param_type_ == VT_BOOLEAN) { + result += std::to_string(*bIt); + } else if (param_type_ == VT_DOUBLE) { + result += std::to_string(*dIt); + } else if (param_type_ == VT_STRING) { + result = sIt->c_str(); + } + return result; +} + +// Setter for the value. +void ParamContent::SetValue(const char *val) { + // TODO (wanke) Test if the values actually are properly converted. + // (Quickly visible impacts?) + changed_ = true; + if (param_type_ == VT_INTEGER) { + iIt->set_value(atoi(val)); + } else if (param_type_ == VT_BOOLEAN) { + bIt->set_value(atoi(val)); + } else if (param_type_ == VT_DOUBLE) { + std::stringstream stream(val); + // Use "C" locale for reading double value. + stream.imbue(std::locale::classic()); + double d = 0; + stream >> d; + dIt->set_value(d); + } else if (param_type_ == VT_STRING) { + sIt->set_value(val); + } +} + +// Gets the up to the first 3 prefixes from s (split by _). +// For example, tesseract_foo_bar will be split into tesseract,foo and bar. +void ParamsEditor::GetPrefixes(const char *s, std::string *level_one, std::string *level_two, + std::string *level_three) { + std::unique_ptr<char[]> p(new char[1024]); + GetFirstWords(s, 1, p.get()); + *level_one = p.get(); + GetFirstWords(s, 2, p.get()); + *level_two = p.get(); + GetFirstWords(s, 3, p.get()); + *level_three = p.get(); +} + +// Compare two VC objects by their name. +int ParamContent::Compare(const void *v1, const void *v2) { + const ParamContent *one = *static_cast<const ParamContent *const *>(v1); + const ParamContent *two = *static_cast<const ParamContent *const *>(v2); + return strcmp(one->GetName(), two->GetName()); +} + +// Find all editable parameters used within tesseract and create a +// SVMenuNode tree from it. +// TODO (wanke): This is actually sort of hackish. +SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { + auto *mr = new SVMenuNode(); + ParamContent_LIST vclist; + ParamContent_IT vc_it(&vclist); + // Amount counts the number of entries for a specific char*. + // TODO(rays) get rid of the use of std::map. + std::map<const char *, int> amount; + + // Add all parameters to a list. + int num_iterations = (tess->params() == nullptr) ? 1 : 2; + for (int v = 0; v < num_iterations; ++v) { + tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params(); + for (auto ¶m : vec->int_params) { + vc_it.add_after_then_move(new ParamContent(param)); + } + for (auto ¶m : vec->bool_params) { + vc_it.add_after_then_move(new ParamContent(param)); + } + for (auto ¶m : vec->string_params) { + vc_it.add_after_then_move(new ParamContent(param)); + } + for (auto ¶m : vec->double_params) { + vc_it.add_after_then_move(new ParamContent(param)); + } + } + + // Count the # of entries starting with a specific prefix. + for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { + ParamContent *vc = vc_it.data(); + std::string tag; + std::string tag2; + std::string tag3; + + GetPrefixes(vc->GetName(), &tag, &tag2, &tag3); + amount[tag.c_str()]++; + amount[tag2.c_str()]++; + amount[tag3.c_str()]++; + } + + vclist.sort(ParamContent::Compare); // Sort the list alphabetically. + + SVMenuNode *other = mr->AddChild("OTHER"); + + // go through the list again and this time create the menu structure. + vc_it.move_to_first(); + for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { + ParamContent *vc = vc_it.data(); + std::string tag; + std::string tag2; + std::string tag3; + GetPrefixes(vc->GetName(), &tag, &tag2, &tag3); + + if (amount[tag.c_str()] == 1) { + other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().c_str(), vc->GetDescription()); + } else { // More than one would use this submenu -> create submenu. + SVMenuNode *sv = mr->AddChild(tag.c_str()); + if ((amount[tag.c_str()] <= MAX_ITEMS_IN_SUBMENU) || (amount[tag2.c_str()] <= 1)) { + sv->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().c_str(), vc->GetDescription()); + } else { // Make subsubmenus. + SVMenuNode *sv2 = sv->AddChild(tag2.c_str()); + sv2->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().c_str(), vc->GetDescription()); + } + } + } + return mr; +} + +// Event listener. Waits for SVET_POPUP events and processes them. +void ParamsEditor::Notify(const SVEvent *sve) { + if (sve->type == SVET_POPUP) { // only catch SVET_POPUP! + char *param = sve->parameter; + if (sve->command_id == writeCommands[0]) { + WriteParams(param, false); + } else if (sve->command_id == writeCommands[1]) { + WriteParams(param, true); + } else { + ParamContent *vc = ParamContent::GetParamContentById(sve->command_id); + vc->SetValue(param); + sv_window_->AddMessageF("Setting %s to %s", vc->GetName(), vc->GetValue().c_str()); + } + } +} + +// Integrate the parameters editor as popupmenu into the existing scrollview +// window (usually the pg editor). If sv == null, create a new empty +// empty window and attach the parameters editor to that window (ugly). +ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) { + if (sv == nullptr) { + const char *name = "ParamEditorMAIN"; + sv = new ScrollView(name, 1, 1, 200, 200, 300, 200); + } + + sv_window_ = sv; + + // Only one event handler per window. + // sv->AddEventHandler((SVEventHandler*) this); + + SVMenuNode *svMenuRoot = BuildListOfAllLeaves(tess); + + std::string paramfile; + paramfile = tess->datadir; + paramfile += VARDIR; // parameters dir + paramfile += "edited"; // actual name + + SVMenuNode *std_menu = svMenuRoot->AddChild("Build Config File"); + + writeCommands[0] = nrParams + 1; + std_menu->AddChild("All Parameters", writeCommands[0], paramfile.c_str(), "Config file name?"); + + writeCommands[1] = nrParams + 2; + std_menu->AddChild("changed_ Parameters Only", writeCommands[1], paramfile.c_str(), + "Config file name?"); + + svMenuRoot->BuildMenu(sv, false); +} + +// Write all (changed_) parameters to a config file. +void ParamsEditor::WriteParams(char *filename, bool changes_only) { + FILE *fp; // input file + // if file exists + if ((fp = fopen(filename, "rb")) != nullptr) { + fclose(fp); + std::stringstream msg; + msg << "Overwrite file " << filename << "? (Y/N)"; + int a = sv_window_->ShowYesNoDialog(msg.str().c_str()); + if (a == 'n') { + return; + } // don't write + } + + fp = fopen(filename, "wb"); // can we write to it? + if (fp == nullptr) { + sv_window_->AddMessageF("Can't write to file %s", filename); + return; + } + for (auto &iter : vcMap) { + ParamContent *cur = iter.second; + if (!changes_only || cur->HasChanged()) { + fprintf(fp, "%-25s %-12s # %s\n", cur->GetName(), cur->GetValue().c_str(), + cur->GetDescription()); + } + } + fclose(fp); +} + +} // namespace tesseract + +#endif // !GRAPHICS_DISABLED
