comparison mupdf-source/thirdparty/tesseract/src/ccutil/ccutil.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: scharron@google.com (Samuel Charron)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12
13 #include "ccutil.h"
14 #include "tprintf.h" // for tprintf
15
16 #include <cstdlib>
17 #include <cstring> // for std::strrchr, std::strlen
18 #include <filesystem> // for std::filesystem
19
20 namespace tesseract {
21
// Default constructor.
// Value-initializes params_ and then sets up the two parameter members
// through the project's *_MEMBER macros:
//   - ambigs_debug_level      (initial value 0)
//   - use_ambigs_for_adaption (initial value false)
// Each macro receives the parameter name, its initial value, a help string
// and &params_.
// NOTE(review): presumably the macros register the parameter in params_ so it
// can be looked up by name -- confirm against the macro definitions.
22 CCUtil::CCUtil()
23 : params_()
24 , INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", &params_)
25 , BOOL_MEMBER(use_ambigs_for_adaption, false,
26 "Use ambigs for deciding"
27 " whether to adapt to a character",
28 &params_) {}
29
30 // Destructor (defaulted, no custom cleanup).
31 // It is defined here, out of line, so the compiler can create a single
32 // vtable instead of weak vtables in every compilation unit.
33 CCUtil::~CCUtil() = default;
34
35 /**
36 * @brief CCUtil::main_setup - set location of tessdata and name of image
37 *
38 * @param argv0 - paths to the directory with language files and config files.
39 * An actual value of argv0 is used if not nullptr, otherwise TESSDATA_PREFIX is
40 * used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If
41 * previous is not successful - use current directory.
42 * @param basename - name of image
43 */
44 void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
45 imagebasename = basename; /**< name of image */
46
47 const char *tessdata_prefix = getenv("TESSDATA_PREFIX");
48
49 // Ignore TESSDATA_PREFIX if there is no matching filesystem entry.
50 if (tessdata_prefix != nullptr && !std::filesystem::exists(tessdata_prefix)) {
51 tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignore it\n", tessdata_prefix);
52 tessdata_prefix = nullptr;
53 }
54
55 if (!argv0.empty()) {
56 /* Use tessdata prefix from the command line. */
57 datadir = argv0;
58 } else if (tessdata_prefix) {
59 /* Use tessdata prefix from the environment. */
60 datadir = tessdata_prefix;
61 #if defined(_WIN32)
62 } else if (datadir.empty() || !std::filesystem::exists(datadir)) {
63 /* Look for tessdata in directory of executable. */
64 char path[_MAX_PATH];
65 DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
66 if (length > 0 && length < sizeof(path)) {
67 char *separator = std::strrchr(path, '\\');
68 if (separator != nullptr) {
69 *separator = '\0';
70 std::string subdir = path;
71 subdir += "/tessdata";
72 if (std::filesystem::exists(subdir)) {
73 datadir = subdir;
74 }
75 }
76 }
77 #endif /* _WIN32 */
78 }
79
80 // datadir may still be empty:
81 if (datadir.empty()) {
82 #if defined(TESSDATA_PREFIX)
83 // Use tessdata prefix which was compiled in.
84 datadir = TESSDATA_PREFIX "/tessdata/";
85 // Note that some software (for example conda) patches TESSDATA_PREFIX
86 // in the binary, so it might be shorter. Recalculate its length.
87 datadir.resize(std::strlen(datadir.c_str()));
88 #else
89 datadir = "./";
90 #endif /* TESSDATA_PREFIX */
91 }
92
93 // check for missing directory separator
94 const char lastchar = datadir.back();
95 if (lastchar != '/' && lastchar != '\\') {
96 datadir += '/';
97 }
98 }
99
100 } // namespace tesseract