comparison mupdf-source/thirdparty/tesseract/src/tesseract.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /**********************************************************************
2 * File: tesseract.cpp
3 * Description: Main program for merge of tess and editor.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 // Include automatically generated configuration file if running autoconf
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23
24 #include <cerrno> // for errno
25 #if defined(__USE_GNU)
26 # include <cfenv> // for feenableexcept
27 #endif
28 #include <climits> // for INT_MIN, INT_MAX
29 #include <cstdlib> // for std::getenv
30 #include <iostream>
31 #include <map> // for std::map
32 #include <memory> // std::unique_ptr
33
34 #include <allheaders.h>
35 #include <tesseract/baseapi.h>
36 #include "dict.h"
37 #include <tesseract/renderer.h>
38 #include "simddetect.h"
39 #include "tesseractclass.h" // for AnyTessLang
40 #include "tprintf.h" // for tprintf
41
42 #ifdef _OPENMP
43 # include <omp.h>
44 #endif
45
46 #if defined(HAVE_LIBARCHIVE)
47 # include <archive.h>
48 #endif
49 #if defined(HAVE_LIBCURL)
50 # include <curl/curl.h>
51 #endif
52
53 #if defined(_WIN32)
54 # include <fcntl.h>
55 # include <io.h>
56 # if defined(HAVE_TIFFIO_H)
57
58 # include <tiffio.h>
59
// Error callback with the libtiff handler signature: prints the formatted
// message to stderr, prefixed with "<module>: " when a module name is given
// and terminated with ".\n".
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
  const bool has_module = (module != nullptr);
  if (has_module) {
    fprintf(stderr, "%s: ", module);
  }
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
67
// Warning callback with the libtiff handler signature: same layout as the
// error handler, but the message body is introduced by "Warning, ".
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
  const bool has_module = (module != nullptr);
  if (has_module) {
    fprintf(stderr, "%s: ", module);
  }
  fputs("Warning, ", stderr);
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
76
77 # endif /* HAVE_TIFFIO_H */
78
79 class AutoWin32ConsoleOutputCP {
80 public:
81 explicit AutoWin32ConsoleOutputCP(UINT codeCP) :
82 oldCP_(GetConsoleOutputCP()) {
83 SetConsoleOutputCP(codeCP);
84 }
85 ~AutoWin32ConsoleOutputCP() {
86 SetConsoleOutputCP(oldCP_);
87 }
88
89 private:
90 UINT oldCP_;
91 };
92
93 static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
94
95 #endif // _WIN32
96
97 using namespace tesseract;
98
99 static void PrintVersionInfo() {
100 char *versionStrP;
101
102 printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
103
104 versionStrP = getLeptonicaVersion();
105 printf(" %s\n", versionStrP);
106 lept_free(versionStrP);
107
108 versionStrP = getImagelibVersions();
109 printf(" %s\n", versionStrP);
110 lept_free(versionStrP);
111
112 #if defined(HAVE_NEON) || defined(__aarch64__)
113 if (tesseract::SIMDDetect::IsNEONAvailable())
114 printf(" Found NEON\n");
115 #elif defined(HAVE_RVV)
116 if (tesseract::SIMDDetect::IsRVVAvailable())
117 printf(" Found RVV\n");
118 #else
119 if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
120 printf(" Found AVX512BW\n");
121 }
122 if (tesseract::SIMDDetect::IsAVX512FAvailable()) {
123 printf(" Found AVX512F\n");
124 }
125 if (tesseract::SIMDDetect::IsAVX512VNNIAvailable()) {
126 printf(" Found AVX512VNNI\n");
127 }
128 if (tesseract::SIMDDetect::IsAVX2Available()) {
129 printf(" Found AVX2\n");
130 }
131 if (tesseract::SIMDDetect::IsAVXAvailable()) {
132 printf(" Found AVX\n");
133 }
134 if (tesseract::SIMDDetect::IsFMAAvailable()) {
135 printf(" Found FMA\n");
136 }
137 if (tesseract::SIMDDetect::IsSSEAvailable()) {
138 printf(" Found SSE4.1\n");
139 }
140 #endif
141 #ifdef _OPENMP
142 printf(" Found OpenMP %d\n", _OPENMP);
143 #endif
144 #if defined(HAVE_LIBARCHIVE)
145 # if ARCHIVE_VERSION_NUMBER >= 3002000
146 printf(" Found %s\n", archive_version_details());
147 # else
148 printf(" Found %s\n", archive_version_string());
149 # endif // ARCHIVE_VERSION_NUMBER
150 #endif // HAVE_LIBARCHIVE
151 #if defined(HAVE_LIBCURL)
152 printf(" Found %s\n", curl_version());
153 #endif
154 }
155
// Writes the table of page segmentation modes (numeric value and symbolic
// name) to stdout. Builds without the legacy engine append a note that the
// OSD modes are disabled.
static void PrintHelpForPSM() {
  static const char kPsmHelp[] =
      "Page segmentation modes (PSM):\n"
      " 0|osd_only Orientation and script detection (OSD) only.\n"
      " 1|auto_osd Automatic page segmentation with OSD.\n"
      " 2|auto_only Automatic page segmentation, but no OSD, or OCR. (not implemented)\n"
      " 3|auto Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4|single_column Assume a single column of text of variable sizes.\n"
      " 5|single_block_vert_text Assume a single uniform block of vertically aligned text.\n"
      " 6|single_block Assume a single uniform block of text.\n"
      " 7|single_line Treat the image as a single text line.\n"
      " 8|single_word Treat the image as a single word.\n"
      " 9|circle_word Treat the image as a single word in a circle.\n"
      " 10|single_char Treat the image as a single character.\n"
      " 11|sparse_text Sparse text. Find as much text as possible in no particular order.\n"
      " 12|sparse_text_osd Sparse text with OSD.\n"
      " 13|raw_line Raw line. Treat the image as a single text line,\n"
      " bypassing hacks that are Tesseract-specific.\n";
  fputs(kPsmHelp, stdout);

#ifdef DISABLED_LEGACY_ENGINE
  fputs("\nNOTE: The OSD modes are currently disabled.\n", stdout);
#endif
}
182
183 #ifndef DISABLED_LEGACY_ENGINE
// Writes the table of OCR engine modes (numeric value and symbolic name) to
// stdout.
static void PrintHelpForOEM() {
  static const char kOemHelp[] =
      "OCR Engine modes (OEM):\n"
      " 0|tesseract_only Legacy engine only.\n"
      " 1|lstm_only Neural nets LSTM engine only.\n"
      " 2|tesseract_lstm_combined Legacy + LSTM engines.\n"
      " 3|default Default, based on what is available.\n";
  fputs(kOemHelp, stdout);
}
193 #endif // ndef DISABLED_LEGACY_ENGINE
194
195 static void PrintHelpExtra(const char *program) {
196 printf(
197 "Usage:\n"
198 " %s --help | --help-extra | --help-psm | "
199 #ifndef DISABLED_LEGACY_ENGINE
200 "--help-oem | "
201 #endif
202 "--version\n"
203 " %s --list-langs [--tessdata-dir PATH]\n"
204 #ifndef DISABLED_LEGACY_ENGINE
205 " %s --print-fonts-table [options...] [configfile...]\n"
206 #endif // ndef DISABLED_LEGACY_ENGINE
207 " %s --print-parameters [options...] [configfile...]\n"
208 " %s imagename|imagelist|stdin outputbase|stdout [options...] "
209 "[configfile...]\n"
210 "\n"
211 "OCR options:\n"
212 " --tessdata-dir PATH Specify the location of tessdata path.\n"
213 " --user-words PATH Specify the location of user words file.\n"
214 " --user-patterns PATH Specify the location of user patterns file.\n"
215 " --dpi VALUE Specify DPI for input image.\n"
216 " --loglevel LEVEL Specify logging level. LEVEL can be\n"
217 " ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL or OFF.\n"
218 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
219 " -c VAR=VALUE Set value for config variables.\n"
220 " Multiple -c arguments are allowed.\n"
221 " --psm PSM|NUM Specify page segmentation mode.\n"
222 #ifndef DISABLED_LEGACY_ENGINE
223 " --oem OEM|NUM Specify OCR Engine mode.\n"
224 #endif
225 "NOTE: These options must occur before any configfile.\n"
226 "\n",
227 program, program, program, program
228 #ifndef DISABLED_LEGACY_ENGINE
229 , program
230 #endif // ndef DISABLED_LEGACY_ENGINE
231 );
232
233 PrintHelpForPSM();
234 #ifndef DISABLED_LEGACY_ENGINE
235 printf("\n");
236 PrintHelpForOEM();
237 #endif
238
239 printf(
240 "\n"
241 "Single options:\n"
242 " -h, --help Show minimal help message.\n"
243 " --help-extra Show extra help for advanced users.\n"
244 " --help-psm Show page segmentation modes.\n"
245 #ifndef DISABLED_LEGACY_ENGINE
246 " --help-oem Show OCR Engine modes.\n"
247 #endif
248 " -v, --version Show version information.\n"
249 " --list-langs List available languages for tesseract engine.\n"
250 #ifndef DISABLED_LEGACY_ENGINE
251 " --print-fonts-table Print tesseract fonts table.\n"
252 #endif // ndef DISABLED_LEGACY_ENGINE
253 " --print-parameters Print tesseract parameters.\n");
254 }
255
// Prints the short help text to stdout: a three-line usage synopsis (with
// `program` as the executable name) followed by the most common options.
static void PrintHelpMessage(const char *program) {
  printf("Usage:\n"
         " %s --help | --help-extra | --version\n",
         program);
  printf(" %s --list-langs\n", program);
  printf(" %s imagename outputbase [options...] [configfile...]\n", program);

  static const char kOptionsHelp[] =
      "\n"
      "OCR options:\n"
      " -l LANG[+LANG] Specify language(s) used for OCR.\n"
      "NOTE: These options must occur before any configfile.\n"
      "\n"
      "Single options:\n"
      " --help Show this help message.\n"
      " --help-extra Show extra help for advanced users.\n"
      " --version Show version information.\n"
      " --list-langs List available languages for tesseract engine.\n";
  fputs(kOptionsHelp, stdout);
}
275
276 static bool SetVariablesFromCLArgs(tesseract::TessBaseAPI &api, int argc, char **argv) {
277 bool success = true;
278 char opt1[256], opt2[255];
279 for (int i = 0; i < argc; i++) {
280 if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
281 strncpy(opt1, argv[i + 1], 255);
282 opt1[255] = '\0';
283 char *p = strchr(opt1, '=');
284 if (!p) {
285 fprintf(stderr, "Missing = in configvar assignment\n");
286 success = false;
287 break;
288 }
289 *p = 0;
290 strncpy(opt2, strchr(argv[i + 1], '=') + 1, sizeof(opt2) - 1);
291 opt2[254] = 0;
292 ++i;
293
294 if (!api.SetVariable(opt1, opt2)) {
295 fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
296 }
297 }
298 }
299 return success;
300 }
301
302 static void PrintLangsList(tesseract::TessBaseAPI &api) {
303 std::vector<std::string> languages;
304 api.GetAvailableLanguagesAsVector(&languages);
305 printf("List of available languages in \"%s\" (%zu):\n",
306 api.GetDatapath(), languages.size());
307 for (const auto &language : languages) {
308 printf("%s\n", language.c_str());
309 }
310 api.End();
311 }
312
313 /**
314 * We have 2 possible sources of pagesegmode: a config file and
315 * the command line. For backwards compatibility reasons, the
316 * default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
317 * default for this program is tesseract::PSM_AUTO. We will let
318 * the config file take priority, so the command-line default
319 * can take priority over the tesseract default, so we use the
320 * value from the command line only if the retrieved mode
321 * is still tesseract::PSM_SINGLE_BLOCK, indicating no change
322 * in any config file. Therefore the only way to force
323 * tesseract::PSM_SINGLE_BLOCK is from the command line.
324 * It would be simpler if we could set the value before Init,
325 * but that doesn't work.
326 */
327 static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
328 if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) {
329 api.SetPageSegMode(pagesegmode);
330 }
331 }
332
// Checks that `arg` lies in [0, count). On failure a hint naming `mode` and
// the valid numeric range is printed to stdout and false is returned.
static bool checkArgValues(int arg, const char *mode, int count) {
  const bool in_range = (0 <= arg) && (arg < count);
  if (in_range) {
    return true;
  }
  printf("Invalid %s value, please enter a symbolic %s value or a number between 0-%d\n", mode, mode, count - 1);
  return false;
}
340
// Convert a symbolic or numeric string to an OEM value.
// Accepts "0".."3" as well as the symbolic names listed below and returns the
// matching integer, or -1 when `arg` is not recognized. The match is exact
// (case-sensitive, no surrounding whitespace).
static int stringToOEM(const std::string &arg) {
  // Built once on first use instead of on every call.
  static const std::map<std::string, int> oem_map = {
      {"0", 0},
      {"1", 1},
      {"2", 2},
      {"3", 3},
      {"tesseract_only", 0},
      {"lstm_only", 1},
      {"tesseract_lstm_combined", 2},
      {"default", 3},
  };
  const auto it = oem_map.find(arg);
  return it == oem_map.end() ? -1 : it->second;
}
356
// Convert a symbolic or numeric string to a PSM value.
// Accepts "0".."13" as well as the symbolic names listed below and returns
// the matching integer, or -1 when `arg` is not recognized. The match is
// exact (case-sensitive, no surrounding whitespace).
static int stringToPSM(const std::string &arg) {
  // Built once on first use instead of on every call.
  static const std::map<std::string, int> psm_map = {
      {"0", 0},
      {"1", 1},
      {"2", 2},
      {"3", 3},
      {"4", 4},
      {"5", 5},
      {"6", 6},
      {"7", 7},
      {"8", 8},
      {"9", 9},
      {"10", 10},
      {"11", 11},
      {"12", 12},
      {"13", 13},
      {"osd_only", 0},
      {"auto_osd", 1},
      {"auto_only", 2},
      {"auto", 3},
      {"single_column", 4},
      {"single_block_vert_text", 5},
      {"single_block", 6},
      {"single_line", 7},
      {"single_word", 8},
      {"circle_word", 9},
      {"single_char", 10},
      {"sparse_text", 11},
      {"sparse_text_osd", 12},
      {"raw_line", 13},
  };
  const auto it = psm_map.find(arg);
  return it == psm_map.end() ? -1 : it->second;
}
391
392 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
393 static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
394 const char **outputbase, const char **datapath, l_int32 *dpi,
395 bool *list_langs, bool *print_parameters, bool *print_fonts_table,
396 std::vector<std::string> *vars_vec, std::vector<std::string> *vars_values,
397 l_int32 *arg_i, tesseract::PageSegMode *pagesegmode,
398 tesseract::OcrEngineMode *enginemode) {
399 bool noocr = false;
400 int i;
401 for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
402 if (*image != nullptr && *outputbase == nullptr) {
403 // outputbase follows image, don't allow options at that position.
404 *outputbase = argv[i];
405 } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
406 PrintHelpMessage(argv[0]);
407 noocr = true;
408 } else if (strcmp(argv[i], "--help-extra") == 0) {
409 PrintHelpExtra(argv[0]);
410 noocr = true;
411 } else if ((strcmp(argv[i], "--help-psm") == 0)) {
412 PrintHelpForPSM();
413 noocr = true;
414 #ifndef DISABLED_LEGACY_ENGINE
415 } else if ((strcmp(argv[i], "--help-oem") == 0)) {
416 PrintHelpForOEM();
417 noocr = true;
418 #endif
419 } else if ((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
420 PrintVersionInfo();
421 noocr = true;
422 } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
423 *lang = argv[i + 1];
424 ++i;
425 } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
426 *datapath = argv[i + 1];
427 ++i;
428 } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
429 *dpi = atoi(argv[i + 1]);
430 ++i;
431 } else if (strcmp(argv[i], "--loglevel") == 0 && i + 1 < argc) {
432 // Allow the log levels which are used by log4cxx.
433 const std::string loglevel_string = argv[++i];
434 static const std::map<const std::string, int> loglevels {
435 {"ALL", INT_MIN},
436 {"TRACE", 5000},
437 {"DEBUG", 10000},
438 {"INFO", 20000},
439 {"WARN", 30000},
440 {"ERROR", 40000},
441 {"FATAL", 50000},
442 {"OFF", INT_MAX},
443 };
444 try {
445 auto loglevel = loglevels.at(loglevel_string);
446 log_level = loglevel;
447 } catch (const std::out_of_range &e) {
448 // TODO: Allow numeric argument?
449 tprintf("Error, unsupported --loglevel %s\n", loglevel_string.c_str());
450 return false;
451 }
452 } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
453 vars_vec->push_back("user_words_file");
454 vars_values->push_back(argv[i + 1]);
455 ++i;
456 } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
457 vars_vec->push_back("user_patterns_file");
458 vars_values->push_back(argv[i + 1]);
459 ++i;
460 } else if (strcmp(argv[i], "--list-langs") == 0) {
461 noocr = true;
462 *list_langs = true;
463 } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
464 int psm = stringToPSM(argv[i + 1]);
465 if (!checkArgValues(psm, "PSM", tesseract::PSM_COUNT)) {
466 return false;
467 }
468 *pagesegmode = static_cast<tesseract::PageSegMode>(psm);
469 ++i;
470 } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
471 #ifndef DISABLED_LEGACY_ENGINE
472 int oem = stringToOEM(argv[i + 1]);
473 if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) {
474 return false;
475 }
476 *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
477 #endif
478 ++i;
479 } else if (strcmp(argv[i], "--print-parameters") == 0) {
480 noocr = true;
481 *print_parameters = true;
482 #ifndef DISABLED_LEGACY_ENGINE
483 } else if (strcmp(argv[i], "--print-fonts-table") == 0) {
484 noocr = true;
485 *print_fonts_table = true;
486 #endif // ndef DISABLED_LEGACY_ENGINE
487 } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
488 // handled properly after api init
489 ++i;
490 } else if (*image == nullptr) {
491 *image = argv[i];
492 } else {
493 // Unexpected argument.
494 fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
495 return false;
496 }
497 }
498
499 *arg_i = i;
500
501 if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
502 // OSD = orientation and script detection.
503 if (*lang != nullptr && strcmp(*lang, "osd")) {
504 // If the user explicitly specifies a language (other than osd)
505 // or a script, only orientation can be detected.
506 fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
507 } else {
508 // That mode requires osd.traineddata to detect orientation and script.
509 *lang = "osd";
510 }
511 }
512
513 if (*outputbase == nullptr && noocr == false) {
514 PrintHelpMessage(argv[0]);
515 return false;
516 }
517
518 return true;
519 }
520
521 static void PreloadRenderers(tesseract::TessBaseAPI &api,
522 std::vector<std::unique_ptr<TessResultRenderer>> &renderers,
523 tesseract::PageSegMode pagesegmode, const char *outputbase) {
524 if (pagesegmode == tesseract::PSM_OSD_ONLY) {
525 #ifndef DISABLED_LEGACY_ENGINE
526 renderers.push_back(std::make_unique<tesseract::TessOsdRenderer>(outputbase));
527 #endif // ndef DISABLED_LEGACY_ENGINE
528 } else {
529 bool error = false;
530 bool b;
531 api.GetBoolVariable("tessedit_create_hocr", &b);
532 if (b) {
533 bool font_info;
534 api.GetBoolVariable("hocr_font_info", &font_info);
535 auto renderer = std::make_unique<tesseract::TessHOcrRenderer>(outputbase, font_info);
536 if (renderer->happy()) {
537 renderers.push_back(std::move(renderer));
538 } else {
539 tprintf("Error, could not create hOCR output file: %s\n", strerror(errno));
540 error = true;
541 }
542 }
543
544 api.GetBoolVariable("tessedit_create_alto", &b);
545 if (b) {
546 auto renderer = std::make_unique<tesseract::TessAltoRenderer>(outputbase);
547 if (renderer->happy()) {
548 renderers.push_back(std::move(renderer));
549 } else {
550 tprintf("Error, could not create ALTO output file: %s\n", strerror(errno));
551 error = true;
552 }
553 }
554
555 api.GetBoolVariable("tessedit_create_page_xml", &b);
556 if (b) {
557 auto renderer = std::make_unique<tesseract::TessPAGERenderer>(outputbase);
558 if (renderer->happy()) {
559 renderers.push_back(std::move(renderer));
560 } else {
561 tprintf("Error, could not create PAGE output file: %s\n", strerror(errno));
562 error = true;
563 }
564 }
565
566 api.GetBoolVariable("tessedit_create_tsv", &b);
567 if (b) {
568 bool font_info;
569 api.GetBoolVariable("hocr_font_info", &font_info);
570 auto renderer = std::make_unique<tesseract::TessTsvRenderer>(outputbase, font_info);
571 if (renderer->happy()) {
572 renderers.push_back(std::move(renderer));
573 } else {
574 tprintf("Error, could not create TSV output file: %s\n", strerror(errno));
575 error = true;
576 }
577 }
578
579 api.GetBoolVariable("tessedit_create_pdf", &b);
580 if (b) {
581 #ifdef WIN32
582 if (_setmode(_fileno(stdout), _O_BINARY) == -1)
583 tprintf("ERROR: cin to binary: %s", strerror(errno));
584 #endif // WIN32
585 bool textonly;
586 api.GetBoolVariable("textonly_pdf", &textonly);
587 auto renderer = std::make_unique<tesseract::TessPDFRenderer>(outputbase, api.GetDatapath(), textonly);
588 if (renderer->happy()) {
589 renderers.push_back(std::move(renderer));
590 } else {
591 tprintf("Error, could not create PDF output file: %s\n", strerror(errno));
592 error = true;
593 }
594 }
595
596 api.GetBoolVariable("tessedit_write_unlv", &b);
597 if (b) {
598 api.SetVariable("unlv_tilde_crunching", "true");
599 auto renderer = std::make_unique<tesseract::TessUnlvRenderer>(outputbase);
600 if (renderer->happy()) {
601 renderers.push_back(std::move(renderer));
602 } else {
603 tprintf("Error, could not create UNLV output file: %s\n", strerror(errno));
604 error = true;
605 }
606 }
607
608 api.GetBoolVariable("tessedit_create_lstmbox", &b);
609 if (b) {
610 auto renderer = std::make_unique<tesseract::TessLSTMBoxRenderer>(outputbase);
611 if (renderer->happy()) {
612 renderers.push_back(std::move(renderer));
613 } else {
614 tprintf("Error, could not create LSTM BOX output file: %s\n", strerror(errno));
615 error = true;
616 }
617 }
618
619 api.GetBoolVariable("tessedit_create_boxfile", &b);
620 if (b) {
621 auto renderer = std::make_unique<tesseract::TessBoxTextRenderer>(outputbase);
622 if (renderer->happy()) {
623 renderers.push_back(std::move(renderer));
624 } else {
625 tprintf("Error, could not create BOX output file: %s\n", strerror(errno));
626 error = true;
627 }
628 }
629
630 api.GetBoolVariable("tessedit_create_wordstrbox", &b);
631 if (b) {
632 auto renderer = std::make_unique<tesseract::TessWordStrBoxRenderer>(outputbase);
633 if (renderer->happy()) {
634 renderers.push_back(std::move(renderer));
635 } else {
636 tprintf("Error, could not create WordStr BOX output file: %s\n", strerror(errno));
637 error = true;
638 }
639 }
640
641 api.GetBoolVariable("tessedit_create_txt", &b);
642 if (b || (!error && renderers.empty())) {
643 // Create text output if no other output was requested
644 // even if text output was not explicitly requested unless
645 // there was an error.
646 auto renderer = std::make_unique<tesseract::TessTextRenderer>(outputbase);
647 if (renderer->happy()) {
648 renderers.push_back(std::move(renderer));
649 } else {
650 tprintf("Error, could not create TXT output file: %s\n", strerror(errno));
651 }
652 }
653 }
654
655 // Null-out the renderers that are
656 // added to the root, and leave the root in the vector.
657 for (size_t r = 1; r < renderers.size(); ++r) {
658 renderers[0]->insert(renderers[r].get());
659 renderers[r].release(); // at the moment insert() is owning
660 }
661 }
662
663 /**********************************************************************
664 * main()
665 *
666 **********************************************************************/
667
668 int main(int argc, char **argv) {
669 #if defined(__USE_GNU) && defined(HAVE_FEENABLEEXCEPT)
670 // Raise SIGFPE.
671 # if defined(__clang__)
672 // clang creates code which causes some FP exceptions, so don't enable those.
673 feenableexcept(FE_DIVBYZERO);
674 # else
675 feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
676 # endif
677 #endif
678 const char *lang = nullptr;
679 const char *image = nullptr;
680 const char *outputbase = nullptr;
681 const char *datapath = nullptr;
682 bool list_langs = false;
683 bool print_parameters = false;
684 bool print_fonts_table = false;
685 l_int32 dpi = 0;
686 int arg_i = 1;
687 tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
688 #ifdef DISABLED_LEGACY_ENGINE
689 auto enginemode = tesseract::OEM_LSTM_ONLY;
690 #else
691 tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
692 #endif
693 std::vector<std::string> vars_vec;
694 std::vector<std::string> vars_values;
695
696 if (std::getenv("LEPT_MSG_SEVERITY")) {
697 // Get Leptonica message level from environment variable.
698 setMsgSeverity(L_SEVERITY_EXTERNAL);
699 } else {
700 // Disable debugging and informational messages from Leptonica.
701 setMsgSeverity(L_SEVERITY_ERROR);
702 }
703
704 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
705 /* Show libtiff errors and warnings on console (not in GUI). */
706 TIFFSetErrorHandler(Win32ErrorHandler);
707 TIFFSetWarningHandler(Win32WarningHandler);
708 #endif // HAVE_TIFFIO_H && _WIN32
709
710 if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
711 &print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i,
712 &pagesegmode, &enginemode)) {
713 return EXIT_FAILURE;
714 }
715
716 bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
717
718 if (lang == nullptr && in_recognition_mode) {
719 // Set default language model if none was given and a model file is needed.
720 lang = "eng";
721 }
722
723 if (image == nullptr && in_recognition_mode) {
724 return EXIT_SUCCESS;
725 }
726
727 // Call GlobalDawgCache here to create the global DawgCache object before
728 // the TessBaseAPI object. This fixes the order of destructor calls:
729 // first TessBaseAPI must be destructed, DawgCache must be the last object.
730 tesseract::Dict::GlobalDawgCache();
731
732 TessBaseAPI api;
733
734 api.SetOutputName(outputbase);
735
736 const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
737 &vars_vec, &vars_values, false);
738
739 if (!SetVariablesFromCLArgs(api, argc, argv)) {
740 return EXIT_FAILURE;
741 }
742
743 // SIMD settings might be overridden by config variable.
744 tesseract::SIMDDetect::Update();
745
746 if (list_langs) {
747 PrintLangsList(api);
748 return EXIT_SUCCESS;
749 }
750
751 if (init_failed) {
752 fprintf(stderr, "Could not initialize tesseract.\n");
753 return EXIT_FAILURE;
754 }
755
756 if (print_parameters) {
757 FILE *fout = stdout;
758 fprintf(stdout, "Tesseract parameters:\n");
759 api.PrintVariables(fout);
760 api.End();
761 return EXIT_SUCCESS;
762 }
763
764 #ifndef DISABLED_LEGACY_ENGINE
765 if (print_fonts_table) {
766 FILE *fout = stdout;
767 fprintf(stdout, "Tesseract fonts table:\n");
768 api.PrintFontsTable(fout);
769 api.End();
770 return EXIT_SUCCESS;
771 }
772 #endif // ndef DISABLED_LEGACY_ENGINE
773
774 FixPageSegMode(api, pagesegmode);
775
776 if (dpi) {
777 auto dpi_string = std::to_string(dpi);
778 api.SetVariable("user_defined_dpi", dpi_string.c_str());
779 }
780
781 int ret_val = EXIT_SUCCESS;
782
783 if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
784 Pix *pixs = pixRead(image);
785 if (!pixs) {
786 fprintf(stderr, "Leptonica can't process input file: %s\n", image);
787 return 2;
788 }
789
790 api.SetImage(pixs);
791
792 tesseract::Orientation orientation;
793 tesseract::WritingDirection direction;
794 tesseract::TextlineOrder order;
795 float deskew_angle;
796
797 const std::unique_ptr<const tesseract::PageIterator> it(api.AnalyseLayout());
798 if (it) {
799 // TODO: Implement output of page segmentation, see documentation
800 // ("Automatic page segmentation, but no OSD, or OCR").
801 it->Orientation(&orientation, &direction, &order, &deskew_angle);
802 tprintf(
803 "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
804 "Deskew angle: %.4f\n",
805 orientation, direction, order, deskew_angle);
806 } else {
807 ret_val = EXIT_FAILURE;
808 }
809
810 pixDestroy(&pixs);
811 return ret_val;
812 }
813
814 // Set in_training_mode to true when using one of these configs:
815 // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
816 // In this mode no other OCR result files are written.
817 bool b = false;
818 bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
819 (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
820 (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
821 (api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
822
823 if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) {
824 if (!api.tesseract()->AnyTessLang()) {
825 fprintf(stderr, "Error, OSD requires a model for the legacy engine\n");
826 return EXIT_FAILURE;
827 }
828 }
829 #ifdef DISABLED_LEGACY_ENGINE
830 auto cur_psm = api.GetPageSegMode();
831 auto osd_warning = std::string("");
832 if (cur_psm == tesseract::PSM_OSD_ONLY) {
833 const char *disabled_osd_msg =
834 "\nERROR: The page segmentation mode 0 (OSD Only) is currently "
835 "disabled.\n\n";
836 fprintf(stderr, "%s", disabled_osd_msg);
837 return EXIT_FAILURE;
838 } else if (cur_psm == tesseract::PSM_AUTO_OSD) {
839 api.SetPageSegMode(tesseract::PSM_AUTO);
840 osd_warning +=
841 "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
842 "disabled. "
843 "Using PSM 3 (Auto) instead.\n\n";
844 } else if (cur_psm == tesseract::PSM_SPARSE_TEXT_OSD) {
845 api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);
846 osd_warning +=
847 "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
848 "currently disabled. "
849 "Using PSM 11 (Sparse text) instead.\n\n";
850 }
851 #endif // def DISABLED_LEGACY_ENGINE
852
853 std::vector<std::unique_ptr<TessResultRenderer>> renderers;
854
855 if (in_training_mode) {
856 renderers.push_back(nullptr);
857 } else if (outputbase != nullptr) {
858 PreloadRenderers(api, renderers, pagesegmode, outputbase);
859 }
860
861 if (!renderers.empty()) {
862 #ifdef DISABLED_LEGACY_ENGINE
863 if (!osd_warning.empty()) {
864 fprintf(stderr, "%s", osd_warning.c_str());
865 }
866 #endif
867 bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0].get());
868 if (!succeed) {
869 fprintf(stderr, "Error during processing.\n");
870 ret_val = EXIT_FAILURE;
871 }
872 }
873
874 return ret_val;
875 }