comparison mupdf-source/thirdparty/tesseract/src/ccmain/tesseractclass.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: tesseractclass.h
3 // Description: The Tesseract class. It holds/owns everything needed
4 // to run Tesseract on a single language, and also a set of
5 // sub-Tesseracts to run sub-languages. For thread safety, *every*
6 // global variable goes in here, directly, or indirectly.
7 // This makes it safe to run multiple Tesseracts in different
8 // threads in parallel, and keeps the different language
9 // instances separate.
10 // Author: Ray Smith
11 //
12 // (C) Copyright 2008, Google Inc.
13 // Licensed under the Apache License, Version 2.0 (the "License");
14 // you may not use this file except in compliance with the License.
15 // You may obtain a copy of the License at
16 // http://www.apache.org/licenses/LICENSE-2.0
17 // Unless required by applicable law or agreed to in writing, software
18 // distributed under the License is distributed on an "AS IS" BASIS,
19 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 // See the License for the specific language governing permissions and
21 // limitations under the License.
22 //
23 ///////////////////////////////////////////////////////////////////////
24
25 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H_
26 #define TESSERACT_CCMAIN_TESSERACTCLASS_H_
27
28 #ifdef HAVE_CONFIG_H
29 # include "config_auto.h" // DISABLED_LEGACY_ENGINE
30 #endif
31
32 #include "control.h" // for ACCEPTABLE_WERD_TYPE
33 #include "debugpixa.h" // for DebugPixa
34 #include "devanagari_processing.h" // for ShiroRekhaSplitter
35 #ifndef DISABLED_LEGACY_ENGINE
36 # include "docqual.h" // for GARBAGE_LEVEL
37 #endif
38 #include "genericvector.h" // for PointerVector
39 #include "pageres.h" // for WERD_RES (ptr only), PAGE_RES (pt...
40 #include "params.h" // for BOOL_VAR_H, BoolParam, DoubleParam
41 #include "points.h" // for FCOORD
42 #include "ratngs.h" // for ScriptPos, WERD_CHOICE (ptr only)
43 #include "tessdatamanager.h" // for TessdataManager
44 #include "textord.h" // for Textord
45 #include "wordrec.h" // for Wordrec
46
47 #include <tesseract/publictypes.h> // for OcrEngineMode, PageSegMode, OEM_L...
48 #include <tesseract/unichar.h> // for UNICHAR_ID
49
50 #include <allheaders.h> // for pixDestroy, pixGetWidth, pixGetHe...
51
52 #include <cstdint> // for int16_t, int32_t, uint16_t
53 #include <cstdio> // for FILE
54
55 namespace tesseract {
56
57 class BLOCK_LIST;
58 class ETEXT_DESC;
59 struct OSResults;
60 class PAGE_RES;
61 class PAGE_RES_IT;
62 class ROW;
63 class SVMenuNode;
64 class TBOX;
65 class TO_BLOCK_LIST;
66 class WERD;
67 class WERD_CHOICE;
68 class WERD_RES;
69
70 class ColumnFinder;
71 class DocumentData;
72 #ifndef DISABLED_LEGACY_ENGINE
73 class EquationDetect;
74 #endif // ndef DISABLED_LEGACY_ENGINE
75 class ImageData;
76 class LSTMRecognizer;
77 class Tesseract;
78
79 // Top-level class for all tesseract global instance data.
80 // This class either holds or points to all data used by an instance
81 // of Tesseract, including the memory allocator. When this is
82 // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!
83 //
84 // NOTE to developers: Do not create cyclic dependencies through this class!
85 // The directory dependency tree must remain a tree! To keep this clean,
86 // lower-level code (eg in ccutil, the bottom level) must never need to
87 // know about the content of a higher-level directory.
88 // The following scheme will grant the easiest access to lower-level
89 // global members without creating a cyclic dependency:
90 //
91 // Class Hierarchy (^ = inheritance):
92 //
93 // CCUtil (ccutil/ccutil.h)
94 // ^ Members include: UNICHARSET
95 // CCStruct (ccstruct/ccstruct.h)
96 // ^ Members include: Image
97 // Classify (classify/classify.h)
98 // ^ Members include: Dict
99 // WordRec (wordrec/wordrec.h)
100 // ^ Members include: WERD*, DENORM*
101 // Tesseract (ccmain/tesseractclass.h)
102 // Members include: Pix*
103 //
104 // Other important classes:
105 //
106 // TessBaseAPI (tesseract/baseapi.h)
107 // Members include: BLOCK_LIST*, PAGE_RES*,
108 // Tesseract*, ImageThresholder*
109 // Dict (dict/dict.h)
110 // Members include: Image* (private)
111 //
112 // NOTE that each level contains members that correspond to global
113 // data that is defined (and used) at that level, not necessarily where
114 // the type is defined so for instance:
115 // BOOL_VAR_H(textord_show_blobs);
116 // goes inside the Textord class, not the cc_util class.
117
// Assorted counters and flags kept for statistics and debugging.
// Every counter starts at zero; the flag defaults are given on the members.
struct TesseractStats {
  // Deliberately user-provided (not "= default") so the struct keeps the same
  // non-aggregate nature it had with the explicit initializer list.
  TesseractStats() {}

  int32_t adaption_word_number = 0;
  int16_t doc_blob_quality = 0;
  int16_t doc_outline_errs = 0;
  int16_t doc_char_quality = 0;
  int16_t good_char_count = 0;
  int16_t doc_good_char_quality = 0;
  int32_t word_count = 0;     // count of words in the document
  int32_t dict_words = 0;     // number of dictionary words in the document
  std::string dump_words_str; // accumulator used by dump_words()
  // Flags used by write_results()
  bool tilde_crunch_written = false;
  bool last_char_was_newline = true;
  bool last_char_was_tilde = false;
  bool write_results_empty_block = true;
};
149
150 // Struct to hold all the pointers to relevant data for processing a word.
151 struct WordData {
152 WordData() : word(nullptr), row(nullptr), block(nullptr), prev_word(nullptr) {}
153 explicit WordData(const PAGE_RES_IT &page_res_it)
154 : word(page_res_it.word())
155 , row(page_res_it.row()->row)
156 , block(page_res_it.block()->block)
157 , prev_word(nullptr) {}
158 WordData(BLOCK *block_in, ROW *row_in, WERD_RES *word_res)
159 : word(word_res), row(row_in), block(block_in), prev_word(nullptr) {}
160
161 WERD_RES *word;
162 ROW *row;
163 BLOCK *block;
164 WordData *prev_word;
165 PointerVector<WERD_RES> lang_words;
166 };
167
168 // Definition of a Tesseract WordRecognizer. The WordData provides the context
169 // of row/block, in_word holds an initialized, possibly pre-classified word,
170 // that the recognizer may or may not consume (but if so it sets
171 // *in_word=nullptr) and produces one or more output words in out_words, which
172 // may be the consumed in_word, or may be generated independently. This api
173 // allows both a conventional tesseract classifier to work, or a line-level
174 // classifier that generates multiple words from a merged input.
175 using WordRecognizer = void (Tesseract::*)(const WordData &, WERD_RES **,
176 PointerVector<WERD_RES> *);
177
178 class TESS_API Tesseract : public Wordrec {
179 public:
180 Tesseract();
181 ~Tesseract() override;
182
183 // Return appropriate dictionary
184 Dict &getDict() override;
185
186 // Clear as much used memory as possible without resetting the adaptive
187 // classifier or losing any other classifier data.
188 void Clear();
189 // Clear all memory of adaption for this and all subclassifiers.
190 void ResetAdaptiveClassifier();
191 // Clear the document dictionary for this and all subclassifiers.
192 void ResetDocumentDictionary();
193
194 #ifndef DISABLED_LEGACY_ENGINE
195 // Set the equation detector.
196 void SetEquationDetect(EquationDetect *detector);
197 #endif // ndef DISABLED_LEGACY_ENGINE
198
199 // Simple accessors.
200 const FCOORD &reskew() const {
201 return reskew_;
202 }
203 float gradient() const {
204 return gradient_;
205 }
206 // Destroy any existing pix and return a pointer to the pointer.
207 Image *mutable_pix_binary() {
208 pix_binary_.destroy();
209 return &pix_binary_;
210 }
211 Image pix_binary() const {
212 return pix_binary_;
213 }
214 Image pix_grey() const {
215 return pix_grey_;
216 }
217 void set_pix_grey(Image grey_pix) {
218 pix_grey_.destroy();
219 pix_grey_ = grey_pix;
220 }
221 Image pix_original() const {
222 return pix_original_;
223 }
224 // Takes ownership of the given original_pix.
225 void set_pix_original(Image original_pix) {
226 pix_original_.destroy();
227 pix_original_ = original_pix;
228 // Clone to sublangs as well.
229 for (auto &lang : sub_langs_) {
230 lang->set_pix_original(original_pix ? original_pix.clone() : nullptr);
231 }
232 }
233 // Returns a pointer to a Pix representing the best available resolution image
234 // of the page, with best available bit depth as second priority. Result can
235 // be of any bit depth, but never color-mapped, as that has always been
236 // removed. Note that in grey and color, 0 is black and 255 is
237 // white. If the input was binary, then black is 1 and white is 0.
238 // To tell the difference pixGetDepth() will return 32, 8 or 1.
239 // In any case, the return value is a borrowed Pix, and should not be
240 // deleted or pixDestroyed.
241 Image BestPix() const {
242 if (pixGetWidth(pix_original_) == ImageWidth()) {
243 return pix_original_;
244 } else if (pix_grey_ != nullptr) {
245 return pix_grey_;
246 } else {
247 return pix_binary_;
248 }
249 }
250 void set_pix_thresholds(Image thresholds) {
251 pix_thresholds_.destroy();
252 pix_thresholds_ = thresholds;
253 }
254 int source_resolution() const {
255 return source_resolution_;
256 }
257 void set_source_resolution(int ppi) {
258 source_resolution_ = ppi;
259 }
260 int ImageWidth() const {
261 return pixGetWidth(pix_binary_);
262 }
263 int ImageHeight() const {
264 return pixGetHeight(pix_binary_);
265 }
266 Image scaled_color() const {
267 return scaled_color_;
268 }
269 int scaled_factor() const {
270 return scaled_factor_;
271 }
272 void SetScaledColor(int factor, Image color) {
273 scaled_factor_ = factor;
274 scaled_color_ = color;
275 }
276 const Textord &textord() const {
277 return textord_;
278 }
279 Textord *mutable_textord() {
280 return &textord_;
281 }
282
283 bool right_to_left() const {
284 return right_to_left_;
285 }
286 int num_sub_langs() const {
287 return sub_langs_.size();
288 }
289 Tesseract *get_sub_lang(int index) const {
290 return sub_langs_[index];
291 }
292 // Returns true if any language uses Tesseract (as opposed to LSTM).
293 bool AnyTessLang() const {
294 if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
295 return true;
296 }
297 for (auto &lang : sub_langs_) {
298 if (lang->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
299 return true;
300 }
301 }
302 return false;
303 }
304 // Returns true if any language uses the LSTM.
305 bool AnyLSTMLang() const {
306 if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
307 return true;
308 }
309 for (auto &lang : sub_langs_) {
310 if (lang->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
311 return true;
312 }
313 }
314 return false;
315 }
316
317 void SetBlackAndWhitelist();
318
319 // Perform steps to prepare underlying binary image/other data structures for
320 // page segmentation. Uses the strategy specified in the global variable
321 // pageseg_devanagari_split_strategy for performing splitting while preparing for
322 // page segmentation.
323 void PrepareForPageseg();
324
325 // Perform steps to prepare underlying binary image/other data structures for
326 // Tesseract OCR. The current segmentation is required by this method.
327 // Uses the strategy specified in the global variable
328 // ocr_devanagari_split_strategy for performing splitting while preparing for
329 // Tesseract ocr.
330 void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr);
331
332 int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr);
333 void SetupWordScripts(BLOCK_LIST *blocks);
334 int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
335 BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr);
336 ColumnFinder *SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks,
337 Tesseract *osd_tess, OSResults *osr,
338 TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix,
339 Image *music_mask_pix);
340 // par_control.cpp
341 void PrerecAllWordsPar(const std::vector<WordData> &words);
342
343 //// linerec.cpp
344 // Generates training data for training a line recognizer, eg LSTM.
345 // Breaks the page into lines, according to the boxes, and writes them to a
346 // serialized DocumentData based on output_basename.
347 // Return true if successful, false if an error occurred.
348 bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename,
349 BLOCK_LIST *block_list);
350 // Generates training data for training a line recognizer, eg LSTM.
351 // Breaks the boxes into lines, normalizes them, converts to ImageData and
352 // appends them to the given training_data.
353 void TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
354 BLOCK_LIST *block_list, DocumentData *training_data);
355
356 // Returns an ImageData containing the image of the given textline,
357 // and ground truth boxes/truth text if available in the input.
358 // The image is not normalized in any way.
359 ImageData *GetLineData(const TBOX &line_box, const std::vector<TBOX> &boxes,
360 const std::vector<std::string> &texts, int start_box, int end_box,
361 const BLOCK &block);
362 // Helper gets the image of a rectangle, using the block.re_rotation() if
363 // needed to get to the image, and rotating the result back to horizontal
364 // layout. (CJK characters will be on their left sides) The vertical text flag
365 // is set in the returned ImageData if the text was originally vertical, which
366 // can be used to invoke a different CJK recognition engine. The revised_box
367 // is also returned to enable calculation of output bounding boxes.
368 ImageData *GetRectImage(const TBOX &box, const BLOCK &block, int padding,
369 TBOX *revised_box) const;
370 // Recognizes a word or group of words, converting to WERD_RES in *words.
371 // Analogous to classify_word_pass1, but can handle a group of words as well.
372 void LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word,
373 PointerVector<WERD_RES> *words);
374 // Apply segmentation search to the given set of words, within the constraints
375 // of the existing ratings matrix. If there is already a best_choice on a word
376 // leaves it untouched and just sets the done/accepted etc flags.
377 void SearchWords(PointerVector<WERD_RES> *words);
378
379 //// control.h /////////////////////////////////////////////////////////
380 bool ProcessTargetWord(const TBOX &word_box, const TBOX &target_word_box, const char *word_config,
381 int pass);
382 // Sets up the words ready for whichever engine is to be run
383 void SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, const char *word_config,
384 PAGE_RES *page_res, std::vector<WordData> *words);
385 // Sets up the single word ready for whichever engine is to be run.
386 void SetupWordPassN(int pass_n, WordData *word);
387 // Runs word recognition on all the words.
388 bool RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it,
389 std::vector<WordData> *words);
390 bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box,
391 const char *word_config, int dopasses);
392 void rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box,
393 const char *word_config);
394 void bigram_correction_pass(PAGE_RES *page_res);
395 void blamer_pass(PAGE_RES *page_res);
396 // Sets script positions and detects smallcaps on all output words.
397 void script_pos_pass(PAGE_RES *page_res);
398 // Helper to recognize the word using the given (language-specific) tesseract.
399 // Returns positive if this recognizer found more new best words than the
400 // number kept from best_words.
401 int RetryWithLanguage(const WordData &word_data, WordRecognizer recognizer, bool debug,
402 WERD_RES **in_word, PointerVector<WERD_RES> *best_words);
403 // Moves good-looking "noise"/diacritics from the reject list to the main
404 // blob list on the current word. Returns true if anything was done, and
405 // sets make_next_word_fuzzy if blob(s) were added to the end of the word.
406 bool ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy);
407 // Attempts to put noise/diacritic outlines into the blobs that they overlap.
408 // Input: a set of noisy outlines that probably belong to the real_word.
409 // Output: outlines that overlapped blobs are set to nullptr and put back into
410 // the word, either in the blobs or in the reject list.
411 void AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *> &outlines, int pass,
412 WERD *real_word, PAGE_RES_IT *pr_it,
413 std::vector<bool> *word_wanted,
414 std::vector<bool> *overlapped_any_blob,
415 std::vector<C_BLOB *> *target_blobs);
416 // Attempts to assign non-overlapping outlines to their nearest blobs or
417 // make new blobs out of them.
418 void AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outlines, int pass,
419 WERD *real_word, PAGE_RES_IT *pr_it,
420 std::vector<bool> *word_wanted,
421 std::vector<C_BLOB *> *target_blobs);
422 // Starting with ok_outlines set to indicate which outlines overlap the blob,
423 // chooses the optimal set (approximately) and returns true if any outlines
424 // are desired, in which case ok_outlines indicates which ones.
425 bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold, PAGE_RES_IT *pr_it,
426 C_BLOB *blob, const std::vector<C_OUTLINE *> &outlines,
427 int num_outlines, std::vector<bool> *ok_outlines);
428 // Classifies the given blob plus the outlines flagged by ok_outlines, undoes
429 // the inclusion of the outlines, and returns the certainty of the raw choice.
430 float ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
431 const std::vector<C_OUTLINE *> &outlines, int pass_n,
432 PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str);
433 // Classifies the given blob (part of word_data->word->word) as an individual
434 // word, using languages, chopper etc, returning only the certainty of the
435 // best raw choice, and undoing all the work done to fake out the word.
436 float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str,
437 float *c2);
438 void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data);
439 void classify_word_pass1(const WordData &word_data, WERD_RES **in_word,
440 PointerVector<WERD_RES> *out_words);
441 void recog_pseudo_word(PAGE_RES *page_res, // blocks to check
442 TBOX &selection_box);
443
444 void fix_rep_char(PAGE_RES_IT *page_res_it);
445
446 ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s,
447 const char *lengths);
448 void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *block);
449 void classify_word_pass2(const WordData &word_data, WERD_RES **in_word,
450 PointerVector<WERD_RES> *out_words);
451 void ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word);
452 bool RunOldFixXht(WERD_RES *word, BLOCK *block, ROW *row);
453 bool TrainedXheightFix(WERD_RES *word, BLOCK *block, ROW *row);
454 // Runs recognition with the test baseline shift and x-height and returns true
455 // if there was an improvement in recognition result.
456 bool TestNewNormalization(int original_misfits, float baseline_shift, float new_x_ht,
457 WERD_RES *word, BLOCK *block, ROW *row);
458 bool recog_interactive(PAGE_RES_IT *pr_it);
459
460 // Set fonts of this word.
461 void set_word_fonts(WERD_RES *word);
462 void font_recognition_pass(PAGE_RES *page_res);
463 void dictionary_correction_pass(PAGE_RES *page_res);
464 bool check_debug_pt(WERD_RES *word, int location);
465
466 //// superscript.cpp ////////////////////////////////////////////////////
467 bool SubAndSuperscriptFix(WERD_RES *word_res);
468 void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading,
469 ScriptPos *leading_pos, float *leading_certainty,
470 int *num_rebuilt_trailing, ScriptPos *trailing_pos,
471 float *trailing_certainty, float *avg_certainty,
472 float *unlikely_threshold);
473 WERD_RES *TrySuperscriptSplits(int num_chopped_leading, float leading_certainty,
474 ScriptPos leading_pos, int num_chopped_trailing,
475 float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word,
476 bool *is_good, int *retry_leading, int *retry_trailing);
477 bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold,
478 int *left_ok, int *right_ok) const;
479
480 //// output.h //////////////////////////////////////////////////////////
481
482 void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box);
483 void write_results(PAGE_RES_IT &page_res_it, // full info
484 char newline_type, // type of newline
485 bool force_eol // override tilde crunch?
486 );
487 void set_unlv_suspects(WERD_RES *word);
488 UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated?
489 bool acceptable_number_string(const char *s, const char *lengths);
490 int16_t count_alphanums(const WERD_CHOICE &word);
491 int16_t count_alphas(const WERD_CHOICE &word);
492
493 void read_config_file(const char *filename, SetParamConstraint constraint);
494 // Initialize for potentially a set of languages defined by the language
495 // string and recursively any additional languages required by any language
496 // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
497 // See init_tesseract_internal for args.
498 int init_tesseract(const std::string &arg0, const std::string &textbase,
499 const std::string &language, OcrEngineMode oem, char **configs,
500 int configs_size, const std::vector<std::string> *vars_vec,
501 const std::vector<std::string> *vars_values, bool set_only_non_debug_params,
502 TessdataManager *mgr);
503 int init_tesseract(const std::string &datapath, const std::string &language, OcrEngineMode oem) {
504 TessdataManager mgr;
505 return init_tesseract(datapath, {}, language, oem, nullptr, 0, nullptr, nullptr, false, &mgr);
506 }
507 // Common initialization for a single language.
508 // arg0 is the datapath for the tessdata directory, which could be the
509 // path of the tessdata directory with no trailing /, or (if tessdata
510 // lives in the same directory as the executable) the path of the executable,
511 // hence the name arg0.
512 // textbase is an optional output file basename (used only for training)
513 // language is the language code to load.
514 // oem controls which engine(s) will operate on the image
515 // configs (argv) is an array of config filenames to load variables from.
516 // May be nullptr.
517 // configs_size (argc) is the number of elements in configs.
518 // vars_vec is an optional vector of variables to set.
519 // vars_values is an optional corresponding vector of values for the variables
520 // in vars_vec.
521 // If set_only_non_debug_params is true, only params that do not contain
522 // "debug" in the name will be set.
523 int init_tesseract_internal(const std::string &arg0, const std::string &textbase,
524 const std::string &language, OcrEngineMode oem, char **configs,
525 int configs_size, const std::vector<std::string> *vars_vec,
526 const std::vector<std::string> *vars_values,
527 bool set_only_non_debug_params, TessdataManager *mgr);
528
529 // Set the universal_id member of each font to be unique among all
530 // instances of the same font loaded.
531 void SetupUniversalFontIds();
532
533 void recognize_page(std::string &image_name);
534 void end_tesseract();
535
536 bool init_tesseract_lang_data(const std::string &arg0,
537 const std::string &language, OcrEngineMode oem, char **configs,
538 int configs_size, const std::vector<std::string> *vars_vec,
539 const std::vector<std::string> *vars_values,
540 bool set_only_non_debug_params, TessdataManager *mgr);
541
542 void ParseLanguageString(const std::string &lang_str, std::vector<std::string> *to_load,
543 std::vector<std::string> *not_to_load);
544
545 //// pgedit.h //////////////////////////////////////////////////////////
546 SVMenuNode *build_menu_new();
547 #ifndef GRAPHICS_DISABLED
548 void pgeditor_main(int width, int height, PAGE_RES *page_res);
549
550 void process_image_event( // action in image win
551 const SVEvent &event);
552 bool process_cmd_win_event( // UI command semantics
553 int32_t cmd_event, // which menu item?
554 char *new_value // any prompt data
555 );
556 #endif // !GRAPHICS_DISABLED
557 void debug_word(PAGE_RES *page_res, const TBOX &selection_box);
558 void do_re_display(bool (tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it));
559 bool word_display(PAGE_RES_IT *pr_it);
560 bool word_bln_display(PAGE_RES_IT *pr_it);
561 bool word_blank_and_set_display(PAGE_RES_IT *pr_its);
562 bool word_set_display(PAGE_RES_IT *pr_it);
563 // #ifndef GRAPHICS_DISABLED
564 bool word_dumper(PAGE_RES_IT *pr_it);
565 // #endif // !GRAPHICS_DISABLED
566 void blob_feature_display(PAGE_RES *page_res, const TBOX &selection_box);
567 //// reject.h //////////////////////////////////////////////////////////
568 // make rej map for word
569 void make_reject_map(WERD_RES *word, ROW *row, int16_t pass);
570 bool one_ell_conflict(WERD_RES *word_res, bool update_map);
571 int16_t first_alphanum_index(const char *word, const char *word_lengths);
572 int16_t first_alphanum_offset(const char *word, const char *word_lengths);
573 int16_t alpha_count(const char *word, const char *word_lengths);
574 bool word_contains_non_1_digit(const char *word, const char *word_lengths);
575 void dont_allow_1Il(WERD_RES *word);
576 int16_t count_alphanums( // how many alphanums
577 WERD_RES *word);
578 void flip_0O(WERD_RES *word);
579 bool non_0_digit(const UNICHARSET &ch_set, UNICHAR_ID unichar_id);
580 bool non_O_upper(const UNICHARSET &ch_set, UNICHAR_ID unichar_id);
581 bool repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
582 void nn_match_word( // Match a word
583 WERD_RES *word, ROW *row);
584 void nn_recover_rejects(WERD_RES *word, ROW *row);
585 void set_done( // set done flag
586 WERD_RES *word, int16_t pass);
587 int16_t safe_dict_word(const WERD_RES *werd_res); // is best_choice in dict?
588 void flip_hyphens(WERD_RES *word);
589 void reject_I_1_L(WERD_RES *word);
590 void reject_edge_blobs(WERD_RES *word);
591 void reject_mostly_rejects(WERD_RES *word);
592 //// adaptions.h ///////////////////////////////////////////////////////
593 bool word_adaptable( // should we adapt?
594 WERD_RES *word, uint16_t mode);
595
596 //// tfacepp.cpp ///////////////////////////////////////////////////////
597 void recog_word_recursive(WERD_RES *word);
598 void recog_word(WERD_RES *word);
599 void split_and_recog_word(WERD_RES *word);
600 void split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_piece,
601 BlamerBundle **orig_blamer_bundle) const;
602 void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const;
603 //// fixspace.cpp ///////////////////////////////////////////////////////
604 bool digit_or_numeric_punct(WERD_RES *word, int char_position);
605 int16_t eval_word_spacing(WERD_RES_LIST &word_res_list);
606 void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK *block);
607 int16_t fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
608 void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block);
609 void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block);
610 void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block);
611 void fix_fuzzy_spaces( // find fuzzy words
612 ETEXT_DESC *monitor, // progress monitor
613 int32_t word_count, // count of words in doc
614 PAGE_RES *page_res);
615 void dump_words(WERD_RES_LIST &perm, int16_t score, int16_t mode, bool improved);
616 bool fixspace_thinks_word_done(WERD_RES *word);
617 int16_t worst_noise_blob(WERD_RES *word_res, float *worst_noise_score);
618 float blob_noise_score(TBLOB *blob);
619 void break_noisiest_blob_word(WERD_RES_LIST &words);
620 //// docqual.cpp ////////////////////////////////////////////////////////
621 #ifndef DISABLED_LEGACY_ENGINE
622 GARBAGE_LEVEL garbage_word(WERD_RES *word, bool ok_dict_word);
623 bool potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level, bool ok_dict_word);
624 #endif
625 void tilde_crunch(PAGE_RES_IT &page_res_it);
626 void unrej_good_quality_words( // unreject potential
627 PAGE_RES_IT &page_res_it);
628 void doc_and_block_rejection( // reject big chunks
629 PAGE_RES_IT &page_res_it, bool good_quality_doc);
630 void quality_based_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc);
631 void convert_bad_unlv_chs(WERD_RES *word_res);
632 void tilde_delete(PAGE_RES_IT &page_res_it);
633 int16_t word_blob_quality(WERD_RES *word);
634 void word_char_quality(WERD_RES *word, int16_t *match_count, int16_t *accepted_match_count);
635 void unrej_good_chs(WERD_RES *word);
636 int16_t count_outline_errs(char c, int16_t outline_count);
637 int16_t word_outline_errs(WERD_RES *word);
638 #ifndef DISABLED_LEGACY_ENGINE
639 bool terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level);
640 #endif
641 CRUNCH_MODE word_deletable(WERD_RES *word, int16_t &delete_mode);
642 int16_t failure_count(WERD_RES *word);
643 bool noise_outlines(TWERD *word);
644 //// pagewalk.cpp ///////////////////////////////////////////////////////
645 void process_selected_words(PAGE_RES *page_res, // blocks to check
646 // function to call
647 TBOX &selection_box,
648 bool (tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it));
649 //// tessbox.cpp ///////////////////////////////////////////////////////
650 void tess_add_doc_word( // test acceptability
651 WERD_CHOICE *word_choice // after context
652 );
653 void tess_segment_pass_n(int pass_n, WERD_RES *word);
654 bool tess_acceptable_word(WERD_RES *word);
655
656 //// applybox.cpp //////////////////////////////////////////////////////
657 // Applies the box file based on the image name filename, and resegments
658 // the words in the block_list (page), with:
659 // blob-mode: one blob per line in the box file, words as input.
660 // word/line-mode: one blob per space-delimited unit after the #, and one word
661 // per line in the box file. (See comment above for box file format.)
662 // If find_segmentation is true, (word/line mode) then the classifier is used
663 // to re-segment words/lines to match the space-delimited truth string for
664 // each box. In this case, the input box may be for a word or even a whole
665 // text line, and the output words will contain multiple blobs corresponding
666 // to the space-delimited input string.
667 // With find_segmentation false, no classifier is needed, but the chopper
668 // can still be used to correctly segment touching characters with the help
669 // of the input boxes.
670 // In the returned PAGE_RES, the WERD_RES are set up as they would be returned
671 // from normal classification, ie. with a word, chopped_word, rebuild_word,
672 // seam_array, denorm, box_word, and best_state, but NO best_choice or
673 // raw_choice, as they would require a UNICHARSET, which we aim to avoid.
674 // Instead, the correct_text member of WERD_RES is set, and this may be later
675 // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
676 // is not required before calling ApplyBoxTraining.
677 PAGE_RES *ApplyBoxes(const char *filename, bool find_segmentation, BLOCK_LIST *block_list);
678
679 // Any row xheight that is significantly different from the median is set
680 // to the median.
// block_list: the blocks whose rows are examined. Nothing is returned.
// NOTE(review): presumably the rows are updated in place, and "significantly
// different" is decided by a threshold in the definition -- confirm.
681 void PreenXHeights(BLOCK_LIST *block_list);
682
683 // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
684 // All fuzzy spaces are removed, and all the words are maximally chopped.
// boxes is taken by const reference; the result is returned as a PAGE_RES
// pointer.
// NOTE(review): ownership of the returned PAGE_RES is not stated here --
// confirm against the caller.
685 PAGE_RES *SetupApplyBoxes(const std::vector<TBOX> &boxes, BLOCK_LIST *block_list);
686 // Tests the chopper by exhaustively running chop_one_blob.
687 // The word_res will contain filled chopped_word, seam_array, denorm,
688 // box_word and best_state for the maximally chopped word.
// word_res receives the results listed above; boxes, block and row are
// additional inputs.
// NOTE(review): how boxes, block and row are used is not described here --
// confirm in applybox.cpp.
689 void MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block, ROW *row,
690 WERD_RES *word_res);
691 // Gather consecutive blobs that match the given box into the best_state
692 // and corresponding correct_text.
693 // Fights over which box owns which blobs are settled by pre-chopping and
694 // applying the blobs to box or next_box with the least non-overlap.
695 // Returns false if the box was in error, which can only be caused by
696 // failing to find an appropriate blob for a box.
697 // This means that occasionally, blobs may be incorrectly segmented if the
698 // chopper fails to find a suitable chop point.
// box is passed by reference, while prev_box and next_box are pointers.
// NOTE(review): presumably prev_box/next_box may be null for the first/last
// box of a file -- confirm before relying on them being non-null.
699 bool ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box,
700 const TBOX *next_box, const char *correct_text);
701 // Consume all source blobs that strongly overlap the given box,
702 // putting them into a new word, with the correct_text label.
703 // Fights over which box owns which blobs are settled by
704 // applying the blobs to box or next_box with the least non-overlap.
705 // Returns false if the box was in error, which can only be caused by
706 // failing to find an overlapping blob for a box.
// Unlike ResegmentCharBox, this works on a BLOCK_LIST rather than a PAGE_RES
// and takes no prev_box.
// NOTE(review): presumably next_box may be null for the last box -- confirm.
707 bool ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const TBOX *next_box,
708 const char *correct_text);
709 // Resegments the words by running the classifier in an attempt to find the
710 // correct segmentation that produces the required string.
// Returns nothing.
// NOTE(review): presumably the new segmentation is written back into the
// words of page_res, and the "required string" is each word's correct_text --
// confirm in applybox.cpp.
711 void ReSegmentByClassification(PAGE_RES *page_res);
712 // Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
713 // Returns false if an invalid UNICHAR_ID is encountered.
// utf8 is the input string; class_ids is the output vector.
// NOTE(review): the contents of class_ids after a false return are not
// specified here -- do not rely on them without checking the definition.
714 bool ConvertStringToUnichars(const char *utf8, std::vector<UNICHAR_ID> *class_ids);
715 // Resegments the word to achieve the target_text from the classifier.
716 // Returns false if the re-segmentation fails.
717 // Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and
718 // applies a full search on the classifier results to find the best classified
719 // segmentation. As a compromise to obtain better recall, 1-1 ambiguity
720 // substitutions ARE used.
// target_text is the required sequence of UNICHAR_IDs; word_res is the word
// to resegment.
721 bool FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WERD_RES *word_res);
722 // Recursive helper to find a match to the target_text (from text_index
723 // position) in the choices (from choices_pos position).
724 // Choices is an array of vectors of length choices_length, with each
725 // element representing a starting position in the word, and the
726 // vector holding classification results for a sequence of consecutive
727 // blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
// NOTE(review): rating, segmentation, best_rating and best_segmentation are
// not described here. Presumably rating/segmentation carry the state of the
// current search path and best_rating/best_segmentation receive the best
// result found so far -- confirm in applybox.cpp.
728 void SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
729 unsigned choices_length, const std::vector<UNICHAR_ID> &target_text,
730 unsigned text_index, float rating, std::vector<int> *segmentation,
731 float *best_rating, std::vector<int> *best_segmentation);
732 // Counts up the labelled words and the blobs within.
733 // Deletes all unused or emptied words, counting the unused ones.
734 // Resets W_BOL and W_EOL flags correctly.
735 // Builds the rebuild_word and rebuilds the box_word.
// Returns nothing; the counts are not returned to the caller.
// NOTE(review): presumably the counts are only reported for debugging --
// confirm in applybox.cpp.
736 void TidyUp(PAGE_RES *page_res);
737 // Logs a bad box by line in the box file and box coords.
// boxfile_lineno is the line in the box file; box (taken by value) supplies
// the coordinates.
// NOTE(review): presumably box_ch is the text of the box and err_msg the
// reason for the failure -- confirm in applybox.cpp.
738 void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg);
739 // Creates a fake best_choice entry in each WERD_RES with the correct text.
// Operates on the words of page_res and returns nothing.
// NOTE(review): "correct text" presumably means the correct_text member set
// by ApplyBoxes -- confirm in applybox.cpp.
740 void CorrectClassifyWords(PAGE_RES *page_res);
741 // Call LearnWord to extract features for labelled blobs within each word.
742 // Features are stored in an internal buffer.
// fontname is passed by const reference; page_res supplies the words.
// NOTE(review): the "internal buffer" is not identified in this header --
// presumably it belongs to the classifier; confirm in applybox.cpp.
743 void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res);
744
745 //// fixxht.cpp ///////////////////////////////////////////////////////
746 // Returns the number of misfit blob tops in this word.
// NOTE(review): what makes a blob top a "misfit" is not defined here --
// see the definition in fixxht.cpp.
747 int CountMisfitTops(WERD_RES *word_res);
748 // Returns a new x-height in pixels (original image coords) that is
749 // maximally compatible with the result in word_res.
750 // Returns 0.0f if no x-height is found that is better than the current
751 // estimate.
// NOTE(review): baseline_shift is not described here -- presumably an output
// receiving a baseline shift that accompanies the new x-height; confirm in
// fixxht.cpp (including whether it may be null).
752 float ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift);
753 //// Data members ///////////////////////////////////////////////////////
754 // TODO(ocr-team): Find and remove obsolete parameters.
// Each *_VAR_H(name) line below names one parameter; the macro prefix (BOOL,
// INT, double, STRING) names its kind.
// NOTE(review): the macros are defined outside this section -- presumably
// each expands to a member of the matching parameter type; confirm in the
// params header.
755 BOOL_VAR_H(tessedit_resegment_from_boxes);
756 BOOL_VAR_H(tessedit_resegment_from_line_boxes);
757 BOOL_VAR_H(tessedit_train_from_boxes);
758 BOOL_VAR_H(tessedit_make_boxes_from_boxes);
759 BOOL_VAR_H(tessedit_train_line_recognizer);
760 BOOL_VAR_H(tessedit_dump_pageseg_images);
761 // TODO: remove deprecated tessedit_do_invert in release 6.
762 BOOL_VAR_H(tessedit_do_invert);
763 double_VAR_H(invert_threshold);
764 INT_VAR_H(tessedit_pageseg_mode);
// Parameters sharing the thresholding_ prefix.
765 INT_VAR_H(thresholding_method);
766 BOOL_VAR_H(thresholding_debug);
767 double_VAR_H(thresholding_window_size);
768 double_VAR_H(thresholding_kfactor);
769 double_VAR_H(thresholding_tile_size);
770 double_VAR_H(thresholding_smooth_kernel_size);
771 double_VAR_H(thresholding_score_fraction);
// Parameter declarations (one per *_VAR_H line; the macro prefix names the
// parameter kind).
// NOTE(review): meanings and defaults are given where the parameters are
// defined, not in this header section.
772 INT_VAR_H(tessedit_ocr_engine_mode);
773 STRING_VAR_H(tessedit_char_blacklist);
774 STRING_VAR_H(tessedit_char_whitelist);
775 STRING_VAR_H(tessedit_char_unblacklist);
776 BOOL_VAR_H(tessedit_ambigs_training);
777 INT_VAR_H(pageseg_devanagari_split_strategy);
778 INT_VAR_H(ocr_devanagari_split_strategy);
779 STRING_VAR_H(tessedit_write_params_to_file);
780 BOOL_VAR_H(tessedit_adaption_debug);
781 INT_VAR_H(bidi_debug);
// Parameters sharing the applybox_ prefix.
782 INT_VAR_H(applybox_debug);
783 INT_VAR_H(applybox_page);
784 STRING_VAR_H(applybox_exposure_pattern);
785 BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode);
786 BOOL_VAR_H(applybox_learn_ngrams_mode);
787 BOOL_VAR_H(tessedit_display_outwords);
788 BOOL_VAR_H(tessedit_dump_choices);
789 BOOL_VAR_H(tessedit_timing_debug);
790 BOOL_VAR_H(tessedit_fix_fuzzy_spaces);
791 BOOL_VAR_H(tessedit_unrej_any_wd);
792 BOOL_VAR_H(tessedit_fix_hyphens);
793 BOOL_VAR_H(tessedit_enable_doc_dict);
794 BOOL_VAR_H(tessedit_debug_fonts);
795 INT_VAR_H(tessedit_font_id);
796 BOOL_VAR_H(tessedit_debug_block_rejection);
797 BOOL_VAR_H(tessedit_enable_bigram_correction);
798 BOOL_VAR_H(tessedit_enable_dict_correction);
799 INT_VAR_H(tessedit_bigram_debug);
800 BOOL_VAR_H(enable_noise_removal);
801 INT_VAR_H(debug_noise_removal);
// Noise/diacritic parameters. The four noise_cert_* values are doubles; the
// two noise_max* values are integers.
802 // Worst (min) certainty, for which a diacritic is allowed to make the base
803 // character worse and still be included.
804 double_VAR_H(noise_cert_basechar);
805 // Worst (min) certainty, for which a non-overlapping diacritic is allowed to
806 // make the base character worse and still be included.
807 double_VAR_H(noise_cert_disjoint);
808 // Worst (min) certainty, for which a diacritic is allowed to make a new
809 // stand-alone blob.
810 double_VAR_H(noise_cert_punc);
811 // Factor of certainty margin for adding diacritics to not count as worse.
812 double_VAR_H(noise_cert_factor);
// NOTE(review): presumably upper limits on noise pieces per blob and per
// word -- confirm where these parameters are defined.
813 INT_VAR_H(noise_maxperblob);
814 INT_VAR_H(noise_maxperword);
// Parameter declarations (one per *_VAR_H line; the macro prefix names the
// parameter kind).
// NOTE(review): meanings and defaults are given where the parameters are
// defined, not in this header section.
815 INT_VAR_H(debug_x_ht_level);
816 STRING_VAR_H(chs_leading_punct);
817 STRING_VAR_H(chs_trailing_punct1);
818 STRING_VAR_H(chs_trailing_punct2);
// Parameters sharing the quality_ prefix.
819 double_VAR_H(quality_rej_pc);
820 double_VAR_H(quality_blob_pc);
821 double_VAR_H(quality_outline_pc);
822 double_VAR_H(quality_char_pc);
823 INT_VAR_H(quality_min_initial_alphas_reqd);
824 INT_VAR_H(tessedit_tess_adaption_mode);
825 BOOL_VAR_H(tessedit_minimal_rej_pass1);
826 BOOL_VAR_H(tessedit_test_adaption);
827 BOOL_VAR_H(test_pt);
828 double_VAR_H(test_pt_x);
829 double_VAR_H(test_pt_y);
830 INT_VAR_H(multilang_debug_level);
831 INT_VAR_H(paragraph_debug_level);
832 BOOL_VAR_H(paragraph_text_based);
833 BOOL_VAR_H(lstm_use_matrix);
834 STRING_VAR_H(outlines_odd);
835 STRING_VAR_H(outlines_2);
836 BOOL_VAR_H(tessedit_good_quality_unrej);
837 BOOL_VAR_H(tessedit_use_reject_spaces);
// Rejection-related parameters (names containing rej/reject).
838 double_VAR_H(tessedit_reject_doc_percent);
839 double_VAR_H(tessedit_reject_block_percent);
840 double_VAR_H(tessedit_reject_row_percent);
841 double_VAR_H(tessedit_whole_wd_rej_row_percent);
842 BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds);
843 BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds);
844 BOOL_VAR_H(tessedit_dont_blkrej_good_wds);
845 BOOL_VAR_H(tessedit_dont_rowrej_good_wds);
846 INT_VAR_H(tessedit_preserve_min_wd_len);
847 BOOL_VAR_H(tessedit_row_rej_good_docs);
848 double_VAR_H(tessedit_good_doc_still_rowrej_wd);
849 BOOL_VAR_H(tessedit_reject_bad_qual_wds);
850 BOOL_VAR_H(tessedit_debug_doc_rejection);
851 BOOL_VAR_H(tessedit_debug_quality_metrics);
852 BOOL_VAR_H(bland_unrej);
853 double_VAR_H(quality_rowrej_pc);
854 BOOL_VAR_H(unlv_tilde_crunching);
855 BOOL_VAR_H(hocr_font_info);
856 BOOL_VAR_H(hocr_char_boxes);
// Parameter declarations (one per *_VAR_H line; the macro prefix names the
// parameter kind).
// Parameters sharing the crunch_ prefix.
// NOTE(review): meanings and defaults are given where the parameters are
// defined, not in this header section.
857 BOOL_VAR_H(crunch_early_merge_tess_fails);
858 BOOL_VAR_H(crunch_early_convert_bad_unlv_chs);
859 double_VAR_H(crunch_terrible_rating);
860 BOOL_VAR_H(crunch_terrible_garbage);
861 double_VAR_H(crunch_poor_garbage_cert);
862 double_VAR_H(crunch_poor_garbage_rate);
863 double_VAR_H(crunch_pot_poor_rate);
864 double_VAR_H(crunch_pot_poor_cert);
865 double_VAR_H(crunch_del_rating);
866 double_VAR_H(crunch_del_cert);
867 double_VAR_H(crunch_del_min_ht);
868 double_VAR_H(crunch_del_max_ht);
869 double_VAR_H(crunch_del_min_width);
870 double_VAR_H(crunch_del_high_word);
871 double_VAR_H(crunch_del_low_word);
872 double_VAR_H(crunch_small_outlines_size);
873 INT_VAR_H(crunch_rating_max);
874 INT_VAR_H(crunch_pot_indicators);
875 BOOL_VAR_H(crunch_leave_ok_strings);
876 BOOL_VAR_H(crunch_accept_ok);
877 BOOL_VAR_H(crunch_leave_accept_strings);
878 BOOL_VAR_H(crunch_include_numerals);
879 INT_VAR_H(crunch_leave_lc_strings);
880 INT_VAR_H(crunch_leave_uc_strings);
881 INT_VAR_H(crunch_long_repetitions);
882 INT_VAR_H(crunch_debug);
// Parameters sharing the fixsp_ prefix, plus related space-fixing settings.
883 INT_VAR_H(fixsp_non_noise_limit);
884 double_VAR_H(fixsp_small_outlines_size);
885 BOOL_VAR_H(tessedit_prefer_joined_punct);
886 INT_VAR_H(fixsp_done_mode);
887 INT_VAR_H(debug_fix_space_level);
888 STRING_VAR_H(numeric_punctuation);
889 INT_VAR_H(x_ht_acceptance_tolerance);
890 INT_VAR_H(x_ht_min_change);
// Superscript/subscript parameters.
891 INT_VAR_H(superscript_debug);
892 double_VAR_H(superscript_worse_certainty);
893 double_VAR_H(superscript_bettered_certainty);
894 double_VAR_H(superscript_scaledown_ratio);
895 double_VAR_H(subscript_max_y_top);
896 double_VAR_H(superscript_min_y_bottom);
// Parameter declarations (one per *_VAR_H line; the macro prefix names the
// parameter kind).
// NOTE(review): meanings and defaults are given where the parameters are
// defined, not in this header section.
897 BOOL_VAR_H(tessedit_write_block_separators);
898 BOOL_VAR_H(tessedit_write_rep_codes);
899 BOOL_VAR_H(tessedit_write_unlv);
// Parameters sharing the tessedit_create_ prefix, with related page_xml_ and
// PDF settings.
900 BOOL_VAR_H(tessedit_create_txt);
901 BOOL_VAR_H(tessedit_create_hocr);
902 BOOL_VAR_H(tessedit_create_alto);
903 BOOL_VAR_H(tessedit_create_page_xml);
904 BOOL_VAR_H(page_xml_polygon);
905 INT_VAR_H(page_xml_level);
906 BOOL_VAR_H(tessedit_create_lstmbox);
907 BOOL_VAR_H(tessedit_create_tsv);
908 BOOL_VAR_H(tessedit_create_wordstrbox);
909 BOOL_VAR_H(tessedit_create_pdf);
910 BOOL_VAR_H(textonly_pdf);
911 INT_VAR_H(jpg_quality);
912 INT_VAR_H(user_defined_dpi);
913 INT_VAR_H(min_characters_to_try);
914 STRING_VAR_H(unrecognised_char);
// Parameters sharing the suspect_ prefix.
915 INT_VAR_H(suspect_level);
916 INT_VAR_H(suspect_short_words);
917 BOOL_VAR_H(suspect_constrain_1Il);
918 double_VAR_H(suspect_rating_per_ch);
919 double_VAR_H(suspect_accept_rating);
920 BOOL_VAR_H(tessedit_minimal_rejection);
921 BOOL_VAR_H(tessedit_zero_rejection);
922 BOOL_VAR_H(tessedit_word_for_word);
923 BOOL_VAR_H(tessedit_zero_kelvin_rejection);
924 INT_VAR_H(tessedit_reject_mode);
925 BOOL_VAR_H(tessedit_rejection_debug);
926 BOOL_VAR_H(tessedit_flip_0O);
927 double_VAR_H(tessedit_lower_flip_hyphen);
928 double_VAR_H(tessedit_upper_flip_hyphen);
// Parameters sharing the rej_ prefix.
929 BOOL_VAR_H(rej_trust_doc_dawg);
930 BOOL_VAR_H(rej_1Il_use_dict_word);
931 BOOL_VAR_H(rej_1Il_trust_permuter_type);
932 BOOL_VAR_H(rej_use_tess_accepted);
933 BOOL_VAR_H(rej_use_tess_blanks);
934 BOOL_VAR_H(rej_use_good_perm);
935 BOOL_VAR_H(rej_use_sensible_wd);
936 BOOL_VAR_H(rej_alphas_in_number_perm);
937 double_VAR_H(rej_whole_of_mostly_reject_word_fract);
938 INT_VAR_H(tessedit_image_border);
939 STRING_VAR_H(ok_repeated_ch_non_alphanum_wds);
940 STRING_VAR_H(conflict_set_I_l_1);
941 INT_VAR_H(min_sane_x_ht_pixels);
942 BOOL_VAR_H(tessedit_create_boxfile);
943 INT_VAR_H(tessedit_page_number);
944 BOOL_VAR_H(tessedit_write_images);
945 BOOL_VAR_H(interactive_display_mode);
946 STRING_VAR_H(file_type);
947 BOOL_VAR_H(tessedit_override_permuter);
948 STRING_VAR_H(tessedit_load_sublangs);
949 BOOL_VAR_H(tessedit_use_primary_params_model);
950 // Min acceptable orientation margin (difference in scores between top and 2nd
951 // choice in OSResults::orientations) to believe the page orientation.
952 double_VAR_H(min_orientation_margin);
// Remaining parameter declarations (one per *_VAR_H line; the macro prefix
// names the parameter kind).
953 BOOL_VAR_H(textord_tabfind_show_vlines);
954 BOOL_VAR_H(textord_use_cjk_fp_model);
955 BOOL_VAR_H(poly_allow_detailed_fx);
956 BOOL_VAR_H(tessedit_init_config_only);
// textord_equation_detect is only declared when the legacy engine is compiled
// in (DISABLED_LEGACY_ENGINE not defined).
957 #ifndef DISABLED_LEGACY_ENGINE
958 BOOL_VAR_H(textord_equation_detect);
959 #endif // ndef DISABLED_LEGACY_ENGINE
960 BOOL_VAR_H(textord_tabfind_vertical_text);
961 BOOL_VAR_H(textord_tabfind_force_vertical_text);
962 double_VAR_H(textord_tabfind_vertical_text_ratio);
963 double_VAR_H(textord_tabfind_aligned_gap_fraction);
964 INT_VAR_H(tessedit_parallelize);
965 BOOL_VAR_H(preserve_interword_spaces);
966 STRING_VAR_H(page_separator);
967 INT_VAR_H(lstm_choice_mode);
968 INT_VAR_H(lstm_choice_iterations);
969 double_VAR_H(lstm_rating_coefficient);
970 BOOL_VAR_H(pageseg_apply_music_mask);
971
972 //// ambigsrecog.cpp /////////////////////////////////////////////////////////
// Takes a file name and returns a FILE pointer.
// NOTE(review): presumably the returned stream is the output_file later
// passed to recog_training_segmented; whether it can be null on failure and
// who closes it are not stated here -- confirm in the definition.
973 FILE *init_recog_training(const char *filename);
// Takes a file name, the page results, a progress monitor and an output
// stream; returns nothing.
// NOTE(review): presumably filename names a box/truth file matched against
// page_res and results are written to output_file -- confirm in the
// definition.
974 void recog_training_segmented(const char *filename, PAGE_RES *page_res,
975 volatile ETEXT_DESC *monitor, FILE *output_file);
// Takes a label, a page-result iterator and an output stream; returns
// nothing.
// NOTE(review): presumably classifies the word at pr_it and writes the
// result, tagged with label, to output_file -- confirm in the definition.
976 void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file);
977
978 private:
979 // The filename of a backup config file. If not null, then we currently
980 // have a temporary debug config file loaded, and backup_config_file_
981 // will be loaded, and set to null when debug is complete.
982 const char *backup_config_file_;
983 // The filename of a config file to read when processing a debug word.
984 std::string word_config_;
985 // Image used for input to layout analysis and tesseract recognition.
986 // May be modified by the ShiroRekhaSplitter to eliminate the top-line.
987 Image pix_binary_;
988 // Grey-level input image if the input was not binary, otherwise nullptr.
989 Image pix_grey_;
990 // Original input image. Color if the input was color.
991 Image pix_original_;
992 // Thresholds that were used to generate the thresholded image from grey.
993 Image pix_thresholds_;
994 // Debug images. If non-empty, will be written on destruction.
995 DebugPixa pixa_debug_;
996 // Input image resolution after any scaling. The resolution is not well
997 // transmitted by operations on Pix, so we keep an independent record here.
998 int source_resolution_;
999 // The shiro-rekha splitter object which is used to split top-lines in
1000 // Devanagari words to provide a better word and grapheme segmentation.
1001 ShiroRekhaSplitter splitter_;
1002 // Page segmentation/layout
1003 Textord textord_;
1004 // True if the primary language uses right_to_left reading order.
1005 bool right_to_left_;
// NOTE(review): scaled_color_ and scaled_factor_ are undocumented here --
// presumably a scaled color copy of the input and its scale factor; confirm.
1006 Image scaled_color_;
1007 int scaled_factor_;
// NOTE(review): deskew_, reskew_ and gradient_ are undocumented here --
// presumably page skew vectors and the skew gradient; confirm.
1008 FCOORD deskew_;
1009 FCOORD reskew_;
1010 float gradient_;
// NOTE(review): stats_ is undocumented here; see TesseractStats for its
// contents.
1011 TesseractStats stats_;
1012 // Sub-languages to be tried in addition to this.
// NOTE(review): stored as raw pointers; ownership is not stated here --
// confirm who deletes the sub-language instances.
1013 std::vector<Tesseract *> sub_langs_;
1014 // Most recently used Tesseract out of this and sub_langs_. The default
1015 // language for the next word.
1016 Tesseract *most_recently_used_;
1017 // The size of the font table, ie max possible font id + 1.
1018 int font_table_size_;
// equ_detect_ only exists when the legacy engine is compiled in.
1019 #ifndef DISABLED_LEGACY_ENGINE
1020 // Equation detector. Note: this pointer is NOT owned by the class.
1021 EquationDetect *equ_detect_;
1022 #endif // ndef DISABLED_LEGACY_ENGINE
1023 // LSTM recognizer, if available.
// NOTE(review): raw pointer; presumably null when no LSTM recognizer is
// available. Ownership is not stated here -- confirm.
1024 LSTMRecognizer *lstm_recognizer_;
1025 // Output "page" number (actually line number) using TrainLineRecognizer.
1026 int train_line_page_num_;
1027 };
1028
1029 } // namespace tesseract
1030
1031 #endif // TESSERACT_CCMAIN_TESSERACTCLASS_H_