Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/wordrec/wordrec.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: wordrec.cpp | |
| 3 // Description: wordrec class. | |
| 4 // Author: Samuel Charron | |
| 5 // | |
| 6 // (C) Copyright 2006, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #include "wordrec.h" | |
| 20 | |
| 21 #include <memory> | |
| 22 | |
| 23 #ifdef DISABLED_LEGACY_ENGINE | |
| 24 | |
| 25 # include "params.h" | |
| 26 | |
| 27 namespace tesseract { | |
| 28 Wordrec::Wordrec() | |
| 29 : // control parameters: this DISABLED_LEGACY_ENGINE build declares only the two blamer flags (both given false); the body then resets prev_word_best_choice_ to nullptr | |
| 30 | |
| 31 BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", params()) | |
| 32 , | |
| 33 | |
| 34 BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", params()) { | |
| 35 prev_word_best_choice_ = nullptr; | |
| 36 } | |
| 37 | |
| 38 } // namespace tesseract | |
| 39 | |
| 40 #else // DISABLED_LEGACY_ENGINE not defined | |
| 41 | |
| 42 # include "language_model.h" | |
| 43 # include "params.h" | |
| 44 | |
| 45 namespace tesseract { | |
| 46 Wordrec::Wordrec() | |
| 47 : // control parameters: each *_MEMBER entry passes a name, a value (presumably the default), a help string and a params() collection (force_word_assoc names CCUtil::params() explicitly); the trailing entries initialize language_model_, pass2_ok_split, prev_word_best_choice_ and fill_lattice_ | |
| 48 BOOL_MEMBER(merge_fragments_in_matrix, true, | |
| 49 "Merge the fragments in the ratings matrix and delete them" | |
| 50 " after merging", | |
| 51 params()) | |
| 52 , BOOL_MEMBER(wordrec_enable_assoc, true, "Associator Enable", params()) | |
| 53 , BOOL_MEMBER(force_word_assoc, false, | |
| 54 "force associator to run regardless of what enable_assoc is." | |
| 55 " This is used for CJK where component grouping is necessary.", | |
| 56 CCUtil::params()) | |
| 57 , INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", params()) | |
| 58 , double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", params()) | |
| 59 , INT_MEMBER(chop_debug, 0, "Chop debug", params()) | |
| 60 , BOOL_MEMBER(chop_enable, 1, "Chop enable", params()) | |
| 61 , BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params()) | |
| 62 , INT_MEMBER(chop_split_length, 10000, "Split Length", params()) | |
| 63 , INT_MEMBER(chop_same_distance, 2, "Same distance", params()) | |
| 64 , INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", params()) | |
| 65 , INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", params()) | |
| 66 , BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()) | |
| 67 , INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params()) | |
| 68 , INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params()) | |
| 69 , double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", params()) | |
| 70 , double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", params()) | |
| 71 , double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", params()) | |
| 72 , INT_MEMBER(chop_centered_maxwidth, 90, | |
| 73 "Width of (smaller) chopped blobs " | |
| 74 "above which we don't care that a chop is not near the center.", | |
| 75 params()) | |
| 76 , double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", params()) | |
| 77 , double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", params()) | |
| 78 , double_MEMBER(chop_ok_split, 100.0, "OK split limit", params()) | |
| 79 , double_MEMBER(chop_good_split, 50.0, "Good split limit", params()) | |
| 80 , INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params()) | |
| 81 , BOOL_MEMBER(assume_fixed_pitch_char_segment, false, | |
| 82 "include fixed-pitch heuristics in char segmentation", params()) | |
| 83 , INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params()) | |
| 84 , INT_MEMBER(wordrec_max_join_chunks, 4, "Max number of broken pieces to associate", params()) | |
| 85 , BOOL_MEMBER(wordrec_skip_no_truth_words, false, | |
| 86 "Only run OCR for words that had truth recorded in BlamerBundle", params()) | |
| 87 , BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", params()) | |
| 88 , BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", params()) | |
| 89 , INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params()) | |
| 90 , INT_MEMBER(segsearch_max_pain_points, 2000, | |
| 91 "Maximum number of pain points stored in the queue", params()) | |
| 92 , INT_MEMBER(segsearch_max_futile_classifications, 20, | |
| 93 "Maximum number of pain point classifications per chunk that" | |
| 94 " did not result in finding a better word choice.", | |
| 95 params()) | |
| 96 , double_MEMBER(segsearch_max_char_wh_ratio, 2.0, "Maximum character width-to-height ratio", | |
| 97 params()) | |
| 98 , BOOL_MEMBER(save_alt_choices, true, | |
| 99 "Save alternative paths found during chopping" | |
| 100 " and segmentation search", | |
| 101 params()) | |
| 102 , language_model_(std::make_unique<LanguageModel>(&get_fontinfo_table(), &(getDict()))) | |
| 103 , pass2_ok_split(0.0f) | |
| 104 , prev_word_best_choice_(nullptr) | |
| 105 , fill_lattice_(nullptr) { | |
| 106 } | |
| 107 | |
| 108 } // namespace tesseract | |
| 109 | |
| 110 #endif // DISABLED_LEGACY_ENGINE | |
