Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/wordrec/wordrec.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: wordrec.h | |
| 3 // Description: wordrec class. | |
| 4 // Author: Samuel Charron | |
| 5 // | |
| 6 // (C) Copyright 2006, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #ifndef TESSERACT_WORDREC_WORDREC_H_ | |
| 20 #define TESSERACT_WORDREC_WORDREC_H_ | |
| 21 | |
| 22 #ifdef HAVE_CONFIG_H | |
| 23 # include "config_auto.h" // DISABLED_LEGACY_ENGINE | |
| 24 #endif | |
| 25 | |
| 26 #ifdef DISABLED_LEGACY_ENGINE | |
| 27 | |
| 28 # include <cstdint> // for int16_t, int32_t | |
| 29 # include "classify.h" // for Classify | |
| 30 # include "params.h" // for INT_VAR_H, IntParam, BOOL_VAR_H, BoolP... | |
| 31 # include "ratngs.h" // for WERD_CHOICE | |
| 32 | |
| 33 namespace tesseract { | |
| 34 class TessdataManager; | |
| 35 } | |
| 36 | |
| 37 namespace tesseract { | |
| 38 | |
| 39 /* ccmain/tstruct.cpp */ | |
| 40 | |
| 41 class TESS_API Wordrec : public Classify { | |
| 42 public: | |
| 43 // config parameters | |
| 44 | |
| 45 BOOL_VAR_H(wordrec_debug_blamer); | |
| 46 BOOL_VAR_H(wordrec_run_blamer); | |
| 47 | |
| 48 // methods | |
| 49 Wordrec(); | |
| 50 virtual ~Wordrec() = default; | |
| 51 | |
| 52 // tface.cpp | |
| 53 void program_editup(const std::string &textbase, TessdataManager *init_classifier, | |
| 54 TessdataManager *init_dict); | |
| 55 void program_editdown(int32_t elapsed_time); | |
| 56 int end_recog(); | |
| 57 int dict_word(const WERD_CHOICE &word); | |
| 58 | |
| 59 // Member variables | |
| 60 WERD_CHOICE *prev_word_best_choice_; | |
| 61 }; | |
| 62 | |
| 63 } // namespace tesseract | |
| 64 | |
| 65 #else // DISABLED_LEGACY_ENGINE not defined | |
| 66 | |
| 67 # include <memory> | |
| 68 # include "associate.h" | |
| 69 # include "chop.h" // for PointHeap, MAX_NUM_POINTS | |
| 70 # include "classify.h" // for Classify | |
| 71 # include "dict.h" | |
| 72 # include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK | |
| 73 # include "findseam.h" // for SeamQueue, SeamPile | |
| 74 # include "language_model.h" | |
| 75 # include "matrix.h" | |
| 76 # include "oldlist.h" // for LIST | |
| 77 # include "params.h" // for INT_VAR_H, IntParam, BOOL_VAR_H, BoolP... | |
| 78 # include "points.h" // for ICOORD | |
| 79 # include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only), BLOB_CHOI... | |
| 80 # include "seam.h" // for SEAM (ptr only), PRIORITY | |
| 81 # include "stopper.h" // for DANGERR | |
| 82 | |
| 83 # include <cstdint> // for int16_t, int32_t | |
| 84 | |
| 85 namespace tesseract { | |
| 86 | |
| 87 class EDGEPT_CLIST; | |
| 88 class MATRIX; | |
| 89 class TBOX; | |
| 90 class UNICHARSET; | |
| 91 class WERD_RES; | |
| 92 | |
| 93 class LMPainPoints; | |
| 94 class TessdataManager; | |
| 95 struct BestChoiceBundle; | |
| 96 | |
| 97 struct BlamerBundle; | |
| 98 struct EDGEPT; | |
| 99 struct MATRIX_COORD; | |
| 100 struct SPLIT; | |
| 101 struct TBLOB; | |
| 102 struct TESSLINE; | |
| 103 struct TWERD; | |
| 104 | |
| 105 // A class for storing which nodes are to be processed by the segmentation | |
| 106 // search. There is a single SegSearchPending for each column in the ratings | |
| 107 // matrix, and it indicates whether the segsearch should combine all | |
| 108 // BLOB_CHOICES in the column, or just the given row with the parents | |
| 109 // corresponding to *this SegSearchPending, and whether only updated parent | |
| 110 // ViterbiStateEntries should be combined, or all, with the BLOB_CHOICEs. | |
| 111 class SegSearchPending { | |
| 112 public: | |
| 113 SegSearchPending() | |
| 114 : classified_row_(-1), revisit_whole_column_(false), column_classified_(false) {} | |
| 115 | |
| 116 // Marks the whole column as just classified. Used to start a search on | |
| 117 // a newly initialized ratings matrix. | |
| 118 void SetColumnClassified() { | |
| 119 column_classified_ = true; | |
| 120 } | |
| 121 // Marks the matrix entry at the given row as just classified. | |
| 122 // Used after classifying a new matrix cell. | |
| 123 // Additional to, not overriding a previous RevisitWholeColumn. | |
| 124 void SetBlobClassified(int row) { | |
| 125 classified_row_ = row; | |
| 126 } | |
| 127 // Marks the whole column as needing work, but not just classified. | |
| 128 // Used when the parent vse list is updated. | |
| 129 // Additional to, not overriding a previous SetBlobClassified. | |
| 130 void RevisitWholeColumn() { | |
| 131 revisit_whole_column_ = true; | |
| 132 } | |
| 133 | |
| 134 // Clears *this to indicate no work to do. | |
| 135 void Clear() { | |
| 136 classified_row_ = -1; | |
| 137 revisit_whole_column_ = false; | |
| 138 column_classified_ = false; | |
| 139 } | |
| 140 | |
| 141 // Returns true if there are updates to do in the column that *this | |
| 142 // represents. | |
| 143 bool WorkToDo() const { | |
| 144 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0; | |
| 145 } | |
| 146 // Returns true if the given row was just classified. | |
| 147 bool IsRowJustClassified(int row) const { | |
| 148 return row == classified_row_ || column_classified_; | |
| 149 } | |
| 150 // Returns the single row to process if there is only one, otherwise -1. | |
| 151 int SingleRow() const { | |
| 152 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_; | |
| 153 } | |
| 154 | |
| 155 private: | |
| 156 // If non-negative, indicates the single row in the ratings matrix that has | |
| 157 // just been classified, and so should be combined with all the parents in the | |
| 158 // column that this SegSearchPending represents. | |
| 159 // Operates independently of revisit_whole_column. | |
| 160 int classified_row_; | |
| 161 // If revisit_whole_column is true, then all BLOB_CHOICEs in this column will | |
| 162 // be processed, but classified_row can indicate a row that is newly | |
| 163 // classified. Overridden if column_classified is true. | |
| 164 bool revisit_whole_column_; | |
| 165 // If column_classified is true, parent vses are processed with all rows | |
| 166 // regardless of whether they are just updated, overriding | |
| 167 // revisit_whole_column and classified_row. | |
| 168 bool column_classified_; | |
| 169 }; | |
| 170 | |
| 171 /* ccmain/tstruct.cpp *********************************************************/ | |
| 172 class FRAGMENT : public ELIST_LINK { | |
| 173 public: | |
| 174 FRAGMENT() { // constructor | |
| 175 } | |
| 176 FRAGMENT(EDGEPT *head_pt, // start | |
| 177 EDGEPT *tail_pt); // end | |
| 178 | |
| 179 ICOORD head; // coords of start | |
| 180 ICOORD tail; // coords of end | |
| 181 EDGEPT *headpt; // start point | |
| 182 EDGEPT *tailpt; // end point | |
| 183 }; | |
| 184 ELISTIZEH(FRAGMENT) | |
| 185 | |
| 186 class TESS_API Wordrec : public Classify { | |
| 187 public: | |
| 188 // config parameters ******************************************************* | |
| 189 BOOL_VAR_H(merge_fragments_in_matrix); | |
| 190 BOOL_VAR_H(wordrec_enable_assoc); | |
| 191 BOOL_VAR_H(force_word_assoc); | |
| 192 INT_VAR_H(repair_unchopped_blobs); | |
| 193 double_VAR_H(tessedit_certainty_threshold); | |
| 194 INT_VAR_H(chop_debug); | |
| 195 BOOL_VAR_H(chop_enable); | |
| 196 BOOL_VAR_H(chop_vertical_creep); | |
| 197 INT_VAR_H(chop_split_length); | |
| 198 INT_VAR_H(chop_same_distance); | |
| 199 INT_VAR_H(chop_min_outline_points); | |
| 200 INT_VAR_H(chop_seam_pile_size); | |
| 201 BOOL_VAR_H(chop_new_seam_pile); | |
| 202 INT_VAR_H(chop_inside_angle); | |
| 203 INT_VAR_H(chop_min_outline_area); | |
| 204 double_VAR_H(chop_split_dist_knob); | |
| 205 double_VAR_H(chop_overlap_knob); | |
| 206 double_VAR_H(chop_center_knob); | |
| 207 INT_VAR_H(chop_centered_maxwidth); | |
| 208 double_VAR_H(chop_sharpness_knob); | |
| 209 double_VAR_H(chop_width_change_knob); | |
| 210 double_VAR_H(chop_ok_split); | |
| 211 double_VAR_H(chop_good_split); | |
| 212 INT_VAR_H(chop_x_y_weight); | |
| 213 BOOL_VAR_H(assume_fixed_pitch_char_segment); | |
| 214 INT_VAR_H(wordrec_debug_level); | |
| 215 INT_VAR_H(wordrec_max_join_chunks); | |
| 216 BOOL_VAR_H(wordrec_skip_no_truth_words); | |
| 217 BOOL_VAR_H(wordrec_debug_blamer); | |
| 218 BOOL_VAR_H(wordrec_run_blamer); | |
| 219 INT_VAR_H(segsearch_debug_level); | |
| 220 INT_VAR_H(segsearch_max_pain_points); | |
| 221 INT_VAR_H(segsearch_max_futile_classifications); | |
| 222 double_VAR_H(segsearch_max_char_wh_ratio); | |
| 223 BOOL_VAR_H(save_alt_choices); | |
| 224 | |
| 225 // methods from wordrec/*.cpp *********************************************** | |
| 226 Wordrec(); | |
| 227 ~Wordrec() override = default; | |
| 228 | |
| 229 // Fills word->alt_choices with alternative paths found during | |
| 230 // chopping/segmentation search that are kept in best_choices. | |
| 231 void SaveAltChoices(const LIST &best_choices, WERD_RES *word); | |
| 232 | |
| 233 // Fills character choice lattice in the given BlamerBundle | |
| 234 // using the given ratings matrix and best choice list. | |
| 235 void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, | |
| 236 const UNICHARSET &unicharset, BlamerBundle *blamer_bundle); | |
| 237 | |
| 238 // Calls fill_lattice_ member function | |
| 239 // (assumes that fill_lattice_ is not nullptr). | |
| 240 void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, | |
| 241 const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) { | |
| 242 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle); | |
| 243 } | |
| 244 | |
| 245 // tface.cpp | |
| 246 void program_editup(const std::string &textbase, TessdataManager *init_classifier, | |
| 247 TessdataManager *init_dict); | |
| 248 void cc_recog(WERD_RES *word); | |
| 249 void program_editdown(int32_t elapsed_time); | |
| 250 void set_pass1(); | |
| 251 void set_pass2(); | |
| 252 int end_recog(); | |
| 253 BLOB_CHOICE_LIST *call_matcher(TBLOB *blob); | |
| 254 int dict_word(const WERD_CHOICE &word); | |
| 255 // wordclass.cpp | |
| 256 BLOB_CHOICE_LIST *classify_blob(TBLOB *blob, const char *string, ScrollView::Color color, | |
| 257 BlamerBundle *blamer_bundle); | |
| 258 | |
| 259 // segsearch.cpp | |
| 260 // SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs. | |
| 261 // Each entry in the matrix represents the classification choice | |
| 262 // for a chunk, i.e. an entry in row 2, column 1 represents the list | |
| 263 // of ratings for the chunks 1 and 2 classified as a single blob. | |
| 264 // The entries on the diagonal of the matrix are classifier choice lists | |
| 265 // for a single chunk from the maximal segmentation. | |
| 266 // | |
| 267 // The ratings matrix given to SegSearch represents the segmentation | |
| 268 // graph / trellis for the current word. The nodes in the graph are the | |
| 269 // individual BLOB_CHOICEs in each of the BLOB_CHOICE_LISTs in the ratings | |
| 270 // matrix. The children of each node (nodes connected by outgoing links) | |
| 271 // are the entries in the column that is equal to node's row+1. The parents | |
| 272 // (nodes connected by the incoming links) are the entries in the row that | |
| 273 // is equal to the node's column-1. Here is an example ratings matrix: | |
| 274 // | |
| 275 // 0 1 2 3 4 | |
| 276 // ------------------------- | |
| 277 // 0| c,( | | |
| 278 // 1| d l,1 | | |
| 279 // 2| o | | |
| 280 // 3| c,( | | |
| 281 // 4| g,y l,1 | | |
| 282 // ------------------------- | |
| 283 // | |
| 284 // In the example above node "o" has children (outgoing connection to nodes) | |
| 285 // "c","(","g","y" and parents (incoming connections from nodes) "l","1","d". | |
| 286 // | |
| 287 // The objective of the search is to find the least cost path, where the cost | |
| 288 // is determined by the language model components and the properties of the | |
| 289 // cut between the blobs on the path. SegSearch starts by populating the | |
| 290 // matrix with the all the entries that were classified by the chopper and | |
| 291 // finding the initial best path. Based on the classifier ratings, language | |
| 292 // model scores and the properties of each cut, a list of "pain points" is | |
| 293 // constructed - those are the points on the path where the choices do not | |
| 294 // look consistent with the neighboring choices, the cuts look particularly | |
| 295 // problematic, or the certainties of the blobs are low. The most troublesome | |
| 296 // "pain point" is picked from the list and the new entry in the ratings | |
| 297 // matrix corresponding to this "pain point" is filled in. Then the language | |
| 298 // model state is updated to reflect the new classification and the new | |
| 299 // "pain points" are added to the list and the next most troublesome | |
| 300 // "pain point" is determined. This continues until either the word choice | |
| 301 // composed from the best paths in the segmentation graph is "good enough" | |
| 302 // (e.g. above a certain certainty threshold, is an unambiguous dictionary | |
| 303 // word, etc) or there are no more "pain points" to explore. | |
| 304 // | |
| 305 // If associate_blobs is set to false no new classifications will be done | |
| 306 // to combine blobs. Segmentation search will run only one "iteration" | |
| 307 // on the classifications already recorded in chunks_record.ratings. | |
| 308 // | |
| 309 // Note: this function assumes that word_res, best_choice_bundle arguments | |
| 310 // are not nullptr. | |
| 311 void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, | |
| 312 BlamerBundle *blamer_bundle); | |
| 313 | |
| 314 // Setup and run just the initial segsearch on an established matrix, | |
| 315 // without doing any additional chopping or joining. | |
| 316 // (Internal factored version that can be used as part of the main SegSearch.) | |
| 317 void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, | |
| 318 std::vector<SegSearchPending> *pending, | |
| 319 BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle); | |
| 320 | |
| 321 // chop.cpp | |
| 322 PRIORITY point_priority(EDGEPT *point); | |
| 323 void add_point_to_list(PointHeap *point_heap, EDGEPT *point); | |
| 324 // Returns true if the edgept supplied as input is an inside angle. This | |
| 325 // is determined by the angular change of the vectors from point to point. | |
| 326 bool is_inside_angle(EDGEPT *pt); | |
| 327 int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); | |
| 328 EDGEPT *pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist); | |
| 329 void prioritize_points(TESSLINE *outline, PointHeap *points); | |
| 330 void new_min_point(EDGEPT *local_min, PointHeap *points); | |
| 331 void new_max_point(EDGEPT *local_max, PointHeap *points); | |
| 332 void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, | |
| 333 EDGEPT_CLIST *new_points); | |
| 334 | |
| 335 // chopper.cpp | |
| 336 SEAM *attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, | |
| 337 const std::vector<SEAM *> &seams); | |
| 338 SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, | |
| 339 const std::vector<SEAM *> &seams); | |
| 340 SEAM *chop_overlapping_blob(const std::vector<TBOX> &boxes, bool italic_blob, WERD_RES *word_res, | |
| 341 unsigned *blob_number); | |
| 342 SEAM *improve_one_blob(const std::vector<BLOB_CHOICE *> &blob_choices, DANGERR *fixpt, | |
| 343 bool split_next_to_fragment, bool italic_blob, WERD_RES *word, | |
| 344 unsigned *blob_number); | |
| 345 SEAM *chop_one_blob(const std::vector<TBOX> &boxes, | |
| 346 const std::vector<BLOB_CHOICE *> &blob_choices, WERD_RES *word_res, | |
| 347 unsigned *blob_number); | |
| 348 void chop_word_main(WERD_RES *word); | |
| 349 void improve_by_chopping(float rating_cert_scale, WERD_RES *word, | |
| 350 BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, | |
| 351 LMPainPoints *pain_points, std::vector<SegSearchPending> *pending); | |
| 352 int select_blob_to_split(const std::vector<BLOB_CHOICE *> &blob_choices, float rating_ceiling, | |
| 353 bool split_next_to_fragment); | |
| 354 int select_blob_to_split_from_fixpt(DANGERR *fixpt); | |
| 355 | |
| 356 // findseam.cpp | |
| 357 void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams); | |
| 358 void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, | |
| 359 SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile); | |
| 360 void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue); | |
| 361 SEAM *pick_good_seam(TBLOB *blob); | |
| 362 void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, | |
| 363 SeamPile *seam_pile, SEAM **seam, TBLOB *blob); | |
| 364 void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, | |
| 365 EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, | |
| 366 SEAM **seam, TBLOB *blob); | |
| 367 | |
| 368 // gradechop.cpp | |
| 369 PRIORITY grade_split_length(SPLIT *split); | |
| 370 PRIORITY grade_sharpness(SPLIT *split); | |
| 371 | |
| 372 // outlines.cpp | |
| 373 bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt); | |
| 374 | |
| 375 // pieces.cpp | |
| 376 virtual BLOB_CHOICE_LIST *classify_piece(const std::vector<SEAM *> &seams, int16_t start, | |
| 377 int16_t end, const char *description, TWERD *word, | |
| 378 BlamerBundle *blamer_bundle); | |
| 379 | |
| 380 // Member variables. | |
| 381 | |
| 382 std::unique_ptr<LanguageModel> language_model_; | |
| 383 PRIORITY pass2_ok_split; | |
| 384 // Stores the best choice for the previous word in the paragraph. | |
| 385 // This variable is modified by PAGE_RES_IT when iterating over | |
| 386 // words to OCR on the page. | |
| 387 WERD_CHOICE *prev_word_best_choice_; | |
| 388 | |
| 389 // Function used to fill char choice lattices. | |
| 390 void (Wordrec::*fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, | |
| 391 const UNICHARSET &unicharset, BlamerBundle *blamer_bundle); | |
| 392 | |
| 393 protected: | |
| 394 inline bool SegSearchDone(int num_futile_classifications) { | |
| 395 return (language_model_->AcceptableChoiceFound() || | |
| 396 num_futile_classifications >= segsearch_max_futile_classifications); | |
| 397 } | |
| 398 | |
| 399 // Updates the language model state recorded for the child entries specified | |
| 400 // in pending[starting_col]. Enqueues the children of the updated entries | |
| 401 // into pending and proceeds to update (and remove from pending) all the | |
| 402 // remaining entries in pending[col] (col >= starting_col). Upon termination | |
| 403 // of this function all the pending[col] lists will be empty. | |
| 404 // | |
| 405 // The arguments: | |
| 406 // | |
| 407 // starting_col: index of the column in chunks_record->ratings from | |
| 408 // which the update should be started | |
| 409 // | |
| 410 // pending: list of entries listing chunks_record->ratings entries | |
| 411 // that should be updated | |
| 412 // | |
| 413 // pain_points: priority heap listing the pain points generated by | |
| 414 // the language model | |
| 415 // | |
| 416 // temp_pain_points: temporary storage for tentative pain points generated | |
| 417 // by the language model after a single call to LanguageModel::UpdateState() | |
| 418 // (the argument is passed in rather than created before each | |
| 419 // LanguageModel::UpdateState() call to avoid dynamic memory re-allocation) | |
| 420 // | |
| 421 // best_choice_bundle: a collection of variables that should be updated | |
| 422 // if a new best choice is found | |
| 423 // | |
| 424 void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, | |
| 425 std::vector<SegSearchPending> *pending, WERD_RES *word_res, | |
| 426 LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, | |
| 427 BlamerBundle *blamer_bundle); | |
| 428 | |
| 429 // Process the given pain point: classify the corresponding blob, enqueue | |
| 430 // new pain points to join the newly classified blob with its neighbors. | |
| 431 void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, | |
| 432 const char *pain_point_type, | |
| 433 std::vector<SegSearchPending> *pending, WERD_RES *word_res, | |
| 434 LMPainPoints *pain_points, BlamerBundle *blamer_bundle); | |
| 435 // Resets enough of the results so that the Viterbi search is re-run. | |
| 436 // Needed when the n-gram model is enabled, as the multi-length comparison | |
| 437 // implementation will re-value existing paths to worse values. | |
| 438 void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, | |
| 439 std::vector<SegSearchPending> &pending); | |
| 440 | |
| 441 // Add pain points for classifying blobs on the correct segmentation path | |
| 442 // (so that we can evaluate correct segmentation path and discover the reason | |
| 443 // for incorrect result). | |
| 444 void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, | |
| 445 BlamerBundle *blamer_bundle, std::string &blamer_debug); | |
| 446 }; | |
| 447 | |
| 448 } // namespace tesseract | |
| 449 | |
| 450 #endif // DISABLED_LEGACY_ENGINE | |
| 451 | |
| 452 #endif // TESSERACT_WORDREC_WORDREC_H_ |
