Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/lstm/recodebeam.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: recodebeam.h | |
| 3 // Description: Beam search to decode from the re-encoded CJK as a sequence of | |
| 4 // smaller numbers in place of a single large code. | |
| 5 // Author: Ray Smith | |
| 6 // | |
| 7 // (C) Copyright 2015, Google Inc. | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 #ifndef THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_ | |
| 21 #define THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_ | |
| 22 | |
| 23 #include "dawg.h" | |
| 24 #include "dict.h" | |
| 25 #include "genericheap.h" | |
| 26 #include "genericvector.h" | |
| 27 #include "kdpair.h" | |
| 28 #include "networkio.h" | |
| 29 #include "ratngs.h" | |
| 30 #include "unicharcompress.h" | |
| 31 | |
| 32 #include <unordered_set> // for std::unordered_set | |
| 33 #include <vector> // for std::vector | |
| 34 | |
| 35 namespace tesseract { | |
| 36 | |
| 37 // Enum describing what can follow the current node. | |
| 38 // Consider the following softmax outputs: | |
| 39 // Timestep 0 1 2 3 4 5 6 7 8 | |
| 40 // X-score 0.01 0.55 0.98 0.42 0.01 0.01 0.40 0.95 0.01 | |
| 41 // Y-score 0.00 0.01 0.01 0.01 0.01 0.97 0.59 0.04 0.01 | |
| 42 // Null-score 0.99 0.44 0.01 0.57 0.98 0.02 0.01 0.01 0.98 | |
| 43 // Then the correct CTC decoding (in which adjacent equal classes are folded, | |
| 44 // and then all nulls are dropped) is clearly XYX, but simple decoding (taking | |
| 45 // the max at each timestep) leads to: | |
| 46 // Null@0.99 X@0.55 X@0.98 Null@0.57 Null@0.98 Y@0.97 Y@0.59 X@0.95 Null@0.98, | |
| 47 // which folds to the correct XYX. The conversion to Tesseract rating and | |
| 48 // certainty uses the sum of the log probs (log of the product of probabilities) | |
| 49 // for the Rating and the minimum log prob for the certainty, but that yields a | |
| 50 // minimum certainty of log(0.55), which is poor for such an obvious case. | |
| 51 // CTC says that the probability of the result is the SUM of the products of the | |
| 52 // probabilities over ALL PATHS that decode to the same result, which includes: | |
| 53 // NXXNNYYXN, NNXNNYYXN, NXXXNYYXN, NNXXNYXXN, and others including XXXXXYYXX. | |
| 54 // That is intractable, so some compromise between simple and ideal is needed. | |
| 55 // Observing that evenly split timesteps rarely happen next to each other, we | |
| 56 // allow scores at a transition between classes to be added for decoding thus: | |
| 57 // N@0.99 (N+X)@0.99 X@0.98 (N+X)@0.99 N@0.98 Y@0.97 (X+Y+N)@1.00 X@0.95 N@0.98. | |
| 58 // This works because NNX and NXX both decode to X, so in the middle we can use | |
| 59 // N+X. Note that the classes either side of a sum must stand alone, i.e. use a | |
| 60 // single score, to force all paths to pass through them and decode to the same | |
| 61 // result. Also in the special case of a transition from X to Y, with only one | |
| 62 // timestep between, it is possible to add X+Y+N, since XXY, XYY, and XNY all | |
| 63 // decode to XY. | |
| 64 // An important condition is that we cannot combine X and Null between two | |
| 65 // stand-alone Xs, since that can decode as XNX->XX or XXX->X, so the scores for | |
| 66 // X and Null have to go in separate paths. Combining scores in this way | |
| 67 // provides a much better minimum certainty of log(0.95). | |
| 68 // In the implementation of the beam search, we have to place the possibilities | |
| 69 // X, X+N and X+Y+N in the beam under appropriate conditions of the previous | |
| 70 // node, and constrain what can follow, to enforce the rules explained above. | |
| 71 // We therefore have 3 different types of node determined by what can follow: | |
| 72 enum NodeContinuation { | |
| 73 NC_ANYTHING, // This node used just its own score, so anything can follow. | |
| 74 NC_ONLY_DUP, // The current node combined another score with the score for | |
| 75 // itself, without a stand-alone duplicate before, so must be | |
| 76 // followed by a stand-alone duplicate. | |
| 77 NC_NO_DUP, // The current node combined another score with the score for | |
| 78 // itself, after a stand-alone, so can only be followed by | |
| 79 // something other than a duplicate of the current node. | |
| 80 NC_COUNT | |
| 81 }; | |
| 82 | |
| 83 // Enum describing the top-n status of a code. | |
| 84 enum TopNState { | |
| 85 TN_TOP2, // Winner or 2nd. | |
| 86 TN_TOPN, // Runner up in top-n, but not 1st or 2nd. | |
| 87 TN_ALSO_RAN, // Not in the top-n. | |
| 88 TN_COUNT | |
| 89 }; | |
| 90 | |
| 91 // Lattice element for Re-encode beam search. | |
| 92 struct RecodeNode { | |
| 93 RecodeNode() | |
| 94 : code(-1) | |
| 95 , unichar_id(INVALID_UNICHAR_ID) | |
| 96 , permuter(TOP_CHOICE_PERM) | |
| 97 , start_of_dawg(false) | |
| 98 , start_of_word(false) | |
| 99 , end_of_word(false) | |
| 100 , duplicate(false) | |
| 101 , certainty(0.0f) | |
| 102 , score(0.0f) | |
| 103 , prev(nullptr) | |
| 104 , dawgs(nullptr) | |
| 105 , code_hash(0) {} | |
| 106 RecodeNode(int c, int uni_id, PermuterType perm, bool dawg_start, bool word_start, bool end, | |
| 107 bool dup, float cert, float s, const RecodeNode *p, DawgPositionVector *d, | |
| 108 uint64_t hash) | |
| 109 : code(c) | |
| 110 , unichar_id(uni_id) | |
| 111 , permuter(perm) | |
| 112 , start_of_dawg(dawg_start) | |
| 113 , start_of_word(word_start) | |
| 114 , end_of_word(end) | |
| 115 , duplicate(dup) | |
| 116 , certainty(cert) | |
| 117 , score(s) | |
| 118 , prev(p) | |
| 119 , dawgs(d) | |
| 120 , code_hash(hash) {} | |
| 121 // NOTE: If we could use C++11, then this would be a move constructor. | |
| 122 // Instead we have a copy constructor that does a move!! This is because we | |
| 123 // don't want to copy the whole DawgPositionVector each time, and true | |
| 124 // copying isn't necessary for this struct. It does get moved around a lot | |
| 125 // though inside the heap and during heap push, hence the move semantics. | |
| 126 RecodeNode(const RecodeNode &src) : dawgs(nullptr) { | |
| 127 *this = src; | |
| 128 ASSERT_HOST(src.dawgs == nullptr); | |
| 129 } | |
| 130 RecodeNode &operator=(const RecodeNode &src) { | |
| 131 delete dawgs; | |
| 132 memcpy(this, &src, sizeof(src)); | |
| 133 ((RecodeNode &)src).dawgs = nullptr; | |
| 134 return *this; | |
| 135 } | |
| 136 ~RecodeNode() { | |
| 137 delete dawgs; | |
| 138 } | |
| 139 // Prints details of the node. | |
| 140 void Print(int null_char, const UNICHARSET &unicharset, int depth) const; | |
| 141 | |
| 142 // The re-encoded code here = index to network output. | |
| 143 int code; | |
| 144 // The decoded unichar_id is only valid for the final code of a sequence. | |
| 145 int unichar_id; | |
| 146 // The type of permuter active at this point. Intervals between start_of_word | |
| 147 // and end_of_word make valid words of type given by permuter where | |
| 148 // end_of_word is true. These aren't necessarily delimited by spaces. | |
| 149 PermuterType permuter; | |
| 150 // True if this is the initial dawg state. May be attached to a space or, | |
| 151 // in a non-space-delimited lang, the end of the previous word. | |
| 152 bool start_of_dawg; | |
| 153 // True if this is the first node in a dictionary word. | |
| 154 bool start_of_word; | |
| 155 // True if this represents a valid candidate end of word position. Does not | |
| 156 // necessarily mark the end of a word, since a word can be extended beyond a | |
| 157 // candidate end by a continuation, eg 'the' continues to 'these'. | |
| 158 bool end_of_word; | |
| 159 // True if this->code is a duplicate of prev->code. Some training modes | |
| 160 // allow the network to output duplicate characters and crush them with CTC, | |
| 161 // but that would mess up the dictionary search, so we just smash them | |
| 162 // together on the fly using the duplicate flag. | |
| 163 bool duplicate; | |
| 164 // Certainty (log prob) of (just) this position. | |
| 165 float certainty; | |
| 166 // Total certainty of the path to this position. | |
| 167 float score; | |
| 168 // The previous node in this chain. Borrowed pointer. | |
| 169 const RecodeNode *prev; | |
| 170 // The currently active dawgs at this position. Owned pointer. | |
| 171 DawgPositionVector *dawgs; | |
| 172 // A hash of all codes in the prefix and this->code as well. Used for | |
| 173 // duplicate path removal. | |
| 174 uint64_t code_hash; | |
| 175 }; | |
| 176 | |
| 177 using RecodePair = KDPairInc<double, RecodeNode>; | |
| 178 using RecodeHeap = GenericHeap<RecodePair>; | |
| 179 | |
| 180 // Class that holds the entire beam search for recognition of a text line. | |
| 181 class TESS_API RecodeBeamSearch { | |
| 182 public: | |
| 183 // Borrows the pointer, which is expected to survive until *this is deleted. | |
| 184 RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict); | |
| 185 ~RecodeBeamSearch(); | |
| 186 | |
| 187 // Decodes the set of network outputs, storing the lattice internally. | |
| 188 // If charset is not null, it enables detailed debugging of the beam search. | |
| 189 void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, | |
| 190 double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode = 0); | |
| 191 void Decode(const GENERIC_2D_ARRAY<float> &output, double dict_ratio, double cert_offset, | |
| 192 double worst_dict_cert, const UNICHARSET *charset); | |
| 193 | |
| 194 void DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio, double cert_offset, | |
| 195 double worst_dict_cert, const UNICHARSET *charset, | |
| 196 int lstm_choice_mode = 0); | |
| 197 | |
| 198 // Returns the best path as labels/xcoords similar to simple CTC. | |
| 199 void ExtractBestPathAsLabels(std::vector<int> *labels, std::vector<int> *xcoords) const; | |
| 200 // Returns the best path as unichar-ids/certs/ratings/xcoords skipping | |
| 201 // duplicates, nulls and intermediate parts. | |
| 202 void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, | |
| 203 std::vector<int> *unichar_ids, std::vector<float> *certs, | |
| 204 std::vector<float> *ratings, std::vector<int> *xcoords) const; | |
| 205 | |
| 206 // Returns the best path as a set of WERD_RES. | |
| 207 void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, | |
| 208 const UNICHARSET *unicharset, PointerVector<WERD_RES> *words, | |
| 209 int lstm_choice_mode = 0); | |
| 210 | |
| 211 // Generates debug output of the content of the beams after a Decode. | |
| 212 void DebugBeams(const UNICHARSET &unicharset) const; | |
| 213 | |
| 214 // Extract the best characters from the current decode iteration and block | |
| 215 // those symbols for the next iteration. In contrast to Tesseract's standard | |
| 216 // method to choose the best overall node chain, this method looks at a short | |
| 217 // node chain segmented by the character boundaries and chooses the best | |
| 218 // option independent of the remaining node chain. | |
| 219 void extractSymbolChoices(const UNICHARSET *unicharset); | |
| 220 | |
| 221 // Generates debug output of the content of the beams after a Decode. | |
| 222 void PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset, bool secondary) const; | |
| 223 // Segments the timestep bundle by the character_boundaries. | |
| 224 void segmentTimestepsByCharacters(); | |
| 225 std::vector<std::vector<std::pair<const char *, float>>> | |
| 226 // Unions the segmented timestep character bundles to one big bundle. | |
| 227 combineSegmentedTimesteps( | |
| 228 std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *segmentedTimesteps); | |
| 229 // Stores the alternative characters of every timestep together with their | |
| 230 // probability. | |
| 231 std::vector<std::vector<std::pair<const char *, float>>> timesteps; | |
| 232 std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmentedTimesteps; | |
| 233 // Stores the character choices found in the CTC algorithm. | |
| 234 std::vector<std::vector<std::pair<const char *, float>>> ctc_choices; | |
| 235 // Stores all unichar-ids which are excluded from future iterations. | |
| 236 std::vector<std::unordered_set<int>> excludedUnichars; | |
| 237 // Stores the character boundaries regarding timesteps. | |
| 238 std::vector<int> character_boundaries_; | |
| 239 // Clipping value for certainty inside Tesseract. Reflects the minimum value | |
| 240 // of certainty that will be returned by ExtractBestPathAsUnicharIds. | |
| 241 // Supposedly on a uniform scale that can be compared across languages and | |
| 242 // engines. | |
| 243 static constexpr float kMinCertainty = -20.0f; | |
| 244 // Number of different code lengths for which we have a separate beam. | |
| 245 static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1; | |
| 246 // Total number of beams: dawg/nodawg * number of NodeContinuation * number | |
| 247 // of different lengths. | |
| 248 static const int kNumBeams = 2 * NC_COUNT * kNumLengths; | |
| 249 // Returns the relevant factor in the beams_ index. | |
| 250 static int LengthFromBeamsIndex(int index) { | |
| 251 return index % kNumLengths; | |
| 252 } | |
| 253 static NodeContinuation ContinuationFromBeamsIndex(int index) { | |
| 254 return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT); | |
| 255 } | |
| 256 static bool IsDawgFromBeamsIndex(int index) { | |
| 257 return index / (kNumLengths * NC_COUNT) > 0; | |
| 258 } | |
| 259 // Computes a beams_ index from the given factors. | |
| 260 static int BeamIndex(bool is_dawg, NodeContinuation cont, int length) { | |
| 261 return (is_dawg * NC_COUNT + cont) * kNumLengths + length; | |
| 262 } | |
| 263 | |
| 264 private: | |
| 265 // Struct for the Re-encode beam search. This struct holds the data for | |
| 266 // a single time-step position of the output. Use a vector<RecodeBeam> | |
| 267 // to hold all the timesteps and prevent reallocation of the individual heaps. | |
| 268 struct RecodeBeam { | |
| 269 // Resets to the initial state without deleting all the memory. | |
| 270 void Clear() { | |
| 271 for (auto &beam : beams_) { | |
| 272 beam.clear(); | |
| 273 } | |
| 274 RecodeNode empty; | |
| 275 for (auto &best_initial_dawg : best_initial_dawgs_) { | |
| 276 best_initial_dawg = empty; | |
| 277 } | |
| 278 } | |
| 279 | |
| 280 // A separate beam for each combination of code length, | |
| 281 // NodeContinuation, and dictionary flag. Separating out all these types | |
| 282 // allows the beam to be quite narrow, and yet still have a low chance of | |
| 283 // losing the best path. | |
| 284 // We have to keep all these beams separate, since the highest scoring paths | |
| 285 // come from the paths that are most likely to dead-end at any time, like | |
| 286 // dawg paths, NC_ONLY_DUP etc. | |
| 287 // Each heap is stored with the WORST result at the top, so we can quickly | |
| 288 // get the top-n values. | |
| 289 RecodeHeap beams_[kNumBeams]; | |
| 290 // While the language model is only a single word dictionary, we can use | |
| 291 // word starts as a choke point in the beam, and keep only a single dict | |
| 292 // start node at each step (for each NodeContinuation type), so we find the | |
| 293 // best one here and push it on the heap, if it qualifies, after processing | |
| 294 // all of the step. | |
| 295 RecodeNode best_initial_dawgs_[NC_COUNT]; | |
| 296 }; | |
| 297 using TopPair = KDPairInc<float, int>; | |
| 298 | |
| 299 // Generates debug output of the content of a single beam position. | |
| 300 void DebugBeamPos(const UNICHARSET &unicharset, const RecodeHeap &heap) const; | |
| 301 | |
| 302 // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping | |
| 303 // duplicates, nulls and intermediate parts. | |
| 304 static void ExtractPathAsUnicharIds(const std::vector<const RecodeNode *> &best_nodes, | |
| 305 std::vector<int> *unichar_ids, std::vector<float> *certs, | |
| 306 std::vector<float> *ratings, std::vector<int> *xcoords, | |
| 307 std::vector<int> *character_boundaries = nullptr); | |
| 308 | |
| 309 // Sets up a word with the ratings matrix and fake blobs with boxes in the | |
| 310 // right places. | |
| 311 WERD_RES *InitializeWord(bool leading_space, const TBOX &line_box, int word_start, int word_end, | |
| 312 float space_certainty, const UNICHARSET *unicharset, | |
| 313 const std::vector<int> &xcoords, float scale_factor); | |
| 314 | |
| 315 // Fills top_n_flags_ with the TopNState of each output, i.e. whether the | |
| 316 // corresponding output is in the top 2, in the top_n, or not in the top_n. | |
| 317 void ComputeTopN(const float *outputs, int num_outputs, int top_n); | |
| 318 | |
| 319 void ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs, int num_outputs, | |
| 320 int top_n); | |
| 321 | |
| 322 // Adds the computation for the current time-step to the beam. Call at each | |
| 323 // time-step in sequence from left to right. outputs is the activation vector | |
| 324 // for the current timestep. | |
| 325 void DecodeStep(const float *outputs, int t, double dict_ratio, double cert_offset, | |
| 326 double worst_dict_cert, const UNICHARSET *charset, bool debug = false); | |
| 327 | |
| 328 void DecodeSecondaryStep(const float *outputs, int t, double dict_ratio, double cert_offset, | |
| 329 double worst_dict_cert, const UNICHARSET *charset, bool debug = false); | |
| 330 | |
| 331 // Saves the most certain choices for the current time-step. | |
| 332 void SaveMostCertainChoices(const float *outputs, int num_outputs, const UNICHARSET *charset, | |
| 333 int xCoord); | |
| 334 | |
| 335 // Calculates more accurate character boundaries which can be used to | |
| 336 // provide more accurate alternative symbol choices. | |
| 337 static void calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends, | |
| 338 std::vector<int> *character_boundaries_, int maxWidth); | |
| 339 | |
| 340 // Adds to the appropriate beams the legal (according to recoder) | |
| 341 // continuations of context prev, which is from the given index to beams_, | |
| 342 // using the given network outputs to provide scores to the choices. Uses only | |
| 343 // those choices for which top_n_flags[code] == top_n_flag. | |
| 344 void ContinueContext(const RecodeNode *prev, int index, const float *outputs, | |
| 345 TopNState top_n_flag, const UNICHARSET *unicharset, double dict_ratio, | |
| 346 double cert_offset, double worst_dict_cert, RecodeBeam *step); | |
| 347 // Continues for a new unichar, using dawg or non-dawg as per flag. | |
| 348 void ContinueUnichar(int code, int unichar_id, float cert, float worst_dict_cert, | |
| 349 float dict_ratio, bool use_dawgs, NodeContinuation cont, | |
| 350 const RecodeNode *prev, RecodeBeam *step); | |
| 351 // Adds a RecodeNode composed of the args to the correct heap in step if | |
| 352 // unichar_id is a valid dictionary continuation of whatever is in prev. | |
| 353 void ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont, | |
| 354 const RecodeNode *prev, RecodeBeam *step); | |
| 355 // Sets the correct best_initial_dawgs_ with a RecodeNode composed of the args | |
| 356 // if better than what is already there. | |
| 357 void PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter, bool start, | |
| 358 bool end, float cert, NodeContinuation cont, const RecodeNode *prev, | |
| 359 RecodeBeam *step); | |
| 360 // Adds a RecodeNode composed of the args to the correct heap in step for | |
| 361 // partial unichar or duplicate if there is room or if better than the | |
| 362 // current worst element if already full. | |
| 363 void PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id, float cert, | |
| 364 float worst_dict_cert, float dict_ratio, bool use_dawgs, | |
| 365 NodeContinuation cont, const RecodeNode *prev, RecodeBeam *step); | |
| 366 // Adds a RecodeNode composed of the args to the correct heap in step if there | |
| 367 // is room or if better than the current worst element if already full. | |
| 368 void PushHeapIfBetter(int max_size, int code, int unichar_id, PermuterType permuter, | |
| 369 bool dawg_start, bool word_start, bool end, bool dup, float cert, | |
| 370 const RecodeNode *prev, DawgPositionVector *d, RecodeHeap *heap); | |
| 371 // Adds a RecodeNode to heap if there is room | |
| 372 // or if better than the current worst element if already full. | |
| 373 void PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHeap *heap); | |
| 374 // Searches the heap for an entry matching new_node, and updates the entry | |
| 375 // with reshuffle if needed. Returns true if there was a match. | |
| 376 bool UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap); | |
| 377 // Computes and returns the code-hash for the given code and prev. | |
| 378 uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const; | |
| 379 // Backtracks to extract the best path through the lattice that was built | |
| 380 // during Decode. On return the best_nodes vector essentially contains the set | |
| 381 // of code, score pairs that make the optimal path with the constraint that | |
| 382 // the recoder can decode the code sequence back to a sequence of unichar-ids. | |
| 383 void ExtractBestPaths(std::vector<const RecodeNode *> *best_nodes, | |
| 384 std::vector<const RecodeNode *> *second_nodes) const; | |
| 385 // Helper backtracks through the lattice from the given node, storing the | |
| 386 // path and reversing it. | |
| 387 void ExtractPath(const RecodeNode *node, std::vector<const RecodeNode *> *path) const; | |
| 388 void ExtractPath(const RecodeNode *node, std::vector<const RecodeNode *> *path, | |
| 389 int limiter) const; | |
| 390 // Helper prints debug information on the given lattice path. | |
| 391 void DebugPath(const UNICHARSET *unicharset, const std::vector<const RecodeNode *> &path) const; | |
| 392 // Helper prints debug information on the given unichar path. | |
| 393 void DebugUnicharPath(const UNICHARSET *unicharset, const std::vector<const RecodeNode *> &path, | |
| 394 const std::vector<int> &unichar_ids, const std::vector<float> &certs, | |
| 395 const std::vector<float> &ratings, const std::vector<int> &xcoords) const; | |
| 396 | |
| 397 static const int kBeamWidths[RecodedCharID::kMaxCodeLen + 1]; | |
| 398 | |
| 399 // The encoder/decoder that we will be using. | |
| 400 const UnicharCompress &recoder_; | |
| 401 // The beam for each timestep in the output. | |
| 402 std::vector<RecodeBeam *> beam_; | |
| 403 // Secondary beam for results with lower probability. | |
| 404 std::vector<RecodeBeam *> secondary_beam_; | |
| 405 // The number of timesteps valid in beam_. | |
| 406 int beam_size_; | |
| 407 // A flag to indicate which outputs are the top-n choices. Current timestep | |
| 408 // only. | |
| 409 std::vector<TopNState> top_n_flags_; | |
| 410 // A record of the highest and second scoring codes. | |
| 411 int top_code_; | |
| 412 int second_code_; | |
| 413 // Heap used to compute the top_n_flags_. | |
| 414 GenericHeap<TopPair> top_heap_; | |
| 415 // Borrowed pointer to the dictionary to use in the search. | |
| 416 Dict *dict_; | |
| 417 // True if the language is space-delimited, which is true for most languages | |
| 418 // except chi*, jpn, tha. | |
| 419 bool space_delimited_; | |
| 420 // True if the input is simple text, ie adjacent equal chars are not to be | |
| 421 // eliminated. | |
| 422 bool is_simple_text_; | |
| 423 // The encoded (class label) of the null/reject character. | |
| 424 int null_char_; | |
| 425 }; | |
| 426 | |
| 427 } // namespace tesseract. | |
| 428 | |
| 429 #endif // THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_ |
