Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/dict/trie.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: trie.h | |
| 4 * Description: Functions to build a trie data structure. | |
| 5 * Author: Mark Seaman, SW Productivity | |
| 6 * | |
| 7 * (c) Copyright 1987, Hewlett-Packard Company. | |
| 8 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 ** you may not use this file except in compliance with the License. | |
| 10 ** You may obtain a copy of the License at | |
| 11 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 ** Unless required by applicable law or agreed to in writing, software | |
| 13 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 ** See the License for the specific language governing permissions and | |
| 16 ** limitations under the License. | |
| 17 * | |
| 18 *****************************************************************************/ | |
| 19 #ifndef TRIE_H | |
| 20 #define TRIE_H | |
| 21 | |
| 22 #include "dawg.h" | |
| 23 | |
| 24 namespace tesseract { | |
| 25 | |
| 26 class UNICHARSET; | |
| 27 | |
| 28 // Note: if we consider either NODE_REF or EDGE_INDEX to ever exceed | |
| 29 // max int32, we will need to change vector to use int64 for size | |
| 30 // and address indices. This does not seem to be needed immediately, | |
| 31 // since currently the largest number of edges limit used by tesseract | |
| 32 // (kMaxNumEdges in wordlist2dawg.cpp) is far less than max int32. | |
| 33 // There are also int casts below to satisfy the WIN32 compiler that would | |
| 34 // need to be changed. | |
| 35 // It might be cleanest to change the types of most of the Trie/Dawg related | |
| 36 // typedefs to int and restrict the casts to extracting these values from | |
| 37 // the 64 bit EDGE_RECORD. | |
| 38 using EDGE_INDEX = int64_t; // index of an edge in a given node | |
| 39 using EDGE_VECTOR = std::vector<EDGE_RECORD>; | |
| 40 | |
| 41 struct TRIE_NODE_RECORD { | |
| 42 EDGE_VECTOR forward_edges; | |
| 43 EDGE_VECTOR backward_edges; | |
| 44 }; | |
| 45 using TRIE_NODES = std::vector<TRIE_NODE_RECORD *>; | |
| 46 | |
| 47 /** | |
| 48 * Concrete class for Trie data structure that allows to store a list of | |
| 49 * words (extends Dawg base class) as well as dynamically add new words. | |
| 50 * This class stores a vector of pointers to TRIE_NODE_RECORDs, each of | |
| 51 * which has a vector of forward and backward edges. | |
| 52 */ | |
| 53 class TESS_API Trie : public Dawg { | |
| 54 public: | |
| 55 enum RTLReversePolicy { | |
| 56 RRP_DO_NO_REVERSE, | |
| 57 RRP_REVERSE_IF_HAS_RTL, | |
| 58 RRP_FORCE_REVERSE, | |
| 59 }; | |
| 60 | |
| 61 // Minimum number of concrete characters at the beginning of user patterns. | |
| 62 static const int kSaneNumConcreteChars = 0; | |
| 63 // Various unicode whitespace characters are used to denote unichar patterns, | |
| 64 // (character classifier would never produce these whitespace characters as a | |
| 65 // valid classification). | |
| 66 static const char kAlphaPatternUnicode[]; | |
| 67 static const char kDigitPatternUnicode[]; | |
| 68 static const char kAlphanumPatternUnicode[]; | |
| 69 static const char kPuncPatternUnicode[]; | |
| 70 static const char kLowerPatternUnicode[]; | |
| 71 static const char kUpperPatternUnicode[]; | |
| 72 | |
| 73 static const char *get_reverse_policy_name(RTLReversePolicy reverse_policy); | |
| 74 | |
| 75 // max_num_edges argument allows limiting the amount of memory this | |
| 76 // Trie can consume (if a new word insert would cause the Trie to | |
| 77 // contain more edges than max_num_edges, all the edges are cleared | |
| 78 // so that new inserts can proceed). | |
| 79 Trie(DawgType type, const std::string &lang, PermuterType perm, int unicharset_size, int debug_level) | |
| 80 : Dawg(type, lang, perm, debug_level) { | |
| 81 init(unicharset_size); | |
| 82 deref_node_index_mask_ = ~letter_mask_; | |
| 83 new_dawg_node(); // need to allocate node 0 | |
| 84 } | |
| 85 ~Trie() override { | |
| 86 for (auto node : nodes_) { | |
| 87 delete node; | |
| 88 } | |
| 89 } | |
| 90 | |
| 91 // Reset the Trie to empty. | |
| 92 void clear(); | |
| 93 | |
| 94 /** Returns the edge that corresponds to the letter out of this node. */ | |
| 95 EDGE_REF edge_char_of(NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const override { | |
| 96 EDGE_RECORD *edge_ptr; | |
| 97 EDGE_INDEX edge_index; | |
| 98 if (!edge_char_of(node_ref, NO_EDGE, FORWARD_EDGE, word_end, unichar_id, &edge_ptr, | |
| 99 &edge_index)) { | |
| 100 return NO_EDGE; | |
| 101 } | |
| 102 return make_edge_ref(node_ref, edge_index); | |
| 103 } | |
| 104 | |
| 105 /** | |
| 106 * Fills the given NodeChildVector with all the unichar ids (and the | |
| 107 * corresponding EDGE_REFs) for which there is an edge out of this node. | |
| 108 */ | |
| 109 void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override { | |
| 110 const EDGE_VECTOR &forward_edges = nodes_[static_cast<int>(node)]->forward_edges; | |
| 111 for (auto &edge : forward_edges) { | |
| 112 if (!word_end || end_of_word_from_edge_rec(edge)) { | |
| 113 vec->push_back( | |
| 114 NodeChild(unichar_id_from_edge_rec(edge), make_edge_ref(node, &edge - &forward_edges[0]))); | |
| 115 } | |
| 116 } | |
| 117 } | |
| 118 | |
| 119 /** | |
| 120 * Returns the next node visited by following the edge | |
| 121 * indicated by the given EDGE_REF. | |
| 122 */ | |
| 123 NODE_REF next_node(EDGE_REF edge_ref) const override { | |
| 124 if (edge_ref == NO_EDGE || num_edges_ == 0) { | |
| 125 return NO_EDGE; | |
| 126 } | |
| 127 return next_node_from_edge_rec(*deref_edge_ref(edge_ref)); | |
| 128 } | |
| 129 | |
| 130 /** | |
| 131 * Returns true if the edge indicated by the given EDGE_REF | |
| 132 * marks the end of a word. | |
| 133 */ | |
| 134 bool end_of_word(EDGE_REF edge_ref) const override { | |
| 135 if (edge_ref == NO_EDGE || num_edges_ == 0) { | |
| 136 return false; | |
| 137 } | |
| 138 return end_of_word_from_edge_rec(*deref_edge_ref(edge_ref)); | |
| 139 } | |
| 140 | |
| 141 /** Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. */ | |
| 142 UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override { | |
| 143 if (edge_ref == NO_EDGE || num_edges_ == 0) { | |
| 144 return INVALID_UNICHAR_ID; | |
| 145 } | |
| 146 return unichar_id_from_edge_rec(*deref_edge_ref(edge_ref)); | |
| 147 } | |
| 148 // Sets the UNICHAR_ID in the given edge_rec to unicharset_size_, marking | |
| 149 // the edge dead. | |
| 150 void KillEdge(EDGE_RECORD *edge_rec) const { | |
| 151 *edge_rec &= ~letter_mask_; | |
| 152 *edge_rec |= (unicharset_size_ << LETTER_START_BIT); | |
| 153 } | |
| 154 bool DeadEdge(const EDGE_RECORD &edge_rec) const { | |
| 155 return unichar_id_from_edge_rec(edge_rec) == unicharset_size_; | |
| 156 } | |
| 157 | |
| 158 // Prints the contents of the node indicated by the given NODE_REF. | |
| 159 // At most max_num_edges will be printed. | |
| 160 void print_node(NODE_REF node, int max_num_edges) const override; | |
| 161 | |
| 162 // Writes edges from nodes_ to an EDGE_ARRAY and creates a SquishedDawg. | |
| 163 // Eliminates redundant edges and returns the pointer to the SquishedDawg. | |
| 164 // Note: the caller is responsible for deallocating memory associated | |
| 165 // with the returned SquishedDawg pointer. | |
| 166 SquishedDawg *trie_to_dawg(); | |
| 167 | |
| 168 // Reads a list of words from the given file and adds into the Trie. | |
| 169 // Calls WERD_CHOICE::reverse_unichar_ids_if_rtl() according to the reverse | |
| 170 // policy and information in the unicharset. | |
| 171 // Returns false on error. | |
| 172 bool read_and_add_word_list(const char *filename, const UNICHARSET &unicharset, | |
| 173 Trie::RTLReversePolicy reverse); | |
| 174 | |
| 175 // Reads a list of words from the given file. | |
| 176 // Returns false on error. | |
| 177 bool read_word_list(const char *filename, std::vector<std::string> *words); | |
| 178 // Adds a list of words previously read using read_word_list to the trie | |
| 179 // using the given unicharset and reverse_policy to convert to unichar-ids. | |
| 180 // Returns false on error. | |
| 181 bool add_word_list(const std::vector<std::string> &words, const UNICHARSET &unicharset, | |
| 182 Trie::RTLReversePolicy reverse_policy); | |
| 183 | |
| 184 // Inserts the list of patterns from the given file into the Trie. | |
| 185 // The pattern list file should contain one pattern per line in UTF-8 format. | |
| 186 // | |
| 187 // Each pattern can contain any non-whitespace characters, however only the | |
| 188 // patterns that contain characters from the unicharset of the corresponding | |
| 189 // language will be useful. | |
| 190 // The only meta character is '\'. To be used in a pattern as an ordinary | |
| 191 // string it should be escaped with '\' (e.g. string "C:\Documents" should | |
| 192 // be written in the patterns file as "C:\\Documents"). | |
| 193 // This function supports a very limited regular expression syntax. One can | |
| 194 // express a character, a certain character class and a number of times the | |
| 195 // entity should be repeated in the pattern. | |
| 196 // | |
| 197 // To denote a character class use one of: | |
| 198 // \c - unichar for which UNICHARSET::get_isalpha() is true (character) | |
| 199 // \d - unichar for which UNICHARSET::get_isdigit() is true | |
| 200 // \n - unichar for which UNICHARSET::get_isdigit() or | |
| 201 // UNICHARSET::isalpha() are true | |
| 202 // \p - unichar for which UNICHARSET::get_ispunct() is true | |
| 203 // \a - unichar for which UNICHARSET::get_islower() is true | |
| 204 // \A - unichar for which UNICHARSET::get_isupper() is true | |
| 205 // | |
| 206 // \* could be specified after each character or pattern to indicate that | |
| 207 // the character/pattern can be repeated any number of times before the next | |
| 208 // character/pattern occurs. | |
| 209 // | |
| 210 // Examples: | |
| 211 // 1-8\d\d-GOOG-411 will be expanded to strings: | |
| 212 // 1-800-GOOG-411, 1-801-GOOG-411, ... 1-899-GOOG-411. | |
| 213 // | |
| 214 // http://www.\n\*.com will be expanded to strings like: | |
| 215 // http://www.a.com http://www.a123.com ... http://www.ABCDefgHIJKLMNop.com | |
| 216 // | |
| 217 // Note: In choosing which patterns to include please be aware of the fact | |
| 218 // providing very generic patterns will make tesseract run slower. | |
| 219 // For example \n\* at the beginning of the pattern will make Tesseract | |
| 220 // consider all the combinations of proposed character choices for each | |
| 221 // of the segmentations, which will be unacceptably slow. | |
| 222 // Because of potential problems with speed that could be difficult to | |
| 223 // identify, each user pattern has to have at least kSaneNumConcreteChars | |
| 224 // concrete characters from the unicharset at the beginning. | |
| 225 bool read_pattern_list(const char *filename, const UNICHARSET &unicharset); | |
| 226 | |
| 227 // Initializes the values of *_pattern_ unichar ids. | |
| 228 // This function should be called before calling read_pattern_list(). | |
| 229 void initialize_patterns(UNICHARSET *unicharset); | |
| 230 | |
| 231 // Fills in the given unichar id vector with the unichar ids that represent | |
| 232 // the patterns of the character classes of the given unichar_id. | |
| 233 void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, | |
| 234 std::vector<UNICHAR_ID> *vec) const override; | |
| 235 | |
| 236 // Returns the given EDGE_REF if the EDGE_RECORD that it points to has | |
| 237 // a self loop and the given unichar_id matches the unichar_id stored in the | |
| 238 // EDGE_RECORD, returns NO_EDGE otherwise. | |
| 239 EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, | |
| 240 bool word_end) const override { | |
| 241 if (edge_ref == NO_EDGE) { | |
| 242 return NO_EDGE; | |
| 243 } | |
| 244 EDGE_RECORD *edge_rec = deref_edge_ref(edge_ref); | |
| 245 return (marker_flag_from_edge_rec(*edge_rec) && | |
| 246 unichar_id == unichar_id_from_edge_rec(*edge_rec) && | |
| 247 word_end == end_of_word_from_edge_rec(*edge_rec)) | |
| 248 ? edge_ref | |
| 249 : NO_EDGE; | |
| 250 } | |
| 251 | |
| 252 // Adds a word to the Trie (creates the necessary nodes and edges). | |
| 253 // | |
| 254 // If repetitions vector is not nullptr, each entry in the vector indicates | |
| 255 // whether the unichar id with the corresponding index in the word is allowed | |
| 256 // to repeat an unlimited number of times. For each entry that is true, MARKER | |
| 257 // flag of the corresponding edge created for this unichar id is set to true). | |
| 258 // | |
| 259 // Return true if add succeeded, false otherwise (e.g. when a word contained | |
| 260 // an invalid unichar id or the trie was getting too large and was cleared). | |
| 261 bool add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *repetitions); | |
| 262 bool add_word_to_dawg(const WERD_CHOICE &word) { | |
| 263 return add_word_to_dawg(word, nullptr); | |
| 264 } | |
| 265 | |
| 266 protected: | |
| 267 // The structure of an EDGE_REF for Trie edges is as follows: | |
| 268 // [LETTER_START_BIT, flag_start_bit_): | |
| 269 // edge index in *_edges in a TRIE_NODE_RECORD | |
| 270 // [flag_start_bit, 30th bit]: node index in nodes (TRIE_NODES vector) | |
| 271 // | |
| 272 // With this arrangement there are enough bits to represent edge indices | |
| 273 // (each node can have at most unicharset_size_ forward edges and | |
| 274 // the position of flag_start_bit is set to be log2(unicharset_size_)). | |
| 275 // It is also possible to accommodate a maximum number of nodes that is at | |
| 276 // least as large as that of the SquishedDawg representation (in SquishedDawg | |
| 277 // each EDGE_RECORD has 32-(flag_start_bit+NUM_FLAG_BITS) bits to represent | |
| 278 // the next node index). | |
| 279 // | |
| 280 | |
| 281 // Returns the pointer to EDGE_RECORD after decoding the location | |
| 282 // of the edge from the information in the given EDGE_REF. | |
| 283 // This function assumes that EDGE_REF holds valid node/edge indices. | |
| 284 inline EDGE_RECORD *deref_edge_ref(EDGE_REF edge_ref) const { | |
| 285 int edge_index = static_cast<int>((edge_ref & letter_mask_) >> LETTER_START_BIT); | |
| 286 int node_index = static_cast<int>((edge_ref & deref_node_index_mask_) >> flag_start_bit_); | |
| 287 TRIE_NODE_RECORD *node_rec = nodes_[node_index]; | |
| 288 return &(node_rec->forward_edges[edge_index]); | |
| 289 } | |
| 290 /** Constructs EDGE_REF from the given node_index and edge_index. */ | |
| 291 inline EDGE_REF make_edge_ref(NODE_REF node_index, EDGE_INDEX edge_index) const { | |
| 292 return ((node_index << flag_start_bit_) | (edge_index << LETTER_START_BIT)); | |
| 293 } | |
| 294 /** Sets up this edge record to the requested values. */ | |
| 295 inline void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats, int direction, bool word_end, | |
| 296 UNICHAR_ID unichar_id) { | |
| 297 EDGE_RECORD flags = 0; | |
| 298 if (repeats) { | |
| 299 flags |= MARKER_FLAG; | |
| 300 } | |
| 301 if (word_end) { | |
| 302 flags |= WERD_END_FLAG; | |
| 303 } | |
| 304 if (direction == BACKWARD_EDGE) { | |
| 305 flags |= DIRECTION_FLAG; | |
| 306 } | |
| 307 *edge = ((nxt << next_node_start_bit_) | (static_cast<EDGE_RECORD>(flags) << flag_start_bit_) | | |
| 308 (static_cast<EDGE_RECORD>(unichar_id) << LETTER_START_BIT)); | |
| 309 } | |
| 310 /** Prints the given EDGE_RECORD. */ | |
| 311 inline void print_edge_rec(const EDGE_RECORD &edge_rec) const { | |
| 312 tprintf("|" REFFORMAT "|%s%s%s|%d|", next_node_from_edge_rec(edge_rec), | |
| 313 marker_flag_from_edge_rec(edge_rec) ? "R," : "", | |
| 314 (direction_from_edge_rec(edge_rec) == FORWARD_EDGE) ? "F" : "B", | |
| 315 end_of_word_from_edge_rec(edge_rec) ? ",E" : "", unichar_id_from_edge_rec(edge_rec)); | |
| 316 } | |
| 317 // Returns true if the next node in recorded the given EDGE_RECORD | |
| 318 // has exactly one forward edge. | |
| 319 inline bool can_be_eliminated(const EDGE_RECORD &edge_rec) { | |
| 320 NODE_REF node_ref = next_node_from_edge_rec(edge_rec); | |
| 321 return (node_ref != NO_EDGE && nodes_[static_cast<int>(node_ref)]->forward_edges.size() == 1); | |
| 322 } | |
| 323 | |
| 324 // Prints the contents of the Trie. | |
| 325 // At most max_num_edges will be printed for each node. | |
| 326 void print_all(const char *msg, int max_num_edges) { | |
| 327 tprintf("\n__________________________\n%s\n", msg); | |
| 328 for (size_t i = 0; i < nodes_.size(); ++i) { | |
| 329 print_node(i, max_num_edges); | |
| 330 } | |
| 331 tprintf("__________________________\n"); | |
| 332 } | |
| 333 | |
| 334 // Finds the edge with the given direction, word_end and unichar_id | |
| 335 // in the node indicated by node_ref. Fills in the pointer to the | |
| 336 // EDGE_RECORD and the index of the edge with the values | |
| 337 // corresponding to the edge found. Returns true if an edge was found. | |
| 338 bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end, | |
| 339 UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const; | |
| 340 | |
| 341 // Adds an single edge linkage between node1 and node2 in the direction | |
| 342 // indicated by direction argument. | |
| 343 bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats, int direction, bool word_end, | |
| 344 UNICHAR_ID unichar_id); | |
| 345 | |
| 346 // Adds forward edge linkage from node1 to node2 and the corresponding | |
| 347 // backward edge linkage in the other direction. | |
| 348 bool add_new_edge(NODE_REF node1, NODE_REF node2, bool repeats, bool word_end, | |
| 349 UNICHAR_ID unichar_id) { | |
| 350 return (add_edge_linkage(node1, node2, repeats, FORWARD_EDGE, word_end, unichar_id) && | |
| 351 add_edge_linkage(node2, node1, repeats, BACKWARD_EDGE, word_end, unichar_id)); | |
| 352 } | |
| 353 | |
| 354 // Sets the word ending flags in an already existing edge pair. | |
| 355 // Returns true on success. | |
| 356 void add_word_ending(EDGE_RECORD *edge, NODE_REF the_next_node, bool repeats, | |
| 357 UNICHAR_ID unichar_id); | |
| 358 | |
| 359 // Allocates space for a new node in the Trie. | |
| 360 NODE_REF new_dawg_node(); | |
| 361 | |
| 362 // Removes a single edge linkage to between node1 and node2 in the | |
| 363 // direction indicated by direction argument. | |
| 364 void remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bool word_end, | |
| 365 UNICHAR_ID unichar_id); | |
| 366 | |
| 367 // Removes forward edge linkage from node1 to node2 and the corresponding | |
| 368 // backward edge linkage in the other direction. | |
| 369 void remove_edge(NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id) { | |
| 370 remove_edge_linkage(node1, node2, FORWARD_EDGE, word_end, unichar_id); | |
| 371 remove_edge_linkage(node2, node1, BACKWARD_EDGE, word_end, unichar_id); | |
| 372 } | |
| 373 | |
| 374 // Compares edge1 and edge2 in the given node to see if they point to two | |
| 375 // next nodes that could be collapsed. If they do, performs the reduction | |
| 376 // and returns true. | |
| 377 bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2); | |
| 378 | |
| 379 // Assuming that edge_index indicates the first edge in a group of edges | |
| 380 // in this node with a particular letter value, looks through these edges | |
| 381 // to see if any of them can be collapsed. If so does it. Returns to the | |
| 382 // caller when all edges with this letter have been reduced. | |
| 383 // Returns true if further reduction is possible with this same letter. | |
| 384 bool reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, NODE_REF node, | |
| 385 EDGE_VECTOR *backward_edges, std::vector<bool> &reduced_nodes); | |
| 386 | |
| 387 /** | |
| 388 * Order num_edges of consecutive EDGE_RECORDS in the given EDGE_VECTOR in | |
| 389 * increasing order of unichar ids. This function is normally called | |
| 390 * for all edges in a single node, and since number of edges in each node | |
| 391 * is usually quite small, selection sort is used. | |
| 392 */ | |
| 393 void sort_edges(EDGE_VECTOR *edges); | |
| 394 | |
| 395 /** Eliminates any redundant edges from this node in the Trie. */ | |
| 396 void reduce_node_input(NODE_REF node, std::vector<bool> &reduced_nodes); | |
| 397 | |
| 398 // Returns the pattern unichar id for the given character class code. | |
| 399 UNICHAR_ID character_class_to_pattern(char ch); | |
| 400 | |
| 401 // Member variables | |
| 402 TRIE_NODES nodes_; // vector of nodes in the Trie | |
| 403 // Freelist of edges in the root backwards node that were previously zeroed. | |
| 404 std::vector<EDGE_INDEX> root_back_freelist_; | |
| 405 uint64_t num_edges_ = 0; // sum of all edges (forward and backward) | |
| 406 uint64_t deref_direction_mask_ = 0; // mask for EDGE_REF to extract direction | |
| 407 uint64_t deref_node_index_mask_ = 0; // mask for EDGE_REF to extract node index | |
| 408 // Variables for translating character class codes denoted in user patterns | |
| 409 // file to the unichar ids used to represent them in a Trie. | |
| 410 UNICHAR_ID alpha_pattern_ = 0; | |
| 411 UNICHAR_ID digit_pattern_ = 0; | |
| 412 UNICHAR_ID alphanum_pattern_ = 0; | |
| 413 UNICHAR_ID punc_pattern_ = 0; | |
| 414 UNICHAR_ID lower_pattern_ = 0; | |
| 415 UNICHAR_ID upper_pattern_ = 0; | |
| 416 bool initialized_patterns_ = false; | |
| 417 }; | |
| 418 | |
| 419 } // namespace tesseract | |
| 420 | |
| 421 #endif |
