Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/pango/ligature_table.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: ligature_table.h | |
| 3 * Description: Class for adding and removing optional latin ligatures, | |
| 4 * conditional on codepoint support by a specified font | |
| 5 * (if specified). | |
| 6 * Author: Ranjith Unnikrishnan | |
| 7 * Created: Mon Nov 18 2013 | |
| 8 * | |
| 9 * (C) Copyright 2013, Google Inc. | |
| 10 * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 11 * you may not use this file except in compliance with the License. | |
| 12 * You may obtain a copy of the License at | |
| 13 * http://www.apache.org/licenses/LICENSE-2.0 | |
| 14 * Unless required by applicable law or agreed to in writing, software | |
| 15 * distributed under the License is distributed on an "AS IS" BASIS, | |
| 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 17 * See the License for the specific language governing permissions and | |
| 18 * limitations under the License. | |
| 19 * | |
| 20 **********************************************************************/ | |
| 21 | |
| 22 #ifndef TRAININGDATA_LIGATURE_TABLE_H_ | |
| 23 #define TRAININGDATA_LIGATURE_TABLE_H_ | |
| 24 | |
| 25 #include "export.h" | |
| 26 | |
| 27 #include <memory> | |
| 28 #include <string> | |
| 29 #include <unordered_map> | |
| 30 | |
| 31 namespace tesseract { | |
| 32 | |
| 33 class PangoFontInfo; // defined in pango_font_info.h | |
| 34 | |
| 35 // Hash map from a string to its substitute string; the type of both the normalized-to-ligature and ligature-to-normalized tables. | |
| 36 using LigHash = std::unordered_map<std::string, std::string>; | |
| 37 | |
| 38 class TESS_PANGO_TRAINING_API LigatureTable { | |
| 39 public: | |
| 40 // Get a static instance of this class (presumably the one held in instance_; confirm in the .cpp). | |
| 41 static LigatureTable *Get(); | |
| 42 | |
| 43 // Return a copy of the utf8 string str in which ligaturizable sequences, such as "fi", are | |
| 44 // replaced by the (utf8 code for) appropriate ligature characters. A replacement is only made | |
| 45 // if the corresponding ligature character is renderable in the given font; str itself is not modified. | |
| 46 std::string AddLigatures(const std::string &str, const PangoFontInfo *font) const; | |
| 47 // Remove all ligatures; the result is returned as a new string and str is not modified. | |
| 48 std::string RemoveLigatures(const std::string &str) const; | |
| 49 // Remove only custom ligatures (e.g. "ct") encoded in the private-use area; the result is returned as a new string. | |
| 50 std::string RemoveCustomLigatures(const std::string &str) const; | |
| 51 | |
| 52 const LigHash &norm_to_lig_table() const { | |
| 53 return norm_to_lig_table_; | |
| 54 } | |
| 55 const LigHash &lig_to_norm_table() const { | |
| 56 return lig_to_norm_table_; | |
| 57 } | |
| 58 | |
| 59 protected: | |
| 60 LigatureTable(); | |
| 61 // Initialize the hash tables (norm_to_lig_table_ and lig_to_norm_table_) that map between | |
| 62 // ligature strings and the corresponding ligature characters. | |
| 63 void Init(); | |
| 64 | |
| 65 static std::unique_ptr<LigatureTable> instance_; | |
| 66 LigHash norm_to_lig_table_; | |
| 67 LigHash lig_to_norm_table_; | |
| 68 int min_lig_length_; | |
| 69 int max_lig_length_; | |
| 70 int min_norm_length_; | |
| 71 int max_norm_length_; | |
| 72 | |
| 73 private: | |
| 74 LigatureTable(const LigatureTable &) = delete; | |
| 75 void operator=(const LigatureTable &) = delete; | |
| 76 }; | |
| 77 | |
| 78 } // namespace tesseract | |
| 79 | |
| 80 #endif // TRAININGDATA_LIGATURE_TABLE_H_ | |
