Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/pango/pango_font_info.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: pango_font_info.h | |
| 3 * Description: Font-related objects and helper functions | |
| 4 * Author: Ranjith Unnikrishnan | |
| 5 * Created: Mon Nov 18 2013 | |
| 6 * | |
| 7 * (C) Copyright 2013, Google Inc. | |
| 8 * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 * you may not use this file except in compliance with the License. | |
| 10 * You may obtain a copy of the License at | |
| 11 * http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 * Unless required by applicable law or agreed to in writing, software | |
| 13 * distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 * See the License for the specific language governing permissions and | |
| 16 * limitations under the License. | |
| 17 * | |
| 18 **********************************************************************/ | |
| 19 | |
| 20 #ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_ | |
| 21 #define TESSERACT_TRAINING_PANGO_FONT_INFO_H_ | |
| 22 | |
| 23 #include "export.h" | |
| 24 | |
| 25 #include "commandlineflags.h" | |
| 26 | |
| 27 #include "pango/pango-font.h" | |
| 28 #include "pango/pango.h" | |
| 29 #include "pango/pangocairo.h" | |
| 30 | |
| 31 #include <string> | |
| 32 #include <unordered_map> | |
| 33 #include <utility> | |
| 34 #include <vector> | |
| 35 | |
| 36 using char32 = signed int; | |
| 37 | |
| 38 namespace tesseract { | |
| 39 | |
| 40 // Data holder class for a font, intended to avoid having to work with Pango or | |
| 41 // FontConfig-specific objects directly. Instances cannot be copied. | |
| 42 class TESS_PANGO_TRAINING_API PangoFontInfo { | |
| 43 public: | |
| 44 enum FontTypeEnum { | |
| 45 UNKNOWN, | |
| 46 SERIF, | |
| 47 SANS_SERIF, | |
| 48 DECORATIVE, | |
| 49 }; | |
| 50 PangoFontInfo(); | |
| 51 ~PangoFontInfo(); | |
| 52 // Initializes by parsing a font description name, defined as a string of the | |
| 53 // format: | |
| 54 // "FamilyName [FaceName] [PointSize]" | |
| 55 // where a missing FaceName implies the default regular face. | |
| 56 // e.g. "Arial Italic 12", "Verdana" | |
| 57 // | |
| 58 // FaceName is a combination of: | |
| 59 // [StyleName] [Variant] [Weight] [Stretch] | |
| 60 // with (all optional) Pango-defined values of: | |
| 61 // StyleName: Oblique, Italic | |
| 62 // Variant : Small-Caps | |
| 63 // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy | |
| 64 // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed, | |
| 65 // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded. | |
| 66 explicit PangoFontInfo(const std::string &name); | |
| 67 bool ParseFontDescriptionName(const std::string &name); | |
| 68 | |
| 69 // Returns true if the font has codepoint coverage for the specified text. | |
| 70 bool CoversUTF8Text(const char *utf8_text, int byte_length) const; | |
| 71 // Modifies the string to remove Unicode code points that are not covered by | |
| 72 // the font. Returns the number of characters dropped. | |
| 73 int DropUncoveredChars(std::string *utf8_text) const; | |
| 74 | |
| 75 // Returns true if the entire string can be rendered by the font with full | |
| 76 // character coverage and no unknown glyph or dotted-circle glyph | |
| 77 // substitutions on encountering a badly formed unicode sequence. | |
| 78 // If true, returns individual graphemes. Any whitespace characters in the | |
| 79 // original string are also included in the list. | |
| 80 bool CanRenderString(const char *utf8_word, int len, std::vector<std::string> *graphemes) const; | |
| 81 bool CanRenderString(const char *utf8_word, int len) const; | |
| 82 | |
| 83 // Retrieves the x_bearing and x_advance for the given utf8 character in the | |
| 84 // font. Returns false if the glyph for the character could not be found in | |
| 85 // the font. | |
| 86 // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html | |
| 87 bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const; | |
| 88 | |
| 89 // If not already initialized, initializes FontConfig by setting its | |
| 90 // environment variable and creating a fonts.conf file that points to the | |
| 91 // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir. | |
| 92 static void SoftInitFontConfig(); | |
| 93 // Re-initializes font config, whether or not already initialized. | |
| 94 // If already initialized, any existing cache is deleted, just to be sure. | |
| 95 static void HardInitFontConfig(const char *fonts_dir, const char *cache_dir); | |
| 96 | |
| 97 // Accessors | |
| 98 std::string DescriptionName() const; | |
| 99 // Font Family name eg. "Arial" | |
| 100 const std::string &family_name() const { | |
| 101 return family_name_; | |
| 102 } | |
| 103 // Size in points (1/72"), rounded to the nearest integer. | |
| 104 int font_size() const { | |
| 105 return font_size_; | |
| 106 } | |
| 107 FontTypeEnum font_type() const { | |
| 108 return font_type_; | |
| 109 } | |
| 110 | |
| 111 int resolution() const { | |
| 112 return resolution_; | |
| 113 } | |
| 114 void set_resolution(const int resolution) { | |
| 115 resolution_ = resolution; | |
| 116 } | |
| 117 | |
| 118 private: | |
| 119 friend class FontUtils; | |
| 120 void Clear(); | |
| 121 bool ParseFontDescription(const PangoFontDescription *desc); | |
| 122 // Returns the PangoFont structure corresponding to the closest available font | |
| 123 // in the font map. | |
| 124 PangoFont *ToPangoFont() const; | |
| 125 | |
| 126 // Font properties set automatically from parsing the font description name. | |
| 127 std::string family_name_; | |
| 128 int font_size_; | |
| 129 FontTypeEnum font_type_; | |
| 130 // The Pango description that was used to initialize the instance. | |
| 131 PangoFontDescription *desc_; | |
| 132 // Default output resolution to assume for GetSpacingProperties() and any | |
| 133 // other methods that return pixel values. | |
| 134 int resolution_; | |
| 135 // Fontconfig operates through an environment variable, so it intrinsically | |
| 136 // cannot be thread-friendly, but you can serialize multiple independent | |
| 137 // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir). | |
| 138 // These hold the last initialized values set by HardInitFontConfig or | |
| 139 // the first call to SoftInitFontConfig. | |
| 140 // Directory to be scanned for font files. | |
| 141 static std::string fonts_dir_; | |
| 142 // Directory to store the cache of font information. (Can be the same as | |
| 143 // fonts_dir_) | |
| 144 static std::string cache_dir_; | |
| 145 | |
| 146 private: | |
| 147 PangoFontInfo(const PangoFontInfo &) = delete; | |
| 148 void operator=(const PangoFontInfo &) = delete; | |
| 149 }; | |
| 150 | |
| 151 // Static utility methods for querying font availability and font-selection | |
| 152 // based on codepoint coverage. | |
| 153 class TESS_PANGO_TRAINING_API FontUtils { | |
| 154 public: | |
| 155 // Returns true if the font of the given description name is available in the | |
| 156 // target directory specified by --fonts_dir | |
| 157 static bool IsAvailableFont(const char *font_desc) { | |
| 158 return IsAvailableFont(font_desc, nullptr); | |
| 159 } | |
| 160 // Returns true if the font of the given description name is available in the | |
| 161 // target directory specified by --fonts_dir. If false is returned, and | |
| 162 // best_match is not nullptr, the closest matching font is returned there. | |
| 163 static bool IsAvailableFont(const char *font_desc, std::string *best_match); | |
| 164 // Returns the description names of the available fonts. | |
| 165 static const std::vector<std::string> &ListAvailableFonts(); | |
| 166 | |
| 167 // Picks font among available fonts that covers and can render the given word, | |
| 168 // and returns the font description name and the decomposition of the word to | |
| 169 // graphemes. Returns false if no suitable font was found. | |
| 170 static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, | |
| 171 std::vector<std::string> *graphemes); | |
| 172 | |
| 173 // Picks font among all_fonts that covers and can render the given word, | |
| 174 // and returns the font description name and the decomposition of the word to | |
| 175 // graphemes. Returns false if no suitable font was found. | |
| 176 static bool SelectFont(const char *utf8_word, const int utf8_len, | |
| 177 const std::vector<std::string> &all_fonts, std::string *font_name, | |
| 178 std::vector<std::string> *graphemes); | |
| 179 | |
| 180 // NOTE: The following utilities were written to be backward compatible with | |
| 181 // StringRender. | |
| 182 | |
| 183 // BestFonts returns a font name and a bit vector of the characters it | |
| 184 // can render for the fonts that score within some fraction of the best | |
| 185 // font on the characters in the given hash map. | |
| 186 // In the flags vector, each flag is set according to whether the | |
| 187 // corresponding character (in order of iterating ch_map) can be rendered. | |
| 188 // The return string is a list of the acceptable fonts that were used. NOTE(review): int64_t is used below but <cstdint> is not among this header's visible includes; presumably it arrives transitively -- confirm. | |
| 189 static std::string BestFonts(const std::unordered_map<char32, int64_t> &ch_map, | |
| 190 std::vector<std::pair<const char *, std::vector<bool>>> *font_flag); | |
| 191 | |
| 192 // FontScore returns the weighted renderability score of the given | |
| 193 // hash map character table in the given font. The unweighted score | |
| 194 // is also returned in raw_score. | |
| 195 // The values in the bool vector ch_flags correspond to whether the | |
| 196 // corresponding character (in order of iterating ch_map) can be rendered. | |
| 197 static int FontScore(const std::unordered_map<char32, int64_t> &ch_map, | |
| 198 const std::string &fontname, int *raw_score, std::vector<bool> *ch_flags); | |
| 199 | |
| 200 // PangoFontInfo is reinitialized, so clear the static list of fonts. | |
| 201 static void ReInit(); | |
| 202 static void PangoFontTypeInfo(); | |
| 203 | |
| 204 private: | |
| 205 static std::vector<std::string> available_fonts_; // cache list | |
| 206 }; | |
| 207 } // namespace tesseract | |
| 208 | |
| 209 #endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_ |
