Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/pango/pango_font_info.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: pango_font_info.cpp | |
| 3 * Description: Font-related objects and helper functions | |
| 4 * Author: Ranjith Unnikrishnan | |
| 5 * | |
| 6 * (C) Copyright 2013, Google Inc. | |
| 7 * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 * you may not use this file except in compliance with the License. | |
| 9 * You may obtain a copy of the License at | |
| 10 * http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 * Unless required by applicable law or agreed to in writing, software | |
| 12 * distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 * See the License for the specific language governing permissions and | |
| 15 * limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 // Include automatically generated configuration file if running autoconf. | |
| 20 #ifdef HAVE_CONFIG_H | |
| 21 # include "config_auto.h" | |
| 22 #endif | |
| 23 | |
| 24 #if (defined __CYGWIN__) | |
| 25 // workaround for stdlib.h and putenv | |
| 26 # undef __STRICT_ANSI__ | |
| 27 #endif | |
| 28 | |
| 29 #include "commandlineflags.h" | |
| 30 #include "fileio.h" | |
| 31 #include "normstrngs.h" | |
| 32 #include "pango_font_info.h" | |
| 33 #include "tlog.h" | |
| 34 | |
| 35 #include <tesseract/unichar.h> | |
| 36 | |
| 37 #include "pango/pango.h" | |
| 38 #include "pango/pangocairo.h" | |
| 39 #include "pango/pangofc-font.h" | |
| 40 | |
| 41 #include <algorithm> | |
| 42 #include <cstdio> | |
| 43 #include <cstdlib> | |
| 44 #include <cstring> | |
| 45 | |
| 46 #ifndef _MSC_VER | |
| 47 # include <sys/param.h> | |
| 48 #endif | |
| 49 | |
| 50 #define DISABLE_HEAP_LEAK_CHECK | |
| 51 | |
| 52 using namespace tesseract; | |
| 53 | |
| 54 namespace tesseract { | |
| 55 | |
// Default assumed output resolution. Required only for providing font metrics
// in pixels. Both PangoFontInfo constructors use it as the initial value of
// resolution_.
const int kDefaultResolution = 300;

// Static PangoFontInfo state, assigned by HardInitFontConfig().
// SoftInitFontConfig() treats an empty fonts_dir_ as "not yet initialized".
std::string PangoFontInfo::fonts_dir_;
std::string PangoFontInfo::cache_dir_;
| 62 | |
| 63 static PangoGlyph get_glyph(PangoFont *font, gunichar wc) { | |
| 64 #if PANGO_VERSION_CHECK(1, 44, 0) | |
| 65 // pango_font_get_hb_font requires Pango 1.44 or newer. | |
| 66 hb_font_t *hb_font = pango_font_get_hb_font(font); | |
| 67 hb_codepoint_t glyph; | |
| 68 hb_font_get_nominal_glyph(hb_font, wc, &glyph); | |
| 69 #else | |
| 70 // Use deprecated pango_fc_font_get_glyph for older Pango versions. | |
| 71 PangoGlyph glyph = pango_fc_font_get_glyph(PANGO_FC_FONT(font), wc); | |
| 72 #endif | |
| 73 return glyph; | |
| 74 } | |
| 75 | |
| 76 PangoFontInfo::PangoFontInfo() : desc_(nullptr), resolution_(kDefaultResolution) { | |
| 77 Clear(); | |
| 78 } | |
| 79 | |
| 80 PangoFontInfo::PangoFontInfo(const std::string &desc) | |
| 81 : desc_(nullptr), resolution_(kDefaultResolution) { | |
| 82 if (!ParseFontDescriptionName(desc)) { | |
| 83 tprintf("ERROR: Could not parse %s\n", desc.c_str()); | |
| 84 Clear(); | |
| 85 } | |
| 86 } | |
| 87 | |
| 88 void PangoFontInfo::Clear() { | |
| 89 font_size_ = 0; | |
| 90 family_name_.clear(); | |
| 91 font_type_ = UNKNOWN; | |
| 92 if (desc_) { | |
| 93 pango_font_description_free(desc_); | |
| 94 desc_ = nullptr; | |
| 95 } | |
| 96 } | |
| 97 | |
| 98 PangoFontInfo::~PangoFontInfo() { | |
| 99 pango_font_description_free(desc_); | |
| 100 } | |
| 101 | |
| 102 std::string PangoFontInfo::DescriptionName() const { | |
| 103 if (!desc_) { | |
| 104 return ""; | |
| 105 } | |
| 106 char *desc_str = pango_font_description_to_string(desc_); | |
| 107 std::string desc_name(desc_str); | |
| 108 g_free(desc_str); | |
| 109 return desc_name; | |
| 110 } | |
| 111 | |
| 112 // If not already initialized, initializes FontConfig by setting its | |
| 113 // environment variable and creating a fonts.conf file that points to the | |
| 114 // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir. | |
| 115 /* static */ | |
| 116 void PangoFontInfo::SoftInitFontConfig() { | |
| 117 if (fonts_dir_.empty()) { | |
| 118 HardInitFontConfig(FLAGS_fonts_dir.c_str(), FLAGS_fontconfig_tmpdir.c_str()); | |
| 119 } | |
| 120 } | |
| 121 | |
| 122 // Re-initializes font config, whether or not already initialized. | |
| 123 // If already initialized, any existing cache is deleted, just to be sure. | |
| 124 /* static */ | |
| 125 void PangoFontInfo::HardInitFontConfig(const char *fonts_dir, const char *cache_dir) { | |
| 126 if (!cache_dir_.empty()) { | |
| 127 File::DeleteMatchingFiles(File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str()); | |
| 128 } | |
| 129 const int MAX_FONTCONF_FILESIZE = 1024; | |
| 130 char fonts_conf_template[MAX_FONTCONF_FILESIZE]; | |
| 131 cache_dir_ = cache_dir; | |
| 132 fonts_dir_ = fonts_dir; | |
| 133 snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE, | |
| 134 "<?xml version=\"1.0\"?>\n" | |
| 135 "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n" | |
| 136 "<fontconfig>\n" | |
| 137 "<dir>%s</dir>\n" | |
| 138 "<cachedir>%s</cachedir>\n" | |
| 139 "<config></config>\n" | |
| 140 "</fontconfig>\n", | |
| 141 fonts_dir, cache_dir); | |
| 142 std::string fonts_conf_file = File::JoinPath(cache_dir, "fonts.conf"); | |
| 143 File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file); | |
| 144 #ifdef _WIN32 | |
| 145 std::string env("FONTCONFIG_PATH="); | |
| 146 env.append(cache_dir); | |
| 147 _putenv(env.c_str()); | |
| 148 _putenv("LANG=en_US.utf8"); | |
| 149 #else | |
| 150 setenv("FONTCONFIG_PATH", cache_dir, true); | |
| 151 // Fix the locale so that the reported font names are consistent. | |
| 152 setenv("LANG", "en_US.utf8", true); | |
| 153 #endif // _WIN32 | |
| 154 | |
| 155 if (FcInitReinitialize() != FcTrue) { | |
| 156 tprintf("FcInitiReinitialize failed!!\n"); | |
| 157 } | |
| 158 FontUtils::ReInit(); | |
| 159 // Clear Pango's font cache too. | |
| 160 pango_cairo_font_map_set_default(nullptr); | |
| 161 } | |
| 162 | |
| 163 static void ListFontFamilies(PangoFontFamily ***families, int *n_families) { | |
| 164 PangoFontInfo::SoftInitFontConfig(); | |
| 165 PangoFontMap *font_map = pango_cairo_font_map_get_default(); | |
| 166 DISABLE_HEAP_LEAK_CHECK; | |
| 167 pango_font_map_list_families(font_map, families, n_families); | |
| 168 } | |
| 169 | |
| 170 bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) { | |
| 171 Clear(); | |
| 172 const char *family = pango_font_description_get_family(desc); | |
| 173 if (!family) { | |
| 174 char *desc_str = pango_font_description_to_string(desc); | |
| 175 tprintf("WARNING: Could not parse family name from description: '%s'\n", desc_str); | |
| 176 g_free(desc_str); | |
| 177 return false; | |
| 178 } | |
| 179 family_name_ = std::string(family); | |
| 180 desc_ = pango_font_description_copy(desc); | |
| 181 | |
| 182 // Set font size in points | |
| 183 font_size_ = pango_font_description_get_size(desc); | |
| 184 if (!pango_font_description_get_size_is_absolute(desc)) { | |
| 185 font_size_ /= PANGO_SCALE; | |
| 186 } | |
| 187 | |
| 188 return true; | |
| 189 } | |
| 190 | |
| 191 bool PangoFontInfo::ParseFontDescriptionName(const std::string &name) { | |
| 192 PangoFontDescription *desc = pango_font_description_from_string(name.c_str()); | |
| 193 bool success = ParseFontDescription(desc); | |
| 194 pango_font_description_free(desc); | |
| 195 return success; | |
| 196 } | |
| 197 | |
| 198 // Returns the PangoFont structure corresponding to the closest available font | |
| 199 // in the font map. Note that if the font is wholly missing, this could | |
| 200 // correspond to a completely different font family and face. | |
| 201 PangoFont *PangoFontInfo::ToPangoFont() const { | |
| 202 SoftInitFontConfig(); | |
| 203 PangoFontMap *font_map = pango_cairo_font_map_get_default(); | |
| 204 PangoContext *context = pango_context_new(); | |
| 205 pango_cairo_context_set_resolution(context, resolution_); | |
| 206 pango_context_set_font_map(context, font_map); | |
| 207 PangoFont *font = nullptr; | |
| 208 { | |
| 209 DISABLE_HEAP_LEAK_CHECK; | |
| 210 font = pango_font_map_load_font(font_map, context, desc_); | |
| 211 } | |
| 212 g_object_unref(context); | |
| 213 return font; | |
| 214 } | |
| 215 | |
| 216 bool PangoFontInfo::CoversUTF8Text(const char *utf8_text, int byte_length) const { | |
| 217 PangoFont *font = ToPangoFont(); | |
| 218 if (font == nullptr) { | |
| 219 // Font not found. | |
| 220 return false; | |
| 221 } | |
| 222 PangoCoverage *coverage = pango_font_get_coverage(font, nullptr); | |
| 223 for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length); | |
| 224 it != UNICHAR::end(utf8_text, byte_length); ++it) { | |
| 225 if (IsWhitespace(*it) || pango_is_zero_width(*it)) { | |
| 226 continue; | |
| 227 } | |
| 228 if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) { | |
| 229 char tmp[5]; | |
| 230 int len = it.get_utf8(tmp); | |
| 231 tmp[len] = '\0'; | |
| 232 tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it); | |
| 233 #if PANGO_VERSION_CHECK(1, 52, 0) | |
| 234 g_object_unref(coverage); | |
| 235 #else | |
| 236 pango_coverage_unref(coverage); | |
| 237 #endif | |
| 238 g_object_unref(font); | |
| 239 return false; | |
| 240 } | |
| 241 } | |
| 242 #if PANGO_VERSION_CHECK(1, 52, 0) | |
| 243 g_object_unref(coverage); | |
| 244 #else | |
| 245 pango_coverage_unref(coverage); | |
| 246 #endif | |
| 247 g_object_unref(font); | |
| 248 return true; | |
| 249 } | |
| 250 | |
// This variant of strncpy permits src and dest to overlap, provided dest does
// not start after src: bytes are copied front to back, first byte first.
//
// Copies at most n bytes from src to dest, stopping at the first nul byte in
// src. If fewer than n bytes were copied, the rest of the n-byte destination
// window is filled with nul bytes. Nothing is read or written when n is 0.
// Returns dest.
static char *my_strnmove(char *dest, const char *src, size_t n) {
  char *ret = dest;

  // Copy characters until n reaches zero or the src byte is a nul. Testing
  // before each copy keeps n == 0 from writing a byte and wrapping n around.
  while (n != 0 && *src != '\0') {
    *dest = *src;
    ++dest;
    ++src;
    --n;
  }

  // If we reached a nul byte and there are more 'n' left, zero them out.
  while (n != 0) {
    *dest = '\0';
    ++dest;
    --n;
  }
  return ret;
}
| 272 | |
| 273 int PangoFontInfo::DropUncoveredChars(std::string *utf8_text) const { | |
| 274 int num_dropped_chars = 0; | |
| 275 PangoFont *font = ToPangoFont(); | |
| 276 if (font == nullptr) { | |
| 277 // Font not found, drop all characters. | |
| 278 num_dropped_chars = utf8_text->length(); | |
| 279 utf8_text->clear(); | |
| 280 return num_dropped_chars; | |
| 281 } | |
| 282 PangoCoverage *coverage = pango_font_get_coverage(font, nullptr); | |
| 283 // Maintain two iterators that point into the string. For space efficiency, we | |
| 284 // will repeatedly copy one covered UTF8 character from one to the other, and | |
| 285 // at the end resize the string to the right length. | |
| 286 char *out = const_cast<char *>(utf8_text->c_str()); | |
| 287 const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_text->c_str(), utf8_text->length()); | |
| 288 const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text->c_str(), utf8_text->length()); | |
| 289 for (UNICHAR::const_iterator it = it_begin; it != it_end;) { | |
| 290 // Skip bad utf-8. | |
| 291 if (!it.is_legal()) { | |
| 292 ++it; // One suitable error message will still be issued. | |
| 293 continue; | |
| 294 } | |
| 295 int unicode = *it; | |
| 296 int utf8_len = it.utf8_len(); | |
| 297 const char *utf8_char = it.utf8_data(); | |
| 298 // Move it forward before the data gets modified. | |
| 299 ++it; | |
| 300 if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) && | |
| 301 pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) { | |
| 302 if (TLOG_IS_ON(2)) { | |
| 303 UNICHAR unichar(unicode); | |
| 304 char *str = unichar.utf8_str(); | |
| 305 tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode); | |
| 306 delete[] str; | |
| 307 } | |
| 308 ++num_dropped_chars; | |
| 309 continue; | |
| 310 } | |
| 311 my_strnmove(out, utf8_char, utf8_len); | |
| 312 out += utf8_len; | |
| 313 } | |
| 314 #if PANGO_VERSION_CHECK(1, 52, 0) | |
| 315 g_object_unref(coverage); | |
| 316 #else | |
| 317 pango_coverage_unref(coverage); | |
| 318 #endif | |
| 319 g_object_unref(font); | |
| 320 utf8_text->resize(out - utf8_text->c_str()); | |
| 321 return num_dropped_chars; | |
| 322 } | |
| 323 | |
| 324 bool PangoFontInfo::GetSpacingProperties(const std::string &utf8_char, int *x_bearing, | |
| 325 int *x_advance) const { | |
| 326 // Convert to equivalent PangoFont structure | |
| 327 PangoFont *font = ToPangoFont(); | |
| 328 if (!font) { | |
| 329 return false; | |
| 330 } | |
| 331 // Find the glyph index in the font for the supplied utf8 character. | |
| 332 int total_advance = 0; | |
| 333 int min_bearing = 0; | |
| 334 // Handle multi-unicode strings by reporting the left-most position of the | |
| 335 // x-bearing, and right-most position of the x-advance if the string were to | |
| 336 // be rendered. | |
| 337 const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(), utf8_char.length()); | |
| 338 const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(), utf8_char.length()); | |
| 339 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) { | |
| 340 PangoGlyph glyph_index = get_glyph(font, *it); | |
| 341 if (!glyph_index) { | |
| 342 // Glyph for given unicode character doesn't exist in font. | |
| 343 g_object_unref(font); | |
| 344 return false; | |
| 345 } | |
| 346 // Find the ink glyph extents for the glyph | |
| 347 PangoRectangle ink_rect, logical_rect; | |
| 348 pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect); | |
| 349 pango_extents_to_pixels(&ink_rect, nullptr); | |
| 350 pango_extents_to_pixels(&logical_rect, nullptr); | |
| 351 | |
| 352 int bearing = total_advance + PANGO_LBEARING(ink_rect); | |
| 353 if (it == it_begin || bearing < min_bearing) { | |
| 354 min_bearing = bearing; | |
| 355 } | |
| 356 total_advance += PANGO_RBEARING(logical_rect); | |
| 357 } | |
| 358 *x_bearing = min_bearing; | |
| 359 *x_advance = total_advance; | |
| 360 g_object_unref(font); | |
| 361 return true; | |
| 362 } | |
| 363 | |
| 364 bool PangoFontInfo::CanRenderString(const char *utf8_word, int len) const { | |
| 365 std::vector<std::string> graphemes; | |
| 366 return CanRenderString(utf8_word, len, &graphemes); | |
| 367 } | |
| 368 | |
| 369 bool PangoFontInfo::CanRenderString(const char *utf8_word, int len, | |
| 370 std::vector<std::string> *graphemes) const { | |
| 371 if (graphemes) { | |
| 372 graphemes->clear(); | |
| 373 } | |
| 374 // We check for font coverage of the text first, as otherwise Pango could | |
| 375 // (undesirably) fall back to another font that does have the required | |
| 376 // coverage. | |
| 377 if (!CoversUTF8Text(utf8_word, len)) { | |
| 378 return false; | |
| 379 } | |
| 380 // U+25CC dotted circle character that often (but not always) gets rendered | |
| 381 // when there is an illegal grapheme sequence. | |
| 382 const char32 kDottedCircleGlyph = 9676; | |
| 383 bool bad_glyph = false; | |
| 384 PangoFontMap *font_map = pango_cairo_font_map_get_default(); | |
| 385 PangoContext *context = pango_context_new(); | |
| 386 pango_context_set_font_map(context, font_map); | |
| 387 PangoLayout *layout; | |
| 388 { | |
| 389 // Pango is not releasing the cached layout. | |
| 390 DISABLE_HEAP_LEAK_CHECK; | |
| 391 layout = pango_layout_new(context); | |
| 392 } | |
| 393 if (desc_) { | |
| 394 pango_layout_set_font_description(layout, desc_); | |
| 395 } else { | |
| 396 PangoFontDescription *desc = pango_font_description_from_string(DescriptionName().c_str()); | |
| 397 pango_layout_set_font_description(layout, desc); | |
| 398 pango_font_description_free(desc); | |
| 399 } | |
| 400 pango_layout_set_text(layout, utf8_word, len); | |
| 401 PangoLayoutIter *run_iter = nullptr; | |
| 402 { // Fontconfig caches some information here that is not freed before exit. | |
| 403 DISABLE_HEAP_LEAK_CHECK; | |
| 404 run_iter = pango_layout_get_iter(layout); | |
| 405 } | |
| 406 do { | |
| 407 PangoLayoutRun *run = pango_layout_iter_get_run_readonly(run_iter); | |
| 408 if (!run) { | |
| 409 tlog(2, "Found end of line nullptr run marker\n"); | |
| 410 continue; | |
| 411 } | |
| 412 PangoGlyph dotted_circle_glyph; | |
| 413 PangoFont *font = run->item->analysis.font; | |
| 414 | |
| 415 dotted_circle_glyph = get_glyph(font, kDottedCircleGlyph); | |
| 416 | |
| 417 if (TLOG_IS_ON(2)) { | |
| 418 PangoFontDescription *desc = pango_font_describe(font); | |
| 419 char *desc_str = pango_font_description_to_string(desc); | |
| 420 tlog(2, "Desc of font in run: %s\n", desc_str); | |
| 421 g_free(desc_str); | |
| 422 pango_font_description_free(desc); | |
| 423 } | |
| 424 | |
| 425 PangoGlyphItemIter cluster_iter; | |
| 426 gboolean have_cluster; | |
| 427 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, run, utf8_word); | |
| 428 have_cluster && !bad_glyph; | |
| 429 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) { | |
| 430 const int start_byte_index = cluster_iter.start_index; | |
| 431 const int end_byte_index = cluster_iter.end_index; | |
| 432 int start_glyph_index = cluster_iter.start_glyph; | |
| 433 int end_glyph_index = cluster_iter.end_glyph; | |
| 434 std::string cluster_text = | |
| 435 std::string(utf8_word + start_byte_index, end_byte_index - start_byte_index); | |
| 436 if (graphemes) { | |
| 437 graphemes->push_back(cluster_text); | |
| 438 } | |
| 439 if (IsUTF8Whitespace(cluster_text.c_str())) { | |
| 440 tlog(2, "Skipping whitespace\n"); | |
| 441 continue; | |
| 442 } | |
| 443 if (TLOG_IS_ON(2)) { | |
| 444 printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ", start_byte_index, | |
| 445 end_byte_index, start_glyph_index, end_glyph_index); | |
| 446 } | |
| 447 for (int i = start_glyph_index, step = (end_glyph_index > start_glyph_index) ? 1 : -1; | |
| 448 !bad_glyph && i != end_glyph_index; i += step) { | |
| 449 const bool unknown_glyph = | |
| 450 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph & PANGO_GLYPH_UNKNOWN_FLAG); | |
| 451 const bool illegal_glyph = | |
| 452 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph == dotted_circle_glyph); | |
| 453 bad_glyph = unknown_glyph || illegal_glyph; | |
| 454 if (TLOG_IS_ON(2)) { | |
| 455 printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph, bad_glyph ? 1 : 0); | |
| 456 } | |
| 457 } | |
| 458 if (TLOG_IS_ON(2)) { | |
| 459 printf(" '%s'\n", cluster_text.c_str()); | |
| 460 } | |
| 461 if (bad_glyph) | |
| 462 tlog(1, "Found illegal glyph!\n"); | |
| 463 } | |
| 464 } while (!bad_glyph && pango_layout_iter_next_run(run_iter)); | |
| 465 | |
| 466 pango_layout_iter_free(run_iter); | |
| 467 g_object_unref(context); | |
| 468 g_object_unref(layout); | |
| 469 if (bad_glyph && graphemes) { | |
| 470 graphemes->clear(); | |
| 471 } | |
| 472 return !bad_glyph; | |
| 473 } | |
| 474 | |
// ------------------------ FontUtils ------------------------------------
// Cached result of ListAvailableFonts(); emptied again by ReInit().
std::vector<std::string> FontUtils::available_fonts_; // cache list
| 477 | |
| 478 // Returns whether the specified font description is available in the fonts | |
| 479 // directory. | |
| 480 // | |
| 481 // The generated list of font families and faces includes "synthesized" font | |
| 482 // faces that are not truly loadable. Pango versions >=1.18 have a | |
| 483 // pango_font_face_is_synthesized method that can be used to prune the list. | |
| 484 // Until then, we are restricted to using a hack where we try to load the font | |
| 485 // from the font_map, and then check what we loaded to see if it has the | |
| 486 // description we expected. If it is not, then the font is deemed unavailable. | |
| 487 // | |
| 488 // TODO: This function reports also some not synthesized fonts as not available | |
| 489 // e.g. 'Bitstream Charter Medium Italic', 'LMRoman17', so we need this hack | |
| 490 // until other solution is found. | |
| 491 /* static */ | |
| 492 bool FontUtils::IsAvailableFont(const char *input_query_desc, std::string *best_match) { | |
| 493 std::string query_desc(input_query_desc); | |
| 494 PangoFontDescription *desc = pango_font_description_from_string(query_desc.c_str()); | |
| 495 PangoFont *selected_font = nullptr; | |
| 496 { | |
| 497 PangoFontInfo::SoftInitFontConfig(); | |
| 498 PangoFontMap *font_map = pango_cairo_font_map_get_default(); | |
| 499 PangoContext *context = pango_context_new(); | |
| 500 pango_context_set_font_map(context, font_map); | |
| 501 { | |
| 502 DISABLE_HEAP_LEAK_CHECK; | |
| 503 selected_font = pango_font_map_load_font(font_map, context, desc); | |
| 504 } | |
| 505 g_object_unref(context); | |
| 506 } | |
| 507 if (selected_font == nullptr) { | |
| 508 pango_font_description_free(desc); | |
| 509 tlog(4, "** Font '%s' failed to load from font map!\n", input_query_desc); | |
| 510 return false; | |
| 511 } | |
| 512 PangoFontDescription *selected_desc = pango_font_describe(selected_font); | |
| 513 | |
| 514 bool equal = pango_font_description_equal(desc, selected_desc); | |
| 515 tlog(3, "query weight = %d \t selected weight =%d\n", pango_font_description_get_weight(desc), | |
| 516 pango_font_description_get_weight(selected_desc)); | |
| 517 | |
| 518 char *selected_desc_str = pango_font_description_to_string(selected_desc); | |
| 519 tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(), selected_desc_str); | |
| 520 if (!equal && best_match != nullptr) { | |
| 521 *best_match = selected_desc_str; | |
| 522 // Clip the ending ' 0' if there is one. It seems that, if there is no | |
| 523 // point size on the end of the fontname, then Pango always appends ' 0'. | |
| 524 auto len = best_match->size(); | |
| 525 if (len > 2 && best_match->at(len - 1) == '0' && best_match->at(len - 2) == ' ') { | |
| 526 best_match->resize(len - 2); | |
| 527 } | |
| 528 } | |
| 529 g_free(selected_desc_str); | |
| 530 pango_font_description_free(selected_desc); | |
| 531 g_object_unref(selected_font); | |
| 532 pango_font_description_free(desc); | |
| 533 if (!equal) | |
| 534 tlog(4, "** Font '%s' failed pango_font_description_equal!\n", input_query_desc); | |
| 535 return equal; | |
| 536 } | |
| 537 | |
// Returns true if `query` is exactly (case-sensitively) one of the family
// names that are excluded from font listings: "Sans", "Serif" or "Monospace".
static bool ShouldIgnoreFontFamilyName(const char *query) {
  static const char *const kIgnoredFamilyNames[] = {"Sans", "Serif", "Monospace"};
  for (const char *ignored_name : kIgnoredFamilyNames) {
    if (strcmp(ignored_name, query) == 0) {
      return true;
    }
  }
  return false;
}
| 548 | |
| 549 // Outputs description names of available fonts. | |
| 550 /* static */ | |
| 551 const std::vector<std::string> &FontUtils::ListAvailableFonts() { | |
| 552 if (!available_fonts_.empty()) { | |
| 553 return available_fonts_; | |
| 554 } | |
| 555 | |
| 556 PangoFontFamily **families = nullptr; | |
| 557 int n_families = 0; | |
| 558 ListFontFamilies(&families, &n_families); | |
| 559 for (int i = 0; i < n_families; ++i) { | |
| 560 const char *family_name = pango_font_family_get_name(families[i]); | |
| 561 tlog(2, "Listing family %s\n", family_name); | |
| 562 if (ShouldIgnoreFontFamilyName(family_name)) { | |
| 563 continue; | |
| 564 } | |
| 565 | |
| 566 int n_faces; | |
| 567 PangoFontFace **faces = nullptr; | |
| 568 pango_font_family_list_faces(families[i], &faces, &n_faces); | |
| 569 for (int j = 0; j < n_faces; ++j) { | |
| 570 PangoFontDescription *desc = pango_font_face_describe(faces[j]); | |
| 571 char *desc_str = pango_font_description_to_string(desc); | |
| 572 // "synthesized" font faces that are not truly loadable, so we skip it | |
| 573 if (!pango_font_face_is_synthesized(faces[j]) && IsAvailableFont(desc_str)) { | |
| 574 available_fonts_.emplace_back(desc_str); | |
| 575 } | |
| 576 pango_font_description_free(desc); | |
| 577 g_free(desc_str); | |
| 578 } | |
| 579 g_free(faces); | |
| 580 } | |
| 581 g_free(families); | |
| 582 std::sort(available_fonts_.begin(), available_fonts_.end()); | |
| 583 return available_fonts_; | |
| 584 } | |
| 585 | |
| 586 // Utilities written to be backward compatible with StringRender | |
| 587 | |
| 588 /* static */ | |
| 589 int FontUtils::FontScore(const std::unordered_map<char32, int64_t> &ch_map, | |
| 590 const std::string &fontname, int *raw_score, std::vector<bool> *ch_flags) { | |
| 591 PangoFontInfo font_info; | |
| 592 if (!font_info.ParseFontDescriptionName(fontname)) { | |
| 593 tprintf("ERROR: Could not parse %s\n", fontname.c_str()); | |
| 594 } | |
| 595 PangoFont *font = font_info.ToPangoFont(); | |
| 596 PangoCoverage *coverage = nullptr; | |
| 597 if (font != nullptr) { | |
| 598 coverage = pango_font_get_coverage(font, nullptr); | |
| 599 } | |
| 600 if (ch_flags) { | |
| 601 ch_flags->clear(); | |
| 602 ch_flags->reserve(ch_map.size()); | |
| 603 } | |
| 604 *raw_score = 0; | |
| 605 int ok_chars = 0; | |
| 606 for (auto &&it : ch_map) { | |
| 607 bool covered = | |
| 608 (coverage != nullptr) && (IsWhitespace(it.first) || | |
| 609 (pango_coverage_get(coverage, it.first) == PANGO_COVERAGE_EXACT)); | |
| 610 if (covered) { | |
| 611 ++(*raw_score); | |
| 612 ok_chars += it.second; | |
| 613 } | |
| 614 if (ch_flags) { | |
| 615 ch_flags->push_back(covered); | |
| 616 } | |
| 617 } | |
| 618 #if PANGO_VERSION_CHECK(1, 52, 0) | |
| 619 g_object_unref(coverage); | |
| 620 #else | |
| 621 pango_coverage_unref(coverage); | |
| 622 #endif | |
| 623 g_object_unref(font); | |
| 624 return ok_chars; | |
| 625 } | |
| 626 | |
| 627 /* static */ | |
| 628 std::string FontUtils::BestFonts(const std::unordered_map<char32, int64_t> &ch_map, | |
| 629 std::vector<std::pair<const char *, std::vector<bool>>> *fonts) { | |
| 630 const double kMinOKFraction = 0.99; | |
| 631 // Weighted fraction of characters that must be renderable in a font to make | |
| 632 // it OK even if the raw count is not good. | |
| 633 const double kMinWeightedFraction = 0.99995; | |
| 634 | |
| 635 fonts->clear(); | |
| 636 std::vector<std::vector<bool>> font_flags; | |
| 637 std::vector<int> font_scores; | |
| 638 std::vector<int> raw_scores; | |
| 639 int most_ok_chars = 0; | |
| 640 int best_raw_score = 0; | |
| 641 const std::vector<std::string> &font_names = FontUtils::ListAvailableFonts(); | |
| 642 for (const auto &font_name : font_names) { | |
| 643 std::vector<bool> ch_flags; | |
| 644 int raw_score = 0; | |
| 645 int ok_chars = FontScore(ch_map, font_name, &raw_score, &ch_flags); | |
| 646 most_ok_chars = std::max(ok_chars, most_ok_chars); | |
| 647 best_raw_score = std::max(raw_score, best_raw_score); | |
| 648 | |
| 649 font_flags.push_back(ch_flags); | |
| 650 font_scores.push_back(ok_chars); | |
| 651 raw_scores.push_back(raw_score); | |
| 652 } | |
| 653 | |
| 654 // Now select the fonts with a score above a threshold fraction | |
| 655 // of both the raw and weighted best scores. To prevent bogus fonts being | |
| 656 // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of | |
| 657 // BOTH weighted and raw scores. | |
| 658 // In low character-count scripts, the issue is more getting enough fonts, | |
| 659 // when only 1 or 2 might have all those rare dingbats etc in them, so we | |
| 660 // allow a font with a very high weighted (coverage) score | |
| 661 // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor. | |
| 662 int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction); | |
| 663 int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction); | |
| 664 int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction); | |
| 665 | |
| 666 std::string font_list; | |
| 667 for (unsigned i = 0; i < font_names.size(); ++i) { | |
| 668 int score = font_scores[i]; | |
| 669 int raw_score = raw_scores[i]; | |
| 670 if ((score >= least_good_enough && raw_score >= least_raw_enough) || score >= override_enough) { | |
| 671 fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i])); | |
| 672 tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(), | |
| 673 100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score); | |
| 674 font_list += font_names[i]; | |
| 675 font_list += "\n"; | |
| 676 } else if (score >= least_good_enough || raw_score >= least_raw_enough) { | |
| 677 tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(), | |
| 678 100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score); | |
| 679 } | |
| 680 } | |
| 681 return font_list; | |
| 682 } | |
| 683 | |
| 684 /* static */ | |
| 685 bool FontUtils::SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, | |
| 686 std::vector<std::string> *graphemes) { | |
| 687 return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name, graphemes); | |
| 688 } | |
| 689 | |
| 690 /* static */ | |
| 691 bool FontUtils::SelectFont(const char *utf8_word, const int utf8_len, | |
| 692 const std::vector<std::string> &all_fonts, std::string *font_name, | |
| 693 std::vector<std::string> *graphemes) { | |
| 694 if (font_name) { | |
| 695 font_name->clear(); | |
| 696 } | |
| 697 if (graphemes) { | |
| 698 graphemes->clear(); | |
| 699 } | |
| 700 for (const auto &all_font : all_fonts) { | |
| 701 PangoFontInfo font; | |
| 702 std::vector<std::string> found_graphemes; | |
| 703 ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_font), "Could not parse font desc name %s\n", | |
| 704 all_font.c_str()); | |
| 705 if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) { | |
| 706 if (graphemes) { | |
| 707 graphemes->swap(found_graphemes); | |
| 708 } | |
| 709 if (font_name) { | |
| 710 *font_name = all_font; | |
| 711 } | |
| 712 return true; | |
| 713 } | |
| 714 } | |
| 715 return false; | |
| 716 } | |
| 717 | |
// PangoFontInfo is reinitialized, so clear the static list of fonts.
// Called from PangoFontInfo::HardInitFontConfig(). Because
// ListAvailableFonts() rebuilds the list whenever it is empty, the next call
// to it enumerates the fonts afresh.
/* static */
void FontUtils::ReInit() {
  available_fonts_.clear();
}
| 723 | |
| 724 // Print info about used font backend | |
| 725 /* static */ | |
| 726 void FontUtils::PangoFontTypeInfo() { | |
| 727 PangoFontMap *font_map = pango_cairo_font_map_get_default(); | |
| 728 if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) == | |
| 729 CAIRO_FONT_TYPE_TOY) { | |
| 730 printf("Using CAIRO_FONT_TYPE_TOY.\n"); | |
| 731 } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) == | |
| 732 CAIRO_FONT_TYPE_FT) { | |
| 733 printf("Using CAIRO_FONT_TYPE_FT.\n"); | |
| 734 } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) == | |
| 735 CAIRO_FONT_TYPE_WIN32) { | |
| 736 printf("Using CAIRO_FONT_TYPE_WIN32.\n"); | |
| 737 } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) == | |
| 738 CAIRO_FONT_TYPE_QUARTZ) { | |
| 739 printf("Using CAIRO_FONT_TYPE_QUARTZ.\n"); | |
| 740 } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) == | |
| 741 CAIRO_FONT_TYPE_USER) { | |
| 742 printf("Using CAIRO_FONT_TYPE_USER.\n"); | |
| 743 } else if (!font_map) { | |
| 744 printf("Cannot create pango cairo font map!\n"); | |
| 745 } | |
| 746 } | |
| 747 | |
| 748 } // namespace tesseract |
