diff mupdf-source/thirdparty/tesseract/src/training/pango/pango_font_info.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/training/pango/pango_font_info.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,748 @@
+/**********************************************************************
+ * File:        pango_font_info.cpp
+ * Description: Font-related objects and helper functions
+ * Author:      Ranjith Unnikrishnan
+ *
+ * (C) Copyright 2013, Google Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#if (defined __CYGWIN__)
+// workaround for stdlib.h and putenv
+#  undef __STRICT_ANSI__
+#endif
+
+#include "commandlineflags.h"
+#include "fileio.h"
+#include "normstrngs.h"
+#include "pango_font_info.h"
+#include "tlog.h"
+
+#include <tesseract/unichar.h>
+
+#include "pango/pango.h"
+#include "pango/pangocairo.h"
+#include "pango/pangofc-font.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#ifndef _MSC_VER
+#  include <sys/param.h>
+#endif
+
+#define DISABLE_HEAP_LEAK_CHECK
+
+using namespace tesseract;
+
+namespace tesseract {
+
+// Default assumed output resolution. Required only for providing font metrics
+// in pixels.
+const int kDefaultResolution = 300;
+
+std::string PangoFontInfo::fonts_dir_;
+std::string PangoFontInfo::cache_dir_;
+
+static PangoGlyph get_glyph(PangoFont *font, gunichar wc) {
+#if PANGO_VERSION_CHECK(1, 44, 0)
+  // pango_font_get_hb_font requires Pango 1.44 or newer.
+  hb_font_t *hb_font = pango_font_get_hb_font(font);
+  hb_codepoint_t glyph;
+  hb_font_get_nominal_glyph(hb_font, wc, &glyph);
+#else
+  // Use deprecated pango_fc_font_get_glyph for older Pango versions.
+  PangoGlyph glyph = pango_fc_font_get_glyph(PANGO_FC_FONT(font), wc);
+#endif
+  return glyph;
+}
+
+PangoFontInfo::PangoFontInfo() : desc_(nullptr), resolution_(kDefaultResolution) {
+  Clear();
+}
+
+PangoFontInfo::PangoFontInfo(const std::string &desc)
+    : desc_(nullptr), resolution_(kDefaultResolution) {
+  if (!ParseFontDescriptionName(desc)) {
+    tprintf("ERROR: Could not parse %s\n", desc.c_str());
+    Clear();
+  }
+}
+
+void PangoFontInfo::Clear() {
+  font_size_ = 0;
+  family_name_.clear();
+  font_type_ = UNKNOWN;
+  if (desc_) {
+    pango_font_description_free(desc_);
+    desc_ = nullptr;
+  }
+}
+
+PangoFontInfo::~PangoFontInfo() {
+  pango_font_description_free(desc_);
+}
+
+std::string PangoFontInfo::DescriptionName() const {
+  if (!desc_) {
+    return "";
+  }
+  char *desc_str = pango_font_description_to_string(desc_);
+  std::string desc_name(desc_str);
+  g_free(desc_str);
+  return desc_name;
+}
+
+// If not already initialized, initializes FontConfig by setting its
+// environment variable and creating a fonts.conf file that points to the
+// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
+/* static */
+void PangoFontInfo::SoftInitFontConfig() {
+  if (fonts_dir_.empty()) {
+    HardInitFontConfig(FLAGS_fonts_dir.c_str(), FLAGS_fontconfig_tmpdir.c_str());
+  }
+}
+
+// Re-initializes font config, whether or not already initialized.
+// If already initialized, any existing cache is deleted, just to be sure.
+/* static */
+void PangoFontInfo::HardInitFontConfig(const char *fonts_dir, const char *cache_dir) {
+  if (!cache_dir_.empty()) {
+    File::DeleteMatchingFiles(File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
+  }
+  const int MAX_FONTCONF_FILESIZE = 1024;
+  char fonts_conf_template[MAX_FONTCONF_FILESIZE];
+  cache_dir_ = cache_dir;
+  fonts_dir_ = fonts_dir;
+  snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
+           "<?xml version=\"1.0\"?>\n"
+           "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
+           "<fontconfig>\n"
+           "<dir>%s</dir>\n"
+           "<cachedir>%s</cachedir>\n"
+           "<config></config>\n"
+           "</fontconfig>\n",
+           fonts_dir, cache_dir);
+  std::string fonts_conf_file = File::JoinPath(cache_dir, "fonts.conf");
+  File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
+#ifdef _WIN32
+  std::string env("FONTCONFIG_PATH=");
+  env.append(cache_dir);
+  _putenv(env.c_str());
+  _putenv("LANG=en_US.utf8");
+#else
+  setenv("FONTCONFIG_PATH", cache_dir, true);
+  // Fix the locale so that the reported font names are consistent.
+  setenv("LANG", "en_US.utf8", true);
+#endif // _WIN32
+
+  if (FcInitReinitialize() != FcTrue) {
+    tprintf("FcInitiReinitialize failed!!\n");
+  }
+  FontUtils::ReInit();
+  // Clear Pango's font cache too.
+  pango_cairo_font_map_set_default(nullptr);
+}
+
+static void ListFontFamilies(PangoFontFamily ***families, int *n_families) {
+  PangoFontInfo::SoftInitFontConfig();
+  PangoFontMap *font_map = pango_cairo_font_map_get_default();
+  DISABLE_HEAP_LEAK_CHECK;
+  pango_font_map_list_families(font_map, families, n_families);
+}
+
+bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
+  Clear();
+  const char *family = pango_font_description_get_family(desc);
+  if (!family) {
+    char *desc_str = pango_font_description_to_string(desc);
+    tprintf("WARNING: Could not parse family name from description: '%s'\n", desc_str);
+    g_free(desc_str);
+    return false;
+  }
+  family_name_ = std::string(family);
+  desc_ = pango_font_description_copy(desc);
+
+  // Set font size in points
+  font_size_ = pango_font_description_get_size(desc);
+  if (!pango_font_description_get_size_is_absolute(desc)) {
+    font_size_ /= PANGO_SCALE;
+  }
+
+  return true;
+}
+
+bool PangoFontInfo::ParseFontDescriptionName(const std::string &name) {
+  PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
+  bool success = ParseFontDescription(desc);
+  pango_font_description_free(desc);
+  return success;
+}
+
+// Returns the PangoFont structure corresponding to the closest available font
+// in the font map. Note that if the font is wholly missing, this could
+// correspond to a completely different font family and face.
+PangoFont *PangoFontInfo::ToPangoFont() const {
+  SoftInitFontConfig();
+  PangoFontMap *font_map = pango_cairo_font_map_get_default();
+  PangoContext *context = pango_context_new();
+  pango_cairo_context_set_resolution(context, resolution_);
+  pango_context_set_font_map(context, font_map);
+  PangoFont *font = nullptr;
+  {
+    DISABLE_HEAP_LEAK_CHECK;
+    font = pango_font_map_load_font(font_map, context, desc_);
+  }
+  g_object_unref(context);
+  return font;
+}
+
+bool PangoFontInfo::CoversUTF8Text(const char *utf8_text, int byte_length) const {
+  PangoFont *font = ToPangoFont();
+  if (font == nullptr) {
+    // Font not found.
+    return false;
+  }
+  PangoCoverage *coverage = pango_font_get_coverage(font, nullptr);
+  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
+       it != UNICHAR::end(utf8_text, byte_length); ++it) {
+    if (IsWhitespace(*it) || pango_is_zero_width(*it)) {
+      continue;
+    }
+    if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
+      char tmp[5];
+      int len = it.get_utf8(tmp);
+      tmp[len] = '\0';
+      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
+#if PANGO_VERSION_CHECK(1, 52, 0)
+      g_object_unref(coverage);
+#else
+      pango_coverage_unref(coverage);
+#endif
+      g_object_unref(font);
+      return false;
+    }
+  }
+#if PANGO_VERSION_CHECK(1, 52, 0)
+  g_object_unref(coverage);
+#else
+  pango_coverage_unref(coverage);
+#endif
+  g_object_unref(font);
+  return true;
+}
+
+// This variant of strncpy permits src and dest to overlap. It will copy the
+// first byte first.
+static char *my_strnmove(char *dest, const char *src, size_t n) {
+  char *ret = dest;
+
+  // Copy characters until n reaches zero or the src byte is a nul.
+  do {
+    *dest = *src;
+    --n;
+    ++dest;
+    ++src;
+  } while (n && src[0]);
+
+  // If we reached a nul byte and there are more 'n' left, zero them out.
+  while (n) {
+    *dest = '\0';
+    --n;
+    ++dest;
+  }
+  return ret;
+}
+
+int PangoFontInfo::DropUncoveredChars(std::string *utf8_text) const {
+  int num_dropped_chars = 0;
+  PangoFont *font = ToPangoFont();
+  if (font == nullptr) {
+    // Font not found, drop all characters.
+    num_dropped_chars = utf8_text->length();
+    utf8_text->clear();
+    return num_dropped_chars;
+  }
+  PangoCoverage *coverage = pango_font_get_coverage(font, nullptr);
+  // Maintain two iterators that point into the string. For space efficiency, we
+  // will repeatedly copy one covered UTF8 character from one to the other, and
+  // at the end resize the string to the right length.
+  char *out = const_cast<char *>(utf8_text->c_str());
+  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
+  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text->c_str(), utf8_text->length());
+  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
+    // Skip bad utf-8.
+    if (!it.is_legal()) {
+      ++it; // One suitable error message will still be issued.
+      continue;
+    }
+    int unicode = *it;
+    int utf8_len = it.utf8_len();
+    const char *utf8_char = it.utf8_data();
+    // Move it forward before the data gets modified.
+    ++it;
+    if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
+        pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
+      if (TLOG_IS_ON(2)) {
+        UNICHAR unichar(unicode);
+        char *str = unichar.utf8_str();
+        tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
+        delete[] str;
+      }
+      ++num_dropped_chars;
+      continue;
+    }
+    my_strnmove(out, utf8_char, utf8_len);
+    out += utf8_len;
+  }
+#if PANGO_VERSION_CHECK(1, 52, 0)
+  g_object_unref(coverage);
+#else
+  pango_coverage_unref(coverage);
+#endif
+  g_object_unref(font);
+  utf8_text->resize(out - utf8_text->c_str());
+  return num_dropped_chars;
+}
+
+bool PangoFontInfo::GetSpacingProperties(const std::string &utf8_char, int *x_bearing,
+                                         int *x_advance) const {
+  // Convert to equivalent PangoFont structure
+  PangoFont *font = ToPangoFont();
+  if (!font) {
+    return false;
+  }
+  // Find the glyph index in the font for the supplied utf8 character.
+  int total_advance = 0;
+  int min_bearing = 0;
+  // Handle multi-unicode strings by reporting the left-most position of the
+  // x-bearing, and right-most position of the x-advance if the string were to
+  // be rendered.
+  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(), utf8_char.length());
+  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(), utf8_char.length());
+  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
+    PangoGlyph glyph_index = get_glyph(font, *it);
+    if (!glyph_index) {
+      // Glyph for given unicode character doesn't exist in font.
+      g_object_unref(font);
+      return false;
+    }
+    // Find the ink glyph extents for the glyph
+    PangoRectangle ink_rect, logical_rect;
+    pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
+    pango_extents_to_pixels(&ink_rect, nullptr);
+    pango_extents_to_pixels(&logical_rect, nullptr);
+
+    int bearing = total_advance + PANGO_LBEARING(ink_rect);
+    if (it == it_begin || bearing < min_bearing) {
+      min_bearing = bearing;
+    }
+    total_advance += PANGO_RBEARING(logical_rect);
+  }
+  *x_bearing = min_bearing;
+  *x_advance = total_advance;
+  g_object_unref(font);
+  return true;
+}
+
+bool PangoFontInfo::CanRenderString(const char *utf8_word, int len) const {
+  std::vector<std::string> graphemes;
+  return CanRenderString(utf8_word, len, &graphemes);
+}
+
+bool PangoFontInfo::CanRenderString(const char *utf8_word, int len,
+                                    std::vector<std::string> *graphemes) const {
+  if (graphemes) {
+    graphemes->clear();
+  }
+  // We check for font coverage of the text first, as otherwise Pango could
+  // (undesirably) fall back to another font that does have the required
+  // coverage.
+  if (!CoversUTF8Text(utf8_word, len)) {
+    return false;
+  }
+  // U+25CC dotted circle character that often (but not always) gets rendered
+  // when there is an illegal grapheme sequence.
+  const char32 kDottedCircleGlyph = 9676;
+  bool bad_glyph = false;
+  PangoFontMap *font_map = pango_cairo_font_map_get_default();
+  PangoContext *context = pango_context_new();
+  pango_context_set_font_map(context, font_map);
+  PangoLayout *layout;
+  {
+    // Pango is not releasing the cached layout.
+    DISABLE_HEAP_LEAK_CHECK;
+    layout = pango_layout_new(context);
+  }
+  if (desc_) {
+    pango_layout_set_font_description(layout, desc_);
+  } else {
+    PangoFontDescription *desc = pango_font_description_from_string(DescriptionName().c_str());
+    pango_layout_set_font_description(layout, desc);
+    pango_font_description_free(desc);
+  }
+  pango_layout_set_text(layout, utf8_word, len);
+  PangoLayoutIter *run_iter = nullptr;
+  { // Fontconfig caches some information here that is not freed before exit.
+    DISABLE_HEAP_LEAK_CHECK;
+    run_iter = pango_layout_get_iter(layout);
+  }
+  do {
+    PangoLayoutRun *run = pango_layout_iter_get_run_readonly(run_iter);
+    if (!run) {
+      tlog(2, "Found end of line nullptr run marker\n");
+      continue;
+    }
+    PangoGlyph dotted_circle_glyph;
+    PangoFont *font = run->item->analysis.font;
+
+    dotted_circle_glyph = get_glyph(font, kDottedCircleGlyph);
+
+    if (TLOG_IS_ON(2)) {
+      PangoFontDescription *desc = pango_font_describe(font);
+      char *desc_str = pango_font_description_to_string(desc);
+      tlog(2, "Desc of font in run: %s\n", desc_str);
+      g_free(desc_str);
+      pango_font_description_free(desc);
+    }
+
+    PangoGlyphItemIter cluster_iter;
+    gboolean have_cluster;
+    for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, run, utf8_word);
+         have_cluster && !bad_glyph;
+         have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
+      const int start_byte_index = cluster_iter.start_index;
+      const int end_byte_index = cluster_iter.end_index;
+      int start_glyph_index = cluster_iter.start_glyph;
+      int end_glyph_index = cluster_iter.end_glyph;
+      std::string cluster_text =
+          std::string(utf8_word + start_byte_index, end_byte_index - start_byte_index);
+      if (graphemes) {
+        graphemes->push_back(cluster_text);
+      }
+      if (IsUTF8Whitespace(cluster_text.c_str())) {
+        tlog(2, "Skipping whitespace\n");
+        continue;
+      }
+      if (TLOG_IS_ON(2)) {
+        printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ", start_byte_index,
+               end_byte_index, start_glyph_index, end_glyph_index);
+      }
+      for (int i = start_glyph_index, step = (end_glyph_index > start_glyph_index) ? 1 : -1;
+           !bad_glyph && i != end_glyph_index; i += step) {
+        const bool unknown_glyph =
+            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph & PANGO_GLYPH_UNKNOWN_FLAG);
+        const bool illegal_glyph =
+            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph == dotted_circle_glyph);
+        bad_glyph = unknown_glyph || illegal_glyph;
+        if (TLOG_IS_ON(2)) {
+          printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph, bad_glyph ? 1 : 0);
+        }
+      }
+      if (TLOG_IS_ON(2)) {
+        printf("  '%s'\n", cluster_text.c_str());
+      }
+      if (bad_glyph)
+        tlog(1, "Found illegal glyph!\n");
+    }
+  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
+
+  pango_layout_iter_free(run_iter);
+  g_object_unref(context);
+  g_object_unref(layout);
+  if (bad_glyph && graphemes) {
+    graphemes->clear();
+  }
+  return !bad_glyph;
+}
+
+// ------------------------ FontUtils ------------------------------------
+std::vector<std::string> FontUtils::available_fonts_; // cache list
+
+// Returns whether the specified font description is available in the fonts
+// directory.
+//
+// The generated list of font families and faces includes "synthesized" font
+// faces that are not truly loadable. Pango versions >=1.18 have a
+// pango_font_face_is_synthesized method that can be used to prune the list.
+// Until then, we are restricted to using a hack where we try to load the font
+// from the font_map, and then check what we loaded to see if it has the
+// description we expected. If it is not, then the font is deemed unavailable.
+//
+// TODO: This function reports also some not synthesized fonts as not available
+// e.g. 'Bitstream Charter Medium Italic', 'LMRoman17', so we need this hack
+// until  other solution is found.
+/* static */
+bool FontUtils::IsAvailableFont(const char *input_query_desc, std::string *best_match) {
+  std::string query_desc(input_query_desc);
+  PangoFontDescription *desc = pango_font_description_from_string(query_desc.c_str());
+  PangoFont *selected_font = nullptr;
+  {
+    PangoFontInfo::SoftInitFontConfig();
+    PangoFontMap *font_map = pango_cairo_font_map_get_default();
+    PangoContext *context = pango_context_new();
+    pango_context_set_font_map(context, font_map);
+    {
+      DISABLE_HEAP_LEAK_CHECK;
+      selected_font = pango_font_map_load_font(font_map, context, desc);
+    }
+    g_object_unref(context);
+  }
+  if (selected_font == nullptr) {
+    pango_font_description_free(desc);
+    tlog(4, "** Font '%s' failed to load from font map!\n", input_query_desc);
+    return false;
+  }
+  PangoFontDescription *selected_desc = pango_font_describe(selected_font);
+
+  bool equal = pango_font_description_equal(desc, selected_desc);
+  tlog(3, "query weight = %d \t selected weight =%d\n", pango_font_description_get_weight(desc),
+       pango_font_description_get_weight(selected_desc));
+
+  char *selected_desc_str = pango_font_description_to_string(selected_desc);
+  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(), selected_desc_str);
+  if (!equal && best_match != nullptr) {
+    *best_match = selected_desc_str;
+    // Clip the ending ' 0' if there is one. It seems that, if there is no
+    // point size on the end of the fontname, then Pango always appends ' 0'.
+    auto len = best_match->size();
+    if (len > 2 && best_match->at(len - 1) == '0' && best_match->at(len - 2) == ' ') {
+      best_match->resize(len - 2);
+    }
+  }
+  g_free(selected_desc_str);
+  pango_font_description_free(selected_desc);
+  g_object_unref(selected_font);
+  pango_font_description_free(desc);
+  if (!equal)
+    tlog(4, "** Font '%s' failed pango_font_description_equal!\n", input_query_desc);
+  return equal;
+}
+
+static bool ShouldIgnoreFontFamilyName(const char *query) {
+  static const char *kIgnoredFamilyNames[] = {"Sans", "Serif", "Monospace", nullptr};
+  const char **list = kIgnoredFamilyNames;
+  for (; *list != nullptr; ++list) {
+    if (!strcmp(*list, query)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Outputs description names of available fonts.
+/* static */
+const std::vector<std::string> &FontUtils::ListAvailableFonts() {
+  if (!available_fonts_.empty()) {
+    return available_fonts_;
+  }
+
+  PangoFontFamily **families = nullptr;
+  int n_families = 0;
+  ListFontFamilies(&families, &n_families);
+  for (int i = 0; i < n_families; ++i) {
+    const char *family_name = pango_font_family_get_name(families[i]);
+    tlog(2, "Listing family %s\n", family_name);
+    if (ShouldIgnoreFontFamilyName(family_name)) {
+      continue;
+    }
+
+    int n_faces;
+    PangoFontFace **faces = nullptr;
+    pango_font_family_list_faces(families[i], &faces, &n_faces);
+    for (int j = 0; j < n_faces; ++j) {
+      PangoFontDescription *desc = pango_font_face_describe(faces[j]);
+      char *desc_str = pango_font_description_to_string(desc);
+      // "synthesized" font faces that are not truly loadable, so we skip it
+      if (!pango_font_face_is_synthesized(faces[j]) && IsAvailableFont(desc_str)) {
+        available_fonts_.emplace_back(desc_str);
+      }
+      pango_font_description_free(desc);
+      g_free(desc_str);
+    }
+    g_free(faces);
+  }
+  g_free(families);
+  std::sort(available_fonts_.begin(), available_fonts_.end());
+  return available_fonts_;
+}
+
+// Utilities written to be backward compatible with StringRender
+
+/* static */
+int FontUtils::FontScore(const std::unordered_map<char32, int64_t> &ch_map,
+                         const std::string &fontname, int *raw_score, std::vector<bool> *ch_flags) {
+  PangoFontInfo font_info;
+  if (!font_info.ParseFontDescriptionName(fontname)) {
+    tprintf("ERROR: Could not parse %s\n", fontname.c_str());
+  }
+  PangoFont *font = font_info.ToPangoFont();
+  PangoCoverage *coverage = nullptr;
+  if (font != nullptr) {
+    coverage = pango_font_get_coverage(font, nullptr);
+  }
+  if (ch_flags) {
+    ch_flags->clear();
+    ch_flags->reserve(ch_map.size());
+  }
+  *raw_score = 0;
+  int ok_chars = 0;
+  for (auto &&it : ch_map) {
+    bool covered =
+        (coverage != nullptr) && (IsWhitespace(it.first) ||
+                                  (pango_coverage_get(coverage, it.first) == PANGO_COVERAGE_EXACT));
+    if (covered) {
+      ++(*raw_score);
+      ok_chars += it.second;
+    }
+    if (ch_flags) {
+      ch_flags->push_back(covered);
+    }
+  }
+#if PANGO_VERSION_CHECK(1, 52, 0)
+  g_object_unref(coverage);
+#else
+  pango_coverage_unref(coverage);
+#endif
+  g_object_unref(font);
+  return ok_chars;
+}
+
+/* static */
+std::string FontUtils::BestFonts(const std::unordered_map<char32, int64_t> &ch_map,
+                                 std::vector<std::pair<const char *, std::vector<bool>>> *fonts) {
+  const double kMinOKFraction = 0.99;
+  // Weighted fraction of characters that must be renderable in a font to make
+  // it OK even if the raw count is not good.
+  const double kMinWeightedFraction = 0.99995;
+
+  fonts->clear();
+  std::vector<std::vector<bool>> font_flags;
+  std::vector<int> font_scores;
+  std::vector<int> raw_scores;
+  int most_ok_chars = 0;
+  int best_raw_score = 0;
+  const std::vector<std::string> &font_names = FontUtils::ListAvailableFonts();
+  for (const auto &font_name : font_names) {
+    std::vector<bool> ch_flags;
+    int raw_score = 0;
+    int ok_chars = FontScore(ch_map, font_name, &raw_score, &ch_flags);
+    most_ok_chars = std::max(ok_chars, most_ok_chars);
+    best_raw_score = std::max(raw_score, best_raw_score);
+
+    font_flags.push_back(ch_flags);
+    font_scores.push_back(ok_chars);
+    raw_scores.push_back(raw_score);
+  }
+
+  // Now select the fonts with a score above a threshold fraction
+  // of both the raw and weighted best scores. To prevent bogus fonts being
+  // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
+  // BOTH weighted and raw scores.
+  // In low character-count scripts, the issue is more getting enough fonts,
+  // when only 1 or 2 might have all those rare dingbats etc in them, so we
+  // allow a font with a very high weighted (coverage) score
+  // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
+  int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
+  int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
+  int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
+
+  std::string font_list;
+  for (unsigned i = 0; i < font_names.size(); ++i) {
+    int score = font_scores[i];
+    int raw_score = raw_scores[i];
+    if ((score >= least_good_enough && raw_score >= least_raw_enough) || score >= override_enough) {
+      fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
+      tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(),
+           100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score);
+      font_list += font_names[i];
+      font_list += "\n";
+    } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
+      tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(),
+           100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score);
+    }
+  }
+  return font_list;
+}
+
+/* static */
+bool FontUtils::SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name,
+                           std::vector<std::string> *graphemes) {
+  return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name, graphemes);
+}
+
+/* static */
+bool FontUtils::SelectFont(const char *utf8_word, const int utf8_len,
+                           const std::vector<std::string> &all_fonts, std::string *font_name,
+                           std::vector<std::string> *graphemes) {
+  if (font_name) {
+    font_name->clear();
+  }
+  if (graphemes) {
+    graphemes->clear();
+  }
+  for (const auto &all_font : all_fonts) {
+    PangoFontInfo font;
+    std::vector<std::string> found_graphemes;
+    ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_font), "Could not parse font desc name %s\n",
+                    all_font.c_str());
+    if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
+      if (graphemes) {
+        graphemes->swap(found_graphemes);
+      }
+      if (font_name) {
+        *font_name = all_font;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+// PangoFontInfo is reinitialized, so clear the static list of fonts.
+/* static */
+void FontUtils::ReInit() {
+  available_fonts_.clear();
+}
+
+// Print info about used font backend
+/* static */
+void FontUtils::PangoFontTypeInfo() {
+  PangoFontMap *font_map = pango_cairo_font_map_get_default();
+  if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
+      CAIRO_FONT_TYPE_TOY) {
+    printf("Using CAIRO_FONT_TYPE_TOY.\n");
+  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
+             CAIRO_FONT_TYPE_FT) {
+    printf("Using CAIRO_FONT_TYPE_FT.\n");
+  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
+             CAIRO_FONT_TYPE_WIN32) {
+    printf("Using CAIRO_FONT_TYPE_WIN32.\n");
+  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
+             CAIRO_FONT_TYPE_QUARTZ) {
+    printf("Using CAIRO_FONT_TYPE_QUARTZ.\n");
+  } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
+             CAIRO_FONT_TYPE_USER) {
+    printf("Using CAIRO_FONT_TYPE_USER.\n");
+  } else if (!font_map) {
+    printf("Cannot create pango cairo font map!\n");
+  }
+}
+
+} // namespace tesseract