view mupdf-source/thirdparty/tesseract/src/training/pango/pango_font_info.h @ 17:dd9cdb856310

Remove PKG-INFO because it is regenerated automatically for the sdist
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 18 Sep 2025 17:40:40 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

/**********************************************************************
 * File:        pango_font_info.h
 * Description: Font-related objects and helper functions
 * Author:      Ranjith Unnikrishnan
 * Created:     Mon Nov 18 2013
 *
 * (C) Copyright 2013, Google Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **********************************************************************/

#ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
#define TESSERACT_TRAINING_PANGO_FONT_INFO_H_

#include "export.h"

#include "commandlineflags.h"

#include "pango/pango-font.h"
#include "pango/pango.h"
#include "pango/pangocairo.h"

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Character code type. It is declared at global scope (outside namespace
// tesseract) and serves below as the key type of the character maps that are
// passed to FontUtils::BestFonts() and FontUtils::FontScore().
using char32 = int;

namespace tesseract {

// Data holder class for a font, intended to avoid having to work with Pango or
// FontConfig-specific objects directly.
class TESS_PANGO_TRAINING_API PangoFontInfo {
public:
  enum FontTypeEnum {
    UNKNOWN,
    SERIF,
    SANS_SERIF,
    DECORATIVE,
  };
  PangoFontInfo();
  ~PangoFontInfo();
  // Initialize from parsing a font description name, defined as a string of the
  // format:
  //   "FamilyName [FaceName] [PointSize]"
  // where a missing FaceName implies the default regular face.
  // eg. "Arial Italic 12", "Verdana"
  //
  // FaceName is a combination of:
  //   [StyleName] [Variant] [Weight] [Stretch]
  // with (all optional) Pango-defined values of:
  // StyleName: Oblique, Italic
  // Variant  : Small-Caps
  // Weight   : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
  // Stretch  : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
  //            Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
  explicit PangoFontInfo(const std::string &name);
  bool ParseFontDescriptionName(const std::string &name);

  // Returns true if the font have codepoint coverage for the specified text.
  bool CoversUTF8Text(const char *utf8_text, int byte_length) const;
  // Modifies string to remove unicode points that are not covered by the
  // font. Returns the number of characters dropped.
  int DropUncoveredChars(std::string *utf8_text) const;

  // Returns true if the entire string can be rendered by the font with full
  // character coverage and no unknown glyph or dotted-circle glyph
  // substitutions on encountering a badly formed unicode sequence.
  // If true, returns individual graphemes. Any whitespace characters in the
  // original string are also included in the list.
  bool CanRenderString(const char *utf8_word, int len, std::vector<std::string> *graphemes) const;
  bool CanRenderString(const char *utf8_word, int len) const;

  // Retrieves the x_bearing and x_advance for the given utf8 character in the
  // font. Returns false if the glyph for the character could not be found in
  // the font.
  // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
  bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const;

  // If not already initialized, initializes FontConfig by setting its
  // environment variable and creating a fonts.conf file that points to the
  // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
  static void SoftInitFontConfig();
  // Re-initializes font config, whether or not already initialized.
  // If already initialized, any existing cache is deleted, just to be sure.
  static void HardInitFontConfig(const char *fonts_dir, const char *cache_dir);

  // Accessors
  std::string DescriptionName() const;
  // Font Family name eg. "Arial"
  const std::string &family_name() const {
    return family_name_;
  }
  // Size in points (1/72"), rounded to the nearest integer.
  int font_size() const {
    return font_size_;
  }
  FontTypeEnum font_type() const {
    return font_type_;
  }

  int resolution() const {
    return resolution_;
  }
  void set_resolution(const int resolution) {
    resolution_ = resolution;
  }

private:
  friend class FontUtils;
  void Clear();
  bool ParseFontDescription(const PangoFontDescription *desc);
  // Returns the PangoFont structure corresponding to the closest available font
  // in the font map.
  PangoFont *ToPangoFont() const;

  // Font properties set automatically from parsing the font description name.
  std::string family_name_;
  int font_size_;
  FontTypeEnum font_type_;
  // The Pango description that was used to initialize the instance.
  PangoFontDescription *desc_;
  // Default output resolution to assume for GetSpacingProperties() and any
  // other methods that returns pixel values.
  int resolution_;
  // Fontconfig operates through an environment variable, so it intrinsically
  // cannot be thread-friendly, but you can serialize multiple independent
  // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
  // These hold the last initialized values set by HardInitFontConfig or
  // the first call to SoftInitFontConfig.
  // Directory to be scanned for font files.
  static std::string fonts_dir_;
  // Directory to store the cache of font information. (Can be the same as
  // fonts_dir_)
  static std::string cache_dir_;

private:
  PangoFontInfo(const PangoFontInfo &) = delete;
  void operator=(const PangoFontInfo &) = delete;
};

// Collection of static helpers that query which fonts are available and
// select fonts according to their codepoint coverage.
class TESS_PANGO_TRAINING_API FontUtils {
public:
  // Returns true if a font with the given description name is available in
  // the target directory specified by --fonts_dir.
  static bool IsAvailableFont(const char *font_desc) {
    std::string *const no_best_match = nullptr;
    return IsAvailableFont(font_desc, no_best_match);
  }

  // Same check as above. In addition, when false is returned and best_match
  // is not nullptr, the closest matching font is returned in best_match.
  static bool IsAvailableFont(const char *font_desc, std::string *best_match);

  // Outputs the description names of the available fonts.
  static const std::vector<std::string> &ListAvailableFonts();

  // Chooses, among the available fonts, one that covers and can render the
  // given word. The font description name and the decomposition of the word
  // into graphemes are returned. The result is false if no suitable font was
  // found.
  static bool SelectFont(const char *utf8_word,
                         const int utf8_len,
                         std::string *font_name,
                         std::vector<std::string> *graphemes);

  // Chooses, among all_fonts, one that covers and can render the given word.
  // The font description name and the decomposition of the word into
  // graphemes are returned. The result is false if no suitable font was found.
  static bool SelectFont(const char *utf8_word,
                         const int utf8_len,
                         const std::vector<std::string> &all_fonts,
                         std::string *font_name,
                         std::vector<std::string> *graphemes);

  // NOTE: The utilities below were written to stay backward compatible with
  // StringRender.

  // For the fonts whose score on the characters of the given hash map lies
  // within some fraction of the best font's score, BestFonts returns a font
  // name together with a bit vector of the characters that font can render.
  // Each flag of such a vector tells whether the corresponding character
  // (in order of iterating ch_map) can be rendered.
  // The returned string lists the acceptable fonts that were used.
  static std::string BestFonts(
      const std::unordered_map<char32, int64_t> &ch_map,
      std::vector<std::pair<const char *, std::vector<bool>>> *font_flag);

  // Returns the weighted renderability score that the given font achieves on
  // the given hash map character table; raw_score additionally receives the
  // unweighted score.
  // Each value of the bool vector ch_flags tells whether the corresponding
  // character (in order of iterating ch_map) can be rendered.
  static int FontScore(const std::unordered_map<char32, int64_t> &ch_map,
                       const std::string &fontname,
                       int *raw_score,
                       std::vector<bool> *ch_flags);

  // PangoFontInfo is reinitialized, so clear the static list of fonts.
  static void ReInit();
  static void PangoFontTypeInfo();

private:
  // Cached list of fonts.
  static std::vector<std::string> available_fonts_;
};
} // namespace tesseract

#endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_