Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/thirdparty/tesseract/src/training/pango/boxchar.h @ 17:dd9cdb856310
Remove PKG-INFO because it is regenerated automatically for the sdist
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 18 Sep 2025 17:40:40 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
/********************************************************************** * File: boxchar.h * Description: Simple class to associate a Tesseract classification unit with * its bounding box so that the boxes can be rotated as the image * is rotated for degradation. Also includes routines to output * the character-tagged boxes to a boxfile. * Author: Ray Smith * * (C) Copyright 2013, Google Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **********************************************************************/ #ifndef TESSERACT_TRAINING_BOXCHAR_H_ #define TESSERACT_TRAINING_BOXCHAR_H_ #include <string> #include <vector> #include <allheaders.h> // for Leptonica API #if (LIBLEPT_MAJOR_VERSION == 1 && LIBLEPT_MINOR_VERSION >= 83) || LIBLEPT_MAJOR_VERSION > 1 #include <pix_internal.h> // for fast access to Box geometry #endif #include <tesseract/export.h> namespace tesseract { class BoxChar { public: BoxChar(const char *utf8_str, int len); ~BoxChar(); // Accessors. const std::string &ch() const { return ch_; } const Box *box() const { return box_; } const int &page() const { return page_; } void set_rtl_index(int index) { rtl_index_ = index; } const int &rtl_index() const { return rtl_index_; } // Set the box_ member. void AddBox(int x, int y, int width, int height); void set_page(int page) { page_ = page; } std::string *mutable_ch() { return &ch_; } Box *mutable_box() { return box_; } // Sort function for sorting by left edge of box. 
Note that this will not // work properly until after InsertNewlines and InsertSpaces. bool operator<(const BoxChar &other) const { if (box_ == nullptr) { return true; } if (other.box_ == nullptr) { return false; } return box_->x < other.box_->x; } // Increments *num_rtl and *num_ltr according to the directionality of // characters in the box. void GetDirection(int *num_rtl, int *num_ltr) const; // Reverses the order of unicodes within the box. If Pango generates a // ligature, these will get reversed on output, so reverse now. void ReverseUnicodesInBox(); static void TranslateBoxes(int xshift, int yshift, std::vector<BoxChar *> *boxes); // Prepares for writing the boxes to a file by inserting newlines, spaces, // and re-ordering so the boxes are strictly left-to-right. static void PrepareToWrite(std::vector<BoxChar *> *boxes); // Inserts newline (tab) characters into the vector at newline positions. static void InsertNewlines(bool rtl_rules, bool vertical_rules, std::vector<BoxChar *> *boxes); // Converts nullptr boxes to space characters, with appropriate bounding // boxes. static void InsertSpaces(bool rtl_rules, bool vertical_rules, std::vector<BoxChar *> *boxes); // Reorders text in a right-to-left script in left-to-right order. static void ReorderRTLText(std::vector<BoxChar *> *boxes); // Returns true if the vector contains mostly RTL characters. static bool ContainsMostlyRTL(const std::vector<BoxChar *> &boxes); // Returns true if the text is mostly laid out vertically. static bool MostlyVertical(const std::vector<BoxChar *> &boxes); // Returns the total length of all the strings in the boxes. static int TotalByteLength(const std::vector<BoxChar *> &boxes); // Rotate the vector of boxes between start and end by the given rotation. // The rotation is in radians clockwise about the given center. 
static void RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, std::vector<BoxChar *> *boxes); // Create a tesseract box file from the vector of boxes. The image height // is needed to convert to tesseract coordinates. static void WriteTesseractBoxFile(const std::string &name, int height, const std::vector<BoxChar *> &boxes); // Gets the tesseract box file as a string from the vector of boxes. // The image height is needed to convert to tesseract coordinates. static std::string GetTesseractBoxStr(int height, const std::vector<BoxChar *> &boxes); private: std::string ch_; Box *box_; int page_; // If the box is an RTL character, contains the original position in the // array of boxes (before reversal), otherwise -1. int rtl_index_; }; // Sort predicate to sort a vector of BoxChar*. struct BoxCharPtrSort { bool operator()(const BoxChar *box1, const BoxChar *box2) const { if (box1->rtl_index() >= 0 && box2->rtl_index() >= 0) { return box2->rtl_index() < box1->rtl_index(); } return *box1 < *box2; } }; } // namespace tesseract #endif // TESSERACT_TRAINING_BOXCHAR_H_
