Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/unicharset/validate_grapheme.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 #ifndef TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_ | |
| 2 #define TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_ | |
| 3 | |
| 4 #include "validator.h" | |
| 5 | |
| 6 namespace tesseract { | |
| 7 | |
| 8 // Validator subclass that validates generic Unicode text and segments it into | |
| 9 // grapheme clusters; this includes Latin letters combined with diacritics. | |
| 10 class ValidateGrapheme : public Validator { | |
| 11 public: | |
| 12 ValidateGrapheme(ViramaScript script, bool report_errors) : Validator(script, report_errors) {} | |
| 13 ~ValidateGrapheme() override = default; | |
| 14 | |
| 15 protected: | |
| 16 // Consumes the next grapheme, starting at codes_[codes_used_], and copies it to | |
| 17 // parts_ and output_. Returns true if a valid grapheme was consumed; otherwise | |
| 18 // codes_used_ is not incremented. | |
| 19 bool ConsumeGraphemeIfValid() override; | |
| 20 // Returns the CharClass that corresponds to the Unicode character ch. | |
| 21 CharClass UnicodeToCharClass(char32 ch) const override; | |
| 22 | |
| 23 private: | |
| 24 // Helper: returns true if the two-character sequence prev_ch, ch is invalid. | |
| 25 bool IsBadlyFormed(char32 prev_ch, char32 ch); | |
| 26 // Helper: returns true if the sequence prev_ch, ch is an invalid Indic vowel. | |
| 27 static bool IsBadlyFormedIndicVowel(char32 prev_ch, char32 ch); | |
| 28 // Helper: returns true if the sequence prev_ch, ch is invalid Thai. | |
| 29 static bool IsBadlyFormedThai(char32 prev_ch, char32 ch); | |
| 30 }; | |
| 31 | |
| 32 } // namespace tesseract | |
| 33 | |
| 34 #endif // TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_ |
