Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/include/mupdf/fitz/text.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/include/mupdf/fitz/text.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,210 @@ +// Copyright (C) 2004-2024 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#ifndef MUPDF_FITZ_TEXT_H +#define MUPDF_FITZ_TEXT_H + +#include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" +#include "mupdf/fitz/font.h" +#include "mupdf/fitz/path.h" +#include "mupdf/fitz/bidi.h" + +/** + Text buffer. + + The trm field contains the a, b, c and d coefficients. + The e and f coefficients come from the individual elements, + together they form the transform matrix for the glyph. + + Glyphs are referenced by glyph ID. + The Unicode text equivalent is kept in a separate array + with indexes into the glyph array. +*/ + +typedef struct +{ + float x, y; + float adv; /* advance width given by input format */ + int gid; /* -1 for one gid to many ucs mappings */ + int ucs; /* -1 for one ucs to many gid mappings */ + int cid; /* CID for CJK fonts, raw character code for other fonts; or unicode for non-PDF formats. */ +} fz_text_item; + +#define FZ_LANG_TAG2(c1,c2) ((c1-'a'+1) + ((c2-'a'+1)*27)) +#define FZ_LANG_TAG3(c1,c2,c3) ((c1-'a'+1) + ((c2-'a'+1)*27) + ((c3-'a'+1)*27*27)) + +typedef enum +{ + FZ_LANG_UNSET = 0, + FZ_LANG_ur = FZ_LANG_TAG2('u','r'), + FZ_LANG_urd = FZ_LANG_TAG3('u','r','d'), + FZ_LANG_ko = FZ_LANG_TAG2('k','o'), + FZ_LANG_ja = FZ_LANG_TAG2('j','a'), + FZ_LANG_zh = FZ_LANG_TAG2('z','h'), + FZ_LANG_zh_Hans = FZ_LANG_TAG3('z','h','s'), + FZ_LANG_zh_Hant = FZ_LANG_TAG3('z','h','t'), +} fz_text_language; + +typedef struct fz_text_span +{ + fz_font *font; + fz_matrix trm; + unsigned wmode : 1; /* 0 horizontal, 1 vertical */ + unsigned bidi_level : 7; /* The bidirectional level of text */ + unsigned markup_dir : 2; /* The direction of text as marked in the original document */ + unsigned language : 15; /* The language as marked in the original document */ + int len, cap; + fz_text_item *items; + struct fz_text_span *next; +} fz_text_span; + +typedef struct +{ + int refs; + fz_text_span *head, *tail; +} fz_text; + +/** + Create a new empty fz_text object. + + Throws exception on failure to allocate. +*/ +fz_text *fz_new_text(fz_context *ctx); + +/** + Increment the reference count for the text object. The same + pointer is returned. + + Never throws exceptions. +*/ +fz_text *fz_keep_text(fz_context *ctx, const fz_text *text); + +/** + Decrement the reference count for the text object. When the + reference count hits zero, the text object is freed. + + Never throws exceptions. +*/ +void fz_drop_text(fz_context *ctx, const fz_text *text); + +/** + Add a glyph/unicode value to a text object. + + text: Text object to add to. + + font: The font the glyph should be added in. + + trm: The transform to use for the glyph. + + glyph: The glyph id to add. + + unicode: The unicode character for the glyph. + + cid: The CJK CID value or raw character code. + + wmode: 1 for vertical mode, 0 for horizontal. + + bidi_level: The bidirectional level for this glyph. + + markup_dir: The direction of the text as specified in the + markup. + + language: The language in use (if known, 0 otherwise) + (e.g. FZ_LANG_zh_Hans). + + Throws exception on failure to allocate. +*/ +void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language); +void fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int glyph, int unicode, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang); + +/** + Add a UTF8 string to a text object. + + text: Text object to add to. + + font: The font the string should be added in. + + trm: The transform to use. + + s: The utf-8 string to add. + + wmode: 1 for vertical mode, 0 for horizontal. + + bidi_level: The bidirectional level for this glyph. + + markup_dir: The direction of the text as specified in the markup. + + language: The language in use (if known, 0 otherwise) + (e.g. FZ_LANG_zh_Hans). + + Returns the transform updated with the advance width of the + string. +*/ +fz_matrix fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language); + +/** + Measure the advance width of a UTF8 string should it be added to a text object. + + This uses the same layout algorithms as fz_show_string, and can be used + to calculate text alignment adjustments. +*/ +fz_matrix +fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language); + +/** + Find the bounds of a given text object. + + text: The text object to find the bounds of. + + stroke: Pointer to the stroke attributes (for stroked + text), or NULL (for filled text). + + ctm: The matrix in use. + + r: pointer to storage for the bounds. + + Returns a pointer to r, which is updated to contain the + bounding box for the text object. +*/ +fz_rect fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm); + +/** + Convert ISO 639 (639-{1,2,3,5}) language specification + strings losslessly to a 15 bit fz_text_language code. + + No validation is carried out. Obviously invalid (out + of spec) codes will be mapped to FZ_LANG_UNSET, but + well-formed (but undefined) codes will be blithely + accepted. +*/ +fz_text_language fz_text_language_from_string(const char *str); + +/** + Recover ISO 639 (639-{1,2,3,5}) language specification + strings losslessly from a 15 bit fz_text_language code. + + No validation is carried out. See note above. +*/ +char *fz_string_from_text_language(char str[8], fz_text_language lang); + +#endif
