comparison mupdf-source/include/mupdf/fitz/text.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #ifndef MUPDF_FITZ_TEXT_H
24 #define MUPDF_FITZ_TEXT_H
25
26 #include "mupdf/fitz/system.h"
27 #include "mupdf/fitz/context.h"
28 #include "mupdf/fitz/font.h"
29 #include "mupdf/fitz/path.h"
30 #include "mupdf/fitz/bidi.h"
31
32 /**
33 Text buffer.
34
35 The trm field contains the a, b, c and d coefficients.
36 The e and f coefficients come from the individual elements,
37 together they form the transform matrix for the glyph.
38
39 Glyphs are referenced by glyph ID.
40 The Unicode text equivalent is kept in a separate array
41 with indexes into the glyph array.
42 */
43
44 typedef struct
45 {
46 float x, y; /* per-item e and f coefficients; combined with the span's trm (a, b, c, d) to form the glyph's transform */
47 float adv; /* advance width given by input format */
48 int gid; /* glyph ID; -1 for one gid to many ucs mappings */
49 int ucs; /* Unicode text equivalent; -1 for one ucs to many gid mappings */
50 int cid; /* CID for CJK fonts, raw character code for other fonts; or unicode for non-PDF formats. */
51 } fz_text_item;
52
53 #define FZ_LANG_TAG2(c1,c2) (((c1)-'a'+1) + (((c2)-'a'+1)*27)) /* packs two lowercase letters base 27 ('a'..'z' -> 1..26); parameters parenthesized so expression arguments expand correctly */
54 #define FZ_LANG_TAG3(c1,c2,c3) (((c1)-'a'+1) + (((c2)-'a'+1)*27) + (((c3)-'a'+1)*27*27)) /* three-letter form of FZ_LANG_TAG2; the largest value ('z','z','z') is 19682, which fits in 15 bits */
55
56 typedef enum /* language codes built with FZ_LANG_TAG2 / FZ_LANG_TAG3; stored in the 15-bit language field of fz_text_span */
57 {
58 FZ_LANG_UNSET = 0, /* language not known */
59 FZ_LANG_ur = FZ_LANG_TAG2('u','r'),
60 FZ_LANG_urd = FZ_LANG_TAG3('u','r','d'),
61 FZ_LANG_ko = FZ_LANG_TAG2('k','o'),
62 FZ_LANG_ja = FZ_LANG_TAG2('j','a'),
63 FZ_LANG_zh = FZ_LANG_TAG2('z','h'),
64 FZ_LANG_zh_Hans = FZ_LANG_TAG3('z','h','s'), /* encoded as the three letters "zhs" */
65 FZ_LANG_zh_Hant = FZ_LANG_TAG3('z','h','t'), /* encoded as the three letters "zht" */
66 } fz_text_language;
67
68 typedef struct fz_text_span
69 {
70 fz_font *font; /* font shared by the items of this span */
71 fz_matrix trm; /* a, b, c and d coefficients; e and f are supplied per item */
72 unsigned wmode : 1; /* 0 horizontal, 1 vertical */
73 unsigned bidi_level : 7; /* The bidirectional level of text */
74 unsigned markup_dir : 2; /* The direction of text as marked in the original document */
75 unsigned language : 15; /* The language as marked in the original document */
76 int len, cap; /* NOTE(review): presumably the number of items in use and the allocated capacity of items - confirm */
77 fz_text_item *items; /* array of glyph items belonging to this span */
78 struct fz_text_span *next; /* next span in the singly linked list, if any */
79 } fz_text_span;
80
81 typedef struct
82 {
83 int refs; /* reference count, adjusted by fz_keep_text and fz_drop_text */
84 fz_text_span *head, *tail; /* NOTE(review): presumably the first and last span of the list linked through fz_text_span.next - confirm */
85 } fz_text;
86
87 /**
88 Create a new empty fz_text object; release it with fz_drop_text.
89
90 Throws exception on failure to allocate.
91 */
92 fz_text *fz_new_text(fz_context *ctx);
93
94 /**
95 Increment the reference count for the text object. The same
96 pointer is returned (without the const qualifier).
97
98 Never throws exceptions.
99 */
100 fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
101
102 /**
103 Decrement the reference count for the text object. When the
104 reference count hits zero, the text object is freed, so the caller must not use its reference afterwards.
105
106 Never throws exceptions.
107 */
108 void fz_drop_text(fz_context *ctx, const fz_text *text);
109
110 /**
111 Add a glyph/unicode value to a text object. fz_show_glyph_aux is the variant that also takes adv and cid.
112
113 text: Text object to add to.
114
115 font: The font the glyph should be added in.
116
117 trm: The transform to use for the glyph.
118
119 glyph: The glyph id to add.
120
121 unicode: The unicode character for the glyph.
122
123 adv: The advance width given by the input format (fz_show_glyph_aux only).
124 cid: The CJK CID value or raw character code (fz_show_glyph_aux only).
125 wmode: 1 for vertical mode, 0 for horizontal.
126
127 bidi_level: The bidirectional level for this glyph.
128
129 markup_dir: The direction of the text as specified in the
130 markup.
131
132 language (named lang in fz_show_glyph_aux): The language in use (if known, 0 otherwise)
133 (e.g. FZ_LANG_zh_Hans).
134
135 Throws exception on failure to allocate.
136 */
137 void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
138 void fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int glyph, int unicode, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang);
139
140 /**
141 Add a UTF8 string to a text object.
142
143 text: Text object to add to.
144
145 font: The font the string should be added in.
146
147 trm: The transform to use.
148
149 s: The utf-8 string to add.
150
151 wmode: 1 for vertical mode, 0 for horizontal.
152
153 bidi_level: The bidirectional level for the glyphs of this string.
154
155 markup_dir: The direction of the text as specified in the markup.
156
157 language: The language in use (if known, 0 otherwise)
158 (e.g. FZ_LANG_zh_Hans).
159
160 Returns the transform updated with the advance width of the
161 string.
162 */
163 fz_matrix fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
164
165 /**
166 Measure the advance width of a UTF8 string should it be added to a text object.
167
168 This uses the same layout algorithms as fz_show_string, and can be used
169 to calculate text alignment adjustments. NOTE(review): the returned matrix is presumably trm updated with the advance width, as for fz_show_string - confirm.
170 */
171 fz_matrix
172 fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
173
174 /**
175 Find the bounds of a given text object.
176
177 text: The text object to find the bounds of.
178
179 stroke: Pointer to the stroke attributes (for stroked
180 text), or NULL (for filled text).
181
182 ctm: The matrix in use.
183
184 Returns the bounding box for the text object.
185
186 The rectangle is returned by value; the function takes
187 no output parameter.
188 */
189 fz_rect fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm);
190
191 /**
192 Convert ISO 639 (639-{1,2,3,5}) language specification
193 strings losslessly to a 15 bit fz_text_language code.
194
195 Codes are not checked against the ISO 639 registry. Obviously invalid (out
196 of spec) codes will be mapped to FZ_LANG_UNSET, but
197 well-formed (but undefined) codes will be blithely
198 accepted.
199 */
200 fz_text_language fz_text_language_from_string(const char *str);
201
202 /**
203 Recover ISO 639 (639-{1,2,3,5}) language specification
204 strings losslessly from a 15 bit fz_text_language code.
205
206 No validation is carried out. See the note on fz_text_language_from_string. NOTE(review): str is a caller-supplied buffer of 8 chars, presumably also the return value - confirm.
207 */
208 char *fz_string_from_text_language(char str[8], fz_text_language lang);
209
210 #endif