comparison mupdf-source/source/fitz/text.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26
27 fz_text *
28 fz_new_text(fz_context *ctx)
29 {
30 fz_text *text = fz_malloc_struct(ctx, fz_text);
31 text->refs = 1;
32 return text;
33 }
34
35 fz_text *
36 fz_keep_text(fz_context *ctx, const fz_text *textc)
37 {
38 fz_text *text = (fz_text *)textc; /* Explicit cast away of const */
39
40 return fz_keep_imp(ctx, text, &text->refs);
41 }
42
43 void
44 fz_drop_text(fz_context *ctx, const fz_text *textc)
45 {
46 fz_text *text = (fz_text *)textc; /* Explicit cast away of const */
47
48 if (fz_drop_imp(ctx, text, &text->refs))
49 {
50 fz_text_span *span = text->head;
51 while (span)
52 {
53 fz_text_span *next = span->next;
54 fz_drop_font(ctx, span->font);
55 fz_free(ctx, span->items);
56 fz_free(ctx, span);
57 span = next;
58 }
59 fz_free(ctx, text);
60 }
61 }
62
63 static fz_text_span *
64 fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
65 {
66 fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
67 span->font = fz_keep_font(ctx, font);
68 span->wmode = wmode;
69 span->bidi_level = bidi_level;
70 span->markup_dir = markup_dir;
71 span->language = language;
72 span->trm = trm;
73 span->trm.e = 0;
74 span->trm.f = 0;
75 return span;
76 }
77
78 static fz_text_span *
79 fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
80 {
81 if (!text->tail)
82 {
83 text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
84 }
85 else if (text->tail->font != font ||
86 text->tail->wmode != (unsigned int)wmode ||
87 text->tail->bidi_level != (unsigned int)bidi_level ||
88 text->tail->markup_dir != (unsigned int)markup_dir ||
89 text->tail->language != (unsigned int)language ||
90 text->tail->trm.a != trm.a ||
91 text->tail->trm.b != trm.b ||
92 text->tail->trm.c != trm.c ||
93 text->tail->trm.d != trm.d)
94 {
95 text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
96 }
97 return text->tail;
98 }
99
100 static void
101 fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n)
102 {
103 int new_cap = span->cap;
104 if (span->len + n < new_cap)
105 return;
106 while (span->len + n > new_cap)
107 new_cap = new_cap + 36;
108 span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item);
109 span->cap = new_cap;
110 }
111
112 void
113 fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
114 {
115 fz_text_span *span;
116
117 if (text->refs != 1)
118 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot modify shared text objects");
119
120 span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
121
122 fz_grow_text_span(ctx, span, 1);
123
124 span->items[span->len].ucs = ucs;
125 span->items[span->len].gid = gid;
126 span->items[span->len].cid = cid;
127 span->items[span->len].x = trm.e;
128 span->items[span->len].y = trm.f;
129 span->items[span->len].adv = adv;
130 span->len++;
131 }
132
133 void
134 fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
135 {
136 float adv = (gid >= 0) ? fz_advance_glyph(ctx, font, gid, wmode) : 0;
137 fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang);
138 }
139
140 fz_matrix
141 fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s,
142 int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
143 {
144 fz_font *font;
145 int gid, ucs;
146 float adv;
147
148 while (*s)
149 {
150 s += fz_chartorune(&ucs, s);
151 gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font);
152 if (gid >= 0)
153 adv = fz_advance_glyph(ctx, font, gid, wmode);
154 else
155 adv = 0;
156 fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language);
157 if (wmode == 0)
158 trm = fz_pre_translate(trm, adv, 0);
159 else
160 trm = fz_pre_translate(trm, 0, -adv);
161 }
162
163 return trm;
164 }
165
166 fz_matrix
167 fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s,
168 int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
169 {
170 fz_font *font;
171 int gid, ucs;
172 float adv;
173
174 while (*s)
175 {
176 s += fz_chartorune(&ucs, s);
177 gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font);
178 adv = fz_advance_glyph(ctx, font, gid, wmode);
179 if (wmode == 0)
180 trm = fz_pre_translate(trm, adv, 0);
181 else
182 trm = fz_pre_translate(trm, 0, -adv);
183 }
184
185 return trm;
186 }
187
188 fz_rect
189 fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm)
190 {
191 fz_text_span *span;
192 fz_matrix tm, trm;
193 fz_rect gbox;
194 fz_rect bbox;
195 int i;
196
197 bbox = fz_empty_rect;
198
199 for (span = text->head; span; span = span->next)
200 {
201 if (span->len > 0)
202 {
203 tm = span->trm;
204 for (i = 0; i < span->len; i++)
205 {
206 if (span->items[i].gid >= 0)
207 {
208 tm.e = span->items[i].x;
209 tm.f = span->items[i].y;
210 trm = fz_concat(tm, ctm);
211 gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm);
212 bbox = fz_union_rect(bbox, gbox);
213 }
214 }
215 }
216 }
217
218 if (!fz_is_empty_rect(bbox))
219 {
220 if (stroke)
221 bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm);
222
223 /* Compensate for the glyph cache limited positioning precision */
224 bbox.x0 -= 1;
225 bbox.y0 -= 1;
226 bbox.x1 += 1;
227 bbox.y1 += 1;
228 }
229
230 return bbox;
231 }
232
233 fz_text_language fz_text_language_from_string(const char *str)
234 {
235 fz_text_language lang;
236
237 if (str == NULL || strlen(str) == 0)
238 return FZ_LANG_UNSET;
239
240 if (!strcmp(str, "zh-Hant") ||
241 !strcmp(str, "zh-HK") ||
242 !strcmp(str, "zh-MO") ||
243 !strcmp(str, "zh-SG") ||
244 !strcmp(str, "zh-TW"))
245 return FZ_LANG_zh_Hant;
246 if (!strcmp(str, "zh-Hans") ||
247 !strcmp(str, "zh-CN"))
248 return FZ_LANG_zh_Hans;
249
250 /* 1st char */
251 if (str[0] >= 'a' && str[0] <= 'z')
252 lang = str[0] - 'a' + 1;
253 else if (str[0] >= 'A' && str[0] <= 'Z')
254 lang = str[0] - 'A' + 1;
255 else
256 return 0;
257
258 /* 2nd char */
259 if (str[1] >= 'a' && str[1] <= 'z')
260 lang += 27*(str[1] - 'a' + 1);
261 else if (str[1] >= 'A' && str[1] <= 'Z')
262 lang += 27*(str[1] - 'A' + 1);
263 else
264 return 0; /* There are no valid 1 char language codes */
265
266 /* 3rd char */
267 if (str[2] >= 'a' && str[2] <= 'z')
268 lang += 27*27*(str[2] - 'a' + 1);
269 else if (str[2] >= 'A' && str[2] <= 'Z')
270 lang += 27*27*(str[2] - 'A' + 1);
271
272 /* We don't support iso 639-6 4 char codes, cos the standard
273 * has been withdrawn, and no one uses them. */
274 return lang;
275 }
276
277 char *fz_string_from_text_language(char str[8], fz_text_language lang)
278 {
279 int c;
280
281 /* str is supposed to be at least 8 chars in size */
282 if (str == NULL)
283 return NULL;
284 if (lang == FZ_LANG_UNSET)
285 return NULL;
286
287 if (lang == FZ_LANG_zh_Hant)
288 fz_strlcpy(str, "zh-Hant", 8);
289 else if (lang == FZ_LANG_zh_Hans)
290 fz_strlcpy(str, "zh-Hans", 8);
291 else
292 {
293 c = lang % 27;
294 lang = lang / 27;
295 str[0] = c == 0 ? 0 : c - 1 + 'a';
296 c = lang % 27;
297 lang = lang / 27;
298 str[1] = c == 0 ? 0 : c - 1 + 'a';
299 c = lang % 27;
300 str[2] = c == 0 ? 0 : c - 1 + 'a';
301 str[3] = 0;
302 }
303
304 return str;
305 }