Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/text.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 #include <string.h> | |
| 26 | |
| 27 fz_text * | |
| 28 fz_new_text(fz_context *ctx) | |
| 29 { | |
| 30 fz_text *text = fz_malloc_struct(ctx, fz_text); | |
| 31 text->refs = 1; | |
| 32 return text; | |
| 33 } | |
| 34 | |
| 35 fz_text * | |
| 36 fz_keep_text(fz_context *ctx, const fz_text *textc) | |
| 37 { | |
| 38 fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ | |
| 39 | |
| 40 return fz_keep_imp(ctx, text, &text->refs); | |
| 41 } | |
| 42 | |
| 43 void | |
| 44 fz_drop_text(fz_context *ctx, const fz_text *textc) | |
| 45 { | |
| 46 fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ | |
| 47 | |
| 48 if (fz_drop_imp(ctx, text, &text->refs)) | |
| 49 { | |
| 50 fz_text_span *span = text->head; | |
| 51 while (span) | |
| 52 { | |
| 53 fz_text_span *next = span->next; | |
| 54 fz_drop_font(ctx, span->font); | |
| 55 fz_free(ctx, span->items); | |
| 56 fz_free(ctx, span); | |
| 57 span = next; | |
| 58 } | |
| 59 fz_free(ctx, text); | |
| 60 } | |
| 61 } | |
| 62 | |
| 63 static fz_text_span * | |
| 64 fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) | |
| 65 { | |
| 66 fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); | |
| 67 span->font = fz_keep_font(ctx, font); | |
| 68 span->wmode = wmode; | |
| 69 span->bidi_level = bidi_level; | |
| 70 span->markup_dir = markup_dir; | |
| 71 span->language = language; | |
| 72 span->trm = trm; | |
| 73 span->trm.e = 0; | |
| 74 span->trm.f = 0; | |
| 75 return span; | |
| 76 } | |
| 77 | |
| 78 static fz_text_span * | |
| 79 fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) | |
| 80 { | |
| 81 if (!text->tail) | |
| 82 { | |
| 83 text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); | |
| 84 } | |
| 85 else if (text->tail->font != font || | |
| 86 text->tail->wmode != (unsigned int)wmode || | |
| 87 text->tail->bidi_level != (unsigned int)bidi_level || | |
| 88 text->tail->markup_dir != (unsigned int)markup_dir || | |
| 89 text->tail->language != (unsigned int)language || | |
| 90 text->tail->trm.a != trm.a || | |
| 91 text->tail->trm.b != trm.b || | |
| 92 text->tail->trm.c != trm.c || | |
| 93 text->tail->trm.d != trm.d) | |
| 94 { | |
| 95 text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); | |
| 96 } | |
| 97 return text->tail; | |
| 98 } | |
| 99 | |
| 100 static void | |
| 101 fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) | |
| 102 { | |
| 103 int new_cap = span->cap; | |
| 104 if (span->len + n < new_cap) | |
| 105 return; | |
| 106 while (span->len + n > new_cap) | |
| 107 new_cap = new_cap + 36; | |
| 108 span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item); | |
| 109 span->cap = new_cap; | |
| 110 } | |
| 111 | |
| 112 void | |
| 113 fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) | |
| 114 { | |
| 115 fz_text_span *span; | |
| 116 | |
| 117 if (text->refs != 1) | |
| 118 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot modify shared text objects"); | |
| 119 | |
| 120 span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm); | |
| 121 | |
| 122 fz_grow_text_span(ctx, span, 1); | |
| 123 | |
| 124 span->items[span->len].ucs = ucs; | |
| 125 span->items[span->len].gid = gid; | |
| 126 span->items[span->len].cid = cid; | |
| 127 span->items[span->len].x = trm.e; | |
| 128 span->items[span->len].y = trm.f; | |
| 129 span->items[span->len].adv = adv; | |
| 130 span->len++; | |
| 131 } | |
| 132 | |
| 133 void | |
| 134 fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) | |
| 135 { | |
| 136 float adv = (gid >= 0) ? fz_advance_glyph(ctx, font, gid, wmode) : 0; | |
| 137 fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang); | |
| 138 } | |
| 139 | |
| 140 fz_matrix | |
| 141 fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, | |
| 142 int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) | |
| 143 { | |
| 144 fz_font *font; | |
| 145 int gid, ucs; | |
| 146 float adv; | |
| 147 | |
| 148 while (*s) | |
| 149 { | |
| 150 s += fz_chartorune(&ucs, s); | |
| 151 gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); | |
| 152 if (gid >= 0) | |
| 153 adv = fz_advance_glyph(ctx, font, gid, wmode); | |
| 154 else | |
| 155 adv = 0; | |
| 156 fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language); | |
| 157 if (wmode == 0) | |
| 158 trm = fz_pre_translate(trm, adv, 0); | |
| 159 else | |
| 160 trm = fz_pre_translate(trm, 0, -adv); | |
| 161 } | |
| 162 | |
| 163 return trm; | |
| 164 } | |
| 165 | |
| 166 fz_matrix | |
| 167 fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, | |
| 168 int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) | |
| 169 { | |
| 170 fz_font *font; | |
| 171 int gid, ucs; | |
| 172 float adv; | |
| 173 | |
| 174 while (*s) | |
| 175 { | |
| 176 s += fz_chartorune(&ucs, s); | |
| 177 gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); | |
| 178 adv = fz_advance_glyph(ctx, font, gid, wmode); | |
| 179 if (wmode == 0) | |
| 180 trm = fz_pre_translate(trm, adv, 0); | |
| 181 else | |
| 182 trm = fz_pre_translate(trm, 0, -adv); | |
| 183 } | |
| 184 | |
| 185 return trm; | |
| 186 } | |
| 187 | |
| 188 fz_rect | |
| 189 fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm) | |
| 190 { | |
| 191 fz_text_span *span; | |
| 192 fz_matrix tm, trm; | |
| 193 fz_rect gbox; | |
| 194 fz_rect bbox; | |
| 195 int i; | |
| 196 | |
| 197 bbox = fz_empty_rect; | |
| 198 | |
| 199 for (span = text->head; span; span = span->next) | |
| 200 { | |
| 201 if (span->len > 0) | |
| 202 { | |
| 203 tm = span->trm; | |
| 204 for (i = 0; i < span->len; i++) | |
| 205 { | |
| 206 if (span->items[i].gid >= 0) | |
| 207 { | |
| 208 tm.e = span->items[i].x; | |
| 209 tm.f = span->items[i].y; | |
| 210 trm = fz_concat(tm, ctm); | |
| 211 gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm); | |
| 212 bbox = fz_union_rect(bbox, gbox); | |
| 213 } | |
| 214 } | |
| 215 } | |
| 216 } | |
| 217 | |
| 218 if (!fz_is_empty_rect(bbox)) | |
| 219 { | |
| 220 if (stroke) | |
| 221 bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm); | |
| 222 | |
| 223 /* Compensate for the glyph cache limited positioning precision */ | |
| 224 bbox.x0 -= 1; | |
| 225 bbox.y0 -= 1; | |
| 226 bbox.x1 += 1; | |
| 227 bbox.y1 += 1; | |
| 228 } | |
| 229 | |
| 230 return bbox; | |
| 231 } | |
| 232 | |
| 233 fz_text_language fz_text_language_from_string(const char *str) | |
| 234 { | |
| 235 fz_text_language lang; | |
| 236 | |
| 237 if (str == NULL || strlen(str) == 0) | |
| 238 return FZ_LANG_UNSET; | |
| 239 | |
| 240 if (!strcmp(str, "zh-Hant") || | |
| 241 !strcmp(str, "zh-HK") || | |
| 242 !strcmp(str, "zh-MO") || | |
| 243 !strcmp(str, "zh-SG") || | |
| 244 !strcmp(str, "zh-TW")) | |
| 245 return FZ_LANG_zh_Hant; | |
| 246 if (!strcmp(str, "zh-Hans") || | |
| 247 !strcmp(str, "zh-CN")) | |
| 248 return FZ_LANG_zh_Hans; | |
| 249 | |
| 250 /* 1st char */ | |
| 251 if (str[0] >= 'a' && str[0] <= 'z') | |
| 252 lang = str[0] - 'a' + 1; | |
| 253 else if (str[0] >= 'A' && str[0] <= 'Z') | |
| 254 lang = str[0] - 'A' + 1; | |
| 255 else | |
| 256 return 0; | |
| 257 | |
| 258 /* 2nd char */ | |
| 259 if (str[1] >= 'a' && str[1] <= 'z') | |
| 260 lang += 27*(str[1] - 'a' + 1); | |
| 261 else if (str[1] >= 'A' && str[1] <= 'Z') | |
| 262 lang += 27*(str[1] - 'A' + 1); | |
| 263 else | |
| 264 return 0; /* There are no valid 1 char language codes */ | |
| 265 | |
| 266 /* 3rd char */ | |
| 267 if (str[2] >= 'a' && str[2] <= 'z') | |
| 268 lang += 27*27*(str[2] - 'a' + 1); | |
| 269 else if (str[2] >= 'A' && str[2] <= 'Z') | |
| 270 lang += 27*27*(str[2] - 'A' + 1); | |
| 271 | |
| 272 /* We don't support iso 639-6 4 char codes, cos the standard | |
| 273 * has been withdrawn, and no one uses them. */ | |
| 274 return lang; | |
| 275 } | |
| 276 | |
| 277 char *fz_string_from_text_language(char str[8], fz_text_language lang) | |
| 278 { | |
| 279 int c; | |
| 280 | |
| 281 /* str is supposed to be at least 8 chars in size */ | |
| 282 if (str == NULL) | |
| 283 return NULL; | |
| 284 if (lang == FZ_LANG_UNSET) | |
| 285 return NULL; | |
| 286 | |
| 287 if (lang == FZ_LANG_zh_Hant) | |
| 288 fz_strlcpy(str, "zh-Hant", 8); | |
| 289 else if (lang == FZ_LANG_zh_Hans) | |
| 290 fz_strlcpy(str, "zh-Hans", 8); | |
| 291 else | |
| 292 { | |
| 293 c = lang % 27; | |
| 294 lang = lang / 27; | |
| 295 str[0] = c == 0 ? 0 : c - 1 + 'a'; | |
| 296 c = lang % 27; | |
| 297 lang = lang / 27; | |
| 298 str[1] = c == 0 ? 0 : c - 1 + 'a'; | |
| 299 c = lang % 27; | |
| 300 str[2] = c == 0 ? 0 : c - 1 + 'a'; | |
| 301 str[3] = 0; | |
| 302 } | |
| 303 | |
| 304 return str; | |
| 305 } |
