Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/text.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/text.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,305 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
+ +#include "mupdf/fitz.h" + +#include <string.h> + +fz_text * +fz_new_text(fz_context *ctx) +{ + fz_text *text = fz_malloc_struct(ctx, fz_text); + text->refs = 1; + return text; +} + +fz_text * +fz_keep_text(fz_context *ctx, const fz_text *textc) +{ + fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ + + return fz_keep_imp(ctx, text, &text->refs); +} + +void +fz_drop_text(fz_context *ctx, const fz_text *textc) +{ + fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ + + if (fz_drop_imp(ctx, text, &text->refs)) + { + fz_text_span *span = text->head; + while (span) + { + fz_text_span *next = span->next; + fz_drop_font(ctx, span->font); + fz_free(ctx, span->items); + fz_free(ctx, span); + span = next; + } + fz_free(ctx, text); + } +} + +static fz_text_span * +fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) +{ + fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); + span->font = fz_keep_font(ctx, font); + span->wmode = wmode; + span->bidi_level = bidi_level; + span->markup_dir = markup_dir; + span->language = language; + span->trm = trm; + span->trm.e = 0; + span->trm.f = 0; + return span; +} + +static fz_text_span * +fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) +{ + if (!text->tail) + { + text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); + } + else if (text->tail->font != font || + text->tail->wmode != (unsigned int)wmode || + text->tail->bidi_level != (unsigned int)bidi_level || + text->tail->markup_dir != (unsigned int)markup_dir || + text->tail->language != (unsigned int)language || + text->tail->trm.a != trm.a || + text->tail->trm.b != trm.b || + text->tail->trm.c != trm.c || + text->tail->trm.d != trm.d) + { + text->tail = text->tail->next = 
fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); + } + return text->tail; +} + +static void +fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) +{ + int new_cap = span->cap; + if (span->len + n < new_cap) + return; + while (span->len + n > new_cap) + new_cap = new_cap + 36; + span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item); + span->cap = new_cap; +} + +void +fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) +{ + fz_text_span *span; + + if (text->refs != 1) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot modify shared text objects"); + + span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm); + + fz_grow_text_span(ctx, span, 1); + + span->items[span->len].ucs = ucs; + span->items[span->len].gid = gid; + span->items[span->len].cid = cid; + span->items[span->len].x = trm.e; + span->items[span->len].y = trm.f; + span->items[span->len].adv = adv; + span->len++; +} + +void +fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) +{ + float adv = (gid >= 0) ? 
fz_advance_glyph(ctx, font, gid, wmode) : 0; + fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang); +} + +fz_matrix +fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, + int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) +{ + fz_font *font; + int gid, ucs; + float adv; + + while (*s) + { + s += fz_chartorune(&ucs, s); + gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); + if (gid >= 0) + adv = fz_advance_glyph(ctx, font, gid, wmode); + else + adv = 0; + fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language); + if (wmode == 0) + trm = fz_pre_translate(trm, adv, 0); + else + trm = fz_pre_translate(trm, 0, -adv); + } + + return trm; +} + +fz_matrix +fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, + int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) +{ + fz_font *font; + int gid, ucs; + float adv; + + while (*s) + { + s += fz_chartorune(&ucs, s); + gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); + adv = fz_advance_glyph(ctx, font, gid, wmode); + if (wmode == 0) + trm = fz_pre_translate(trm, adv, 0); + else + trm = fz_pre_translate(trm, 0, -adv); + } + + return trm; +} + +fz_rect +fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm) +{ + fz_text_span *span; + fz_matrix tm, trm; + fz_rect gbox; + fz_rect bbox; + int i; + + bbox = fz_empty_rect; + + for (span = text->head; span; span = span->next) + { + if (span->len > 0) + { + tm = span->trm; + for (i = 0; i < span->len; i++) + { + if (span->items[i].gid >= 0) + { + tm.e = span->items[i].x; + tm.f = span->items[i].y; + trm = fz_concat(tm, ctm); + gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm); + bbox = fz_union_rect(bbox, gbox); + } + } + } + } + + if 
(!fz_is_empty_rect(bbox)) + { + if (stroke) + bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm); + + /* Compensate for the glyph cache limited positioning precision */ + bbox.x0 -= 1; + bbox.y0 -= 1; + bbox.x1 += 1; + bbox.y1 += 1; + } + + return bbox; +} + +fz_text_language fz_text_language_from_string(const char *str) +{ + fz_text_language lang; + + if (str == NULL || strlen(str) == 0) + return FZ_LANG_UNSET; + + if (!strcmp(str, "zh-Hant") || + !strcmp(str, "zh-HK") || + !strcmp(str, "zh-MO") || + !strcmp(str, "zh-SG") || + !strcmp(str, "zh-TW")) + return FZ_LANG_zh_Hant; + if (!strcmp(str, "zh-Hans") || + !strcmp(str, "zh-CN")) + return FZ_LANG_zh_Hans; + + /* 1st char */ + if (str[0] >= 'a' && str[0] <= 'z') + lang = str[0] - 'a' + 1; + else if (str[0] >= 'A' && str[0] <= 'Z') + lang = str[0] - 'A' + 1; + else + return 0; + + /* 2nd char */ + if (str[1] >= 'a' && str[1] <= 'z') + lang += 27*(str[1] - 'a' + 1); + else if (str[1] >= 'A' && str[1] <= 'Z') + lang += 27*(str[1] - 'A' + 1); + else + return 0; /* There are no valid 1 char language codes */ + + /* 3rd char */ + if (str[2] >= 'a' && str[2] <= 'z') + lang += 27*27*(str[2] - 'a' + 1); + else if (str[2] >= 'A' && str[2] <= 'Z') + lang += 27*27*(str[2] - 'A' + 1); + + /* We don't support iso 639-6 4 char codes, cos the standard + * has been withdrawn, and no one uses them. */ + return lang; +} + +char *fz_string_from_text_language(char str[8], fz_text_language lang) +{ + int c; + + /* str is supposed to be at least 8 chars in size */ + if (str == NULL) + return NULL; + if (lang == FZ_LANG_UNSET) + return NULL; + + if (lang == FZ_LANG_zh_Hant) + fz_strlcpy(str, "zh-Hant", 8); + else if (lang == FZ_LANG_zh_Hans) + fz_strlcpy(str, "zh-Hans", 8); + else + { + c = lang % 27; + lang = lang / 27; + str[0] = c == 0 ? 0 : c - 1 + 'a'; + c = lang % 27; + lang = lang / 27; + str[1] = c == 0 ? 0 : c - 1 + 'a'; + c = lang % 27; + str[2] = c == 0 ? 0 : c - 1 + 'a'; + str[3] = 0; + } + + return str; +}
