diff mupdf-source/source/fitz/text.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/text.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,305 @@
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "mupdf/fitz.h"
+
+#include <string.h>
+
+fz_text *
+fz_new_text(fz_context *ctx)
+{
+	fz_text *text = fz_malloc_struct(ctx, fz_text);
+	text->refs = 1;
+	return text;
+}
+
+fz_text *
+fz_keep_text(fz_context *ctx, const fz_text *textc)
+{
+	fz_text *text = (fz_text *)textc; /* Explicit cast away of const */
+
+	return fz_keep_imp(ctx, text, &text->refs);
+}
+
+void
+fz_drop_text(fz_context *ctx, const fz_text *textc)
+{
+	fz_text *text = (fz_text *)textc; /* Explicit cast away of const */
+
+	if (fz_drop_imp(ctx, text, &text->refs))
+	{
+		fz_text_span *span = text->head;
+		while (span)
+		{
+			fz_text_span *next = span->next;
+			fz_drop_font(ctx, span->font);
+			fz_free(ctx, span->items);
+			fz_free(ctx, span);
+			span = next;
+		}
+		fz_free(ctx, text);
+	}
+}
+
+static fz_text_span *
+fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
+{
+	fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
+	span->font = fz_keep_font(ctx, font);
+	span->wmode = wmode;
+	span->bidi_level = bidi_level;
+	span->markup_dir = markup_dir;
+	span->language = language;
+	span->trm = trm;
+	span->trm.e = 0;
+	span->trm.f = 0;
+	return span;
+}
+
+static fz_text_span *
+fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
+{
+	if (!text->tail)
+	{
+		text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
+	}
+	else if (text->tail->font != font ||
+		text->tail->wmode != (unsigned int)wmode ||
+		text->tail->bidi_level != (unsigned int)bidi_level ||
+		text->tail->markup_dir != (unsigned int)markup_dir ||
+		text->tail->language != (unsigned int)language ||
+		text->tail->trm.a != trm.a ||
+		text->tail->trm.b != trm.b ||
+		text->tail->trm.c != trm.c ||
+		text->tail->trm.d != trm.d)
+	{
+		text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
+	}
+	return text->tail;
+}
+
+static void
+fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n)
+{
+	int new_cap = span->cap;
+	if (span->len + n < new_cap)
+		return;
+	while (span->len + n > new_cap)
+		new_cap = new_cap + 36;
+	span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item);
+	span->cap = new_cap;
+}
+
+void
+fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
+{
+	fz_text_span *span;
+
+	if (text->refs != 1)
+		fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot modify shared text objects");
+
+	span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
+
+	fz_grow_text_span(ctx, span, 1);
+
+	span->items[span->len].ucs = ucs;
+	span->items[span->len].gid = gid;
+	span->items[span->len].cid = cid;
+	span->items[span->len].x = trm.e;
+	span->items[span->len].y = trm.f;
+	span->items[span->len].adv = adv;
+	span->len++;
+}
+
+void
+fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
+{
+	float adv = (gid >= 0) ? fz_advance_glyph(ctx, font, gid, wmode) : 0;
+	fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang);
+}
+
+fz_matrix
+fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s,
+	int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
+{
+	fz_font *font;
+	int gid, ucs;
+	float adv;
+
+	while (*s)
+	{
+		s += fz_chartorune(&ucs, s);
+		gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font);
+		if (gid >= 0)
+			adv = fz_advance_glyph(ctx, font, gid, wmode);
+		else
+			adv = 0;
+		fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language);
+		if (wmode == 0)
+			trm = fz_pre_translate(trm, adv, 0);
+		else
+			trm = fz_pre_translate(trm, 0, -adv);
+	}
+
+	return trm;
+}
+
+fz_matrix
+fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s,
+	int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
+{
+	fz_font *font;
+	int gid, ucs;
+	float adv;
+
+	while (*s)
+	{
+		s += fz_chartorune(&ucs, s);
+		gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font);
+		adv = fz_advance_glyph(ctx, font, gid, wmode);
+		if (wmode == 0)
+			trm = fz_pre_translate(trm, adv, 0);
+		else
+			trm = fz_pre_translate(trm, 0, -adv);
+	}
+
+	return trm;
+}
+
+fz_rect
+fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm)
+{
+	fz_text_span *span;
+	fz_matrix tm, trm;
+	fz_rect gbox;
+	fz_rect bbox;
+	int i;
+
+	bbox = fz_empty_rect;
+
+	for (span = text->head; span; span = span->next)
+	{
+		if (span->len > 0)
+		{
+			tm = span->trm;
+			for (i = 0; i < span->len; i++)
+			{
+				if (span->items[i].gid >= 0)
+				{
+					tm.e = span->items[i].x;
+					tm.f = span->items[i].y;
+					trm = fz_concat(tm, ctm);
+					gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm);
+					bbox = fz_union_rect(bbox, gbox);
+				}
+			}
+		}
+	}
+
+	if (!fz_is_empty_rect(bbox))
+	{
+		if (stroke)
+			bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm);
+
+		/* Compensate for the glyph cache limited positioning precision */
+		bbox.x0 -= 1;
+		bbox.y0 -= 1;
+		bbox.x1 += 1;
+		bbox.y1 += 1;
+	}
+
+	return bbox;
+}
+
+fz_text_language fz_text_language_from_string(const char *str)
+{
+	fz_text_language lang;
+
+	if (str == NULL || strlen(str) == 0)
+		return FZ_LANG_UNSET;
+
+	if (!strcmp(str, "zh-Hant") ||
+			!strcmp(str, "zh-HK") ||
+			!strcmp(str, "zh-MO") ||
+			!strcmp(str, "zh-SG") ||
+			!strcmp(str, "zh-TW"))
+		return FZ_LANG_zh_Hant;
+	if (!strcmp(str, "zh-Hans") ||
+			!strcmp(str, "zh-CN"))
+		return FZ_LANG_zh_Hans;
+
+	/* 1st char */
+	if (str[0] >= 'a' && str[0] <= 'z')
+		lang = str[0] - 'a' + 1;
+	else if (str[0] >= 'A' && str[0] <= 'Z')
+		lang = str[0] - 'A' + 1;
+	else
+		return 0;
+
+	/* 2nd char */
+	if (str[1] >= 'a' && str[1] <= 'z')
+		lang += 27*(str[1] - 'a' + 1);
+	else if (str[1] >= 'A' && str[1] <= 'Z')
+		lang += 27*(str[1] - 'A' + 1);
+	else
+		return 0; /* There are no valid 1 char language codes */
+
+	/* 3rd char */
+	if (str[2] >= 'a' && str[2] <= 'z')
+		lang += 27*27*(str[2] - 'a' + 1);
+	else if (str[2] >= 'A' && str[2] <= 'Z')
+		lang += 27*27*(str[2] - 'A' + 1);
+
+	/* We don't support iso 639-6 4 char codes, cos the standard
+	 * has been withdrawn, and no one uses them. */
+	return lang;
+}
+
+char *fz_string_from_text_language(char str[8], fz_text_language lang)
+{
+	int c;
+
+	/* str is supposed to be at least 8 chars in size */
+	if (str == NULL)
+		return NULL;
+	if (lang == FZ_LANG_UNSET)
+		return NULL;
+
+	if (lang == FZ_LANG_zh_Hant)
+		fz_strlcpy(str, "zh-Hant", 8);
+	else if (lang == FZ_LANG_zh_Hans)
+		fz_strlcpy(str, "zh-Hans", 8);
+	else
+	{
+		c = lang % 27;
+		lang = lang / 27;
+		str[0] = c == 0 ? 0 : c - 1 + 'a';
+		c = lang % 27;
+		lang = lang / 27;
+		str[1] = c == 0 ? 0 : c - 1 + 'a';
+		c = lang % 27;
+		str[2] = c == 0 ? 0 : c - 1 + 'a';
+		str[3] = 0;
+	}
+
+	return str;
+}