diff mupdf-source/source/xps/xps-glyphs.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/xps/xps-glyphs.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,695 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "mupdf/fitz.h"
+#include "xps-imp.h"
+
+#include <ft2build.h>
+#include FT_FREETYPE_H
+#include FT_ADVANCES_H
+
+static inline int ishex(int a)
+{
+	return (a >= 'A' && a <= 'F') ||
+		(a >= 'a' && a <= 'f') ||
+		(a >= '0' && a <= '9');
+}
+
+static inline int unhex(int a)
+{
+	if (a >= 'A' && a <= 'F') return a - 'A' + 0xA;
+	if (a >= 'a' && a <= 'f') return a - 'a' + 0xA;
+	if (a >= '0' && a <= '9') return a - '0';
+	return 0;
+}
+
+int
+xps_count_font_encodings(fz_context *ctx, fz_font *font)
+{
+	FT_Face face = fz_font_ft_face(ctx, font);
+	return face->num_charmaps;
+}
+
+void
+xps_identify_font_encoding(fz_context *ctx, fz_font *font, int idx, int *pid, int *eid)
+{
+	FT_Face face = fz_font_ft_face(ctx, font);
+	*pid = face->charmaps[idx]->platform_id;
+	*eid = face->charmaps[idx]->encoding_id;
+}
+
+void
+xps_select_font_encoding(fz_context *ctx, fz_font *font, int idx)
+{
+	FT_Face face = fz_font_ft_face(ctx, font);
+	fz_ft_lock(ctx);
+	FT_Set_Charmap(face, face->charmaps[idx]);
+	fz_ft_unlock(ctx);
+}
+
+int
+xps_encode_font_char(fz_context *ctx, fz_font *font, int code)
+{
+	FT_Face face = fz_font_ft_face(ctx, font);
+	int gid;
+	fz_ft_lock(ctx);
+	gid = FT_Get_Char_Index(face, code);
+	if (gid == 0 && face->charmap && face->charmap->platform_id == 3 && face->charmap->encoding_id == 0)
+		gid = FT_Get_Char_Index(face, 0xF000 | code);
+	fz_ft_unlock(ctx);
+	return gid;
+}
+
+void
+xps_measure_font_glyph(fz_context *ctx, xps_document *doc, fz_font *font, int gid, xps_glyph_metrics *mtx)
+{
+	int mask = FT_LOAD_NO_SCALE | FT_LOAD_IGNORE_TRANSFORM;
+	FT_Face face = fz_font_ft_face(ctx, font);
+	FT_Fixed hadv = 0, vadv = 0;
+
+	fz_ft_lock(ctx);
+	FT_Get_Advance(face, gid, mask, &hadv);
+	FT_Get_Advance(face, gid, mask | FT_LOAD_VERTICAL_LAYOUT, &vadv);
+	fz_ft_unlock(ctx);
+
+	mtx->hadv = (float) hadv / face->units_per_EM;
+	mtx->vadv = (float) vadv / face->units_per_EM;
+	mtx->vorg = (float) face->ascender / face->units_per_EM;
+}
+
+static fz_font *
+xps_lookup_font_imp(fz_context *ctx, xps_document *doc, char *name)
+{
+	xps_font_cache *cache;
+	for (cache = doc->font_table; cache; cache = cache->next)
+		if (!xps_strcasecmp(cache->name, name))
+			return fz_keep_font(ctx, cache->font);
+	return NULL;
+}
+
+static void
+xps_insert_font(fz_context *ctx, xps_document *doc, char *name, fz_font *font)
+{
+	xps_font_cache *cache = fz_malloc_struct(ctx, xps_font_cache);
+	cache->font = NULL;
+	cache->name = NULL;
+
+	fz_try(ctx)
+	{
+		cache->font = fz_keep_font(ctx, font);
+		cache->name = fz_strdup(ctx, name);
+		cache->next = doc->font_table;
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_font(ctx, cache->font);
+		fz_free(ctx, cache->name);
+		fz_free(ctx, cache);
+		fz_rethrow(ctx);
+	}
+
+	doc->font_table = cache;
+}
+
+/*
+ * Some fonts in XPS are obfuscated by XOR:ing the first 32 bytes of the
+ * data with the GUID in the fontname.
+ */
+static void
+xps_deobfuscate_font_resource(fz_context *ctx, xps_document *doc, xps_part *part)
+{
+	unsigned char buf[33];
+	unsigned char key[16];
+	unsigned char *data;
+	size_t size;
+	char *p;
+	int i;
+
+	size = fz_buffer_storage(ctx, part->data, &data);
+	if (size < 32)
+	{
+		fz_warn(ctx, "insufficient data for font deobfuscation");
+		return;
+	}
+
+	p = strrchr(part->name, '/');
+	if (!p)
+		p = part->name;
+
+	for (i = 0; i < 32 && *p; p++)
+	{
+		if (ishex(*p))
+			buf[i++] = *p;
+	}
+	buf[i] = 0;
+
+	if (i != 32)
+	{
+		fz_warn(ctx, "cannot extract GUID from obfuscated font part name");
+		return;
+	}
+
+	for (i = 0; i < 16; i++)
+		key[i] = unhex(buf[i*2+0]) * 16 + unhex(buf[i*2+1]);
+
+	for (i = 0; i < 16; i++)
+	{
+		data[i] ^= key[15-i];
+		data[i+16] ^= key[15-i];
+	}
+}
+
+static void
+xps_select_best_font_encoding(fz_context *ctx, xps_document *doc, fz_font *font)
+{
+	static struct { int pid, eid; } xps_cmap_list[] =
+	{
+		{ 3, 10 },		/* Unicode with surrogates */
+		{ 3, 1 },		/* Unicode without surrogates */
+		{ 3, 5 },		/* Wansung */
+		{ 3, 4 },		/* Big5 */
+		{ 3, 3 },		/* Prc */
+		{ 3, 2 },		/* ShiftJis */
+		{ 3, 0 },		/* Symbol */
+		{ 1, 0 },
+		{ -1, -1 },
+	};
+
+	int i, k, n, pid, eid;
+
+	n = xps_count_font_encodings(ctx, font);
+	for (k = 0; xps_cmap_list[k].pid != -1; k++)
+	{
+		for (i = 0; i < n; i++)
+		{
+			xps_identify_font_encoding(ctx, font, i, &pid, &eid);
+			if (pid == xps_cmap_list[k].pid && eid == xps_cmap_list[k].eid)
+			{
+				xps_select_font_encoding(ctx, font, i);
+				return;
+			}
+		}
+	}
+
+	fz_warn(ctx, "cannot find a suitable cmap");
+}
+
+fz_font *
+xps_lookup_font(fz_context *ctx, xps_document *doc, char *base_uri, char *font_uri, char *style_att)
+{
+	char partname[1024];
+	char fakename[1024];
+	char *subfont;
+	int subfontid = 0;
+	xps_part *part;
+	fz_font *font;
+
+	xps_resolve_url(ctx, doc, partname, base_uri, font_uri, sizeof partname);
+	subfont = strrchr(partname, '#');
+	if (subfont)
+	{
+		subfontid = atoi(subfont + 1);
+		*subfont = 0;
+	}
+
+	/* Make a new part name for font with style simulation applied */
+	fz_strlcpy(fakename, partname, sizeof fakename);
+	if (style_att)
+	{
+		if (!strcmp(style_att, "BoldSimulation"))
+			fz_strlcat(fakename, "#Bold", sizeof fakename);
+		else if (!strcmp(style_att, "ItalicSimulation"))
+			fz_strlcat(fakename, "#Italic", sizeof fakename);
+		else if (!strcmp(style_att, "BoldItalicSimulation"))
+			fz_strlcat(fakename, "#BoldItalic", sizeof fakename);
+	}
+
+	font = xps_lookup_font_imp(ctx, doc, fakename);
+	if (!font)
+	{
+		fz_buffer *buf = NULL;
+		fz_var(buf);
+
+		fz_try(ctx)
+		{
+			part = xps_read_part(ctx, doc, partname);
+		}
+		fz_catch(ctx)
+		{
+			if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+			{
+				if (doc->cookie)
+				{
+					doc->cookie->incomplete = 1;
+					fz_ignore_error(ctx);
+				}
+				else
+					fz_rethrow(ctx);
+			}
+			else
+			{
+				fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
+				fz_report_error(ctx);
+				fz_warn(ctx, "cannot find font resource part '%s'", partname);
+			}
+			return NULL;
+		}
+
+		/* deobfuscate if necessary */
+		if (strstr(part->name, ".odttf"))
+			xps_deobfuscate_font_resource(ctx, doc, part);
+		if (strstr(part->name, ".ODTTF"))
+			xps_deobfuscate_font_resource(ctx, doc, part);
+
+		fz_var(font);
+		fz_try(ctx)
+		{
+			font = fz_new_font_from_buffer(ctx, NULL, part->data, subfontid, 1);
+			xps_select_best_font_encoding(ctx, doc, font);
+			xps_insert_font(ctx, doc, fakename, font);
+		}
+		fz_always(ctx)
+		{
+			xps_drop_part(ctx, doc, part);
+		}
+		fz_catch(ctx)
+		{
+			fz_drop_font(ctx, font);
+			fz_warn(ctx, "cannot load font resource '%s'", partname);
+			return NULL;
+		}
+
+		if (style_att)
+		{
+			fz_font_flags_t *flags = fz_font_flags(font);
+			int bold = !!strstr(style_att, "Bold");
+			int italic = !!strstr(style_att, "Italic");
+			flags->fake_bold = bold;
+			flags->is_bold = bold;
+			flags->fake_italic = italic;
+			flags->is_italic = italic;
+		}
+	}
+	return font;
+}
+
+/*
+ * Parse and draw an XPS <Glyphs> element.
+ *
+ * Indices syntax:
+
+ GlyphIndices	= GlyphMapping ( ";" GlyphMapping )
+ GlyphMapping	= ( [ClusterMapping] GlyphIndex ) [GlyphMetrics]
+ ClusterMapping = "(" ClusterCodeUnitCount [":" ClusterGlyphCount] ")"
+ ClusterCodeUnitCount	= * DIGIT
+ ClusterGlyphCount		= * DIGIT
+ GlyphIndex		= * DIGIT
+ GlyphMetrics	= "," AdvanceWidth ["," uOffset ["," vOffset]]
+ AdvanceWidth	= ["+"] RealNum
+ uOffset		= ["+" | "-"] RealNum
+ vOffset		= ["+" | "-"] RealNum
+ RealNum		= ((DIGIT ["." DIGIT]) | ("." DIGIT)) [Exponent]
+ Exponent		= ( ("E"|"e") ("+"|"-") DIGIT )
+
+ */
+
+static char *
+xps_parse_digits(char *s, int *digit)
+{
+	*digit = 0;
+	while (*s >= '0' && *s <= '9')
+	{
+		*digit = *digit * 10 + (*s - '0');
+		s ++;
+	}
+	return s;
+}
+
+static char *
+xps_parse_real_num(char *s, float *number, int *override)
+{
+	char *tail;
+	float v;
+	v = fz_strtof(s, &tail);
+	*override = tail != s;
+	if (*override)
+		*number = v;
+	return tail;
+}
+
+static char *
+xps_parse_cluster_mapping(char *s, int *code_count, int *glyph_count)
+{
+	if (*s == '(')
+		s = xps_parse_digits(s + 1, code_count);
+	if (*s == ':')
+		s = xps_parse_digits(s + 1, glyph_count);
+	if (*s == ')')
+		s ++;
+	return s;
+}
+
+static char *
+xps_parse_glyph_index(char *s, int *glyph_index)
+{
+	if (*s >= '0' && *s <= '9')
+		s = xps_parse_digits(s, glyph_index);
+	return s;
+}
+
+static char *
+xps_parse_glyph_metrics(char *s, float *advance, float *uofs, float *vofs, int bidi_level)
+{
+	int override;
+	if (*s == ',')
+	{
+		s = xps_parse_real_num(s + 1, advance, &override);
+		if (override && (bidi_level & 1))
+			*advance = -*advance;
+	}
+	if (*s == ',')
+		s = xps_parse_real_num(s + 1, uofs, &override);
+	if (*s == ',')
+		s = xps_parse_real_num(s + 1, vofs, &override);
+	return s;
+}
+
+fz_text *
+xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, fz_matrix ctm,
+	fz_font *font, float size, float originx, float originy,
+	int is_sideways, int bidi_level,
+	char *indices, char *unicode)
+{
+	xps_glyph_metrics mtx;
+	fz_text *text;
+	fz_matrix tm;
+	float x = originx;
+	float y = originy;
+	char *us = unicode;
+	char *is = indices;
+	size_t un = 0;
+
+	if (!unicode && !indices)
+		fz_warn(ctx, "glyphs element with neither characters nor indices");
+
+	if (us)
+	{
+		if (us[0] == '{' && us[1] == '}')
+			us = us + 2;
+		un = strlen(us);
+	}
+
+	if (is_sideways)
+		tm = fz_pre_scale(fz_rotate(90), -size, size);
+	else
+		tm = fz_scale(size, -size);
+
+	text = fz_new_text(ctx);
+
+	fz_try(ctx)
+	{
+		while ((us && un > 0) || (is && *is))
+		{
+			int char_code = FZ_REPLACEMENT_CHARACTER;
+			int code_count = 1;
+			int glyph_count = 1;
+
+			if (is && *is)
+			{
+				is = xps_parse_cluster_mapping(is, &code_count, &glyph_count);
+			}
+
+			if (code_count < 1)
+				code_count = 1;
+			if (glyph_count < 1)
+				glyph_count = 1;
+
+			/* TODO: add code chars with cluster mappings for text extraction */
+
+			while (code_count--)
+			{
+				if (us && un > 0)
+				{
+					int t = fz_chartorune(&char_code, us);
+					us += t; un -= t;
+				}
+			}
+
+			while (glyph_count--)
+			{
+				int glyph_index = -1;
+				float u_offset = 0;
+				float v_offset = 0;
+				float advance;
+				int dir;
+
+				if (is && *is)
+					is = xps_parse_glyph_index(is, &glyph_index);
+
+				if (glyph_index == -1)
+					glyph_index = xps_encode_font_char(ctx, font, char_code);
+
+				xps_measure_font_glyph(ctx, doc, font, glyph_index, &mtx);
+				if (is_sideways)
+					advance = mtx.vadv * 100;
+				else if (bidi_level & 1)
+					advance = -mtx.hadv * 100;
+				else
+					advance = mtx.hadv * 100;
+
+				if (fz_font_flags(font)->fake_bold)
+					advance *= 1.02f;
+
+				if (is && *is)
+				{
+					is = xps_parse_glyph_metrics(is, &advance, &u_offset, &v_offset, bidi_level);
+					if (*is == ';')
+						is ++;
+				}
+
+				if (bidi_level & 1)
+					u_offset = -mtx.hadv * 100 - u_offset;
+
+				u_offset = u_offset * 0.01f * size;
+				v_offset = v_offset * 0.01f * size;
+
+				if (is_sideways)
+				{
+					tm.e = x + u_offset + (mtx.vorg * size);
+					tm.f = y - v_offset + (mtx.hadv * 0.5f * size);
+				}
+				else
+				{
+					tm.e = x + u_offset;
+					tm.f = y - v_offset;
+				}
+
+				dir = bidi_level & 1 ? FZ_BIDI_RTL : FZ_BIDI_LTR;
+				fz_show_glyph(ctx, text, font, tm, glyph_index, char_code, is_sideways, bidi_level, dir, FZ_LANG_UNSET);
+
+				x += advance * 0.01f * size;
+			}
+		}
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_text(ctx, text);
+		fz_rethrow(ctx);
+	}
+
+	return text;
+}
+
+void
+xps_parse_glyphs(fz_context *ctx, xps_document *doc, fz_matrix ctm,
+		char *base_uri, xps_resource *dict, fz_xml *root)
+{
+	fz_device *dev = doc->dev;
+
+	fz_xml *node;
+
+	char *fill_uri;
+	char *opacity_mask_uri;
+
+	char *bidi_level_att;
+	char *fill_att;
+	char *font_size_att;
+	char *font_uri_att;
+	char *origin_x_att;
+	char *origin_y_att;
+	char *is_sideways_att;
+	char *indices_att;
+	char *unicode_att;
+	char *style_att;
+	char *transform_att;
+	char *clip_att;
+	char *opacity_att;
+	char *opacity_mask_att;
+
+	fz_xml *transform_tag = NULL;
+	fz_xml *clip_tag = NULL;
+	fz_xml *fill_tag = NULL;
+	fz_xml *opacity_mask_tag = NULL;
+
+	char *fill_opacity_att = NULL;
+
+	fz_font *font;
+
+	float font_size = 10;
+	int is_sideways = 0;
+	int bidi_level = 0;
+
+	fz_text *text = NULL;
+	fz_rect area;
+
+	/*
+	 * Extract attributes and extended attributes.
+	 */
+
+	bidi_level_att = fz_xml_att(root, "BidiLevel");
+	fill_att = fz_xml_att(root, "Fill");
+	font_size_att = fz_xml_att(root, "FontRenderingEmSize");
+	font_uri_att = fz_xml_att(root, "FontUri");
+	origin_x_att = fz_xml_att(root, "OriginX");
+	origin_y_att = fz_xml_att(root, "OriginY");
+	is_sideways_att = fz_xml_att(root, "IsSideways");
+	indices_att = fz_xml_att(root, "Indices");
+	unicode_att = fz_xml_att(root, "UnicodeString");
+	style_att = fz_xml_att(root, "StyleSimulations");
+	transform_att = fz_xml_att(root, "RenderTransform");
+	clip_att = fz_xml_att(root, "Clip");
+	opacity_att = fz_xml_att(root, "Opacity");
+	opacity_mask_att = fz_xml_att(root, "OpacityMask");
+
+	for (node = fz_xml_down(root); node; node = fz_xml_next(node))
+	{
+		if (fz_xml_is_tag(node, "Glyphs.RenderTransform"))
+			transform_tag = fz_xml_down(node);
+		if (fz_xml_is_tag(node, "Glyphs.OpacityMask"))
+			opacity_mask_tag = fz_xml_down(node);
+		if (fz_xml_is_tag(node, "Glyphs.Clip"))
+			clip_tag = fz_xml_down(node);
+		if (fz_xml_is_tag(node, "Glyphs.Fill"))
+			fill_tag = fz_xml_down(node);
+	}
+
+	fill_uri = base_uri;
+	opacity_mask_uri = base_uri;
+
+	xps_resolve_resource_reference(ctx, doc, dict, &transform_att, &transform_tag, NULL);
+	xps_resolve_resource_reference(ctx, doc, dict, &clip_att, &clip_tag, NULL);
+	xps_resolve_resource_reference(ctx, doc, dict, &fill_att, &fill_tag, &fill_uri);
+	xps_resolve_resource_reference(ctx, doc, dict, &opacity_mask_att, &opacity_mask_tag, &opacity_mask_uri);
+
+	/*
+	 * Check that we have all the necessary information.
+	 */
+
+	if (!font_size_att || !font_uri_att || !origin_x_att || !origin_y_att) {
+		fz_warn(ctx, "missing attributes in glyphs element");
+		return;
+	}
+
+	if (!indices_att && !unicode_att)
+		return; /* nothing to draw */
+
+	if (is_sideways_att)
+		is_sideways = !strcmp(is_sideways_att, "true");
+
+	if (bidi_level_att)
+		bidi_level = atoi(bidi_level_att);
+
+	/*
+	 * Find and load the font resource.
+	 */
+
+	font = xps_lookup_font(ctx, doc, base_uri, font_uri_att, style_att);
+	if (!font)
+		font = fz_new_base14_font(ctx, "Times-Roman");
+
+	fz_var(text);
+
+	fz_try(ctx)
+	{
+		/*
+		 * Set up graphics state.
+		 */
+
+		ctm = xps_parse_transform(ctx, doc, transform_att, transform_tag, ctm);
+
+		if (clip_att || clip_tag)
+			xps_clip(ctx, doc, ctm, dict, clip_att, clip_tag);
+
+		font_size = fz_atof(font_size_att);
+
+		text = xps_parse_glyphs_imp(ctx, doc, ctm, font, font_size,
+				fz_atof(origin_x_att), fz_atof(origin_y_att),
+				is_sideways, bidi_level, indices_att, unicode_att);
+
+		area = fz_bound_text(ctx, text, NULL, ctm);
+
+		xps_begin_opacity(ctx, doc, ctm, area, opacity_mask_uri, dict, opacity_att, opacity_mask_tag);
+
+		/* If it's a solid color brush fill/stroke do a simple fill */
+
+		if (fz_xml_is_tag(fill_tag, "SolidColorBrush"))
+		{
+			fill_opacity_att = fz_xml_att(fill_tag, "Opacity");
+			fill_att = fz_xml_att(fill_tag, "Color");
+			fill_tag = NULL;
+		}
+
+		if (fill_att)
+		{
+			float samples[FZ_MAX_COLORS];
+			fz_colorspace *colorspace;
+
+			xps_parse_color(ctx, doc, base_uri, fill_att, &colorspace, samples);
+			if (fill_opacity_att)
+				samples[0] *= fz_atof(fill_opacity_att);
+			xps_set_color(ctx, doc, colorspace, samples);
+
+			fz_fill_text(ctx, dev, text, ctm, doc->colorspace, doc->color, doc->alpha, fz_default_color_params);
+		}
+
+		/* If it's a complex brush, use the charpath as a clip mask */
+
+		if (fill_tag)
+		{
+			fz_clip_text(ctx, dev, text, ctm, area);
+			xps_parse_brush(ctx, doc, ctm, area, fill_uri, dict, fill_tag);
+			fz_pop_clip(ctx, dev);
+		}
+
+		xps_end_opacity(ctx, doc, opacity_mask_uri, dict, opacity_att, opacity_mask_tag);
+
+		if (clip_att || clip_tag)
+			fz_pop_clip(ctx, dev);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_text(ctx, text);
+		fz_drop_font(ctx, font);
+	}
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+}