Python2/PyMuPDF: mupdf-source/source/fitz/subset-ttf.c comparison

comparison mupdf-source/source/fitz/subset-ttf.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children

comparison

equal deleted inserted replaced

-:1d09e1dec1d9
+:b50eed0cc0ef
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+#include "mupdf/fitz.h"
+/*
+	For the purposes of this code, and to save my tiny brain from
+	overload, we will adopt the following notation:
+	1) The PDF file contains bytes of data. These bytes are looked
+	up in the MuPDF font handling to resolve to 'glyph ids' (gids).
+	These account for all the different encodings etc in use,
+	including the 'cmap' table within the font.
+	2) We are given the list of gids that are used in the document.
+	We arrange to keep any entries in the cmap or post tables that
+	maps to these gids.
+	We map the gids to the bottom of the range. This means that the
+	cmap and post tables need to be updated.
+	A similar optimisation would be to compress the range of cids
+	used to a prefix of the range used. This would mean that the
+	calling code needs to rewrite the data within the PDF file -
+	both in terms of the strings used with the PDF streams, and in
+	terms of the ToUnicode tables there (and the Widths etc).
+	For now, we'll ignore this optimisation.
+	Possibly, in the case of 'Identity' Tounicode mappings we
+	wouldn't actually want to do this range compression? It'd only
+	make the file larger.
+*/
+typedef struct
+{
+	uint16_t pid;
+	uint16_t psid;
+	uint32_t max;
+	uint16_t gid[256];
+} encoding_t;
+typedef struct
+{
+	uint32_t tag;
+	uint32_t checksum;
+	fz_buffer *tab;
+} tagged_table_t;
+typedef struct
+{
+	int is_otf;
+	int symbolic;
+	encoding_t *encoding;
+	uint16_t orig_num_glyphs;
+	uint16_t new_num_glyphs;
+	uint16_t index_to_loc_format;
+	uint8_t *index_to_loc_formatp;
+	uint16_t orig_num_long_hor_metrics;
+	uint16_t new_num_long_hor_metrics;
+	/* Pointer to the old tables (in the tagged table below) */
+	uint8_t *loca;
+	size_t *loca_len;
+	uint8_t *maxp;
+	/* Maps from old gid to new gid */
+	uint16_t *gid_renum;
+	int max;
+	int len;
+	tagged_table_t *table;
+} ttf_t;
+static uint32_t
+checksum(fz_buffer *buf)
+{
+	size_t i;
+	const uint8_t *d = (const uint8_t *)buf->data;
+	uint32_t cs = 0;
+	for (i = buf->len>>2; i > 0; i--)
+	{
+		cs += d[0]<<24;
+		cs += d[1]<<16;
+		cs += d[2]<<8;
+		cs += d[3];
+		d += 4;
+	}
+	i = buf->len - (buf->len & ~3);
+	switch (i)
+	{
+	case 3:
+		cs += d[2]<<8;
+		/* fallthrough */
+	case 2:
+		cs += d[1]<<16;
+		/* fallthrough */
+	case 1:
+		cs += d[0]<<24;
+	default:
+		break;
+	}
+	return cs;
+}
+static uint32_t
+find_table(fz_context *ctx, fz_stream *stm, uint32_t tag, uint32_t *len)
+{
+	int num_tables;
+	int i;
+	fz_seek(ctx, stm, 4, SEEK_SET);
+	num_tables = fz_read_int16(ctx, stm);
+	fz_seek(ctx, stm, 12, SEEK_SET);
+	for (i = 0; i < num_tables; i++)
+	{
+		uint32_t t = fz_read_uint32(ctx, stm);
+		uint32_t cs = fz_read_uint32(ctx, stm);
+		uint32_t off = fz_read_uint32(ctx, stm);
+		(void) cs; /* UNUSED */
+		*len = fz_read_uint32(ctx, stm);
+		if (t == tag)
+			return off;
+	}
+	return 0;
+}
+static fz_buffer *
+read_table(fz_context *ctx, fz_stream *stm, uint32_t tag, int compulsory)
+{
+	uint32_t size;
+	uint32_t off = find_table(ctx, stm, tag, &size);
+	fz_buffer *buf;
+	if (off == 0)
+	{
+		if (compulsory)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Required %c%c%c%c table missing", tag>>24, (tag>>16)&0xff, (tag>>8)&0xff, tag & 0xff);
+		return NULL;
+	}
+	fz_seek(ctx, stm, off, SEEK_SET);
+	buf = fz_new_buffer(ctx, size);
+	fz_try(ctx)
+	{
+		fz_read(ctx, stm, buf->data, size);
+		buf->len = size;
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+		fz_rethrow(ctx);
+	}
+	return buf;
+}
+#define TAG(s) \
+	(	(((uint8_t)s[0])<<24) | \
+		(((uint8_t)s[1])<<16) | \
+		(((uint8_t)s[2])<<8) | \
+		(((uint8_t)s[3])))
+static void
+add_table(fz_context *ctx, ttf_t *ttf, uint32_t tag, fz_buffer *tab)
+{
+	fz_try(ctx)
+	{
+		if (ttf->max == ttf->len)
+		{
+			int n = ttf->max * 2;
+			if (n == 0)
+				n = 16;
+			ttf->table = fz_realloc(ctx, ttf->table, sizeof(*ttf->table) * n);
+			ttf->max = n;
+		}
+		ttf->table[ttf->len].tag = tag;
+		ttf->table[ttf->len].tab = tab;
+		ttf->len++;
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, tab);
+		fz_rethrow(ctx);
+	}
+}
+static void
+copy_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm, uint32_t tag, int compulsory)
+{
+	fz_buffer *t;
+	t = read_table(ctx, stm, tag, compulsory);
+	if (t)
+		add_table(ctx, ttf, tag, t);
+}
+static int
+tabcmp(const void *a_, const void *b_)
+{
+	const tagged_table_t *a = (const tagged_table_t *)a_;
+	const tagged_table_t *b = (const tagged_table_t *)b_;
+	return (a->tag - b->tag);
+}
+static void
+sort_tables(fz_context *ctx, ttf_t *ttf)
+{
+	/* Avoid scanbuild/coverity false warning with this unnecessary test */
+	if (ttf->table == NULL || ttf->len == 0)
+		return;
+	qsort(ttf->table, ttf->len, sizeof(tagged_table_t), tabcmp);
+}
+static void
+checksum_tables(fz_context *ctx, ttf_t *ttf)
+{
+	int i;
+	for (i = 0; i < ttf->len; i++)
+		ttf->table[i].checksum = checksum(ttf->table[i].tab);
+}
+static void
+write_tables(fz_context *ctx, ttf_t *ttf, fz_output *out)
+{
+	int i = 0;
+	uint32_t offset;
+	/* scalar type - TTF for now - may need to cope with other types later. */
+	if (ttf->is_otf)
+		fz_write_int32_be(ctx, out, 0x4f54544f);
+	else
+		fz_write_int32_be(ctx, out, 0x00010000);
+	/* number of tables */
+	fz_write_uint16_be(ctx, out, ttf->len);
+	while (1<<(i+1) <= ttf->len)
+		i++;
+	/* searchRange */
+	fz_write_uint16_be(ctx, out, (1<<i)<<4);
+	/* entrySelector */
+	fz_write_uint16_be(ctx, out, i);
+	/* rangeShift*/
+	fz_write_uint16_be(ctx, out, (ttf->len - (1<<i))<<4);
+	/* Table directory */
+	offset = 12 + ttf->len * 16;
+	for (i = 0; i < ttf->len; i++)
+	{
+		fz_write_uint32_be(ctx, out, ttf->table[i].tag);
+		fz_write_uint32_be(ctx, out, ttf->table[i].checksum);
+		fz_write_uint32_be(ctx, out, offset);
+		fz_write_uint32_be(ctx, out, (uint32_t)ttf->table[i].tab->len);
+		offset += (uint32_t)ttf->table[i].tab->len;
+	}
+	/* Now the tables in turn */
+	for (i = 0; i < ttf->len; i++)
+	{
+		fz_write_buffer(ctx, out, ttf->table[i].tab);
+	}
+}
+static void
+fix_checksum(fz_context *ctx, fz_buffer *buf)
+{
+	uint8_t *data;
+	uint32_t sum = 0;
+	size_t len = fz_buffer_storage(ctx, buf, &data);
+	uint32_t namesize;
+	fz_stream *stm = fz_open_buffer(ctx, buf);
+	uint32_t csumpos = find_table(ctx, stm, TAG("head"), &namesize) + 8;
+	(void) len; // UNUSED
+	fz_drop_stream(ctx, stm);
+	/* First off, blat the old checksum */
+	memset(data+csumpos, 0, 4);
+	sum = checksum(buf);
+	sum = 0xb1b0afba-sum;
+	/* Insert it. */
+	data[csumpos] = sum>>24;
+	data[csumpos+1] = sum>>16;
+	data[csumpos+2] = sum>>8;
+	data[csumpos+3] = sum;
+}
+typedef struct
+{
+	uint16_t platform_id;
+	uint16_t platform_specific_id;
+	uint16_t language_id;
+	uint16_t name_id;
+	uint16_t len;
+	uint16_t offset;
+} name_record_t;
+static uint32_t get32(const uint8_t *d)
+{
+	return (d[0]<<24)|(d[1]<<16)|(d[2]<<8)|d[3];
+}
+static uint32_t get16(const uint8_t *d)
+{
+	return (d[0]<<8)|d[1];
+}
+static void put32(uint8_t *d, uint32_t v)
+{
+	d[0] = v>>24;
+	d[1] = v>>16;
+	d[2] = v>>8;
+	d[3] = v;
+}
+static void put16(uint8_t *d, uint32_t v)
+{
+	d[0] = v>>8;
+	d[1] = v;
+}
+typedef struct
+{
+	/* First 2 fields aren't actually needed for the pointer list
+	 * operation, but they serve as bounds for all the offsets used
+	 * within the ptr list. */
+	uint8_t *block;
+	size_t block_len;
+	uint32_t len;
+	uint32_t max;
+	uint8_t **ptr;
+} ptr_list_t;
+static void
+ptr_list_add(fz_context *ctx, ptr_list_t *pl, uint8_t *ptr)
+{
+	if (pl->len == pl->max)
+	{
+		int n = pl->max * 2;
+		if (n == 0)
+			n = 32;
+		pl->ptr = fz_realloc(ctx, pl->ptr, sizeof(*pl->ptr) * n);
+		pl->max = n;
+	}
+	pl->ptr[pl->len++] = ptr;
+}
+typedef int (cmp_t)(const uint8_t **a, const uint8_t **b);
+typedef int (void_cmp_t)(const void *, const void *);
+static void
+ptr_list_sort(fz_context *ctx, ptr_list_t *pl, cmp_t *cmp)
+{
+	/* Avoid scanbuild/coverity false warning with this unnecessary test */
+	if (pl->ptr == NULL || pl->len == 0)
+		return;
+	qsort(pl->ptr, pl->len, sizeof(*pl->ptr), (void_cmp_t *)cmp);
+}
+static void
+drop_ptr_list(fz_context *ctx, ptr_list_t *pl)
+{
+	fz_free(ctx, pl->ptr);
+}
+/* return 1 to keep, 0 to drop. */
+typedef int (filter_t)(const uint8_t *ptr, const uint8_t *blk, size_t len);
+/* This makes a pointer list from a filtered block, moving the underlying data as it filters. */
+static void
+ptr_list_compact(fz_context *ctx, ptr_list_t *pl, filter_t *fil, uint8_t *base, int n, size_t eltsize, uint8_t *block, size_t block_len)
+{
+	int i;
+	uint8_t *s = base;
+	uint8_t *d = base;
+	pl->block = block;
+	pl->block_len = block_len;
+	if (base < block || (size_t)(base - block) > block_len || (size_t)(base - block) + n * eltsize >= block_len)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Ptr List creation failed");
+	for (i = 0; i < n; i++)
+	{
+		if (fil(s, block, block_len))
+		{
+			ptr_list_add(ctx, pl, d);
+			if (s != d)
+				memmove(d, s, eltsize);
+			d += eltsize;
+		}
+		s += eltsize;
+	}
+}
+static int
+names_by_size(const uint8_t **a, const uint8_t **b)
+{
+	return get16((*b)+8) - get16((*a)+8);
+}
+static int
+filter_name_tables(const uint8_t *ptr, const uint8_t *block, size_t block_len)
+{
+	/* FIXME: For now, we keep everything. */
+	return 1;
+}
+#define UNFOUND ((uint32_t)-1)
+static uint32_t
+find_string_in_block(const uint8_t *str, size_t str_len, const uint8_t *block, size_t block_len)
+{
+	const uint8_t *b = block;
+	if (block_len == 0)
+		return UNFOUND;
+	assert(block_len >= str_len);
+	block_len -= str_len-1;
+	while (block_len--)
+	{
+		if (!memcmp(str, b, str_len))
+			return (uint32_t)(b - block);
+		b++;
+	}
+	return UNFOUND;
+}
+static void
+subset_name_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	fz_buffer *t = read_table(ctx, stm, TAG("name"), 0);
+	uint8_t *d;
+	uint32_t i, n, off;
+	ptr_list_t pl = { 0 };
+	size_t name_data_size;
+	uint8_t *new_name_data = NULL;
+	size_t new_len;
+	if (t == NULL)
+		return; /* No name table */
+	d = t->data;
+	fz_var(new_name_data);
+	fz_try(ctx)
+	{
+		if (get16(d) != 0 || t->len < 6)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported name table format");
+		n = get16(d+2);
+		off = get16(d+4);
+		name_data_size = t->len - 6 - 12*n;
+		if (t->len < 6 + 12*n)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name table");
+		ptr_list_compact(ctx, &pl, filter_name_tables, d+6, n, 12, d, t->len);
+		/* Sort our list so that the ones with the largest name data blocks come first. */
+		ptr_list_sort(ctx, &pl, names_by_size);
+		new_name_data = fz_malloc(ctx, name_data_size);
+		new_len = 0;
+		for (i = 0; i < pl.len; i++)
+		{
+			uint32_t name_len, offset, name_off;
+			uint8_t *name;
+			if (t->len < (size_t) (pl.ptr[i] - t->data) + 8 + 2)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name length in name table");
+			name_len = get16(pl.ptr[i] + 8);
+			if (t->len < (size_t) (pl.ptr[i] - t->data) + 10 + 2)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name offset in name table");
+			name_off = off + get16(pl.ptr[i] + 10);
+			name = d + name_off;
+			if (t->len < name_off + name_len)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name in name table");
+			offset = find_string_in_block(name, name_len, new_name_data, new_len);
+			if (offset == UNFOUND)
+			{
+				if (name_data_size < new_len + name_len)
+					fz_throw(ctx, FZ_ERROR_FORMAT, "Bad name table in TTF");
+				memcpy(new_name_data + new_len, name, name_len);
+				offset = (uint32_t)new_len;
+				new_len += name_len;
+			}
+			put16(pl.ptr[i]+10, offset);
+		}
+		memcpy(d + 6 + 12*pl.len, new_name_data, new_len);
+		t->len = 6 + 12*pl.len + new_len;
+		put16(d+4, 6 + 12*pl.len);
+	}
+	fz_always(ctx)
+	{
+		drop_ptr_list(ctx, &pl);
+		fz_free(ctx, new_name_data);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_rethrow(ctx);
+	}
+	add_table(ctx, ttf, TAG("name"), t);
+}
+static encoding_t *
+load_enc_tab0(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
+{
+	encoding_t *enc;
+	int i;
+	if (data_size < 262)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap 0 format table");
+	enc = fz_malloc_struct(ctx, encoding_t);
+	d += offset + 6;
+	enc->max = 256;
+	for (i = 0; i < 256; i++)
+		enc->gid[i] = d[i];
+	return enc;
+}
+static encoding_t *
+load_enc_tab4(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
+{
+	encoding_t *enc;
+	uint16_t seg_count;
+	uint32_t i;
+	if (data_size < offset + 26)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
+	seg_count = get16(d+offset+6); /* 2 * seg_count */
+	if (seg_count & 1)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table");
+	seg_count >>= 1;
+	enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (65536 - 256));
+	enc->max = 65536;
+	fz_try(ctx)
+	{
+		/* Run through the segments, counting how many are used. */
+		for (i = 0; i < seg_count; i++)
+		{
+			uint16_t seg_end, seg_start, delta, target, inner_offset;
+			uint32_t offset_ptr, s;
+			if (data_size < offset + 14 + 6 * seg_count + 2 + 2 * i + 2)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
+			seg_end = get16(d + offset + 14 + 2 * i);
+			seg_start = get16(d + offset + 14 + 2 * seg_count + 2 + 2 * i);
+			delta = get16(d + offset + 14 + 4 * seg_count + 2 + 2 * i);
+			offset_ptr = offset + 14 + 6 * seg_count + 2 + 2 * i;
+			inner_offset = get16(d + offset_ptr);
+			if (seg_start >= enc->max || seg_end >= enc->max || seg_end < seg_start)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table.");
+			for (s = seg_start; s <= seg_end; s++)
+			{
+				if (inner_offset == 0)
+				{
+					target = delta + s;
+				}
+				else
+				{
+					if (data_size < offset_ptr + inner_offset + 2 * (s - seg_start) + 2)
+						fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
+					/* Yes. This is very screwy. The inner_offset is from the offset_ptr in use. */
+					target = get16(d + offset_ptr + inner_offset + 2 * (s - seg_start));
+					if (target != 0)
+						target += delta;
+				}
+				if (target != 0)
+					enc->gid[s] = target;
+			}
+		}
+	}
+	fz_catch(ctx)
+	{
+		fz_free(ctx, enc);
+		fz_rethrow(ctx);
+	}
+	return enc;
+}
+static encoding_t *
+load_enc_tab6(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
+{
+	encoding_t *enc;
+	uint16_t first_code;
+	uint16_t entry_count;
+	uint16_t length;
+	uint32_t i;
+	if (data_size < 10)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "cmap6 too small");
+	length = get16(d+offset+2);
+	first_code = get16(d+offset+6);
+	entry_count = get16(d+offset+8);
+	if (length < entry_count*2 + 10)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap6 table");
+	enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (first_code + entry_count - 256));
+	enc->max = first_code + entry_count;
+	/* Run through the segments, counting how many are used. */
+	for (i = 0; i < entry_count; i++)
+	{
+		enc->gid[first_code+i] = get16(d+offset+10+i*2);
+	}
+	return enc;
+}
+static int
+is_encoding_all_zeros(fz_context *ctx, encoding_t *enc)
+{
+	uint32_t i;
+	if (enc != NULL)
+		for (i = 0; i < enc->max; i++)
+			if (enc->gid[i] != 0)
+				return 0;
+	return 1;
+}
+static encoding_t *
+load_enc(fz_context *ctx, fz_buffer *t, int pid, int psid)
+{
+	uint8_t *d = t->data;
+	size_t data_size = t->len;
+	uint32_t i, n;
+	if (data_size < 6 || get16(d) != 0)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format");
+	n = get16(d+2);
+	if (data_size < 4 + 8*n)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap table");
+	for (i = 0; i < n; i++)
+	{
+		uint16_t plat_id = get16(d + 4 + i * 8);
+		uint16_t plat_spec_id = get16(d + 4 + i * 8 + 2);
+		uint32_t offset = get32(d + 4 + i * 8 + 4);
+		uint16_t fmt;
+		encoding_t *enc;
+		if (plat_id != pid || plat_spec_id != psid)
+			continue;
+		if (offset < 4 + 8 * n || offset + 2 >= data_size)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "cmap table data out of range");
+		fmt = get16(d+offset);
+		switch(fmt)
+		{
+		case 0:
+			enc = load_enc_tab0(ctx, d, data_size, offset);
+			break;
+		case 4:
+			enc = load_enc_tab4(ctx, d, data_size, offset);
+			break;
+		case 6:
+			enc = load_enc_tab6(ctx, d, data_size, offset);
+			break;
+		default:
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format %d", fmt);
+		}
+		enc->pid = pid;
+		enc->psid = psid;
+		if (is_encoding_all_zeros(ctx, enc))
+		{
+			// ignore any encoding that is all zeros
+			fz_free(ctx, enc);
+			enc = NULL;
+		}
+		return enc;
+	}
+	return NULL;
+}
+static void
+load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	fz_buffer *t = read_table(ctx, stm, TAG("cmap"), 1);
+	encoding_t *enc = NULL;
+	fz_var(enc);
+	fz_try(ctx)
+	{
+		if (ttf->symbolic)
+		{
+			/* For symbolic fonts, we look for (3,0) as per PDF Spec, then (1,0). */
+			enc = load_enc(ctx, t, 3, 0);
+			if (!enc)
+				enc = load_enc(ctx, t, 1, 0);
+		}
+		else
+		{
+			/* For non symbolic fonts, we look for (3,1) then (1,0), then (0,1), and finally (0,3). */
+			enc = load_enc(ctx, t, 3, 1);
+			if (!enc)
+				enc = load_enc(ctx, t, 1, 0);
+			if (!enc)
+				enc = load_enc(ctx, t, 0, 1);
+			if (!enc)
+				enc = load_enc(ctx, t, 0, 3);
+		}
+		if (!enc)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "No suitable cmap table found");
+	}
+	fz_always(ctx)
+	{
+		fz_drop_buffer(ctx, t);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+	ttf->encoding = enc;
+}
+static void
+reduce_encoding(fz_context *ctx, ttf_t *ttf, int *gids, int num_gids)
+{
+	int i;
+	encoding_t *enc = ttf->encoding;
+	int n = enc->max;
+	for (i = 0; i < n; i++)
+	{
+		int gid = enc->gid[i];
+		int lo, hi;
+		if (gid == 0)
+			continue;
+		lo = 0;
+		hi = num_gids;
+		while (lo < hi)
+		{
+			int mid = (lo + hi)>>1;
+			int g = gids[mid];
+			if (g < gid)
+				lo = mid+1;
+			else if (g > gid)
+				hi = mid;
+			else
+				goto found; /* Leave this one as is. */
+		}
+		/* Not found */
+		enc->gid[i] = 0;
+	found:
+		{}
+	}
+}
+static void
+make_cmap(fz_context *ctx, ttf_t *ttf)
+{
+	uint32_t i;
+	uint32_t len;
+	uint32_t segs = 0;
+	uint32_t seg, seg_start, seg_end;
+	encoding_t *enc = ttf->encoding;
+	uint32_t n = enc->max;
+	uint32_t entries = 0;
+	fz_buffer *buf;
+	uint8_t *d;
+	uint32_t offset;
+	/* Make a type 4 table. */
+	/* Count the number of segments. */
+	for (i = 0; i < n; i++)
+	{
+		if (enc->gid[i] == 0)
+			continue;
+		seg_start = i;
+		seg_end = i;
+		for (i++; i<n; i++)
+		{
+			if (enc->gid[i] != 0)
+				seg_end = i;
+			else if (i - seg_end > 4)
+				break;
+		}
+		entries += seg_end - seg_start + 1;
+		segs++;
+	}
+	segs++; /* For the terminator */
+	len = 12 + 14 + 2 + segs * 2 * 4 + entries * 2;
+	buf = fz_new_buffer(ctx, len);
+	d = buf->data;
+	/* cmap header */
+	put16(d, 0); /* version */
+	put16(d+2, 1); /* num sub tables */
+	put16(d+4, enc->pid);
+	put16(d+6, enc->psid);
+	put32(d+8, 12); /* offset */
+	d += 12;
+	put16(d, 4); /* Format */
+	put16(d + 2, len-12); /* Length */
+	put16(d + 4, 0); /* FIXME: Language */
+	put16(d + 6, segs * 2);
+	i = 0;
+	while (1U<<(i+1) <= segs)
+		i++;
+	/* So 1<<i <= segs < 1<<(i+1) */
+	put16(d + 8, 1<<(i+1)); /* searchRange */
+	put16(d + 10, i); /* entrySelector */
+	put16(d + 12, 2 * segs - (1<<(i+1))); /* rangeShift */
+	put16(d + 14 + segs * 2, 0); /* reserved */
+	/* Now output the segment data */
+	entries = 14 + segs * 2 * 4 + 2; /* offset of where to put entries.*/
+	seg = 0;
+	for (i = 0; i < n; i++)
+	{
+		if (enc->gid[i] == 0)
+			continue;
+		seg_start = i;
+		seg_end = i;
+		offset = 14 + segs * 2 * 3 + 2 + seg * 2;
+		put16(d + offset - segs * 2, 0); /* Delta - always 0 for now. */
+		put16(d + offset, entries - offset); /* offset */
+		/* Insert an entry */
+		if (!ttf->is_otf && ttf->gid_renum && i < enc->max && enc->gid[i] < ttf->orig_num_glyphs)
+			put16(d + entries, (ttf->is_otf || ttf->gid_renum == NULL) ? enc->gid[i] : ttf->gid_renum[enc->gid[i]]);
+		else
+			put16(d + entries, enc->gid[i]);
+		entries += 2;
+		for (i++; i < n; i++)
+		{
+			if (enc->gid[i] != 0)
+			{
+				/* Include i in the range, which means we need to add entries for
+				 * seg_end to i inclusive. */
+				while (seg_end < i)
+				{
+					seg_end++;
+					if (!ttf->is_otf && ttf->gid_renum && seg_end < enc->max && enc->gid[seg_end] < ttf->orig_num_glyphs)
+						put16(d + entries, ttf->gid_renum[enc->gid[seg_end]]);
+					else
+						put16(d + entries, enc->gid[seg_end]);
+					entries += 2;
+				}
+			}
+			else if (i - seg_end > 4)
+				break;
+		}
+		put16(d + 14 + segs * 2 + seg * 2 + 2, seg_start);
+		put16(d + 14 + seg * 2, seg_end);
+		seg++;
+	}
+	offset = 14 + segs * 2 * 3 + 2 + seg * 2;
+	put16(d + 14 + segs * 2 + seg * 2 + 2, 0xffff);
+	put16(d + 14 + seg * 2, 0xffff);
+	put16(d + offset - segs * 2, 1); /* Delta */
+	put16(d + offset, 0); /* offset */
+	buf->len = entries + 12;
+	assert(buf->len == buf->cap);
+	add_table(ctx, ttf, TAG("cmap"), buf);
+}
+static void
+read_maxp(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	fz_buffer *t = read_table(ctx, stm, TAG("maxp"), 1);
+	if (t->len < 6)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "truncated maxp table");
+	}
+	ttf->orig_num_glyphs = get16(t->data+4);
+	add_table(ctx, ttf, TAG("maxp"), t);
+	ttf->maxp = t->data;
+}
+static void
+read_head(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	uint32_t version;
+	fz_buffer *t = read_table(ctx, stm, TAG("head"), 1);
+	if (t->len < 54)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "truncated head table");
+	}
+	version = get32(t->data);
+	if (version != 0x00010000)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported head table version 0x%08x", version);
+	}
+	ttf->index_to_loc_formatp = t->data+50;
+	ttf->index_to_loc_format = get16(ttf->index_to_loc_formatp);
+	if (ttf->index_to_loc_format & ~1)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported index_to_loc_format 0x%04x", ttf->index_to_loc_format);
+	}
+	add_table(ctx, ttf, TAG("head"), t);
+}
+static void
+read_loca(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	fz_buffer *t;
+	uint32_t len = (2<<ttf->index_to_loc_format) * (ttf->orig_num_glyphs+1);
+	t = read_table(ctx, stm, TAG("loca"), 1);
+	if (t->len < len)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "truncated loca table");
+	}
+	ttf->loca = t->data;
+	ttf->loca_len = &t->len;
+	add_table(ctx, ttf, TAG("loca"), t);
+}
+static void
+read_hhea(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	uint32_t version;
+	fz_buffer *t = read_table(ctx, stm, TAG("hhea"), 1);
+	uint16_t i;
+	if (t->len < 36)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "truncated hhea table");
+	}
+	version = get32(t->data);
+	if (version != 0x00010000)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported hhea table version 0x%08x", version);
+	}
+	ttf->orig_num_long_hor_metrics = get16(t->data+34);
+	if (ttf->orig_num_long_hor_metrics > ttf->orig_num_glyphs)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Overlong hhea table");
+	}
+	add_table(ctx, ttf, TAG("hhea"), t);
+	/* Previously gids 0 to orig_num_long_hor_metrics-1 were described with
+	 * hor metrics, and the ones afterwards were fixed widths. Find where
+	 * that dividing line is in our new reduced set. */
+	if (ttf->encoding && !ttf->is_otf && ttf->orig_num_long_hor_metrics > 0)
+	{
+		/* i = 0 is always kept long in subset_hmtx(). */
+		ttf->new_num_long_hor_metrics = 1;
+		for (i = ttf->orig_num_long_hor_metrics-1; i > 0; i--)
+			if (ttf->gid_renum[i])
+			{
+				ttf->new_num_long_hor_metrics = ttf->gid_renum[i]+1;
+				break;
+			}
+		put16(t->data+34, ttf->new_num_long_hor_metrics);
+	}
+	else
+	{
+		ttf->new_num_long_hor_metrics = ttf->orig_num_long_hor_metrics;
+	}
+}
+static uint32_t
+get_loca(fz_context *ctx, ttf_t *ttf, uint32_t n)
+{
+	if (ttf->index_to_loc_format == 0)
+	{
+		/* Short index - convert from words to bytes */
+		return get16(ttf->loca + n*2) * 2;
+	}
+	else
+	{
+		/* Long index - in bytes already */
+		return get32(ttf->loca + n*4);
+	}
+}
+static void
+put_loca(fz_context *ctx, ttf_t *ttf, uint32_t n, uint32_t off)
+{
+	if (ttf->index_to_loc_format == 0)
+	{
+		/* Short index - convert from bytes to words */
+		assert((off & 1) == 0);
+		put16(ttf->loca + n*2, off/2);
+	}
+	else
+	{
+		/* Long index - in bytes already */
+		put32(ttf->loca + n*4, off);
+	}
+}
+static void
+glyph_used(fz_context *ctx, ttf_t *ttf, fz_buffer *glyf, uint16_t i)
+{
+	uint32_t offset, len;
+	const uint8_t *data;
+	uint16_t flags;
+	if (i >= ttf->orig_num_glyphs)
+	{
+		fz_warn(ctx, "TTF subsetting; gid >= num_gids!");
+		return;
+	}
+	if (ttf->gid_renum[i] != 0)
+		return;
+	ttf->gid_renum[i] = 1;
+	/* If this glyf is composite, then we need to add any dependencies of it. */
+	offset = get_loca(ctx, ttf, i);
+	len = get_loca(ctx, ttf, i+1) - offset;
+	if (len == 0)
+		return;
+	if (offset+2 > glyf->len)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+	data = glyf->data + offset;
+	if ((int16_t)get16(data) >= 0)
+		return; /* Single glyph - no dependencies */
+	data += 4 * 2 + 2;
+	if (len < 4*2 + 2)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+	len -= 4 * 2 + 2;
+	do
+	{
+		uint16_t idx, skip;
+		if (len < 4)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+		flags = get16(data);
+		idx = get16(data+2);
+		glyph_used(ctx, ttf, glyf, idx);
+#define ARGS_1_AND_2_ARE_WORDS 1
+#define ARGS_ARE_XY_VALUES 2
+#define WE_HAVE_A_SCALE 8
+#define MORE_COMPONENTS 32
+#define WE_HAVE_AN_X_AND_Y_SCALE 64
+#define WE_HAVE_A_TWO_BY_TWO 128
+		/* Skip the X and Y offsets */
+		if (flags & ARGS_1_AND_2_ARE_WORDS)
+			skip = 4 + 4;
+		else
+			skip = 4 + 2;
+		/* Skip the transformation */
+		switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO))
+		{
+		case 0:
+			/* No extra to skip */
+			break;
+		case WE_HAVE_A_SCALE:
+			skip += 2;
+			break;
+		case WE_HAVE_AN_X_AND_Y_SCALE:
+			skip += 4;
+			break;
+		case WE_HAVE_A_TWO_BY_TWO:
+			skip += 8;
+			break;
+		}
+		if (len < skip)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+		data += skip;
+		len -= skip;
+	}
+	while(flags & MORE_COMPONENTS);
+}
+static void
+renumber_composite(fz_context *ctx, ttf_t *ttf, uint8_t *data, uint32_t len)
+{
+	uint16_t flags;
+	uint16_t x;
+	data += 4 * 2 + 2;
+	if (len < 4*2 + 2)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+	len -= 4 * 2 + 2;
+	do
+	{
+		uint16_t skip;
+		if (len < 4)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+		flags = get16(data);
+		x = get16(data+2);
+		if (x >= ttf->orig_num_glyphs)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+		put16(data+2, ttf->gid_renum[x]);
+		/* Skip the X and Y offsets */
+		if (flags & ARGS_1_AND_2_ARE_WORDS)
+			skip = 4 + 4;
+		else
+			skip = 4 + 2;
+		/* Skip the transformation */
+		switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO))
+		{
+		case 0:
+			/* No extra to skip */
+			break;
+		case WE_HAVE_A_SCALE:
+			skip += 2;
+			break;
+		case WE_HAVE_AN_X_AND_Y_SCALE:
+			skip += 4;
+			break;
+		case WE_HAVE_A_TWO_BY_TWO:
+			skip += 8;
+			break;
+		}
+		if (len < skip)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
+		data += skip;
+		len -= skip;
+	}
+	while(flags & MORE_COMPONENTS);
+}
+static void
+read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
+{
+	uint32_t len = get_loca(ctx, ttf, ttf->orig_num_glyphs);
+	fz_buffer *t = read_table(ctx, stm, TAG("glyf"), 1);
+	encoding_t *enc = ttf->encoding;
+	uint32_t last_loca, i, j, k;
+	uint32_t new_start, old_start, old_end, last_loca_ofs;
+	if (t->len < len)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "truncated glyf table");
+	}
+	add_table(ctx, ttf, TAG("glyf"), t);
+	/* Now, make the renumber list for the glyphs. */
+	ttf->gid_renum = fz_calloc(ctx, ttf->orig_num_glyphs, sizeof(uint16_t));
+	/* Initially, we'll use it just as a usage list. 0 = unused, 1 used */
+	/* glyph 0 is always used. */
+	glyph_used(ctx, ttf, t, 0);
+	if (enc)
+	{
+		uint32_t n = enc->max;
+		/* If we have an encoding table, run through it, and keep anything needed from there. */
+		for (i = 0; i < n; i++)
+			if (enc->gid[i])
+				glyph_used(ctx, ttf, t, enc->gid[i]);
+		/* Now convert from a usage table to a renumbering table. */
+		if (ttf->orig_num_glyphs > 0)
+		{
+			ttf->gid_renum[0] = 0;
+			j = 1;
+			for (i = 1; i < ttf->orig_num_glyphs; i++)
+				if (ttf->gid_renum[i])
+					ttf->gid_renum[i] = j++;
+			ttf->new_num_glyphs = j;
+		}
+		else
+		{
+			ttf->new_num_glyphs = 0;
+		}
+	}
+	else
+	{
+		/* We're a cid font. The cids are gids. */
+		for (i = 0; i < (uint32_t)num_gids; i++)
+			glyph_used(ctx, ttf, t, gids[i]);
+		ttf->new_num_glyphs = ttf->orig_num_glyphs;
+	}
+	/* Now subset the glyf table. */
+	if (enc)
+	{
+		old_start = get_loca(ctx, ttf, 0);
+		if (old_start > t->len)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
+		old_end = get_loca(ctx, ttf, 1);
+		if (old_end > t->len || old_end < old_start)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
+		len = old_end - old_start;
+		new_start = 0;
+		put_loca(ctx, ttf, 0, new_start);
+		last_loca = 0;
+		last_loca_ofs = len;
+		for (i = 0; i < ttf->orig_num_glyphs; i++)
+		{
+			old_end = get_loca(ctx, ttf, i + 1);
+			if (old_end > t->len || old_end < old_start)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
+			len = old_end - old_start;
+			if (len > 0 && (i == 0 || ttf->gid_renum[i] != 0))
+			{
+				memmove(t->data + new_start, t->data + old_start, len);
+				if ((int16_t)get16(t->data + new_start) < 0)
+					renumber_composite(ctx, ttf, t->data + new_start, len);
+				for (k = last_loca + 1; k <= ttf->gid_renum[i]; k++)
+					put_loca(ctx, ttf, k, last_loca_ofs);
+				new_start += len;
+				last_loca = ttf->gid_renum[i];
+				last_loca_ofs = new_start;
+			}
+			old_start = old_end;
+		}
+		for (k = last_loca + 1; k <= ttf->new_num_glyphs; k++)
+			put_loca(ctx, ttf, k, last_loca_ofs);
+	}
+	else
+	{
+		new_start = 0;
+		old_start = get_loca(ctx, ttf, 0);
+		if (old_start > t->len)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
+		for (i = 0; i < ttf->orig_num_glyphs; i++)
+		{
+			old_end = get_loca(ctx, ttf, i + 1);
+			if (old_end > t->len || old_end < old_start)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
+			len = old_end - old_start;
+			if (len > 0 && ttf->gid_renum[i] != 0)
+			{
+				memmove(t->data + new_start, t->data + old_start, len);
+				put_loca(ctx, ttf, i, new_start);
+				new_start += len;
+			}
+			else
+			{
+				put_loca(ctx, ttf, i, new_start);
+			}
+			old_start = old_end;
+		}
+		put_loca(ctx, ttf, ttf->orig_num_glyphs, new_start);
+	}
+	*ttf->loca_len = (size_t) (ttf->new_num_glyphs + 1) * (2<<ttf->index_to_loc_format);
+	t->len = new_start;
+}
+static void
+update_num_glyphs(fz_context *ctx, ttf_t *ttf)
+{
+	put16(ttf->maxp + 4, ttf->new_num_glyphs);
+}
+static void
+subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
+{
+	fz_buffer *t = read_table(ctx, stm, TAG("hmtx"), 1);
+	uint16_t long_metrics, short_metrics, i, k;
+	uint8_t *s = t->data;
+	uint8_t *d = t->data;
+	int cidfont = (ttf->encoding == NULL);
+	long_metrics = ttf->orig_num_long_hor_metrics;
+	if (long_metrics > ttf->orig_num_glyphs)
+		long_metrics = ttf->orig_num_glyphs;
+	if (long_metrics > t->len / 4)
+		long_metrics = (uint16_t)(t->len / 4);
+	short_metrics = (uint16_t)((t->len - long_metrics * 4) / 2);
+	if (short_metrics > ttf->orig_num_glyphs - long_metrics)
+		short_metrics = ttf->orig_num_glyphs - long_metrics;
+	for (i = 0; i < long_metrics; i++)
+	{
+		if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i]))
+		{
+			put32(d, get32(s));
+			d += 4;
+		}
+		else if (cidfont)
+		{
+			put32(d, 0);
+			d += 4;
+		}
+		s += 4;
+	}
+	for (k = 0 ; k < short_metrics; k++, i++)
+	{
+		if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i]))
+		{
+			put16(d, get16(s));
+			d += 2;
+		}
+		else if (cidfont)
+		{
+			put16(d, 0);
+			d += 2;
+		}
+		s += 2;
+	}
+	t->len = (d - t->data);
+	add_table(ctx, ttf, TAG("hmtx"), t);
+}
+static void
+shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf)
+{
+	uint32_t len;
+	uint16_t i, n;
+	uint8_t *loca;
+	if (ttf->index_to_loc_format == 0)
+		return; /* Can't shrink cos it's already shrunk! */
+	n = ttf->new_num_glyphs;
+	len = get_loca(ctx, ttf, n);
+	if (len >= 65536)
+		return; /* We can't shrink it, cos it's too big. */
+	loca = ttf->loca;
+	for (i = 0; i <= n; i++)
+	{
+		if (get32(loca + 4*i) & 1)
+			return; /* Can't shrink it, because an offset is not even */
+	}
+	for (i = 0; i <= n; i++)
+	{
+		put16(loca + 2*i, get32(loca + 4*i)/2);
+	}
+	*ttf->loca_len = 2*(n+1);
+	put16(ttf->index_to_loc_formatp, 0);
+}
+static struct { const char *charname; int idx; } macroman[] =
+{
+	{   ".notdef",                                 0},
+	{   ".null",                                   1},
+	{   "A",                                      36},
+	{   "AE",                                    144},
+	{   "Aacute",                                201},
+	{   "Acircumflex",                           199},
+	{   "Adieresis",                              98},
+	{   "Agrave",                                173},
+	{   "Aring",                                  99},
+	{   "Atilde",                                174},
+	{   "B",                                      37},
+	{   "C",                                      38},
+	{   "Cacute",                                253},
+	{   "Ccaron",                                255},
+	{   "Ccedilla",                              100},
+	{   "D",                                      39},
+	{   "Delta",                                 168},
+	{   "E",                                      40},
+	{   "Eacute",                                101},
+	{   "Ecircumflex",                           200},
+	{   "Edieresis",                             202},
+	{   "Egrave",                                203},
+	{   "Eth",                                   233},
+	{   "F",                                      41},
+	{   "G",                                      42},
+	{   "Gbreve",                                248},
+	{   "H",                                      43},
+	{   "I",                                      44},
+	{   "Iacute",                                204},
+	{   "Icircumflex",                           205},
+	{   "Idieresis",                             206},
+	{   "Idotaccent",                            250},
+	{   "Igrave",                                207},
+	{   "J",                                      45},
+	{   "K",                                      46},
+	{   "L",                                      47},
+	{   "Lslash",                                226},
+	{   "M",                                      48},
+	{   "N",                                      49},
+	{   "Ntilde",                                102},
+	{   "O",                                      50},
+	{   "OE",                                    176},
+	{   "Oacute",                                208},
+	{   "Ocircumflex",                           209},
+	{   "Odieresis",                             103},
+	{   "Ograve",                                211},
+	{   "Omega",                                 159},
+	{   "Oslash",                                145},
+	{   "Otilde",                                175},
+	{   "P",                                      51},
+	{   "Q",                                      52},
+	{   "R",                                      53},
+	{   "S",                                      54},
+	{   "Scaron",                                228},
+	{   "Scedilla",                              251},
+	{   "T",                                      55},
+	{   "Thorn",                                 237},
+	{   "U",                                      56},
+	{   "Uacute",                                212},
+	{   "Ucircumflex",                           213},
+	{   "Udieresis",                             104},
+	{   "Ugrave",                                214},
+	{   "V",                                      57},
+	{   "W",                                      58},
+	{   "X",                                      59},
+	{   "Y",                                      60},
+	{   "Yacute",                                235},
+	{   "Ydieresis",                             187},
+	{   "Z",                                      61},
+	{   "Zcaron",                                230},
+	{   "a",                                      68},
+	{   "aacute",                                105},
+	{   "acircumflex",                           107},
+	{   "acute",                                 141},
+	{   "adieresis",                             108},
+	{   "ae",                                    160},
+	{   "agrave",                                106},
+	{   "ampersand",                               9},
+	{   "apple",                                 210},
+	{   "approxequal",                           167},
+	{   "aring",                                 110},
+	{   "asciicircum",                            65},
+	{   "asciitilde",                             97},
+	{   "asterisk",                               13},
+	{   "at",                                     35},
+	{   "atilde",                                109},
+	{   "b",                                      69},
+	{   "backslash",                              63},
+	{   "bar",                                    95},
+	{   "braceleft",                              94},
+	{   "braceright",                             96},
+	{   "bracketleft",                            62},
+	{   "bracketright",                           64},
+	{   "breve",                                 219},
+	{   "brokenbar",                             232},
+	{   "bullet",                                135},
+	{   "c",                                      70},
+	{   "cacute",                                254},
+	{   "caron",                                 225},
+	{   "ccaron",                                256},
+	{   "ccedilla",                              111},
+	{   "cedilla",                               222},
+	{   "cent",                                  132},
+	{   "circumflex",                            216},
+	{   "colon",                                  29},
+	{   "comma",                                  15},
+	{   "copyright",                             139},
+	{   "currency",                              189},
+	{   "d",                                      71},
+	{   "dagger",                                130},
+	{   "daggerdbl",                             194},
+	{   "dcroat",                                257},
+	{   "degree",                                131},
+	{   "dieresis",                              142},
+	{   "divide",                                184},
+	{   "dollar",                                  7},
+	{   "dotaccent",                             220},
+	{   "dotlessi",                              215},
+	{   "e",                                      72},
+	{   "eacute",                                112},
+	{   "ecircumflex",                           114},
+	{   "edieresis",                             115},
+	{   "egrave",                                113},
+	{   "eight",                                  27},
+	{   "ellipsis",                              171},
+	{   "emdash",                                179},
+	{   "endash",                                178},
+	{   "equal",                                  32},
+	{   "eth",                                   234},
+	{   "exclam",                                  4},
+	{   "exclamdown",                            163},
+	{   "f",                                      73},
+	{   "fi",                                    192},
+	{   "five",                                   24},
+	{   "fl",                                    193},
+	{   "florin",                                166},
+	{   "four",                                   23},
+	{   "fraction",                              188},
+	{   "franc",                                 247},
+	{   "g",                                      74},
+	{   "gbreve",                                249},
+	{   "germandbls",                            137},
+	{   "grave",                                  67},
+	{   "greater",                                33},
+	{   "greaterequal",                          149},
+	{   "guillemotleft",                         169},
+	{   "guillemotright",                        170},
+	{   "guilsinglleft",                         190},
+	{   "guilsinglright",                        191},
+	{   "h",                                      75},
+	{   "hungarumlaut",                          223},
+	{   "hyphen",                                 16},
+	{   "i",                                      76},
+	{   "iacute",                                116},
+	{   "icircumflex",                           118},
+	{   "idieresis",                             119},
+	{   "igrave",                                117},
+	{   "infinity",                              146},
+	{   "integral",                              156},
+	{   "j",                                      77},
+	{   "k",                                      78},
+	{   "l",                                      79},
+	{   "less",                                   31},
+	{   "lessequal",                             148},
+	{   "logicalnot",                            164},
+	{   "lozenge",                               185},
+	{   "lslash",                                227},
+	{   "m",                                      80},
+	{   "macron",                                218},
+	{   "minus",                                 239},
+	{   "mu",                                    151},
+	{   "multiply",                              240},
+	{   "n",                                      81},
+	{   "nine",                                   28},
+	{   "nonbreakingspace",                      172},
+	{   "nonmarkingreturn",                        2},
+	{   "notequal",                              143},
+	{   "ntilde",                                120},
+	{   "numbersign",                              6},
+	{   "o",                                      82},
+	{   "oacute",                                121},
+	{   "ocircumflex",                           123},
+	{   "odieresis",                             124},
+	{   "oe",                                    177},
+	{   "ogonek",                                224},
+	{   "ograve",                                122},
+	{   "one",                                    20},
+	{   "onehalf",                               244},
+	{   "onequarter",                            245},
+	{   "onesuperior",                           241},
+	{   "ordfeminine",                           157},
+	{   "ordmasculine",                          158},
+	{   "oslash",                                161},
+	{   "otilde",                                125},
+	{   "p",                                      83},
+	{   "paragraph",                             136},
+	{   "parenleft",                              11},
+	{   "parenright",                             12},
+	{   "partialdiff",                           152},
+	{   "percent",                                 8},
+	{   "period",                                 17},
+	{   "periodcentered",                        195},
+	{   "perthousand",                           198},
+	{   "pi",                                    155},
+	{   "plus",                                   14},
+	{   "plusminus",                             147},
+	{   "product",                               154},
+	{   "q",                                      84},
+	{   "question",                               34},
+	{   "questiondown",                          162},
+	{   "quotedbl",                                5},
+	{   "quotedblbase",                          197},
+	{   "quotedblleft",                          180},
+	{   "quotedblright",                         181},
+	{   "quoteleft",                             182},
+	{   "quoteright",                            183},
+	{   "quotesinglbase",                        196},
+	{   "quotesingle",                            10},
+	{   "r",                                      85},
+	{   "radical",                               165},
+	{   "registered",                            138},
+	{   "ring",                                  221},
+	{   "s",                                      86},
+	{   "scaron",                                229},
+	{   "scedilla",                              252},
+	{   "section",                               134},
+	{   "semicolon",                              30},
+	{   "seven",                                  26},
+	{   "six",                                    25},
+	{   "slash",                                  18},
+	{   "space",                                   3},
+	{   "sterling",                              133},
+	{   "summation",                             153},
+	{   "t",                                      87},
+	{   "thorn",                                 238},
+	{   "three",                                  22},
+	{   "threequarters",                         246},
+	{   "threesuperior",                         243},
+	{   "tilde",                                 217},
+	{   "trademark",                             140},
+	{   "two",                                    21},
+	{   "twosuperior",                           242},
+	{   "u",                                      88},
+	{   "uacute",                                126},
+	{   "ucircumflex",                           128},
+	{   "udieresis",                             129},
+	{   "ugrave",                                127},
+	{   "underscore",                             66},
+	{   "v",                                      89},
+	{   "w",                                      90},
+	{   "x",                                      91},
+	{   "y",                                      92},
+	{   "yacute",                                236},
+	{   "ydieresis",                             186},
+	{   "yen",                                   150},
+	{   "z",                                      93},
+	{   "zcaron",                                231},
+	{   "zero",                                   19},
+};
+static int
+find_macroman_string(const char *s)
+{
+	int l, r, m;
+	int comparison;
+	l = 0;
+	r = nelem(macroman);
+	while (l <= r)
+	{
+		m = (l + r) >> 1;
+		comparison = strcmp(s, macroman[m].charname);
+		if (comparison < 0)
+			r = m - 1;
+		else if (comparison > 0)
+			l = m + 1;
+		else
+			return macroman[m].idx;
+	}
+	return -1;
+}
+static size_t
+subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids)
+{
+	int i, n, new_glyphs, old_strings, new_strings;
+	int j;
+	fz_int2_heap heap = { 0 };
+	uint8_t *d0, *e, *p;
+	if (len < (size_t) 2 + 2 * ttf->orig_num_glyphs)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table");
+	n = get16(d);
+	if ((uint32_t)n != ttf->orig_num_glyphs)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
+	d0 = d;
+	d += 2; len -= 2;
+	e = d;
+	p = d;
+	/* Store all kept indexes. */
+	if (len < (size_t)n*2)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
+	old_strings = 0;
+	new_strings = 0;
+	new_glyphs = 0;
+	j = 0;
+	len -= (size_t)n*2;
+	for (i = 0; i < n; i++)
+	{
+		uint16_t o = get16(d);
+		fz_int2 i2;
+		p += 2;
+		if (o >= 258)
+			old_strings++;
+		/* We're only keeping gids we want. */
+		/* Note we need to keep both the gids we were given by the caller, but also
+		 * those required as composites (in gid_renum, if we have it). */
+		if (i != 0 && (j >= num_gids || gids[j] != i) && (ttf->gid_renum == NULL || ttf->gid_renum[i] == 0))
+		{
+			memmove(d, d + 2, (n - i - 1) * 2);
+			continue;
+		}
+		if (j < num_gids && gids[j] == i)
+			j++;
+		d += 2;
+		e += 2;
+		/* We want this gid. */
+		new_glyphs++;
+		/* 257 or smaller: same as in the basic order, keep it as such. */
+		if (o <= 257)
+			continue;
+		/* check if string is one of the macroman standard ones, and use its index if so. */
+		{
+			uint8_t *q = d0 + 2 + (size_t) n * 2;
+			int k;
+			char buf[257] = { 0 };
+			int macidx;
+			for (k = 0; k < o - 258; k++)
+				q += 1 + *q;
+			for (k = 0; k < *q; k++)
+				buf[k] = *(q + 1 + k);
+			macidx = find_macroman_string(buf);
+			if (macidx >= 0)
+			{
+				put16(d - 2, macidx);
+				continue;
+			}
+		}
+		/* We want this gid, and it is a string. */
+		new_strings++;
+		/* Store the index. */
+		i2.a = o - 258;
+		i2.b = i;
+		fz_int2_heap_insert(ctx, &heap, i2);
+		/* Update string index value in table entry. */
+		put16(d - 2, 257 + new_strings);
+	}
+	d = p;
+	/* Update number of indexes */
+	put16(d0, new_glyphs);
+	fz_int2_heap_sort(ctx, &heap);
+	/* So, the heap is sorted on i2.a (the string indexes we want to keep),
+	 * and i2.b is the gid that refers to that index. */
+	/* Run through the list moving the strings down that we care about. */
+	j = 0;
+	n = old_strings;
+	for (i = 0; i < n; i++)
+	{
+		uint8_t slen;
+		if (len < 1)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
+		slen = *d+1;
+		if (len < slen)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
+		len -= slen;
+		if (j >= heap.len || heap.heap[j].a != i)
+		{
+			/* Drop this one. */
+			d += slen;
+			continue;
+		}
+		memmove(e, d, slen);
+		d += slen;
+		e += slen;
+		j++;
+	}
+	fz_free(ctx, heap.heap);
+	return e - d0;
+}
+static void
+subset_post(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
+{
+	fz_buffer *t = read_table(ctx, stm, TAG("post"), 0);
+	uint8_t *d;
+	size_t len;
+	uint32_t fmt;
+	if (t == NULL)
+		return;
+	d = t->data;
+	len = t->len;
+	if (len < 32)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table");
+	}
+	fmt = get32(d);
+	if (fmt != 0x00020000)
+	{
+		/* Fmt 1: Nothing to be gained by having this table. The cmap should
+		 * have all the mappings anyway, and we'll have broken it by renumbering
+		 * the gids down anyway. */
+		/* Fmt 2.5 deprecated. */
+		/* Fmt 3 and 4: should not be used for PDF. */
+		/* No other formats defined. */
+		fz_drop_buffer(ctx, t);
+		return;
+	}
+	d += 32; len -= 32;
+	fz_try(ctx)
+		len = subset_post2(ctx, ttf, d, len, gids, num_gids);
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, t);
+		fz_rethrow(ctx);
+	}
+	t->len = 32 + len;
+	add_table(ctx, ttf, TAG("post"), t);
+}
+static void
+subset_CFF(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids, int symbolic, int cidfont)
+{
+	fz_buffer *t = read_table(ctx, stm, TAG("CFF "), 1);
+	fz_buffer *sub = NULL;
+	fz_var(sub);
+	fz_try(ctx)
+		sub = fz_subset_cff_for_gids(ctx, t, gids, num_gids, symbolic, cidfont);
+	fz_always(ctx)
+		fz_drop_buffer(ctx, t);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+	add_table(ctx, ttf, TAG("CFF "), sub);
+}
+fz_buffer *
+fz_subset_ttf_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont)
+{
+	fz_stream *stm = fz_open_buffer(ctx, orig);
+	ttf_t ttf = { 0 };
+	fz_buffer *newbuf = NULL;
+	fz_output *out = NULL;
+	fz_var(newbuf);
+	fz_var(out);
+	fz_try(ctx)
+	{
+		ttf.is_otf = (fz_read_uint32_le(ctx, stm) == 0x4f54544f);
+		ttf.symbolic = symbolic;
+		/* Subset the name table. No other dependencies. */
+		subset_name_table(ctx, &ttf, stm);
+		if (!cidfont)
+		{
+			/* Load the encoding. Populates the encoding table from the cmap table
+			 * in the original. cmap table is then discarded. */
+			load_encoding(ctx, &ttf, stm);
+			/* Blank out the bits of the encoding we don't need. */
+			reduce_encoding(ctx, &ttf, gids, num_gids);
+		}
+		/* Read maxp and store the table. Remember orig_num_glyphs. */
+		read_maxp(ctx, &ttf, stm);
+		/* Read head and store the table. Remember the loca index size. */
+		read_head(ctx, &ttf, stm);
+		if (ttf.is_otf)
+		{
+			subset_CFF(ctx, &ttf, stm, gids, num_gids, symbolic, cidfont);
+		}
+		/* Read loca and store it. Stash a pointer to the table for quick access. */
+		if (!ttf.is_otf)
+		{
+			read_loca(ctx, &ttf, stm);
+			/* Read the glyf data, and scan it for composites. This makes the gid_renum table,
+			 * subsets the glyf data, and rewrites the loca table. */
+			read_glyf(ctx, &ttf, stm, gids, num_gids);
+		}
+		/* Read hhea and store it. Remember numOfLongHorMetrics. */
+		read_hhea(ctx, &ttf, stm);
+		/* Read and subset hmtx. */
+		subset_hmtx(ctx, &ttf, stm);
+#ifdef DEBUG_SUBSETTING
+		if (!cidfont)
+		{
+			encoding_t *enc = ttf.encoding;
+			uint32_t i, n = enc->max;
+			for (i = 0; i < n; i++)
+				if (enc->gid[i])
+					printf("cid %x '%c'-> orig gid %d -> gid %d\n", i, (char)i, enc->gid[i], ttf.gid_renum[enc->gid[i]]);
+		}
+		{
+			uint32_t i;
+			for (i = 0; i < ttf.orig_num_glyphs; i++)
+				if (ttf.gid_renum[i])
+					printf("gid %d -> %d\n", i, ttf.gid_renum[i]);
+			for (i = 0; i <= ttf.new_num_glyphs; i++)
+				printf("LOCA %d = %x\n", i, get_loca(ctx, &ttf, i));
+		}
+#endif
+		if (!ttf.is_otf)
+		{
+			shrink_loca_if_possible(ctx, &ttf);
+			update_num_glyphs(ctx, &ttf);
+		}
+		if (!cidfont)
+		{
+			/* Now we can make the new cmap. */
+			make_cmap(ctx, &ttf);
+		}
+		if (!cidfont)
+		{
+			/* subset the post table */
+			subset_post(ctx, &ttf, stm, gids, num_gids);
+		}
+		copy_table(ctx, &ttf, stm, TAG("OS/2"), 0);
+		copy_table(ctx, &ttf, stm, TAG("cvt "), 0);
+		copy_table(ctx, &ttf, stm, TAG("fpgm"), 0);
+		copy_table(ctx, &ttf, stm, TAG("prep"), 0);
+		sort_tables(ctx, &ttf);
+		checksum_tables(ctx, &ttf);
+		newbuf = fz_new_buffer(ctx, 1024);
+		out = fz_new_output_with_buffer(ctx, newbuf);
+		write_tables(ctx, &ttf, out);
+		fz_close_output(ctx, out);
+		fix_checksum(ctx, newbuf);
+	}
+	fz_always(ctx)
+	{
+		int i;
+		fz_drop_output(ctx, out);
+		fz_drop_stream(ctx, stm);
+		for (i = 0; i < ttf.len; i++)
+			fz_drop_buffer(ctx, ttf.table[i].tab);
+		fz_free(ctx, ttf.table);
+		fz_free(ctx, ttf.gid_renum);
+		fz_free(ctx, ttf.encoding);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, newbuf);
+		fz_rethrow(ctx);
+	}
+	return newbuf;
+}

Mercurial > hgrepos > Python2 > PyMuPDF

comparison mupdf-source/source/fitz/subset-ttf.c @ 2:b50eed0cc0ef upstream