Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/subset-ttf.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/subset-ttf.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,2050 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +/* + For the purposes of this code, and to save my tiny brain from + overload, we will adopt the following notation: + + 1) The PDF file contains bytes of data. These bytes are looked + up in the MuPDF font handling to resolve to 'glyph ids' (gids). + These account for all the different encodings etc in use, + including the 'cmap' table within the font. + + 2) We are given the list of gids that are used in the document. + We arrange to keep any entries in the cmap or post tables that + maps to these gids. + + We map the gids to the bottom of the range. This means that the + cmap and post tables need to be updated. + + A similar optimisation would be to compress the range of cids + used to a prefix of the range used. 
This would mean that the + calling code needs to rewrite the data within the PDF file - + both in terms of the strings used with the PDF streams, and in + terms of the ToUnicode tables there (and the Widths etc). + + For now, we'll ignore this optimisation. + + Possibly, in the case of 'Identity' Tounicode mappings we + wouldn't actually want to do this range compression? It'd only + make the file larger. +*/ + +typedef struct +{ + uint16_t pid; + uint16_t psid; + + uint32_t max; + uint16_t gid[256]; +} encoding_t; + +typedef struct +{ + uint32_t tag; + uint32_t checksum; + fz_buffer *tab; +} tagged_table_t; + +typedef struct +{ + int is_otf; + int symbolic; + encoding_t *encoding; + uint16_t orig_num_glyphs; + uint16_t new_num_glyphs; + uint16_t index_to_loc_format; + uint8_t *index_to_loc_formatp; + uint16_t orig_num_long_hor_metrics; + uint16_t new_num_long_hor_metrics; + + /* Pointer to the old tables (in the tagged table below) */ + uint8_t *loca; + size_t *loca_len; + uint8_t *maxp; + + /* Maps from old gid to new gid */ + uint16_t *gid_renum; + + int max; + int len; + tagged_table_t *table; +} ttf_t; + +static uint32_t +checksum(fz_buffer *buf) +{ + size_t i; + const uint8_t *d = (const uint8_t *)buf->data; + uint32_t cs = 0; + + for (i = buf->len>>2; i > 0; i--) + { + cs += d[0]<<24; + cs += d[1]<<16; + cs += d[2]<<8; + cs += d[3]; + d += 4; + } + i = buf->len - (buf->len & ~3); + switch (i) + { + case 3: + cs += d[2]<<8; + /* fallthrough */ + case 2: + cs += d[1]<<16; + /* fallthrough */ + case 1: + cs += d[0]<<24; + default: + break; + } + + return cs; +} + +static uint32_t +find_table(fz_context *ctx, fz_stream *stm, uint32_t tag, uint32_t *len) +{ + int num_tables; + int i; + + fz_seek(ctx, stm, 4, SEEK_SET); + num_tables = fz_read_int16(ctx, stm); + fz_seek(ctx, stm, 12, SEEK_SET); + + for (i = 0; i < num_tables; i++) + { + uint32_t t = fz_read_uint32(ctx, stm); + uint32_t cs = fz_read_uint32(ctx, stm); + uint32_t off = fz_read_uint32(ctx, stm); + 
(void) cs; /* UNUSED */ + *len = fz_read_uint32(ctx, stm); + if (t == tag) + return off; + } + + return 0; +} + +static fz_buffer * +read_table(fz_context *ctx, fz_stream *stm, uint32_t tag, int compulsory) +{ + uint32_t size; + uint32_t off = find_table(ctx, stm, tag, &size); + fz_buffer *buf; + + if (off == 0) + { + if (compulsory) + fz_throw(ctx, FZ_ERROR_FORMAT, "Required %c%c%c%c table missing", tag>>24, (tag>>16)&0xff, (tag>>8)&0xff, tag & 0xff); + return NULL; + } + + fz_seek(ctx, stm, off, SEEK_SET); + buf = fz_new_buffer(ctx, size); + + fz_try(ctx) + { + fz_read(ctx, stm, buf->data, size); + buf->len = size; + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } + + return buf; +} + +#define TAG(s) \ + ( (((uint8_t)s[0])<<24) | \ + (((uint8_t)s[1])<<16) | \ + (((uint8_t)s[2])<<8) | \ + (((uint8_t)s[3]))) + +static void +add_table(fz_context *ctx, ttf_t *ttf, uint32_t tag, fz_buffer *tab) +{ + fz_try(ctx) + { + if (ttf->max == ttf->len) + { + int n = ttf->max * 2; + if (n == 0) + n = 16; + ttf->table = fz_realloc(ctx, ttf->table, sizeof(*ttf->table) * n); + ttf->max = n; + } + + ttf->table[ttf->len].tag = tag; + ttf->table[ttf->len].tab = tab; + ttf->len++; + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, tab); + fz_rethrow(ctx); + } +} + +static void +copy_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm, uint32_t tag, int compulsory) +{ + fz_buffer *t; + + t = read_table(ctx, stm, tag, compulsory); + if (t) + add_table(ctx, ttf, tag, t); +} + +static int +tabcmp(const void *a_, const void *b_) +{ + const tagged_table_t *a = (const tagged_table_t *)a_; + const tagged_table_t *b = (const tagged_table_t *)b_; + + return (a->tag - b->tag); +} + +static void +sort_tables(fz_context *ctx, ttf_t *ttf) +{ + /* Avoid scanbuild/coverity false warning with this unnecessary test */ + if (ttf->table == NULL || ttf->len == 0) + return; + qsort(ttf->table, ttf->len, sizeof(tagged_table_t), tabcmp); +} + +static void +checksum_tables(fz_context *ctx, 
ttf_t *ttf) +{ + int i; + + for (i = 0; i < ttf->len; i++) + ttf->table[i].checksum = checksum(ttf->table[i].tab); +} + +static void +write_tables(fz_context *ctx, ttf_t *ttf, fz_output *out) +{ + int i = 0; + uint32_t offset; + + /* scalar type - TTF for now - may need to cope with other types later. */ + if (ttf->is_otf) + fz_write_int32_be(ctx, out, 0x4f54544f); + else + fz_write_int32_be(ctx, out, 0x00010000); + + /* number of tables */ + fz_write_uint16_be(ctx, out, ttf->len); + + while (1<<(i+1) <= ttf->len) + i++; + + /* searchRange */ + fz_write_uint16_be(ctx, out, (1<<i)<<4); + + /* entrySelector */ + fz_write_uint16_be(ctx, out, i); + + /* rangeShift*/ + fz_write_uint16_be(ctx, out, (ttf->len - (1<<i))<<4); + + /* Table directory */ + offset = 12 + ttf->len * 16; + for (i = 0; i < ttf->len; i++) + { + fz_write_uint32_be(ctx, out, ttf->table[i].tag); + fz_write_uint32_be(ctx, out, ttf->table[i].checksum); + fz_write_uint32_be(ctx, out, offset); + fz_write_uint32_be(ctx, out, (uint32_t)ttf->table[i].tab->len); + offset += (uint32_t)ttf->table[i].tab->len; + } + + /* Now the tables in turn */ + for (i = 0; i < ttf->len; i++) + { + fz_write_buffer(ctx, out, ttf->table[i].tab); + } +} + +static void +fix_checksum(fz_context *ctx, fz_buffer *buf) +{ + uint8_t *data; + uint32_t sum = 0; + size_t len = fz_buffer_storage(ctx, buf, &data); + uint32_t namesize; + fz_stream *stm = fz_open_buffer(ctx, buf); + uint32_t csumpos = find_table(ctx, stm, TAG("head"), &namesize) + 8; + + (void) len; // UNUSED + + fz_drop_stream(ctx, stm); + + /* First off, blat the old checksum */ + memset(data+csumpos, 0, 4); + + sum = checksum(buf); + sum = 0xb1b0afba-sum; + + /* Insert it. 
/*
	Big-endian accessors for raw table data.

	None of these check bounds; the caller must make sure that the 2 or 4
	bytes at 'd' are valid.
*/

/* Read the big-endian 32 bit value stored at d[0..3]. */
static uint32_t get32(const uint8_t *d)
{
	/* Widen before shifting: a uint8_t promotes to int, and shifting a
	 * byte >= 0x80 left by 24 would overflow int (undefined behaviour). */
	return ((uint32_t)d[0]<<24) | ((uint32_t)d[1]<<16) | ((uint32_t)d[2]<<8) | (uint32_t)d[3];
}

/* Read the big-endian 16 bit value stored at d[0..1] (result is 0..0xffff). */
static uint32_t get16(const uint8_t *d)
{
	return ((uint32_t)d[0]<<8) | (uint32_t)d[1];
}

/* Store v at d[0..3], most significant byte first. */
static void put32(uint8_t *d, uint32_t v)
{
	d[0] = (uint8_t)(v>>24);
	d[1] = (uint8_t)(v>>16);
	d[2] = (uint8_t)(v>>8);
	d[3] = (uint8_t)v;
}

/* Store the low 16 bits of v at d[0..1], most significant byte first.
 * Any higher bits of v are discarded. */
static void put16(uint8_t *d, uint32_t v)
{
	d[0] = (uint8_t)(v>>8);
	d[1] = (uint8_t)v;
}
#define UNFOUND ((uint32_t)-1)

/*
	Find the first occurrence of the byte string str[0..str_len) within
	block[0..block_len).

	Returns the byte offset of the match from the start of block, or
	UNFOUND if there is none. An empty block never matches anything.
	A string longer than the block cannot match and yields UNFOUND; this
	used to be an assert, which with NDEBUG let the length subtraction
	underflow and the scan run far past the end of the block.
*/
static uint32_t
find_string_in_block(const uint8_t *str, size_t str_len, const uint8_t *block, size_t block_len)
{
	size_t pos, last;

	if (block_len == 0 || block_len < str_len)
		return UNFOUND;

	/* Last offset at which a str_len byte window still fits in block. */
	last = block_len - str_len;

	for (pos = 0; pos <= last; pos++)
	{
		if (!memcmp(str, block + pos, str_len))
			return (uint32_t)pos;
	}

	return UNFOUND;
}
d+6, n, 12, d, t->len); + + /* Sort our list so that the ones with the largest name data blocks come first. */ + ptr_list_sort(ctx, &pl, names_by_size); + + new_name_data = fz_malloc(ctx, name_data_size); + new_len = 0; + for (i = 0; i < pl.len; i++) + { + uint32_t name_len, offset, name_off; + uint8_t *name; + + if (t->len < (size_t) (pl.ptr[i] - t->data) + 8 + 2) + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name length in name table"); + name_len = get16(pl.ptr[i] + 8); + + if (t->len < (size_t) (pl.ptr[i] - t->data) + 10 + 2) + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name offset in name table"); + name_off = off + get16(pl.ptr[i] + 10); + name = d + name_off; + + if (t->len < name_off + name_len) + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name in name table"); + offset = find_string_in_block(name, name_len, new_name_data, new_len); + if (offset == UNFOUND) + { + if (name_data_size < new_len + name_len) + fz_throw(ctx, FZ_ERROR_FORMAT, "Bad name table in TTF"); + memcpy(new_name_data + new_len, name, name_len); + offset = (uint32_t)new_len; + new_len += name_len; + } + put16(pl.ptr[i]+10, offset); + } + memcpy(d + 6 + 12*pl.len, new_name_data, new_len); + t->len = 6 + 12*pl.len + new_len; + put16(d+4, 6 + 12*pl.len); + } + fz_always(ctx) + { + drop_ptr_list(ctx, &pl); + fz_free(ctx, new_name_data); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, t); + fz_rethrow(ctx); + } + + add_table(ctx, ttf, TAG("name"), t); +} + +static encoding_t * +load_enc_tab0(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) +{ + encoding_t *enc; + int i; + + if (data_size < 262) + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap 0 format table"); + + enc = fz_malloc_struct(ctx, encoding_t); + d += offset + 6; + + enc->max = 256; + for (i = 0; i < 256; i++) + enc->gid[i] = d[i]; + + return enc; +} + +static encoding_t * +load_enc_tab4(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) +{ + encoding_t *enc; + uint16_t seg_count; + uint32_t i; + + if (data_size < 
offset + 26) + fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); + + seg_count = get16(d+offset+6); /* 2 * seg_count */ + + if (seg_count & 1) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table"); + seg_count >>= 1; + + enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (65536 - 256)); + enc->max = 65536; + + fz_try(ctx) + { + /* Run through the segments, counting how many are used. */ + for (i = 0; i < seg_count; i++) + { + uint16_t seg_end, seg_start, delta, target, inner_offset; + uint32_t offset_ptr, s; + + if (data_size < offset + 14 + 6 * seg_count + 2 + 2 * i + 2) + fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); + + seg_end = get16(d + offset + 14 + 2 * i); + seg_start = get16(d + offset + 14 + 2 * seg_count + 2 + 2 * i); + delta = get16(d + offset + 14 + 4 * seg_count + 2 + 2 * i); + offset_ptr = offset + 14 + 6 * seg_count + 2 + 2 * i; + inner_offset = get16(d + offset_ptr); + + if (seg_start >= enc->max || seg_end >= enc->max || seg_end < seg_start) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table."); + + for (s = seg_start; s <= seg_end; s++) + { + if (inner_offset == 0) + { + target = delta + s; + } + else + { + if (data_size < offset_ptr + inner_offset + 2 * (s - seg_start) + 2) + fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); + + /* Yes. This is very screwy. The inner_offset is from the offset_ptr in use. 
*/ + target = get16(d + offset_ptr + inner_offset + 2 * (s - seg_start)); + if (target != 0) + target += delta; + } + + if (target != 0) + enc->gid[s] = target; + } + } + } + fz_catch(ctx) + { + fz_free(ctx, enc); + fz_rethrow(ctx); + } + + return enc; +} + +static encoding_t * +load_enc_tab6(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) +{ + encoding_t *enc; + uint16_t first_code; + uint16_t entry_count; + uint16_t length; + uint32_t i; + + if (data_size < 10) + fz_throw(ctx, FZ_ERROR_FORMAT, "cmap6 too small"); + + length = get16(d+offset+2); + first_code = get16(d+offset+6); + entry_count = get16(d+offset+8); + + if (length < entry_count*2 + 10) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap6 table"); + + enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (first_code + entry_count - 256)); + enc->max = first_code + entry_count; + + /* Run through the segments, counting how many are used. */ + for (i = 0; i < entry_count; i++) + { + enc->gid[first_code+i] = get16(d+offset+10+i*2); + } + + return enc; +} + +static int +is_encoding_all_zeros(fz_context *ctx, encoding_t *enc) +{ + uint32_t i; + + if (enc != NULL) + for (i = 0; i < enc->max; i++) + if (enc->gid[i] != 0) + return 0; + + return 1; +} + + +static encoding_t * +load_enc(fz_context *ctx, fz_buffer *t, int pid, int psid) +{ + uint8_t *d = t->data; + size_t data_size = t->len; + uint32_t i, n; + + if (data_size < 6 || get16(d) != 0) + fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format"); + + n = get16(d+2); + + if (data_size < 4 + 8*n) + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap table"); + + for (i = 0; i < n; i++) + { + uint16_t plat_id = get16(d + 4 + i * 8); + uint16_t plat_spec_id = get16(d + 4 + i * 8 + 2); + uint32_t offset = get32(d + 4 + i * 8 + 4); + uint16_t fmt; + encoding_t *enc; + + if (plat_id != pid || plat_spec_id != psid) + continue; + + if (offset < 4 + 8 * n || offset + 2 >= data_size) + fz_throw(ctx, FZ_ERROR_FORMAT, "cmap table data out 
of range"); + + fmt = get16(d+offset); + switch(fmt) + { + case 0: + enc = load_enc_tab0(ctx, d, data_size, offset); + break; + case 4: + enc = load_enc_tab4(ctx, d, data_size, offset); + break; + case 6: + enc = load_enc_tab6(ctx, d, data_size, offset); + break; + default: + fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format %d", fmt); + } + + enc->pid = pid; + enc->psid = psid; + + if (is_encoding_all_zeros(ctx, enc)) + { + // ignore any encoding that is all zeros + fz_free(ctx, enc); + enc = NULL; + } + + return enc; + } + + return NULL; +} + +static void +load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm) +{ + fz_buffer *t = read_table(ctx, stm, TAG("cmap"), 1); + encoding_t *enc = NULL; + + fz_var(enc); + + fz_try(ctx) + { + if (ttf->symbolic) + { + /* For symbolic fonts, we look for (3,0) as per PDF Spec, then (1,0). */ + enc = load_enc(ctx, t, 3, 0); + if (!enc) + enc = load_enc(ctx, t, 1, 0); + } + else + { + /* For non symbolic fonts, we look for (3,1) then (1,0), then (0,1), and finally (0,3). */ + enc = load_enc(ctx, t, 3, 1); + if (!enc) + enc = load_enc(ctx, t, 1, 0); + if (!enc) + enc = load_enc(ctx, t, 0, 1); + if (!enc) + enc = load_enc(ctx, t, 0, 3); + } + if (!enc) + fz_throw(ctx, FZ_ERROR_FORMAT, "No suitable cmap table found"); + } + fz_always(ctx) + { + fz_drop_buffer(ctx, t); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + ttf->encoding = enc; +} + +static void +reduce_encoding(fz_context *ctx, ttf_t *ttf, int *gids, int num_gids) +{ + int i; + encoding_t *enc = ttf->encoding; + int n = enc->max; + + for (i = 0; i < n; i++) + { + int gid = enc->gid[i]; + int lo, hi; + + if (gid == 0) + continue; + + lo = 0; + hi = num_gids; + while (lo < hi) + { + int mid = (lo + hi)>>1; + int g = gids[mid]; + if (g < gid) + lo = mid+1; + else if (g > gid) + hi = mid; + else + goto found; /* Leave this one as is. 
/*
	Build a new cmap table from ttf->encoding and add it to the font.

	The table holds a single format 4 subtable, written with the platform
	ids of the encoding that was loaded. Every segment uses an explicit
	glyph id array (delta 0, non-zero range offset) rather than a delta.

	Codes with a non-zero gid are grouped into segments; a run of up to 4
	unmapped codes between two mapped ones is absorbed into the segment
	(as gid 0 entries) rather than starting a new segment.

	When a renumbering table exists (and the font is not OTF), the gids
	written are the renumbered ones.

	Layout of the subtable, as byte offsets from its start (d after the
	12 byte cmap header has been skipped), for 'segs' segments:
		14                  end codes
		14 + segs*2         reserved pad
		14 + segs*2 + 2     start codes
		14 + segs*4 + 2     deltas
		14 + segs*6 + 2     range offsets
		14 + segs*8 + 2     glyph id entries
*/
static void
make_cmap(fz_context *ctx, ttf_t *ttf)
{
	uint32_t i;
	uint32_t len;
	uint32_t segs = 0;
	uint32_t seg, seg_start, seg_end;
	encoding_t *enc = ttf->encoding;
	uint32_t n = enc->max;
	uint32_t entries = 0;
	fz_buffer *buf;
	uint8_t *d;
	uint32_t offset;

	/* Make a type 4 table. */

	/* Count the number of segments. */
	/* This pass must group codes exactly as the output pass below does,
	 * because the buffer is sized from the counts made here. */
	for (i = 0; i < n; i++)
	{
		if (enc->gid[i] == 0)
			continue;

		seg_start = i;
		seg_end = i;
		for (i++; i<n; i++)
		{
			if (enc->gid[i] != 0)
				seg_end = i;
			else if (i - seg_end > 4)
				break; /* More than 4 unmapped codes in a row: the segment ends. */
		}
		entries += seg_end - seg_start + 1;
		segs++;
	}
	segs++; /* For the terminator */

	/* 12 byte cmap header + 14 byte subtable header + 2 byte reserved pad
	 * + four 16 bit arrays of 'segs' values + the glyph id entries. */
	len = 12 + 14 + 2 + segs * 2 * 4 + entries * 2;
	buf = fz_new_buffer(ctx, len);
	d = buf->data;

	/* cmap header */
	put16(d, 0); /* version */
	put16(d+2, 1); /* num sub tables */
	put16(d+4, enc->pid);
	put16(d+6, enc->psid);
	put32(d+8, 12); /* offset */
	d += 12;

	/* From here on, d is the start of the format 4 subtable. */
	put16(d, 4); /* Format */
	/* NOTE(review): put16 keeps only the low 16 bits, so a subtable
	 * longer than 65535 bytes gets a truncated length here. */
	put16(d + 2, len-12); /* Length */
	put16(d + 4, 0); /* FIXME: Language */
	put16(d + 6, segs * 2);
	i = 0;
	while (1U<<(i+1) <= segs)
		i++;
	/* So 1<<i <= segs < 1<<(i+1) */
	put16(d + 8, 1<<(i+1)); /* searchRange */
	put16(d + 10, i); /* entrySelector */
	put16(d + 12, 2 * segs - (1<<(i+1))); /* rangeShift */
	put16(d + 14 + segs * 2, 0); /* reserved */

	/* Now output the segment data */
	/* 'entries' is reused: from here it is the byte offset (from d) at
	 * which the next glyph id entry will be written. */
	entries = 14 + segs * 2 * 4 + 2; /* offset of where to put entries.*/
	seg = 0;
	for (i = 0; i < n; i++)
	{
		if (enc->gid[i] == 0)
			continue;

		seg_start = i;
		seg_end = i;
		/* offset = this segment's slot in the range offset array; the
		 * matching delta slot is segs*2 bytes before it. */
		offset = 14 + segs * 2 * 3 + 2 + seg * 2;
		put16(d + offset - segs * 2, 0); /* Delta - always 0 for now. */
		/* The range offset is stored relative to the position of the
		 * range offset slot itself. */
		put16(d + offset, entries - offset); /* offset */

		/* Insert an entry */
		/* NOTE(review): within this branch the inner conditional always
		 * selects the gid_renum[] value, since the outer test has already
		 * established !is_otf and gid_renum != NULL. */
		if (!ttf->is_otf && ttf->gid_renum && i < enc->max && enc->gid[i] < ttf->orig_num_glyphs)
			put16(d + entries, (ttf->is_otf || ttf->gid_renum == NULL) ? enc->gid[i] : ttf->gid_renum[enc->gid[i]]);
		else
			put16(d + entries, enc->gid[i]);

		entries += 2;
		for (i++; i < n; i++)
		{
			if (enc->gid[i] != 0)
			{
				/* Include i in the range, which means we need to add entries for
				 * seg_end to i inclusive. */
				while (seg_end < i)
				{
					seg_end++;
					if (!ttf->is_otf && ttf->gid_renum && seg_end < enc->max && enc->gid[seg_end] < ttf->orig_num_glyphs)
						put16(d + entries, ttf->gid_renum[enc->gid[seg_end]]);
					else
						put16(d + entries, enc->gid[seg_end]);
					entries += 2;
				}
			}
			else if (i - seg_end > 4)
				break; /* Same segment-ending rule as the counting pass. */
		}
		put16(d + 14 + segs * 2 + seg * 2 + 2, seg_start);
		put16(d + 14 + seg * 2, seg_end);
		seg++;
	}
	/* Terminating segment: start = end = 0xffff, delta 1, no range offset. */
	offset = 14 + segs * 2 * 3 + 2 + seg * 2;
	put16(d + 14 + segs * 2 + seg * 2 + 2, 0xffff);
	put16(d + 14 + seg * 2, 0xffff);
	put16(d + offset - segs * 2, 1); /* Delta */
	put16(d + offset, 0); /* offset */
	/* 'entries' is relative to the subtable; add back the cmap header. */
	buf->len = entries + 12;
	assert(buf->len == buf->cap);

	/* add_table drops buf itself if it fails. */
	add_table(ctx, ttf, TAG("cmap"), buf);
}
"Unsupported index_to_loc_format 0x%04x", ttf->index_to_loc_format); + } + + add_table(ctx, ttf, TAG("head"), t); +} + +static void +read_loca(fz_context *ctx, ttf_t *ttf, fz_stream *stm) +{ + fz_buffer *t; + uint32_t len = (2<<ttf->index_to_loc_format) * (ttf->orig_num_glyphs+1); + + t = read_table(ctx, stm, TAG("loca"), 1); + + if (t->len < len) + { + fz_drop_buffer(ctx, t); + fz_throw(ctx, FZ_ERROR_FORMAT, "truncated loca table"); + } + + ttf->loca = t->data; + ttf->loca_len = &t->len; + + add_table(ctx, ttf, TAG("loca"), t); +} + +static void +read_hhea(fz_context *ctx, ttf_t *ttf, fz_stream *stm) +{ + uint32_t version; + fz_buffer *t = read_table(ctx, stm, TAG("hhea"), 1); + uint16_t i; + + if (t->len < 36) + { + fz_drop_buffer(ctx, t); + fz_throw(ctx, FZ_ERROR_FORMAT, "truncated hhea table"); + } + + version = get32(t->data); + if (version != 0x00010000) + { + fz_drop_buffer(ctx, t); + fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported hhea table version 0x%08x", version); + } + + ttf->orig_num_long_hor_metrics = get16(t->data+34); + if (ttf->orig_num_long_hor_metrics > ttf->orig_num_glyphs) + { + fz_drop_buffer(ctx, t); + fz_throw(ctx, FZ_ERROR_FORMAT, "Overlong hhea table"); + } + + add_table(ctx, ttf, TAG("hhea"), t); + + /* Previously gids 0 to orig_num_long_hor_metrics-1 were described with + * hor metrics, and the ones afterwards were fixed widths. Find where + * that dividing line is in our new reduced set. */ + if (ttf->encoding && !ttf->is_otf && ttf->orig_num_long_hor_metrics > 0) + { + /* i = 0 is always kept long in subset_hmtx(). 
*/ + ttf->new_num_long_hor_metrics = 1; + for (i = ttf->orig_num_long_hor_metrics-1; i > 0; i--) + if (ttf->gid_renum[i]) + { + ttf->new_num_long_hor_metrics = ttf->gid_renum[i]+1; + break; + } + + put16(t->data+34, ttf->new_num_long_hor_metrics); + } + else + { + ttf->new_num_long_hor_metrics = ttf->orig_num_long_hor_metrics; + } +} + +static uint32_t +get_loca(fz_context *ctx, ttf_t *ttf, uint32_t n) +{ + if (ttf->index_to_loc_format == 0) + { + /* Short index - convert from words to bytes */ + return get16(ttf->loca + n*2) * 2; + } + else + { + /* Long index - in bytes already */ + return get32(ttf->loca + n*4); + } +} + +static void +put_loca(fz_context *ctx, ttf_t *ttf, uint32_t n, uint32_t off) +{ + if (ttf->index_to_loc_format == 0) + { + /* Short index - convert from bytes to words */ + assert((off & 1) == 0); + put16(ttf->loca + n*2, off/2); + } + else + { + /* Long index - in bytes already */ + put32(ttf->loca + n*4, off); + } +} + +static void +glyph_used(fz_context *ctx, ttf_t *ttf, fz_buffer *glyf, uint16_t i) +{ + uint32_t offset, len; + const uint8_t *data; + uint16_t flags; + + if (i >= ttf->orig_num_glyphs) + { + fz_warn(ctx, "TTF subsetting; gid >= num_gids!"); + return; + } + + if (ttf->gid_renum[i] != 0) + return; + + ttf->gid_renum[i] = 1; + + /* If this glyf is composite, then we need to add any dependencies of it. 
*/ + offset = get_loca(ctx, ttf, i); + len = get_loca(ctx, ttf, i+1) - offset; + if (len == 0) + return; + if (offset+2 > glyf->len) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + data = glyf->data + offset; + if ((int16_t)get16(data) >= 0) + return; /* Single glyph - no dependencies */ + data += 4 * 2 + 2; + if (len < 4*2 + 2) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + len -= 4 * 2 + 2; + do + { + uint16_t idx, skip; + + if (len < 4) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + + flags = get16(data); + idx = get16(data+2); + + glyph_used(ctx, ttf, glyf, idx); + +#define ARGS_1_AND_2_ARE_WORDS 1 +#define ARGS_ARE_XY_VALUES 2 +#define WE_HAVE_A_SCALE 8 +#define MORE_COMPONENTS 32 +#define WE_HAVE_AN_X_AND_Y_SCALE 64 +#define WE_HAVE_A_TWO_BY_TWO 128 + + /* Skip the X and Y offsets */ + if (flags & ARGS_1_AND_2_ARE_WORDS) + skip = 4 + 4; + else + skip = 4 + 2; + + /* Skip the transformation */ + switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO)) + { + case 0: + /* No extra to skip */ + break; + case WE_HAVE_A_SCALE: + skip += 2; + break; + case WE_HAVE_AN_X_AND_Y_SCALE: + skip += 4; + break; + case WE_HAVE_A_TWO_BY_TWO: + skip += 8; + break; + } + if (len < skip) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + data += skip; + len -= skip; + } + while(flags & MORE_COMPONENTS); +} + +static void +renumber_composite(fz_context *ctx, ttf_t *ttf, uint8_t *data, uint32_t len) +{ + uint16_t flags; + uint16_t x; + + data += 4 * 2 + 2; + if (len < 4*2 + 2) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + len -= 4 * 2 + 2; + do + { + uint16_t skip; + + if (len < 4) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + + flags = get16(data); + x = get16(data+2); + if (x >= ttf->orig_num_glyphs) + fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); + put16(data+2, ttf->gid_renum[x]); + + /* Skip the X and Y offsets */ + if (flags & ARGS_1_AND_2_ARE_WORDS) + skip = 4 + 4; + else + skip = 4 
/*
	Read the (compulsory) glyf table, work out which glyphs are needed,
	and compact the glyf data and the loca table in place.

	gids, num_gids: the glyph ids in use; only consulted when the font has
	no encoding (ttf->encoding == NULL).

	With an encoding: the used glyphs are those the encoding maps to (plus
	glyph 0 and anything composite glyphs refer to). They are renumbered
	to 0..new_num_glyphs-1 and unused glyphs are removed entirely.

	Without an encoding: the used glyphs are those listed in gids (plus
	glyph 0 and composite dependencies). Glyph numbering is unchanged;
	unused glyphs just become empty.

	On return ttf->gid_renum is allocated, ttf->new_num_glyphs is set, and
	the lengths of the glyf and loca buffers have been reduced to match.

	Requires ttf->loca, ttf->loca_len, ttf->orig_num_glyphs and
	ttf->index_to_loc_format to have been set up already.

	Throws FZ_ERROR_FORMAT on truncated or inconsistent data.
*/
static void
read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
{
	/* The final loca entry is the end offset of the last glyph, so it is
	 * the amount of glyf data that must be present. */
	uint32_t len = get_loca(ctx, ttf, ttf->orig_num_glyphs);
	fz_buffer *t = read_table(ctx, stm, TAG("glyf"), 1);
	encoding_t *enc = ttf->encoding;
	uint32_t last_loca, i, j, k;
	uint32_t new_start, old_start, old_end, last_loca_ofs;

	if (t->len < len)
	{
		fz_drop_buffer(ctx, t);
		fz_throw(ctx, FZ_ERROR_FORMAT, "truncated glyf table");
	}

	/* From here on the table list owns t, so the throws below do not
	 * need to drop it. */
	add_table(ctx, ttf, TAG("glyf"), t);

	/* Now, make the renumber list for the glyphs. */
	ttf->gid_renum = fz_calloc(ctx, ttf->orig_num_glyphs, sizeof(uint16_t));

	/* Initially, we'll use it just as a usage list. 0 = unused, 1 used */

	/* glyph 0 is always used. */
	glyph_used(ctx, ttf, t, 0);

	if (enc)
	{
		uint32_t n = enc->max;
		/* If we have an encoding table, run through it, and keep anything needed from there. */
		for (i = 0; i < n; i++)
			if (enc->gid[i])
				glyph_used(ctx, ttf, t, enc->gid[i]);

		/* Now convert from a usage table to a renumbering table. */
		/* Glyph 0 stays 0; every other used glyph gets the next free
		 * number. An entry of 0 for i > 0 therefore still means unused. */
		if (ttf->orig_num_glyphs > 0)
		{
			ttf->gid_renum[0] = 0;
			j = 1;
			for (i = 1; i < ttf->orig_num_glyphs; i++)
				if (ttf->gid_renum[i])
					ttf->gid_renum[i] = j++;
			ttf->new_num_glyphs = j;
		}
		else
		{
			ttf->new_num_glyphs = 0;
		}
	}
	else
	{
		/* We're a cid font. The cids are gids. */
		/* gid_renum remains a plain 0/1 usage list in this case. */
		for (i = 0; i < (uint32_t)num_gids; i++)
			glyph_used(ctx, ttf, t, gids[i]);
		ttf->new_num_glyphs = ttf->orig_num_glyphs;
	}

	/* Now subset the glyf table. */
	if (enc)
	{
		/* Renumbering case. Glyph data is moved down within t, and the
		 * loca entries are rewritten at their new (never larger) indices.
		 * Entry i+1 is always read before any index above i is written. */
		/* NOTE(review): this reads loca entry 1 unconditionally; with
		 * orig_num_glyphs == 0 that entry is not covered by the length
		 * check made in read_loca(). */
		old_start = get_loca(ctx, ttf, 0);
		if (old_start > t->len)
			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
		old_end = get_loca(ctx, ttf, 1);
		if (old_end > t->len || old_end < old_start)
			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
		len = old_end - old_start;
		new_start = 0;
		put_loca(ctx, ttf, 0, new_start);
		/* last_loca is the highest new glyph id whose start offset has
		 * been written; last_loca_ofs is the offset at which the data
		 * kept so far ends (initially, the end of glyph 0). */
		last_loca = 0;
		last_loca_ofs = len;
		for (i = 0; i < ttf->orig_num_glyphs; i++)
		{
			old_end = get_loca(ctx, ttf, i + 1);
			if (old_end > t->len || old_end < old_start)
				fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
			len = old_end - old_start;
			if (len > 0 && (i == 0 || ttf->gid_renum[i] != 0))
			{
				memmove(t->data + new_start, t->data + old_start, len);
				/* A negative contour count marks a composite glyph,
				 * whose component glyph ids need renumbering too. */
				if ((int16_t)get16(t->data + new_start) < 0)
					renumber_composite(ctx, ttf, t->data + new_start, len);
				/* Write the start offsets for every new id up to and
				 * including this glyph. Ids that were skipped over
				 * (used glyphs with no data) get the same offset,
				 * which leaves them empty. */
				for (k = last_loca + 1; k <= ttf->gid_renum[i]; k++)
					put_loca(ctx, ttf, k, last_loca_ofs);
				new_start += len;
				last_loca = ttf->gid_renum[i];
				last_loca_ofs = new_start;
			}
			old_start = old_end;
		}
		/* Fill in the remaining entries, including the final end offset. */
		for (k = last_loca + 1; k <= ttf->new_num_glyphs; k++)
			put_loca(ctx, ttf, k, last_loca_ofs);
	}
	else
	{
		/* No renumbering: every glyph keeps its loca slot. Used glyphs
		 * are moved down; unused ones become zero length. */
		new_start = 0;
		old_start = get_loca(ctx, ttf, 0);
		if (old_start > t->len)
			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
		for (i = 0; i < ttf->orig_num_glyphs; i++)
		{
			old_end = get_loca(ctx, ttf, i + 1);
			if (old_end > t->len || old_end < old_start)
				fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
			len = old_end - old_start;
			if (len > 0 && ttf->gid_renum[i] != 0)
			{
				memmove(t->data + new_start, t->data + old_start, len);
				put_loca(ctx, ttf, i, new_start);
				new_start += len;
			}
			else
			{
				put_loca(ctx, ttf, i, new_start);
			}
			old_start = old_end;
		}
		put_loca(ctx, ttf, ttf->orig_num_glyphs, new_start);
	}

	/* Shrink both tables to what is now in use: new_num_glyphs + 1 loca
	 * entries of 2 or 4 bytes each, and new_start bytes of glyph data. */
	*ttf->loca_len = (size_t) (ttf->new_num_glyphs + 1) * (2<<ttf->index_to_loc_format);
	t->len = new_start;
}
ttf->new_num_glyphs); +} + +static void +subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm) +{ + fz_buffer *t = read_table(ctx, stm, TAG("hmtx"), 1); + uint16_t long_metrics, short_metrics, i, k; + uint8_t *s = t->data; + uint8_t *d = t->data; + int cidfont = (ttf->encoding == NULL); + + long_metrics = ttf->orig_num_long_hor_metrics; + if (long_metrics > ttf->orig_num_glyphs) + long_metrics = ttf->orig_num_glyphs; + if (long_metrics > t->len / 4) + long_metrics = (uint16_t)(t->len / 4); + + short_metrics = (uint16_t)((t->len - long_metrics * 4) / 2); + if (short_metrics > ttf->orig_num_glyphs - long_metrics) + short_metrics = ttf->orig_num_glyphs - long_metrics; + + for (i = 0; i < long_metrics; i++) + { + if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i])) + { + put32(d, get32(s)); + d += 4; + } + else if (cidfont) + { + put32(d, 0); + d += 4; + } + s += 4; + } + for (k = 0 ; k < short_metrics; k++, i++) + { + if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i])) + { + put16(d, get16(s)); + d += 2; + } + else if (cidfont) + { + put16(d, 0); + d += 2; + } + s += 2; + } + t->len = (d - t->data); + + add_table(ctx, ttf, TAG("hmtx"), t); +} + +static void +shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf) +{ + uint32_t len; + uint16_t i, n; + uint8_t *loca; + + if (ttf->index_to_loc_format == 0) + return; /* Can't shrink cos it's already shrunk! */ + + n = ttf->new_num_glyphs; + len = get_loca(ctx, ttf, n); + if (len >= 65536) + return; /* We can't shrink it, cos it's too big. 
*/ + + loca = ttf->loca; + for (i = 0; i <= n; i++) + { + if (get32(loca + 4*i) & 1) + return; /* Can't shrink it, because an offset is not even */ + } + + for (i = 0; i <= n; i++) + { + put16(loca + 2*i, get32(loca + 4*i)/2); + } + *ttf->loca_len = 2*(n+1); + put16(ttf->index_to_loc_formatp, 0); +} + +static struct { const char *charname; int idx; } macroman[] = +{ + { ".notdef", 0}, + { ".null", 1}, + { "A", 36}, + { "AE", 144}, + { "Aacute", 201}, + { "Acircumflex", 199}, + { "Adieresis", 98}, + { "Agrave", 173}, + { "Aring", 99}, + { "Atilde", 174}, + { "B", 37}, + { "C", 38}, + { "Cacute", 253}, + { "Ccaron", 255}, + { "Ccedilla", 100}, + { "D", 39}, + { "Delta", 168}, + { "E", 40}, + { "Eacute", 101}, + { "Ecircumflex", 200}, + { "Edieresis", 202}, + { "Egrave", 203}, + { "Eth", 233}, + { "F", 41}, + { "G", 42}, + { "Gbreve", 248}, + { "H", 43}, + { "I", 44}, + { "Iacute", 204}, + { "Icircumflex", 205}, + { "Idieresis", 206}, + { "Idotaccent", 250}, + { "Igrave", 207}, + { "J", 45}, + { "K", 46}, + { "L", 47}, + { "Lslash", 226}, + { "M", 48}, + { "N", 49}, + { "Ntilde", 102}, + { "O", 50}, + { "OE", 176}, + { "Oacute", 208}, + { "Ocircumflex", 209}, + { "Odieresis", 103}, + { "Ograve", 211}, + { "Omega", 159}, + { "Oslash", 145}, + { "Otilde", 175}, + { "P", 51}, + { "Q", 52}, + { "R", 53}, + { "S", 54}, + { "Scaron", 228}, + { "Scedilla", 251}, + { "T", 55}, + { "Thorn", 237}, + { "U", 56}, + { "Uacute", 212}, + { "Ucircumflex", 213}, + { "Udieresis", 104}, + { "Ugrave", 214}, + { "V", 57}, + { "W", 58}, + { "X", 59}, + { "Y", 60}, + { "Yacute", 235}, + { "Ydieresis", 187}, + { "Z", 61}, + { "Zcaron", 230}, + { "a", 68}, + { "aacute", 105}, + { "acircumflex", 107}, + { "acute", 141}, + { "adieresis", 108}, + { "ae", 160}, + { "agrave", 106}, + { "ampersand", 9}, + { "apple", 210}, + { "approxequal", 167}, + { "aring", 110}, + { "asciicircum", 65}, + { "asciitilde", 97}, + { "asterisk", 13}, + { "at", 35}, + { "atilde", 109}, + { "b", 69}, + { "backslash", 
63}, + { "bar", 95}, + { "braceleft", 94}, + { "braceright", 96}, + { "bracketleft", 62}, + { "bracketright", 64}, + { "breve", 219}, + { "brokenbar", 232}, + { "bullet", 135}, + { "c", 70}, + { "cacute", 254}, + { "caron", 225}, + { "ccaron", 256}, + { "ccedilla", 111}, + { "cedilla", 222}, + { "cent", 132}, + { "circumflex", 216}, + { "colon", 29}, + { "comma", 15}, + { "copyright", 139}, + { "currency", 189}, + { "d", 71}, + { "dagger", 130}, + { "daggerdbl", 194}, + { "dcroat", 257}, + { "degree", 131}, + { "dieresis", 142}, + { "divide", 184}, + { "dollar", 7}, + { "dotaccent", 220}, + { "dotlessi", 215}, + { "e", 72}, + { "eacute", 112}, + { "ecircumflex", 114}, + { "edieresis", 115}, + { "egrave", 113}, + { "eight", 27}, + { "ellipsis", 171}, + { "emdash", 179}, + { "endash", 178}, + { "equal", 32}, + { "eth", 234}, + { "exclam", 4}, + { "exclamdown", 163}, + { "f", 73}, + { "fi", 192}, + { "five", 24}, + { "fl", 193}, + { "florin", 166}, + { "four", 23}, + { "fraction", 188}, + { "franc", 247}, + { "g", 74}, + { "gbreve", 249}, + { "germandbls", 137}, + { "grave", 67}, + { "greater", 33}, + { "greaterequal", 149}, + { "guillemotleft", 169}, + { "guillemotright", 170}, + { "guilsinglleft", 190}, + { "guilsinglright", 191}, + { "h", 75}, + { "hungarumlaut", 223}, + { "hyphen", 16}, + { "i", 76}, + { "iacute", 116}, + { "icircumflex", 118}, + { "idieresis", 119}, + { "igrave", 117}, + { "infinity", 146}, + { "integral", 156}, + { "j", 77}, + { "k", 78}, + { "l", 79}, + { "less", 31}, + { "lessequal", 148}, + { "logicalnot", 164}, + { "lozenge", 185}, + { "lslash", 227}, + { "m", 80}, + { "macron", 218}, + { "minus", 239}, + { "mu", 151}, + { "multiply", 240}, + { "n", 81}, + { "nine", 28}, + { "nonbreakingspace", 172}, + { "nonmarkingreturn", 2}, + { "notequal", 143}, + { "ntilde", 120}, + { "numbersign", 6}, + { "o", 82}, + { "oacute", 121}, + { "ocircumflex", 123}, + { "odieresis", 124}, + { "oe", 177}, + { "ogonek", 224}, + { "ograve", 122}, + { "one", 20}, 
+ { "onehalf", 244}, + { "onequarter", 245}, + { "onesuperior", 241}, + { "ordfeminine", 157}, + { "ordmasculine", 158}, + { "oslash", 161}, + { "otilde", 125}, + { "p", 83}, + { "paragraph", 136}, + { "parenleft", 11}, + { "parenright", 12}, + { "partialdiff", 152}, + { "percent", 8}, + { "period", 17}, + { "periodcentered", 195}, + { "perthousand", 198}, + { "pi", 155}, + { "plus", 14}, + { "plusminus", 147}, + { "product", 154}, + { "q", 84}, + { "question", 34}, + { "questiondown", 162}, + { "quotedbl", 5}, + { "quotedblbase", 197}, + { "quotedblleft", 180}, + { "quotedblright", 181}, + { "quoteleft", 182}, + { "quoteright", 183}, + { "quotesinglbase", 196}, + { "quotesingle", 10}, + { "r", 85}, + { "radical", 165}, + { "registered", 138}, + { "ring", 221}, + { "s", 86}, + { "scaron", 229}, + { "scedilla", 252}, + { "section", 134}, + { "semicolon", 30}, + { "seven", 26}, + { "six", 25}, + { "slash", 18}, + { "space", 3}, + { "sterling", 133}, + { "summation", 153}, + { "t", 87}, + { "thorn", 238}, + { "three", 22}, + { "threequarters", 246}, + { "threesuperior", 243}, + { "tilde", 217}, + { "trademark", 140}, + { "two", 21}, + { "twosuperior", 242}, + { "u", 88}, + { "uacute", 126}, + { "ucircumflex", 128}, + { "udieresis", 129}, + { "ugrave", 127}, + { "underscore", 66}, + { "v", 89}, + { "w", 90}, + { "x", 91}, + { "y", 92}, + { "yacute", 236}, + { "ydieresis", 186}, + { "yen", 150}, + { "z", 93}, + { "zcaron", 231}, + { "zero", 19}, +}; + +static int +find_macroman_string(const char *s) +{ + int l, r, m; + int comparison; + + l = 0; + r = nelem(macroman); + while (l <= r) + { + m = (l + r) >> 1; + comparison = strcmp(s, macroman[m].charname); + if (comparison < 0) + r = m - 1; + else if (comparison > 0) + l = m + 1; + else + return macroman[m].idx; + } + + return -1; +} + +static size_t +subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids) +{ + int i, n, new_glyphs, old_strings, new_strings; + int j; + fz_int2_heap heap 
= { 0 }; + uint8_t *d0, *e, *p; + + if (len < (size_t) 2 + 2 * ttf->orig_num_glyphs) + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table"); + + n = get16(d); + if ((uint32_t)n != ttf->orig_num_glyphs) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); + + d0 = d; + d += 2; len -= 2; + e = d; + p = d; + + /* Store all kept indexes. */ + if (len < (size_t)n*2) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); + old_strings = 0; + new_strings = 0; + new_glyphs = 0; + j = 0; + len -= (size_t)n*2; + for (i = 0; i < n; i++) + { + uint16_t o = get16(d); + fz_int2 i2; + p += 2; + + if (o >= 258) + old_strings++; + + /* We're only keeping gids we want. */ + /* Note we need to keep both the gids we were given by the caller, but also + * those required as composites (in gid_renum, if we have it). */ + if (i != 0 && (j >= num_gids || gids[j] != i) && (ttf->gid_renum == NULL || ttf->gid_renum[i] == 0)) + { + memmove(d, d + 2, (n - i - 1) * 2); + continue; + } + if (j < num_gids && gids[j] == i) + j++; + + d += 2; + e += 2; + + /* We want this gid. */ + new_glyphs++; + + /* 257 or smaller: same as in the basic order, keep it as such. */ + if (o <= 257) + continue; + + /* check if string is one of the macroman standard ones, and use its index if so. */ + { + uint8_t *q = d0 + 2 + (size_t) n * 2; + int k; + char buf[257] = { 0 }; + int macidx; + for (k = 0; k < o - 258; k++) + q += 1 + *q; + for (k = 0; k < *q; k++) + buf[k] = *(q + 1 + k); + + macidx = find_macroman_string(buf); + + if (macidx >= 0) + { + put16(d - 2, macidx); + continue; + } + } + + /* We want this gid, and it is a string. */ + new_strings++; + + /* Store the index. */ + i2.a = o - 258; + i2.b = i; + fz_int2_heap_insert(ctx, &heap, i2); + + /* Update string index value in table entry. 
*/ + put16(d - 2, 257 + new_strings); + } + + d = p; + + /* Update number of indexes */ + put16(d0, new_glyphs); + + fz_int2_heap_sort(ctx, &heap); + + /* So, the heap is sorted on i2.a (the string indexes we want to keep), + * and i2.b is the gid that refers to that index. */ + + /* Run through the list moving the strings down that we care about. */ + j = 0; + n = old_strings; + for (i = 0; i < n; i++) + { + uint8_t slen; + + if (len < 1) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); + slen = *d+1; + if (len < slen) + fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); + len -= slen; + + if (j >= heap.len || heap.heap[j].a != i) + { + /* Drop this one. */ + d += slen; + continue; + } + + memmove(e, d, slen); + d += slen; + e += slen; + + j++; + } + + fz_free(ctx, heap.heap); + + return e - d0; +} + +static void +subset_post(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids) +{ + fz_buffer *t = read_table(ctx, stm, TAG("post"), 0); + uint8_t *d; + size_t len; + uint32_t fmt; + + if (t == NULL) + return; + + d = t->data; + len = t->len; + + if (len < 32) + { + fz_drop_buffer(ctx, t); + fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table"); + } + + fmt = get32(d); + + if (fmt != 0x00020000) + { + /* Fmt 1: Nothing to be gained by having this table. The cmap should + * have all the mappings anyway, and we'll have broken it by renumbering + * the gids down anyway. */ + /* Fmt 2.5 deprecated. */ + /* Fmt 3 and 4: should not be used for PDF. */ + /* No other formats defined. 
*/ + fz_drop_buffer(ctx, t); + return; + } + d += 32; len -= 32; + fz_try(ctx) + len = subset_post2(ctx, ttf, d, len, gids, num_gids); + fz_catch(ctx) + { + fz_drop_buffer(ctx, t); + fz_rethrow(ctx); + } + + t->len = 32 + len; + + add_table(ctx, ttf, TAG("post"), t); +} + +static void +subset_CFF(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids, int symbolic, int cidfont) +{ + fz_buffer *t = read_table(ctx, stm, TAG("CFF "), 1); + fz_buffer *sub = NULL; + + fz_var(sub); + + fz_try(ctx) + sub = fz_subset_cff_for_gids(ctx, t, gids, num_gids, symbolic, cidfont); + fz_always(ctx) + fz_drop_buffer(ctx, t); + fz_catch(ctx) + fz_rethrow(ctx); + + add_table(ctx, ttf, TAG("CFF "), sub); +} + +fz_buffer * +fz_subset_ttf_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont) +{ + fz_stream *stm = fz_open_buffer(ctx, orig); + ttf_t ttf = { 0 }; + fz_buffer *newbuf = NULL; + fz_output *out = NULL; + + fz_var(newbuf); + fz_var(out); + + fz_try(ctx) + { + ttf.is_otf = (fz_read_uint32_le(ctx, stm) == 0x4f54544f); + ttf.symbolic = symbolic; + + /* Subset the name table. No other dependencies. */ + subset_name_table(ctx, &ttf, stm); + + if (!cidfont) + { + /* Load the encoding. Populates the encoding table from the cmap table + * in the original. cmap table is then discarded. */ + load_encoding(ctx, &ttf, stm); + + /* Blank out the bits of the encoding we don't need. */ + reduce_encoding(ctx, &ttf, gids, num_gids); + } + + /* Read maxp and store the table. Remember orig_num_glyphs. */ + read_maxp(ctx, &ttf, stm); + + /* Read head and store the table. Remember the loca index size. */ + read_head(ctx, &ttf, stm); + + if (ttf.is_otf) + { + subset_CFF(ctx, &ttf, stm, gids, num_gids, symbolic, cidfont); + } + + /* Read loca and store it. Stash a pointer to the table for quick access. */ + if (!ttf.is_otf) + { + read_loca(ctx, &ttf, stm); + + /* Read the glyf data, and scan it for composites. 
This makes the gid_renum table, + * subsets the glyf data, and rewrites the loca table. */ + read_glyf(ctx, &ttf, stm, gids, num_gids); + } + + /* Read hhea and store it. Remember numOfLongHorMetrics. */ + read_hhea(ctx, &ttf, stm); + + /* Read and subset hmtx. */ + subset_hmtx(ctx, &ttf, stm); + +#ifdef DEBUG_SUBSETTING + if (!cidfont) + { + encoding_t *enc = ttf.encoding; + uint32_t i, n = enc->max; + + for (i = 0; i < n; i++) + if (enc->gid[i]) + printf("cid %x '%c'-> orig gid %d -> gid %d\n", i, (char)i, enc->gid[i], ttf.gid_renum[enc->gid[i]]); + } + { + uint32_t i; + + for (i = 0; i < ttf.orig_num_glyphs; i++) + if (ttf.gid_renum[i]) + printf("gid %d -> %d\n", i, ttf.gid_renum[i]); + + for (i = 0; i <= ttf.new_num_glyphs; i++) + printf("LOCA %d = %x\n", i, get_loca(ctx, &ttf, i)); + } +#endif + if (!ttf.is_otf) + { + shrink_loca_if_possible(ctx, &ttf); + + update_num_glyphs(ctx, &ttf); + } + + if (!cidfont) + { + /* Now we can make the new cmap. */ + make_cmap(ctx, &ttf); + } + + if (!cidfont) + { + /* subset the post table */ + subset_post(ctx, &ttf, stm, gids, num_gids); + } + + copy_table(ctx, &ttf, stm, TAG("OS/2"), 0); + copy_table(ctx, &ttf, stm, TAG("cvt "), 0); + copy_table(ctx, &ttf, stm, TAG("fpgm"), 0); + copy_table(ctx, &ttf, stm, TAG("prep"), 0); + + sort_tables(ctx, &ttf); + checksum_tables(ctx, &ttf); + + newbuf = fz_new_buffer(ctx, 1024); + out = fz_new_output_with_buffer(ctx, newbuf); + + write_tables(ctx, &ttf, out); + + fz_close_output(ctx, out); + + fix_checksum(ctx, newbuf); + } + fz_always(ctx) + { + int i; + + fz_drop_output(ctx, out); + fz_drop_stream(ctx, stm); + for (i = 0; i < ttf.len; i++) + fz_drop_buffer(ctx, ttf.table[i].tab); + fz_free(ctx, ttf.table); + fz_free(ctx, ttf.gid_renum); + fz_free(ctx, ttf.encoding); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, newbuf); + fz_rethrow(ctx); + } + + return newbuf; +}
