comparison mupdf-source/source/fitz/subset-ttf.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 /*
26 For the purposes of this code, and to save my tiny brain from
27 overload, we will adopt the following notation:
28
29 1) The PDF file contains bytes of data. These bytes are looked
30 up in the MuPDF font handling to resolve to 'glyph ids' (gids).
31 These account for all the different encodings etc in use,
32 including the 'cmap' table within the font.
33
34 2) We are given the list of gids that are used in the document.
35 We arrange to keep any entries in the cmap or post tables that
36 map to these gids.
37
38 We map the gids to the bottom of the range. This means that the
39 cmap and post tables need to be updated.
40
41 A similar optimisation would be to compress the range of cids
42 used to a prefix of the range used. This would mean that the
43 calling code needs to rewrite the data within the PDF file -
44 both in terms of the strings used with the PDF streams, and in
45 terms of the ToUnicode tables there (and the Widths etc).
46
47 For now, we'll ignore this optimisation.
48
49 Possibly, in the case of 'Identity' ToUnicode mappings we
50 wouldn't actually want to do this range compression? It'd only
51 make the file larger.
52 */
53
/* A character code to glyph id mapping, as loaded from one cmap subtable. */
typedef struct
{
	uint16_t pid; /* Platform id of the subtable this was loaded from. */
	uint16_t psid; /* Platform specific id of that subtable. */

	uint32_t max; /* Number of valid entries in gid[]. */
	/* gid[code] is the glyph for 'code'; 0 means unmapped. The loaders
	 * size the allocation to suit 'max', so more (or fewer) than 256
	 * entries may really be present. This must remain the last field. */
	uint16_t gid[256];
} encoding_t;
62
/* One table of the font being built. */
typedef struct
{
	uint32_t tag; /* 4 character table tag, packed big-endian (see TAG). */
	uint32_t checksum; /* Filled in by checksum_tables(). */
	fz_buffer *tab; /* The table contents. */
} tagged_table_t;
69
/* Working state for subsetting a single font. */
typedef struct
{
	int is_otf; /* Non-zero: write an 'OTTO' header rather than 0x00010000. */
	int symbolic; /* Selects which cmap subtables load_encoding() looks for. */
	encoding_t *encoding; /* Chosen cmap encoding; NULL is treated as a cid font. */
	uint16_t orig_num_glyphs; /* Glyph count read from the maxp table. */
	uint16_t new_num_glyphs; /* Glyph count after renumbering. */
	uint16_t index_to_loc_format; /* From head: 0 = short loca entries, 1 = long. */
	uint8_t *index_to_loc_formatp; /* Points at that field inside the head table data. */
	uint16_t orig_num_long_hor_metrics; /* Value read from the hhea table. */
	uint16_t new_num_long_hor_metrics; /* Value after subsetting. */

	/* Pointer to the old tables (in the tagged table below) */
	uint8_t *loca;
	size_t *loca_len;
	uint8_t *maxp;

	/* Maps from old gid to new gid */
	uint16_t *gid_renum;

	int max; /* Number of slots allocated in table[]. */
	int len; /* Number of slots in use in table[]. */
	tagged_table_t *table;
} ttf_t;
94
95 static uint32_t
96 checksum(fz_buffer *buf)
97 {
98 size_t i;
99 const uint8_t *d = (const uint8_t *)buf->data;
100 uint32_t cs = 0;
101
102 for (i = buf->len>>2; i > 0; i--)
103 {
104 cs += d[0]<<24;
105 cs += d[1]<<16;
106 cs += d[2]<<8;
107 cs += d[3];
108 d += 4;
109 }
110 i = buf->len - (buf->len & ~3);
111 switch (i)
112 {
113 case 3:
114 cs += d[2]<<8;
115 /* fallthrough */
116 case 2:
117 cs += d[1]<<16;
118 /* fallthrough */
119 case 1:
120 cs += d[0]<<24;
121 default:
122 break;
123 }
124
125 return cs;
126 }
127
128 static uint32_t
129 find_table(fz_context *ctx, fz_stream *stm, uint32_t tag, uint32_t *len)
130 {
131 int num_tables;
132 int i;
133
134 fz_seek(ctx, stm, 4, SEEK_SET);
135 num_tables = fz_read_int16(ctx, stm);
136 fz_seek(ctx, stm, 12, SEEK_SET);
137
138 for (i = 0; i < num_tables; i++)
139 {
140 uint32_t t = fz_read_uint32(ctx, stm);
141 uint32_t cs = fz_read_uint32(ctx, stm);
142 uint32_t off = fz_read_uint32(ctx, stm);
143 (void) cs; /* UNUSED */
144 *len = fz_read_uint32(ctx, stm);
145 if (t == tag)
146 return off;
147 }
148
149 return 0;
150 }
151
152 static fz_buffer *
153 read_table(fz_context *ctx, fz_stream *stm, uint32_t tag, int compulsory)
154 {
155 uint32_t size;
156 uint32_t off = find_table(ctx, stm, tag, &size);
157 fz_buffer *buf;
158
159 if (off == 0)
160 {
161 if (compulsory)
162 fz_throw(ctx, FZ_ERROR_FORMAT, "Required %c%c%c%c table missing", tag>>24, (tag>>16)&0xff, (tag>>8)&0xff, tag & 0xff);
163 return NULL;
164 }
165
166 fz_seek(ctx, stm, off, SEEK_SET);
167 buf = fz_new_buffer(ctx, size);
168
169 fz_try(ctx)
170 {
171 fz_read(ctx, stm, buf->data, size);
172 buf->len = size;
173 }
174 fz_catch(ctx)
175 {
176 fz_drop_buffer(ctx, buf);
177 fz_rethrow(ctx);
178 }
179
180 return buf;
181 }
182
/* Pack the first 4 characters of s into a big-endian uint32_t tag.
 * The argument is parenthesised so that expressions can be passed, and
 * each byte is widened to uint32_t before shifting so that bytes >= 0x80
 * cannot overflow a signed int. */
#define TAG(s) \
	( (((uint32_t)(uint8_t)(s)[0])<<24) | \
	(((uint32_t)(uint8_t)(s)[1])<<16) | \
	(((uint32_t)(uint8_t)(s)[2])<<8) | \
	(((uint32_t)(uint8_t)(s)[3])))
188
189 static void
190 add_table(fz_context *ctx, ttf_t *ttf, uint32_t tag, fz_buffer *tab)
191 {
192 fz_try(ctx)
193 {
194 if (ttf->max == ttf->len)
195 {
196 int n = ttf->max * 2;
197 if (n == 0)
198 n = 16;
199 ttf->table = fz_realloc(ctx, ttf->table, sizeof(*ttf->table) * n);
200 ttf->max = n;
201 }
202
203 ttf->table[ttf->len].tag = tag;
204 ttf->table[ttf->len].tab = tab;
205 ttf->len++;
206 }
207 fz_catch(ctx)
208 {
209 fz_drop_buffer(ctx, tab);
210 fz_rethrow(ctx);
211 }
212 }
213
214 static void
215 copy_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm, uint32_t tag, int compulsory)
216 {
217 fz_buffer *t;
218
219 t = read_table(ctx, stm, tag, compulsory);
220 if (t)
221 add_table(ctx, ttf, tag, t);
222 }
223
224 static int
225 tabcmp(const void *a_, const void *b_)
226 {
227 const tagged_table_t *a = (const tagged_table_t *)a_;
228 const tagged_table_t *b = (const tagged_table_t *)b_;
229
230 return (a->tag - b->tag);
231 }
232
233 static void
234 sort_tables(fz_context *ctx, ttf_t *ttf)
235 {
236 /* Avoid scanbuild/coverity false warning with this unnecessary test */
237 if (ttf->table == NULL || ttf->len == 0)
238 return;
239 qsort(ttf->table, ttf->len, sizeof(tagged_table_t), tabcmp);
240 }
241
242 static void
243 checksum_tables(fz_context *ctx, ttf_t *ttf)
244 {
245 int i;
246
247 for (i = 0; i < ttf->len; i++)
248 ttf->table[i].checksum = checksum(ttf->table[i].tab);
249 }
250
251 static void
252 write_tables(fz_context *ctx, ttf_t *ttf, fz_output *out)
253 {
254 int i = 0;
255 uint32_t offset;
256
257 /* scalar type - TTF for now - may need to cope with other types later. */
258 if (ttf->is_otf)
259 fz_write_int32_be(ctx, out, 0x4f54544f);
260 else
261 fz_write_int32_be(ctx, out, 0x00010000);
262
263 /* number of tables */
264 fz_write_uint16_be(ctx, out, ttf->len);
265
266 while (1<<(i+1) <= ttf->len)
267 i++;
268
269 /* searchRange */
270 fz_write_uint16_be(ctx, out, (1<<i)<<4);
271
272 /* entrySelector */
273 fz_write_uint16_be(ctx, out, i);
274
275 /* rangeShift*/
276 fz_write_uint16_be(ctx, out, (ttf->len - (1<<i))<<4);
277
278 /* Table directory */
279 offset = 12 + ttf->len * 16;
280 for (i = 0; i < ttf->len; i++)
281 {
282 fz_write_uint32_be(ctx, out, ttf->table[i].tag);
283 fz_write_uint32_be(ctx, out, ttf->table[i].checksum);
284 fz_write_uint32_be(ctx, out, offset);
285 fz_write_uint32_be(ctx, out, (uint32_t)ttf->table[i].tab->len);
286 offset += (uint32_t)ttf->table[i].tab->len;
287 }
288
289 /* Now the tables in turn */
290 for (i = 0; i < ttf->len; i++)
291 {
292 fz_write_buffer(ctx, out, ttf->table[i].tab);
293 }
294 }
295
296 static void
297 fix_checksum(fz_context *ctx, fz_buffer *buf)
298 {
299 uint8_t *data;
300 uint32_t sum = 0;
301 size_t len = fz_buffer_storage(ctx, buf, &data);
302 uint32_t namesize;
303 fz_stream *stm = fz_open_buffer(ctx, buf);
304 uint32_t csumpos = find_table(ctx, stm, TAG("head"), &namesize) + 8;
305
306 (void) len; // UNUSED
307
308 fz_drop_stream(ctx, stm);
309
310 /* First off, blat the old checksum */
311 memset(data+csumpos, 0, 4);
312
313 sum = checksum(buf);
314 sum = 0xb1b0afba-sum;
315
316 /* Insert it. */
317 data[csumpos] = sum>>24;
318 data[csumpos+1] = sum>>16;
319 data[csumpos+2] = sum>>8;
320 data[csumpos+3] = sum;
321 }
322
/* The fields of one record of the name table. Each field is 16 bits,
 * which matches the 12 byte records that subset_name_table() walks; that
 * code reads the length and offset at byte offsets 8 and 10 of the raw
 * record rather than going through this struct. */
typedef struct
{
	uint16_t platform_id;
	uint16_t platform_specific_id;
	uint16_t language_id;
	uint16_t name_id;
	uint16_t len; /* Length of the name data, in bytes. */
	uint16_t offset; /* Offset of the name data within the string storage. */
} name_record_t;
332
/* Read a big-endian 32 bit value from d. Each byte is widened to
 * uint32_t before shifting: shifting a promoted int left by 24 is
 * undefined for bytes >= 0x80. */
static uint32_t get32(const uint8_t *d)
{
	return ((uint32_t)d[0]<<24)|((uint32_t)d[1]<<16)|((uint32_t)d[2]<<8)|(uint32_t)d[3];
}
337
/* Read a big-endian 16 bit value from d. */
static uint32_t get16(const uint8_t *d)
{
	uint32_t hi = d[0];
	uint32_t lo = d[1];

	return (hi << 8) | lo;
}
342
/* Store v at d as a big-endian 32 bit value. */
static void put32(uint8_t *d, uint32_t v)
{
	int shift;

	/* Most significant byte first. */
	for (shift = 24; shift >= 0; shift -= 8)
		*d++ = (uint8_t)(v >> shift);
}
350
/* Store the low 16 bits of v at d, big-endian. */
static void put16(uint8_t *d, uint32_t v)
{
	d[1] = (uint8_t)v;
	d[0] = (uint8_t)(v >> 8);
}
356
/* A growable list of pointers into a block of data. */
typedef struct
{
	/* First 2 fields aren't actually needed for the pointer list
	 * operation, but they serve as bounds for all the offsets used
	 * within the ptr list. */
	uint8_t *block;
	size_t block_len;

	uint32_t len; /* Number of pointers in use. */
	uint32_t max; /* Number of pointers allocated. */
	uint8_t **ptr; /* The pointers; freed by drop_ptr_list(). */
} ptr_list_t;
369
370 static void
371 ptr_list_add(fz_context *ctx, ptr_list_t *pl, uint8_t *ptr)
372 {
373 if (pl->len == pl->max)
374 {
375 int n = pl->max * 2;
376 if (n == 0)
377 n = 32;
378 pl->ptr = fz_realloc(ctx, pl->ptr, sizeof(*pl->ptr) * n);
379 pl->max = n;
380 }
381 pl->ptr[pl->len++] = ptr;
382 }
383
384 typedef int (cmp_t)(const uint8_t **a, const uint8_t **b);
385 typedef int (void_cmp_t)(const void *, const void *);
386
387 static void
388 ptr_list_sort(fz_context *ctx, ptr_list_t *pl, cmp_t *cmp)
389 {
390 /* Avoid scanbuild/coverity false warning with this unnecessary test */
391 if (pl->ptr == NULL || pl->len == 0)
392 return;
393 qsort(pl->ptr, pl->len, sizeof(*pl->ptr), (void_cmp_t *)cmp);
394 }
395
396 static void
397 drop_ptr_list(fz_context *ctx, ptr_list_t *pl)
398 {
399 fz_free(ctx, pl->ptr);
400 }
401
402 /* return 1 to keep, 0 to drop. */
403 typedef int (filter_t)(const uint8_t *ptr, const uint8_t *blk, size_t len);
404
405 /* This makes a pointer list from a filtered block, moving the underlying data as it filters. */
406 static void
407 ptr_list_compact(fz_context *ctx, ptr_list_t *pl, filter_t *fil, uint8_t *base, int n, size_t eltsize, uint8_t *block, size_t block_len)
408 {
409 int i;
410 uint8_t *s = base;
411 uint8_t *d = base;
412
413 pl->block = block;
414 pl->block_len = block_len;
415
416 if (base < block || (size_t)(base - block) > block_len || (size_t)(base - block) + n * eltsize >= block_len)
417 fz_throw(ctx, FZ_ERROR_FORMAT, "Ptr List creation failed");
418
419 for (i = 0; i < n; i++)
420 {
421 if (fil(s, block, block_len))
422 {
423 ptr_list_add(ctx, pl, d);
424 if (s != d)
425 memmove(d, s, eltsize);
426 d += eltsize;
427 }
428 s += eltsize;
429 }
430 }
431
/* Sort comparison for name records: the record with the larger 16 bit
 * length field (at byte 8 of the record) sorts first. */
static int
names_by_size(const uint8_t **a, const uint8_t **b)
{
	uint32_t len_a = get16((*a)+8);
	uint32_t len_b = get16((*b)+8);

	return len_b - len_a;
}
437
/* Filter deciding which name records survive subsetting. */
static int
filter_name_tables(const uint8_t *ptr, const uint8_t *block, size_t block_len)
{
	(void)ptr;
	(void)block;
	(void)block_len;

	/* FIXME: For now, we keep everything. */
	return 1;
}
444
#define UNFOUND ((uint32_t)-1)

/*
	Search block (block_len bytes) for the first occurrence of str
	(str_len bytes).

	Returns the offset of the match within block, or UNFOUND if there
	is none. An empty block never matches. A string longer than the
	block cannot match either; that case is tested explicitly rather
	than asserted, so that builds with NDEBUG cannot underflow the
	scan length and read beyond the block.
*/
static uint32_t
find_string_in_block(const uint8_t *str, size_t str_len, const uint8_t *block, size_t block_len)
{
	size_t positions;
	size_t pos;

	if (block_len == 0 || block_len < str_len)
		return UNFOUND;

	/* Number of start positions at which str still fits in the block. */
	positions = block_len - str_len + 1;

	for (pos = 0; pos < positions; pos++)
	{
		if (!memcmp(str, block + pos, str_len))
			return (uint32_t)pos;
	}

	return UNFOUND;
}
468
469 static void
470 subset_name_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
471 {
472 fz_buffer *t = read_table(ctx, stm, TAG("name"), 0);
473 uint8_t *d;
474 uint32_t i, n, off;
475 ptr_list_t pl = { 0 };
476 size_t name_data_size;
477 uint8_t *new_name_data = NULL;
478 size_t new_len;
479
480 if (t == NULL)
481 return; /* No name table */
482
483 d = t->data;
484
485 fz_var(new_name_data);
486
487 fz_try(ctx)
488 {
489 if (get16(d) != 0 || t->len < 6)
490 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported name table format");
491
492 n = get16(d+2);
493 off = get16(d+4);
494 name_data_size = t->len - 6 - 12*n;
495
496 if (t->len < 6 + 12*n)
497 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name table");
498
499 ptr_list_compact(ctx, &pl, filter_name_tables, d+6, n, 12, d, t->len);
500
501 /* Sort our list so that the ones with the largest name data blocks come first. */
502 ptr_list_sort(ctx, &pl, names_by_size);
503
504 new_name_data = fz_malloc(ctx, name_data_size);
505 new_len = 0;
506 for (i = 0; i < pl.len; i++)
507 {
508 uint32_t name_len, offset, name_off;
509 uint8_t *name;
510
511 if (t->len < (size_t) (pl.ptr[i] - t->data) + 8 + 2)
512 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name length in name table");
513 name_len = get16(pl.ptr[i] + 8);
514
515 if (t->len < (size_t) (pl.ptr[i] - t->data) + 10 + 2)
516 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name offset in name table");
517 name_off = off + get16(pl.ptr[i] + 10);
518 name = d + name_off;
519
520 if (t->len < name_off + name_len)
521 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name in name table");
522 offset = find_string_in_block(name, name_len, new_name_data, new_len);
523 if (offset == UNFOUND)
524 {
525 if (name_data_size < new_len + name_len)
526 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad name table in TTF");
527 memcpy(new_name_data + new_len, name, name_len);
528 offset = (uint32_t)new_len;
529 new_len += name_len;
530 }
531 put16(pl.ptr[i]+10, offset);
532 }
533 memcpy(d + 6 + 12*pl.len, new_name_data, new_len);
534 t->len = 6 + 12*pl.len + new_len;
535 put16(d+4, 6 + 12*pl.len);
536 }
537 fz_always(ctx)
538 {
539 drop_ptr_list(ctx, &pl);
540 fz_free(ctx, new_name_data);
541 }
542 fz_catch(ctx)
543 {
544 fz_drop_buffer(ctx, t);
545 fz_rethrow(ctx);
546 }
547
548 add_table(ctx, ttf, TAG("name"), t);
549 }
550
551 static encoding_t *
552 load_enc_tab0(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
553 {
554 encoding_t *enc;
555 int i;
556
557 if (data_size < 262)
558 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap 0 format table");
559
560 enc = fz_malloc_struct(ctx, encoding_t);
561 d += offset + 6;
562
563 enc->max = 256;
564 for (i = 0; i < 256; i++)
565 enc->gid[i] = d[i];
566
567 return enc;
568 }
569
570 static encoding_t *
571 load_enc_tab4(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
572 {
573 encoding_t *enc;
574 uint16_t seg_count;
575 uint32_t i;
576
577 if (data_size < offset + 26)
578 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
579
580 seg_count = get16(d+offset+6); /* 2 * seg_count */
581
582 if (seg_count & 1)
583 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table");
584 seg_count >>= 1;
585
586 enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (65536 - 256));
587 enc->max = 65536;
588
589 fz_try(ctx)
590 {
591 /* Run through the segments, counting how many are used. */
592 for (i = 0; i < seg_count; i++)
593 {
594 uint16_t seg_end, seg_start, delta, target, inner_offset;
595 uint32_t offset_ptr, s;
596
597 if (data_size < offset + 14 + 6 * seg_count + 2 + 2 * i + 2)
598 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
599
600 seg_end = get16(d + offset + 14 + 2 * i);
601 seg_start = get16(d + offset + 14 + 2 * seg_count + 2 + 2 * i);
602 delta = get16(d + offset + 14 + 4 * seg_count + 2 + 2 * i);
603 offset_ptr = offset + 14 + 6 * seg_count + 2 + 2 * i;
604 inner_offset = get16(d + offset_ptr);
605
606 if (seg_start >= enc->max || seg_end >= enc->max || seg_end < seg_start)
607 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table.");
608
609 for (s = seg_start; s <= seg_end; s++)
610 {
611 if (inner_offset == 0)
612 {
613 target = delta + s;
614 }
615 else
616 {
617 if (data_size < offset_ptr + inner_offset + 2 * (s - seg_start) + 2)
618 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
619
620 /* Yes. This is very screwy. The inner_offset is from the offset_ptr in use. */
621 target = get16(d + offset_ptr + inner_offset + 2 * (s - seg_start));
622 if (target != 0)
623 target += delta;
624 }
625
626 if (target != 0)
627 enc->gid[s] = target;
628 }
629 }
630 }
631 fz_catch(ctx)
632 {
633 fz_free(ctx, enc);
634 fz_rethrow(ctx);
635 }
636
637 return enc;
638 }
639
640 static encoding_t *
641 load_enc_tab6(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
642 {
643 encoding_t *enc;
644 uint16_t first_code;
645 uint16_t entry_count;
646 uint16_t length;
647 uint32_t i;
648
649 if (data_size < 10)
650 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap6 too small");
651
652 length = get16(d+offset+2);
653 first_code = get16(d+offset+6);
654 entry_count = get16(d+offset+8);
655
656 if (length < entry_count*2 + 10)
657 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap6 table");
658
659 enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (first_code + entry_count - 256));
660 enc->max = first_code + entry_count;
661
662 /* Run through the segments, counting how many are used. */
663 for (i = 0; i < entry_count; i++)
664 {
665 enc->gid[first_code+i] = get16(d+offset+10+i*2);
666 }
667
668 return enc;
669 }
670
671 static int
672 is_encoding_all_zeros(fz_context *ctx, encoding_t *enc)
673 {
674 uint32_t i;
675
676 if (enc != NULL)
677 for (i = 0; i < enc->max; i++)
678 if (enc->gid[i] != 0)
679 return 0;
680
681 return 1;
682 }
683
684
685 static encoding_t *
686 load_enc(fz_context *ctx, fz_buffer *t, int pid, int psid)
687 {
688 uint8_t *d = t->data;
689 size_t data_size = t->len;
690 uint32_t i, n;
691
692 if (data_size < 6 || get16(d) != 0)
693 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format");
694
695 n = get16(d+2);
696
697 if (data_size < 4 + 8*n)
698 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap table");
699
700 for (i = 0; i < n; i++)
701 {
702 uint16_t plat_id = get16(d + 4 + i * 8);
703 uint16_t plat_spec_id = get16(d + 4 + i * 8 + 2);
704 uint32_t offset = get32(d + 4 + i * 8 + 4);
705 uint16_t fmt;
706 encoding_t *enc;
707
708 if (plat_id != pid || plat_spec_id != psid)
709 continue;
710
711 if (offset < 4 + 8 * n || offset + 2 >= data_size)
712 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap table data out of range");
713
714 fmt = get16(d+offset);
715 switch(fmt)
716 {
717 case 0:
718 enc = load_enc_tab0(ctx, d, data_size, offset);
719 break;
720 case 4:
721 enc = load_enc_tab4(ctx, d, data_size, offset);
722 break;
723 case 6:
724 enc = load_enc_tab6(ctx, d, data_size, offset);
725 break;
726 default:
727 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format %d", fmt);
728 }
729
730 enc->pid = pid;
731 enc->psid = psid;
732
733 if (is_encoding_all_zeros(ctx, enc))
734 {
735 // ignore any encoding that is all zeros
736 fz_free(ctx, enc);
737 enc = NULL;
738 }
739
740 return enc;
741 }
742
743 return NULL;
744 }
745
746 static void
747 load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
748 {
749 fz_buffer *t = read_table(ctx, stm, TAG("cmap"), 1);
750 encoding_t *enc = NULL;
751
752 fz_var(enc);
753
754 fz_try(ctx)
755 {
756 if (ttf->symbolic)
757 {
758 /* For symbolic fonts, we look for (3,0) as per PDF Spec, then (1,0). */
759 enc = load_enc(ctx, t, 3, 0);
760 if (!enc)
761 enc = load_enc(ctx, t, 1, 0);
762 }
763 else
764 {
765 /* For non symbolic fonts, we look for (3,1) then (1,0), then (0,1), and finally (0,3). */
766 enc = load_enc(ctx, t, 3, 1);
767 if (!enc)
768 enc = load_enc(ctx, t, 1, 0);
769 if (!enc)
770 enc = load_enc(ctx, t, 0, 1);
771 if (!enc)
772 enc = load_enc(ctx, t, 0, 3);
773 }
774 if (!enc)
775 fz_throw(ctx, FZ_ERROR_FORMAT, "No suitable cmap table found");
776 }
777 fz_always(ctx)
778 {
779 fz_drop_buffer(ctx, t);
780 }
781 fz_catch(ctx)
782 {
783 fz_rethrow(ctx);
784 }
785
786 ttf->encoding = enc;
787 }
788
789 static void
790 reduce_encoding(fz_context *ctx, ttf_t *ttf, int *gids, int num_gids)
791 {
792 int i;
793 encoding_t *enc = ttf->encoding;
794 int n = enc->max;
795
796 for (i = 0; i < n; i++)
797 {
798 int gid = enc->gid[i];
799 int lo, hi;
800
801 if (gid == 0)
802 continue;
803
804 lo = 0;
805 hi = num_gids;
806 while (lo < hi)
807 {
808 int mid = (lo + hi)>>1;
809 int g = gids[mid];
810 if (g < gid)
811 lo = mid+1;
812 else if (g > gid)
813 hi = mid;
814 else
815 goto found; /* Leave this one as is. */
816 }
817
818 /* Not found */
819 enc->gid[i] = 0;
820 found:
821 {}
822 }
823 }
824
/*
	Build a new cmap table from ttf->encoding and add it to the font.

	The table has a single format 4 subtable, using the platform ids
	of the encoding. Every segment has a delta of 0 and gives its
	glyphs explicitly through the glyph id array. For fonts that are
	not OTF and have a renumbering table, glyph ids below
	orig_num_glyphs are passed through gid_renum on the way out.

	The encoding is walked twice, in exactly the same way each time:
	once to size the table, and once to fill it in.
*/
static void
make_cmap(fz_context *ctx, ttf_t *ttf)
{
	uint32_t i;
	uint32_t len;
	uint32_t segs = 0;
	uint32_t seg, seg_start, seg_end;
	encoding_t *enc = ttf->encoding;
	uint32_t n = enc->max;
	uint32_t entries = 0;
	fz_buffer *buf;
	uint8_t *d;
	uint32_t offset;

	/* Make a type 4 table. */

	/* Count the number of segments. A segment starts at a mapped code,
	 * and carries on until more than 4 unmapped codes in a row are
	 * seen; shorter gaps are absorbed into the segment. */
	for (i = 0; i < n; i++)
	{
		if (enc->gid[i] == 0)
			continue;

		seg_start = i;
		seg_end = i;
		for (i++; i<n; i++)
		{
			if (enc->gid[i] != 0)
				seg_end = i;
			else if (i - seg_end > 4)
				break;
		}
		entries += seg_end - seg_start + 1;
		segs++;
	}
	segs++; /* For the terminator */

	/* 12 bytes of cmap header, 14 bytes of subtable header, 2 reserved
	 * bytes, 4 arrays of 2 bytes per segment, then 2 bytes per entry. */
	len = 12 + 14 + 2 + segs * 2 * 4 + entries * 2;
	buf = fz_new_buffer(ctx, len);
	d = buf->data;

	/* cmap header */
	put16(d, 0); /* version */
	put16(d+2, 1); /* num sub tables */
	put16(d+4, enc->pid);
	put16(d+6, enc->psid);
	put32(d+8, 12); /* offset */
	d += 12;

	/* From here on, d (and every offset used with it) is relative to
	 * the start of the subtable. */
	put16(d, 4); /* Format */
	put16(d + 2, len-12); /* Length */
	put16(d + 4, 0); /* FIXME: Language */
	put16(d + 6, segs * 2);
	i = 0;
	while (1U<<(i+1) <= segs)
		i++;
	/* So 1<<i <= segs < 1<<(i+1) */
	put16(d + 8, 1<<(i+1)); /* searchRange */
	put16(d + 10, i); /* entrySelector */
	put16(d + 12, 2 * segs - (1<<(i+1))); /* rangeShift */
	put16(d + 14 + segs * 2, 0); /* reserved */

	/* Now output the segment data */
	entries = 14 + segs * 2 * 4 + 2; /* offset of where to put entries.*/
	seg = 0;
	for (i = 0; i < n; i++)
	{
		if (enc->gid[i] == 0)
			continue;

		seg_start = i;
		seg_end = i;
		/* offset is where this segment's slot is in the last of the 4
		 * arrays (the range offsets). The deltas are one array before. */
		offset = 14 + segs * 2 * 3 + 2 + seg * 2;
		put16(d + offset - segs * 2, 0); /* Delta - always 0 for now. */
		/* The range offset is relative to the slot it is stored in. */
		put16(d + offset, entries - offset); /* offset */

		/* Insert an entry */
		if (!ttf->is_otf && ttf->gid_renum && i < enc->max && enc->gid[i] < ttf->orig_num_glyphs)
			put16(d + entries, (ttf->is_otf || ttf->gid_renum == NULL) ? enc->gid[i] : ttf->gid_renum[enc->gid[i]]);
		else
			put16(d + entries, enc->gid[i]);

		entries += 2;
		for (i++; i < n; i++)
		{
			if (enc->gid[i] != 0)
			{
				/* Include i in the range, which means we need to add entries for
				 * seg_end to i inclusive. */
				while (seg_end < i)
				{
					seg_end++;
					if (!ttf->is_otf && ttf->gid_renum && seg_end < enc->max && enc->gid[seg_end] < ttf->orig_num_glyphs)
						put16(d + entries, ttf->gid_renum[enc->gid[seg_end]]);
					else
						put16(d + entries, enc->gid[seg_end]);
					entries += 2;
				}
			}
			else if (i - seg_end > 4)
				break;
		}
		/* The start codes follow the end codes and the 2 reserved bytes. */
		put16(d + 14 + segs * 2 + seg * 2 + 2, seg_start);
		put16(d + 14 + seg * 2, seg_end);
		seg++;
	}
	/* Terminating segment: covers just 0xffff, with no entries. */
	offset = 14 + segs * 2 * 3 + 2 + seg * 2;
	put16(d + 14 + segs * 2 + seg * 2 + 2, 0xffff);
	put16(d + 14 + seg * 2, 0xffff);
	put16(d + offset - segs * 2, 1); /* Delta */
	put16(d + offset, 0); /* offset */
	/* entries is relative to the subtable; add back the cmap header. */
	buf->len = entries + 12;
	assert(buf->len == buf->cap);

	add_table(ctx, ttf, TAG("cmap"), buf);
}
940
941 static void
942 read_maxp(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
943 {
944 fz_buffer *t = read_table(ctx, stm, TAG("maxp"), 1);
945
946 if (t->len < 6)
947 {
948 fz_drop_buffer(ctx, t);
949 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated maxp table");
950 }
951
952 ttf->orig_num_glyphs = get16(t->data+4);
953
954 add_table(ctx, ttf, TAG("maxp"), t);
955 ttf->maxp = t->data;
956 }
957
958 static void
959 read_head(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
960 {
961 uint32_t version;
962 fz_buffer *t = read_table(ctx, stm, TAG("head"), 1);
963
964 if (t->len < 54)
965 {
966 fz_drop_buffer(ctx, t);
967 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated head table");
968 }
969
970 version = get32(t->data);
971 if (version != 0x00010000)
972 {
973 fz_drop_buffer(ctx, t);
974 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported head table version 0x%08x", version);
975 }
976
977 ttf->index_to_loc_formatp = t->data+50;
978 ttf->index_to_loc_format = get16(ttf->index_to_loc_formatp);
979 if (ttf->index_to_loc_format & ~1)
980 {
981 fz_drop_buffer(ctx, t);
982 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported index_to_loc_format 0x%04x", ttf->index_to_loc_format);
983 }
984
985 add_table(ctx, ttf, TAG("head"), t);
986 }
987
988 static void
989 read_loca(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
990 {
991 fz_buffer *t;
992 uint32_t len = (2<<ttf->index_to_loc_format) * (ttf->orig_num_glyphs+1);
993
994 t = read_table(ctx, stm, TAG("loca"), 1);
995
996 if (t->len < len)
997 {
998 fz_drop_buffer(ctx, t);
999 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated loca table");
1000 }
1001
1002 ttf->loca = t->data;
1003 ttf->loca_len = &t->len;
1004
1005 add_table(ctx, ttf, TAG("loca"), t);
1006 }
1007
1008 static void
1009 read_hhea(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
1010 {
1011 uint32_t version;
1012 fz_buffer *t = read_table(ctx, stm, TAG("hhea"), 1);
1013 uint16_t i;
1014
1015 if (t->len < 36)
1016 {
1017 fz_drop_buffer(ctx, t);
1018 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated hhea table");
1019 }
1020
1021 version = get32(t->data);
1022 if (version != 0x00010000)
1023 {
1024 fz_drop_buffer(ctx, t);
1025 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported hhea table version 0x%08x", version);
1026 }
1027
1028 ttf->orig_num_long_hor_metrics = get16(t->data+34);
1029 if (ttf->orig_num_long_hor_metrics > ttf->orig_num_glyphs)
1030 {
1031 fz_drop_buffer(ctx, t);
1032 fz_throw(ctx, FZ_ERROR_FORMAT, "Overlong hhea table");
1033 }
1034
1035 add_table(ctx, ttf, TAG("hhea"), t);
1036
1037 /* Previously gids 0 to orig_num_long_hor_metrics-1 were described with
1038 * hor metrics, and the ones afterwards were fixed widths. Find where
1039 * that dividing line is in our new reduced set. */
1040 if (ttf->encoding && !ttf->is_otf && ttf->orig_num_long_hor_metrics > 0)
1041 {
1042 /* i = 0 is always kept long in subset_hmtx(). */
1043 ttf->new_num_long_hor_metrics = 1;
1044 for (i = ttf->orig_num_long_hor_metrics-1; i > 0; i--)
1045 if (ttf->gid_renum[i])
1046 {
1047 ttf->new_num_long_hor_metrics = ttf->gid_renum[i]+1;
1048 break;
1049 }
1050
1051 put16(t->data+34, ttf->new_num_long_hor_metrics);
1052 }
1053 else
1054 {
1055 ttf->new_num_long_hor_metrics = ttf->orig_num_long_hor_metrics;
1056 }
1057 }
1058
1059 static uint32_t
1060 get_loca(fz_context *ctx, ttf_t *ttf, uint32_t n)
1061 {
1062 if (ttf->index_to_loc_format == 0)
1063 {
1064 /* Short index - convert from words to bytes */
1065 return get16(ttf->loca + n*2) * 2;
1066 }
1067 else
1068 {
1069 /* Long index - in bytes already */
1070 return get32(ttf->loca + n*4);
1071 }
1072 }
1073
1074 static void
1075 put_loca(fz_context *ctx, ttf_t *ttf, uint32_t n, uint32_t off)
1076 {
1077 if (ttf->index_to_loc_format == 0)
1078 {
1079 /* Short index - convert from bytes to words */
1080 assert((off & 1) == 0);
1081 put16(ttf->loca + n*2, off/2);
1082 }
1083 else
1084 {
1085 /* Long index - in bytes already */
1086 put32(ttf->loca + n*4, off);
1087 }
1088 }
1089
1090 static void
1091 glyph_used(fz_context *ctx, ttf_t *ttf, fz_buffer *glyf, uint16_t i)
1092 {
1093 uint32_t offset, len;
1094 const uint8_t *data;
1095 uint16_t flags;
1096
1097 if (i >= ttf->orig_num_glyphs)
1098 {
1099 fz_warn(ctx, "TTF subsetting; gid >= num_gids!");
1100 return;
1101 }
1102
1103 if (ttf->gid_renum[i] != 0)
1104 return;
1105
1106 ttf->gid_renum[i] = 1;
1107
1108 /* If this glyf is composite, then we need to add any dependencies of it. */
1109 offset = get_loca(ctx, ttf, i);
1110 len = get_loca(ctx, ttf, i+1) - offset;
1111 if (len == 0)
1112 return;
1113 if (offset+2 > glyf->len)
1114 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1115 data = glyf->data + offset;
1116 if ((int16_t)get16(data) >= 0)
1117 return; /* Single glyph - no dependencies */
1118 data += 4 * 2 + 2;
1119 if (len < 4*2 + 2)
1120 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1121 len -= 4 * 2 + 2;
1122 do
1123 {
1124 uint16_t idx, skip;
1125
1126 if (len < 4)
1127 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1128
1129 flags = get16(data);
1130 idx = get16(data+2);
1131
1132 glyph_used(ctx, ttf, glyf, idx);
1133
1134 #define ARGS_1_AND_2_ARE_WORDS 1
1135 #define ARGS_ARE_XY_VALUES 2
1136 #define WE_HAVE_A_SCALE 8
1137 #define MORE_COMPONENTS 32
1138 #define WE_HAVE_AN_X_AND_Y_SCALE 64
1139 #define WE_HAVE_A_TWO_BY_TWO 128
1140
1141 /* Skip the X and Y offsets */
1142 if (flags & ARGS_1_AND_2_ARE_WORDS)
1143 skip = 4 + 4;
1144 else
1145 skip = 4 + 2;
1146
1147 /* Skip the transformation */
1148 switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO))
1149 {
1150 case 0:
1151 /* No extra to skip */
1152 break;
1153 case WE_HAVE_A_SCALE:
1154 skip += 2;
1155 break;
1156 case WE_HAVE_AN_X_AND_Y_SCALE:
1157 skip += 4;
1158 break;
1159 case WE_HAVE_A_TWO_BY_TWO:
1160 skip += 8;
1161 break;
1162 }
1163 if (len < skip)
1164 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1165 data += skip;
1166 len -= skip;
1167 }
1168 while(flags & MORE_COMPONENTS);
1169 }
1170
1171 static void
1172 renumber_composite(fz_context *ctx, ttf_t *ttf, uint8_t *data, uint32_t len)
1173 {
1174 uint16_t flags;
1175 uint16_t x;
1176
1177 data += 4 * 2 + 2;
1178 if (len < 4*2 + 2)
1179 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1180 len -= 4 * 2 + 2;
1181 do
1182 {
1183 uint16_t skip;
1184
1185 if (len < 4)
1186 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1187
1188 flags = get16(data);
1189 x = get16(data+2);
1190 if (x >= ttf->orig_num_glyphs)
1191 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1192 put16(data+2, ttf->gid_renum[x]);
1193
1194 /* Skip the X and Y offsets */
1195 if (flags & ARGS_1_AND_2_ARE_WORDS)
1196 skip = 4 + 4;
1197 else
1198 skip = 4 + 2;
1199
1200 /* Skip the transformation */
1201 switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO))
1202 {
1203 case 0:
1204 /* No extra to skip */
1205 break;
1206 case WE_HAVE_A_SCALE:
1207 skip += 2;
1208 break;
1209 case WE_HAVE_AN_X_AND_Y_SCALE:
1210 skip += 4;
1211 break;
1212 case WE_HAVE_A_TWO_BY_TWO:
1213 skip += 8;
1214 break;
1215 }
1216 if (len < skip)
1217 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
1218 data += skip;
1219 len -= skip;
1220 }
1221 while(flags & MORE_COMPONENTS);
1222 }
1223
1224 static void
1225 read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
1226 {
1227 uint32_t len = get_loca(ctx, ttf, ttf->orig_num_glyphs);
1228 fz_buffer *t = read_table(ctx, stm, TAG("glyf"), 1);
1229 encoding_t *enc = ttf->encoding;
1230 uint32_t last_loca, i, j, k;
1231 uint32_t new_start, old_start, old_end, last_loca_ofs;
1232
1233 if (t->len < len)
1234 {
1235 fz_drop_buffer(ctx, t);
1236 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated glyf table");
1237 }
1238
1239 add_table(ctx, ttf, TAG("glyf"), t);
1240
1241 /* Now, make the renumber list for the glyphs. */
1242 ttf->gid_renum = fz_calloc(ctx, ttf->orig_num_glyphs, sizeof(uint16_t));
1243
1244 /* Initially, we'll use it just as a usage list. 0 = unused, 1 used */
1245
1246 /* glyph 0 is always used. */
1247 glyph_used(ctx, ttf, t, 0);
1248
1249 if (enc)
1250 {
1251 uint32_t n = enc->max;
1252 /* If we have an encoding table, run through it, and keep anything needed from there. */
1253 for (i = 0; i < n; i++)
1254 if (enc->gid[i])
1255 glyph_used(ctx, ttf, t, enc->gid[i]);
1256
1257 /* Now convert from a usage table to a renumbering table. */
1258 if (ttf->orig_num_glyphs > 0)
1259 {
1260 ttf->gid_renum[0] = 0;
1261 j = 1;
1262 for (i = 1; i < ttf->orig_num_glyphs; i++)
1263 if (ttf->gid_renum[i])
1264 ttf->gid_renum[i] = j++;
1265 ttf->new_num_glyphs = j;
1266 }
1267 else
1268 {
1269 ttf->new_num_glyphs = 0;
1270 }
1271 }
1272 else
1273 {
1274 /* We're a cid font. The cids are gids. */
1275 for (i = 0; i < (uint32_t)num_gids; i++)
1276 glyph_used(ctx, ttf, t, gids[i]);
1277 ttf->new_num_glyphs = ttf->orig_num_glyphs;
1278 }
1279
1280 /* Now subset the glyf table. */
1281 if (enc)
1282 {
1283 old_start = get_loca(ctx, ttf, 0);
1284 if (old_start > t->len)
1285 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
1286 old_end = get_loca(ctx, ttf, 1);
1287 if (old_end > t->len || old_end < old_start)
1288 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
1289 len = old_end - old_start;
1290 new_start = 0;
1291 put_loca(ctx, ttf, 0, new_start);
1292 last_loca = 0;
1293 last_loca_ofs = len;
1294 for (i = 0; i < ttf->orig_num_glyphs; i++)
1295 {
1296 old_end = get_loca(ctx, ttf, i + 1);
1297 if (old_end > t->len || old_end < old_start)
1298 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
1299 len = old_end - old_start;
1300 if (len > 0 && (i == 0 || ttf->gid_renum[i] != 0))
1301 {
1302 memmove(t->data + new_start, t->data + old_start, len);
1303 if ((int16_t)get16(t->data + new_start) < 0)
1304 renumber_composite(ctx, ttf, t->data + new_start, len);
1305 for (k = last_loca + 1; k <= ttf->gid_renum[i]; k++)
1306 put_loca(ctx, ttf, k, last_loca_ofs);
1307 new_start += len;
1308 last_loca = ttf->gid_renum[i];
1309 last_loca_ofs = new_start;
1310 }
1311 old_start = old_end;
1312 }
1313 for (k = last_loca + 1; k <= ttf->new_num_glyphs; k++)
1314 put_loca(ctx, ttf, k, last_loca_ofs);
1315 }
1316 else
1317 {
1318 new_start = 0;
1319 old_start = get_loca(ctx, ttf, 0);
1320 if (old_start > t->len)
1321 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
1322 for (i = 0; i < ttf->orig_num_glyphs; i++)
1323 {
1324 old_end = get_loca(ctx, ttf, i + 1);
1325 if (old_end > t->len || old_end < old_start)
1326 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
1327 len = old_end - old_start;
1328 if (len > 0 && ttf->gid_renum[i] != 0)
1329 {
1330 memmove(t->data + new_start, t->data + old_start, len);
1331 put_loca(ctx, ttf, i, new_start);
1332 new_start += len;
1333 }
1334 else
1335 {
1336 put_loca(ctx, ttf, i, new_start);
1337 }
1338 old_start = old_end;
1339 }
1340 put_loca(ctx, ttf, ttf->orig_num_glyphs, new_start);
1341 }
1342
1343 *ttf->loca_len = (size_t) (ttf->new_num_glyphs + 1) * (2<<ttf->index_to_loc_format);
1344 t->len = new_start;
1345 }
1346
1347 static void
1348 update_num_glyphs(fz_context *ctx, ttf_t *ttf)
1349 {
1350 put16(ttf->maxp + 4, ttf->new_num_glyphs);
1351 }
1352
1353 static void
1354 subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
1355 {
1356 fz_buffer *t = read_table(ctx, stm, TAG("hmtx"), 1);
1357 uint16_t long_metrics, short_metrics, i, k;
1358 uint8_t *s = t->data;
1359 uint8_t *d = t->data;
1360 int cidfont = (ttf->encoding == NULL);
1361
1362 long_metrics = ttf->orig_num_long_hor_metrics;
1363 if (long_metrics > ttf->orig_num_glyphs)
1364 long_metrics = ttf->orig_num_glyphs;
1365 if (long_metrics > t->len / 4)
1366 long_metrics = (uint16_t)(t->len / 4);
1367
1368 short_metrics = (uint16_t)((t->len - long_metrics * 4) / 2);
1369 if (short_metrics > ttf->orig_num_glyphs - long_metrics)
1370 short_metrics = ttf->orig_num_glyphs - long_metrics;
1371
1372 for (i = 0; i < long_metrics; i++)
1373 {
1374 if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i]))
1375 {
1376 put32(d, get32(s));
1377 d += 4;
1378 }
1379 else if (cidfont)
1380 {
1381 put32(d, 0);
1382 d += 4;
1383 }
1384 s += 4;
1385 }
1386 for (k = 0 ; k < short_metrics; k++, i++)
1387 {
1388 if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i]))
1389 {
1390 put16(d, get16(s));
1391 d += 2;
1392 }
1393 else if (cidfont)
1394 {
1395 put16(d, 0);
1396 d += 2;
1397 }
1398 s += 2;
1399 }
1400 t->len = (d - t->data);
1401
1402 add_table(ctx, ttf, TAG("hmtx"), t);
1403 }
1404
1405 static void
1406 shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf)
1407 {
1408 uint32_t len;
1409 uint16_t i, n;
1410 uint8_t *loca;
1411
1412 if (ttf->index_to_loc_format == 0)
1413 return; /* Can't shrink cos it's already shrunk! */
1414
1415 n = ttf->new_num_glyphs;
1416 len = get_loca(ctx, ttf, n);
1417 if (len >= 65536)
1418 return; /* We can't shrink it, cos it's too big. */
1419
1420 loca = ttf->loca;
1421 for (i = 0; i <= n; i++)
1422 {
1423 if (get32(loca + 4*i) & 1)
1424 return; /* Can't shrink it, because an offset is not even */
1425 }
1426
1427 for (i = 0; i <= n; i++)
1428 {
1429 put16(loca + 2*i, get32(loca + 4*i)/2);
1430 }
1431 *ttf->loca_len = 2*(n+1);
1432 put16(ttf->index_to_loc_formatp, 0);
1433 }
1434
/*
	Glyph names paired with an index, as used for 'post' table name
	entries (indexes 0 to 257).

	The entries MUST stay sorted by charname in strcmp() order, because
	find_macroman_string() does a binary search over them.
*/
static const struct { const char *charname; int idx; } macroman[] =
{
	{ ".notdef", 0},
	{ ".null", 1},
	{ "A", 36},
	{ "AE", 144},
	{ "Aacute", 201},
	{ "Acircumflex", 199},
	{ "Adieresis", 98},
	{ "Agrave", 173},
	{ "Aring", 99},
	{ "Atilde", 174},
	{ "B", 37},
	{ "C", 38},
	{ "Cacute", 253},
	{ "Ccaron", 255},
	{ "Ccedilla", 100},
	{ "D", 39},
	{ "Delta", 168},
	{ "E", 40},
	{ "Eacute", 101},
	{ "Ecircumflex", 200},
	{ "Edieresis", 202},
	{ "Egrave", 203},
	{ "Eth", 233},
	{ "F", 41},
	{ "G", 42},
	{ "Gbreve", 248},
	{ "H", 43},
	{ "I", 44},
	{ "Iacute", 204},
	{ "Icircumflex", 205},
	{ "Idieresis", 206},
	{ "Idotaccent", 250},
	{ "Igrave", 207},
	{ "J", 45},
	{ "K", 46},
	{ "L", 47},
	{ "Lslash", 226},
	{ "M", 48},
	{ "N", 49},
	{ "Ntilde", 102},
	{ "O", 50},
	{ "OE", 176},
	{ "Oacute", 208},
	{ "Ocircumflex", 209},
	{ "Odieresis", 103},
	{ "Ograve", 211},
	{ "Omega", 159},
	{ "Oslash", 145},
	{ "Otilde", 175},
	{ "P", 51},
	{ "Q", 52},
	{ "R", 53},
	{ "S", 54},
	{ "Scaron", 228},
	{ "Scedilla", 251},
	{ "T", 55},
	{ "Thorn", 237},
	{ "U", 56},
	{ "Uacute", 212},
	{ "Ucircumflex", 213},
	{ "Udieresis", 104},
	{ "Ugrave", 214},
	{ "V", 57},
	{ "W", 58},
	{ "X", 59},
	{ "Y", 60},
	{ "Yacute", 235},
	{ "Ydieresis", 187},
	{ "Z", 61},
	{ "Zcaron", 230},
	{ "a", 68},
	{ "aacute", 105},
	{ "acircumflex", 107},
	{ "acute", 141},
	{ "adieresis", 108},
	{ "ae", 160},
	{ "agrave", 106},
	{ "ampersand", 9},
	{ "apple", 210},
	{ "approxequal", 167},
	{ "aring", 110},
	{ "asciicircum", 65},
	{ "asciitilde", 97},
	{ "asterisk", 13},
	{ "at", 35},
	{ "atilde", 109},
	{ "b", 69},
	{ "backslash", 63},
	{ "bar", 95},
	{ "braceleft", 94},
	{ "braceright", 96},
	{ "bracketleft", 62},
	{ "bracketright", 64},
	{ "breve", 219},
	{ "brokenbar", 232},
	{ "bullet", 135},
	{ "c", 70},
	{ "cacute", 254},
	{ "caron", 225},
	{ "ccaron", 256},
	{ "ccedilla", 111},
	{ "cedilla", 222},
	{ "cent", 132},
	{ "circumflex", 216},
	{ "colon", 29},
	{ "comma", 15},
	{ "copyright", 139},
	{ "currency", 189},
	{ "d", 71},
	{ "dagger", 130},
	{ "daggerdbl", 194},
	{ "dcroat", 257},
	{ "degree", 131},
	{ "dieresis", 142},
	{ "divide", 184},
	{ "dollar", 7},
	{ "dotaccent", 220},
	{ "dotlessi", 215},
	{ "e", 72},
	{ "eacute", 112},
	{ "ecircumflex", 114},
	{ "edieresis", 115},
	{ "egrave", 113},
	{ "eight", 27},
	{ "ellipsis", 171},
	{ "emdash", 179},
	{ "endash", 178},
	{ "equal", 32},
	{ "eth", 234},
	{ "exclam", 4},
	{ "exclamdown", 163},
	{ "f", 73},
	{ "fi", 192},
	{ "five", 24},
	{ "fl", 193},
	{ "florin", 166},
	{ "four", 23},
	{ "fraction", 188},
	{ "franc", 247},
	{ "g", 74},
	{ "gbreve", 249},
	{ "germandbls", 137},
	{ "grave", 67},
	{ "greater", 33},
	{ "greaterequal", 149},
	{ "guillemotleft", 169},
	{ "guillemotright", 170},
	{ "guilsinglleft", 190},
	{ "guilsinglright", 191},
	{ "h", 75},
	{ "hungarumlaut", 223},
	{ "hyphen", 16},
	{ "i", 76},
	{ "iacute", 116},
	{ "icircumflex", 118},
	{ "idieresis", 119},
	{ "igrave", 117},
	{ "infinity", 146},
	{ "integral", 156},
	{ "j", 77},
	{ "k", 78},
	{ "l", 79},
	{ "less", 31},
	{ "lessequal", 148},
	{ "logicalnot", 164},
	{ "lozenge", 185},
	{ "lslash", 227},
	{ "m", 80},
	{ "macron", 218},
	{ "minus", 239},
	{ "mu", 151},
	{ "multiply", 240},
	{ "n", 81},
	{ "nine", 28},
	{ "nonbreakingspace", 172},
	{ "nonmarkingreturn", 2},
	{ "notequal", 143},
	{ "ntilde", 120},
	{ "numbersign", 6},
	{ "o", 82},
	{ "oacute", 121},
	{ "ocircumflex", 123},
	{ "odieresis", 124},
	{ "oe", 177},
	{ "ogonek", 224},
	{ "ograve", 122},
	{ "one", 20},
	{ "onehalf", 244},
	{ "onequarter", 245},
	{ "onesuperior", 241},
	{ "ordfeminine", 157},
	{ "ordmasculine", 158},
	{ "oslash", 161},
	{ "otilde", 125},
	{ "p", 83},
	{ "paragraph", 136},
	{ "parenleft", 11},
	{ "parenright", 12},
	{ "partialdiff", 152},
	{ "percent", 8},
	{ "period", 17},
	{ "periodcentered", 195},
	{ "perthousand", 198},
	{ "pi", 155},
	{ "plus", 14},
	{ "plusminus", 147},
	{ "product", 154},
	{ "q", 84},
	{ "question", 34},
	{ "questiondown", 162},
	{ "quotedbl", 5},
	{ "quotedblbase", 197},
	{ "quotedblleft", 180},
	{ "quotedblright", 181},
	{ "quoteleft", 182},
	{ "quoteright", 183},
	{ "quotesinglbase", 196},
	{ "quotesingle", 10},
	{ "r", 85},
	{ "radical", 165},
	{ "registered", 138},
	{ "ring", 221},
	{ "s", 86},
	{ "scaron", 229},
	{ "scedilla", 252},
	{ "section", 134},
	{ "semicolon", 30},
	{ "seven", 26},
	{ "six", 25},
	{ "slash", 18},
	{ "space", 3},
	{ "sterling", 133},
	{ "summation", 153},
	{ "t", 87},
	{ "thorn", 238},
	{ "three", 22},
	{ "threequarters", 246},
	{ "threesuperior", 243},
	{ "tilde", 217},
	{ "trademark", 140},
	{ "two", 21},
	{ "twosuperior", 242},
	{ "u", 88},
	{ "uacute", 126},
	{ "ucircumflex", 128},
	{ "udieresis", 129},
	{ "ugrave", 127},
	{ "underscore", 66},
	{ "v", 89},
	{ "w", 90},
	{ "x", 91},
	{ "y", 92},
	{ "yacute", 236},
	{ "ydieresis", 186},
	{ "yen", 150},
	{ "z", 93},
	{ "zcaron", 231},
	{ "zero", 19},
};

/*
	Binary search the macroman table for the glyph name s.

	Returns the index paired with the name, or -1 if the name is not
	in the table.
*/
static int
find_macroman_string(const char *s)
{
	int l, r, m;
	int comparison;

	l = 0;
	/* r is the index of the LAST element. Starting one past it would
	 * let m land outside the table for names sorting after the final
	 * entry. */
	r = (int)(sizeof(macroman) / sizeof(macroman[0])) - 1;
	while (l <= r)
	{
		m = l + ((r - l) >> 1);
		comparison = strcmp(s, macroman[m].charname);
		if (comparison < 0)
			r = m - 1;
		else if (comparison > 0)
			l = m + 1;
		else
			return macroman[m].idx;
	}

	return -1;
}
1719
1720 static size_t
1721 subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids)
1722 {
1723 int i, n, new_glyphs, old_strings, new_strings;
1724 int j;
1725 fz_int2_heap heap = { 0 };
1726 uint8_t *d0, *e, *p;
1727
1728 if (len < (size_t) 2 + 2 * ttf->orig_num_glyphs)
1729 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table");
1730
1731 n = get16(d);
1732 if ((uint32_t)n != ttf->orig_num_glyphs)
1733 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
1734
1735 d0 = d;
1736 d += 2; len -= 2;
1737 e = d;
1738 p = d;
1739
1740 /* Store all kept indexes. */
1741 if (len < (size_t)n*2)
1742 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
1743 old_strings = 0;
1744 new_strings = 0;
1745 new_glyphs = 0;
1746 j = 0;
1747 len -= (size_t)n*2;
1748 for (i = 0; i < n; i++)
1749 {
1750 uint16_t o = get16(d);
1751 fz_int2 i2;
1752 p += 2;
1753
1754 if (o >= 258)
1755 old_strings++;
1756
1757 /* We're only keeping gids we want. */
1758 /* Note we need to keep both the gids we were given by the caller, but also
1759 * those required as composites (in gid_renum, if we have it). */
1760 if (i != 0 && (j >= num_gids || gids[j] != i) && (ttf->gid_renum == NULL || ttf->gid_renum[i] == 0))
1761 {
1762 memmove(d, d + 2, (n - i - 1) * 2);
1763 continue;
1764 }
1765 if (j < num_gids && gids[j] == i)
1766 j++;
1767
1768 d += 2;
1769 e += 2;
1770
1771 /* We want this gid. */
1772 new_glyphs++;
1773
1774 /* 257 or smaller: same as in the basic order, keep it as such. */
1775 if (o <= 257)
1776 continue;
1777
1778 /* check if string is one of the macroman standard ones, and use its index if so. */
1779 {
1780 uint8_t *q = d0 + 2 + (size_t) n * 2;
1781 int k;
1782 char buf[257] = { 0 };
1783 int macidx;
1784 for (k = 0; k < o - 258; k++)
1785 q += 1 + *q;
1786 for (k = 0; k < *q; k++)
1787 buf[k] = *(q + 1 + k);
1788
1789 macidx = find_macroman_string(buf);
1790
1791 if (macidx >= 0)
1792 {
1793 put16(d - 2, macidx);
1794 continue;
1795 }
1796 }
1797
1798 /* We want this gid, and it is a string. */
1799 new_strings++;
1800
1801 /* Store the index. */
1802 i2.a = o - 258;
1803 i2.b = i;
1804 fz_int2_heap_insert(ctx, &heap, i2);
1805
1806 /* Update string index value in table entry. */
1807 put16(d - 2, 257 + new_strings);
1808 }
1809
1810 d = p;
1811
1812 /* Update number of indexes */
1813 put16(d0, new_glyphs);
1814
1815 fz_int2_heap_sort(ctx, &heap);
1816
1817 /* So, the heap is sorted on i2.a (the string indexes we want to keep),
1818 * and i2.b is the gid that refers to that index. */
1819
1820 /* Run through the list moving the strings down that we care about. */
1821 j = 0;
1822 n = old_strings;
1823 for (i = 0; i < n; i++)
1824 {
1825 uint8_t slen;
1826
1827 if (len < 1)
1828 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
1829 slen = *d+1;
1830 if (len < slen)
1831 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
1832 len -= slen;
1833
1834 if (j >= heap.len || heap.heap[j].a != i)
1835 {
1836 /* Drop this one. */
1837 d += slen;
1838 continue;
1839 }
1840
1841 memmove(e, d, slen);
1842 d += slen;
1843 e += slen;
1844
1845 j++;
1846 }
1847
1848 fz_free(ctx, heap.heap);
1849
1850 return e - d0;
1851 }
1852
1853 static void
1854 subset_post(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
1855 {
1856 fz_buffer *t = read_table(ctx, stm, TAG("post"), 0);
1857 uint8_t *d;
1858 size_t len;
1859 uint32_t fmt;
1860
1861 if (t == NULL)
1862 return;
1863
1864 d = t->data;
1865 len = t->len;
1866
1867 if (len < 32)
1868 {
1869 fz_drop_buffer(ctx, t);
1870 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table");
1871 }
1872
1873 fmt = get32(d);
1874
1875 if (fmt != 0x00020000)
1876 {
1877 /* Fmt 1: Nothing to be gained by having this table. The cmap should
1878 * have all the mappings anyway, and we'll have broken it by renumbering
1879 * the gids down anyway. */
1880 /* Fmt 2.5 deprecated. */
1881 /* Fmt 3 and 4: should not be used for PDF. */
1882 /* No other formats defined. */
1883 fz_drop_buffer(ctx, t);
1884 return;
1885 }
1886 d += 32; len -= 32;
1887 fz_try(ctx)
1888 len = subset_post2(ctx, ttf, d, len, gids, num_gids);
1889 fz_catch(ctx)
1890 {
1891 fz_drop_buffer(ctx, t);
1892 fz_rethrow(ctx);
1893 }
1894
1895 t->len = 32 + len;
1896
1897 add_table(ctx, ttf, TAG("post"), t);
1898 }
1899
1900 static void
1901 subset_CFF(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids, int symbolic, int cidfont)
1902 {
1903 fz_buffer *t = read_table(ctx, stm, TAG("CFF "), 1);
1904 fz_buffer *sub = NULL;
1905
1906 fz_var(sub);
1907
1908 fz_try(ctx)
1909 sub = fz_subset_cff_for_gids(ctx, t, gids, num_gids, symbolic, cidfont);
1910 fz_always(ctx)
1911 fz_drop_buffer(ctx, t);
1912 fz_catch(ctx)
1913 fz_rethrow(ctx);
1914
1915 add_table(ctx, ttf, TAG("CFF "), sub);
1916 }
1917
1918 fz_buffer *
1919 fz_subset_ttf_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont)
1920 {
1921 fz_stream *stm = fz_open_buffer(ctx, orig);
1922 ttf_t ttf = { 0 };
1923 fz_buffer *newbuf = NULL;
1924 fz_output *out = NULL;
1925
1926 fz_var(newbuf);
1927 fz_var(out);
1928
1929 fz_try(ctx)
1930 {
1931 ttf.is_otf = (fz_read_uint32_le(ctx, stm) == 0x4f54544f);
1932 ttf.symbolic = symbolic;
1933
1934 /* Subset the name table. No other dependencies. */
1935 subset_name_table(ctx, &ttf, stm);
1936
1937 if (!cidfont)
1938 {
1939 /* Load the encoding. Populates the encoding table from the cmap table
1940 * in the original. cmap table is then discarded. */
1941 load_encoding(ctx, &ttf, stm);
1942
1943 /* Blank out the bits of the encoding we don't need. */
1944 reduce_encoding(ctx, &ttf, gids, num_gids);
1945 }
1946
1947 /* Read maxp and store the table. Remember orig_num_glyphs. */
1948 read_maxp(ctx, &ttf, stm);
1949
1950 /* Read head and store the table. Remember the loca index size. */
1951 read_head(ctx, &ttf, stm);
1952
1953 if (ttf.is_otf)
1954 {
1955 subset_CFF(ctx, &ttf, stm, gids, num_gids, symbolic, cidfont);
1956 }
1957
1958 /* Read loca and store it. Stash a pointer to the table for quick access. */
1959 if (!ttf.is_otf)
1960 {
1961 read_loca(ctx, &ttf, stm);
1962
1963 /* Read the glyf data, and scan it for composites. This makes the gid_renum table,
1964 * subsets the glyf data, and rewrites the loca table. */
1965 read_glyf(ctx, &ttf, stm, gids, num_gids);
1966 }
1967
1968 /* Read hhea and store it. Remember numOfLongHorMetrics. */
1969 read_hhea(ctx, &ttf, stm);
1970
1971 /* Read and subset hmtx. */
1972 subset_hmtx(ctx, &ttf, stm);
1973
1974 #ifdef DEBUG_SUBSETTING
1975 if (!cidfont)
1976 {
1977 encoding_t *enc = ttf.encoding;
1978 uint32_t i, n = enc->max;
1979
1980 for (i = 0; i < n; i++)
1981 if (enc->gid[i])
1982 printf("cid %x '%c'-> orig gid %d -> gid %d\n", i, (char)i, enc->gid[i], ttf.gid_renum[enc->gid[i]]);
1983 }
1984 {
1985 uint32_t i;
1986
1987 for (i = 0; i < ttf.orig_num_glyphs; i++)
1988 if (ttf.gid_renum[i])
1989 printf("gid %d -> %d\n", i, ttf.gid_renum[i]);
1990
1991 for (i = 0; i <= ttf.new_num_glyphs; i++)
1992 printf("LOCA %d = %x\n", i, get_loca(ctx, &ttf, i));
1993 }
1994 #endif
1995 if (!ttf.is_otf)
1996 {
1997 shrink_loca_if_possible(ctx, &ttf);
1998
1999 update_num_glyphs(ctx, &ttf);
2000 }
2001
2002 if (!cidfont)
2003 {
2004 /* Now we can make the new cmap. */
2005 make_cmap(ctx, &ttf);
2006 }
2007
2008 if (!cidfont)
2009 {
2010 /* subset the post table */
2011 subset_post(ctx, &ttf, stm, gids, num_gids);
2012 }
2013
2014 copy_table(ctx, &ttf, stm, TAG("OS/2"), 0);
2015 copy_table(ctx, &ttf, stm, TAG("cvt "), 0);
2016 copy_table(ctx, &ttf, stm, TAG("fpgm"), 0);
2017 copy_table(ctx, &ttf, stm, TAG("prep"), 0);
2018
2019 sort_tables(ctx, &ttf);
2020 checksum_tables(ctx, &ttf);
2021
2022 newbuf = fz_new_buffer(ctx, 1024);
2023 out = fz_new_output_with_buffer(ctx, newbuf);
2024
2025 write_tables(ctx, &ttf, out);
2026
2027 fz_close_output(ctx, out);
2028
2029 fix_checksum(ctx, newbuf);
2030 }
2031 fz_always(ctx)
2032 {
2033 int i;
2034
2035 fz_drop_output(ctx, out);
2036 fz_drop_stream(ctx, stm);
2037 for (i = 0; i < ttf.len; i++)
2038 fz_drop_buffer(ctx, ttf.table[i].tab);
2039 fz_free(ctx, ttf.table);
2040 fz_free(ctx, ttf.gid_renum);
2041 fz_free(ctx, ttf.encoding);
2042 }
2043 fz_catch(ctx)
2044 {
2045 fz_drop_buffer(ctx, newbuf);
2046 fz_rethrow(ctx);
2047 }
2048
2049 return newbuf;
2050 }