comparison mupdf-source/source/fitz/subset-cff.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <math.h>
26
27 /*
28 For the purposes of this code, and to save my tiny brain from
29 overload, we will adopt the following notation:
30
31 1) The PDF file contains bytes of data. These bytes are looked
32 up in the MuPDF font handling to resolve to 'glyph ids' (gids).
33 These account for all the different encodings etc in use,
34 including the encoding table within the font.
35
36 (For CIDFonts, Cid = Gid, and there is no encoding table).
37
38 2) We are given the list of gids that are used in the document.
39
40 Unlike for simple TTFs, we don't map these down to the bottom of the
41 range, we just remove the definitions for them.
42
43 For now, glyphs that are subsetted away keep their slot in the
44 CharStrings index, but their charstring is replaced by a single
45 'endchar' byte (see do_subset) rather than a zero size charstring.
46
47 Similarly, for now, we don't attempt to compact either the local
48 or global subrs.
49 */
50
51 /*
52 In CFF files, we have:
53
54 Charset: Maps from gid <-> glyph name
55
56 Encoding: Maps from code <-> gid
57 plus supplemental code -> glyph name (which must have been used already in the map)
58 */
59
60
/* Number of bytes (1, 2, 3 or 4) used to store each offset. */
typedef uint8_t offsize_t;
63
64 typedef struct
65 {
66 /* Index position and length in the original */
67 uint32_t index_offset;
68 uint32_t index_size;
69
70 /* Fields read from the index */
71 uint16_t count;
72 offsize_t offsize;
73 const uint8_t *offset; /* A pointer to the offset table, not to the data table! */
74
75 /* The offset of the byte before the data. The offset of the first
76 * object is always 1. Add the offset of any given object to this
77 * and you get the offset within the block. */
78 uint32_t data_offset;
79 } index_t;
80
/* One entry of a usage list. */
typedef struct
{
	uint8_t scanned; /* Cleared when the entry is added; set once it has been scanned. */
	uint16_t num; /* The glyph or subroutine number being recorded. */
} usage_t;
86
87 typedef struct
88 {
89 int len;
90 int max;
91 usage_t *list;
92 } usage_list_t;
93
94 typedef struct
95 {
96 uint8_t *base;
97 size_t len;
98
99 int symbolic;
100 int is_cidfont;
101
102 uint8_t major;
103 uint8_t minor;
104 uint8_t headersize;
105 offsize_t offsize;
106 offsize_t new_offsize;
107
108 index_t name_index;
109 index_t top_dict_index;
110 index_t string_index;
111 index_t global_index;
112 index_t charstrings_index;
113 index_t local_index;
114 index_t fdarray_index;
115 uint16_t gsubr_bias;
116 uint16_t subr_bias;
117 uint32_t top_dict_index_offset;
118 uint32_t string_index_offset;
119 uint32_t global_index_offset;
120 uint32_t encoding_offset;
121 uint32_t encoding_len;
122 uint32_t charset_offset;
123 uint32_t charset_len;
124 uint32_t charstrings_index_offset;
125 uint32_t private_offset;
126 uint32_t private_len;
127 uint32_t local_index_offset;
128 uint32_t fdselect_offset;
129 uint32_t fdselect_len;
130 uint32_t fdarray_index_offset;
131 uint32_t charstring_type;
132
133 uint16_t unpacked_charset_len;
134 uint16_t unpacked_charset_max;
135 uint16_t *unpacked_charset;
136
137 struct
138 {
139 fz_buffer *rewritten_dict;
140 fz_buffer *rewritten_private;
141 uint32_t offset;
142 uint32_t len;
143 uint32_t fixup;
144 uint32_t local_index_offset;
145 index_t local_index;
146 usage_list_t local_usage;
147 uint16_t subr_bias;
148 fz_buffer *local_subset;
149 } *fdarray;
150
151 struct
152 {
153 uint32_t charset;
154 uint32_t encoding;
155 uint32_t charstrings;
156 uint32_t privat;
157 uint32_t fdselect;
158 uint32_t fdarray;
159 } top_dict_fixup_offsets;
160
161 fz_buffer *charstrings_subset;
162 fz_buffer *top_dict_subset;
163 fz_buffer *private_subset;
164 fz_buffer *local_subset;
165 fz_buffer *global_subset;
166
167 usage_list_t local_usage;
168 usage_list_t global_usage;
169 usage_list_t gids_to_keep;
170 usage_list_t extra_gids_to_keep;
171
172 uint16_t *gid_to_cid;
173 uint8_t *gid_to_font;
174 } cff_t;
175
/* Standard encoding: character code -> the value looked for in the
 * unpacked charset by use_sub_char (0 means "no glyph"). */
static const uint8_t standard_encoding[256] =
{
	  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,
	 17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
	 33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
	 49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
	 65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
	 81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,   0,
	  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	  0,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
	  0, 111, 112, 113, 114,   0, 115, 116, 117, 118, 119, 120, 121, 122,   0, 123,
	  0, 124, 125, 126, 127, 128, 129, 130, 131,   0, 132, 133,   0, 134, 135, 136,
	137,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	  0, 138,   0, 139,   0,   0,   0,   0, 140, 141, 142, 143,   0,   0,   0,   0,
	  0, 144,   0,   0,   0, 145,   0,   0, 146, 147, 148, 149,   0,   0,   0,   0
};
196
197 /* Simple functions for bigendian fetching/putting */
198
/* Fetch a big-endian 16 bit value from the two bytes at d. */
static uint32_t get16(const uint8_t *d)
{
	uint32_t high = d[0];
	uint32_t low = d[1];

	return (high << 8) | low;
}
203
/* Store v as four big-endian bytes at d. */
static void put32(uint8_t *d, uint32_t v)
{
	int k;

	/* Fill from the least significant byte backwards. */
	for (k = 3; k >= 0; k--)
	{
		d[k] = (uint8_t)v;
		v >>= 8;
	}
}
211
/* Store the low 16 bits of v as two big-endian bytes at d. */
static void put16(uint8_t *d, uint32_t v)
{
	d[1] = (uint8_t)v;
	d[0] = (uint8_t)(v >> 8);
}
217
/* Store the low 8 bits of v in the byte at d. */
static void put8(uint8_t *d, uint32_t v)
{
	*d = (uint8_t)v;
}
222
223 static uint32_t
224 get_offset(const uint8_t *d, offsize_t os)
225 {
226 uint32_t v = *d++;
227 if (os > 1)
228 v = (v<<8) | *d++;;
229 if (os > 2)
230 v = (v<<8) | *d++;;
231 if (os > 3)
232 v = (v<<8) | *d++;;
233
234 return v;
235 }
236
237 static void
238 put_offset(uint8_t *d, offsize_t os, uint32_t v)
239 {
240 if (os > 3)
241 d[3] = v, v >>= 8;
242 if (os > 2)
243 d[2] = v, v >>= 8;
244 if (os > 1)
245 d[1] = v, v >>= 8;
246 d[0] = v;
247 }
248
/* Return the smallest number of bytes (1-4) able to represent offset. */
static uint8_t
offsize_for_offset(uint32_t offset)
{
	uint8_t size = 1;

	if (offset >= 256)
		size++;
	if (offset >= 65536)
		size++;
	if (offset >= ((uint32_t)1 << 24))
		size++;

	return size;
}
260
261 uint16_t
262 subr_bias(fz_context *ctx, cff_t *cff, uint16_t count)
263 {
264 if (cff->charstring_type == 1)
265 return 0;
266 else if (count < 1240)
267 return 107;
268 else if (count < 33900)
269 return 1131;
270 else
271 return 32768;
272 }
273
274 /* Index functions */
275
276 /* "Load" an index and check it for plausibility (no overflows etc) */
277 static uint32_t
278 index_load(fz_context *ctx, index_t *index, const uint8_t *base, uint32_t len, uint32_t offset)
279 {
280 uint32_t data_offset, i, v, prev;
281 offsize_t os;
282 const uint8_t *data = base + offset;
283 const uint8_t *data0 = data;
284
285 /* Non-existent tables leave the index empty */
286 if (offset == 0 || len == 0)
287 {
288 memset(index, 0, sizeof(*index));
289 return 0;
290 }
291
292 index->index_offset = offset;
293
294 if (offset >= len || len-offset < 2)
295 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated index");
296
297 index->count = get16(data);
298
299 if (index->count == 0)
300 return offset+2;
301
302 if (offset + 4 >= len)
303 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated index");
304
305 os = index->offsize = data[2];
306 if (index->offsize < 1 || index->offsize > 4)
307 fz_throw(ctx, FZ_ERROR_FORMAT, "Illegal offsize");
308
309 index->offset = data + 3;
310
311 data_offset = 3 + (index->count+1) * os - 1;
312 index->data_offset = data_offset + offset;
313
314 if (data_offset > len)
315 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated index");
316
317 data += 3;
318 prev = get_offset(data, os);
319 if (prev != 1)
320 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt index");
321 data += os;
322 for (i = index->count; i > 0; i--)
323 {
324 v = get_offset(data, os);
325 data += os;
326 if (v < prev)
327 fz_throw(ctx, FZ_ERROR_FORMAT, "Index not monotonic");
328 prev = v;
329 }
330 if (v > len)
331 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated index");
332
333 data += prev - 1;
334 index->index_size = data - data0;
335
336 return index->index_size + offset;
337 }
338
339 static uint32_t
340 index_get(fz_context *ctx, index_t *index, int idx)
341 {
342 int os;
343 uint32_t v;
344
345 if (idx < 0 || idx > index->count || index->count == 0)
346 fz_throw(ctx, FZ_ERROR_FORMAT, "Index bounds");
347
348 os = index->offsize;
349 idx *= os;
350 v = get_offset(&index->offset[idx], index->offsize);
351
352 return index->data_offset + v;
353 }
354
355 /* DICT handling structures and functions */
356
/* Two byte operators (12 followed by A) are numbered after the 22
 * single byte ones. */
#define HIOP(A) ((A)+22)

typedef enum
{
	/* Top DICT Operators */
	DICT_OP_version = 0,
	DICT_OP_Notice = 1,
	DICT_OP_Copyright = HIOP(0),
	DICT_OP_FullName = 2,
	DICT_OP_FamilyName = 3,
	DICT_OP_Weight = 4,
	DICT_OP_isFixedPitch = HIOP(1),
	DICT_OP_ItalicAngle = HIOP(2),
	DICT_OP_UnderlinePosition = HIOP(3),
	DICT_OP_UnderlineThickness = HIOP(4),
	DICT_OP_PaintType = HIOP(5),
	DICT_OP_CharstringType = HIOP(6),
	DICT_OP_FontMatrix = HIOP(7),
	DICT_OP_UniqueID = 13,
	DICT_OP_FontBBox = 5,
	DICT_OP_StrokeWidth = HIOP(8),
	DICT_OP_XUID = 14,
	DICT_OP_charset = 15,
	DICT_OP_Encoding = 16,
	DICT_OP_CharStrings = 17,
	DICT_OP_Private = 18,
	DICT_OP_SyntheticBase = HIOP(20),
	DICT_OP_Postscript = HIOP(21),
	DICT_OP_BaseFontName = HIOP(22),
	DICT_OP_BaseFontBlend = HIOP(23),

	/* CIDFont Operators */
	DICT_OP_ROS = HIOP(30),
	DICT_OP_CIDFontVersion = HIOP(31),
	DICT_OP_CIDFontRevision = HIOP(32),
	DICT_OP_CIDFontType = HIOP(33),
	DICT_OP_CIDCount = HIOP(34),
	DICT_OP_UIDBase = HIOP(35),
	DICT_OP_FDArray = HIOP(36),
	DICT_OP_FDSelect = HIOP(37),
	DICT_OP_FontName = HIOP(38),

	/* Private DICT Operators */
	DICT_OP_BlueValues = 6,
	DICT_OP_OtherBlues = 7,
	DICT_OP_FamilyBlues = 8,
	DICT_OP_FamilyOtherBlues = 9,
	DICT_OP_BlueScale = HIOP(9),
	DICT_OP_BlueShift = HIOP(10),
	DICT_OP_BlueFuzz = HIOP(11),
	DICT_OP_StdHW = 10,
	DICT_OP_StdVW = 11,
	DICT_OP_StemSnapH = HIOP(12),
	DICT_OP_StemSnapV = HIOP(13),
	DICT_OP_ForceBold = HIOP(14),
	DICT_OP_LanguageGroup = HIOP(17),
	DICT_OP_ExpansionFactor = HIOP(18),
	DICT_OP_initialRandomSeed = HIOP(19),
	DICT_OP_Subrs = 19,
	DICT_OP_defaultWidthX = 20,
	DICT_OP_nominalWidthX = 21
} dict_operator;
419
/* The kinds of item that can be read from a DICT. */
typedef enum
{
	da_int = 0, /* Integer operand */
	da_real = 1, /* Real operand */
	da_operator = 2 /* Operator */
} dict_arg_type;
426
427 typedef struct {
428 dict_arg_type type;
429 union {
430 uint32_t i;
431 float f;
432 } u;
433 } dict_arg;
434
435 #define DICT_MAX_ARGS 48
436
437 typedef struct {
438 const uint8_t *base;
439 size_t len;
440 uint32_t offset;
441 uint32_t end_offset;
442 uint8_t *val;
443 int eod;
444 int num_args;
445 dict_arg arg[DICT_MAX_ARGS+1];
446 } dict_iterator;
447
448 static uint8_t
449 dict_get_byte(fz_context *ctx, dict_iterator *di)
450 {
451 uint8_t b;
452
453 if (di->offset == di->end_offset)
454 di->eod = 1;
455 if (di->eod)
456 fz_throw(ctx, FZ_ERROR_FORMAT, "Overlong DICT data");
457 b = di->base[di->offset++];
458
459 return b;
460 }
461
462 static dict_arg
463 dict_get_arg(fz_context *ctx, dict_iterator *di)
464 {
465 uint8_t b0, b1, b2, b3, b4;
466 dict_arg d;
467
468 b0 = dict_get_byte(ctx, di);
469 if (b0 == 12)
470 {
471 b1 = dict_get_byte(ctx, di);
472 d.type = da_operator;
473 d.u.i = HIOP(b1);
474 return d;
475 }
476 else if (b0 <= 21)
477 {
478 d.type = da_operator;
479 d.u.i = b0;
480 return d;
481 }
482 else if (b0 <= 27)
483 {
484 malformed:
485 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed DICT");
486 }
487 else if (b0 == 28)
488 {
489 b1 = dict_get_byte(ctx, di);
490 b2 = dict_get_byte(ctx, di);
491 d.type = da_int;
492 d.u.i = (b1<<8) | b2;
493 }
494 else if (b0 == 29)
495 {
496 b1 = dict_get_byte(ctx, di);
497 b2 = dict_get_byte(ctx, di);
498 b3 = dict_get_byte(ctx, di);
499 b4 = dict_get_byte(ctx, di);
500 d.type = da_int;
501 d.u.i = (b1<<24) | (b2<<16) | (b3<<8) | b4;
502 }
503 else if (b0 == 30)
504 {
505 char cheap[32+5];
506 unsigned int i;
507
508 for (i = 0; i < sizeof(cheap)-5; )
509 {
510 static const char *dict = "0123456789.EE -f";
511 uint8_t b = dict_get_byte(ctx, di);
512
513 if ((b>>4) == 0xf)
514 break;
515 cheap[i++] = dict[b>>4];
516 if ((b>>4) == 0xc)
517 cheap[i++] = '-';
518
519 b &= 15;
520 if (b == 0xf)
521 break;
522 cheap[i++] = dict[b];
523 if (b == 0xc)
524 cheap[i++] = '-';
525 }
526 cheap[i++] = 0;
527 d.type = da_real;
528 d.u.f = fz_atof(cheap);
529 }
530 else if (b0 == 31)
531 {
532 goto malformed;
533 }
534 else if (b0 <= 246)
535 {
536 d.type = da_int;
537 d.u.i = b0-139;
538 }
539 else if (b0 <= 250)
540 {
541 b1 = dict_get_byte(ctx, di);
542 d.type = da_int;
543 d.u.i = ((b0-247)<<8) + b1 + 108;
544 }
545 else if (b0 <= 254)
546 {
547 b1 = dict_get_byte(ctx, di);
548 d.type = da_int;
549 d.u.i = -((b0-251)<<8) - b1 - 108;
550 }
551 else
552 goto malformed;
553
554 return d;
555 }
556
557 static dict_operator
558 dict_next(fz_context *ctx, dict_iterator *di)
559 {
560 int n;
561
562 if (di->offset >= di->end_offset)
563 {
564 di->eod = 1;
565 return 0;
566 }
567
568 n = 0;
569 while (di->offset < di->end_offset)
570 {
571 di->arg[n] = dict_get_arg(ctx, di);
572 if (di->arg[n].type == da_operator)
573 {
574 /* Sorted! Terminate loop. */
575 break;
576 }
577 if (n == DICT_MAX_ARGS)
578 fz_throw(ctx, FZ_ERROR_FORMAT, "Too many operands");
579 n++;
580 }
581 di->num_args = n;
582
583 return (dict_operator)di->arg[n].u.i;
584 }
585
586 static dict_operator
587 dict_init(fz_context *ctx, dict_iterator *di, const uint8_t *base, size_t len, uint32_t offset, uint32_t end)
588 {
589 di->base = base;
590 di->len = len;
591 di->offset = offset;
592 di->end_offset = end;
593 di->eod = (di->offset == di->end_offset);
594
595 if (di->offset > len || end > len)
596 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed DICT");
597
598 return dict_next(ctx, di);
599 }
600
601 static int
602 dict_more(dict_iterator *di)
603 {
604 return !di->eod;
605 }
606
607 static uint32_t
608 dict_arg_int(fz_context *ctx, dict_iterator *di, int idx)
609 {
610 if (idx < 0 || idx >= di->num_args)
611 fz_throw(ctx, FZ_ERROR_FORMAT, "Missing dict arg");
612
613 if (di->arg[idx].type != da_int)
614 fz_throw(ctx, FZ_ERROR_FORMAT, "DICT arg not an int");
615
616 return di->arg[idx].u.i;
617 }
618
619 static void
620 dict_write_arg(fz_context *ctx, fz_output *out, dict_arg d)
621 {
622 int si;
623 uint32_t i = d.u.i;
624
625 if (d.type == da_operator)
626 {
627 if (i >= HIOP(0))
628 {
629 fz_write_byte(ctx, out, 12);
630 i -= HIOP(0);
631 }
632 fz_write_byte(ctx, out, i);
633 return;
634 }
635
636 if (d.type == da_real)
637 {
638 char text[32];
639 unsigned int k, j;
640 uint8_t v;
641
642 fz_snprintf(text, sizeof(text)-1, "%g", d.u.f);
643
644 fz_write_byte(ctx, out, 30);
645 j = 4;
646 v = 0;
647 for (k = 0; k < sizeof(text)-1;)
648 {
649 char c = text[k++];
650
651 if (c >= '0' && c <= '9')
652 v |= (c - '0')<<j;
653 else if (c == '.')
654 v |= 0xa<<j;
655 else if (c == 'e' || c == 'E')
656 {
657 if (text[k] == '-')
658 {
659 v |= 0xc<<j;
660 k++;
661 }
662 else
663 {
664 v |= 0xb<<j;
665 }
666 }
667 else if (c == '-')
668 {
669 v |= 0xe<<j;
670 }
671 else if (c == 0)
672 break;
673
674 if (j == 0)
675 {
676 fz_write_byte(ctx, out, v);
677 v = 0;
678 }
679 j ^= 4;
680 }
681 if (j == 4)
682 v = 0xff;
683 else
684 v |= 0xf;
685 fz_write_byte(ctx, out, v);
686 return;
687 }
688
689 /* Must be an int. */
690 si = (int)i;
691 if (-107 <= si && si <= 107)
692 fz_write_byte(ctx, out, si+139);
693 else if (108 <= si && si <= 1131)
694 {
695 si -= 108;
696 fz_write_byte(ctx, out, (si>>8)+247);
697 fz_write_byte(ctx, out, si);
698 }
699 else if (-1131 <= si && si <= -108)
700 {
701 si = -si - 108;
702 fz_write_byte(ctx, out, (si>>8)+251);
703 fz_write_byte(ctx, out, si);
704 }
705 else if (-32768 <= si && si <= 32767)
706 {
707 fz_write_byte(ctx, out, 28);
708 fz_write_byte(ctx, out, si>>8);
709 fz_write_byte(ctx, out, si);
710 }
711 else
712 {
713 fz_write_byte(ctx, out, 29);
714 fz_write_byte(ctx, out, si>>24);
715 fz_write_byte(ctx, out, si>>16);
716 fz_write_byte(ctx, out, si>>8);
717 fz_write_byte(ctx, out, si);
718 }
719 }
720
721 static void
722 dict_write_args(fz_context *ctx, fz_output *out, dict_iterator *di)
723 {
724 int i;
725
726 for (i = 0; i <= di->num_args; i++)
727 {
728 dict_write_arg(ctx, out, di->arg[i]);
729 }
730 }
731
732 static void
733 do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_list, index_t *index, int keep_notdef)
734 {
735 uint8_t *d, *strings;
736 uint32_t i, offset, end;
737 uint32_t required, offset_size, fill;
738 uint32_t num_charstrings = index->count;
739 int gid;
740 int num_gids = keep_list->len;
741 const usage_t *gids = keep_list->list;
742
743 if (num_charstrings == 0)
744 return;
745
746 /* First count the required size. */
747 offset = index_get(ctx, index, 0);
748 required = 0;
749 gid = 0;
750 for (i = 0; i < num_charstrings; offset = end, i++)
751 {
752 end = index_get(ctx, index, i+1);
753 if (gid < num_gids && i == gids[gid].num)
754 {
755 /* Keep this */
756 gid++;
757 }
758 else if (keep_notdef && i == 0)
759 {
760 /* Keep this. */
761 }
762 else
763 {
764 /* Drop this */
765 required += 1;
766 continue;
767 }
768 required += end-offset;
769 }
770
771 /* So we need 'required' bytes of space for the strings themselves */
772 /* Do not forget to increment by one byte! This is because the
773 last entry in the offset table points to one byte beyond the end of
774 the required string data. Consider if the required string data occupies
775 255 bytes, then each offset for each of the required entries can be
776 represented by a single byte, but the last table entry would need to
777 point to offset 256, which cannot be represented by a single byte. */
778 offset_size = offsize_for_offset(required + 1);
779
780 required += 2 + 1 + (num_charstrings+1)*offset_size;
781
782 *buffer = fz_new_buffer(ctx, required);
783 d = (*buffer)->data;
784 (*buffer)->len = required;
785
786 /* Write out the index header */
787 put16(d, num_charstrings); /* count */
788 d +=2;
789 put8(d, offset_size); /* offset size */
790 d += 1;
791
792
793 /* Now copy the charstrings themselves */
794 strings = d + offset_size * (num_charstrings+1) - 1;
795 gid = 0;
796 fill = 1;
797 offset = index_get(ctx, index, 0);
798 for (i = 0; i < num_charstrings; offset = end, i++)
799 {
800 end = index_get(ctx, index, i+1);
801 if (gid < num_gids && gids[gid].num == i)
802 {
803 /* Keep this */
804 gid++;
805 }
806 else if (keep_notdef && i == 0)
807 {
808 /* Keep this */
809 }
810 else
811 {
812 /* Drop this */
813 put_offset(d, offset_size, fill);
814 d += offset_size;
815 strings[fill++] = 0x0e; /* endchar */
816 continue;
817 }
818
819 memcpy(strings + fill, &cff->base[offset], end-offset);
820 put_offset(d, offset_size, fill);
821 d += offset_size;
822 fill += end-offset;
823 }
824 put_offset(d, offset_size, fill);
825 }
826
827 static void
828 subset_charstrings(fz_context *ctx, cff_t *cff)
829 {
830 do_subset(ctx, cff, &cff->charstrings_subset, &cff->gids_to_keep, &cff->charstrings_index, 1);
831 }
832
833 static void
834 subset_locals(fz_context *ctx, cff_t *cff)
835 {
836 do_subset(ctx, cff, &cff->local_subset, &cff->local_usage, &cff->local_index, 0);
837 }
838
839 static void
840 subset_globals(fz_context *ctx, cff_t *cff)
841 {
842 do_subset(ctx, cff, &cff->global_subset, &cff->global_usage, &cff->global_index, 0);
843 }
844
845 static void
846 subset_fdarray_locals(fz_context *ctx, cff_t *cff)
847 {
848 uint16_t i, n = cff->fdarray_index.count;
849
850 for (i = 0; i < n; i++)
851 do_subset(ctx, cff, &cff->fdarray[i].local_subset, &cff->fdarray[i].local_usage, &cff->fdarray[i].local_index, 0);
852 }
853
854 /* Charstring "executing" functions */
855
856 static int
857 usage_list_find(fz_context *ctx, usage_list_t *list, int value)
858 {
859 /* are we on the list already? */
860 int lo = 0;
861 int hi = list->len;
862
863 while (lo < hi)
864 {
865 int mid = (lo + hi)>>1;
866 int v = list->list[mid].num;
867 if (v < value)
868 lo = mid+1;
869 else if (v > value)
870 hi = mid;
871 else
872 return mid;
873 }
874 return lo;
875 }
876
877 static int
878 usage_list_contains(fz_context *ctx, usage_list_t *list, int value)
879 {
880 int lo = usage_list_find(ctx, list, value);
881
882 return (lo < list->len && list->list[lo].num == value);
883 }
884
885 static void
886 usage_list_add(fz_context *ctx, usage_list_t *list, int value)
887 {
888 int lo = usage_list_find(ctx, list, value);
889
890 if (lo < list->len && list->list[lo].num == value)
891 return;
892
893 if (list->len == list->max)
894 {
895 int newmax = list->max * 2;
896
897 if (newmax == 0)
898 newmax = 32;
899
900 list->list = fz_realloc(ctx, list->list, sizeof(*list->list) * newmax);
901 list->max = newmax;
902 }
903
904 memmove(&list->list[lo+1], &list->list[lo], (list->len - lo) * sizeof(*list->list));
905 list->list[lo].num = value;
906 list->list[lo].scanned = 0;
907 list->len++;
908 }
909
910 static void
911 drop_usage_list(fz_context *ctx, usage_list_t *list)
912 {
913 if (!list)
914 return;
915 fz_free(ctx, list->list);
916 list->list = NULL;
917 }
918
919 static void
920 mark_subr_used(fz_context *ctx, cff_t *cff, int subr, int global, int local_subr_bias, usage_list_t *local_usage)
921 {
922 usage_list_t *list = global ? &cff->global_usage : local_usage;
923
924 subr += global ? cff->gsubr_bias : local_subr_bias;
925
926 usage_list_add(ctx, list, subr);
927 }
928
929 static void
930 use_sub_char(fz_context *ctx, cff_t *cff, int code)
931 {
932 /* code is a character code in 'standard encoding'. We
933 * need to map that to whatever glyph that would be in
934 * standard encoding, and mark that glyph as being used. */
935 uint32_t i, gid;
936
937 if (code < 0 || code > 255)
938 return;
939 i = standard_encoding[code];
940 if (i == 0)
941 return;
942
943 for (gid = 0; gid < cff->unpacked_charset_len; gid++)
944 {
945 if (cff->unpacked_charset[gid] == i)
946 break;
947 }
948 if (gid == cff->unpacked_charset_len)
949 {
950 fz_warn(ctx, "subsidiary char out of range");
951 return;
952 }
953
954 if (usage_list_contains(ctx, &cff->gids_to_keep, gid))
955 return;
956
957 usage_list_add(ctx, &cff->extra_gids_to_keep, gid);
958 }
959
/* Operand stack helpers for the charstring interpreter. These expect
 * 'sp' (the number of operands on the stack), 'stack' and 'ctx' to be
 * in scope, along with an 'atleast_fail' label to jump to when there
 * are too few operands. */

/* Check that at least n operands are present. */
#define ATLEAST(n) \
	do { if (sp < (n)) goto atleast_fail; } while (0)

/* Check that at least n operands are present, then discard them. */
#define POP(n) \
	do { if (sp < (n)) goto atleast_fail; sp -= (n); } while (0)

/* Make room for n more operands, throwing if the stack would overflow. */
#define PUSH(n) \
	do { if (sp + (n) > (int)(sizeof(stack)/sizeof(*stack))) fz_throw(ctx, FZ_ERROR_FORMAT, "Stack overflow"); sp += (n); } while (0)
964
965 static void
966 execute_charstring(fz_context *ctx, cff_t *cff, const uint8_t *pc, const uint8_t *end, uint16_t subr_bias, usage_list_t *local_usage)
967 {
968 double trans[32] = { 0 };
969 double stack[513];
970 int sp = 0;
971 int stem_hints = 0;
972 uint8_t c;
973
974 /* 0 => starting, 1 => had hstem, 2 => anything else */
975 int start = 0;
976
977 while (pc < end)
978 {
979 c = *pc++;
980
981 /* An operator other than one of the hint ones immediately
982 * disqualifies us from being in the hint extension state. */
983 if (c < 32 && (c != 1 && c != 18 && c != 19 && c != 20))
984 start = 2;
985
986 switch (c)
987 {
988 case 0:
989 case 2:
990 case 9:
991 case 13:
992 case 17:
993 fz_throw(ctx, FZ_ERROR_FORMAT, "Reserved charstring byte");
994 break;
995
996 /* Deal with all the hints together */
997 case 18: /* hstemhm */
998 case 1: /* hstem */
999 start = 1;
1000 case 23: /* vstemhm */
1001 case 3: /* vstem */
1002 stem_hints += (sp/2);
1003 goto clear;
1004
1005 case 19: /* hintmask */
1006 case 20: /* cntrmask */
1007 if (start == 1)
1008 stem_hints += (sp/2);
1009 pc += (stem_hints+7)>>3;
1010 if (pc > end)
1011 goto overflow;
1012 start = 2;
1013 goto clear;
1014
1015 /* The operators all clear the stack. */
1016 case 4: /* vmoveto */
1017 case 5: /* rlineto */
1018 case 6: /* hlineto */
1019 case 7: /* vlineto */
1020 case 8: /* rrcurveto */
1021 case 15: /* vsindex */
1022 case 21: /* rmoveto */
1023 case 22: /* hmoveto */
1024 case 24: /* rcurveline */
1025 case 25: /* rlinecurve */
1026 case 26: /* vvcurveto */
1027 case 27: /* hhcurveto */
1028 case 30: /* vhcurveto */
1029 case 31: /* hvcurveto */
1030 clear:
1031 sp = 0;
1032 break;
1033
1034
1035
1036 case 10: /* callsubr */
1037 ATLEAST(1);
1038 mark_subr_used(ctx, cff, stack[sp-1], 0, subr_bias, local_usage);
1039 sp--;
1040 break;
1041 case 11: /* return */
1042 pc = end;
1043 sp = 0;
1044 break;
1045 case 12: /* escape */
1046 {
1047 if (pc == end)
1048 {
1049 overflow:
1050 fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overflow in charstring");
1051 }
1052 c = *pc++;
1053 switch (c)
1054 {
1055 case 0: /* dotsection: deprecated, nop */
1056 sp = 0;
1057 break;
1058 case 3: /* and */
1059 ATLEAST(2);
1060 stack[sp-2] = (stack[sp-1] != 0 && stack[sp-2] != 0);
1061 sp--;
1062 break;
1063 case 4: /* or */
1064 ATLEAST(2);
1065 stack[sp-2] = (stack[sp-1] != 0 || stack[sp-2] != 0);
1066 sp--;
1067 break;
1068 case 5: /* not */
1069 ATLEAST(1);
1070 stack[sp-1] = (stack[sp-1] == 0);
1071 break;
1072
1073 case 9: /* abs */
1074 ATLEAST(1);
1075 if (stack[sp-1] < 0)
1076 stack[sp-1] = -stack[sp-1];
1077 break;
1078
1079 case 10: /* add */
1080 ATLEAST(2);
1081 stack[sp-2] += stack[sp-1];
1082 sp--;
1083 break;
1084
1085 case 11: /* sub */
1086 ATLEAST(2);
1087 stack[sp-2] -= stack[sp-1];
1088 sp--;
1089 break;
1090
1091 case 12: /* div */
1092 ATLEAST(2);
1093 if (stack[sp-2] != 0)
1094 stack[sp-2] /= stack[sp-1];
1095 sp--;
1096 break;
1097
1098 case 14: /* neg */
1099 ATLEAST(1);
1100 stack[sp-1] = -stack[sp-1];
1101 break;
1102
1103 case 15: /* eq */
1104 ATLEAST(2);
1105 stack[sp-2] = (stack[sp-1] == stack[sp-2]);
1106 sp--;
1107 break;
1108 case 18: /* drop */
1109 POP(1);
1110 break;
1111
1112 case 20: /* put */
1113 ATLEAST(2);
1114 if ((int)stack[sp-1] < 0 || (unsigned int)stack[sp-1] > sizeof(trans)/sizeof(*trans))
1115 fz_throw(ctx, FZ_ERROR_FORMAT, "Transient array over/underflow");
1116 trans[(int)stack[sp-1]] = stack[sp-2];
1117 sp -= 2;
1118 break;
1119 case 21: /* get */
1120 ATLEAST(1);
1121 if ((int)stack[sp-1] < 0 || (unsigned int)stack[sp-1] > sizeof(trans)/sizeof(*trans))
1122 fz_throw(ctx, FZ_ERROR_FORMAT, "Transient array over/underflow");
1123 stack[sp-1] = trans[(int)stack[sp-1]];
1124 break;
1125
1126 case 22: /* ifelse */
1127 ATLEAST(4);
1128 if (stack[sp-2] > stack[sp-1])
1129 stack[sp-4] = stack[sp-3];
1130 sp -= 3;
1131 break;
1132 case 23: /* random */
1133 PUSH(1);
1134 stack[sp-1] = 0.5;
1135 break;
1136
1137 case 24: /* mul */
1138 ATLEAST(2);
1139 stack[sp-2] *= stack[sp-1];
1140 break;
1141
1142 case 26: /* sqrt */
1143 ATLEAST(1);
1144 if (stack[sp-1] >= 0)
1145 stack[sp-1] = sqrtf(stack[sp-1]);
1146 break;
1147
1148 case 27: /* dup */
1149 ATLEAST(1);
1150 PUSH(1);
1151 stack[sp-1] = stack[sp-2];
1152 break;
1153
1154 case 28: /* exch */
1155 {
1156 double d;
1157 ATLEAST(2);
1158 d = stack[sp-1];
1159 stack[sp-1] = stack[sp-2];
1160 stack[sp-2] = d;
1161 break;
1162 }
1163 case 29: /* index */
1164 {
1165 int i;
1166 ATLEAST(1);
1167 i = (int)stack[sp-1];
1168 ATLEAST(i+1);
1169 if (i < 0 || i > sp-1)
1170 i = 0;
1171 stack[sp-1] = stack[sp-2-i];
1172 break;
1173 }
1174 case 30: /* roll */
1175 {
1176 int N, J;
1177 ATLEAST(2);
1178 J = stack[sp-1];
1179 N = stack[sp-2];
1180 if (N == 0)
1181 break;
1182 if (N < 0)
1183 fz_throw(ctx, FZ_ERROR_FORMAT, "Invalid roll");
1184 ATLEAST(2+N);
1185 if (J < 0)
1186 {
1187 J = N - ((-J) % N);
1188 if (J == 0)
1189 break;
1190 }
1191 while (J--)
1192 {
1193 double t = stack[sp-2];
1194 int i;
1195 for (i = N-1; i > 0; i--)
1196 {
1197 stack[sp-2-i] = stack[sp-3-i];
1198 }
1199 stack[sp-2-N] = t;
1200 }
1201 break;
1202 }
1203
1204
1205 case 34: /* hflex */
1206 case 35: /* flex */
1207 case 36: /* hflex1 */
1208 case 37: /* flex1 */
1209 sp = 0;
1210 break;
1211
1212
1213 default:
1214 fz_throw(ctx, FZ_ERROR_FORMAT, "Reserved charstring byte");
1215 }
1216 break;
1217 }
1218 case 14: /* endchar */
1219 pc = end;
1220 if (sp >= 4)
1221 {
1222 use_sub_char(ctx, cff, stack[sp-1]);
1223 use_sub_char(ctx, cff, stack[sp-2]);
1224 }
1225 sp = 0;
1226 break;
1227 case 16: /* blend */
1228 /* Consumes a lot of operators, leaves n, where n = stack[sp-1]. */
1229 ATLEAST(1);
1230 sp = stack[sp-1];
1231 break;
1232 case 29: /* callgsubr */
1233 ATLEAST(1);
1234 mark_subr_used(ctx, cff, stack[sp-1], 1, subr_bias, local_usage);
1235 sp--;
1236 break;
1237 case 28: /* shortint */
1238 if (pc + 2 >= end)
1239 {
1240 pc = end;
1241 break;
1242 }
1243 PUSH(1);
1244 stack[sp-1] = (pc[0]<<8) | pc[1];
1245 pc += 2;
1246 break;
1247 case 255: /* number */
1248 if (pc + 4 >= end)
1249 {
1250 pc = end;
1251 break;
1252 }
1253 PUSH(1);
1254 stack[sp-1] = ((pc[0]<<24) | (pc[1]<<16) | (pc[2]<<8) | pc[3]) / 65536.0;
1255 pc += 4;
1256 break;
1257 case 247: case 248: case 249: case 250: /* number */
1258 PUSH(1);
1259 stack[sp-1] = (c-247) * 256 + 108;
1260 if (pc >= end)
1261 break;
1262 stack[sp-1] += *pc++;
1263 break;
1264 case 251: case 252: case 253: case 254: /* number */
1265 PUSH(1);
1266 stack[sp-1] = -((c-251) * 256 + 108);
1267 if (pc >= end)
1268 break;
1269 stack[sp-1] -= *pc++;
1270 break;
1271 default: /* 32-246 */
1272 PUSH(1);
1273 stack[sp-1] = c-139;
1274 break;
1275 }
1276
1277 }
1278 return;
1279 atleast_fail:
1280 fz_throw(ctx, FZ_ERROR_FORMAT, "Insufficient operators on the stack: op=%d", c);
1281 }
1282
1283
1284 usage_list_t *
1285 get_font_locals(fz_context *ctx, cff_t *cff, int gid, int is_pdf_cidfont, uint16_t *subr_bias)
1286 {
1287 usage_t *gids = cff->gids_to_keep.list;
1288 int num_gids = cff->gids_to_keep.len;
1289
1290 if (is_pdf_cidfont && cff->is_cidfont)
1291 {
1292 uint8_t font = 0;
1293 if (gid < num_gids && gids[gid].num < cff->charstrings_index.count)
1294 font = cff->gid_to_font[gids[gid].num];
1295 else if (gid == 0)
1296 font = cff->gid_to_font[gid];
1297 if (font >= cff->fdarray_index.count)
1298 font = 0;
1299
1300 if (subr_bias)
1301 *subr_bias = cff->fdarray[font].subr_bias;
1302 return &cff->fdarray[font].local_usage;
1303 }
1304
1305 if (subr_bias)
1306 *subr_bias = cff->subr_bias;
1307 return &cff->local_usage;
1308 }
1309
1310 static void
1311 scan_charstrings(fz_context *ctx, cff_t *cff, int is_pdf_cidfont)
1312 {
1313 uint32_t offset, end;
1314 int num_charstrings = (int)cff->charstrings_index.count;
1315 int i, gid, font;
1316 usage_t *gids = cff->gids_to_keep.list;
1317 int num_gids = cff->gids_to_keep.len;
1318 int changed;
1319 uint16_t subr_bias;
1320 usage_list_t *local_usage = NULL;
1321
1322 /* Scan through the charstrings.*/
1323 offset = index_get(ctx, &cff->charstrings_index, 0);
1324 gid = 0;
1325 for (i = 0; i < num_charstrings; offset = end, i++)
1326 {
1327 end = index_get(ctx, &cff->charstrings_index, i+1);
1328 if (gid < num_gids && i == gids[gid].num)
1329 {
1330 /* Keep this */
1331 gid++;
1332 }
1333 else if (i == 0)
1334 {
1335 /* Keep this. */
1336 }
1337 else
1338 {
1339 /* Drop this */
1340 continue;
1341 }
1342 local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias);
1343 execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage);
1344 }
1345
1346 /* Now we search the 'extra' ones, the 'subrs' (local) and 'gsubrs' (globals)
1347 * that are used. Searching each of these might find more that need to be
1348 * searched, so we use a loop. */
1349 do
1350 {
1351 changed = 0;
1352 /* Extra (subsidiary) glyphs */
1353 for (i = 0; i < cff->extra_gids_to_keep.len; i++)
1354 {
1355 if (cff->extra_gids_to_keep.list[i].scanned)
1356 continue;
1357 cff->extra_gids_to_keep.list[i].scanned = 1;
1358 gid = cff->extra_gids_to_keep.list[i].num;
1359 usage_list_add(ctx, &cff->gids_to_keep, gid);
1360 offset = index_get(ctx, &cff->charstrings_index, gid);
1361 end = index_get(ctx, &cff->charstrings_index, gid+1);
1362
1363 local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias);
1364 execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage);
1365 changed = 1;
1366 }
1367
1368 /* Now, run through the locals, seeing what locals and globals they call. */
1369 for (i = 0; i < cff->local_usage.len; i++)
1370 {
1371 if (cff->local_usage.list[i].scanned)
1372 continue;
1373 cff->local_usage.list[i].scanned = 1;
1374 gid = cff->local_usage.list[i].num;
1375 offset = index_get(ctx, &cff->local_index, gid);
1376 end = index_get(ctx, &cff->local_index, gid+1);
1377
1378 local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias);
1379 execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage);
1380 changed = 1;
1381 }
1382
1383 /* Now, run through the per-font locals, seeing what per-font locals and globals they call. */
1384 for (font = 0; font < cff->fdarray_index.count; font++)
1385 {
1386 for (i = 0; i < cff->fdarray[font].local_usage.len; i++)
1387 {
1388 gid = cff->fdarray[font].local_usage.list[i].num;
1389
1390 if (cff->fdarray[font].local_usage.list[i].scanned)
1391 continue;
1392 cff->fdarray[font].local_usage.list[i].scanned = 1;
1393 gid = cff->fdarray[font].local_usage.list[i].num;
1394 offset = index_get(ctx, &cff->fdarray[font].local_index, gid);
1395 end = index_get(ctx, &cff->fdarray[font].local_index, gid+1);
1396
1397 local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias);
1398 execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage);
1399 changed = 1;
1400 }
1401 }
1402
1403 /* Now, run through the globals, seeing what globals they call. */
1404 for (i = 0; i < cff->global_usage.len; i++)
1405 {
1406 if (cff->global_usage.list[i].scanned)
1407 continue;
1408 cff->global_usage.list[i].scanned = 1;
1409 gid = cff->global_usage.list[i].num;
1410 offset = index_get(ctx, &cff->global_index, gid);
1411 end = index_get(ctx, &cff->global_index, gid+1);
1412
1413 local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias);
1414 execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage);
1415 changed = 1;
1416 }
1417 }
1418 while (changed);
1419 }
1420
1421 static void
1422 get_encoding_len(fz_context *ctx, cff_t *cff)
1423 {
1424 uint32_t encoding_offset = cff->encoding_offset;
1425 const uint8_t *d = cff->base + encoding_offset;
1426 uint8_t fmt;
1427 uint8_t n;
1428 uint32_t size;
1429
1430 if (encoding_offset < 2)
1431 {
1432 cff->encoding_len = 0;
1433 return;
1434 }
1435
1436 if (encoding_offset + 2 > cff->len)
1437 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt encoding");
1438
1439 fmt = *d++;
1440 n = *d++;
1441
1442 switch (fmt & 127)
1443 {
1444 case 0:
1445 size = 2 + n;
1446 break;
1447 case 1:
1448 size = 2 + n * 2;
1449 break;
1450 case 2:
1451 size = 2 + n * 3;
1452 break;
1453 default:
1454 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad format encoding");
1455 }
1456
1457 if (encoding_offset + size > cff->len)
1458 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt encoding");
1459
1460 if (fmt & 128)
1461 {
1462 if (encoding_offset + size + 1 > cff->len)
1463 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt encoding");
1464
1465 n = *d++;
1466 size += 1 + n*3;
1467
1468 if (encoding_offset + size > cff->len)
1469 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt encoding");
1470 }
1471 cff->encoding_len = size;
1472 }
1473
1474 static void
1475 get_charset_len(fz_context *ctx, cff_t *cff)
1476 {
1477 uint32_t charset_offset = cff->charset_offset;
1478 const uint8_t *d = cff->base + charset_offset;
1479 const uint8_t *d0 = d;
1480 uint8_t fmt;
1481 uint32_t i, n;
1482
1483 if (charset_offset < 2)
1484 {
1485 cff->charset_len = 0;
1486 return;
1487 }
1488
1489 if (charset_offset + 1 > cff->len)
1490 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1491
1492 fmt = *d++;
1493 n = cff->charstrings_index.count;
1494
1495 if (fmt == 0)
1496 {
1497 cff->unpacked_charset = fz_malloc(ctx, sizeof(uint16_t) * n);
1498 cff->unpacked_charset_len = cff->unpacked_charset_max = n;
1499 cff->unpacked_charset[0] = 0;
1500 for (i = 1; i < n; i++)
1501 {
1502 cff->unpacked_charset[i] = get16(d);
1503 d += 2;
1504 }
1505 }
1506 else if (fmt == 1)
1507 {
1508 cff->unpacked_charset = fz_malloc(ctx, sizeof(uint16_t) * 256);
1509 cff->unpacked_charset_max = 256;
1510 cff->unpacked_charset_len = 1;
1511 cff->unpacked_charset[0] = 0;
1512 n--;
1513 while (n > 0)
1514 {
1515 uint16_t first;
1516 uint32_t nleft;
1517 if (d + 3>= cff->base + cff->len)
1518 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1519 first = get16(d);
1520 nleft = d[2] + 1;
1521 d += 3;
1522 if (nleft > n)
1523 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1524 n -= nleft;
1525 while (nleft)
1526 {
1527 if (cff->unpacked_charset_len == cff->unpacked_charset_max)
1528 {
1529 cff->unpacked_charset = fz_realloc(ctx, cff->unpacked_charset, sizeof(uint16_t) * 2 * cff->unpacked_charset_max);
1530 cff->unpacked_charset_max *= 2;
1531 }
1532 cff->unpacked_charset[cff->unpacked_charset_len++] = first;
1533 first++;
1534 nleft--;
1535 }
1536 }
1537 }
1538 else if (fmt == 2)
1539 {
1540 cff->unpacked_charset = fz_malloc(ctx, sizeof(uint16_t) * 256);
1541 cff->unpacked_charset_max = 256;
1542 cff->unpacked_charset_len = 1;
1543 cff->unpacked_charset[0] = 0;
1544 n--;
1545 while (n > 0)
1546 {
1547 uint16_t first;
1548 uint32_t nleft;
1549 if (d + 4 >= cff->base + cff->len)
1550 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1551 first = get16(d);
1552 nleft = get16(d+2) + 1;
1553 d += 4;
1554 if (nleft > n)
1555 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1556 n -= nleft;
1557 while (nleft)
1558 {
1559 if (cff->unpacked_charset_len == cff->unpacked_charset_max)
1560 {
1561 cff->unpacked_charset = fz_realloc(ctx, cff->unpacked_charset, sizeof(uint16_t) * 2 * cff->unpacked_charset_max);
1562 cff->unpacked_charset_max *= 2;
1563 }
1564 cff->unpacked_charset[cff->unpacked_charset_len++] = first;
1565 first++;
1566 nleft--;
1567 }
1568 }
1569 }
1570 else
1571 {
1572 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad charset format");
1573 }
1574
1575 cff->charset_len = (uint32_t)(d - d0);
1576 }
1577
1578 static void
1579 read_fdselect(fz_context *ctx, cff_t *cff)
1580 {
1581 uint32_t fdselect_offset = cff->fdselect_offset;
1582 const uint8_t *d = cff->base + fdselect_offset;
1583 const uint8_t *d0 = d;
1584 uint8_t fmt;
1585 uint16_t n, m, i, first, last, k;
1586
1587 if (fdselect_offset == 0)
1588 {
1589 cff->fdselect_len = 0;
1590 return;
1591 }
1592
1593 if (fdselect_offset + 1 > cff->len)
1594 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect");
1595
1596 fmt = *d++;
1597 n = cff->charstrings_index.count;
1598
1599 cff->gid_to_font = fz_calloc(ctx, n, sizeof(*cff->gid_to_font));
1600
1601 if (fmt == 0)
1602 {
1603 for (i = 0; i < n; i++)
1604 {
1605 if (d >= cff->base + cff->len)
1606 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect");
1607 cff->gid_to_font[i] = d[0];
1608 d++;
1609 }
1610 }
1611 else if (fmt == 3)
1612 {
1613 if (d + 2 >= cff->base + cff->len)
1614 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect");
1615 m = get16(d);
1616 d += 2;
1617 if (m > cff->charstrings_index.count)
1618 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect");
1619
1620 for (i = 0; i < m; i++)
1621 {
1622 if (d + 5 >= cff->base + cff->len)
1623 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect");
1624 first = get16(d);
1625 last = get16(d + 3);
1626 if (first >= cff->charstrings_index.count || last > cff->charstrings_index.count || first >= last)
1627 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect");
1628 for (k = first; k < last; k++)
1629 cff->gid_to_font[k] = d[2];
1630 d += 3;
1631 }
1632 }
1633
1634 cff->fdselect_len = (uint32_t)(d - d0);
1635 }
1636
1637 static void
1638 load_charset_for_cidfont(fz_context *ctx, cff_t *cff)
1639 {
1640 uint32_t charset_offset = cff->charset_offset;
1641 const uint8_t *d = cff->base + charset_offset;
1642 uint8_t fmt;
1643 uint32_t n = cff->charstrings_index.count;
1644 uint32_t i;
1645
1646 if (charset_offset + 1 > cff->len)
1647 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1648
1649 fmt = *d++;
1650
1651 cff->gid_to_cid = fz_calloc(ctx, n, sizeof(*cff->gid_to_cid));
1652 cff->gid_to_cid[0] = 0;
1653
1654 if (fmt == 0)
1655 {
1656 for (i = 1; i < n; i++)
1657 {
1658 cff->gid_to_cid[i] = get16(d);
1659 d += 2;
1660 }
1661 }
1662 else if (fmt == 1)
1663 {
1664 for (i = 1; i < n;)
1665 {
1666 uint16_t first;
1667 int32_t nleft;
1668 if (d + 3 >= cff->base + cff->len)
1669 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1670 first = get16(d);
1671 nleft = d[2] + 1;
1672 d += 3;
1673 while (nleft-- && i < n)
1674 {
1675 cff->gid_to_cid[i++] = first++;
1676 }
1677 }
1678 }
1679 else if (fmt == 2)
1680 {
1681 for (i = 1; i < n;)
1682 {
1683 uint16_t first;
1684 int32_t nleft;
1685 if (d + 4 >= cff->base + cff->len)
1686 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset");
1687 first = get16(d);
1688 nleft = get16(d+2) + 1;
1689 d += 4;
1690 while (nleft-- && i < n)
1691 {
1692 cff->gid_to_cid[i++] = first++;
1693 }
1694 }
1695 }
1696 else
1697 {
1698 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad charset format");
1699 }
1700 }
1701
1702 static void
1703 write_offset(fz_context *ctx, fz_output *out, uint8_t os, uint32_t v)
1704 {
1705 if (os > 3)
1706 fz_write_byte(ctx, out, v>>24);
1707 if (os > 2)
1708 fz_write_byte(ctx, out, v>>16);
1709 if (os > 1)
1710 fz_write_byte(ctx, out, v>>8);
1711 fz_write_byte(ctx, out, v);
1712 }
1713
1714 static void
1715 output_name_index(fz_context *ctx, cff_t *cff, fz_output *out)
1716 {
1717 uint32_t name0 = index_get(ctx, &cff->name_index, 0);
1718 uint32_t name1 = index_get(ctx, &cff->name_index, 1);
1719 uint8_t os;
1720
1721 /* Turn name1 back into an offset from the index. */
1722 name1 -= name0;
1723 name1++;
1724 os = offsize_for_offset(name1);
1725
1726 fz_write_uint16_be(ctx, out, 1); /* Count */
1727 fz_write_byte(ctx, out, os); /* offsize */
1728 write_offset(ctx, out, os, 1); /* index[0] = 1 */
1729 write_offset(ctx, out, os, name1); /* index[1] = end */
1730 fz_write_data(ctx, out, cff->base + name0, name1-1);
1731 }
1732
1733 static void
1734 output_top_dict_index(fz_context *ctx, cff_t *cff, fz_output *out)
1735 {
1736 uint32_t top_dict_len = (uint32_t)cff->top_dict_subset->len;
1737 uint8_t os = offsize_for_offset((uint32_t)(1 + top_dict_len));
1738
1739 fz_write_uint16_be(ctx, out, 1); /* Count */
1740 fz_write_byte(ctx, out, os); /* offsize */
1741 write_offset(ctx, out, os, 1);
1742 write_offset(ctx, out, os, (uint32_t)(1 + cff->top_dict_subset->len));
1743
1744 /* And copy the updated top dict. */
1745 fz_write_data(ctx, out, cff->top_dict_subset->data, cff->top_dict_subset->len);
1746 }
1747
1748 static uint32_t
1749 rewrite_fdarray(fz_context *ctx, cff_t *cff, uint32_t offset0)
1750 {
1751 /* fdarray_index will start at offset0. */
1752 uint16_t i;
1753 uint16_t n = cff->fdarray_index.count;
1754 uint32_t len = 0;
1755 uint8_t os;
1756 size_t offset;
1757
1758 if (cff->fdarray == NULL)
1759 fz_throw(ctx, FZ_ERROR_FORMAT, "Expected to rewrite an fdarray");
1760
1761 /* Count how many bytes the index will require. */
1762 for (i = 0; i < n; i++)
1763 {
1764 len += (uint32_t)cff->fdarray[i].rewritten_dict->len;
1765 }
1766 os = offsize_for_offset(len+1);
1767 len += 2 + 1 + (n+1)*os;
1768
1769 /* Now offset0 + len points to where the private dicts
1770 * will go. Run through, fixing up the offsets in the
1771 * font dicts (this won't change the length). */
1772 offset = offset0 + len;
1773 for (i = 0; i < n; i++)
1774 {
1775 assert(cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup] == 29);
1776 assert(cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+5] == 29);
1777 put32(&cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+1], (uint32_t)cff->fdarray[i].rewritten_private->len);
1778 put32(&cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+6], (uint32_t)offset);
1779 offset += cff->fdarray[i].rewritten_private->len;
1780 if (cff->fdarray[i].local_subset)
1781 {
1782 offset += cff->fdarray[i].local_subset->len;
1783 }
1784 else
1785 {
1786 offset += 2;
1787 }
1788 }
1789
1790 return (uint32_t)offset;
1791 }
1792
1793 static void
1794 update_dicts(fz_context *ctx, cff_t *cff, uint32_t offset)
1795 {
1796 uint8_t *top_dict_data = cff->top_dict_subset->data;
1797 uint32_t top_dict_len = (uint32_t)cff->top_dict_subset->len;
1798
1799 /* Update the offsets */
1800 /* Header
1801 Name Index
1802 Top Dict Index
1803 (Top Dict)
1804 String Index
1805 Global Subr Index
1806 Encodings
1807 Charsets
1808 FDSelect
1809 CharStrings Index
1810 Font DICT Index
1811 (Font Dict)
1812 Private DICT
1813 Local Subr Index
1814 */
1815 offset += 2 + 1 + 2 * offsize_for_offset(top_dict_len+1); /* offset = start of top_dict_index data */
1816 offset += top_dict_len; /* offset = end of top_dict */
1817 if (cff->string_index.index_size)
1818 offset += cff->string_index.index_size;
1819 else
1820 offset += 2;
1821 if (cff->global_subset)
1822 offset += (uint32_t)cff->global_subset->len;
1823 else if (cff->global_index.index_size)
1824 offset += cff->global_index.index_size;
1825 else
1826 offset += 2;
1827 if (cff->top_dict_fixup_offsets.encoding)
1828 {
1829 assert(top_dict_data[cff->top_dict_fixup_offsets.encoding] == 29);
1830 put32(top_dict_data + cff->top_dict_fixup_offsets.encoding+1, offset);
1831 offset += cff->encoding_len;
1832 }
1833 if (cff->top_dict_fixup_offsets.charset)
1834 {
1835 assert(top_dict_data[cff->top_dict_fixup_offsets.charset] == 29);
1836 put32(top_dict_data + cff->top_dict_fixup_offsets.charset+1, offset);
1837 offset += cff->charset_len;
1838 }
1839 if (cff->top_dict_fixup_offsets.fdselect)
1840 {
1841 assert(top_dict_data[cff->top_dict_fixup_offsets.fdselect] == 29);
1842 put32(top_dict_data + cff->top_dict_fixup_offsets.fdselect+1, offset);
1843 offset += cff->fdselect_len;
1844 }
1845 assert(top_dict_data[cff->top_dict_fixup_offsets.charstrings] == 29);
1846 put32(top_dict_data + cff->top_dict_fixup_offsets.charstrings+1, offset);
1847 if (cff->charstrings_subset)
1848 offset += (uint32_t)cff->charstrings_subset->len;
1849 else if (cff->charstrings_index.index_size)
1850 offset += cff->charstrings_index.index_size;
1851 else
1852 offset += 2;
1853 if (cff->top_dict_fixup_offsets.fdarray)
1854 {
1855 assert(top_dict_data[cff->top_dict_fixup_offsets.fdarray] == 29);
1856 put32(top_dict_data + cff->top_dict_fixup_offsets.fdarray+1, offset);
1857 offset = rewrite_fdarray(ctx, cff, offset);
1858 }
1859 if (cff->top_dict_fixup_offsets.privat)
1860 {
1861 assert(top_dict_data[cff->top_dict_fixup_offsets.privat] == 29);
1862 put32(top_dict_data + cff->top_dict_fixup_offsets.privat+1, (uint32_t)cff->private_subset->len);
1863 put32(top_dict_data + cff->top_dict_fixup_offsets.privat+6, offset);
1864 }
1865 }
1866
1867 static void
1868 read_top_dict(fz_context *ctx, cff_t *cff, int idx)
1869 {
1870 dict_iterator di;
1871 dict_operator k;
1872 uint32_t top_dict_offset = index_get(ctx, &cff->top_dict_index, idx);
1873 uint32_t top_dict_end = index_get(ctx, &cff->top_dict_index, idx+1);
1874
1875 for (k = dict_init(ctx, &di, cff->base, cff->len, top_dict_offset, top_dict_end); dict_more(&di); k = dict_next(ctx, &di))
1876 {
1877 switch (k)
1878 {
1879 case DICT_OP_ROS:
1880 cff->is_cidfont = 1;
1881 break;
1882 case DICT_OP_charset:
1883 cff->charset_offset = dict_arg_int(ctx, &di, 0);
1884 break;
1885 case DICT_OP_Encoding:
1886 cff->encoding_offset = dict_arg_int(ctx, &di, 0);
1887 break;
1888 case DICT_OP_CharstringType:
1889 cff->charstring_type = 1;
1890 break;
1891 case DICT_OP_CharStrings:
1892 cff->charstrings_index_offset = dict_arg_int(ctx, &di, 0);
1893 break;
1894 case DICT_OP_Private:
1895 cff->private_len = dict_arg_int(ctx, &di, 0);
1896 cff->private_offset = dict_arg_int(ctx, &di, 1);
1897 break;
1898 case DICT_OP_FDSelect:
1899 cff->fdselect_offset = dict_arg_int(ctx, &di, 0);
1900 break;
1901 case DICT_OP_FDArray:
1902 cff->fdarray_index_offset = dict_arg_int(ctx, &di, 0);
1903 break;
1904 default:
1905 break;
1906 }
1907 }
1908
1909 for (k = dict_init(ctx, &di, cff->base, cff->len, cff->private_offset, cff->private_offset + cff->private_len); dict_more(&di); k = dict_next(ctx, &di))
1910 {
1911 switch (k)
1912 {
1913 case DICT_OP_Subrs:
1914 cff->local_index_offset = dict_arg_int(ctx, &di, 0) + cff->private_offset;
1915 break;
1916 default:
1917 break;
1918 }
1919 }
1920 }
1921
1922 static void
1923 make_new_top_dict(fz_context *ctx, cff_t *cff)
1924 {
1925 dict_iterator di;
1926 dict_operator k;
1927 uint32_t top_dict_offset = index_get(ctx, &cff->top_dict_index, 0);
1928 uint32_t top_dict_end = index_get(ctx, &cff->top_dict_index, 1);
1929 fz_output *out = NULL;
1930
1931 cff->top_dict_subset = fz_new_buffer(ctx, 1024);
1932
1933 fz_var(out);
1934
1935 fz_try(ctx)
1936 {
1937 out = fz_new_output_with_buffer(ctx, cff->top_dict_subset);
1938
1939 for (k = dict_init(ctx, &di, cff->base, cff->len, top_dict_offset, top_dict_end); dict_more(&di); k = dict_next(ctx, &di))
1940 {
1941 switch (k)
1942 {
1943 case DICT_OP_charset:
1944 if (cff->charset_offset < 2)
1945 di.arg[0].u.i = cff->charset_offset;
1946 else
1947 {
1948 di.arg[0].u.i = 0x80000000;
1949 cff->top_dict_fixup_offsets.charset = fz_tell_output(ctx, out);
1950 }
1951 break;
1952 case DICT_OP_Encoding:
1953 if (cff->encoding_offset < 2)
1954 di.arg[0].u.i = cff->encoding_offset;
1955 else
1956 {
1957 di.arg[0].u.i = 0x80000000;
1958 cff->top_dict_fixup_offsets.encoding = fz_tell_output(ctx, out);
1959 }
1960 break;
1961 case DICT_OP_CharStrings:
1962 di.arg[0].u.i = 0x80000000;
1963 cff->top_dict_fixup_offsets.charstrings = fz_tell_output(ctx, out);
1964 break;
1965 case DICT_OP_Private:
1966 di.arg[0].u.i = 0x80000000;
1967 di.arg[1].u.i = 0x80000000;
1968 cff->top_dict_fixup_offsets.privat = fz_tell_output(ctx, out);
1969 break;
1970 case DICT_OP_FDSelect:
1971 di.arg[0].u.i = 0x80000000;
1972 cff->top_dict_fixup_offsets.fdselect = fz_tell_output(ctx, out);
1973 break;
1974 case DICT_OP_FDArray:
1975 di.arg[0].u.i = 0x80000000;
1976 cff->top_dict_fixup_offsets.fdarray = fz_tell_output(ctx, out);
1977 break;
1978 default:
1979 break;
1980 }
1981 dict_write_args(ctx, out, &di);
1982 }
1983
1984 fz_close_output(ctx, out);
1985 }
1986 fz_always(ctx)
1987 fz_drop_output(ctx, out);
1988 fz_catch(ctx)
1989 fz_rethrow(ctx);
1990 }
1991
1992 static void
1993 make_new_private_dict(fz_context *ctx, cff_t *cff)
1994 {
1995 dict_iterator di;
1996 dict_operator k;
1997 fz_output *out = NULL;
1998 int64_t len;
1999
2000 cff->private_subset = fz_new_buffer(ctx, 1024);
2001
2002 fz_var(out);
2003
2004 fz_try(ctx)
2005 {
2006 int subrs = 0;
2007 out = fz_new_output_with_buffer(ctx, cff->private_subset);
2008
2009 for (k = dict_init(ctx, &di, cff->base, cff->len, cff->private_offset, cff->private_offset + cff->private_len); dict_more(&di); k = dict_next(ctx, &di))
2010 {
2011 switch (k)
2012 {
2013 case DICT_OP_Subrs:
2014 subrs = 1;
2015 break;
2016 default:
2017 dict_write_args(ctx, out, &di);
2018 }
2019 }
2020
2021 if (subrs != 0)
2022 {
2023 /* Everything is in the DICT except for the local subr offset. Insert
2024 * that now. This is tricky, because what is the offset? It depends on
2025 * the size of the dict we are creating now, and the size of the dict
2026 * we are creating now depends on the size of the offset! */
2027 /* Length so far */
2028 len = fz_tell_output(ctx, out);
2029 /* We have to encode an offset, plus the Subrs token (19). Offset
2030 * can take up to 5 bytes. */
2031 if (len+2 < 107)
2032 {
2033 /* We can code it with a single byte encoding */
2034 len += 2;
2035 fz_write_byte(ctx, out, len + 139);
2036 }
2037 else if (len+3 < 1131)
2038 {
2039 /* We can code it with a 2 byte encoding */
2040 /* (b0-247) * 256 + b1 + 108 == len+3 */
2041 len = len+3 - 108;
2042 fz_write_byte(ctx, out, (len>>8) + 247);
2043 fz_write_byte(ctx, out, len);
2044 }
2045 else if (len+4 < 32767)
2046 {
2047 /* We can code it with a 3 byte encoding */
2048 len += 4;
2049 fz_write_byte(ctx, out, 28);
2050 fz_write_byte(ctx, out, len>>8);
2051 fz_write_byte(ctx, out, len);
2052 }
2053 else
2054 {
2055 /* We can code it with a 5 byte encoding */
2056 len += 5;
2057 fz_write_byte(ctx, out, 29);
2058 fz_write_byte(ctx, out, len>>24);
2059 fz_write_byte(ctx, out, len>>16);
2060 fz_write_byte(ctx, out, len>>8);
2061 fz_write_byte(ctx, out, len);
2062 }
2063 fz_write_byte(ctx, out, DICT_OP_Subrs);
2064 }
2065
2066 fz_close_output(ctx, out);
2067 }
2068 fz_always(ctx)
2069 fz_drop_output(ctx, out);
2070 fz_catch(ctx)
2071 fz_rethrow(ctx);
2072 }
2073
2074 static void
2075 read_fdarray_and_privates(fz_context *ctx, cff_t *cff)
2076 {
2077 dict_iterator di;
2078 dict_operator k;
2079 uint16_t i;
2080 uint16_t n = cff->fdarray_index.count;
2081 int subrs;
2082 int64_t len;
2083
2084 cff->fdarray = fz_calloc(ctx, n, sizeof(*cff->fdarray));
2085
2086 for (i = 0; i < n; i++)
2087 {
2088 uint32_t offset = index_get(ctx, &cff->fdarray_index, i);
2089 uint32_t end = index_get(ctx, &cff->fdarray_index, i+1);
2090 fz_output *out = NULL;
2091
2092 cff->fdarray[i].rewritten_dict = fz_new_buffer(ctx, 1024);
2093
2094 fz_var(out);
2095
2096 fz_try(ctx)
2097 {
2098 out = fz_new_output_with_buffer(ctx, cff->fdarray[i].rewritten_dict);
2099
2100 for (k = dict_init(ctx, &di, cff->base, cff->len, offset, end); dict_more(&di); k = dict_next(ctx, &di))
2101 {
2102 switch (k)
2103 {
2104 case DICT_OP_Private:
2105 cff->fdarray[i].len = di.arg[0].u.i;
2106 cff->fdarray[i].offset = di.arg[1].u.i;
2107 di.arg[0].u.i = 0x80000000;
2108 di.arg[1].u.i = 0x80000000;
2109 cff->fdarray[i].fixup = fz_tell_output(ctx, out);
2110 break;
2111 default:
2112 break;
2113 }
2114 dict_write_args(ctx, out, &di);
2115 }
2116
2117 fz_close_output(ctx, out);
2118 }
2119 fz_always(ctx)
2120 fz_drop_output(ctx, out);
2121 fz_catch(ctx)
2122 fz_rethrow(ctx);
2123
2124
2125 offset = cff->fdarray[i].offset;
2126 end = cff->fdarray[i].offset + cff->fdarray[i].len;
2127
2128 fz_try(ctx)
2129 {
2130 cff->fdarray[i].rewritten_private = fz_new_buffer(ctx, 1024);
2131
2132 out = fz_new_output_with_buffer(ctx, cff->fdarray[i].rewritten_private);
2133 cff->fdarray[i].local_index_offset = 0;
2134
2135 subrs = 0;
2136
2137 for (k = dict_init(ctx, &di, cff->base, cff->len, offset, end); dict_more(&di); k = dict_next(ctx, &di))
2138 {
2139 switch (k)
2140 {
2141 case DICT_OP_Subrs:
2142 subrs = 1;
2143 cff->fdarray[i].local_index_offset = dict_arg_int(ctx, &di, 0) + offset;
2144 break;
2145 default:
2146 dict_write_args(ctx, out, &di);
2147 break;
2148 }
2149 }
2150
2151 if (subrs != 0)
2152 {
2153 /* Everything is in the DICT except for the local subr offset. Insert
2154 * that now. This is tricky, because what is the offset? It depends on
2155 * the size of he dict we are creating now, and the size of the dict
2156 * we are creating now depends on the size of the offset! */
2157 /* Length so far */
2158 len = fz_tell_output(ctx, out);
2159 /* We have to encode an offset, plus the Subrs token (19). Offset
2160 * can take up to 5 bytes. */
2161 if (len+2 < 107)
2162 {
2163 /* We can code it with a single byte encoding */
2164 len += 2;
2165 fz_write_byte(ctx, out, len + 139);
2166 }
2167 else if (len+3 < 1131)
2168 {
2169 /* We can code it with a 2 byte encoding */
2170 /* (b0-247) * 256 + b1 + 108 == len+3 */
2171 len = len+3 - 108;
2172 fz_write_byte(ctx, out, (len>>8) + 247);
2173 fz_write_byte(ctx, out, len);
2174 }
2175 else if (len+4 < 32767)
2176 {
2177 /* We can code it with a 3 byte encoding */
2178 len += 4;
2179 fz_write_byte(ctx, out, 28);
2180 fz_write_byte(ctx, out, len>>8);
2181 fz_write_byte(ctx, out, len);
2182 }
2183 else
2184 {
2185 /* We can code it with a 5 byte encoding */
2186 len += 5;
2187 fz_write_byte(ctx, out, 29);
2188 fz_write_byte(ctx, out, len>>24);
2189 fz_write_byte(ctx, out, len>>16);
2190 fz_write_byte(ctx, out, len>>8);
2191 fz_write_byte(ctx, out, len);
2192 }
2193 fz_write_byte(ctx, out, DICT_OP_Subrs);
2194 }
2195
2196 fz_close_output(ctx, out);
2197 }
2198 fz_always(ctx)
2199 fz_drop_output(ctx, out);
2200 fz_catch(ctx)
2201 fz_rethrow(ctx);
2202
2203 if (cff->fdarray[i].local_index_offset != 0)
2204 {
2205 index_load(ctx, &cff->fdarray[i].local_index, cff->base, (uint32_t)cff->len, cff->fdarray[i].local_index_offset);
2206 cff->fdarray[i].subr_bias = subr_bias(ctx, cff, cff->fdarray[i].local_index.count);
2207 }
2208 }
2209 }
2210
2211 static void
2212 output_fdarray(fz_context *ctx, fz_output *out, cff_t *cff)
2213 {
2214 uint16_t i;
2215 uint16_t n = cff->fdarray_index.count;
2216 uint8_t os;
2217 uint32_t offset = 1;
2218 uint32_t len = 0;
2219
2220 for (i = 0; i < n; i++)
2221 {
2222 len += (uint32_t)cff->fdarray[i].rewritten_dict->len;
2223 }
2224 os = offsize_for_offset(len+1);
2225
2226 fz_write_uint16_be(ctx, out, cff->fdarray_index.count); /* Count */
2227 fz_write_byte(ctx, out, os); /* offsize */
2228
2229 /* First we write out the offsets of the rewritten dicts. */
2230 for (i = 0; i < n; i++)
2231 {
2232 write_offset(ctx, out, os, offset);
2233 offset += (uint32_t)cff->fdarray[i].rewritten_dict->len;
2234 }
2235 write_offset(ctx, out, os, offset);
2236
2237 /* Now write the dicts themselves. */
2238 for (i = 0; i < n; i++)
2239 {
2240 fz_write_data(ctx, out, cff->fdarray[i].rewritten_dict->data, cff->fdarray[i].rewritten_dict->len);
2241 }
2242
2243 /* Now we can write out the private dicts, unchanged from the original file. */
2244 for (i = 0; i < n; i++)
2245 {
2246 fz_write_data(ctx, out, cff->fdarray[i].rewritten_private->data, cff->fdarray[i].rewritten_private->len);
2247 if (cff->fdarray[i].local_subset)
2248 fz_write_data(ctx, out, cff->fdarray[i].local_subset->data, cff->fdarray[i].local_subset->len);
2249 else
2250 fz_write_uint16_be(ctx, out, 0);
2251 }
2252 }
2253
2254 /* Nasty O(n^2) thing. */
2255 static uint16_t
2256 cid_to_gid(fz_context *ctx, cff_t *cff, uint16_t cid)
2257 {
2258 uint32_t n = cff->charstrings_index.count;
2259 uint32_t i;
2260
2261 for (i = 0; i < n; i++)
2262 {
2263 if (cff->gid_to_cid[i] == cid)
2264 return i;
2265 }
2266 return 0;
2267 }
2268
2269
2270 fz_buffer *
2271 fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int is_pdf_cidfont)
2272 {
2273 cff_t cff = { 0 };
2274 fz_buffer *newbuf = NULL;
2275 uint8_t *base;
2276 size_t len;
2277 fz_output *out = NULL;
2278 int i;
2279 uint16_t n, k;
2280
2281 fz_var(newbuf);
2282 fz_var(out);
2283
2284 if (orig == NULL)
2285 return NULL;
2286
2287 base = orig->data;
2288 len = orig->len;
2289
2290 fz_try(ctx)
2291 {
2292 cff.base = base;
2293 cff.len = len;
2294
2295 cff.symbolic = symbolic;
2296
2297 if (len < 4)
2298 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated CFF");
2299
2300 cff.major = base[0];
2301 cff.minor = base[1];
2302 cff.headersize = base[2];
2303 cff.offsize = base[3];
2304
2305 if (cff.offsize > 4)
2306 fz_throw(ctx, FZ_ERROR_FORMAT, "Invalid offsize in CFF");
2307
2308 if (len > UINT32_MAX)
2309 fz_throw(ctx, FZ_ERROR_FORMAT, "CFF too large");
2310
2311 /* First, the name index */
2312 cff.top_dict_index_offset = index_load(ctx, &cff.name_index, base, (uint32_t)len, cff.headersize);
2313
2314 /* Next, the top dict index */
2315 cff.string_index_offset = index_load(ctx, &cff.top_dict_index, base, (uint32_t)len, cff.top_dict_index_offset);
2316
2317 /* Next, the string index */
2318 cff.global_index_offset = index_load(ctx, &cff.string_index, base, (uint32_t)len, cff.string_index_offset);
2319
2320 /* Next the Global subr index */
2321 index_load(ctx, &cff.global_index, base, (uint32_t)len, cff.global_index_offset);
2322
2323 /* Default value, possibly updated by top dict entries */
2324 cff.charstring_type = 2;
2325
2326 /* CFF files can contain several fonts, but we only want the first one. */
2327 read_top_dict(ctx, &cff, 0);
2328
2329 cff.gsubr_bias = subr_bias(ctx, &cff, cff.global_index.count);
2330
2331 if (cff.charstrings_index_offset == 0)
2332 fz_throw(ctx, FZ_ERROR_FORMAT, "Missing charstrings table");
2333
2334 index_load(ctx, &cff.charstrings_index, base, (uint32_t)len, cff.charstrings_index_offset);
2335 index_load(ctx, &cff.local_index, base, (uint32_t)len, cff.local_index_offset);
2336 cff.subr_bias = subr_bias(ctx, &cff, cff.local_index.count);
2337 index_load(ctx, &cff.fdarray_index, base, (uint32_t)len, cff.fdarray_index_offset);
2338
2339 get_encoding_len(ctx, &cff);
2340 get_charset_len(ctx, &cff);
2341
2342 if (is_pdf_cidfont && cff.is_cidfont)
2343 {
2344 read_fdselect(ctx, &cff);
2345 read_fdarray_and_privates(ctx, &cff);
2346 }
2347
2348 /* Move our list of gids into our own storage. */
2349 if (is_pdf_cidfont && cff.is_cidfont)
2350 {
2351 /* For CIDFontType0 FontDescriptor with a CFF that uses CIDFont operators,
2352 * we are given CIDs here, not GIDs. Accordingly
2353 * we need to look them up in the CharSet.
2354 */
2355 load_charset_for_cidfont(ctx, &cff);
2356 for (i = 0; i < num_gids; i++)
2357 usage_list_add(ctx, &cff.gids_to_keep, cid_to_gid(ctx, &cff, gids[i]));
2358 }
2359 else
2360 {
2361 /* For CIDFontType0 FontDescriptor with a CFF that DOES NOT use CIDFont operators,
2362 * and for Type1 FontDescriptors, we are given GIDs directly.
2363 */
2364 for (i = 0; i < num_gids; i++)
2365 usage_list_add(ctx, &cff.gids_to_keep, gids[i]);
2366 }
2367
2368 /* Scan charstrings. */
2369 scan_charstrings(ctx, &cff, is_pdf_cidfont);
2370
2371 /* Now subset the data. */
2372 subset_charstrings(ctx, &cff);
2373 if (is_pdf_cidfont && cff.is_cidfont)
2374 subset_fdarray_locals(ctx, &cff);
2375 subset_locals(ctx, &cff);
2376 subset_globals(ctx, &cff);
2377
2378 /* FIXME: cull the strings? */
2379
2380 /* Now, rewrite the font.
2381
2382 There are various sections for this, as follows:
2383
2384 SECTION CIDFonts Dict
2385 (Subsection) only Contains
2386 absolute
2387 offsets?
2388 Header
2389 Name Index
2390 Top Dict Index
2391 (Top Dict) Y
2392 String Index
2393 Global Subr Index
2394 Encodings
2395 Charsets
2396 FDSelect Y
2397 CharStrings Index
2398 Font DICT Index Y
2399 (Font Dict) N
2400 Private DICT N
2401 Local Subr Index
2402
2403 The size of global offsets varies according to how large the file is,
2404 therefore we need to take care.
2405
2406 The 'suffix' of sections from String Index onwards are independent of
2407 this global offset size, so we finalise those sections first.
2408
2409 We can then use this size to inform our choice of offset size for the
2410 top dictionary.
2411
2412 So, layout the sections from the end backwards.
2413 */
2414
2415 /* Local Subr Index */
2416 /* Private DICT */
2417 make_new_private_dict(ctx, &cff);
2418 /* Font DICT - CIDFont only */
2419 /* Charstrings - already done */
2420 /* FDSelect - CIDFont only */
2421 /* Charsets - unchanged */
2422 /* Encoding - unchanged */
2423 /* Globals */
2424 /* Strings - unchanged */
2425 make_new_top_dict(ctx, &cff);
2426
2427 newbuf = fz_new_buffer(ctx, 1024);
2428 out = fz_new_output_with_buffer(ctx, newbuf);
2429
2430 /* Copy header */
2431 fz_write_byte(ctx, out, cff.major);
2432 fz_write_byte(ctx, out, cff.minor);
2433 fz_write_byte(ctx, out, 4);
2434 fz_write_byte(ctx, out, cff.offsize);
2435
2436 output_name_index(ctx, &cff, out);
2437 update_dicts(ctx, &cff, fz_tell_output(ctx, out));
2438 output_top_dict_index(ctx, &cff, out);
2439
2440 /* Copy strings index */
2441 if (cff.string_index.index_size)
2442 fz_write_data(ctx, out, base + cff.string_index.index_offset, cff.string_index.index_size);
2443 else
2444 fz_write_uint16_be(ctx, out, 0);
2445 /* Copy globals index (if there is one) */
2446 if (cff.global_subset)
2447 fz_write_data(ctx, out, cff.global_subset->data, cff.global_subset->len);
2448 else if (cff.global_index.index_size)
2449 fz_write_data(ctx, out, base + cff.global_index.index_offset, cff.global_index.index_size);
2450 else
2451 fz_write_uint16_be(ctx, out, 0);
2452 /* Copy encoding */
2453 if (cff.encoding_offset > 2)
2454 fz_write_data(ctx, out, base + cff.encoding_offset, cff.encoding_len);
2455 /* Copy charset */
2456 if (cff.charset_offset > 2)
2457 fz_write_data(ctx, out, base + cff.charset_offset, cff.charset_len);
2458 if (cff.fdselect_offset)
2459 fz_write_data(ctx, out, base + cff.fdselect_offset, cff.fdselect_len);
2460 /* Copy charstrings */
2461 if (cff.charstrings_subset)
2462 fz_write_data(ctx, out, cff.charstrings_subset->data, cff.charstrings_subset->len);
2463 else if (cff.charstrings_index.index_size)
2464 fz_write_data(ctx, out, base + cff.charstrings_index.index_offset, cff.charstrings_index.index_size);
2465 else
2466 fz_write_uint16_be(ctx, out, 0);
2467 if (cff.fdarray)
2468 output_fdarray(ctx, out, &cff);
2469 /* Copy Private dict */
2470 fz_write_data(ctx, out, cff.private_subset->data, cff.private_subset->len);
2471 /* Copy the local table - subsetted if there is one, original if not, or maybe none! */
2472 if (cff.local_subset)
2473 fz_write_data(ctx, out, cff.local_subset->data, cff.local_subset->len);
2474 else if (cff.local_index.index_size)
2475 fz_write_data(ctx, out, base + cff.local_index.index_offset, cff.local_index.index_size);
2476
2477 fz_close_output(ctx, out);
2478 }
2479 fz_always(ctx)
2480 {
2481 fz_drop_output(ctx, out);
2482 fz_drop_buffer(ctx, cff.private_subset);
2483 fz_drop_buffer(ctx, cff.charstrings_subset);
2484 fz_drop_buffer(ctx, cff.top_dict_subset);
2485 fz_drop_buffer(ctx, cff.local_subset);
2486 fz_drop_buffer(ctx, cff.global_subset);
2487 fz_free(ctx, cff.gid_to_cid);
2488 fz_free(ctx, cff.gid_to_font);
2489 drop_usage_list(ctx, &cff.local_usage);
2490 drop_usage_list(ctx, &cff.global_usage);
2491 drop_usage_list(ctx, &cff.gids_to_keep);
2492 drop_usage_list(ctx, &cff.extra_gids_to_keep);
2493 if (cff.fdarray)
2494 {
2495 n = cff.fdarray_index.count;
2496 for (k = 0; k < n; k++)
2497 {
2498 fz_drop_buffer(ctx, cff.fdarray[k].rewritten_dict);
2499 fz_drop_buffer(ctx, cff.fdarray[k].rewritten_private);
2500 fz_drop_buffer(ctx, cff.fdarray[k].local_subset);
2501 drop_usage_list(ctx, &cff.fdarray[k].local_usage);
2502 }
2503 fz_free(ctx, cff.fdarray);
2504 }
2505 fz_free(ctx, cff.unpacked_charset);
2506 }
2507 fz_catch(ctx)
2508 {
2509 fz_drop_buffer(ctx, newbuf);
2510 fz_rethrow(ctx);
2511 }
2512
2513 return newbuf;
2514 }