comparison mupdf-source/source/pdf/pdf-subset.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23
24 #include "mupdf/fitz.h"
25 #include "mupdf/pdf.h"
26
27 /* Define the following for some debugging output. */
28 #undef DEBUG_SUBSETTING
29
30 typedef struct gstate
31 {
32 struct gstate *next;
33 int current_font;
34 pdf_font_desc *font;
35 } gstate;
36
37 typedef struct resources_stack
38 {
39 struct resources_stack *next;
40 pdf_obj *res;
41 } resources_stack;
42
43 typedef struct
44 {
45 int num;
46 int gen;
47 int is_ttf;
48 int is_cidfont;
49 pdf_obj *fontfile;
50 unsigned char digest[16];
51
52 fz_int_heap gids;
53 fz_int_heap cids;
54
55 /* Pointers back to the top level fonts that refer to this. */
56 int max;
57 int len;
58 pdf_obj **font;
59 } font_usage_t;
60
61 typedef struct
62 {
63 int max;
64 int len;
65 font_usage_t *font;
66 } fonts_usage_t;
67
68 typedef struct
69 {
70 pdf_processor super;
71 resources_stack *rstack;
72 fonts_usage_t *usage;
73 gstate *gs;
74 } pdf_font_analysis_processor;
75
76 static void
77 pop_gstate(fz_context *ctx, pdf_font_analysis_processor *p)
78 {
79 gstate *gs = p->gs;
80 gstate *old;
81
82 if (gs == NULL)
83 return;
84
85 old = gs->next;
86 pdf_drop_font(ctx, gs->font);
87 fz_free(ctx, gs);
88 p->gs = old;
89 }
90
91 static void
92 drop_processor(fz_context *ctx, pdf_processor *proc)
93 {
94 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
95
96 while (p->rstack)
97 {
98 resources_stack *stk = p->rstack;
99 p->rstack = stk->next;
100 pdf_drop_obj(ctx, stk->res);
101 fz_free(ctx, stk);
102 }
103
104 while (p->gs)
105 pop_gstate(ctx, p);
106 }
107
108 static void
109 push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res)
110 {
111 pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc;
112 resources_stack *stk = fz_malloc_struct(ctx, resources_stack);
113
114 stk->next = p->rstack;
115 p->rstack = stk;
116 fz_try(ctx)
117 {
118 stk->res = pdf_keep_obj(ctx, res);
119 }
120 fz_catch(ctx)
121 {
122 pdf_drop_obj(ctx, stk->res);
123 p->rstack = stk->next;
124 fz_free(ctx, stk);
125 fz_rethrow(ctx);
126 }
127 }
128
129 static pdf_obj *
130 pop_resources(fz_context *ctx, pdf_processor *proc)
131 {
132 pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc;
133 resources_stack *stk = p->rstack;
134 pdf_obj *res = p->rstack->res;
135
136 p->rstack = stk->next;
137 fz_free(ctx, stk);
138
139 return res;
140 }
141
142 static void
143 font_analysis_Q(fz_context *ctx, pdf_processor *proc)
144 {
145 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
146
147 pop_gstate(ctx, p);
148 }
149
150 static void
151 font_analysis_q(fz_context *ctx, pdf_processor *proc)
152 {
153 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
154 gstate *gs = p->gs;
155 gstate *new_gs = fz_malloc_struct(ctx, gstate);
156 p->gs = new_gs;
157
158 if (gs)
159 {
160 *new_gs = *gs;
161 new_gs->next = gs;
162 }
163
164 pdf_keep_font(ctx, new_gs->font);
165
166 }
167
168 static void
169 font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size)
170 {
171 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
172 pdf_obj *dict = pdf_dict_gets(ctx, pdf_dict_get(ctx, p->rstack->res, PDF_NAME(Font)), name);
173 pdf_obj *subtype, *fontdesc;
174 pdf_obj *fontfile = NULL;
175 pdf_obj *key;
176 int num, gen, i;
177 int is_cidfont = 0;
178 int is_ttf = 0;
179 unsigned char digest[16];
180
181 p->gs->current_font = -1; /* unknown font! */
182
183 if (dict == NULL)
184 return;
185
186 /* We can have multiple fonts that rely on the same underlying fontfile
187 * object. Therefore, resolve down to that. */
188 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
189
190 if (subtype == PDF_NAME(Type1) || subtype == PDF_NAME(MMType1))
191 {
192 // fontfile subtype should be Type1C for us to be able to subset it
193 key = PDF_NAME(FontFile);
194 fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
195 fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile));
196 is_cidfont = 0;
197 is_ttf = 0;
198 }
199 else if (subtype == PDF_NAME(TrueType))
200 {
201 key = PDF_NAME(FontFile2);
202 fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
203 fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2));
204 is_cidfont = 0;
205 is_ttf = 1;
206 }
207 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
208 {
209 dict = pdf_array_get(ctx, pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)), 0);
210 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
211 fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
212 if (subtype == PDF_NAME(CIDFontType0))
213 {
214 // fontfile subtype is either CIDFontType0C or OpenType
215 key = PDF_NAME(FontFile3);
216 fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile3));
217 subtype = pdf_dict_get(ctx, fontfile, PDF_NAME(Subtype));
218 if (subtype == PDF_NAME(CIDFontType0C))
219 {
220 is_cidfont = 1;
221 is_ttf = 0;
222 }
223 else if (subtype == PDF_NAME(OpenType))
224 {
225 is_cidfont = 1;
226 is_ttf = 1;
227 }
228 else
229 {
230 fontfile = NULL;
231 }
232 }
233 else if (subtype == PDF_NAME(CIDFontType2))
234 {
235 key = PDF_NAME(FontFile2);
236 fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2));
237 is_cidfont = 1;
238 is_ttf = 1;
239 }
240 }
241
242 if (!fontfile)
243 {
244 #ifdef DEBUG_SUBSETTING
245 fz_write_printf(ctx, fz_stddbg(ctx), "No embedded file found for font of subtype %s\n", pdf_to_name(ctx, subtype));
246 #endif
247 return;
248 }
249
250 num = pdf_to_num(ctx, fontfile);
251 gen = pdf_to_gen(ctx, fontfile);
252
253 for (i = 0; i < p->usage->len; i++)
254 {
255 if (p->usage->font[i].num == num &&
256 p->usage->font[i].gen == gen)
257 break;
258 }
259
260 fz_font_digest(ctx, font->font, digest);
261
262 /* Check for duplicate fonts. (Fonts in the document that have
263 * the font stream included multiple times as different objects).
264 * This can happen with naive insertion routines. */
265 if (i == p->usage->len)
266 {
267 for (i = 0; i < p->usage->len; i++)
268 {
269 if (memcmp(digest, p->usage->font[i].digest, 16) == 0)
270 {
271 pdf_dict_put(ctx, fontdesc, key, p->usage->font[i].fontfile);
272 break;
273 }
274 }
275 }
276
277 pdf_drop_font(ctx, p->gs->font);
278 p->gs->font = pdf_keep_font(ctx, font);
279 p->gs->current_font = i;
280 if (i < p->usage->len)
281 {
282 int j;
283
284 for (j = 0; j < p->usage->font[i].len; j++)
285 {
286 if (pdf_objcmp(ctx, p->usage->font[i].font[j], dict) == 0)
287 return;
288 }
289
290 if (p->usage->font[i].len == p->usage->font[i].max)
291 {
292 int newmax = p->usage->font[i].max * 2;
293 p->usage->font[i].font = fz_realloc(ctx, p->usage->font[i].font, sizeof(*p->usage->font[i].font) * newmax);
294 p->usage->font[i].max = newmax;
295 }
296 p->usage->font[i].font[j] = pdf_keep_obj(ctx, dict);
297 p->usage->font[i].len++;
298
299 return;
300 }
301
302 if (p->usage->max == p->usage->len)
303 {
304 int n = p->usage->max * 2;
305
306 if (n == 0)
307 n = 32;
308 p->usage->font = (font_usage_t *)fz_realloc(ctx, p->usage->font, sizeof(*p->usage->font) * n);
309 p->usage->max = n;
310 }
311
312 p->usage->font[i].is_ttf = is_ttf;
313 p->usage->font[i].is_cidfont = is_cidfont;
314 p->usage->font[i].fontfile = pdf_keep_obj(ctx, fontfile);
315 p->usage->font[i].num = num;
316 p->usage->font[i].gen = gen;
317 p->usage->font[i].cids.len = 0;
318 p->usage->font[i].cids.max = 0;
319 p->usage->font[i].cids.heap = NULL;
320 p->usage->font[i].gids.len = 0;
321 p->usage->font[i].gids.max = 0;
322 p->usage->font[i].gids.heap = NULL;
323 p->usage->font[i].len = 0;
324 p->usage->font[i].max = 0;
325 p->usage->font[i].font = NULL;
326 memcpy(p->usage->font[i].digest, digest, 16);
327 p->usage->len++;
328
329 p->usage->font[i].font = fz_malloc(ctx, sizeof(*p->usage->font[i].font) * 4);
330 p->usage->font[i].len = 1;
331 p->usage->font[i].max = 4;
332 p->usage->font[i].font[0] = pdf_keep_obj(ctx, dict);
333 }
334
335 static void
336 show_char(fz_context *ctx, font_usage_t *font, int cid, int gid)
337 {
338 fz_int_heap_insert(ctx, &font->cids, cid);
339 fz_int_heap_insert(ctx, &font->gids, gid);
340 }
341
342 static void
343 show_string(fz_context *ctx, pdf_font_analysis_processor *p, unsigned char *buf, size_t len)
344 {
345 gstate *gs = p->gs;
346 pdf_font_desc *fontdesc = gs->font;
347 size_t pos = 0;
348 font_usage_t *font;
349
350 // Not an embedded font!
351 if (gs->current_font < 0 || fontdesc == NULL)
352 return;
353
354 font = &p->usage->font[gs->current_font];
355
356 while (pos < len)
357 {
358 unsigned int cpt;
359 int inc = pdf_decode_cmap(fontdesc->encoding, &buf[pos], &buf[len], &cpt);
360
361 int cid = pdf_lookup_cmap(fontdesc->encoding, cpt);
362 if (cid >= 0)
363 {
364 int gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
365 show_char(ctx, font, cid, gid);
366 }
367
368 pos += inc;
369 }
370 }
371
372 static void
373 show_text(fz_context *ctx, pdf_font_analysis_processor *p, pdf_obj *text)
374 {
375 gstate *gs = p->gs;
376 pdf_font_desc *fontdesc;
377 int i, n;
378
379 if (!gs)
380 return;
381 fontdesc = gs->font;
382 if (!fontdesc)
383 return;
384
385 if (pdf_is_string(ctx, text))
386 {
387 show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, text), pdf_to_str_len(ctx, text));
388 }
389 else if (pdf_is_array(ctx, text))
390 {
391 n = pdf_array_len(ctx, text);
392 for (i = 0; i < n; i++)
393 {
394 pdf_obj *item = pdf_array_get(ctx, text, i);
395 if (pdf_is_string(ctx, item))
396 {
397 show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, item), pdf_to_str_len(ctx, item));
398 }
399 }
400 }
401 }
402
403 static void
404 font_analysis_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array)
405 {
406 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
407
408 show_text(ctx, p, array);
409 }
410
411 static void
412 font_analysis_Tj(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
413 {
414 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
415
416 show_string(ctx, p, (unsigned char *)str, len);
417 }
418
419 static void
420 font_analysis_squote(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
421 {
422 /* Note, we convert all T' operators to (maybe) a T* and a Tj */
423 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
424
425 show_string(ctx, p, (unsigned char *)str, len);
426 }
427
428 static void
429 font_analysis_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len)
430 {
431 /* Note, we convert all T" operators to (maybe) a T*,
432 * (maybe) Tc, (maybe) Tw and a Tj. */
433 pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
434
435 show_string(ctx, p, (unsigned char*)str, len);
436 }
437
438 static void
439 font_analysis_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj)
440 {
441 pdf_font_analysis_processor *pr = (pdf_font_analysis_processor *)proc;
442 pdf_document *doc = pdf_get_bound_document(ctx, xobj);
443 pdf_obj *resources = pdf_xobject_resources(ctx, xobj);
444
445 if (!resources)
446 resources = pr->rstack->res;
447
448 pdf_process_contents(ctx, (pdf_processor*)pr, doc, resources, xobj, NULL, NULL);
449 }
450
451 static pdf_processor *
452 pdf_new_font_analysis_processor(fz_context *ctx, fonts_usage_t *usage)
453 {
454 pdf_font_analysis_processor *proc = (pdf_font_analysis_processor *)pdf_new_processor(ctx, sizeof *proc);
455
456 proc->super.drop_processor = drop_processor;
457 proc->super.push_resources = push_resources;
458 proc->super.pop_resources = pop_resources;
459
460 proc->super.op_Do_form = font_analysis_Do_form;
461
462 proc->super.op_Tf = font_analysis_Tf;
463 proc->super.op_Tj = font_analysis_Tj;
464 proc->super.op_TJ = font_analysis_TJ;
465 proc->super.op_squote = font_analysis_squote;
466 proc->super.op_dquote = font_analysis_dquote;
467
468 proc->super.op_q = font_analysis_q;
469 proc->super.op_Q = font_analysis_Q;
470
471 fz_try(ctx)
472 proc->gs = fz_malloc_struct(ctx, gstate);
473 fz_catch(ctx)
474 {
475 fz_free(ctx, proc);
476 fz_rethrow(ctx);
477 }
478
479 proc->gs->current_font = -1; // no font set yet
480
481 proc->usage = usage;
482
483 return &proc->super;
484 }
485
486 static void
487 examine_page(fz_context *ctx, pdf_document *doc, pdf_page *page, fonts_usage_t *usage)
488 {
489 pdf_processor *proc = pdf_new_font_analysis_processor(ctx, usage);
490 pdf_obj *contents = pdf_page_contents(ctx, page);
491 pdf_obj *resources = pdf_page_resources(ctx, page);
492 pdf_annot *annot, *widget;
493
494 fz_try(ctx)
495 {
496 pdf_process_contents(ctx, proc, doc, resources, contents, NULL, NULL);
497
498 pdf_processor_push_resources(ctx, proc, resources);
499 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
500 pdf_process_annot(ctx, proc, annot, NULL);
501 for (widget = pdf_first_widget(ctx, page); widget; widget = pdf_next_widget(ctx, widget))
502 pdf_process_annot(ctx, proc, widget, NULL);
503 pdf_close_processor(ctx, proc);
504 }
505 fz_always(ctx)
506 {
507 pdf_drop_processor(ctx, proc);
508 }
509 fz_catch(ctx)
510 fz_rethrow(ctx);
511 }
512
513 static void
514 subset_ttf(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont)
515 {
516 fz_buffer *buf = pdf_load_stream(ctx, fontfile);
517 fz_buffer *newbuf = NULL;
518
519 if (buf->len == 0)
520 {
521 fz_drop_buffer(ctx, buf);
522 return;
523 }
524
525 fz_var(newbuf);
526
527 fz_try(ctx)
528 {
529 newbuf = fz_subset_ttf_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont);
530
531 pdf_update_stream(ctx, doc, fontfile, newbuf, 0);
532 pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len);
533 }
534 fz_always(ctx)
535 {
536 fz_drop_buffer(ctx, newbuf);
537 fz_drop_buffer(ctx, buf);
538 }
539 fz_catch(ctx)
540 {
541 fz_rethrow(ctx);
542 }
543 }
544
545 static void
546 subset_cff(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont)
547 {
548 fz_buffer *buf = pdf_load_stream(ctx, fontfile);
549 fz_buffer *newbuf = NULL;
550
551 if (buf->len == 0)
552 {
553 fz_drop_buffer(ctx, buf);
554 return;
555 }
556
557 fz_var(newbuf);
558
559 fz_try(ctx)
560 {
561 newbuf = fz_subset_cff_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont);
562
563 pdf_update_stream(ctx, doc, fontfile, newbuf, 0);
564 pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len);
565 }
566 fz_always(ctx)
567 {
568 fz_drop_buffer(ctx, newbuf);
569 fz_drop_buffer(ctx, buf);
570 }
571 fz_catch(ctx)
572 {
573 fz_rethrow(ctx);
574 }
575 }
576
577 static void
578 do_adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font, int n)
579 {
580 pdf_obj *obj = font->font[n];
581 int old_firstchar = pdf_dict_get_int(ctx, obj, PDF_NAME(FirstChar));
582 pdf_obj *old_widths = pdf_dict_get(ctx, obj, PDF_NAME(Widths));
583 int new_firstchar = font->cids.heap[0];
584 int new_lastchar = font->cids.heap[font->cids.len-1];
585 pdf_obj *widths;
586 int i;
587
588 pdf_dict_put_int(ctx, obj, PDF_NAME(FirstChar), new_firstchar);
589 pdf_dict_put_int(ctx, obj, PDF_NAME(LastChar), new_lastchar);
590 if (old_widths)
591 {
592 int j = 0;
593 widths = pdf_new_array(ctx, doc, new_lastchar - new_firstchar + 1);
594 for (i = new_firstchar; i <= new_lastchar; i++)
595 {
596 if (font->cids.heap[j] == i)
597 {
598 pdf_array_push_int(ctx, widths, pdf_array_get_int(ctx, old_widths, i - old_firstchar));
599 j++;
600 }
601 else
602 pdf_array_push_int(ctx, widths, 0);
603 }
604 pdf_dict_put_drop(ctx, obj, PDF_NAME(Widths), widths);
605 }
606 }
607
608 static void
609 adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font)
610 {
611 int i;
612
613 for (i = 0; i < font->len; i++)
614 do_adjust_simple_font(ctx, doc, font, i);
615 }
616
617
618 static pdf_obj *
619 get_fontdesc(fz_context *ctx, pdf_obj *font)
620 {
621 pdf_obj *fontdesc = pdf_dict_get(ctx, font, PDF_NAME(FontDescriptor));
622
623 if (fontdesc)
624 return fontdesc;
625
626 return pdf_dict_get(ctx, pdf_array_get(ctx, pdf_dict_get(ctx, font, PDF_NAME(DescendantFonts)), 0), PDF_NAME(FontDescriptor));
627 }
628
629 static void
630 prefix_font_name(fz_context *ctx, pdf_document *doc, pdf_obj *font, pdf_obj *file)
631 {
632 fz_buffer *buf;
633 uint32_t digest[4], v;
634 pdf_obj *fontdesc = get_fontdesc(ctx, font);
635 const char *name = pdf_dict_get_name(ctx, fontdesc, PDF_NAME(FontName));
636 char new_name[256];
637 size_t len;
638
639 /* If there is no name, just exit. Possibly should throw here. */
640 if (name == NULL)
641 return;
642
643 len = strlen(name);
644 if (len > 6 && name[6] == '+')
645 return; /* Already a subset name */
646
647 buf = pdf_load_stream(ctx, file);
648 fz_md5_buffer(ctx, buf, (uint8_t *)digest);
649 fz_drop_buffer(ctx, buf);
650
651 v = digest[0] ^ digest[1] ^ digest[2] ^ digest[3];
652 new_name[0] = 'A' + (v % 26);
653 v /= 26;
654 new_name[1] = 'A' + (v % 26);
655 v /= 26;
656 new_name[2] = 'A' + (v % 26);
657 v /= 26;
658 new_name[3] = 'A' + (v % 26);
659 v /= 26;
660 new_name[4] = 'A' + (v % 26);
661 v /= 26;
662 new_name[5] = 'A' + (v % 26);
663 new_name[6] = '+';
664
665 memcpy(new_name+7, name, len > sizeof(new_name)-8 ? sizeof(new_name)-8 : len+1);
666 new_name[sizeof(new_name)-1] = 0;
667
668 pdf_dict_put_name(ctx, fontdesc, PDF_NAME(FontName), new_name);
669 }
670
671 static int
672 get_symbolic(fz_context *ctx, font_usage_t *font)
673 {
674 int i, flags, symbolic, symbolic2;
675 pdf_obj *fontdesc;
676
677 if (!font || font->len == 0)
678 return 0;
679
680 fontdesc = pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor));
681 flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags));
682 symbolic = (!!(flags & 4)) | ((flags & 32) == 0);
683
684 for (i = 1; i < font->len; i++)
685 {
686 fontdesc = pdf_dict_get(ctx, font->font[i], PDF_NAME(FontDescriptor));
687 flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags));
688 symbolic2 = (!!(flags & 4)) | ((flags & 32) == 0);
689
690 if (symbolic != symbolic2)
691 {
692 fz_warn(ctx, "Font cannot be both symbolic and non-symbolic. Skipping subsetting.");
693 return -1;
694 }
695 }
696
697 return symbolic;
698 }
699
700 static pdf_obj *get_subtype(fz_context *ctx, font_usage_t *font)
701 {
702 /* If we can get the subtype from the fontfile, great. Use that. */
703 pdf_obj *subtype = pdf_dict_get(ctx, font->fontfile, PDF_NAME(Subtype));
704 int i;
705
706 if (subtype != NULL)
707 return subtype;
708
709 /* Otherwise we'll have to get it from the font objects, and they'd
710 * all better agree. */
711 if (font->len == 0)
712 return NULL;
713
714 subtype = pdf_dict_get(ctx, font->font[0], PDF_NAME(Subtype));
715
716 for (i = 1; i < font->len; i++)
717 {
718 pdf_obj *subtype2 = pdf_dict_get(ctx, font->font[i], PDF_NAME(Subtype));
719
720 if (pdf_objcmp(ctx, subtype, subtype2))
721 return NULL;
722 }
723 return subtype;
724 }
725
726 void
727 pdf_subset_fonts(fz_context *ctx, pdf_document *doc, int len, const int *pages)
728 {
729 int i, j;
730 pdf_page *page = NULL;
731 fonts_usage_t usage = { 0 };
732
733 fz_var(page);
734
735 fz_try(ctx)
736 {
737 if (len == 0)
738 {
739 /* Process every page. */
740 len = pdf_count_pages(ctx, doc);
741 for (i = 0; i < len; i++)
742 {
743 page = pdf_load_page(ctx, doc, i);
744
745 examine_page(ctx, doc, page, &usage);
746
747 fz_drop_page(ctx, (fz_page *)page);
748 page = NULL;
749 }
750 }
751 else
752 {
753 /* Process just the pages we are given. */
754 for (i = 0; i < len; i++)
755 {
756 page = pdf_load_page(ctx, doc, pages[i]);
757
758 examine_page(ctx, doc, page, &usage);
759
760 fz_drop_page(ctx, (fz_page *)page);
761 page = NULL;
762 }
763 }
764
765 /* All our font usage data is in heaps. Sort the heaps. */
766 for (i = 0; i < usage.len; i++)
767 {
768 font_usage_t *font = &usage.font[i];
769
770 fz_int_heap_sort(ctx, &font->cids);
771 fz_int_heap_uniq(ctx, &font->cids);
772 fz_int_heap_sort(ctx, &font->gids);
773 fz_int_heap_uniq(ctx, &font->gids);
774 }
775
776 /* Now, actually subset the fonts. */
777 for (i = 0; i < usage.len; i++)
778 {
779 font_usage_t *font = &usage.font[i];
780 pdf_obj *subtype = get_subtype(ctx, font);
781 int symbolic = get_symbolic(ctx, font);
782 if (symbolic < 0)
783 continue;
784
785 /* Not sure this can ever happen, and if it does this is not a great
786 * way to handle it, but it'll do for now. */
787 if (font->gids.len == 0 || font->cids.len == 0 || subtype == NULL)
788 continue;
789
790 #ifdef DEBUG_SUBSETTING
791 fz_write_printf(ctx, fz_stddbg(ctx), "font->obj=%d subtype=", pdf_to_num(ctx, font->fontfile));
792 pdf_debug_obj(ctx, subtype);
793 fz_write_printf(ctx, fz_stddbg(ctx), "\n");
794 pdf_debug_obj(ctx, pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor)));
795 #endif
796
797 /* If we hit a (non-SYSTEM) problem subsetting a font, give up for this font alone.
798 * This will leave this font alone. */
799 fz_try(ctx)
800 {
801 if (font->is_ttf)
802 subset_ttf(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont);
803 else if (font->is_cidfont)
804 subset_cff(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont);
805 }
806 fz_catch(ctx)
807 {
808 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
809 fz_report_error(ctx);
810 continue;
811 }
812
813 /* Any problems changing these parts of the fonts are really fatal though. */
814 if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)) ||
815 pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
816 {
817 adjust_simple_font(ctx, doc, font);
818 }
819
820 /* And prefix the name */
821 for (j = 0; j < font->len; j++)
822 prefix_font_name(ctx, doc, font->font[j], font->fontfile);
823 }
824 }
825 fz_always(ctx)
826 {
827 fz_drop_page(ctx, (fz_page *)page);
828
829 for (i = 0; i < usage.len; i++)
830 {
831 pdf_drop_obj(ctx, usage.font[i].fontfile);
832 fz_free(ctx, usage.font[i].cids.heap);
833 fz_free(ctx, usage.font[i].gids.heap);
834 for (j = 0; j < usage.font[i].len; j++)
835 pdf_drop_obj(ctx, usage.font[i].font[j]);
836 fz_free(ctx, usage.font[i].font);
837 }
838 fz_free(ctx, usage.font);
839 }
840 fz_catch(ctx)
841 fz_rethrow(ctx);
842 }