Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/pdf/pdf-subset.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/pdf/pdf-subset.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,842 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + + +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" + +/* Define the following for some debugging output. */ +#undef DEBUG_SUBSETTING + +typedef struct gstate +{ + struct gstate *next; + int current_font; + pdf_font_desc *font; +} gstate; + +typedef struct resources_stack +{ + struct resources_stack *next; + pdf_obj *res; +} resources_stack; + +typedef struct +{ + int num; + int gen; + int is_ttf; + int is_cidfont; + pdf_obj *fontfile; + unsigned char digest[16]; + + fz_int_heap gids; + fz_int_heap cids; + + /* Pointers back to the top level fonts that refer to this. 
*/ + int max; + int len; + pdf_obj **font; +} font_usage_t; + +typedef struct +{ + int max; + int len; + font_usage_t *font; +} fonts_usage_t; + +typedef struct +{ + pdf_processor super; + resources_stack *rstack; + fonts_usage_t *usage; + gstate *gs; +} pdf_font_analysis_processor; + +static void +pop_gstate(fz_context *ctx, pdf_font_analysis_processor *p) +{ + gstate *gs = p->gs; + gstate *old; + + if (gs == NULL) + return; + + old = gs->next; + pdf_drop_font(ctx, gs->font); + fz_free(ctx, gs); + p->gs = old; +} + +static void +drop_processor(fz_context *ctx, pdf_processor *proc) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + + while (p->rstack) + { + resources_stack *stk = p->rstack; + p->rstack = stk->next; + pdf_drop_obj(ctx, stk->res); + fz_free(ctx, stk); + } + + while (p->gs) + pop_gstate(ctx, p); +} + +static void +push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc; + resources_stack *stk = fz_malloc_struct(ctx, resources_stack); + + stk->next = p->rstack; + p->rstack = stk; + fz_try(ctx) + { + stk->res = pdf_keep_obj(ctx, res); + } + fz_catch(ctx) + { + pdf_drop_obj(ctx, stk->res); + p->rstack = stk->next; + fz_free(ctx, stk); + fz_rethrow(ctx); + } +} + +static pdf_obj * +pop_resources(fz_context *ctx, pdf_processor *proc) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc; + resources_stack *stk = p->rstack; + pdf_obj *res = p->rstack->res; + + p->rstack = stk->next; + fz_free(ctx, stk); + + return res; +} + +static void +font_analysis_Q(fz_context *ctx, pdf_processor *proc) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + + pop_gstate(ctx, p); +} + +static void +font_analysis_q(fz_context *ctx, pdf_processor *proc) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + gstate *gs = p->gs; + gstate *new_gs = fz_malloc_struct(ctx, gstate); + p->gs = new_gs; + + if 
(gs) + { + *new_gs = *gs; + new_gs->next = gs; + } + + pdf_keep_font(ctx, new_gs->font); + +} + +static void +font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + pdf_obj *dict = pdf_dict_gets(ctx, pdf_dict_get(ctx, p->rstack->res, PDF_NAME(Font)), name); + pdf_obj *subtype, *fontdesc; + pdf_obj *fontfile = NULL; + pdf_obj *key; + int num, gen, i; + int is_cidfont = 0; + int is_ttf = 0; + unsigned char digest[16]; + + p->gs->current_font = -1; /* unknown font! */ + + if (dict == NULL) + return; + + /* We can have multiple fonts that rely on the same underlying fontfile + * object. Therefore, resolve down to that. */ + subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); + + if (subtype == PDF_NAME(Type1) || subtype == PDF_NAME(MMType1)) + { + // fontfile subtype should be Type1C for us to be able to subset it + key = PDF_NAME(FontFile); + fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile)); + is_cidfont = 0; + is_ttf = 0; + } + else if (subtype == PDF_NAME(TrueType)) + { + key = PDF_NAME(FontFile2); + fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2)); + is_cidfont = 0; + is_ttf = 1; + } + else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0))) + { + dict = pdf_array_get(ctx, pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)), 0); + subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); + fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); + if (subtype == PDF_NAME(CIDFontType0)) + { + // fontfile subtype is either CIDFontType0C or OpenType + key = PDF_NAME(FontFile3); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile3)); + subtype = pdf_dict_get(ctx, fontfile, PDF_NAME(Subtype)); + if (subtype == PDF_NAME(CIDFontType0C)) + { + is_cidfont = 1; + is_ttf = 0; + } + else if (subtype 
== PDF_NAME(OpenType)) + { + is_cidfont = 1; + is_ttf = 1; + } + else + { + fontfile = NULL; + } + } + else if (subtype == PDF_NAME(CIDFontType2)) + { + key = PDF_NAME(FontFile2); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2)); + is_cidfont = 1; + is_ttf = 1; + } + } + + if (!fontfile) + { +#ifdef DEBUG_SUBSETTING + fz_write_printf(ctx, fz_stddbg(ctx), "No embedded file found for font of subtype %s\n", pdf_to_name(ctx, subtype)); +#endif + return; + } + + num = pdf_to_num(ctx, fontfile); + gen = pdf_to_gen(ctx, fontfile); + + for (i = 0; i < p->usage->len; i++) + { + if (p->usage->font[i].num == num && + p->usage->font[i].gen == gen) + break; + } + + fz_font_digest(ctx, font->font, digest); + + /* Check for duplicate fonts. (Fonts in the document that have + * the font stream included multiple times as different objects). + * This can happen with naive insertion routines. */ + if (i == p->usage->len) + { + for (i = 0; i < p->usage->len; i++) + { + if (memcmp(digest, p->usage->font[i].digest, 16) == 0) + { + pdf_dict_put(ctx, fontdesc, key, p->usage->font[i].fontfile); + break; + } + } + } + + pdf_drop_font(ctx, p->gs->font); + p->gs->font = pdf_keep_font(ctx, font); + p->gs->current_font = i; + if (i < p->usage->len) + { + int j; + + for (j = 0; j < p->usage->font[i].len; j++) + { + if (pdf_objcmp(ctx, p->usage->font[i].font[j], dict) == 0) + return; + } + + if (p->usage->font[i].len == p->usage->font[i].max) + { + int newmax = p->usage->font[i].max * 2; + p->usage->font[i].font = fz_realloc(ctx, p->usage->font[i].font, sizeof(*p->usage->font[i].font) * newmax); + p->usage->font[i].max = newmax; + } + p->usage->font[i].font[j] = pdf_keep_obj(ctx, dict); + p->usage->font[i].len++; + + return; + } + + if (p->usage->max == p->usage->len) + { + int n = p->usage->max * 2; + + if (n == 0) + n = 32; + p->usage->font = (font_usage_t *)fz_realloc(ctx, p->usage->font, sizeof(*p->usage->font) * n); + p->usage->max = n; + } + + p->usage->font[i].is_ttf = is_ttf; 
+ p->usage->font[i].is_cidfont = is_cidfont; + p->usage->font[i].fontfile = pdf_keep_obj(ctx, fontfile); + p->usage->font[i].num = num; + p->usage->font[i].gen = gen; + p->usage->font[i].cids.len = 0; + p->usage->font[i].cids.max = 0; + p->usage->font[i].cids.heap = NULL; + p->usage->font[i].gids.len = 0; + p->usage->font[i].gids.max = 0; + p->usage->font[i].gids.heap = NULL; + p->usage->font[i].len = 0; + p->usage->font[i].max = 0; + p->usage->font[i].font = NULL; + memcpy(p->usage->font[i].digest, digest, 16); + p->usage->len++; + + p->usage->font[i].font = fz_malloc(ctx, sizeof(*p->usage->font[i].font) * 4); + p->usage->font[i].len = 1; + p->usage->font[i].max = 4; + p->usage->font[i].font[0] = pdf_keep_obj(ctx, dict); +} + +static void +show_char(fz_context *ctx, font_usage_t *font, int cid, int gid) +{ + fz_int_heap_insert(ctx, &font->cids, cid); + fz_int_heap_insert(ctx, &font->gids, gid); +} + +static void +show_string(fz_context *ctx, pdf_font_analysis_processor *p, unsigned char *buf, size_t len) +{ + gstate *gs = p->gs; + pdf_font_desc *fontdesc = gs->font; + size_t pos = 0; + font_usage_t *font; + + // Not an embedded font! 
+ if (gs->current_font < 0 || fontdesc == NULL) + return; + + font = &p->usage->font[gs->current_font]; + + while (pos < len) + { + unsigned int cpt; + int inc = pdf_decode_cmap(fontdesc->encoding, &buf[pos], &buf[len], &cpt); + + int cid = pdf_lookup_cmap(fontdesc->encoding, cpt); + if (cid >= 0) + { + int gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); + show_char(ctx, font, cid, gid); + } + + pos += inc; + } +} + +static void +show_text(fz_context *ctx, pdf_font_analysis_processor *p, pdf_obj *text) +{ + gstate *gs = p->gs; + pdf_font_desc *fontdesc; + int i, n; + + if (!gs) + return; + fontdesc = gs->font; + if (!fontdesc) + return; + + if (pdf_is_string(ctx, text)) + { + show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, text), pdf_to_str_len(ctx, text)); + } + else if (pdf_is_array(ctx, text)) + { + n = pdf_array_len(ctx, text); + for (i = 0; i < n; i++) + { + pdf_obj *item = pdf_array_get(ctx, text, i); + if (pdf_is_string(ctx, item)) + { + show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, item), pdf_to_str_len(ctx, item)); + } + } + } +} + +static void +font_analysis_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + + show_text(ctx, p, array); +} + +static void +font_analysis_Tj(fz_context *ctx, pdf_processor *proc, char *str, size_t len) +{ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + + show_string(ctx, p, (unsigned char *)str, len); +} + +static void +font_analysis_squote(fz_context *ctx, pdf_processor *proc, char *str, size_t len) +{ + /* Note, we convert all T' operators to (maybe) a T* and a Tj */ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + + show_string(ctx, p, (unsigned char *)str, len); +} + +static void +font_analysis_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len) +{ + /* Note, we convert all T" operators to (maybe) a T*, + * (maybe) Tc, (maybe) Tw and a Tj. 
*/ + pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; + + show_string(ctx, p, (unsigned char*)str, len); +} + +static void +font_analysis_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj) +{ + pdf_font_analysis_processor *pr = (pdf_font_analysis_processor *)proc; + pdf_document *doc = pdf_get_bound_document(ctx, xobj); + pdf_obj *resources = pdf_xobject_resources(ctx, xobj); + + if (!resources) + resources = pr->rstack->res; + + pdf_process_contents(ctx, (pdf_processor*)pr, doc, resources, xobj, NULL, NULL); +} + +static pdf_processor * +pdf_new_font_analysis_processor(fz_context *ctx, fonts_usage_t *usage) +{ + pdf_font_analysis_processor *proc = (pdf_font_analysis_processor *)pdf_new_processor(ctx, sizeof *proc); + + proc->super.drop_processor = drop_processor; + proc->super.push_resources = push_resources; + proc->super.pop_resources = pop_resources; + + proc->super.op_Do_form = font_analysis_Do_form; + + proc->super.op_Tf = font_analysis_Tf; + proc->super.op_Tj = font_analysis_Tj; + proc->super.op_TJ = font_analysis_TJ; + proc->super.op_squote = font_analysis_squote; + proc->super.op_dquote = font_analysis_dquote; + + proc->super.op_q = font_analysis_q; + proc->super.op_Q = font_analysis_Q; + + fz_try(ctx) + proc->gs = fz_malloc_struct(ctx, gstate); + fz_catch(ctx) + { + fz_free(ctx, proc); + fz_rethrow(ctx); + } + + proc->gs->current_font = -1; // no font set yet + + proc->usage = usage; + + return &proc->super; +} + +static void +examine_page(fz_context *ctx, pdf_document *doc, pdf_page *page, fonts_usage_t *usage) +{ + pdf_processor *proc = pdf_new_font_analysis_processor(ctx, usage); + pdf_obj *contents = pdf_page_contents(ctx, page); + pdf_obj *resources = pdf_page_resources(ctx, page); + pdf_annot *annot, *widget; + + fz_try(ctx) + { + pdf_process_contents(ctx, proc, doc, resources, contents, NULL, NULL); + + pdf_processor_push_resources(ctx, proc, resources); + for (annot = pdf_first_annot(ctx, page); 
annot; annot = pdf_next_annot(ctx, annot)) + pdf_process_annot(ctx, proc, annot, NULL); + for (widget = pdf_first_widget(ctx, page); widget; widget = pdf_next_widget(ctx, widget)) + pdf_process_annot(ctx, proc, widget, NULL); + pdf_close_processor(ctx, proc); + } + fz_always(ctx) + { + pdf_drop_processor(ctx, proc); + } + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +subset_ttf(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont) +{ + fz_buffer *buf = pdf_load_stream(ctx, fontfile); + fz_buffer *newbuf = NULL; + + if (buf->len == 0) + { + fz_drop_buffer(ctx, buf); + return; + } + + fz_var(newbuf); + + fz_try(ctx) + { + newbuf = fz_subset_ttf_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont); + + pdf_update_stream(ctx, doc, fontfile, newbuf, 0); + pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len); + } + fz_always(ctx) + { + fz_drop_buffer(ctx, newbuf); + fz_drop_buffer(ctx, buf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +subset_cff(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont) +{ + fz_buffer *buf = pdf_load_stream(ctx, fontfile); + fz_buffer *newbuf = NULL; + + if (buf->len == 0) + { + fz_drop_buffer(ctx, buf); + return; + } + + fz_var(newbuf); + + fz_try(ctx) + { + newbuf = fz_subset_cff_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont); + + pdf_update_stream(ctx, doc, fontfile, newbuf, 0); + pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len); + } + fz_always(ctx) + { + fz_drop_buffer(ctx, newbuf); + fz_drop_buffer(ctx, buf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +do_adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font, int n) +{ + pdf_obj *obj = font->font[n]; + int old_firstchar = pdf_dict_get_int(ctx, obj, PDF_NAME(FirstChar)); + pdf_obj *old_widths = pdf_dict_get(ctx, obj, PDF_NAME(Widths)); + int new_firstchar = 
font->cids.heap[0]; + int new_lastchar = font->cids.heap[font->cids.len-1]; + pdf_obj *widths; + int i; + + pdf_dict_put_int(ctx, obj, PDF_NAME(FirstChar), new_firstchar); + pdf_dict_put_int(ctx, obj, PDF_NAME(LastChar), new_lastchar); + if (old_widths) + { + int j = 0; + widths = pdf_new_array(ctx, doc, new_lastchar - new_firstchar + 1); + for (i = new_firstchar; i <= new_lastchar; i++) + { + if (font->cids.heap[j] == i) + { + pdf_array_push_int(ctx, widths, pdf_array_get_int(ctx, old_widths, i - old_firstchar)); + j++; + } + else + pdf_array_push_int(ctx, widths, 0); + } + pdf_dict_put_drop(ctx, obj, PDF_NAME(Widths), widths); + } +} + +static void +adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font) +{ + int i; + + for (i = 0; i < font->len; i++) + do_adjust_simple_font(ctx, doc, font, i); +} + + +static pdf_obj * +get_fontdesc(fz_context *ctx, pdf_obj *font) +{ + pdf_obj *fontdesc = pdf_dict_get(ctx, font, PDF_NAME(FontDescriptor)); + + if (fontdesc) + return fontdesc; + + return pdf_dict_get(ctx, pdf_array_get(ctx, pdf_dict_get(ctx, font, PDF_NAME(DescendantFonts)), 0), PDF_NAME(FontDescriptor)); +} + +static void +prefix_font_name(fz_context *ctx, pdf_document *doc, pdf_obj *font, pdf_obj *file) +{ + fz_buffer *buf; + uint32_t digest[4], v; + pdf_obj *fontdesc = get_fontdesc(ctx, font); + const char *name = pdf_dict_get_name(ctx, fontdesc, PDF_NAME(FontName)); + char new_name[256]; + size_t len; + + /* If there is no name, just exit. Possibly should throw here. 
*/ + if (name == NULL) + return; + + len = strlen(name); + if (len > 6 && name[6] == '+') + return; /* Already a subset name */ + + buf = pdf_load_stream(ctx, file); + fz_md5_buffer(ctx, buf, (uint8_t *)digest); + fz_drop_buffer(ctx, buf); + + v = digest[0] ^ digest[1] ^ digest[2] ^ digest[3]; + new_name[0] = 'A' + (v % 26); + v /= 26; + new_name[1] = 'A' + (v % 26); + v /= 26; + new_name[2] = 'A' + (v % 26); + v /= 26; + new_name[3] = 'A' + (v % 26); + v /= 26; + new_name[4] = 'A' + (v % 26); + v /= 26; + new_name[5] = 'A' + (v % 26); + new_name[6] = '+'; + + memcpy(new_name+7, name, len > sizeof(new_name)-8 ? sizeof(new_name)-8 : len+1); + new_name[sizeof(new_name)-1] = 0; + + pdf_dict_put_name(ctx, fontdesc, PDF_NAME(FontName), new_name); +} + +static int +get_symbolic(fz_context *ctx, font_usage_t *font) +{ + int i, flags, symbolic, symbolic2; + pdf_obj *fontdesc; + + if (!font || font->len == 0) + return 0; + + fontdesc = pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor)); + flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags)); + symbolic = (!!(flags & 4)) | ((flags & 32) == 0); + + for (i = 1; i < font->len; i++) + { + fontdesc = pdf_dict_get(ctx, font->font[i], PDF_NAME(FontDescriptor)); + flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags)); + symbolic2 = (!!(flags & 4)) | ((flags & 32) == 0); + + if (symbolic != symbolic2) + { + fz_warn(ctx, "Font cannot be both symbolic and non-symbolic. Skipping subsetting."); + return -1; + } + } + + return symbolic; +} + +static pdf_obj *get_subtype(fz_context *ctx, font_usage_t *font) +{ + /* If we can get the subtype from the fontfile, great. Use that. */ + pdf_obj *subtype = pdf_dict_get(ctx, font->fontfile, PDF_NAME(Subtype)); + int i; + + if (subtype != NULL) + return subtype; + + /* Otherwise we'll have to get it from the font objects, and they'd + * all better agree. 
*/ + if (font->len == 0) + return NULL; + + subtype = pdf_dict_get(ctx, font->font[0], PDF_NAME(Subtype)); + + for (i = 1; i < font->len; i++) + { + pdf_obj *subtype2 = pdf_dict_get(ctx, font->font[i], PDF_NAME(Subtype)); + + if (pdf_objcmp(ctx, subtype, subtype2)) + return NULL; + } + return subtype; +} + +void +pdf_subset_fonts(fz_context *ctx, pdf_document *doc, int len, const int *pages) +{ + int i, j; + pdf_page *page = NULL; + fonts_usage_t usage = { 0 }; + + fz_var(page); + + fz_try(ctx) + { + if (len == 0) + { + /* Process every page. */ + len = pdf_count_pages(ctx, doc); + for (i = 0; i < len; i++) + { + page = pdf_load_page(ctx, doc, i); + + examine_page(ctx, doc, page, &usage); + + fz_drop_page(ctx, (fz_page *)page); + page = NULL; + } + } + else + { + /* Process just the pages we are given. */ + for (i = 0; i < len; i++) + { + page = pdf_load_page(ctx, doc, pages[i]); + + examine_page(ctx, doc, page, &usage); + + fz_drop_page(ctx, (fz_page *)page); + page = NULL; + } + } + + /* All our font usage data is in heaps. Sort the heaps. */ + for (i = 0; i < usage.len; i++) + { + font_usage_t *font = &usage.font[i]; + + fz_int_heap_sort(ctx, &font->cids); + fz_int_heap_uniq(ctx, &font->cids); + fz_int_heap_sort(ctx, &font->gids); + fz_int_heap_uniq(ctx, &font->gids); + } + + /* Now, actually subset the fonts. */ + for (i = 0; i < usage.len; i++) + { + font_usage_t *font = &usage.font[i]; + pdf_obj *subtype = get_subtype(ctx, font); + int symbolic = get_symbolic(ctx, font); + if (symbolic < 0) + continue; + + /* Not sure this can ever happen, and if it does this is not a great + * way to handle it, but it'll do for now. 
*/ + if (font->gids.len == 0 || font->cids.len == 0 || subtype == NULL) + continue; + +#ifdef DEBUG_SUBSETTING + fz_write_printf(ctx, fz_stddbg(ctx), "font->obj=%d subtype=", pdf_to_num(ctx, font->fontfile)); + pdf_debug_obj(ctx, subtype); + fz_write_printf(ctx, fz_stddbg(ctx), "\n"); + pdf_debug_obj(ctx, pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor))); +#endif + + /* If we hit a (non-SYSTEM) problem subsetting a font, give up for this font alone. + * This will leave this font alone. */ + fz_try(ctx) + { + if (font->is_ttf) + subset_ttf(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont); + else if (font->is_cidfont) + subset_cff(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont); + } + fz_catch(ctx) + { + fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); + fz_report_error(ctx); + continue; + } + + /* Any problems changing these parts of the fonts are really fatal though. */ + if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)) || + pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) + { + adjust_simple_font(ctx, doc, font); + } + + /* And prefix the name */ + for (j = 0; j < font->len; j++) + prefix_font_name(ctx, doc, font->font[j], font->fontfile); + } + } + fz_always(ctx) + { + fz_drop_page(ctx, (fz_page *)page); + + for (i = 0; i < usage.len; i++) + { + pdf_drop_obj(ctx, usage.font[i].fontfile); + fz_free(ctx, usage.font[i].cids.heap); + fz_free(ctx, usage.font[i].gids.heap); + for (j = 0; j < usage.font[i].len; j++) + pdf_drop_obj(ctx, usage.font[i].font[j]); + fz_free(ctx, usage.font[i].font); + } + fz_free(ctx, usage.font); + } + fz_catch(ctx) + fz_rethrow(ctx); +}
