Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/pdf/pdf-interpret.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/pdf/pdf-interpret.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1383 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" +#include "pdf-annot-imp.h" + +#include <string.h> +#include <math.h> + +/* Maximum number of errors before aborting */ +#define MAX_SYNTAX_ERRORS 100 + +void * +pdf_new_processor(fz_context *ctx, int size) +{ + pdf_processor *ret = Memento_label(fz_calloc(ctx, 1, size), "pdf_processor"); + ret->refs = 1; + return ret; +} + +pdf_processor * +pdf_keep_processor(fz_context *ctx, pdf_processor *proc) +{ + return fz_keep_imp(ctx, proc, &proc->refs); +} + +void +pdf_close_processor(fz_context *ctx, pdf_processor *proc) +{ + void (*close_processor)(fz_context *ctx, pdf_processor *proc); + + if (!proc || proc->closed) + return; + + proc->closed = 1; + close_processor = proc->close_processor; + if (!close_processor) + return; + + close_processor(ctx, proc); /* Tail recursion */ +} + +void +pdf_drop_processor(fz_context *ctx, pdf_processor *proc) +{ + if (fz_drop_imp(ctx, proc, &proc->refs)) + { + if (!proc->closed) + fz_warn(ctx, "dropping unclosed PDF processor"); + if (proc->drop_processor) + proc->drop_processor(ctx, proc); + fz_free(ctx, proc); + } +} + +void pdf_reset_processor(fz_context *ctx, pdf_processor *proc) +{ + if (proc == NULL) + return; + + proc->closed = 0; + + if (proc->reset_processor == NULL) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot reset PDF processor"); + + proc->reset_processor(ctx, proc); +} + +static void +pdf_init_csi(fz_context *ctx, pdf_csi *csi, pdf_document *doc, pdf_obj *rdb, pdf_lexbuf *buf, fz_cookie *cookie) +{ + memset(csi, 0, sizeof *csi); + csi->doc = doc; + csi->rdb = rdb; + csi->buf = buf; + csi->cookie = cookie; +} + +static void +pdf_clear_stack(fz_context *ctx, pdf_csi *csi) +{ + int i; + + pdf_drop_obj(ctx, csi->obj); + csi->obj = NULL; + + csi->name[0] = 0; + csi->string_len = 0; + for (i = 0; i < csi->top; i++) + csi->stack[i] = 0; + + csi->top = 0; +} + +static pdf_font_desc * +pdf_try_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *font, fz_cookie *cookie) +{ + pdf_font_desc *desc = NULL; + fz_try(ctx) + desc = pdf_load_font(ctx, doc, rdb, font); + fz_catch(ctx) + { + if (fz_caught(ctx) == FZ_ERROR_TRYLATER) + { + fz_ignore_error(ctx); + if (cookie) + cookie->incomplete++; + } + else + { + fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); + fz_report_error(ctx); + } + } + if (desc == NULL) + desc = pdf_load_hail_mary_font(ctx, doc); + return desc; +} + +static fz_image * +parse_inline_image(fz_context *ctx, pdf_csi *csi, fz_stream *stm, char *csname, int cslen) +{ + pdf_document *doc = csi->doc; + pdf_obj *rdb = csi->rdb; + pdf_obj *obj = NULL; + pdf_obj *cs; + fz_image *img = NULL; + int ch, found; + + fz_var(obj); + fz_var(img); + + fz_try(ctx) + { + obj = pdf_parse_dict(ctx, doc, stm, &doc->lexbuf.base); + + if (csname) + { + cs = pdf_dict_get(ctx, obj, PDF_NAME(CS)); + if (!pdf_is_indirect(ctx, cs) && pdf_is_name(ctx, cs)) + fz_strlcpy(csname, pdf_to_name(ctx, cs), cslen); + else + csname[0] = 0; + } + + /* read whitespace after ID keyword */ + ch = fz_read_byte(ctx, stm); + if (ch == '\r') + if (fz_peek_byte(ctx, stm) == '\n') + fz_read_byte(ctx, stm); + + img = pdf_load_inline_image(ctx, doc, rdb, obj, stm); + + /* find EI */ + found = 0; + ch = fz_read_byte(ctx, stm); + do + { + while (ch != 'E' && ch != EOF) + ch = fz_read_byte(ctx, stm); + if (ch == 'E') + { + ch = fz_read_byte(ctx, stm); + if (ch == 'I') + { + ch = fz_peek_byte(ctx, stm); + if (ch == ' ' || ch <= 32 || ch == '<' || ch == '/') + { + found = 1; + break; + } + } + } + } while (ch != EOF); + if (!found) + fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error after inline image"); + } + fz_always(ctx) + { + pdf_drop_obj(ctx, obj); + } + fz_catch(ctx) + { + fz_drop_image(ctx, img); + fz_rethrow(ctx); + } + + return img; +} + +static void +pdf_process_extgstate(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, pdf_obj *dict) +{ + pdf_obj *obj; + + obj = pdf_dict_get(ctx, dict, PDF_NAME(LW)); + if (pdf_is_number(ctx, obj) && proc->op_w) + proc->op_w(ctx, proc, pdf_to_real(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(LC)); + if (pdf_is_int(ctx, obj) && proc->op_J) + proc->op_J(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(LJ)); + if (pdf_is_int(ctx, obj) && proc->op_j) + proc->op_j(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(ML)); + if (pdf_is_number(ctx, obj) && proc->op_M) + proc->op_M(ctx, proc, pdf_to_real(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(D)); + if (pdf_is_array(ctx, obj) && proc->op_d) + { + pdf_obj *dash_array = pdf_array_get(ctx, obj, 0); + pdf_obj *dash_phase = pdf_array_get(ctx, obj, 1); + proc->op_d(ctx, proc, dash_array, pdf_to_real(ctx, dash_phase)); + } + + obj = pdf_dict_get(ctx, dict, PDF_NAME(RI)); + if (pdf_is_name(ctx, obj) && proc->op_ri) + proc->op_ri(ctx, proc, pdf_to_name(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(FL)); + if (pdf_is_number(ctx, obj) && proc->op_i) + proc->op_i(ctx, proc, pdf_to_real(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(Font)); + if (pdf_is_array(ctx, obj) && proc->op_Tf) + { + pdf_obj *font_ref = pdf_array_get(ctx, obj, 0); + pdf_obj *font_size = pdf_array_get(ctx, obj, 1); + pdf_font_desc *font; + if (pdf_is_dict(ctx, font_ref)) + font = pdf_try_load_font(ctx, csi->doc, csi->rdb, font_ref, csi->cookie); + else + font = pdf_load_hail_mary_font(ctx, csi->doc); + fz_try(ctx) + proc->op_Tf(ctx, proc, "ExtGState", font, pdf_to_real(ctx, font_size)); + fz_always(ctx) + pdf_drop_font(ctx, font); + fz_catch(ctx) + fz_rethrow(ctx); + } + + /* overprint and color management */ + + obj = pdf_dict_get(ctx, dict, PDF_NAME(OP)); + if (pdf_is_bool(ctx, obj) && proc->op_gs_OP) + proc->op_gs_OP(ctx, proc, pdf_to_bool(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(op)); + if (pdf_is_bool(ctx, obj) && proc->op_gs_op) + proc->op_gs_op(ctx, proc, pdf_to_bool(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(OPM)); + if (pdf_is_int(ctx, obj) && proc->op_gs_OPM) + proc->op_gs_OPM(ctx, proc, pdf_to_int(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(UseBlackPtComp)); + if (pdf_is_name(ctx, obj) && proc->op_gs_UseBlackPtComp) + proc->op_gs_UseBlackPtComp(ctx, proc, obj); + + /* transfer functions */ + + obj = pdf_dict_get(ctx, dict, PDF_NAME(TR2)); + if (pdf_is_name(ctx, obj)) + if (!pdf_name_eq(ctx, obj, PDF_NAME(Identity)) && !pdf_name_eq(ctx, obj, PDF_NAME(Default))) + fz_warn(ctx, "ignoring transfer function"); + if (!obj) /* TR is ignored in the presence of TR2 */ + { + pdf_obj *tr = pdf_dict_get(ctx, dict, PDF_NAME(TR)); + if (pdf_is_name(ctx, tr)) + if (!pdf_name_eq(ctx, tr, PDF_NAME(Identity))) + fz_warn(ctx, "ignoring transfer function"); + } + + /* transparency state */ + + obj = pdf_dict_get(ctx, dict, PDF_NAME(CA)); + if (pdf_is_number(ctx, obj) && proc->op_gs_CA) + proc->op_gs_CA(ctx, proc, pdf_to_real(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(ca)); + if (pdf_is_number(ctx, obj) && proc->op_gs_ca) + proc->op_gs_ca(ctx, proc, pdf_to_real(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(BM)); + if (pdf_is_array(ctx, obj)) + obj = pdf_array_get(ctx, obj, 0); + if (pdf_is_name(ctx, obj) && proc->op_gs_BM) + proc->op_gs_BM(ctx, proc, pdf_to_name(ctx, obj)); + + obj = pdf_dict_get(ctx, dict, PDF_NAME(SMask)); + if (proc->op_gs_SMask) + { + if (pdf_is_dict(ctx, obj)) + { + pdf_obj *xobj, *s, *bc, *tr; + float softmask_bc[FZ_MAX_COLORS]; + fz_colorspace *softmask_cs; + int colorspace_n = 1; + int k, luminosity; + + xobj = pdf_dict_get(ctx, obj, PDF_NAME(G)); + + softmask_cs = pdf_xobject_colorspace(ctx, xobj); + fz_try(ctx) + { + if (softmask_cs) + colorspace_n = fz_colorspace_n(ctx, softmask_cs); + + /* Default background color is black. */ + for (k = 0; k < colorspace_n; k++) + softmask_bc[k] = 0; + /* Which in CMYK means not all zeros! This should really be + * a test for subtractive color spaces, but this will have + * to do for now. */ + if (fz_colorspace_is_cmyk(ctx, softmask_cs)) + { + /* Default background color is black. */ + for (k = 0; k < colorspace_n; k++) + softmask_bc[k] = 0; + /* Which in CMYK means not all zeros! This should really be + * a test for subtractive color spaces, but this will have + * to do for now. */ + if (fz_colorspace_is_cmyk(ctx, softmask_cs)) + softmask_bc[3] = 1.0f; + } + + bc = pdf_dict_get(ctx, obj, PDF_NAME(BC)); + if (pdf_is_array(ctx, bc)) + { + for (k = 0; k < colorspace_n; k++) + softmask_bc[k] = pdf_array_get_real(ctx, bc, k); + } + + s = pdf_dict_get(ctx, obj, PDF_NAME(S)); + if (pdf_name_eq(ctx, s, PDF_NAME(Luminosity))) + luminosity = 1; + else + luminosity = 0; + + tr = pdf_dict_get(ctx, obj, PDF_NAME(TR)); + if (tr && pdf_name_eq(ctx, tr, PDF_NAME(Identity))) + tr = NULL; + + proc->op_gs_SMask(ctx, proc, xobj, softmask_cs, softmask_bc, luminosity, tr); + } + fz_always(ctx) + fz_drop_colorspace(ctx, softmask_cs); + fz_catch(ctx) + fz_rethrow(ctx); + } + else if (pdf_is_name(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(None))) + { + proc->op_gs_SMask(ctx, proc, NULL, NULL, NULL, 0, NULL); + } + } +} + +static void +pdf_process_Do(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) +{ + pdf_obj *xres, *xobj, *subtype; + + xres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(XObject)); + xobj = pdf_dict_gets(ctx, xres, csi->name); + if (!xobj) + fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find XObject resource '%s'", csi->name); + subtype = pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)); + if (pdf_name_eq(ctx, subtype, PDF_NAME(Form))) + { + pdf_obj *st = pdf_dict_get(ctx, xobj, PDF_NAME(Subtype2)); + if (st) + subtype = st; + } + if (!pdf_is_name(ctx, subtype)) + fz_throw(ctx, FZ_ERROR_SYNTAX, "no XObject subtype specified"); + + if (pdf_is_ocg_hidden(ctx, csi->doc, csi->rdb, proc->usage, pdf_dict_get(ctx, xobj, PDF_NAME(OC)))) + return; + + if (pdf_name_eq(ctx, subtype, PDF_NAME(Form))) + { + if (proc->op_Do_form) + proc->op_Do_form(ctx, proc, csi->name, xobj); + } + + else if (pdf_name_eq(ctx, subtype, PDF_NAME(Image))) + { + if (proc->op_Do_image) + { + fz_image *image = NULL; + + if (proc->requirements && PDF_PROCESSOR_REQUIRES_DECODED_IMAGES) + image = pdf_load_image(ctx, csi->doc, xobj); + fz_try(ctx) + proc->op_Do_image(ctx, proc, csi->name, image); + fz_always(ctx) + fz_drop_image(ctx, image); + fz_catch(ctx) + fz_rethrow(ctx); + } + } + + else if (!strcmp(pdf_to_name(ctx, subtype), "PS")) + fz_warn(ctx, "ignoring XObject with subtype PS"); + else + fz_warn(ctx, "ignoring XObject with unknown subtype: '%s'", pdf_to_name(ctx, subtype)); +} + +static void +pdf_process_CS(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, int stroke) +{ + fz_colorspace *cs; + + if (!proc->op_CS || !proc->op_cs) + return; + + if (!strcmp(csi->name, "Pattern")) + { + if (stroke) + proc->op_CS(ctx, proc, "Pattern", NULL); + else + proc->op_cs(ctx, proc, "Pattern", NULL); + return; + } + + if (!strcmp(csi->name, "DeviceGray")) + cs = fz_keep_colorspace(ctx, fz_device_gray(ctx)); + else if (!strcmp(csi->name, "DeviceRGB")) + cs = fz_keep_colorspace(ctx, fz_device_rgb(ctx)); + else if (!strcmp(csi->name, "DeviceCMYK")) + cs = fz_keep_colorspace(ctx, fz_device_cmyk(ctx)); + else + { + pdf_obj *csres, *csobj; + csres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(ColorSpace)); + csobj = pdf_dict_gets(ctx, csres, csi->name); + if (!csobj) + fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find ColorSpace resource '%s'", csi->name); + if (pdf_is_array(ctx, csobj) && pdf_array_len(ctx, csobj) == 1 && pdf_name_eq(ctx, pdf_array_get(ctx, csobj, 0), PDF_NAME(Pattern))) + { + if (stroke) + proc->op_CS(ctx, proc, "Pattern", NULL); + else + proc->op_cs(ctx, proc, "Pattern", NULL); + return; + } + cs = pdf_load_colorspace(ctx, csobj); + } + + fz_try(ctx) + { + if (stroke) + proc->op_CS(ctx, proc, csi->name, cs); + else + proc->op_cs(ctx, proc, csi->name, cs); + } + fz_always(ctx) + fz_drop_colorspace(ctx, cs); + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +pdf_process_SC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, int stroke) +{ + if (csi->name[0]) + { + pdf_obj *patres, *patobj; + int type; + + patres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Pattern)); + patobj = pdf_dict_gets(ctx, patres, csi->name); + if (!patobj) + fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find Pattern resource '%s'", csi->name); + + type = pdf_dict_get_int(ctx, patobj, PDF_NAME(PatternType)); + + if (type == 1) + { + if (proc->op_SC_pattern && proc->op_sc_pattern) + { + pdf_pattern *pat = pdf_load_pattern(ctx, csi->doc, patobj); + fz_try(ctx) + { + if (stroke) + proc->op_SC_pattern(ctx, proc, csi->name, pat, csi->top, csi->stack); + else + proc->op_sc_pattern(ctx, proc, csi->name, pat, csi->top, csi->stack); + } + fz_always(ctx) + pdf_drop_pattern(ctx, pat); + fz_catch(ctx) + fz_rethrow(ctx); + } + } + + else if (type == 2) + { + if (proc->op_SC_shade && proc->op_sc_shade) + { + fz_shade *shade = pdf_load_shading(ctx, csi->doc, patobj); + fz_try(ctx) + { + if (stroke) + proc->op_SC_shade(ctx, proc, csi->name, shade); + else + proc->op_sc_shade(ctx, proc, csi->name, shade); + } + fz_always(ctx) + fz_drop_shade(ctx, shade); + fz_catch(ctx) + fz_rethrow(ctx); + } + } + + else + { + fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown pattern type: %d", type); + } + } + + else + { + if (proc->op_SC_color && proc->op_sc_color) + { + if (stroke) + proc->op_SC_color(ctx, proc, csi->top, csi->stack); + else + proc->op_sc_color(ctx, proc, csi->top, csi->stack); + } + } +} + +static pdf_obj * +resolve_properties(fz_context *ctx, pdf_csi *csi, pdf_obj *obj) +{ + if (pdf_is_name(ctx, obj)) + return pdf_dict_get(ctx, pdf_dict_get(ctx, csi->rdb, PDF_NAME(Properties)), obj); + else + return obj; +} + +static void +pdf_process_BDC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) +{ + if (proc->op_BDC) + proc->op_BDC(ctx, proc, csi->name, csi->obj, resolve_properties(ctx, csi, csi->obj)); + + /* Already hidden, no need to look further */ + if (proc->hidden > 0) + { + ++proc->hidden; + return; + } + + /* We only look at OC groups here */ + if (strcmp(csi->name, "OC")) + return; + + if (pdf_is_ocg_hidden(ctx, csi->doc, csi->rdb, proc->usage, csi->obj)) + ++proc->hidden; +} + +static void +pdf_process_BMC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, const char *name) +{ + if (proc->op_BMC) + proc->op_BMC(ctx, proc, name); + if (proc->hidden > 0) + ++proc->hidden; +} + +static void +pdf_process_EMC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) +{ + if (proc->op_EMC) + proc->op_EMC(ctx, proc); + if (proc->hidden > 0) + --proc->hidden; +} + +static void +pdf_process_gsave(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) +{ + ++csi->gstate; + if (proc->op_q) + proc->op_q(ctx, proc); +} + +static void +pdf_process_grestore(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) +{ + --csi->gstate; + if (proc->op_Q) + proc->op_Q(ctx, proc); +} + +static void +pdf_process_end(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) +{ + if (proc->op_EOD) + proc->op_EOD(ctx, proc); + while (csi->gstate > 0) + pdf_process_grestore(ctx, proc, csi); + if (proc->op_END) + proc->op_END(ctx, proc); +} + +static int is_known_bad_word(const char *word) +{ + switch (*word) + { + case 'I': return !strcmp(word, "Infinity"); + case 'N': return !strcmp(word, "NaN"); + case 'i': return !strcmp(word, "inf"); + case 'n': return !strcmp(word, "nan"); + } + return 0; +} + +#define A(a) (a) +#define B(a,b) (a | b << 8) +#define C(a,b,c) (a | b << 8 | c << 16) + +static void +pdf_process_keyword(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream *stm, char *word) +{ + float *s = csi->stack; + char csname[40]; + int key; + + key = word[0]; + if (word[1]) + { + key |= word[1] << 8; + if (word[2]) + { + key |= word[2] << 16; + if (word[3]) + key = 0; + } + } + + switch (key) + { + default: + if (!csi->xbalance) + { + if (is_known_bad_word(word)) + fz_warn(ctx, "unknown keyword: '%s'", word); + else + fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown keyword: '%s'", word); + } + break; + + /* general graphics state */ + case A('w'): if (proc->op_w) proc->op_w(ctx, proc, s[0]); break; + case A('j'): if (proc->op_j) proc->op_j(ctx, proc, fz_clampi(s[0], 0, 2)); break; + case A('J'): if (proc->op_J) proc->op_J(ctx, proc, fz_clampi(s[0], 0, 2)); break; + case A('M'): if (proc->op_M) proc->op_M(ctx, proc, s[0]); break; + case A('d'): if (proc->op_d) proc->op_d(ctx, proc, csi->obj, s[0]); break; + case B('r','i'): if (proc->op_ri) proc->op_ri(ctx, proc, csi->name); break; + case A('i'): if (proc->op_i) proc->op_i(ctx, proc, s[0]); break; + + case B('g','s'): + { + pdf_obj *gsres, *gsobj; + gsres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(ExtGState)); + gsobj = pdf_dict_gets(ctx, gsres, csi->name); + if (!gsobj) + fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find ExtGState resource '%s'", csi->name); + if (proc->op_gs_begin) + proc->op_gs_begin(ctx, proc, csi->name, gsobj); + pdf_process_extgstate(ctx, proc, csi, gsobj); + if (proc->op_gs_end) + proc->op_gs_end(ctx, proc); + } + break; + + /* special graphics state */ + case A('q'): pdf_process_gsave(ctx, proc, csi); break; + case A('Q'): pdf_process_grestore(ctx, proc, csi); break; + case B('c','m'): if (proc->op_cm) proc->op_cm(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; + + /* path construction */ + case A('m'): if (proc->op_m) proc->op_m(ctx, proc, s[0], s[1]); break; + case A('l'): if (proc->op_l) proc->op_l(ctx, proc, s[0], s[1]); break; + case A('c'): if (proc->op_c) proc->op_c(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; + case A('v'): if (proc->op_v) proc->op_v(ctx, proc, s[0], s[1], s[2], s[3]); break; + case A('y'): if (proc->op_y) proc->op_y(ctx, proc, s[0], s[1], s[2], s[3]); break; + case A('h'): if (proc->op_h) proc->op_h(ctx, proc); break; + case B('r','e'): if (proc->op_re) proc->op_re(ctx, proc, s[0], s[1], s[2], s[3]); break; + + /* path painting */ + case A('S'): if (proc->op_S) proc->op_S(ctx, proc); break; + case A('s'): if (proc->op_s) proc->op_s(ctx, proc); break; + case A('F'): if (proc->op_F) proc->op_F(ctx, proc); break; + case A('f'): if (proc->op_f) proc->op_f(ctx, proc); break; + case B('f','*'): if (proc->op_fstar) proc->op_fstar(ctx, proc); break; + case A('B'): if (proc->op_B) proc->op_B(ctx, proc); break; + case B('B','*'): if (proc->op_Bstar) proc->op_Bstar(ctx, proc); break; + case A('b'): if (proc->op_b) proc->op_b(ctx, proc); break; + case B('b','*'): if (proc->op_bstar) proc->op_bstar(ctx, proc); break; + case A('n'): if (proc->op_n) proc->op_n(ctx, proc); break; + + /* path clipping */ + case A('W'): if (proc->op_W) proc->op_W(ctx, proc); break; + case B('W','*'): if (proc->op_Wstar) proc->op_Wstar(ctx, proc); break; + + /* text objects */ + case B('B','T'): csi->in_text = 1; if (proc->op_BT) proc->op_BT(ctx, proc); break; + case B('E','T'): csi->in_text = 0; if (proc->op_ET) proc->op_ET(ctx, proc); break; + + /* text state */ + case B('T','c'): if (proc->op_Tc) proc->op_Tc(ctx, proc, s[0]); break; + case B('T','w'): if (proc->op_Tw) proc->op_Tw(ctx, proc, s[0]); break; + case B('T','z'): if (proc->op_Tz) proc->op_Tz(ctx, proc, s[0]); break; + case B('T','L'): if (proc->op_TL) proc->op_TL(ctx, proc, s[0]); break; + case B('T','r'): if (proc->op_Tr) proc->op_Tr(ctx, proc, s[0]); break; + case B('T','s'): if (proc->op_Ts) proc->op_Ts(ctx, proc, s[0]); break; + + case B('T','f'): + if (proc->op_Tf) + { + pdf_obj *fontres, *fontobj; + pdf_font_desc *font; + fontres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Font)); + fontobj = pdf_dict_gets(ctx, fontres, csi->name); + if (pdf_is_dict(ctx, fontobj)) + font = pdf_try_load_font(ctx, csi->doc, csi->rdb, fontobj, csi->cookie); + else + font = pdf_load_hail_mary_font(ctx, csi->doc); + fz_try(ctx) + proc->op_Tf(ctx, proc, csi->name, font, s[0]); + fz_always(ctx) + pdf_drop_font(ctx, font); + fz_catch(ctx) + fz_rethrow(ctx); + } + break; + + /* text positioning */ + case B('T','d'): if (proc->op_Td) proc->op_Td(ctx, proc, s[0], s[1]); break; + case B('T','D'): if (proc->op_TD) proc->op_TD(ctx, proc, s[0], s[1]); break; + case B('T','m'): if (proc->op_Tm) proc->op_Tm(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; + case B('T','*'): if (proc->op_Tstar) proc->op_Tstar(ctx, proc); break; + + /* text showing */ + case B('T','J'): if (proc->op_TJ) proc->op_TJ(ctx, proc, csi->obj); break; + case B('T','j'): + if (proc->op_Tj) + { + if (csi->string_len > 0) + proc->op_Tj(ctx, proc, csi->string, csi->string_len); + else + proc->op_Tj(ctx, proc, pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj)); + } + break; + case A('\''): + if (proc->op_squote) + { + if (csi->string_len > 0) + proc->op_squote(ctx, proc, csi->string, csi->string_len); + else + proc->op_squote(ctx, proc, pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj)); + } + break; + case A('"'): + if (proc->op_dquote) + { + if (csi->string_len > 0) + proc->op_dquote(ctx, proc, s[0], s[1], csi->string, csi->string_len); + else + proc->op_dquote(ctx, proc, s[0], s[1], pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj)); + } + break; + + /* type 3 fonts */ + case B('d','0'): if (proc->op_d0) proc->op_d0(ctx, proc, s[0], s[1]); break; + case B('d','1'): if (proc->op_d1) proc->op_d1(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; + + /* color */ + case B('C','S'): pdf_process_CS(ctx, proc, csi, 1); break; + case B('c','s'): pdf_process_CS(ctx, proc, csi, 0); break; + case B('S','C'): pdf_process_SC(ctx, proc, csi, 1); break; + case B('s','c'): pdf_process_SC(ctx, proc, csi, 0); break; + case C('S','C','N'): pdf_process_SC(ctx, proc, csi, 1); break; + case C('s','c','n'): pdf_process_SC(ctx, proc, csi, 0); break; + + case A('G'): if (proc->op_G) proc->op_G(ctx, proc, s[0]); break; + case A('g'): if (proc->op_g) proc->op_g(ctx, proc, s[0]); break; + case B('R','G'): if (proc->op_RG) proc->op_RG(ctx, proc, s[0], s[1], s[2]); break; + case B('r','g'): if (proc->op_rg) proc->op_rg(ctx, proc, s[0], s[1], s[2]); break; + case A('K'): if (proc->op_K) proc->op_K(ctx, proc, s[0], s[1], s[2], s[3]); break; + case A('k'): if (proc->op_k) proc->op_k(ctx, proc, s[0], s[1], s[2], s[3]); break; + + /* shadings, images, xobjects */ + case B('B','I'): + { + fz_image *img = parse_inline_image(ctx, csi, stm, csname, sizeof csname); + fz_try(ctx) + { + if (proc->op_BI) + proc->op_BI(ctx, proc, img, csname[0] ? csname : NULL); + } + fz_always(ctx) + fz_drop_image(ctx, img); + fz_catch(ctx) + fz_rethrow(ctx); + } + break; + + case B('s','h'): + if (proc->op_sh) + { + pdf_obj *shaderes, *shadeobj; + fz_shade *shade; + shaderes = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Shading)); + shadeobj = pdf_dict_gets(ctx, shaderes, csi->name); + if (!shadeobj) + fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find Shading resource '%s'", csi->name); + shade = pdf_load_shading(ctx, csi->doc, shadeobj); + fz_try(ctx) + proc->op_sh(ctx, proc, csi->name, shade); + fz_always(ctx) + fz_drop_shade(ctx, shade); + fz_catch(ctx) + fz_rethrow(ctx); + } + break; + + case B('D','o'): pdf_process_Do(ctx, proc, csi); break; + + /* marked content */ + case B('M','P'): if (proc->op_MP) proc->op_MP(ctx, proc, csi->name); break; + case B('D','P'): if (proc->op_DP) proc->op_DP(ctx, proc, csi->name, csi->obj, resolve_properties(ctx, csi, csi->obj)); break; + case C('B','M','C'): pdf_process_BMC(ctx, proc, csi, csi->name); break; + case C('B','D','C'): pdf_process_BDC(ctx, proc, csi); break; + case C('E','M','C'): pdf_process_EMC(ctx, proc, csi); break; + + /* compatibility */ + case B('B','X'): ++csi->xbalance; if (proc->op_BX) proc->op_BX(ctx, proc); break; + case B('E','X'): --csi->xbalance; if (proc->op_EX) proc->op_EX(ctx, proc); break; + } +} + +static void +pdf_process_stream(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream *stm) +{ + pdf_document *doc = csi->doc; + pdf_lexbuf *buf = csi->buf; + fz_cookie *cookie = csi->cookie; + + pdf_token tok = PDF_TOK_ERROR; + int in_text_array = 0; + int syntax_errors = 0; + + /* make sure we have a clean slate if we come here from flush_text */ + pdf_clear_stack(ctx, csi); + + fz_var(in_text_array); + fz_var(tok); + + if (cookie) + { + cookie->progress_max = (size_t)-1; + cookie->progress = 0; + } + + do + { + fz_try(ctx) + { + do + { + /* Check the cookie */ + if (cookie) + { + if (cookie->abort) + { + tok = PDF_TOK_EOF; + break; + } + cookie->progress++; + } + + tok = pdf_lex(ctx, stm, buf); + + if (in_text_array) + { + switch(tok) + { + case PDF_TOK_CLOSE_ARRAY: + in_text_array = 0; + break; + case PDF_TOK_REAL: + pdf_array_push_real(ctx, csi->obj, buf->f); + break; + case PDF_TOK_INT: + pdf_array_push_int(ctx, csi->obj, buf->i); + break; + case PDF_TOK_STRING: + pdf_array_push_string(ctx, csi->obj, buf->scratch, buf->len); + break; + case PDF_TOK_EOF: + break; + case PDF_TOK_KEYWORD: + if (buf->scratch[0] == 'T' && (buf->scratch[1] == 'w' || buf->scratch[1] == 'c') && buf->scratch[2] == 0) + { + int n = pdf_array_len(ctx, csi->obj); + if (n > 0) + { + pdf_obj *o = pdf_array_get(ctx, csi->obj, n-1); + if (pdf_is_number(ctx, o)) + { + csi->stack[0] = pdf_to_real(ctx, o); + pdf_array_delete(ctx, csi->obj, n-1); + pdf_process_keyword(ctx, proc, csi, stm, buf->scratch); + } + } + } + /* Deliberate Fallthrough! */ + default: + fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in array"); + } + } + else switch (tok) + { + case PDF_TOK_ENDSTREAM: + case PDF_TOK_EOF: + tok = PDF_TOK_EOF; + break; + + case PDF_TOK_OPEN_ARRAY: + if (csi->obj) + { + pdf_drop_obj(ctx, csi->obj); + csi->obj = NULL; + } + if (csi->in_text) + { + in_text_array = 1; + csi->obj = pdf_new_array(ctx, doc, 4); + } + else + { + csi->obj = pdf_parse_array(ctx, doc, stm, buf); + } + break; + + case PDF_TOK_OPEN_DICT: + if (csi->obj) + { + pdf_drop_obj(ctx, csi->obj); + csi->obj = NULL; + } + csi->obj = pdf_parse_dict(ctx, doc, stm, buf); + break; + + case PDF_TOK_NAME: + if (csi->name[0]) + { + pdf_drop_obj(ctx, csi->obj); + csi->obj = NULL; + csi->obj = pdf_new_name(ctx, buf->scratch); + } + else + fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name)); + break; + + case PDF_TOK_INT: + if (csi->top < (int)nelem(csi->stack)) { + csi->stack[csi->top] = buf->i; + csi->top ++; + } + else + fz_throw(ctx, FZ_ERROR_SYNTAX, "stack overflow"); + break; + + case PDF_TOK_REAL: + if (csi->top < (int)nelem(csi->stack)) { + csi->stack[csi->top] = buf->f; + csi->top ++; + } + else + fz_throw(ctx, FZ_ERROR_SYNTAX, "stack overflow"); + break; + + case PDF_TOK_STRING: + if (buf->len <= sizeof(csi->string)) + { + memcpy(csi->string, buf->scratch, buf->len); + csi->string_len = buf->len; + } + else + { + if (csi->obj) + { + pdf_drop_obj(ctx, csi->obj); + csi->obj = NULL; + } + csi->obj = pdf_new_string(ctx, buf->scratch, buf->len); + } + break; + + case PDF_TOK_KEYWORD: + pdf_process_keyword(ctx, proc, csi, stm, buf->scratch); + pdf_clear_stack(ctx, csi); + break; + + default: + fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in content stream"); + } + } + while (tok != PDF_TOK_EOF); + } + fz_always(ctx) + { + pdf_clear_stack(ctx, csi); + } + fz_catch(ctx) + { + int caught = fz_caught(ctx); + if (cookie) + { + if (caught == FZ_ERROR_TRYLATER) + { + fz_ignore_error(ctx); + cookie->incomplete++; + tok = PDF_TOK_EOF; + } + else if (caught == FZ_ERROR_ABORT) + { + fz_rethrow(ctx); + } + else if (caught == FZ_ERROR_SYNTAX) + { + fz_report_error(ctx); + cookie->errors++; + if (++syntax_errors >= MAX_SYNTAX_ERRORS) + { + fz_warn(ctx, "too many syntax errors; ignoring rest of page"); + tok = PDF_TOK_EOF; + } + } + else + { + fz_rethrow(ctx); + } + } + else + { + if (caught == FZ_ERROR_TRYLATER) + { + fz_ignore_error(ctx); + tok = PDF_TOK_EOF; + } + else if (caught == FZ_ERROR_ABORT) + { + fz_rethrow(ctx); + } + else if (caught == FZ_ERROR_SYNTAX) + { + fz_report_error(ctx); + if (++syntax_errors >= MAX_SYNTAX_ERRORS) + { + fz_warn(ctx, "too many syntax errors; ignoring rest of page"); + tok = PDF_TOK_EOF; + } + } + else + { + fz_rethrow(ctx); + } + } + + /* If we do catch an error, then reset ourselves to a base lexing state */ + in_text_array = 0; + } + } + while (tok != PDF_TOK_EOF); + + if (syntax_errors > 0) + fz_warn(ctx, "encountered syntax errors; page may not be correct"); +} + +void pdf_processor_push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res) +{ + proc->push_resources(ctx, proc, res); +} + +pdf_obj *pdf_processor_pop_resources(fz_context *ctx, pdf_processor *proc) +{ + return proc->pop_resources(ctx, proc); +} + +void +pdf_process_raw_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie) +{ + pdf_csi csi; + pdf_lexbuf buf; + fz_stream *stm = NULL; + + if (!stmobj) + return; + + fz_var(stm); + + pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); + pdf_init_csi(ctx, &csi, doc, rdb, &buf, cookie); + + fz_try(ctx) + { + fz_defer_reap_start(ctx); + stm = pdf_open_contents_stream(ctx, doc, stmobj); + pdf_process_stream(ctx, proc, &csi, stm); + pdf_process_end(ctx, proc, &csi); + } + fz_always(ctx) + { + fz_defer_reap_end(ctx); + fz_drop_stream(ctx, stm); + pdf_clear_stack(ctx, &csi); + pdf_lexbuf_fin(ctx, &buf); + } + fz_catch(ctx) + { + proc->close_processor = NULL; /* aborted run, don't warn about unclosed processor */ + fz_rethrow(ctx); + } +} + +void +pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie, pdf_obj **out_res) +{ + pdf_processor_push_resources(ctx, proc, rdb); + fz_try(ctx) + pdf_process_raw_contents(ctx, proc, doc, rdb, stmobj, cookie); + fz_always(ctx) + { + pdf_obj *res = pdf_processor_pop_resources(ctx, proc); + if (out_res) + *out_res = res; + else + pdf_drop_obj(ctx, res); + } + fz_catch(ctx) + fz_rethrow(ctx); +} + +/* Bug 702543: It looks like certain types of annotation are never + * printed. */ +static int +pdf_should_print_annot(fz_context *ctx, pdf_annot *annot) +{ + enum pdf_annot_type type = pdf_annot_type(ctx, annot); + + /* We may need to add more types here. */ + if (type == PDF_ANNOT_FILE_ATTACHMENT) + return 0; + + return 1; +} + +void +pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_annot *annot, fz_cookie *cookie) +{ + int flags = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(F)); + fz_matrix matrix; + pdf_obj *ap; + + if (flags & (PDF_ANNOT_IS_INVISIBLE | PDF_ANNOT_IS_HIDDEN) || annot->hidden_editing) + return; + + /* popup annotations should never be drawn */ + if (pdf_annot_type(ctx, annot) == PDF_ANNOT_POPUP) + return; + + if (proc->usage) + { + if (!strcmp(proc->usage, "Print")) + { + if (!(flags & PDF_ANNOT_IS_PRINT)) + return; + if (!pdf_should_print_annot(ctx, annot)) + return; + } + if (!strcmp(proc->usage, "View") && (flags & PDF_ANNOT_IS_NO_VIEW)) + return; + } + + /* TODO: NoZoom and NoRotate */ + + /* XXX what resources, if any, to use for this check? */ + if (pdf_is_ocg_hidden(ctx, annot->page->doc, NULL, proc->usage, pdf_dict_get(ctx, annot->obj, PDF_NAME(OC)))) + return; + + ap = pdf_annot_ap(ctx, annot); + + if (!ap) + return; + + matrix = pdf_annot_transform(ctx, annot); + if (proc->op_q) + proc->op_q(ctx, proc); + if (proc->op_cm) + proc->op_cm(ctx, proc, + matrix.a, matrix.b, + matrix.c, matrix.d, + matrix.e, matrix.f); + if (proc->op_Do_form) + proc->op_Do_form(ctx, proc, NULL, ap); + if (proc->op_Q) + proc->op_Q(ctx, proc); +} + +void +pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, fz_buffer *contents) +{ + pdf_csi csi; + pdf_lexbuf buf; + fz_stream *stm = NULL; + + fz_var(stm); + + if (!contents) + return; + + pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); + pdf_init_csi(ctx, &csi, doc, rdb, &buf, NULL); + + fz_try(ctx) + { + pdf_processor_push_resources(ctx, proc, rdb); + stm = fz_open_buffer(ctx, contents); + pdf_process_stream(ctx, proc, &csi, stm); + pdf_process_end(ctx, proc, &csi); + } + fz_always(ctx) + { + pdf_drop_obj(ctx, pdf_processor_pop_resources(ctx, proc)); + fz_drop_stream(ctx, stm); + pdf_clear_stack(ctx, &csi); + pdf_lexbuf_fin(ctx, &buf); + } + fz_catch(ctx) + { + /* Note: Any SYNTAX errors should have been swallowed + * by pdf_process_stream, but in case any escape from other + * functions, recast the error type here to be safe. */ + fz_morph_error(ctx, FZ_ERROR_SYNTAX, FZ_ERROR_FORMAT); + fz_rethrow(ctx); + } +} + +void +pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]) +{ + save[0] = tos->tm; + save[1] = tos->tlm; +} + +void +pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]) +{ + tos->tm = save[0]; + tos->tlm = save[1]; +} + +fz_text * +pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos) +{ + fz_text *text = tos->text; + + tos->text = NULL; + + return text; +} + +void +pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render) +{ + tos->text = fz_new_text(ctx); + tos->text_mode = render; + tos->text_bbox = fz_empty_rect; +} + +int +pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm, float *adv) +{ + fz_matrix tsm; + + tsm.a = text->size * text->scale; + tsm.b = 0; + tsm.c = 0; + tsm.d = text->size; + tsm.e = 0; + tsm.f = text->rise; + + if (fontdesc->wmode == 0) + { + pdf_hmtx h = pdf_lookup_hmtx(ctx, fontdesc, cid); + float w0 = *adv = h.w * 0.001f; + tos->char_tx = (w0 * text->size + text->char_space) * text->scale; + tos->char_ty = 0; + } + else + { + pdf_vmtx v = pdf_lookup_vmtx(ctx, fontdesc, cid); + float w1 = *adv = v.w * 0.001f; + tsm.e -= v.x * fabsf(text->size) * 0.001f; + tsm.f -= v.y * text->size * 0.001f; + tos->char_tx = 0; + tos->char_ty = w1 * text->size + text->char_space; + } + + *trm = fz_concat(tsm, tos->tm); + + tos->cid = cid; + tos->gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); + tos->fontdesc = fontdesc; + + /* Compensate for the glyph cache limited positioning precision */ + tos->char_bbox = fz_expand_rect(fz_bound_glyph(ctx, fontdesc->font, tos->gid, *trm), 1); + + return tos->gid; +} + +void +pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos) +{ + tos->text_bbox = fz_union_rect(tos->text_bbox, tos->char_bbox); + tos->tm = fz_pre_translate(tos->tm, tos->char_tx, tos->char_ty); +} + +void +pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty) +{ + tos->tlm = fz_pre_translate(tos->tlm, tx, ty); + tos->tm = tos->tlm; +} + +void +pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f) +{ + tos->tm.a = a; + tos->tm.b = b; + tos->tm.c = c; + tos->tm.d = d; + tos->tm.e = e; + tos->tm.f = f; + tos->tlm = tos->tm; +} + +void +pdf_tos_newline(pdf_text_object_state *tos, float leading) +{ + tos->tlm = fz_pre_translate(tos->tlm, 0, -leading); + tos->tm = tos->tlm; +}
