Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-interpret.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "pdf-annot-imp.h" | |
| 25 | |
| 26 #include <string.h> | |
| 27 #include <math.h> | |
| 28 | |
| 29 /* Maximum number of errors before aborting */ | |
| 30 #define MAX_SYNTAX_ERRORS 100 | |
| 31 | |
| 32 void * | |
| 33 pdf_new_processor(fz_context *ctx, int size) | |
| 34 { | |
| 35 pdf_processor *ret = Memento_label(fz_calloc(ctx, 1, size), "pdf_processor"); | |
| 36 ret->refs = 1; | |
| 37 return ret; | |
| 38 } | |
| 39 | |
| 40 pdf_processor * | |
| 41 pdf_keep_processor(fz_context *ctx, pdf_processor *proc) | |
| 42 { | |
| 43 return fz_keep_imp(ctx, proc, &proc->refs); | |
| 44 } | |
| 45 | |
| 46 void | |
| 47 pdf_close_processor(fz_context *ctx, pdf_processor *proc) | |
| 48 { | |
| 49 void (*close_processor)(fz_context *ctx, pdf_processor *proc); | |
| 50 | |
| 51 if (!proc || proc->closed) | |
| 52 return; | |
| 53 | |
| 54 proc->closed = 1; | |
| 55 close_processor = proc->close_processor; | |
| 56 if (!close_processor) | |
| 57 return; | |
| 58 | |
| 59 close_processor(ctx, proc); /* Tail recursion */ | |
| 60 } | |
| 61 | |
| 62 void | |
| 63 pdf_drop_processor(fz_context *ctx, pdf_processor *proc) | |
| 64 { | |
| 65 if (fz_drop_imp(ctx, proc, &proc->refs)) | |
| 66 { | |
| 67 if (!proc->closed) | |
| 68 fz_warn(ctx, "dropping unclosed PDF processor"); | |
| 69 if (proc->drop_processor) | |
| 70 proc->drop_processor(ctx, proc); | |
| 71 fz_free(ctx, proc); | |
| 72 } | |
| 73 } | |
| 74 | |
| 75 void pdf_reset_processor(fz_context *ctx, pdf_processor *proc) | |
| 76 { | |
| 77 if (proc == NULL) | |
| 78 return; | |
| 79 | |
| 80 proc->closed = 0; | |
| 81 | |
| 82 if (proc->reset_processor == NULL) | |
| 83 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot reset PDF processor"); | |
| 84 | |
| 85 proc->reset_processor(ctx, proc); | |
| 86 } | |
| 87 | |
| 88 static void | |
| 89 pdf_init_csi(fz_context *ctx, pdf_csi *csi, pdf_document *doc, pdf_obj *rdb, pdf_lexbuf *buf, fz_cookie *cookie) | |
| 90 { | |
| 91 memset(csi, 0, sizeof *csi); | |
| 92 csi->doc = doc; | |
| 93 csi->rdb = rdb; | |
| 94 csi->buf = buf; | |
| 95 csi->cookie = cookie; | |
| 96 } | |
| 97 | |
| 98 static void | |
| 99 pdf_clear_stack(fz_context *ctx, pdf_csi *csi) | |
| 100 { | |
| 101 int i; | |
| 102 | |
| 103 pdf_drop_obj(ctx, csi->obj); | |
| 104 csi->obj = NULL; | |
| 105 | |
| 106 csi->name[0] = 0; | |
| 107 csi->string_len = 0; | |
| 108 for (i = 0; i < csi->top; i++) | |
| 109 csi->stack[i] = 0; | |
| 110 | |
| 111 csi->top = 0; | |
| 112 } | |
| 113 | |
| 114 static pdf_font_desc * | |
| 115 pdf_try_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *font, fz_cookie *cookie) | |
| 116 { | |
| 117 pdf_font_desc *desc = NULL; | |
| 118 fz_try(ctx) | |
| 119 desc = pdf_load_font(ctx, doc, rdb, font); | |
| 120 fz_catch(ctx) | |
| 121 { | |
| 122 if (fz_caught(ctx) == FZ_ERROR_TRYLATER) | |
| 123 { | |
| 124 fz_ignore_error(ctx); | |
| 125 if (cookie) | |
| 126 cookie->incomplete++; | |
| 127 } | |
| 128 else | |
| 129 { | |
| 130 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 131 fz_report_error(ctx); | |
| 132 } | |
| 133 } | |
| 134 if (desc == NULL) | |
| 135 desc = pdf_load_hail_mary_font(ctx, doc); | |
| 136 return desc; | |
| 137 } | |
| 138 | |
| 139 static fz_image * | |
| 140 parse_inline_image(fz_context *ctx, pdf_csi *csi, fz_stream *stm, char *csname, int cslen) | |
| 141 { | |
| 142 pdf_document *doc = csi->doc; | |
| 143 pdf_obj *rdb = csi->rdb; | |
| 144 pdf_obj *obj = NULL; | |
| 145 pdf_obj *cs; | |
| 146 fz_image *img = NULL; | |
| 147 int ch, found; | |
| 148 | |
| 149 fz_var(obj); | |
| 150 fz_var(img); | |
| 151 | |
| 152 fz_try(ctx) | |
| 153 { | |
| 154 obj = pdf_parse_dict(ctx, doc, stm, &doc->lexbuf.base); | |
| 155 | |
| 156 if (csname) | |
| 157 { | |
| 158 cs = pdf_dict_get(ctx, obj, PDF_NAME(CS)); | |
| 159 if (!pdf_is_indirect(ctx, cs) && pdf_is_name(ctx, cs)) | |
| 160 fz_strlcpy(csname, pdf_to_name(ctx, cs), cslen); | |
| 161 else | |
| 162 csname[0] = 0; | |
| 163 } | |
| 164 | |
| 165 /* read whitespace after ID keyword */ | |
| 166 ch = fz_read_byte(ctx, stm); | |
| 167 if (ch == '\r') | |
| 168 if (fz_peek_byte(ctx, stm) == '\n') | |
| 169 fz_read_byte(ctx, stm); | |
| 170 | |
| 171 img = pdf_load_inline_image(ctx, doc, rdb, obj, stm); | |
| 172 | |
| 173 /* find EI */ | |
| 174 found = 0; | |
| 175 ch = fz_read_byte(ctx, stm); | |
| 176 do | |
| 177 { | |
| 178 while (ch != 'E' && ch != EOF) | |
| 179 ch = fz_read_byte(ctx, stm); | |
| 180 if (ch == 'E') | |
| 181 { | |
| 182 ch = fz_read_byte(ctx, stm); | |
| 183 if (ch == 'I') | |
| 184 { | |
| 185 ch = fz_peek_byte(ctx, stm); | |
| 186 if (ch == ' ' || ch <= 32 || ch == '<' || ch == '/') | |
| 187 { | |
| 188 found = 1; | |
| 189 break; | |
| 190 } | |
| 191 } | |
| 192 } | |
| 193 } while (ch != EOF); | |
| 194 if (!found) | |
| 195 fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error after inline image"); | |
| 196 } | |
| 197 fz_always(ctx) | |
| 198 { | |
| 199 pdf_drop_obj(ctx, obj); | |
| 200 } | |
| 201 fz_catch(ctx) | |
| 202 { | |
| 203 fz_drop_image(ctx, img); | |
| 204 fz_rethrow(ctx); | |
| 205 } | |
| 206 | |
| 207 return img; | |
| 208 } | |
| 209 | |
| 210 static void | |
| 211 pdf_process_extgstate(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, pdf_obj *dict) | |
| 212 { | |
| 213 pdf_obj *obj; | |
| 214 | |
| 215 obj = pdf_dict_get(ctx, dict, PDF_NAME(LW)); | |
| 216 if (pdf_is_number(ctx, obj) && proc->op_w) | |
| 217 proc->op_w(ctx, proc, pdf_to_real(ctx, obj)); | |
| 218 | |
| 219 obj = pdf_dict_get(ctx, dict, PDF_NAME(LC)); | |
| 220 if (pdf_is_int(ctx, obj) && proc->op_J) | |
| 221 proc->op_J(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2)); | |
| 222 | |
| 223 obj = pdf_dict_get(ctx, dict, PDF_NAME(LJ)); | |
| 224 if (pdf_is_int(ctx, obj) && proc->op_j) | |
| 225 proc->op_j(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2)); | |
| 226 | |
| 227 obj = pdf_dict_get(ctx, dict, PDF_NAME(ML)); | |
| 228 if (pdf_is_number(ctx, obj) && proc->op_M) | |
| 229 proc->op_M(ctx, proc, pdf_to_real(ctx, obj)); | |
| 230 | |
| 231 obj = pdf_dict_get(ctx, dict, PDF_NAME(D)); | |
| 232 if (pdf_is_array(ctx, obj) && proc->op_d) | |
| 233 { | |
| 234 pdf_obj *dash_array = pdf_array_get(ctx, obj, 0); | |
| 235 pdf_obj *dash_phase = pdf_array_get(ctx, obj, 1); | |
| 236 proc->op_d(ctx, proc, dash_array, pdf_to_real(ctx, dash_phase)); | |
| 237 } | |
| 238 | |
| 239 obj = pdf_dict_get(ctx, dict, PDF_NAME(RI)); | |
| 240 if (pdf_is_name(ctx, obj) && proc->op_ri) | |
| 241 proc->op_ri(ctx, proc, pdf_to_name(ctx, obj)); | |
| 242 | |
| 243 obj = pdf_dict_get(ctx, dict, PDF_NAME(FL)); | |
| 244 if (pdf_is_number(ctx, obj) && proc->op_i) | |
| 245 proc->op_i(ctx, proc, pdf_to_real(ctx, obj)); | |
| 246 | |
| 247 obj = pdf_dict_get(ctx, dict, PDF_NAME(Font)); | |
| 248 if (pdf_is_array(ctx, obj) && proc->op_Tf) | |
| 249 { | |
| 250 pdf_obj *font_ref = pdf_array_get(ctx, obj, 0); | |
| 251 pdf_obj *font_size = pdf_array_get(ctx, obj, 1); | |
| 252 pdf_font_desc *font; | |
| 253 if (pdf_is_dict(ctx, font_ref)) | |
| 254 font = pdf_try_load_font(ctx, csi->doc, csi->rdb, font_ref, csi->cookie); | |
| 255 else | |
| 256 font = pdf_load_hail_mary_font(ctx, csi->doc); | |
| 257 fz_try(ctx) | |
| 258 proc->op_Tf(ctx, proc, "ExtGState", font, pdf_to_real(ctx, font_size)); | |
| 259 fz_always(ctx) | |
| 260 pdf_drop_font(ctx, font); | |
| 261 fz_catch(ctx) | |
| 262 fz_rethrow(ctx); | |
| 263 } | |
| 264 | |
| 265 /* overprint and color management */ | |
| 266 | |
| 267 obj = pdf_dict_get(ctx, dict, PDF_NAME(OP)); | |
| 268 if (pdf_is_bool(ctx, obj) && proc->op_gs_OP) | |
| 269 proc->op_gs_OP(ctx, proc, pdf_to_bool(ctx, obj)); | |
| 270 | |
| 271 obj = pdf_dict_get(ctx, dict, PDF_NAME(op)); | |
| 272 if (pdf_is_bool(ctx, obj) && proc->op_gs_op) | |
| 273 proc->op_gs_op(ctx, proc, pdf_to_bool(ctx, obj)); | |
| 274 | |
| 275 obj = pdf_dict_get(ctx, dict, PDF_NAME(OPM)); | |
| 276 if (pdf_is_int(ctx, obj) && proc->op_gs_OPM) | |
| 277 proc->op_gs_OPM(ctx, proc, pdf_to_int(ctx, obj)); | |
| 278 | |
| 279 obj = pdf_dict_get(ctx, dict, PDF_NAME(UseBlackPtComp)); | |
| 280 if (pdf_is_name(ctx, obj) && proc->op_gs_UseBlackPtComp) | |
| 281 proc->op_gs_UseBlackPtComp(ctx, proc, obj); | |
| 282 | |
| 283 /* transfer functions */ | |
| 284 | |
| 285 obj = pdf_dict_get(ctx, dict, PDF_NAME(TR2)); | |
| 286 if (pdf_is_name(ctx, obj)) | |
| 287 if (!pdf_name_eq(ctx, obj, PDF_NAME(Identity)) && !pdf_name_eq(ctx, obj, PDF_NAME(Default))) | |
| 288 fz_warn(ctx, "ignoring transfer function"); | |
| 289 if (!obj) /* TR is ignored in the presence of TR2 */ | |
| 290 { | |
| 291 pdf_obj *tr = pdf_dict_get(ctx, dict, PDF_NAME(TR)); | |
| 292 if (pdf_is_name(ctx, tr)) | |
| 293 if (!pdf_name_eq(ctx, tr, PDF_NAME(Identity))) | |
| 294 fz_warn(ctx, "ignoring transfer function"); | |
| 295 } | |
| 296 | |
| 297 /* transparency state */ | |
| 298 | |
| 299 obj = pdf_dict_get(ctx, dict, PDF_NAME(CA)); | |
| 300 if (pdf_is_number(ctx, obj) && proc->op_gs_CA) | |
| 301 proc->op_gs_CA(ctx, proc, pdf_to_real(ctx, obj)); | |
| 302 | |
| 303 obj = pdf_dict_get(ctx, dict, PDF_NAME(ca)); | |
| 304 if (pdf_is_number(ctx, obj) && proc->op_gs_ca) | |
| 305 proc->op_gs_ca(ctx, proc, pdf_to_real(ctx, obj)); | |
| 306 | |
| 307 obj = pdf_dict_get(ctx, dict, PDF_NAME(BM)); | |
| 308 if (pdf_is_array(ctx, obj)) | |
| 309 obj = pdf_array_get(ctx, obj, 0); | |
| 310 if (pdf_is_name(ctx, obj) && proc->op_gs_BM) | |
| 311 proc->op_gs_BM(ctx, proc, pdf_to_name(ctx, obj)); | |
| 312 | |
| 313 obj = pdf_dict_get(ctx, dict, PDF_NAME(SMask)); | |
| 314 if (proc->op_gs_SMask) | |
| 315 { | |
| 316 if (pdf_is_dict(ctx, obj)) | |
| 317 { | |
| 318 pdf_obj *xobj, *s, *bc, *tr; | |
| 319 float softmask_bc[FZ_MAX_COLORS]; | |
| 320 fz_colorspace *softmask_cs; | |
| 321 int colorspace_n = 1; | |
| 322 int k, luminosity; | |
| 323 | |
| 324 xobj = pdf_dict_get(ctx, obj, PDF_NAME(G)); | |
| 325 | |
| 326 softmask_cs = pdf_xobject_colorspace(ctx, xobj); | |
| 327 fz_try(ctx) | |
| 328 { | |
| 329 if (softmask_cs) | |
| 330 colorspace_n = fz_colorspace_n(ctx, softmask_cs); | |
| 331 | |
| 332 /* Default background color is black. */ | |
| 333 for (k = 0; k < colorspace_n; k++) | |
| 334 softmask_bc[k] = 0; | |
| 335 /* Which in CMYK means not all zeros! This should really be | |
| 336 * a test for subtractive color spaces, but this will have | |
| 337 * to do for now. */ | |
| 338 if (fz_colorspace_is_cmyk(ctx, softmask_cs)) | |
| 339 { | |
| 340 /* Default background color is black. */ | |
| 341 for (k = 0; k < colorspace_n; k++) | |
| 342 softmask_bc[k] = 0; | |
| 343 /* Which in CMYK means not all zeros! This should really be | |
| 344 * a test for subtractive color spaces, but this will have | |
| 345 * to do for now. */ | |
| 346 if (fz_colorspace_is_cmyk(ctx, softmask_cs)) | |
| 347 softmask_bc[3] = 1.0f; | |
| 348 } | |
| 349 | |
| 350 bc = pdf_dict_get(ctx, obj, PDF_NAME(BC)); | |
| 351 if (pdf_is_array(ctx, bc)) | |
| 352 { | |
| 353 for (k = 0; k < colorspace_n; k++) | |
| 354 softmask_bc[k] = pdf_array_get_real(ctx, bc, k); | |
| 355 } | |
| 356 | |
| 357 s = pdf_dict_get(ctx, obj, PDF_NAME(S)); | |
| 358 if (pdf_name_eq(ctx, s, PDF_NAME(Luminosity))) | |
| 359 luminosity = 1; | |
| 360 else | |
| 361 luminosity = 0; | |
| 362 | |
| 363 tr = pdf_dict_get(ctx, obj, PDF_NAME(TR)); | |
| 364 if (tr && pdf_name_eq(ctx, tr, PDF_NAME(Identity))) | |
| 365 tr = NULL; | |
| 366 | |
| 367 proc->op_gs_SMask(ctx, proc, xobj, softmask_cs, softmask_bc, luminosity, tr); | |
| 368 } | |
| 369 fz_always(ctx) | |
| 370 fz_drop_colorspace(ctx, softmask_cs); | |
| 371 fz_catch(ctx) | |
| 372 fz_rethrow(ctx); | |
| 373 } | |
| 374 else if (pdf_is_name(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(None))) | |
| 375 { | |
| 376 proc->op_gs_SMask(ctx, proc, NULL, NULL, NULL, 0, NULL); | |
| 377 } | |
| 378 } | |
| 379 } | |
| 380 | |
| 381 static void | |
| 382 pdf_process_Do(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) | |
| 383 { | |
| 384 pdf_obj *xres, *xobj, *subtype; | |
| 385 | |
| 386 xres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(XObject)); | |
| 387 xobj = pdf_dict_gets(ctx, xres, csi->name); | |
| 388 if (!xobj) | |
| 389 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find XObject resource '%s'", csi->name); | |
| 390 subtype = pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)); | |
| 391 if (pdf_name_eq(ctx, subtype, PDF_NAME(Form))) | |
| 392 { | |
| 393 pdf_obj *st = pdf_dict_get(ctx, xobj, PDF_NAME(Subtype2)); | |
| 394 if (st) | |
| 395 subtype = st; | |
| 396 } | |
| 397 if (!pdf_is_name(ctx, subtype)) | |
| 398 fz_throw(ctx, FZ_ERROR_SYNTAX, "no XObject subtype specified"); | |
| 399 | |
| 400 if (pdf_is_ocg_hidden(ctx, csi->doc, csi->rdb, proc->usage, pdf_dict_get(ctx, xobj, PDF_NAME(OC)))) | |
| 401 return; | |
| 402 | |
| 403 if (pdf_name_eq(ctx, subtype, PDF_NAME(Form))) | |
| 404 { | |
| 405 if (proc->op_Do_form) | |
| 406 proc->op_Do_form(ctx, proc, csi->name, xobj); | |
| 407 } | |
| 408 | |
| 409 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Image))) | |
| 410 { | |
| 411 if (proc->op_Do_image) | |
| 412 { | |
| 413 fz_image *image = NULL; | |
| 414 | |
| 415 if (proc->requirements && PDF_PROCESSOR_REQUIRES_DECODED_IMAGES) | |
| 416 image = pdf_load_image(ctx, csi->doc, xobj); | |
| 417 fz_try(ctx) | |
| 418 proc->op_Do_image(ctx, proc, csi->name, image); | |
| 419 fz_always(ctx) | |
| 420 fz_drop_image(ctx, image); | |
| 421 fz_catch(ctx) | |
| 422 fz_rethrow(ctx); | |
| 423 } | |
| 424 } | |
| 425 | |
| 426 else if (!strcmp(pdf_to_name(ctx, subtype), "PS")) | |
| 427 fz_warn(ctx, "ignoring XObject with subtype PS"); | |
| 428 else | |
| 429 fz_warn(ctx, "ignoring XObject with unknown subtype: '%s'", pdf_to_name(ctx, subtype)); | |
| 430 } | |
| 431 | |
| 432 static void | |
| 433 pdf_process_CS(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, int stroke) | |
| 434 { | |
| 435 fz_colorspace *cs; | |
| 436 | |
| 437 if (!proc->op_CS || !proc->op_cs) | |
| 438 return; | |
| 439 | |
| 440 if (!strcmp(csi->name, "Pattern")) | |
| 441 { | |
| 442 if (stroke) | |
| 443 proc->op_CS(ctx, proc, "Pattern", NULL); | |
| 444 else | |
| 445 proc->op_cs(ctx, proc, "Pattern", NULL); | |
| 446 return; | |
| 447 } | |
| 448 | |
| 449 if (!strcmp(csi->name, "DeviceGray")) | |
| 450 cs = fz_keep_colorspace(ctx, fz_device_gray(ctx)); | |
| 451 else if (!strcmp(csi->name, "DeviceRGB")) | |
| 452 cs = fz_keep_colorspace(ctx, fz_device_rgb(ctx)); | |
| 453 else if (!strcmp(csi->name, "DeviceCMYK")) | |
| 454 cs = fz_keep_colorspace(ctx, fz_device_cmyk(ctx)); | |
| 455 else | |
| 456 { | |
| 457 pdf_obj *csres, *csobj; | |
| 458 csres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(ColorSpace)); | |
| 459 csobj = pdf_dict_gets(ctx, csres, csi->name); | |
| 460 if (!csobj) | |
| 461 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find ColorSpace resource '%s'", csi->name); | |
| 462 if (pdf_is_array(ctx, csobj) && pdf_array_len(ctx, csobj) == 1 && pdf_name_eq(ctx, pdf_array_get(ctx, csobj, 0), PDF_NAME(Pattern))) | |
| 463 { | |
| 464 if (stroke) | |
| 465 proc->op_CS(ctx, proc, "Pattern", NULL); | |
| 466 else | |
| 467 proc->op_cs(ctx, proc, "Pattern", NULL); | |
| 468 return; | |
| 469 } | |
| 470 cs = pdf_load_colorspace(ctx, csobj); | |
| 471 } | |
| 472 | |
| 473 fz_try(ctx) | |
| 474 { | |
| 475 if (stroke) | |
| 476 proc->op_CS(ctx, proc, csi->name, cs); | |
| 477 else | |
| 478 proc->op_cs(ctx, proc, csi->name, cs); | |
| 479 } | |
| 480 fz_always(ctx) | |
| 481 fz_drop_colorspace(ctx, cs); | |
| 482 fz_catch(ctx) | |
| 483 fz_rethrow(ctx); | |
| 484 } | |
| 485 | |
| 486 static void | |
| 487 pdf_process_SC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, int stroke) | |
| 488 { | |
| 489 if (csi->name[0]) | |
| 490 { | |
| 491 pdf_obj *patres, *patobj; | |
| 492 int type; | |
| 493 | |
| 494 patres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Pattern)); | |
| 495 patobj = pdf_dict_gets(ctx, patres, csi->name); | |
| 496 if (!patobj) | |
| 497 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find Pattern resource '%s'", csi->name); | |
| 498 | |
| 499 type = pdf_dict_get_int(ctx, patobj, PDF_NAME(PatternType)); | |
| 500 | |
| 501 if (type == 1) | |
| 502 { | |
| 503 if (proc->op_SC_pattern && proc->op_sc_pattern) | |
| 504 { | |
| 505 pdf_pattern *pat = pdf_load_pattern(ctx, csi->doc, patobj); | |
| 506 fz_try(ctx) | |
| 507 { | |
| 508 if (stroke) | |
| 509 proc->op_SC_pattern(ctx, proc, csi->name, pat, csi->top, csi->stack); | |
| 510 else | |
| 511 proc->op_sc_pattern(ctx, proc, csi->name, pat, csi->top, csi->stack); | |
| 512 } | |
| 513 fz_always(ctx) | |
| 514 pdf_drop_pattern(ctx, pat); | |
| 515 fz_catch(ctx) | |
| 516 fz_rethrow(ctx); | |
| 517 } | |
| 518 } | |
| 519 | |
| 520 else if (type == 2) | |
| 521 { | |
| 522 if (proc->op_SC_shade && proc->op_sc_shade) | |
| 523 { | |
| 524 fz_shade *shade = pdf_load_shading(ctx, csi->doc, patobj); | |
| 525 fz_try(ctx) | |
| 526 { | |
| 527 if (stroke) | |
| 528 proc->op_SC_shade(ctx, proc, csi->name, shade); | |
| 529 else | |
| 530 proc->op_sc_shade(ctx, proc, csi->name, shade); | |
| 531 } | |
| 532 fz_always(ctx) | |
| 533 fz_drop_shade(ctx, shade); | |
| 534 fz_catch(ctx) | |
| 535 fz_rethrow(ctx); | |
| 536 } | |
| 537 } | |
| 538 | |
| 539 else | |
| 540 { | |
| 541 fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown pattern type: %d", type); | |
| 542 } | |
| 543 } | |
| 544 | |
| 545 else | |
| 546 { | |
| 547 if (proc->op_SC_color && proc->op_sc_color) | |
| 548 { | |
| 549 if (stroke) | |
| 550 proc->op_SC_color(ctx, proc, csi->top, csi->stack); | |
| 551 else | |
| 552 proc->op_sc_color(ctx, proc, csi->top, csi->stack); | |
| 553 } | |
| 554 } | |
| 555 } | |
| 556 | |
| 557 static pdf_obj * | |
| 558 resolve_properties(fz_context *ctx, pdf_csi *csi, pdf_obj *obj) | |
| 559 { | |
| 560 if (pdf_is_name(ctx, obj)) | |
| 561 return pdf_dict_get(ctx, pdf_dict_get(ctx, csi->rdb, PDF_NAME(Properties)), obj); | |
| 562 else | |
| 563 return obj; | |
| 564 } | |
| 565 | |
| 566 static void | |
| 567 pdf_process_BDC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) | |
| 568 { | |
| 569 if (proc->op_BDC) | |
| 570 proc->op_BDC(ctx, proc, csi->name, csi->obj, resolve_properties(ctx, csi, csi->obj)); | |
| 571 | |
| 572 /* Already hidden, no need to look further */ | |
| 573 if (proc->hidden > 0) | |
| 574 { | |
| 575 ++proc->hidden; | |
| 576 return; | |
| 577 } | |
| 578 | |
| 579 /* We only look at OC groups here */ | |
| 580 if (strcmp(csi->name, "OC")) | |
| 581 return; | |
| 582 | |
| 583 if (pdf_is_ocg_hidden(ctx, csi->doc, csi->rdb, proc->usage, csi->obj)) | |
| 584 ++proc->hidden; | |
| 585 } | |
| 586 | |
| 587 static void | |
| 588 pdf_process_BMC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, const char *name) | |
| 589 { | |
| 590 if (proc->op_BMC) | |
| 591 proc->op_BMC(ctx, proc, name); | |
| 592 if (proc->hidden > 0) | |
| 593 ++proc->hidden; | |
| 594 } | |
| 595 | |
| 596 static void | |
| 597 pdf_process_EMC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) | |
| 598 { | |
| 599 if (proc->op_EMC) | |
| 600 proc->op_EMC(ctx, proc); | |
| 601 if (proc->hidden > 0) | |
| 602 --proc->hidden; | |
| 603 } | |
| 604 | |
| 605 static void | |
| 606 pdf_process_gsave(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) | |
| 607 { | |
| 608 ++csi->gstate; | |
| 609 if (proc->op_q) | |
| 610 proc->op_q(ctx, proc); | |
| 611 } | |
| 612 | |
| 613 static void | |
| 614 pdf_process_grestore(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) | |
| 615 { | |
| 616 --csi->gstate; | |
| 617 if (proc->op_Q) | |
| 618 proc->op_Q(ctx, proc); | |
| 619 } | |
| 620 | |
| 621 static void | |
| 622 pdf_process_end(fz_context *ctx, pdf_processor *proc, pdf_csi *csi) | |
| 623 { | |
| 624 if (proc->op_EOD) | |
| 625 proc->op_EOD(ctx, proc); | |
| 626 while (csi->gstate > 0) | |
| 627 pdf_process_grestore(ctx, proc, csi); | |
| 628 if (proc->op_END) | |
| 629 proc->op_END(ctx, proc); | |
| 630 } | |
| 631 | |
/*
	Return 1 if 'word' is exactly one of the non-finite number
	spellings "Infinity", "NaN", "inf" or "nan", and 0 otherwise.
*/
static int is_known_bad_word(const char *word)
{
	static const char *const bad_words[] = { "Infinity", "NaN", "inf", "nan" };
	size_t i;

	for (i = 0; i < sizeof bad_words / sizeof bad_words[0]; i++)
	{
		if (strcmp(word, bad_words[i]) == 0)
			return 1;
	}
	return 0;
}
| 643 | |
/* Pack a 1, 2 or 3 character operator into an int, first character in
 * the low byte. Arguments are parenthesised so that any expression may
 * be passed safely. */
#define A(a) (a)
#define B(a,b) ((a) | ((b) << 8))
#define C(a,b,c) ((a) | ((b) << 8) | ((c) << 16))
| 647 | |
| 648 static void | |
| 649 pdf_process_keyword(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream *stm, char *word) | |
| 650 { | |
| 651 float *s = csi->stack; | |
| 652 char csname[40]; | |
| 653 int key; | |
| 654 | |
| 655 key = word[0]; | |
| 656 if (word[1]) | |
| 657 { | |
| 658 key |= word[1] << 8; | |
| 659 if (word[2]) | |
| 660 { | |
| 661 key |= word[2] << 16; | |
| 662 if (word[3]) | |
| 663 key = 0; | |
| 664 } | |
| 665 } | |
| 666 | |
| 667 switch (key) | |
| 668 { | |
| 669 default: | |
| 670 if (!csi->xbalance) | |
| 671 { | |
| 672 if (is_known_bad_word(word)) | |
| 673 fz_warn(ctx, "unknown keyword: '%s'", word); | |
| 674 else | |
| 675 fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown keyword: '%s'", word); | |
| 676 } | |
| 677 break; | |
| 678 | |
| 679 /* general graphics state */ | |
| 680 case A('w'): if (proc->op_w) proc->op_w(ctx, proc, s[0]); break; | |
| 681 case A('j'): if (proc->op_j) proc->op_j(ctx, proc, fz_clampi(s[0], 0, 2)); break; | |
| 682 case A('J'): if (proc->op_J) proc->op_J(ctx, proc, fz_clampi(s[0], 0, 2)); break; | |
| 683 case A('M'): if (proc->op_M) proc->op_M(ctx, proc, s[0]); break; | |
| 684 case A('d'): if (proc->op_d) proc->op_d(ctx, proc, csi->obj, s[0]); break; | |
| 685 case B('r','i'): if (proc->op_ri) proc->op_ri(ctx, proc, csi->name); break; | |
| 686 case A('i'): if (proc->op_i) proc->op_i(ctx, proc, s[0]); break; | |
| 687 | |
| 688 case B('g','s'): | |
| 689 { | |
| 690 pdf_obj *gsres, *gsobj; | |
| 691 gsres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(ExtGState)); | |
| 692 gsobj = pdf_dict_gets(ctx, gsres, csi->name); | |
| 693 if (!gsobj) | |
| 694 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find ExtGState resource '%s'", csi->name); | |
| 695 if (proc->op_gs_begin) | |
| 696 proc->op_gs_begin(ctx, proc, csi->name, gsobj); | |
| 697 pdf_process_extgstate(ctx, proc, csi, gsobj); | |
| 698 if (proc->op_gs_end) | |
| 699 proc->op_gs_end(ctx, proc); | |
| 700 } | |
| 701 break; | |
| 702 | |
| 703 /* special graphics state */ | |
| 704 case A('q'): pdf_process_gsave(ctx, proc, csi); break; | |
| 705 case A('Q'): pdf_process_grestore(ctx, proc, csi); break; | |
| 706 case B('c','m'): if (proc->op_cm) proc->op_cm(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; | |
| 707 | |
| 708 /* path construction */ | |
| 709 case A('m'): if (proc->op_m) proc->op_m(ctx, proc, s[0], s[1]); break; | |
| 710 case A('l'): if (proc->op_l) proc->op_l(ctx, proc, s[0], s[1]); break; | |
| 711 case A('c'): if (proc->op_c) proc->op_c(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; | |
| 712 case A('v'): if (proc->op_v) proc->op_v(ctx, proc, s[0], s[1], s[2], s[3]); break; | |
| 713 case A('y'): if (proc->op_y) proc->op_y(ctx, proc, s[0], s[1], s[2], s[3]); break; | |
| 714 case A('h'): if (proc->op_h) proc->op_h(ctx, proc); break; | |
| 715 case B('r','e'): if (proc->op_re) proc->op_re(ctx, proc, s[0], s[1], s[2], s[3]); break; | |
| 716 | |
| 717 /* path painting */ | |
| 718 case A('S'): if (proc->op_S) proc->op_S(ctx, proc); break; | |
| 719 case A('s'): if (proc->op_s) proc->op_s(ctx, proc); break; | |
| 720 case A('F'): if (proc->op_F) proc->op_F(ctx, proc); break; | |
| 721 case A('f'): if (proc->op_f) proc->op_f(ctx, proc); break; | |
| 722 case B('f','*'): if (proc->op_fstar) proc->op_fstar(ctx, proc); break; | |
| 723 case A('B'): if (proc->op_B) proc->op_B(ctx, proc); break; | |
| 724 case B('B','*'): if (proc->op_Bstar) proc->op_Bstar(ctx, proc); break; | |
| 725 case A('b'): if (proc->op_b) proc->op_b(ctx, proc); break; | |
| 726 case B('b','*'): if (proc->op_bstar) proc->op_bstar(ctx, proc); break; | |
| 727 case A('n'): if (proc->op_n) proc->op_n(ctx, proc); break; | |
| 728 | |
| 729 /* path clipping */ | |
| 730 case A('W'): if (proc->op_W) proc->op_W(ctx, proc); break; | |
| 731 case B('W','*'): if (proc->op_Wstar) proc->op_Wstar(ctx, proc); break; | |
| 732 | |
| 733 /* text objects */ | |
| 734 case B('B','T'): csi->in_text = 1; if (proc->op_BT) proc->op_BT(ctx, proc); break; | |
| 735 case B('E','T'): csi->in_text = 0; if (proc->op_ET) proc->op_ET(ctx, proc); break; | |
| 736 | |
| 737 /* text state */ | |
| 738 case B('T','c'): if (proc->op_Tc) proc->op_Tc(ctx, proc, s[0]); break; | |
| 739 case B('T','w'): if (proc->op_Tw) proc->op_Tw(ctx, proc, s[0]); break; | |
| 740 case B('T','z'): if (proc->op_Tz) proc->op_Tz(ctx, proc, s[0]); break; | |
| 741 case B('T','L'): if (proc->op_TL) proc->op_TL(ctx, proc, s[0]); break; | |
| 742 case B('T','r'): if (proc->op_Tr) proc->op_Tr(ctx, proc, s[0]); break; | |
| 743 case B('T','s'): if (proc->op_Ts) proc->op_Ts(ctx, proc, s[0]); break; | |
| 744 | |
| 745 case B('T','f'): | |
| 746 if (proc->op_Tf) | |
| 747 { | |
| 748 pdf_obj *fontres, *fontobj; | |
| 749 pdf_font_desc *font; | |
| 750 fontres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Font)); | |
| 751 fontobj = pdf_dict_gets(ctx, fontres, csi->name); | |
| 752 if (pdf_is_dict(ctx, fontobj)) | |
| 753 font = pdf_try_load_font(ctx, csi->doc, csi->rdb, fontobj, csi->cookie); | |
| 754 else | |
| 755 font = pdf_load_hail_mary_font(ctx, csi->doc); | |
| 756 fz_try(ctx) | |
| 757 proc->op_Tf(ctx, proc, csi->name, font, s[0]); | |
| 758 fz_always(ctx) | |
| 759 pdf_drop_font(ctx, font); | |
| 760 fz_catch(ctx) | |
| 761 fz_rethrow(ctx); | |
| 762 } | |
| 763 break; | |
| 764 | |
| 765 /* text positioning */ | |
| 766 case B('T','d'): if (proc->op_Td) proc->op_Td(ctx, proc, s[0], s[1]); break; | |
| 767 case B('T','D'): if (proc->op_TD) proc->op_TD(ctx, proc, s[0], s[1]); break; | |
| 768 case B('T','m'): if (proc->op_Tm) proc->op_Tm(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; | |
| 769 case B('T','*'): if (proc->op_Tstar) proc->op_Tstar(ctx, proc); break; | |
| 770 | |
| 771 /* text showing */ | |
| 772 case B('T','J'): if (proc->op_TJ) proc->op_TJ(ctx, proc, csi->obj); break; | |
| 773 case B('T','j'): | |
| 774 if (proc->op_Tj) | |
| 775 { | |
| 776 if (csi->string_len > 0) | |
| 777 proc->op_Tj(ctx, proc, csi->string, csi->string_len); | |
| 778 else | |
| 779 proc->op_Tj(ctx, proc, pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj)); | |
| 780 } | |
| 781 break; | |
| 782 case A('\''): | |
| 783 if (proc->op_squote) | |
| 784 { | |
| 785 if (csi->string_len > 0) | |
| 786 proc->op_squote(ctx, proc, csi->string, csi->string_len); | |
| 787 else | |
| 788 proc->op_squote(ctx, proc, pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj)); | |
| 789 } | |
| 790 break; | |
| 791 case A('"'): | |
| 792 if (proc->op_dquote) | |
| 793 { | |
| 794 if (csi->string_len > 0) | |
| 795 proc->op_dquote(ctx, proc, s[0], s[1], csi->string, csi->string_len); | |
| 796 else | |
| 797 proc->op_dquote(ctx, proc, s[0], s[1], pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj)); | |
| 798 } | |
| 799 break; | |
| 800 | |
| 801 /* type 3 fonts */ | |
| 802 case B('d','0'): if (proc->op_d0) proc->op_d0(ctx, proc, s[0], s[1]); break; | |
| 803 case B('d','1'): if (proc->op_d1) proc->op_d1(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break; | |
| 804 | |
| 805 /* color */ | |
| 806 case B('C','S'): pdf_process_CS(ctx, proc, csi, 1); break; | |
| 807 case B('c','s'): pdf_process_CS(ctx, proc, csi, 0); break; | |
| 808 case B('S','C'): pdf_process_SC(ctx, proc, csi, 1); break; | |
| 809 case B('s','c'): pdf_process_SC(ctx, proc, csi, 0); break; | |
| 810 case C('S','C','N'): pdf_process_SC(ctx, proc, csi, 1); break; | |
| 811 case C('s','c','n'): pdf_process_SC(ctx, proc, csi, 0); break; | |
| 812 | |
| 813 case A('G'): if (proc->op_G) proc->op_G(ctx, proc, s[0]); break; | |
| 814 case A('g'): if (proc->op_g) proc->op_g(ctx, proc, s[0]); break; | |
| 815 case B('R','G'): if (proc->op_RG) proc->op_RG(ctx, proc, s[0], s[1], s[2]); break; | |
| 816 case B('r','g'): if (proc->op_rg) proc->op_rg(ctx, proc, s[0], s[1], s[2]); break; | |
| 817 case A('K'): if (proc->op_K) proc->op_K(ctx, proc, s[0], s[1], s[2], s[3]); break; | |
| 818 case A('k'): if (proc->op_k) proc->op_k(ctx, proc, s[0], s[1], s[2], s[3]); break; | |
| 819 | |
| 820 /* shadings, images, xobjects */ | |
| 821 case B('B','I'): | |
| 822 { | |
| 823 fz_image *img = parse_inline_image(ctx, csi, stm, csname, sizeof csname); | |
| 824 fz_try(ctx) | |
| 825 { | |
| 826 if (proc->op_BI) | |
| 827 proc->op_BI(ctx, proc, img, csname[0] ? csname : NULL); | |
| 828 } | |
| 829 fz_always(ctx) | |
| 830 fz_drop_image(ctx, img); | |
| 831 fz_catch(ctx) | |
| 832 fz_rethrow(ctx); | |
| 833 } | |
| 834 break; | |
| 835 | |
| 836 case B('s','h'): | |
| 837 if (proc->op_sh) | |
| 838 { | |
| 839 pdf_obj *shaderes, *shadeobj; | |
| 840 fz_shade *shade; | |
| 841 shaderes = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Shading)); | |
| 842 shadeobj = pdf_dict_gets(ctx, shaderes, csi->name); | |
| 843 if (!shadeobj) | |
| 844 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find Shading resource '%s'", csi->name); | |
| 845 shade = pdf_load_shading(ctx, csi->doc, shadeobj); | |
| 846 fz_try(ctx) | |
| 847 proc->op_sh(ctx, proc, csi->name, shade); | |
| 848 fz_always(ctx) | |
| 849 fz_drop_shade(ctx, shade); | |
| 850 fz_catch(ctx) | |
| 851 fz_rethrow(ctx); | |
| 852 } | |
| 853 break; | |
| 854 | |
| 855 case B('D','o'): pdf_process_Do(ctx, proc, csi); break; | |
| 856 | |
| 857 /* marked content */ | |
| 858 case B('M','P'): if (proc->op_MP) proc->op_MP(ctx, proc, csi->name); break; | |
| 859 case B('D','P'): if (proc->op_DP) proc->op_DP(ctx, proc, csi->name, csi->obj, resolve_properties(ctx, csi, csi->obj)); break; | |
| 860 case C('B','M','C'): pdf_process_BMC(ctx, proc, csi, csi->name); break; | |
| 861 case C('B','D','C'): pdf_process_BDC(ctx, proc, csi); break; | |
| 862 case C('E','M','C'): pdf_process_EMC(ctx, proc, csi); break; | |
| 863 | |
| 864 /* compatibility */ | |
| 865 case B('B','X'): ++csi->xbalance; if (proc->op_BX) proc->op_BX(ctx, proc); break; | |
| 866 case B('E','X'): --csi->xbalance; if (proc->op_EX) proc->op_EX(ctx, proc); break; | |
| 867 } | |
| 868 } | |
| 869 | |
| 870 static void | |
| 871 pdf_process_stream(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream *stm) | |
| 872 { | |
| 873 pdf_document *doc = csi->doc; | |
| 874 pdf_lexbuf *buf = csi->buf; | |
| 875 fz_cookie *cookie = csi->cookie; | |
| 876 | |
| 877 pdf_token tok = PDF_TOK_ERROR; | |
| 878 int in_text_array = 0; | |
| 879 int syntax_errors = 0; | |
| 880 | |
| 881 /* make sure we have a clean slate if we come here from flush_text */ | |
| 882 pdf_clear_stack(ctx, csi); | |
| 883 | |
| 884 fz_var(in_text_array); | |
| 885 fz_var(tok); | |
| 886 | |
| 887 if (cookie) | |
| 888 { | |
| 889 cookie->progress_max = (size_t)-1; | |
| 890 cookie->progress = 0; | |
| 891 } | |
| 892 | |
| 893 do | |
| 894 { | |
| 895 fz_try(ctx) | |
| 896 { | |
| 897 do | |
| 898 { | |
| 899 /* Check the cookie */ | |
| 900 if (cookie) | |
| 901 { | |
| 902 if (cookie->abort) | |
| 903 { | |
| 904 tok = PDF_TOK_EOF; | |
| 905 break; | |
| 906 } | |
| 907 cookie->progress++; | |
| 908 } | |
| 909 | |
| 910 tok = pdf_lex(ctx, stm, buf); | |
| 911 | |
| 912 if (in_text_array) | |
| 913 { | |
| 914 switch(tok) | |
| 915 { | |
| 916 case PDF_TOK_CLOSE_ARRAY: | |
| 917 in_text_array = 0; | |
| 918 break; | |
| 919 case PDF_TOK_REAL: | |
| 920 pdf_array_push_real(ctx, csi->obj, buf->f); | |
| 921 break; | |
| 922 case PDF_TOK_INT: | |
| 923 pdf_array_push_int(ctx, csi->obj, buf->i); | |
| 924 break; | |
| 925 case PDF_TOK_STRING: | |
| 926 pdf_array_push_string(ctx, csi->obj, buf->scratch, buf->len); | |
| 927 break; | |
| 928 case PDF_TOK_EOF: | |
| 929 break; | |
| 930 case PDF_TOK_KEYWORD: | |
| 931 if (buf->scratch[0] == 'T' && (buf->scratch[1] == 'w' || buf->scratch[1] == 'c') && buf->scratch[2] == 0) | |
| 932 { | |
| 933 int n = pdf_array_len(ctx, csi->obj); | |
| 934 if (n > 0) | |
| 935 { | |
| 936 pdf_obj *o = pdf_array_get(ctx, csi->obj, n-1); | |
| 937 if (pdf_is_number(ctx, o)) | |
| 938 { | |
| 939 csi->stack[0] = pdf_to_real(ctx, o); | |
| 940 pdf_array_delete(ctx, csi->obj, n-1); | |
| 941 pdf_process_keyword(ctx, proc, csi, stm, buf->scratch); | |
| 942 } | |
| 943 } | |
| 944 } | |
| 945 /* Deliberate Fallthrough! */ | |
| 946 default: | |
| 947 fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in array"); | |
| 948 } | |
| 949 } | |
| 950 else switch (tok) | |
| 951 { | |
| 952 case PDF_TOK_ENDSTREAM: | |
| 953 case PDF_TOK_EOF: | |
| 954 tok = PDF_TOK_EOF; | |
| 955 break; | |
| 956 | |
| 957 case PDF_TOK_OPEN_ARRAY: | |
| 958 if (csi->obj) | |
| 959 { | |
| 960 pdf_drop_obj(ctx, csi->obj); | |
| 961 csi->obj = NULL; | |
| 962 } | |
| 963 if (csi->in_text) | |
| 964 { | |
| 965 in_text_array = 1; | |
| 966 csi->obj = pdf_new_array(ctx, doc, 4); | |
| 967 } | |
| 968 else | |
| 969 { | |
| 970 csi->obj = pdf_parse_array(ctx, doc, stm, buf); | |
| 971 } | |
| 972 break; | |
| 973 | |
| 974 case PDF_TOK_OPEN_DICT: | |
| 975 if (csi->obj) | |
| 976 { | |
| 977 pdf_drop_obj(ctx, csi->obj); | |
| 978 csi->obj = NULL; | |
| 979 } | |
| 980 csi->obj = pdf_parse_dict(ctx, doc, stm, buf); | |
| 981 break; | |
| 982 | |
| 983 case PDF_TOK_NAME: | |
| 984 if (csi->name[0]) | |
| 985 { | |
| 986 pdf_drop_obj(ctx, csi->obj); | |
| 987 csi->obj = NULL; | |
| 988 csi->obj = pdf_new_name(ctx, buf->scratch); | |
| 989 } | |
| 990 else | |
| 991 fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name)); | |
| 992 break; | |
| 993 | |
| 994 case PDF_TOK_INT: | |
| 995 if (csi->top < (int)nelem(csi->stack)) { | |
| 996 csi->stack[csi->top] = buf->i; | |
| 997 csi->top ++; | |
| 998 } | |
| 999 else | |
| 1000 fz_throw(ctx, FZ_ERROR_SYNTAX, "stack overflow"); | |
| 1001 break; | |
| 1002 | |
| 1003 case PDF_TOK_REAL: | |
| 1004 if (csi->top < (int)nelem(csi->stack)) { | |
| 1005 csi->stack[csi->top] = buf->f; | |
| 1006 csi->top ++; | |
| 1007 } | |
| 1008 else | |
| 1009 fz_throw(ctx, FZ_ERROR_SYNTAX, "stack overflow"); | |
| 1010 break; | |
| 1011 | |
| 1012 case PDF_TOK_STRING: | |
| 1013 if (buf->len <= sizeof(csi->string)) | |
| 1014 { | |
| 1015 memcpy(csi->string, buf->scratch, buf->len); | |
| 1016 csi->string_len = buf->len; | |
| 1017 } | |
| 1018 else | |
| 1019 { | |
| 1020 if (csi->obj) | |
| 1021 { | |
| 1022 pdf_drop_obj(ctx, csi->obj); | |
| 1023 csi->obj = NULL; | |
| 1024 } | |
| 1025 csi->obj = pdf_new_string(ctx, buf->scratch, buf->len); | |
| 1026 } | |
| 1027 break; | |
| 1028 | |
| 1029 case PDF_TOK_KEYWORD: | |
| 1030 pdf_process_keyword(ctx, proc, csi, stm, buf->scratch); | |
| 1031 pdf_clear_stack(ctx, csi); | |
| 1032 break; | |
| 1033 | |
| 1034 default: | |
| 1035 fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in content stream"); | |
| 1036 } | |
| 1037 } | |
| 1038 while (tok != PDF_TOK_EOF); | |
| 1039 } | |
| 1040 fz_always(ctx) | |
| 1041 { | |
| 1042 pdf_clear_stack(ctx, csi); | |
| 1043 } | |
| 1044 fz_catch(ctx) | |
| 1045 { | |
| 1046 int caught = fz_caught(ctx); | |
| 1047 if (cookie) | |
| 1048 { | |
| 1049 if (caught == FZ_ERROR_TRYLATER) | |
| 1050 { | |
| 1051 fz_ignore_error(ctx); | |
| 1052 cookie->incomplete++; | |
| 1053 tok = PDF_TOK_EOF; | |
| 1054 } | |
| 1055 else if (caught == FZ_ERROR_ABORT) | |
| 1056 { | |
| 1057 fz_rethrow(ctx); | |
| 1058 } | |
| 1059 else if (caught == FZ_ERROR_SYNTAX) | |
| 1060 { | |
| 1061 fz_report_error(ctx); | |
| 1062 cookie->errors++; | |
| 1063 if (++syntax_errors >= MAX_SYNTAX_ERRORS) | |
| 1064 { | |
| 1065 fz_warn(ctx, "too many syntax errors; ignoring rest of page"); | |
| 1066 tok = PDF_TOK_EOF; | |
| 1067 } | |
| 1068 } | |
| 1069 else | |
| 1070 { | |
| 1071 fz_rethrow(ctx); | |
| 1072 } | |
| 1073 } | |
| 1074 else | |
| 1075 { | |
| 1076 if (caught == FZ_ERROR_TRYLATER) | |
| 1077 { | |
| 1078 fz_ignore_error(ctx); | |
| 1079 tok = PDF_TOK_EOF; | |
| 1080 } | |
| 1081 else if (caught == FZ_ERROR_ABORT) | |
| 1082 { | |
| 1083 fz_rethrow(ctx); | |
| 1084 } | |
| 1085 else if (caught == FZ_ERROR_SYNTAX) | |
| 1086 { | |
| 1087 fz_report_error(ctx); | |
| 1088 if (++syntax_errors >= MAX_SYNTAX_ERRORS) | |
| 1089 { | |
| 1090 fz_warn(ctx, "too many syntax errors; ignoring rest of page"); | |
| 1091 tok = PDF_TOK_EOF; | |
| 1092 } | |
| 1093 } | |
| 1094 else | |
| 1095 { | |
| 1096 fz_rethrow(ctx); | |
| 1097 } | |
| 1098 } | |
| 1099 | |
| 1100 /* If we do catch an error, then reset ourselves to a base lexing state */ | |
| 1101 in_text_array = 0; | |
| 1102 } | |
| 1103 } | |
| 1104 while (tok != PDF_TOK_EOF); | |
| 1105 | |
| 1106 if (syntax_errors > 0) | |
| 1107 fz_warn(ctx, "encountered syntax errors; page may not be correct"); | |
| 1108 } | |
| 1109 | |
| 1110 void pdf_processor_push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res) | |
| 1111 { | |
| 1112 proc->push_resources(ctx, proc, res); | |
| 1113 } | |
| 1114 | |
| 1115 pdf_obj *pdf_processor_pop_resources(fz_context *ctx, pdf_processor *proc) | |
| 1116 { | |
| 1117 return proc->pop_resources(ctx, proc); | |
| 1118 } | |
| 1119 | |
| 1120 void | |
| 1121 pdf_process_raw_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie) | |
| 1122 { | |
| 1123 pdf_csi csi; | |
| 1124 pdf_lexbuf buf; | |
| 1125 fz_stream *stm = NULL; | |
| 1126 | |
| 1127 if (!stmobj) | |
| 1128 return; | |
| 1129 | |
| 1130 fz_var(stm); | |
| 1131 | |
| 1132 pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); | |
| 1133 pdf_init_csi(ctx, &csi, doc, rdb, &buf, cookie); | |
| 1134 | |
| 1135 fz_try(ctx) | |
| 1136 { | |
| 1137 fz_defer_reap_start(ctx); | |
| 1138 stm = pdf_open_contents_stream(ctx, doc, stmobj); | |
| 1139 pdf_process_stream(ctx, proc, &csi, stm); | |
| 1140 pdf_process_end(ctx, proc, &csi); | |
| 1141 } | |
| 1142 fz_always(ctx) | |
| 1143 { | |
| 1144 fz_defer_reap_end(ctx); | |
| 1145 fz_drop_stream(ctx, stm); | |
| 1146 pdf_clear_stack(ctx, &csi); | |
| 1147 pdf_lexbuf_fin(ctx, &buf); | |
| 1148 } | |
| 1149 fz_catch(ctx) | |
| 1150 { | |
| 1151 proc->close_processor = NULL; /* aborted run, don't warn about unclosed processor */ | |
| 1152 fz_rethrow(ctx); | |
| 1153 } | |
| 1154 } | |
| 1155 | |
| 1156 void | |
| 1157 pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie, pdf_obj **out_res) | |
| 1158 { | |
| 1159 pdf_processor_push_resources(ctx, proc, rdb); | |
| 1160 fz_try(ctx) | |
| 1161 pdf_process_raw_contents(ctx, proc, doc, rdb, stmobj, cookie); | |
| 1162 fz_always(ctx) | |
| 1163 { | |
| 1164 pdf_obj *res = pdf_processor_pop_resources(ctx, proc); | |
| 1165 if (out_res) | |
| 1166 *out_res = res; | |
| 1167 else | |
| 1168 pdf_drop_obj(ctx, res); | |
| 1169 } | |
| 1170 fz_catch(ctx) | |
| 1171 fz_rethrow(ctx); | |
| 1172 } | |
| 1173 | |
| 1174 /* Bug 702543: It looks like certain types of annotation are never | |
| 1175 * printed. */ | |
| 1176 static int | |
| 1177 pdf_should_print_annot(fz_context *ctx, pdf_annot *annot) | |
| 1178 { | |
| 1179 enum pdf_annot_type type = pdf_annot_type(ctx, annot); | |
| 1180 | |
| 1181 /* We may need to add more types here. */ | |
| 1182 if (type == PDF_ANNOT_FILE_ATTACHMENT) | |
| 1183 return 0; | |
| 1184 | |
| 1185 return 1; | |
| 1186 } | |
| 1187 | |
| 1188 void | |
| 1189 pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_annot *annot, fz_cookie *cookie) | |
| 1190 { | |
| 1191 int flags = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(F)); | |
| 1192 fz_matrix matrix; | |
| 1193 pdf_obj *ap; | |
| 1194 | |
| 1195 if (flags & (PDF_ANNOT_IS_INVISIBLE | PDF_ANNOT_IS_HIDDEN) || annot->hidden_editing) | |
| 1196 return; | |
| 1197 | |
| 1198 /* popup annotations should never be drawn */ | |
| 1199 if (pdf_annot_type(ctx, annot) == PDF_ANNOT_POPUP) | |
| 1200 return; | |
| 1201 | |
| 1202 if (proc->usage) | |
| 1203 { | |
| 1204 if (!strcmp(proc->usage, "Print")) | |
| 1205 { | |
| 1206 if (!(flags & PDF_ANNOT_IS_PRINT)) | |
| 1207 return; | |
| 1208 if (!pdf_should_print_annot(ctx, annot)) | |
| 1209 return; | |
| 1210 } | |
| 1211 if (!strcmp(proc->usage, "View") && (flags & PDF_ANNOT_IS_NO_VIEW)) | |
| 1212 return; | |
| 1213 } | |
| 1214 | |
| 1215 /* TODO: NoZoom and NoRotate */ | |
| 1216 | |
| 1217 /* XXX what resources, if any, to use for this check? */ | |
| 1218 if (pdf_is_ocg_hidden(ctx, annot->page->doc, NULL, proc->usage, pdf_dict_get(ctx, annot->obj, PDF_NAME(OC)))) | |
| 1219 return; | |
| 1220 | |
| 1221 ap = pdf_annot_ap(ctx, annot); | |
| 1222 | |
| 1223 if (!ap) | |
| 1224 return; | |
| 1225 | |
| 1226 matrix = pdf_annot_transform(ctx, annot); | |
| 1227 if (proc->op_q) | |
| 1228 proc->op_q(ctx, proc); | |
| 1229 if (proc->op_cm) | |
| 1230 proc->op_cm(ctx, proc, | |
| 1231 matrix.a, matrix.b, | |
| 1232 matrix.c, matrix.d, | |
| 1233 matrix.e, matrix.f); | |
| 1234 if (proc->op_Do_form) | |
| 1235 proc->op_Do_form(ctx, proc, NULL, ap); | |
| 1236 if (proc->op_Q) | |
| 1237 proc->op_Q(ctx, proc); | |
| 1238 } | |
| 1239 | |
| 1240 void | |
| 1241 pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, fz_buffer *contents) | |
| 1242 { | |
| 1243 pdf_csi csi; | |
| 1244 pdf_lexbuf buf; | |
| 1245 fz_stream *stm = NULL; | |
| 1246 | |
| 1247 fz_var(stm); | |
| 1248 | |
| 1249 if (!contents) | |
| 1250 return; | |
| 1251 | |
| 1252 pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); | |
| 1253 pdf_init_csi(ctx, &csi, doc, rdb, &buf, NULL); | |
| 1254 | |
| 1255 fz_try(ctx) | |
| 1256 { | |
| 1257 pdf_processor_push_resources(ctx, proc, rdb); | |
| 1258 stm = fz_open_buffer(ctx, contents); | |
| 1259 pdf_process_stream(ctx, proc, &csi, stm); | |
| 1260 pdf_process_end(ctx, proc, &csi); | |
| 1261 } | |
| 1262 fz_always(ctx) | |
| 1263 { | |
| 1264 pdf_drop_obj(ctx, pdf_processor_pop_resources(ctx, proc)); | |
| 1265 fz_drop_stream(ctx, stm); | |
| 1266 pdf_clear_stack(ctx, &csi); | |
| 1267 pdf_lexbuf_fin(ctx, &buf); | |
| 1268 } | |
| 1269 fz_catch(ctx) | |
| 1270 { | |
| 1271 /* Note: Any SYNTAX errors should have been swallowed | |
| 1272 * by pdf_process_stream, but in case any escape from other | |
| 1273 * functions, recast the error type here to be safe. */ | |
| 1274 fz_morph_error(ctx, FZ_ERROR_SYNTAX, FZ_ERROR_FORMAT); | |
| 1275 fz_rethrow(ctx); | |
| 1276 } | |
| 1277 } | |
| 1278 | |
| 1279 void | |
| 1280 pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]) | |
| 1281 { | |
| 1282 save[0] = tos->tm; | |
| 1283 save[1] = tos->tlm; | |
| 1284 } | |
| 1285 | |
| 1286 void | |
| 1287 pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]) | |
| 1288 { | |
| 1289 tos->tm = save[0]; | |
| 1290 tos->tlm = save[1]; | |
| 1291 } | |
| 1292 | |
| 1293 fz_text * | |
| 1294 pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos) | |
| 1295 { | |
| 1296 fz_text *text = tos->text; | |
| 1297 | |
| 1298 tos->text = NULL; | |
| 1299 | |
| 1300 return text; | |
| 1301 } | |
| 1302 | |
| 1303 void | |
| 1304 pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render) | |
| 1305 { | |
| 1306 tos->text = fz_new_text(ctx); | |
| 1307 tos->text_mode = render; | |
| 1308 tos->text_bbox = fz_empty_rect; | |
| 1309 } | |
| 1310 | |
| 1311 int | |
| 1312 pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm, float *adv) | |
| 1313 { | |
| 1314 fz_matrix tsm; | |
| 1315 | |
| 1316 tsm.a = text->size * text->scale; | |
| 1317 tsm.b = 0; | |
| 1318 tsm.c = 0; | |
| 1319 tsm.d = text->size; | |
| 1320 tsm.e = 0; | |
| 1321 tsm.f = text->rise; | |
| 1322 | |
| 1323 if (fontdesc->wmode == 0) | |
| 1324 { | |
| 1325 pdf_hmtx h = pdf_lookup_hmtx(ctx, fontdesc, cid); | |
| 1326 float w0 = *adv = h.w * 0.001f; | |
| 1327 tos->char_tx = (w0 * text->size + text->char_space) * text->scale; | |
| 1328 tos->char_ty = 0; | |
| 1329 } | |
| 1330 else | |
| 1331 { | |
| 1332 pdf_vmtx v = pdf_lookup_vmtx(ctx, fontdesc, cid); | |
| 1333 float w1 = *adv = v.w * 0.001f; | |
| 1334 tsm.e -= v.x * fabsf(text->size) * 0.001f; | |
| 1335 tsm.f -= v.y * text->size * 0.001f; | |
| 1336 tos->char_tx = 0; | |
| 1337 tos->char_ty = w1 * text->size + text->char_space; | |
| 1338 } | |
| 1339 | |
| 1340 *trm = fz_concat(tsm, tos->tm); | |
| 1341 | |
| 1342 tos->cid = cid; | |
| 1343 tos->gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); | |
| 1344 tos->fontdesc = fontdesc; | |
| 1345 | |
| 1346 /* Compensate for the glyph cache limited positioning precision */ | |
| 1347 tos->char_bbox = fz_expand_rect(fz_bound_glyph(ctx, fontdesc->font, tos->gid, *trm), 1); | |
| 1348 | |
| 1349 return tos->gid; | |
| 1350 } | |
| 1351 | |
| 1352 void | |
| 1353 pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos) | |
| 1354 { | |
| 1355 tos->text_bbox = fz_union_rect(tos->text_bbox, tos->char_bbox); | |
| 1356 tos->tm = fz_pre_translate(tos->tm, tos->char_tx, tos->char_ty); | |
| 1357 } | |
| 1358 | |
| 1359 void | |
| 1360 pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty) | |
| 1361 { | |
| 1362 tos->tlm = fz_pre_translate(tos->tlm, tx, ty); | |
| 1363 tos->tm = tos->tlm; | |
| 1364 } | |
| 1365 | |
| 1366 void | |
| 1367 pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f) | |
| 1368 { | |
| 1369 tos->tm.a = a; | |
| 1370 tos->tm.b = b; | |
| 1371 tos->tm.c = c; | |
| 1372 tos->tm.d = d; | |
| 1373 tos->tm.e = e; | |
| 1374 tos->tm.f = f; | |
| 1375 tos->tlm = tos->tm; | |
| 1376 } | |
| 1377 | |
| 1378 void | |
| 1379 pdf_tos_newline(pdf_text_object_state *tos, float leading) | |
| 1380 { | |
| 1381 tos->tlm = fz_pre_translate(tos->tlm, 0, -leading); | |
| 1382 tos->tm = tos->tlm; | |
| 1383 } |
