Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-xref.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "pdf-annot-imp.h" | |
| 25 #include "pdf-imp.h" | |
| 26 | |
| 27 #include <assert.h> | |
| 28 #include <limits.h> | |
| 29 #include <string.h> | |
| 30 | |
#undef DEBUG_PROGESSIVE_ADVANCE

/* DEBUGMESS((ctx, fmt, ...)) hands its parenthesised argument list to
 * fz_warn when DEBUG_PROGESSIVE_ADVANCE is defined; otherwise it expands
 * to an empty statement. */
#ifdef DEBUG_PROGESSIVE_ADVANCE
#define DEBUGMESS(A) do { fz_warn A; } while (0)
#else
#define DEBUGMESS(A) do { } while (0)
#endif

/* True when c is one of the characters '0'..'9'.
 * The argument is parenthesised so that expressions built from operators
 * with lower precedence than the comparisons (e.g. isdigit(x | 0)) are
 * compared as a whole. Note that c is evaluated twice, so it must not
 * have side effects. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
| 40 | |
/* Returns 1 if ch is NUL, TAB, LF, FF, CR or SPACE, and 0 otherwise. */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
| 47 | |
| 48 /* | |
| 49 * xref tables | |
| 50 */ | |
| 51 | |
| 52 static void | |
| 53 pdf_drop_xref_subsec(fz_context *ctx, pdf_xref *xref) | |
| 54 { | |
| 55 pdf_xref_subsec *sub = xref->subsec; | |
| 56 pdf_unsaved_sig *usig; | |
| 57 int e; | |
| 58 | |
| 59 while (sub != NULL) | |
| 60 { | |
| 61 pdf_xref_subsec *next_sub = sub->next; | |
| 62 for (e = 0; e < sub->len; e++) | |
| 63 { | |
| 64 pdf_xref_entry *entry = &sub->table[e]; | |
| 65 pdf_drop_obj(ctx, entry->obj); | |
| 66 fz_drop_buffer(ctx, entry->stm_buf); | |
| 67 } | |
| 68 fz_free(ctx, sub->table); | |
| 69 fz_free(ctx, sub); | |
| 70 sub = next_sub; | |
| 71 } | |
| 72 | |
| 73 pdf_drop_obj(ctx, xref->pre_repair_trailer); | |
| 74 pdf_drop_obj(ctx, xref->trailer); | |
| 75 | |
| 76 while ((usig = xref->unsaved_sigs) != NULL) | |
| 77 { | |
| 78 xref->unsaved_sigs = usig->next; | |
| 79 pdf_drop_obj(ctx, usig->field); | |
| 80 pdf_drop_signer(ctx, usig->signer); | |
| 81 fz_free(ctx, usig); | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_xref *xref_sections, int num_xref_sections) | |
| 86 { | |
| 87 int x; | |
| 88 | |
| 89 for (x = 0; x < num_xref_sections; x++) | |
| 90 pdf_drop_xref_subsec(ctx, &xref_sections[x]); | |
| 91 | |
| 92 fz_free(ctx, xref_sections); | |
| 93 } | |
| 94 | |
| 95 static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc) | |
| 96 { | |
| 97 pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections); | |
| 98 pdf_drop_xref_sections_imp(ctx, doc, doc->xref_sections, doc->num_xref_sections); | |
| 99 | |
| 100 doc->saved_xref_sections = NULL; | |
| 101 doc->saved_num_xref_sections = 0; | |
| 102 doc->xref_sections = NULL; | |
| 103 doc->num_xref_sections = 0; | |
| 104 doc->num_incremental_sections = 0; | |
| 105 } | |
| 106 | |
| 107 static void | |
| 108 extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen) | |
| 109 { | |
| 110 int i; | |
| 111 | |
| 112 doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int); | |
| 113 for (i = doc->max_xref_len; i < newlen; i++) | |
| 114 { | |
| 115 doc->xref_index[i] = 0; | |
| 116 } | |
| 117 doc->max_xref_len = newlen; | |
| 118 } | |
| 119 | |
| 120 static void | |
| 121 resize_xref_sub(fz_context *ctx, pdf_xref *xref, int base, int newlen) | |
| 122 { | |
| 123 pdf_xref_subsec *sub; | |
| 124 int i; | |
| 125 | |
| 126 assert(xref != NULL); | |
| 127 sub = xref->subsec; | |
| 128 assert(sub->next == NULL && sub->start == base && sub->len+base == xref->num_objects); | |
| 129 assert(newlen+base > xref->num_objects); | |
| 130 | |
| 131 sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry); | |
| 132 for (i = sub->len; i < newlen; i++) | |
| 133 { | |
| 134 sub->table[i].type = 0; | |
| 135 sub->table[i].ofs = 0; | |
| 136 sub->table[i].gen = 0; | |
| 137 sub->table[i].num = 0; | |
| 138 sub->table[i].stm_ofs = 0; | |
| 139 sub->table[i].stm_buf = NULL; | |
| 140 sub->table[i].obj = NULL; | |
| 141 } | |
| 142 sub->len = newlen; | |
| 143 if (newlen+base > xref->num_objects) | |
| 144 xref->num_objects = newlen+base; | |
| 145 } | |
| 146 | |
| 147 /* This is only ever called when we already have an incremental | |
| 148 * xref. This means there will only be 1 subsec, and it will be | |
| 149 * a complete subsec. */ | |
| 150 static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen) | |
| 151 { | |
| 152 pdf_xref *xref = &doc->xref_sections[doc->xref_base]; | |
| 153 | |
| 154 resize_xref_sub(ctx, xref, 0, newlen); | |
| 155 if (doc->max_xref_len < newlen) | |
| 156 extend_xref_index(ctx, doc, newlen); | |
| 157 } | |
| 158 | |
| 159 static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc) | |
| 160 { | |
| 161 pdf_xref *xref; | |
| 162 doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref); | |
| 163 doc->num_xref_sections++; | |
| 164 | |
| 165 xref = &doc->xref_sections[doc->num_xref_sections - 1]; | |
| 166 xref->subsec = NULL; | |
| 167 xref->num_objects = 0; | |
| 168 xref->trailer = NULL; | |
| 169 xref->pre_repair_trailer = NULL; | |
| 170 xref->unsaved_sigs = NULL; | |
| 171 xref->unsaved_sigs_end = NULL; | |
| 172 } | |
| 173 | |
| 174 pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc) | |
| 175 { | |
| 176 /* Return the document's trailer (of the appropriate vintage) */ | |
| 177 pdf_xref *xrefs = doc->xref_sections; | |
| 178 | |
| 179 return xrefs ? xrefs[doc->xref_base].trailer : NULL; | |
| 180 } | |
| 181 | |
| 182 void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer) | |
| 183 { | |
| 184 /* Update the trailer of the xref section being populated */ | |
| 185 pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections - 1]; | |
| 186 if (xref->trailer) | |
| 187 { | |
| 188 pdf_drop_obj(ctx, xref->pre_repair_trailer); | |
| 189 xref->pre_repair_trailer = xref->trailer; | |
| 190 } | |
| 191 xref->trailer = pdf_keep_obj(ctx, trailer); | |
| 192 } | |
| 193 | |
| 194 int pdf_xref_len(fz_context *ctx, pdf_document *doc) | |
| 195 { | |
| 196 int i = doc->xref_base; | |
| 197 int xref_len = 0; | |
| 198 | |
| 199 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 200 xref_len = doc->local_xref->num_objects; | |
| 201 | |
| 202 while (i < doc->num_xref_sections) | |
| 203 xref_len = fz_maxi(xref_len, doc->xref_sections[i++].num_objects); | |
| 204 | |
| 205 return xref_len; | |
| 206 } | |
| 207 | |
| 208 /* Ensure that the given xref has a single subsection | |
| 209 * that covers the entire range. */ | |
| 210 static void | |
| 211 ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which) | |
| 212 { | |
| 213 pdf_xref *xref = &doc->xref_sections[which]; | |
| 214 pdf_xref_subsec *sub = xref->subsec; | |
| 215 pdf_xref_subsec *new_sub; | |
| 216 | |
| 217 if (num < xref->num_objects) | |
| 218 num = xref->num_objects; | |
| 219 | |
| 220 if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num) | |
| 221 return; | |
| 222 | |
| 223 new_sub = fz_malloc_struct(ctx, pdf_xref_subsec); | |
| 224 fz_try(ctx) | |
| 225 { | |
| 226 new_sub->table = fz_malloc_struct_array(ctx, num, pdf_xref_entry); | |
| 227 new_sub->start = 0; | |
| 228 new_sub->len = num; | |
| 229 new_sub->next = NULL; | |
| 230 } | |
| 231 fz_catch(ctx) | |
| 232 { | |
| 233 fz_free(ctx, new_sub); | |
| 234 fz_rethrow(ctx); | |
| 235 } | |
| 236 | |
| 237 /* Move objects over to the new subsection and destroy the old | |
| 238 * ones */ | |
| 239 sub = xref->subsec; | |
| 240 while (sub != NULL) | |
| 241 { | |
| 242 pdf_xref_subsec *next = sub->next; | |
| 243 int i; | |
| 244 | |
| 245 for (i = 0; i < sub->len; i++) | |
| 246 { | |
| 247 new_sub->table[i+sub->start] = sub->table[i]; | |
| 248 } | |
| 249 fz_free(ctx, sub->table); | |
| 250 fz_free(ctx, sub); | |
| 251 sub = next; | |
| 252 } | |
| 253 xref->num_objects = num; | |
| 254 xref->subsec = new_sub; | |
| 255 if (doc->max_xref_len < num) | |
| 256 extend_xref_index(ctx, doc, num); | |
| 257 } | |
| 258 | |
| 259 static pdf_xref_entry * | |
| 260 pdf_get_local_xref_entry(fz_context *ctx, pdf_document *doc, int num) | |
| 261 { | |
| 262 pdf_xref *xref = doc->local_xref; | |
| 263 pdf_xref_subsec *sub; | |
| 264 | |
| 265 if (xref == NULL || doc->local_xref_nesting == 0) | |
| 266 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Local xref not present!"); | |
| 267 | |
| 268 /* Local xrefs only ever have 1 section, and it should be solid. */ | |
| 269 sub = xref->subsec; | |
| 270 assert(sub && !sub->next); | |
| 271 if (num >= sub->start && num < sub->start + sub->len) | |
| 272 return &sub->table[num - sub->start]; | |
| 273 | |
| 274 /* Expand the xref so we can return a pointer. */ | |
| 275 resize_xref_sub(ctx, xref, 0, num+1); | |
| 276 sub = xref->subsec; | |
| 277 return &sub->table[num - sub->start]; | |
| 278 } | |
| 279 | |
| 280 pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num) | |
| 281 { | |
| 282 /* Return an entry within the xref currently being populated */ | |
| 283 pdf_xref *xref; | |
| 284 pdf_xref_subsec *sub; | |
| 285 | |
| 286 if (doc->num_xref_sections == 0) | |
| 287 { | |
| 288 doc->xref_sections = fz_malloc_struct(ctx, pdf_xref); | |
| 289 doc->num_xref_sections = 1; | |
| 290 } | |
| 291 | |
| 292 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 293 return pdf_get_local_xref_entry(ctx, doc, num); | |
| 294 | |
| 295 /* Prevent accidental heap underflow */ | |
| 296 if (num < 0 || num > PDF_MAX_OBJECT_NUMBER) | |
| 297 fz_throw(ctx, FZ_ERROR_ARGUMENT, "object number out of range (%d)", num); | |
| 298 | |
| 299 /* Return the pointer to the entry in the last section. */ | |
| 300 xref = &doc->xref_sections[doc->num_xref_sections-1]; | |
| 301 | |
| 302 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 303 { | |
| 304 if (num >= sub->start && num < sub->start + sub->len) | |
| 305 return &sub->table[num-sub->start]; | |
| 306 } | |
| 307 | |
| 308 /* We've been asked for an object that's not in a subsec. */ | |
| 309 ensure_solid_xref(ctx, doc, num+1, doc->num_xref_sections-1); | |
| 310 xref = &doc->xref_sections[doc->num_xref_sections-1]; | |
| 311 sub = xref->subsec; | |
| 312 | |
| 313 return &sub->table[num-sub->start]; | |
| 314 } | |
| 315 | |
| 316 /* It is vital that pdf_get_xref_entry_aux called with !solidify_if_needed | |
| 317 * and a value object number, does NOT try/catch or throw. */ | |
| 318 static | |
| 319 pdf_xref_entry *pdf_get_xref_entry_aux(fz_context *ctx, pdf_document *doc, int i, int solidify_if_needed) | |
| 320 { | |
| 321 pdf_xref *xref = NULL; | |
| 322 pdf_xref_subsec *sub; | |
| 323 int j; | |
| 324 | |
| 325 if (i < 0) | |
| 326 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested"); | |
| 327 | |
| 328 if (i < doc->max_xref_len) | |
| 329 j = doc->xref_index[i]; | |
| 330 else | |
| 331 j = 0; | |
| 332 | |
| 333 /* If we have an active local xref, check there first. */ | |
| 334 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 335 { | |
| 336 xref = doc->local_xref; | |
| 337 | |
| 338 if (i < xref->num_objects) | |
| 339 { | |
| 340 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 341 { | |
| 342 pdf_xref_entry *entry; | |
| 343 | |
| 344 if (i < sub->start || i >= sub->start + sub->len) | |
| 345 continue; | |
| 346 | |
| 347 entry = &sub->table[i - sub->start]; | |
| 348 if (entry->type) | |
| 349 return entry; | |
| 350 } | |
| 351 } | |
| 352 } | |
| 353 | |
| 354 /* We may be accessing an earlier version of the document using xref_base | |
| 355 * and j may be an index into a later xref section */ | |
| 356 if (doc->xref_base > j) | |
| 357 j = doc->xref_base; | |
| 358 else | |
| 359 j = 0; | |
| 360 | |
| 361 | |
| 362 /* Find the first xref section where the entry is defined. */ | |
| 363 for (; j < doc->num_xref_sections; j++) | |
| 364 { | |
| 365 xref = &doc->xref_sections[j]; | |
| 366 | |
| 367 if (i < xref->num_objects) | |
| 368 { | |
| 369 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 370 { | |
| 371 pdf_xref_entry *entry; | |
| 372 | |
| 373 if (i < sub->start || i >= sub->start + sub->len) | |
| 374 continue; | |
| 375 | |
| 376 entry = &sub->table[i - sub->start]; | |
| 377 if (entry->type) | |
| 378 { | |
| 379 /* Don't update xref_index if xref_base may have | |
| 380 * influenced the value of j */ | |
| 381 if (doc->xref_base == 0) | |
| 382 doc->xref_index[i] = j; | |
| 383 return entry; | |
| 384 } | |
| 385 } | |
| 386 } | |
| 387 } | |
| 388 | |
| 389 /* Didn't find the entry in any section. Return the entry from | |
| 390 * the local_xref (if there is one active), or the final section. */ | |
| 391 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 392 { | |
| 393 if (xref == NULL || i < xref->num_objects) | |
| 394 { | |
| 395 xref = doc->local_xref; | |
| 396 sub = xref->subsec; | |
| 397 assert(sub != NULL && sub->next == NULL); | |
| 398 if (i >= sub->start && i < sub->start + sub->len) | |
| 399 return &sub->table[i - sub->start]; | |
| 400 } | |
| 401 | |
| 402 /* Expand the xref so we can return a pointer. */ | |
| 403 resize_xref_sub(ctx, xref, 0, i+1); | |
| 404 sub = xref->subsec; | |
| 405 return &sub->table[i - sub->start]; | |
| 406 } | |
| 407 | |
| 408 doc->xref_index[i] = 0; | |
| 409 if (xref == NULL || i < xref->num_objects) | |
| 410 { | |
| 411 xref = &doc->xref_sections[doc->xref_base]; | |
| 412 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 413 { | |
| 414 if (i >= sub->start && i < sub->start + sub->len) | |
| 415 return &sub->table[i - sub->start]; | |
| 416 } | |
| 417 } | |
| 418 | |
| 419 /* Some really hairy code here. When we are reading the file in | |
| 420 * initially, we read from 'newest' to 'oldest' (i.e. from 0 to | |
| 421 * doc->num_xref_sections-1). Each section is created initially | |
| 422 * with num_objects == 0 in it, and remains like that while we | |
| 423 * are parsing the stream from the file. This is the only time | |
| 424 * we'll ever have xref_sections with 0 objects in them. */ | |
| 425 if (doc->xref_sections[doc->num_xref_sections-1].num_objects == 0) | |
| 426 { | |
| 427 /* The oldest xref section has 0 objects in it. So we are | |
| 428 * parsing an xref stream while loading. We don't want to | |
| 429 * solidify the xref we are currently parsing for (as it'll | |
| 430 * get very confused, and end up a different 'shape' in | |
| 431 * memory to that which is in the file, and would hence | |
| 432 * render 'fingerprinting' for snapshotting invalid) so | |
| 433 * just give up at this point. */ | |
| 434 return NULL; | |
| 435 } | |
| 436 | |
| 437 if (!solidify_if_needed) | |
| 438 return NULL; | |
| 439 | |
| 440 /* At this point, we solidify the xref. This ensures that we | |
| 441 * can return a pointer. This is the only case where this function | |
| 442 * might throw an exception, and it will never happen when we are | |
| 443 * working within a 'solid' xref. */ | |
| 444 ensure_solid_xref(ctx, doc, i+1, 0); | |
| 445 xref = &doc->xref_sections[0]; | |
| 446 sub = xref->subsec; | |
| 447 return &sub->table[i - sub->start]; | |
| 448 } | |
| 449 | |
| 450 pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i) | |
| 451 { | |
| 452 return pdf_get_xref_entry_aux(ctx, doc, i, 1); | |
| 453 } | |
| 454 | |
| 455 pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i) | |
| 456 { | |
| 457 return pdf_get_xref_entry_aux(ctx, doc, i, 0); | |
| 458 } | |
| 459 | |
| 460 pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i) | |
| 461 { | |
| 462 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i); | |
| 463 if (entry != NULL) | |
| 464 return entry; | |
| 465 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find object in xref (%d 0 R), but not allowed to return NULL", i); | |
| 466 } | |
| 467 | |
| 468 void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int, pdf_document *, void *), void *arg) | |
| 469 { | |
| 470 int i, j; | |
| 471 pdf_xref_subsec *sub; | |
| 472 int xref_base = doc->xref_base; | |
| 473 | |
| 474 fz_try(ctx) | |
| 475 { | |
| 476 /* Map over any active local xref first. */ | |
| 477 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 478 { | |
| 479 pdf_xref *xref = doc->local_xref; | |
| 480 | |
| 481 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 482 { | |
| 483 for (i = sub->start; i < sub->start + sub->len; i++) | |
| 484 { | |
| 485 pdf_xref_entry *entry = &sub->table[i - sub->start]; | |
| 486 if (entry->type) | |
| 487 fn(ctx, entry, i, doc, arg); | |
| 488 } | |
| 489 } | |
| 490 } | |
| 491 | |
| 492 for (j = 0; j < doc->num_xref_sections; j++) | |
| 493 { | |
| 494 pdf_xref *xref = &doc->xref_sections[j]; | |
| 495 doc->xref_base = j; | |
| 496 | |
| 497 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 498 { | |
| 499 for (i = sub->start; i < sub->start + sub->len; i++) | |
| 500 { | |
| 501 pdf_xref_entry *entry = &sub->table[i - sub->start]; | |
| 502 if (entry->type) | |
| 503 fn(ctx, entry, i, doc, arg); | |
| 504 } | |
| 505 } | |
| 506 } | |
| 507 } | |
| 508 fz_always(ctx) | |
| 509 { | |
| 510 doc->xref_base = xref_base; | |
| 511 } | |
| 512 fz_catch(ctx) | |
| 513 fz_rethrow(ctx); | |
| 514 } | |
| 515 | |
| 516 /* | |
| 517 Ensure we have an incremental xref section where we can store | |
| 518 updated versions of indirect objects. This is a new xref section | |
| 519 consisting of a single xref subsection. | |
| 520 */ | |
| 521 static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc) | |
| 522 { | |
| 523 /* If there are as yet no incremental sections, or if the most recent | |
| 524 * one has been used to sign a signature field, then we need a new one. | |
| 525 * After a signing, any further document changes require a new increment */ | |
| 526 if ((doc->num_incremental_sections == 0 || doc->xref_sections[0].unsaved_sigs != NULL) | |
| 527 && !doc->disallow_new_increments) | |
| 528 { | |
| 529 pdf_xref *xref = &doc->xref_sections[0]; | |
| 530 pdf_xref *pxref; | |
| 531 pdf_xref_entry *new_table = fz_malloc_struct_array(ctx, xref->num_objects, pdf_xref_entry); | |
| 532 pdf_xref_subsec *sub = NULL; | |
| 533 pdf_obj *trailer = NULL; | |
| 534 int i; | |
| 535 | |
| 536 fz_var(trailer); | |
| 537 fz_var(sub); | |
| 538 fz_try(ctx) | |
| 539 { | |
| 540 sub = fz_malloc_struct(ctx, pdf_xref_subsec); | |
| 541 trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL; | |
| 542 doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref); | |
| 543 xref = &doc->xref_sections[0]; | |
| 544 pxref = &doc->xref_sections[1]; | |
| 545 memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref)); | |
| 546 /* xref->num_objects is already correct */ | |
| 547 xref->subsec = sub; | |
| 548 sub = NULL; | |
| 549 xref->trailer = trailer; | |
| 550 xref->pre_repair_trailer = NULL; | |
| 551 xref->unsaved_sigs = NULL; | |
| 552 xref->unsaved_sigs_end = NULL; | |
| 553 xref->subsec->next = NULL; | |
| 554 xref->subsec->len = xref->num_objects; | |
| 555 xref->subsec->start = 0; | |
| 556 xref->subsec->table = new_table; | |
| 557 doc->num_xref_sections++; | |
| 558 doc->num_incremental_sections++; | |
| 559 } | |
| 560 fz_catch(ctx) | |
| 561 { | |
| 562 fz_free(ctx, sub); | |
| 563 fz_free(ctx, new_table); | |
| 564 pdf_drop_obj(ctx, trailer); | |
| 565 fz_rethrow(ctx); | |
| 566 } | |
| 567 | |
| 568 /* Update the xref_index */ | |
| 569 for (i = 0; i < doc->max_xref_len; i++) | |
| 570 { | |
| 571 doc->xref_index[i]++; | |
| 572 } | |
| 573 } | |
| 574 } | |
| 575 | |
| 576 /* Used when altering a document */ | |
| 577 pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i) | |
| 578 { | |
| 579 pdf_xref *xref; | |
| 580 pdf_xref_subsec *sub; | |
| 581 | |
| 582 /* Make a new final xref section if we haven't already */ | |
| 583 ensure_incremental_xref(ctx, doc); | |
| 584 | |
| 585 xref = &doc->xref_sections[doc->xref_base]; | |
| 586 if (i >= xref->num_objects) | |
| 587 pdf_resize_xref(ctx, doc, i + 1); | |
| 588 | |
| 589 sub = xref->subsec; | |
| 590 assert(sub != NULL && sub->next == NULL); | |
| 591 assert(i >= sub->start && i < sub->start + sub->len); | |
| 592 doc->xref_index[i] = 0; | |
| 593 return &sub->table[i - sub->start]; | |
| 594 } | |
| 595 | |
| 596 int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num) | |
| 597 { | |
| 598 pdf_xref *xref = &doc->xref_sections[doc->xref_base]; | |
| 599 pdf_xref_subsec *sub = xref->subsec; | |
| 600 | |
| 601 assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == 0); | |
| 602 | |
| 603 return num < xref->num_objects && sub->table[num].type; | |
| 604 } | |
| 605 | |
| 606 /* Used when clearing signatures. Removes the signature | |
| 607 from the list of unsaved signed signatures. */ | |
| 608 void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field) | |
| 609 { | |
| 610 int num = pdf_to_num(ctx, field); | |
| 611 int idx = doc->xref_index[num]; | |
| 612 pdf_xref *xref = &doc->xref_sections[idx]; | |
| 613 pdf_unsaved_sig **usigptr = &xref->unsaved_sigs; | |
| 614 pdf_unsaved_sig *usig = xref->unsaved_sigs; | |
| 615 | |
| 616 while (usig) | |
| 617 { | |
| 618 pdf_unsaved_sig **nextptr = &usig->next; | |
| 619 pdf_unsaved_sig *next = usig->next; | |
| 620 | |
| 621 if (usig->field == field) | |
| 622 { | |
| 623 if (xref->unsaved_sigs_end == &usig->next) | |
| 624 { | |
| 625 if (usig->next) | |
| 626 xref->unsaved_sigs_end = &usig->next->next; | |
| 627 else | |
| 628 xref->unsaved_sigs_end = NULL; | |
| 629 } | |
| 630 if (usigptr) | |
| 631 *usigptr = usig->next; | |
| 632 | |
| 633 usig->next = NULL; | |
| 634 pdf_drop_obj(ctx, usig->field); | |
| 635 pdf_drop_signer(ctx, usig->signer); | |
| 636 fz_free(ctx, usig); | |
| 637 | |
| 638 break; | |
| 639 } | |
| 640 | |
| 641 usig = next; | |
| 642 usigptr = nextptr; | |
| 643 } | |
| 644 } | |
| 645 | |
| 646 void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer) | |
| 647 { | |
| 648 pdf_xref *xref = &doc->xref_sections[0]; | |
| 649 pdf_unsaved_sig *unsaved_sig; | |
| 650 | |
| 651 /* Record details within the document structure so that contents | |
| 652 * and byte_range can be updated with their correct values at | |
| 653 * saving time */ | |
| 654 unsaved_sig = fz_malloc_struct(ctx, pdf_unsaved_sig); | |
| 655 unsaved_sig->field = pdf_keep_obj(ctx, field); | |
| 656 unsaved_sig->signer = signer->keep(ctx, signer); | |
| 657 unsaved_sig->next = NULL; | |
| 658 if (xref->unsaved_sigs_end == NULL) | |
| 659 xref->unsaved_sigs_end = &xref->unsaved_sigs; | |
| 660 | |
| 661 *xref->unsaved_sigs_end = unsaved_sig; | |
| 662 xref->unsaved_sigs_end = &unsaved_sig->next; | |
| 663 } | |
| 664 | |
| 665 int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj) | |
| 666 { | |
| 667 int i; | |
| 668 for (i = 0; i < doc->num_incremental_sections; i++) | |
| 669 { | |
| 670 pdf_xref *xref = &doc->xref_sections[i]; | |
| 671 pdf_unsaved_sig *usig; | |
| 672 | |
| 673 for (usig = xref->unsaved_sigs; usig; usig = usig->next) | |
| 674 { | |
| 675 if (usig->field == obj) | |
| 676 return 1; | |
| 677 } | |
| 678 } | |
| 679 | |
| 680 return 0; | |
| 681 } | |
| 682 | |
| 683 void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num) | |
| 684 { | |
| 685 if (doc->num_xref_sections == 0) | |
| 686 pdf_populate_next_xref_level(ctx, doc); | |
| 687 | |
| 688 ensure_solid_xref(ctx, doc, num, 0); | |
| 689 } | |
| 690 | |
| 691 int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num) | |
| 692 { | |
| 693 pdf_xref_entry *new_entry, *old_entry; | |
| 694 pdf_xref_subsec *sub = NULL; | |
| 695 int i; | |
| 696 pdf_obj *copy; | |
| 697 | |
| 698 /* Make sure we have created an xref section for incremental updates */ | |
| 699 ensure_incremental_xref(ctx, doc); | |
| 700 | |
| 701 /* Search for the section that contains this object */ | |
| 702 for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++) | |
| 703 { | |
| 704 pdf_xref *xref = &doc->xref_sections[i]; | |
| 705 | |
| 706 if (num < 0 && num >= xref->num_objects) | |
| 707 break; | |
| 708 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 709 { | |
| 710 if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type) | |
| 711 break; | |
| 712 } | |
| 713 if (sub != NULL) | |
| 714 break; | |
| 715 } | |
| 716 /* sub == NULL implies we did not find it */ | |
| 717 | |
| 718 /* If we don't find it, or it's already in the incremental section, return */ | |
| 719 if (i == 0 || sub == NULL) | |
| 720 return 0; | |
| 721 | |
| 722 copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj); | |
| 723 | |
| 724 /* Move the object to the incremental section */ | |
| 725 i = doc->xref_index[num]; | |
| 726 doc->xref_index[num] = 0; | |
| 727 old_entry = &sub->table[num - sub->start]; | |
| 728 fz_try(ctx) | |
| 729 new_entry = pdf_get_incremental_xref_entry(ctx, doc, num); | |
| 730 fz_catch(ctx) | |
| 731 { | |
| 732 pdf_drop_obj(ctx, copy); | |
| 733 doc->xref_index[num] = i; | |
| 734 fz_rethrow(ctx); | |
| 735 } | |
| 736 *new_entry = *old_entry; | |
| 737 if (new_entry->type == 'o') | |
| 738 { | |
| 739 new_entry->type = 'n'; | |
| 740 new_entry->gen = 0; | |
| 741 } | |
| 742 /* Better keep a copy. We must override the old entry with | |
| 743 * the copy because the caller may be holding a reference to | |
| 744 * the original and expect it to end up in the new entry */ | |
| 745 old_entry->obj = copy; | |
| 746 old_entry->stm_buf = NULL; | |
| 747 | |
| 748 return 1; | |
| 749 } | |
| 750 | |
| 751 void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num) | |
| 752 { | |
| 753 pdf_xref_entry *new_entry, *old_entry; | |
| 754 pdf_xref_subsec *sub = NULL; | |
| 755 int i; | |
| 756 pdf_xref *xref; | |
| 757 pdf_obj *copy; | |
| 758 | |
| 759 /* Is it in the local section already? */ | |
| 760 xref = doc->local_xref; | |
| 761 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 762 { | |
| 763 if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type) | |
| 764 break; | |
| 765 } | |
| 766 /* If we found it, it's in the local section already. */ | |
| 767 if (sub != NULL) | |
| 768 return; | |
| 769 | |
| 770 /* Search for the section that contains this object */ | |
| 771 for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++) | |
| 772 { | |
| 773 xref = &doc->xref_sections[i]; | |
| 774 | |
| 775 if (num < 0 && num >= xref->num_objects) | |
| 776 break; | |
| 777 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 778 { | |
| 779 if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type) | |
| 780 break; | |
| 781 } | |
| 782 if (sub != NULL) | |
| 783 break; | |
| 784 } | |
| 785 /* sub == NULL implies we did not find it */ | |
| 786 if (sub == NULL) | |
| 787 return; /* No object to find */ | |
| 788 | |
| 789 copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj); | |
| 790 | |
| 791 /* Copy the object to the local section */ | |
| 792 i = doc->xref_index[num]; | |
| 793 doc->xref_index[num] = 0; | |
| 794 old_entry = &sub->table[num - sub->start]; | |
| 795 fz_try(ctx) | |
| 796 new_entry = pdf_get_local_xref_entry(ctx, doc, num); | |
| 797 fz_catch(ctx) | |
| 798 { | |
| 799 pdf_drop_obj(ctx, copy); | |
| 800 doc->xref_index[num] = i; | |
| 801 fz_rethrow(ctx); | |
| 802 } | |
| 803 *new_entry = *old_entry; | |
| 804 if (new_entry->type == 'o') | |
| 805 { | |
| 806 new_entry->type = 'n'; | |
| 807 new_entry->gen = 0; | |
| 808 } | |
| 809 new_entry->stm_buf = NULL; | |
| 810 new_entry->obj = NULL; | |
| 811 /* old entry is incremental and may have changes. | |
| 812 * Better keep a copy. We must override the old entry with | |
| 813 * the copy because the caller may be holding a reference to | |
| 814 * the original and expect it to end up in the new entry */ | |
| 815 new_entry->obj = old_entry->obj; | |
| 816 old_entry->obj = copy; | |
| 817 new_entry->stm_buf = NULL; /* FIXME */ | |
| 818 } | |
| 819 | |
| 820 void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n) | |
| 821 { | |
| 822 int *xref_index = NULL; | |
| 823 pdf_xref *xref = NULL; | |
| 824 pdf_xref_subsec *sub; | |
| 825 | |
| 826 fz_var(xref_index); | |
| 827 fz_var(xref); | |
| 828 | |
| 829 fz_try(ctx) | |
| 830 { | |
| 831 xref_index = fz_calloc(ctx, n, sizeof(int)); | |
| 832 xref = fz_malloc_struct(ctx, pdf_xref); | |
| 833 sub = fz_malloc_struct(ctx, pdf_xref_subsec); | |
| 834 } | |
| 835 fz_catch(ctx) | |
| 836 { | |
| 837 fz_free(ctx, xref); | |
| 838 fz_free(ctx, xref_index); | |
| 839 fz_rethrow(ctx); | |
| 840 } | |
| 841 | |
| 842 sub->table = entries; | |
| 843 sub->start = 0; | |
| 844 sub->len = n; | |
| 845 | |
| 846 xref->subsec = sub; | |
| 847 xref->num_objects = n; | |
| 848 xref->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc)); | |
| 849 | |
| 850 /* The new table completely replaces the previous separate sections */ | |
| 851 pdf_drop_xref_sections(ctx, doc); | |
| 852 | |
| 853 doc->xref_sections = xref; | |
| 854 doc->num_xref_sections = 1; | |
| 855 doc->num_incremental_sections = 0; | |
| 856 doc->xref_base = 0; | |
| 857 doc->disallow_new_increments = 0; | |
| 858 doc->max_xref_len = n; | |
| 859 | |
| 860 fz_free(ctx, doc->xref_index); | |
| 861 doc->xref_index = xref_index; | |
| 862 } | |
| 863 | |
| 864 void pdf_forget_xref(fz_context *ctx, pdf_document *doc) | |
| 865 { | |
| 866 pdf_obj *trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc)); | |
| 867 | |
| 868 pdf_drop_local_xref_and_resources(ctx, doc); | |
| 869 | |
| 870 if (doc->saved_xref_sections) | |
| 871 pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections); | |
| 872 | |
| 873 doc->saved_xref_sections = doc->xref_sections; | |
| 874 doc->saved_num_xref_sections = doc->num_xref_sections; | |
| 875 | |
| 876 doc->xref_sections = NULL; | |
| 877 doc->startxref = 0; | |
| 878 doc->num_xref_sections = 0; | |
| 879 doc->num_incremental_sections = 0; | |
| 880 doc->xref_base = 0; | |
| 881 doc->disallow_new_increments = 0; | |
| 882 | |
| 883 fz_try(ctx) | |
| 884 { | |
| 885 pdf_get_populating_xref_entry(ctx, doc, 0); | |
| 886 } | |
| 887 fz_catch(ctx) | |
| 888 { | |
| 889 pdf_drop_obj(ctx, trailer); | |
| 890 fz_rethrow(ctx); | |
| 891 } | |
| 892 | |
| 893 /* Set the trailer of the final xref section. */ | |
| 894 doc->xref_sections[0].trailer = trailer; | |
| 895 } | |
| 896 | |
| 897 /* | |
| 898 * magic version tag and startxref | |
| 899 */ | |
| 900 | |
| 901 int | |
| 902 pdf_version(fz_context *ctx, pdf_document *doc) | |
| 903 { | |
| 904 int version = doc->version; | |
| 905 fz_try(ctx) | |
| 906 { | |
| 907 pdf_obj *obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL); | |
| 908 const char *str = pdf_to_name(ctx, obj); | |
| 909 if (*str) | |
| 910 version = 10 * (fz_atof(str) + 0.05f); | |
| 911 } | |
| 912 fz_catch(ctx) | |
| 913 { | |
| 914 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 915 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 916 fz_report_error(ctx); | |
| 917 fz_warn(ctx, "Ignoring broken Root/Version number."); | |
| 918 } | |
| 919 return version; | |
| 920 } | |
| 921 | |
| 922 static void | |
| 923 pdf_load_version(fz_context *ctx, pdf_document *doc) | |
| 924 { | |
| 925 char buf[1024]; | |
| 926 char *s = NULL; | |
| 927 size_t i, n; | |
| 928 | |
| 929 /* look for '%PDF' version marker within first kilobyte of file */ | |
| 930 fz_seek(ctx, doc->file, 0, SEEK_SET); | |
| 931 n = fz_read(ctx, doc->file, (unsigned char*) buf, sizeof buf); | |
| 932 if (n < 5) | |
| 933 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find version marker"); | |
| 934 buf[n-1] = 0; | |
| 935 for (i = 0; i < n - 5; i++) | |
| 936 { | |
| 937 if (memcmp(&buf[i], "%PDF-", 5) == 0 || memcmp(&buf[i], "%FDF-", 5) == 0) | |
| 938 { | |
| 939 s = buf + i; | |
| 940 break; | |
| 941 } | |
| 942 } | |
| 943 if (!s) | |
| 944 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find version marker"); | |
| 945 | |
| 946 if (s[1] == 'F') | |
| 947 doc->is_fdf = 1; | |
| 948 | |
| 949 doc->version = 10 * (fz_atof(s+5) + 0.05f); | |
| 950 if ((doc->version < 10 || doc->version > 17) && doc->version != 20) | |
| 951 fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / 10, doc->version % 10); | |
| 952 | |
| 953 if (s != buf) | |
| 954 { | |
| 955 fz_warn(ctx, "garbage bytes before version marker"); | |
| 956 doc->bias = s - buf; | |
| 957 } | |
| 958 | |
| 959 fz_seek(ctx, doc->file, doc->bias, SEEK_SET); | |
| 960 } | |
| 961 | |
| 962 static void | |
| 963 pdf_read_start_xref(fz_context *ctx, pdf_document *doc) | |
| 964 { | |
| 965 unsigned char buf[1024]; | |
| 966 size_t i, n; | |
| 967 int64_t t; | |
| 968 | |
| 969 fz_seek(ctx, doc->file, 0, SEEK_END); | |
| 970 | |
| 971 doc->file_size = fz_tell(ctx, doc->file); | |
| 972 | |
| 973 t = fz_maxi64(0, doc->file_size - (int64_t)sizeof buf); | |
| 974 fz_seek(ctx, doc->file, t, SEEK_SET); | |
| 975 | |
| 976 n = fz_read(ctx, doc->file, buf, sizeof buf); | |
| 977 if (n < 9) | |
| 978 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref"); | |
| 979 | |
| 980 i = n - 9; | |
| 981 do | |
| 982 { | |
| 983 if (memcmp(buf + i, "startxref", 9) == 0) | |
| 984 { | |
| 985 i += 9; | |
| 986 while (i < n && iswhite(buf[i])) | |
| 987 i ++; | |
| 988 doc->startxref = 0; | |
| 989 while (i < n && isdigit(buf[i])) | |
| 990 { | |
| 991 if (doc->startxref >= INT64_MAX/10) | |
| 992 fz_throw(ctx, FZ_ERROR_LIMIT, "startxref too large"); | |
| 993 doc->startxref = doc->startxref * 10 + (buf[i++] - '0'); | |
| 994 } | |
| 995 if (doc->startxref != 0) | |
| 996 return; | |
| 997 break; | |
| 998 } | |
| 999 } while (i-- > 0); | |
| 1000 | |
| 1001 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref"); | |
| 1002 } | |
| 1003 | |
| 1004 void fz_skip_space(fz_context *ctx, fz_stream *stm) | |
| 1005 { | |
| 1006 do | |
| 1007 { | |
| 1008 int c = fz_peek_byte(ctx, stm); | |
| 1009 if (c == EOF || c > 32) | |
| 1010 return; | |
| 1011 (void)fz_read_byte(ctx, stm); | |
| 1012 } | |
| 1013 while (1); | |
| 1014 } | |
| 1015 | |
| 1016 int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str) | |
| 1017 { | |
| 1018 while (*str) | |
| 1019 { | |
| 1020 int c = fz_peek_byte(ctx, stm); | |
| 1021 if (c == EOF || c != *str++) | |
| 1022 return 1; | |
| 1023 (void)fz_read_byte(ctx, stm); | |
| 1024 } | |
| 1025 return 0; | |
| 1026 } | |
| 1027 | |
| 1028 /* | |
| 1029 * trailer dictionary | |
| 1030 */ | |
| 1031 | |
| 1032 static int | |
| 1033 pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc) | |
| 1034 { | |
| 1035 int len; | |
| 1036 char *s; | |
| 1037 int64_t t; | |
| 1038 pdf_token tok; | |
| 1039 int c; | |
| 1040 int size = 0; | |
| 1041 int64_t ofs; | |
| 1042 pdf_obj *trailer = NULL; | |
| 1043 size_t n; | |
| 1044 pdf_lexbuf *buf = &doc->lexbuf.base; | |
| 1045 pdf_obj *obj = NULL; | |
| 1046 | |
| 1047 fz_var(trailer); | |
| 1048 | |
| 1049 /* Record the current file read offset so that we can reinstate it */ | |
| 1050 ofs = fz_tell(ctx, doc->file); | |
| 1051 | |
| 1052 fz_skip_space(ctx, doc->file); | |
| 1053 if (fz_skip_string(ctx, doc->file, "xref")) | |
| 1054 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker"); | |
| 1055 fz_skip_space(ctx, doc->file); | |
| 1056 | |
| 1057 while (1) | |
| 1058 { | |
| 1059 c = fz_peek_byte(ctx, doc->file); | |
| 1060 if (!isdigit(c)) | |
| 1061 break; | |
| 1062 | |
| 1063 fz_read_line(ctx, doc->file, buf->scratch, buf->size); | |
| 1064 s = buf->scratch; | |
| 1065 fz_strsep(&s, " "); /* ignore start */ | |
| 1066 if (!s) | |
| 1067 fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length missing"); | |
| 1068 len = fz_atoi(fz_strsep(&s, " ")); | |
| 1069 if (len < 0) | |
| 1070 fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length must be positive"); | |
| 1071 | |
| 1072 /* broken pdfs where the section is not on a separate line */ | |
| 1073 if (s && *s != '\0') | |
| 1074 fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR); | |
| 1075 | |
| 1076 t = fz_tell(ctx, doc->file); | |
| 1077 if (t < 0) | |
| 1078 fz_throw(ctx, FZ_ERROR_SYSTEM, "cannot tell in file"); | |
| 1079 | |
| 1080 /* Spec says xref entries should be 20 bytes, but it's not infrequent | |
| 1081 * to see 19, in particular for some PCLm drivers. Cope. */ | |
| 1082 if (len > 0) | |
| 1083 { | |
| 1084 n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20); | |
| 1085 if (n < 19) | |
| 1086 fz_throw(ctx, FZ_ERROR_FORMAT, "malformed xref table"); | |
| 1087 if (n == 20 && buf->scratch[19] > 32) | |
| 1088 n = 19; | |
| 1089 } | |
| 1090 else | |
| 1091 n = 20; | |
| 1092 | |
| 1093 if (len > (int64_t)((INT64_MAX - t) / n)) | |
| 1094 fz_throw(ctx, FZ_ERROR_LIMIT, "xref has too many entries"); | |
| 1095 | |
| 1096 fz_seek(ctx, doc->file, t + n * (int64_t)len, SEEK_SET); | |
| 1097 } | |
| 1098 | |
| 1099 fz_try(ctx) | |
| 1100 { | |
| 1101 tok = pdf_lex(ctx, doc->file, buf); | |
| 1102 if (tok != PDF_TOK_TRAILER) | |
| 1103 fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker"); | |
| 1104 | |
| 1105 tok = pdf_lex(ctx, doc->file, buf); | |
| 1106 if (tok != PDF_TOK_OPEN_DICT) | |
| 1107 fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary"); | |
| 1108 | |
| 1109 trailer = pdf_parse_dict(ctx, doc, doc->file, buf); | |
| 1110 | |
| 1111 obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size)); | |
| 1112 if (pdf_is_indirect(ctx, obj)) | |
| 1113 fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry is indirect"); | |
| 1114 | |
| 1115 size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size)); | |
| 1116 if (size < 0 || size > PDF_MAX_OBJECT_NUMBER + 1) | |
| 1117 fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry out of range"); | |
| 1118 } | |
| 1119 fz_always(ctx) | |
| 1120 { | |
| 1121 pdf_drop_obj(ctx, trailer); | |
| 1122 } | |
| 1123 fz_catch(ctx) | |
| 1124 { | |
| 1125 fz_rethrow(ctx); | |
| 1126 } | |
| 1127 | |
| 1128 fz_seek(ctx, doc->file, ofs, SEEK_SET); | |
| 1129 | |
| 1130 return size; | |
| 1131 } | |
| 1132 | |
| 1133 static pdf_xref_entry * | |
| 1134 pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int start, int len) | |
| 1135 { | |
| 1136 pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1]; | |
| 1137 pdf_xref_subsec *sub, *extend = NULL; | |
| 1138 int num_objects; | |
| 1139 int solidify = 0; | |
| 1140 | |
| 1141 if (len == 0) | |
| 1142 return NULL; | |
| 1143 | |
| 1144 /* Different cases here. | |
| 1145 * Case 1) We might be asking for a subsection (or a subset of a | |
| 1146 * subsection) that we already have - Just return it. | |
| 1147 * Case 2) We might be asking for a subsection that overlaps (or | |
| 1148 * extends) a subsection we already have - extend the existing one. | |
| 1149 * Case 3) We might be asking for a subsection that overlaps multiple | |
| 1150 * existing subsections - solidify the whole set. | |
| 1151 * Case 4) We might be asking for a completely new subsection - just | |
| 1152 * allocate it. | |
| 1153 */ | |
| 1154 | |
| 1155 /* Sanity check */ | |
| 1156 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 1157 { | |
| 1158 if (start >= sub->start && start <= sub->start + sub->len) | |
| 1159 { | |
| 1160 /* 'start' is in (or immediately after) 'sub' */ | |
| 1161 if (start + len <= sub->start + sub->len) | |
| 1162 { | |
| 1163 /* And so is start+len-1 - just return this! Case 1. */ | |
| 1164 return &sub->table[start-sub->start]; | |
| 1165 } | |
| 1166 /* So we overlap with sub. */ | |
| 1167 if (extend == NULL) | |
| 1168 { | |
| 1169 /* Maybe we can extend sub? */ | |
| 1170 extend = sub; | |
| 1171 } | |
| 1172 else | |
| 1173 { | |
| 1174 /* OK, so we've already found an overlapping one. We'll need to solidify. Case 3. */ | |
| 1175 solidify = 1; | |
| 1176 break; | |
| 1177 } | |
| 1178 } | |
| 1179 else if (start + len > sub->start && start + len < sub->start + sub->len) | |
| 1180 { | |
| 1181 /* The end of the start+len range is in 'sub'. */ | |
| 1182 /* For now, we won't support extending sub backwards. Just take this as | |
| 1183 * needing to solidify. Case 3. */ | |
| 1184 solidify = 1; | |
| 1185 break; | |
| 1186 } | |
| 1187 else if (start < sub->start && start + len >= sub->start + sub->len) | |
| 1188 { | |
| 1189 /* The end of the start+len range is beyond 'sub'. */ | |
| 1190 /* For now, we won't support extending sub backwards. Just take this as | |
| 1191 * needing to solidify. Another variant of case 3. */ | |
| 1192 solidify = 1; | |
| 1193 break; | |
| 1194 } | |
| 1195 } | |
| 1196 | |
| 1197 num_objects = xref->num_objects; | |
| 1198 if (num_objects < start + len) | |
| 1199 num_objects = start + len; | |
| 1200 | |
| 1201 if (solidify) | |
| 1202 { | |
| 1203 /* Case 3: Solidify the xref */ | |
| 1204 ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-1); | |
| 1205 xref = &doc->xref_sections[doc->num_xref_sections-1]; | |
| 1206 sub = xref->subsec; | |
| 1207 } | |
| 1208 else if (extend) | |
| 1209 { | |
| 1210 /* Case 2: Extend the subsection */ | |
| 1211 int newlen = start + len - extend->start; | |
| 1212 sub = extend; | |
| 1213 sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry); | |
| 1214 memset(&sub->table[sub->len], 0, sizeof(pdf_xref_entry) * (newlen - sub->len)); | |
| 1215 sub->len = newlen; | |
| 1216 if (xref->num_objects < sub->start + sub->len) | |
| 1217 xref->num_objects = sub->start + sub->len; | |
| 1218 if (doc->max_xref_len < sub->start + sub->len) | |
| 1219 extend_xref_index(ctx, doc, sub->start + sub->len); | |
| 1220 } | |
| 1221 else | |
| 1222 { | |
| 1223 /* Case 4 */ | |
| 1224 sub = fz_malloc_struct(ctx, pdf_xref_subsec); | |
| 1225 fz_try(ctx) | |
| 1226 { | |
| 1227 sub->table = fz_malloc_struct_array(ctx, len, pdf_xref_entry); | |
| 1228 sub->start = start; | |
| 1229 sub->len = len; | |
| 1230 sub->next = xref->subsec; | |
| 1231 xref->subsec = sub; | |
| 1232 } | |
| 1233 fz_catch(ctx) | |
| 1234 { | |
| 1235 fz_free(ctx, sub); | |
| 1236 fz_rethrow(ctx); | |
| 1237 } | |
| 1238 if (xref->num_objects < num_objects) | |
| 1239 xref->num_objects = num_objects; | |
| 1240 if (doc->max_xref_len < num_objects) | |
| 1241 extend_xref_index(ctx, doc, num_objects); | |
| 1242 } | |
| 1243 return &sub->table[start-sub->start]; | |
| 1244 } | |
| 1245 | |
| 1246 static inline void | |
| 1247 validate_object_number_range(fz_context *ctx, int first, int len, const char *what) | |
| 1248 { | |
| 1249 if (first < 0 || first > PDF_MAX_OBJECT_NUMBER) | |
| 1250 fz_throw(ctx, FZ_ERROR_FORMAT, "first object number in %s out of range", what); | |
| 1251 if (len < 0 || len > PDF_MAX_OBJECT_NUMBER) | |
| 1252 fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in %s out of range", what); | |
| 1253 if (len > 0 && len - 1 > PDF_MAX_OBJECT_NUMBER - first) | |
| 1254 fz_throw(ctx, FZ_ERROR_FORMAT, "last object number in %s out of range", what); | |
| 1255 } | |
| 1256 | |
| 1257 static pdf_obj * | |
| 1258 pdf_read_old_xref(fz_context *ctx, pdf_document *doc) | |
| 1259 { | |
| 1260 int start, len, c, i, xref_len, carried; | |
| 1261 fz_stream *file = doc->file; | |
| 1262 pdf_xref_entry *table; | |
| 1263 pdf_token tok; | |
| 1264 size_t n; | |
| 1265 char *s, *e; | |
| 1266 pdf_lexbuf *buf = &doc->lexbuf.base; | |
| 1267 | |
| 1268 xref_len = pdf_xref_size_from_old_trailer(ctx, doc); | |
| 1269 | |
| 1270 fz_skip_space(ctx, doc->file); | |
| 1271 if (fz_skip_string(ctx, doc->file, "xref")) | |
| 1272 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker"); | |
| 1273 fz_skip_space(ctx, doc->file); | |
| 1274 | |
| 1275 while (1) | |
| 1276 { | |
| 1277 c = fz_peek_byte(ctx, file); | |
| 1278 if (!isdigit(c)) | |
| 1279 break; | |
| 1280 | |
| 1281 fz_read_line(ctx, file, buf->scratch, buf->size); | |
| 1282 s = buf->scratch; | |
| 1283 start = fz_atoi(fz_strsep(&s, " ")); | |
| 1284 len = fz_atoi(fz_strsep(&s, " ")); | |
| 1285 | |
| 1286 /* broken pdfs where the section is not on a separate line */ | |
| 1287 if (s && *s != '\0') | |
| 1288 { | |
| 1289 fz_warn(ctx, "broken xref subsection. proceeding anyway."); | |
| 1290 fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR); | |
| 1291 } | |
| 1292 | |
| 1293 validate_object_number_range(ctx, start, len, "xref subsection"); | |
| 1294 | |
| 1295 /* broken pdfs where size in trailer undershoots entries in xref sections */ | |
| 1296 if (start + len > xref_len) | |
| 1297 { | |
| 1298 fz_warn(ctx, "broken xref subsection, proceeding anyway."); | |
| 1299 } | |
| 1300 | |
| 1301 table = pdf_xref_find_subsection(ctx, doc, start, len); | |
| 1302 | |
| 1303 /* Xref entries SHOULD be 20 bytes long, but we see 19 byte | |
| 1304 * ones more frequently than we'd like (e.g. PCLm drivers). | |
| 1305 * Cope with this by 'carrying' data forward. */ | |
| 1306 carried = 0; | |
| 1307 for (i = 0; i < len; i++) | |
| 1308 { | |
| 1309 pdf_xref_entry *entry = &table[i]; | |
| 1310 n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried); | |
| 1311 if (n != (size_t)(20-carried)) | |
| 1312 fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected EOF in xref table"); | |
| 1313 n += carried; | |
| 1314 buf->scratch[n] = '\0'; | |
| 1315 if (!entry->type) | |
| 1316 { | |
| 1317 s = buf->scratch; | |
| 1318 e = s + n; | |
| 1319 | |
| 1320 entry->num = start + i; | |
| 1321 | |
| 1322 /* broken pdfs where line start with white space */ | |
| 1323 while (s < e && iswhite(*s)) | |
| 1324 s++; | |
| 1325 | |
| 1326 if (s == e || !isdigit(*s)) | |
| 1327 fz_throw(ctx, FZ_ERROR_FORMAT, "xref offset missing"); | |
| 1328 while (s < e && isdigit(*s)) | |
| 1329 entry->ofs = entry->ofs * 10 + *s++ - '0'; | |
| 1330 | |
| 1331 while (s < e && iswhite(*s)) | |
| 1332 s++; | |
| 1333 if (s == e || !isdigit(*s)) | |
| 1334 fz_throw(ctx, FZ_ERROR_FORMAT, "xref generation number missing"); | |
| 1335 while (s < e && isdigit(*s)) | |
| 1336 entry->gen = entry->gen * 10 + *s++ - '0'; | |
| 1337 | |
| 1338 while (s < e && iswhite(*s)) | |
| 1339 s++; | |
| 1340 if (s == e || (*s != 'f' && *s != 'n' && *s != 'o')) | |
| 1341 fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected xref type: 0x%x (%d %d R)", s == e ? 0 : *s, entry->num, entry->gen); | |
| 1342 entry->type = *s++; | |
| 1343 | |
| 1344 /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */ | |
| 1345 carried = buf->scratch[19] > 32; | |
| 1346 if (carried) | |
| 1347 buf->scratch[0] = buf->scratch[19]; | |
| 1348 } | |
| 1349 } | |
| 1350 if (carried) | |
| 1351 fz_unread_byte(ctx, file); | |
| 1352 } | |
| 1353 | |
| 1354 tok = pdf_lex(ctx, file, buf); | |
| 1355 if (tok != PDF_TOK_TRAILER) | |
| 1356 fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker"); | |
| 1357 | |
| 1358 tok = pdf_lex(ctx, file, buf); | |
| 1359 if (tok != PDF_TOK_OPEN_DICT) | |
| 1360 fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary"); | |
| 1361 | |
| 1362 doc->last_xref_was_old_style = 1; | |
| 1363 | |
| 1364 return pdf_parse_dict(ctx, doc, file, buf); | |
| 1365 } | |
| 1366 | |
| 1367 static void | |
| 1368 pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2) | |
| 1369 { | |
| 1370 pdf_xref_entry *table; | |
| 1371 int i, n; | |
| 1372 | |
| 1373 validate_object_number_range(ctx, i0, i1, "xref subsection"); | |
| 1374 | |
| 1375 table = pdf_xref_find_subsection(ctx, doc, i0, i1); | |
| 1376 for (i = i0; i < i0 + i1; i++) | |
| 1377 { | |
| 1378 pdf_xref_entry *entry = &table[i-i0]; | |
| 1379 int a = 0; | |
| 1380 int64_t b = 0; | |
| 1381 int c = 0; | |
| 1382 | |
| 1383 if (fz_is_eof(ctx, stm)) | |
| 1384 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated xref stream"); | |
| 1385 | |
| 1386 for (n = 0; n < w0; n++) | |
| 1387 a = (a << 8) + fz_read_byte(ctx, stm); | |
| 1388 for (n = 0; n < w1; n++) | |
| 1389 b = (b << 8) + fz_read_byte(ctx, stm); | |
| 1390 for (n = 0; n < w2; n++) | |
| 1391 c = (c << 8) + fz_read_byte(ctx, stm); | |
| 1392 | |
| 1393 if (!entry->type) | |
| 1394 { | |
| 1395 int t = w0 ? a : 1; | |
| 1396 entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; | |
| 1397 entry->ofs = w1 ? b : 0; | |
| 1398 entry->gen = w2 ? c : 0; | |
| 1399 entry->num = i; | |
| 1400 } | |
| 1401 } | |
| 1402 | |
| 1403 doc->last_xref_was_old_style = 0; | |
| 1404 } | |
| 1405 | |
| 1406 /* Entered with file locked, remains locked throughout. */ | |
| 1407 static pdf_obj * | |
| 1408 pdf_read_new_xref(fz_context *ctx, pdf_document *doc) | |
| 1409 { | |
| 1410 fz_stream *stm = NULL; | |
| 1411 pdf_obj *trailer = NULL; | |
| 1412 pdf_obj *index = NULL; | |
| 1413 pdf_obj *obj = NULL; | |
| 1414 int gen, num = 0; | |
| 1415 int64_t ofs, stm_ofs; | |
| 1416 int size, w0, w1, w2; | |
| 1417 int t; | |
| 1418 | |
| 1419 fz_var(trailer); | |
| 1420 fz_var(stm); | |
| 1421 | |
| 1422 fz_try(ctx) | |
| 1423 { | |
| 1424 ofs = fz_tell(ctx, doc->file); | |
| 1425 trailer = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stm_ofs, NULL); | |
| 1426 if (num == 0) | |
| 1427 fz_throw(ctx, FZ_ERROR_FORMAT, "Trailer object number cannot be 0\n"); | |
| 1428 } | |
| 1429 fz_catch(ctx) | |
| 1430 { | |
| 1431 pdf_drop_obj(ctx, trailer); | |
| 1432 fz_rethrow(ctx); | |
| 1433 } | |
| 1434 | |
| 1435 fz_try(ctx) | |
| 1436 { | |
| 1437 pdf_xref_entry *entry; | |
| 1438 | |
| 1439 obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size)); | |
| 1440 if (!obj) | |
| 1441 fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing Size entry (%d 0 R)", num); | |
| 1442 | |
| 1443 size = pdf_to_int(ctx, obj); | |
| 1444 | |
| 1445 /* Bug708176: If the PDF file producer has declared Size without | |
| 1446 * including this object, then increment it. */ | |
| 1447 if (size == num) | |
| 1448 pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), size+1); | |
| 1449 | |
| 1450 obj = pdf_dict_get(ctx, trailer, PDF_NAME(W)); | |
| 1451 if (!obj) | |
| 1452 fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing W entry (%d R)", num); | |
| 1453 | |
| 1454 if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 0))) | |
| 1455 fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object type field width an indirect object"); | |
| 1456 if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 1))) | |
| 1457 fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 2 width an indirect object"); | |
| 1458 if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 2))) | |
| 1459 fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 3 width an indirect object"); | |
| 1460 | |
| 1461 if (doc->file_reading_linearly && pdf_dict_get(ctx, trailer, PDF_NAME(Encrypt))) | |
| 1462 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot read linearly with encryption"); | |
| 1463 | |
| 1464 w0 = pdf_array_get_int(ctx, obj, 0); | |
| 1465 w1 = pdf_array_get_int(ctx, obj, 1); | |
| 1466 w2 = pdf_array_get_int(ctx, obj, 2); | |
| 1467 | |
| 1468 if (w0 < 0) | |
| 1469 fz_warn(ctx, "xref stream objects have corrupt type"); | |
| 1470 if (w1 < 0) | |
| 1471 fz_warn(ctx, "xref stream objects have corrupt offset"); | |
| 1472 if (w2 < 0) | |
| 1473 fz_warn(ctx, "xref stream objects have corrupt generation"); | |
| 1474 | |
| 1475 w0 = w0 < 0 ? 0 : w0; | |
| 1476 w1 = w1 < 0 ? 0 : w1; | |
| 1477 w2 = w2 < 0 ? 0 : w2; | |
| 1478 | |
| 1479 index = pdf_dict_get(ctx, trailer, PDF_NAME(Index)); | |
| 1480 | |
| 1481 stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs); | |
| 1482 | |
| 1483 if (!index) | |
| 1484 { | |
| 1485 pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2); | |
| 1486 } | |
| 1487 else | |
| 1488 { | |
| 1489 int n = pdf_array_len(ctx, index); | |
| 1490 for (t = 0; t < n; t += 2) | |
| 1491 { | |
| 1492 int i0 = pdf_array_get_int(ctx, index, t + 0); | |
| 1493 int i1 = pdf_array_get_int(ctx, index, t + 1); | |
| 1494 pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2); | |
| 1495 } | |
| 1496 } | |
| 1497 entry = pdf_get_populating_xref_entry(ctx, doc, num); | |
| 1498 entry->ofs = ofs; | |
| 1499 entry->gen = gen; | |
| 1500 entry->num = num; | |
| 1501 entry->stm_ofs = stm_ofs; | |
| 1502 pdf_drop_obj(ctx, entry->obj); | |
| 1503 entry->obj = pdf_keep_obj(ctx, trailer); | |
| 1504 entry->type = 'n'; | |
| 1505 pdf_set_obj_parent(ctx, trailer, num); | |
| 1506 } | |
| 1507 fz_always(ctx) | |
| 1508 { | |
| 1509 fz_drop_stream(ctx, stm); | |
| 1510 } | |
| 1511 fz_catch(ctx) | |
| 1512 { | |
| 1513 pdf_drop_obj(ctx, trailer); | |
| 1514 fz_rethrow(ctx); | |
| 1515 } | |
| 1516 | |
| 1517 return trailer; | |
| 1518 } | |
| 1519 | |
| 1520 static pdf_obj * | |
| 1521 pdf_read_xref(fz_context *ctx, pdf_document *doc, int64_t ofs) | |
| 1522 { | |
| 1523 pdf_obj *trailer; | |
| 1524 int c; | |
| 1525 | |
| 1526 fz_seek(ctx, doc->file, doc->bias + ofs, SEEK_SET); | |
| 1527 | |
| 1528 while (iswhite(fz_peek_byte(ctx, doc->file))) | |
| 1529 fz_read_byte(ctx, doc->file); | |
| 1530 | |
| 1531 c = fz_peek_byte(ctx, doc->file); | |
| 1532 if (c == 'x') | |
| 1533 trailer = pdf_read_old_xref(ctx, doc); | |
| 1534 else if (isdigit(c)) | |
| 1535 trailer = pdf_read_new_xref(ctx, doc); | |
| 1536 else | |
| 1537 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize xref format"); | |
| 1538 | |
| 1539 return trailer; | |
| 1540 } | |
| 1541 | |
| 1542 static int64_t | |
| 1543 read_xref_section(fz_context *ctx, pdf_document *doc, int64_t ofs) | |
| 1544 { | |
| 1545 pdf_obj *trailer = NULL; | |
| 1546 pdf_obj *prevobj; | |
| 1547 int64_t xrefstmofs = 0; | |
| 1548 int64_t prevofs = 0; | |
| 1549 | |
| 1550 trailer = pdf_read_xref(ctx, doc, ofs); | |
| 1551 fz_try(ctx) | |
| 1552 { | |
| 1553 pdf_set_populating_xref_trailer(ctx, doc, trailer); | |
| 1554 | |
| 1555 /* FIXME: do we overwrite free entries properly? */ | |
| 1556 /* FIXME: Does this work properly with progression? */ | |
| 1557 xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm))); | |
| 1558 if (xrefstmofs) | |
| 1559 { | |
| 1560 if (xrefstmofs < 0) | |
| 1561 fz_throw(ctx, FZ_ERROR_FORMAT, "negative xref stream offset"); | |
| 1562 | |
| 1563 /* | |
| 1564 Read the XRefStm stream, but throw away the resulting trailer. We do not | |
| 1565 follow any Prev tag therein, as specified on Page 108 of the PDF reference | |
| 1566 1.7 | |
| 1567 */ | |
| 1568 pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs)); | |
| 1569 } | |
| 1570 | |
| 1571 prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev)); | |
| 1572 if (pdf_is_int(ctx, prevobj)) | |
| 1573 { | |
| 1574 prevofs = pdf_to_int64(ctx, prevobj); | |
| 1575 if (prevofs <= 0) | |
| 1576 fz_throw(ctx, FZ_ERROR_FORMAT, "invalid offset for previous xref section"); | |
| 1577 } | |
| 1578 } | |
| 1579 fz_always(ctx) | |
| 1580 pdf_drop_obj(ctx, trailer); | |
| 1581 fz_catch(ctx) | |
| 1582 fz_rethrow(ctx); | |
| 1583 | |
| 1584 return prevofs; | |
| 1585 } | |
| 1586 | |
| 1587 static void | |
| 1588 pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int64_t ofs, int read_previous) | |
| 1589 { | |
| 1590 int i, len, cap; | |
| 1591 int64_t *offsets; | |
| 1592 int populated = 0; | |
| 1593 int size, xref_len; | |
| 1594 | |
| 1595 len = 0; | |
| 1596 cap = 10; | |
| 1597 offsets = fz_malloc_array(ctx, cap, int64_t); | |
| 1598 | |
| 1599 fz_var(populated); | |
| 1600 fz_var(offsets); | |
| 1601 | |
| 1602 fz_try(ctx) | |
| 1603 { | |
| 1604 while(ofs) | |
| 1605 { | |
| 1606 for (i = 0; i < len; i ++) | |
| 1607 { | |
| 1608 if (offsets[i] == ofs) | |
| 1609 break; | |
| 1610 } | |
| 1611 if (i < len) | |
| 1612 { | |
| 1613 fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs); | |
| 1614 break; | |
| 1615 } | |
| 1616 if (len == cap) | |
| 1617 { | |
| 1618 cap *= 2; | |
| 1619 offsets = fz_realloc_array(ctx, offsets, cap, int64_t); | |
| 1620 } | |
| 1621 offsets[len++] = ofs; | |
| 1622 | |
| 1623 pdf_populate_next_xref_level(ctx, doc); | |
| 1624 populated = 1; | |
| 1625 ofs = read_xref_section(ctx, doc, ofs); | |
| 1626 if (!read_previous) | |
| 1627 break; | |
| 1628 } | |
| 1629 | |
| 1630 /* For pathological files, such as chinese-example.pdf, where the original | |
| 1631 * xref in the file is highly fragmented, we can safely solidify it here | |
| 1632 * with no ill effects. */ | |
| 1633 ensure_solid_xref(ctx, doc, 0, doc->num_xref_sections-1); | |
| 1634 | |
| 1635 size = pdf_dict_get_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size)); | |
| 1636 xref_len = pdf_xref_len(ctx, doc); | |
| 1637 if (xref_len > size) | |
| 1638 { | |
| 1639 if (xref_len == size+1) | |
| 1640 { | |
| 1641 /* Bug 708456 && Bug 708176. Allow for (sadly, quite common | |
| 1642 * PDF generators that can't get size right). */ | |
| 1643 fz_warn(ctx, "Trailer Size is off-by-one. Ignoring."); | |
| 1644 pdf_dict_put_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size), size+1); | |
| 1645 } | |
| 1646 else | |
| 1647 fz_throw(ctx, FZ_ERROR_FORMAT, "incorrect number of xref entries in trailer, repairing"); | |
| 1648 } | |
| 1649 } | |
| 1650 fz_always(ctx) | |
| 1651 { | |
| 1652 fz_free(ctx, offsets); | |
| 1653 } | |
| 1654 fz_catch(ctx) | |
| 1655 { | |
| 1656 /* Undo pdf_populate_next_xref_level if we've done that already. */ | |
| 1657 if (populated) | |
| 1658 { | |
| 1659 pdf_drop_xref_subsec(ctx, &doc->xref_sections[doc->num_xref_sections - 1]); | |
| 1660 doc->num_xref_sections--; | |
| 1661 } | |
| 1662 fz_rethrow(ctx); | |
| 1663 } | |
| 1664 } | |
| 1665 | |
| 1666 void | |
| 1667 pdf_prime_xref_index(fz_context *ctx, pdf_document *doc) | |
| 1668 { | |
| 1669 int i, j; | |
| 1670 int *idx = doc->xref_index; | |
| 1671 | |
| 1672 for (i = doc->num_xref_sections-1; i >= 0; i--) | |
| 1673 { | |
| 1674 pdf_xref *xref = &doc->xref_sections[i]; | |
| 1675 pdf_xref_subsec *subsec = xref->subsec; | |
| 1676 while (subsec != NULL) | |
| 1677 { | |
| 1678 int start = subsec->start; | |
| 1679 int end = subsec->start + subsec->len; | |
| 1680 for (j = start; j < end; j++) | |
| 1681 { | |
| 1682 char t = subsec->table[j-start].type; | |
| 1683 if (t != 0 && t != 'f') | |
| 1684 idx[j] = i; | |
| 1685 } | |
| 1686 | |
| 1687 subsec = subsec->next; | |
| 1688 } | |
| 1689 } | |
| 1690 } | |
| 1691 | |
| 1692 static void | |
| 1693 check_xref_entry_offsets(fz_context *ctx, pdf_xref_entry *entry, int i, pdf_document *doc, void *arg) | |
| 1694 { | |
| 1695 int xref_len = (int)(intptr_t)arg; | |
| 1696 | |
| 1697 if (entry->type == 'n') | |
| 1698 { | |
| 1699 /* Special case code: "0000000000 * n" means free, | |
| 1700 * according to some producers (inc Quartz) */ | |
| 1701 if (entry->ofs == 0) | |
| 1702 entry->type = 'f'; | |
| 1703 else if (entry->ofs <= 0 || entry->ofs >= doc->file_size) | |
| 1704 fz_throw(ctx, FZ_ERROR_FORMAT, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i); | |
| 1705 } | |
| 1706 else if (entry->type == 'o') | |
| 1707 { | |
| 1708 /* Read this into a local variable here, because pdf_get_xref_entry | |
| 1709 * may solidify the xref, hence invalidating "entry", meaning we | |
| 1710 * need a stashed value for the throw. */ | |
| 1711 int64_t ofs = entry->ofs; | |
| 1712 if (ofs <= 0 || ofs >= xref_len || pdf_get_xref_entry_no_null(ctx, doc, ofs)->type != 'n') | |
| 1713 fz_throw(ctx, FZ_ERROR_FORMAT, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i); | |
| 1714 } | |
| 1715 } | |
| 1716 | |
| 1717 /* | |
| 1718 * load xref tables from pdf | |
| 1719 * | |
| 1720 * File locked on entry, throughout and on exit. | |
| 1721 */ | |
| 1722 | |
| 1723 static void | |
| 1724 pdf_load_xref(fz_context *ctx, pdf_document *doc) | |
| 1725 { | |
| 1726 int xref_len; | |
| 1727 pdf_xref_entry *entry; | |
| 1728 | |
| 1729 pdf_read_start_xref(ctx, doc); | |
| 1730 | |
| 1731 pdf_read_xref_sections(ctx, doc, doc->startxref, 1); | |
| 1732 | |
| 1733 if (pdf_xref_len(ctx, doc) == 0) | |
| 1734 fz_throw(ctx, FZ_ERROR_FORMAT, "found xref was empty"); | |
| 1735 | |
| 1736 pdf_prime_xref_index(ctx, doc); | |
| 1737 | |
| 1738 entry = pdf_get_xref_entry_no_null(ctx, doc, 0); | |
| 1739 /* broken pdfs where first object is missing */ | |
| 1740 if (!entry->type) | |
| 1741 { | |
| 1742 entry->type = 'f'; | |
| 1743 entry->gen = 65535; | |
| 1744 entry->num = 0; | |
| 1745 } | |
| 1746 /* broken pdfs where first object is not free */ | |
| 1747 else if (entry->type != 'f') | |
| 1748 fz_warn(ctx, "first object in xref is not free"); | |
| 1749 | |
| 1750 /* broken pdfs where object offsets are out of range */ | |
| 1751 xref_len = pdf_xref_len(ctx, doc); | |
| 1752 pdf_xref_entry_map(ctx, doc, check_xref_entry_offsets, (void *)(intptr_t)xref_len); | |
| 1753 } | |
| 1754 | |
| 1755 static void | |
| 1756 pdf_check_linear(fz_context *ctx, pdf_document *doc) | |
| 1757 { | |
| 1758 pdf_obj *dict = NULL; | |
| 1759 pdf_obj *o; | |
| 1760 int num, gen; | |
| 1761 int64_t stmofs; | |
| 1762 | |
| 1763 fz_var(dict); | |
| 1764 | |
| 1765 fz_try(ctx) | |
| 1766 { | |
| 1767 dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL); | |
| 1768 if (!pdf_is_dict(ctx, dict)) | |
| 1769 break; | |
| 1770 o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized)); | |
| 1771 if (o == NULL) | |
| 1772 break; | |
| 1773 if (pdf_to_int(ctx, o) != 1) | |
| 1774 break; | |
| 1775 doc->has_linearization_object = 1; | |
| 1776 } | |
| 1777 fz_always(ctx) | |
| 1778 pdf_drop_obj(ctx, dict); | |
| 1779 fz_catch(ctx) | |
| 1780 { | |
| 1781 /* Silently swallow this error. */ | |
| 1782 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1783 fz_report_error(ctx); | |
| 1784 } | |
| 1785 } | |
| 1786 | |
| 1787 static void | |
| 1788 pdf_load_linear(fz_context *ctx, pdf_document *doc) | |
| 1789 { | |
| 1790 pdf_obj *dict = NULL; | |
| 1791 pdf_obj *hint = NULL; | |
| 1792 pdf_obj *o; | |
| 1793 int num, gen, lin, len; | |
| 1794 int64_t stmofs; | |
| 1795 | |
| 1796 fz_var(dict); | |
| 1797 fz_var(hint); | |
| 1798 | |
| 1799 fz_try(ctx) | |
| 1800 { | |
| 1801 pdf_xref_entry *entry; | |
| 1802 | |
| 1803 dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL); | |
| 1804 if (!pdf_is_dict(ctx, dict)) | |
| 1805 fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary"); | |
| 1806 o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized)); | |
| 1807 if (o == NULL) | |
| 1808 fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary"); | |
| 1809 lin = pdf_to_int(ctx, o); | |
| 1810 if (lin != 1) | |
| 1811 fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected version of Linearized tag (%d)", lin); | |
| 1812 doc->has_linearization_object = 1; | |
| 1813 len = pdf_dict_get_int(ctx, dict, PDF_NAME(L)); | |
| 1814 if (len != doc->file_length) | |
| 1815 fz_throw(ctx, FZ_ERROR_ARGUMENT, "File has been updated since linearization"); | |
| 1816 | |
| 1817 pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), 0); | |
| 1818 | |
| 1819 doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N)); | |
| 1820 doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *); | |
| 1821 memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*)); | |
| 1822 doc->linear_obj = dict; | |
| 1823 doc->linear_pos = fz_tell(ctx, doc->file); | |
| 1824 doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O)); | |
| 1825 doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0); | |
| 1826 doc->linear_page_num = 0; | |
| 1827 hint = pdf_dict_get(ctx, dict, PDF_NAME(H)); | |
| 1828 doc->hint_object_offset = pdf_array_get_int(ctx, hint, 0); | |
| 1829 doc->hint_object_length = pdf_array_get_int(ctx, hint, 1); | |
| 1830 | |
| 1831 entry = pdf_get_populating_xref_entry(ctx, doc, 0); | |
| 1832 entry->type = 'f'; | |
| 1833 } | |
| 1834 fz_catch(ctx) | |
| 1835 { | |
| 1836 pdf_drop_obj(ctx, dict); | |
| 1837 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 1838 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1839 fz_report_error(ctx); | |
| 1840 /* Drop back to non linearized reading mode */ | |
| 1841 doc->file_reading_linearly = 0; | |
| 1842 } | |
| 1843 } | |
| 1844 | |
| 1845 static void | |
| 1846 id_and_password(fz_context *ctx, pdf_document *doc) | |
| 1847 { | |
| 1848 pdf_obj *encrypt, *id; | |
| 1849 | |
| 1850 pdf_prime_xref_index(ctx, doc); | |
| 1851 | |
| 1852 encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)); | |
| 1853 id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID)); | |
| 1854 | |
| 1855 if (pdf_is_dict(ctx, encrypt)) | |
| 1856 doc->crypt = pdf_new_crypt(ctx, encrypt, id); | |
| 1857 | |
| 1858 /* Allow lazy clients to read encrypted files with a blank password */ | |
| 1859 (void)pdf_authenticate_password(ctx, doc, ""); | |
| 1860 } | |
| 1861 | |
| 1862 /* | |
| 1863 * Initialize and load xref tables. | |
| 1864 * If password is not null, try to decrypt. | |
| 1865 */ | |
| 1866 static void | |
| 1867 pdf_init_document(fz_context *ctx, pdf_document *doc) | |
| 1868 { | |
| 1869 int repaired = 0; | |
| 1870 | |
| 1871 fz_try(ctx) | |
| 1872 { | |
| 1873 /* Check to see if we should work in progressive mode */ | |
| 1874 if (doc->file->progressive) | |
| 1875 { | |
| 1876 doc->file_reading_linearly = 1; | |
| 1877 fz_seek(ctx, doc->file, 0, SEEK_END); | |
| 1878 doc->file_length = fz_tell(ctx, doc->file); | |
| 1879 if (doc->file_length < 0) | |
| 1880 doc->file_length = 0; | |
| 1881 fz_seek(ctx, doc->file, 0, SEEK_SET); | |
| 1882 } | |
| 1883 | |
| 1884 pdf_load_version(ctx, doc); | |
| 1885 | |
| 1886 if (doc->is_fdf) | |
| 1887 { | |
| 1888 doc->file_reading_linearly = 0; | |
| 1889 repaired = 1; | |
| 1890 break; /* skip to end of try/catch */ | |
| 1891 } | |
| 1892 | |
| 1893 /* Try to load the linearized file if we are in progressive | |
| 1894 * mode. */ | |
| 1895 if (doc->file_reading_linearly) | |
| 1896 pdf_load_linear(ctx, doc); | |
| 1897 else | |
| 1898 /* Even if we're not in progressive mode, check to see | |
| 1899 * if the file claims to be linearized. This is important | |
| 1900 * for checking signatures later on. */ | |
| 1901 pdf_check_linear(ctx, doc); | |
| 1902 | |
| 1903 /* If we aren't in progressive mode (or the linear load failed | |
| 1904 * and has set us back to non-progressive mode), load normally. | |
| 1905 */ | |
| 1906 if (!doc->file_reading_linearly) | |
| 1907 pdf_load_xref(ctx, doc); | |
| 1908 } | |
| 1909 fz_catch(ctx) | |
| 1910 { | |
| 1911 pdf_drop_xref_sections(ctx, doc); | |
| 1912 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 1913 doc->file_reading_linearly = 0; | |
| 1914 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1915 fz_report_error(ctx); | |
| 1916 fz_warn(ctx, "trying to repair broken xref"); | |
| 1917 repaired = 1; | |
| 1918 } | |
| 1919 | |
| 1920 if (repaired) | |
| 1921 { | |
| 1922 /* pdf_repair_xref may access xref_index, so reset it properly */ | |
| 1923 if (doc->xref_index) | |
| 1924 memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len); | |
| 1925 pdf_repair_xref_aux(ctx, doc, id_and_password); | |
| 1926 } | |
| 1927 else | |
| 1928 id_and_password(ctx, doc); | |
| 1929 } | |
| 1930 | |
| 1931 void | |
| 1932 pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc) | |
| 1933 { | |
| 1934 if (doc == NULL) | |
| 1935 return; | |
| 1936 fz_drop_xml(ctx, doc->xfa); | |
| 1937 doc->xfa = NULL; | |
| 1938 } | |
| 1939 | |
| 1940 static void | |
| 1941 pdf_drop_document_imp(fz_context *ctx, fz_document *doc_) | |
| 1942 { | |
| 1943 pdf_document *doc = (pdf_document*)doc_; | |
| 1944 int i; | |
| 1945 | |
| 1946 fz_defer_reap_start(ctx); | |
| 1947 | |
| 1948 /* Type3 glyphs in the glyph cache can contain pdf_obj pointers | |
| 1949 * that we are about to destroy. Simplest solution is to bin the | |
| 1950 * glyph cache at this point. */ | |
| 1951 fz_try(ctx) | |
| 1952 fz_purge_glyph_cache(ctx); | |
| 1953 fz_catch(ctx) | |
| 1954 { | |
| 1955 /* Swallow error, but continue dropping */ | |
| 1956 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1957 fz_report_error(ctx); | |
| 1958 } | |
| 1959 | |
| 1960 pdf_set_doc_event_callback(ctx, doc, NULL, NULL, NULL); | |
| 1961 pdf_drop_js(ctx, doc->js); | |
| 1962 | |
| 1963 pdf_drop_journal(ctx, doc->journal); | |
| 1964 | |
| 1965 pdf_drop_resource_tables(ctx, doc); | |
| 1966 | |
| 1967 pdf_drop_local_xref(ctx, doc->local_xref); | |
| 1968 | |
| 1969 pdf_drop_xref_sections(ctx, doc); | |
| 1970 fz_free(ctx, doc->xref_index); | |
| 1971 | |
| 1972 fz_drop_stream(ctx, doc->file); | |
| 1973 pdf_drop_crypt(ctx, doc->crypt); | |
| 1974 | |
| 1975 pdf_drop_obj(ctx, doc->linear_obj); | |
| 1976 if (doc->linear_page_refs) | |
| 1977 { | |
| 1978 for (i=0; i < doc->linear_page_count; i++) | |
| 1979 pdf_drop_obj(ctx, doc->linear_page_refs[i]); | |
| 1980 | |
| 1981 fz_free(ctx, doc->linear_page_refs); | |
| 1982 } | |
| 1983 | |
| 1984 fz_free(ctx, doc->hint_page); | |
| 1985 fz_free(ctx, doc->hint_shared_ref); | |
| 1986 fz_free(ctx, doc->hint_shared); | |
| 1987 fz_free(ctx, doc->hint_obj_offsets); | |
| 1988 | |
| 1989 for (i=0; i < doc->num_type3_fonts; i++) | |
| 1990 { | |
| 1991 fz_try(ctx) | |
| 1992 fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc); | |
| 1993 fz_always(ctx) | |
| 1994 fz_drop_font(ctx, doc->type3_fonts[i]); | |
| 1995 fz_catch(ctx) | |
| 1996 { | |
| 1997 /* Swallow error, but continue dropping */ | |
| 1998 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1999 fz_report_error(ctx); | |
| 2000 } | |
| 2001 } | |
| 2002 | |
| 2003 fz_free(ctx, doc->type3_fonts); | |
| 2004 | |
| 2005 pdf_drop_ocg(ctx, doc); | |
| 2006 | |
| 2007 pdf_empty_store(ctx, doc); | |
| 2008 | |
| 2009 pdf_lexbuf_fin(ctx, &doc->lexbuf.base); | |
| 2010 | |
| 2011 fz_drop_colorspace(ctx, doc->oi); | |
| 2012 | |
| 2013 for (i = 0; i < doc->orphans_count; i++) | |
| 2014 pdf_drop_obj(ctx, doc->orphans[i]); | |
| 2015 | |
| 2016 fz_free(ctx, doc->orphans); | |
| 2017 | |
| 2018 pdf_drop_page_tree_internal(ctx, doc); | |
| 2019 | |
| 2020 fz_defer_reap_end(ctx); | |
| 2021 | |
| 2022 pdf_invalidate_xfa(ctx, doc); | |
| 2023 } | |
| 2024 | |
| 2025 void | |
| 2026 pdf_drop_document(fz_context *ctx, pdf_document *doc) | |
| 2027 { | |
| 2028 fz_drop_document(ctx, &doc->super); | |
| 2029 } | |
| 2030 | |
| 2031 pdf_document * | |
| 2032 pdf_keep_document(fz_context *ctx, pdf_document *doc) | |
| 2033 { | |
| 2034 return (pdf_document *)fz_keep_document(ctx, &doc->super); | |
| 2035 } | |
| 2036 | |
| 2037 /* | |
| 2038 * compressed object streams | |
| 2039 */ | |
| 2040 | |
| 2041 /* | |
| 2042 Do not hold pdf_xref_entry's over call to this function as they | |
| 2043 may be invalidated! | |
| 2044 */ | |
| 2045 static pdf_xref_entry * | |
| 2046 pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, int target) | |
| 2047 { | |
| 2048 fz_stream *stm = NULL; | |
| 2049 pdf_obj *objstm = NULL; | |
| 2050 int *numbuf = NULL; | |
| 2051 int64_t *ofsbuf = NULL; | |
| 2052 | |
| 2053 pdf_obj *obj; | |
| 2054 int64_t first; | |
| 2055 int count; | |
| 2056 int i; | |
| 2057 pdf_token tok; | |
| 2058 pdf_xref_entry *ret_entry = NULL; | |
| 2059 int ret_idx; | |
| 2060 int xref_len; | |
| 2061 int found; | |
| 2062 fz_stream *sub = NULL; | |
| 2063 | |
| 2064 fz_var(numbuf); | |
| 2065 fz_var(ofsbuf); | |
| 2066 fz_var(objstm); | |
| 2067 fz_var(stm); | |
| 2068 fz_var(sub); | |
| 2069 | |
| 2070 fz_try(ctx) | |
| 2071 { | |
| 2072 objstm = pdf_load_object(ctx, doc, num); | |
| 2073 | |
| 2074 if (pdf_obj_marked(ctx, objstm)) | |
| 2075 fz_throw(ctx, FZ_ERROR_FORMAT, "recursive object stream lookup"); | |
| 2076 } | |
| 2077 fz_catch(ctx) | |
| 2078 { | |
| 2079 pdf_drop_obj(ctx, objstm); | |
| 2080 fz_rethrow(ctx); | |
| 2081 } | |
| 2082 | |
| 2083 fz_try(ctx) | |
| 2084 { | |
| 2085 (void)pdf_mark_obj(ctx, objstm); | |
| 2086 | |
| 2087 count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N)); | |
| 2088 first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First)); | |
| 2089 | |
| 2090 if (count < 0 || count > PDF_MAX_OBJECT_NUMBER) | |
| 2091 fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in object stream out of range"); | |
| 2092 | |
| 2093 numbuf = fz_calloc(ctx, count, sizeof(*numbuf)); | |
| 2094 ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf)); | |
| 2095 | |
| 2096 xref_len = pdf_xref_len(ctx, doc); | |
| 2097 | |
| 2098 found = 0; | |
| 2099 | |
| 2100 stm = pdf_open_stream_number(ctx, doc, num); | |
| 2101 for (i = 0; i < count; i++) | |
| 2102 { | |
| 2103 tok = pdf_lex(ctx, stm, buf); | |
| 2104 if (tok != PDF_TOK_INT) | |
| 2105 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num); | |
| 2106 numbuf[found] = buf->i; | |
| 2107 | |
| 2108 tok = pdf_lex(ctx, stm, buf); | |
| 2109 if (tok != PDF_TOK_INT) | |
| 2110 fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num); | |
| 2111 ofsbuf[found] = buf->i; | |
| 2112 | |
| 2113 if (numbuf[found] <= 0 || numbuf[found] >= xref_len) | |
| 2114 fz_warn(ctx, "object stream object out of range, skipping"); | |
| 2115 else | |
| 2116 found++; | |
| 2117 } | |
| 2118 | |
| 2119 ret_idx = -1; | |
| 2120 for (i = 0; i < found; i++) | |
| 2121 { | |
| 2122 pdf_xref_entry *entry; | |
| 2123 uint64_t length; | |
| 2124 int64_t offset; | |
| 2125 | |
| 2126 offset = first + ofsbuf[i]; | |
| 2127 if (i+1 < found) | |
| 2128 length = ofsbuf[i+1] - ofsbuf[i]; | |
| 2129 else | |
| 2130 length = UINT64_MAX; | |
| 2131 | |
| 2132 sub = fz_open_null_filter(ctx, stm, length, offset); | |
| 2133 | |
| 2134 obj = pdf_parse_stm_obj(ctx, doc, sub, buf); | |
| 2135 fz_drop_stream(ctx, sub); | |
| 2136 sub = NULL; | |
| 2137 | |
| 2138 entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[i]); | |
| 2139 | |
| 2140 pdf_set_obj_parent(ctx, obj, numbuf[i]); | |
| 2141 | |
| 2142 /* We may have set entry->type to be 'O' from being 'o' to avoid nasty | |
| 2143 * recursions in pdf_cache_object. Accept the type being 'O' here. */ | |
| 2144 if ((entry->type == 'o' || entry->type == 'O') && entry->ofs == num) | |
| 2145 { | |
| 2146 /* If we already have an entry for this object, | |
| 2147 * we'd like to drop it and use the new one - | |
| 2148 * but this means that anyone currently holding | |
| 2149 * a pointer to the old one will be left with a | |
| 2150 * stale pointer. Instead, we drop the new one | |
| 2151 * and trust that the old one is correct. */ | |
| 2152 if (entry->obj) | |
| 2153 { | |
| 2154 if (pdf_objcmp(ctx, entry->obj, obj)) | |
| 2155 fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]); | |
| 2156 pdf_drop_obj(ctx, obj); | |
| 2157 } | |
| 2158 else | |
| 2159 { | |
| 2160 entry->obj = obj; | |
| 2161 /* If we've just read a 'null' object, don't leave this as a NULL 'o' object, | |
| 2162 * as that will a) confuse the code that called us into thinking that nothing | |
| 2163 * was loaded, and b) cause the entire objstm to be reloaded every time that | |
| 2164 * object is accessed. Instead, just mark it as an 'f'. */ | |
| 2165 if (obj == NULL) | |
| 2166 entry->type = 'f'; | |
| 2167 fz_drop_buffer(ctx, entry->stm_buf); | |
| 2168 entry->stm_buf = NULL; | |
| 2169 } | |
| 2170 if (numbuf[i] == target) | |
| 2171 ret_idx = i; | |
| 2172 } | |
| 2173 else | |
| 2174 { | |
| 2175 pdf_drop_obj(ctx, obj); | |
| 2176 } | |
| 2177 } | |
| 2178 /* Parsing our way through the stream can cause the xref to be | |
| 2179 * solidified, which will move an entry. We therefore can't | |
| 2180 * read the entry for returning until no more parsing is to be | |
| 2181 * done. Thus we end up reading this entry twice. */ | |
| 2182 if (ret_idx >= 0) | |
| 2183 ret_entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[ret_idx]); | |
| 2184 } | |
| 2185 fz_always(ctx) | |
| 2186 { | |
| 2187 fz_drop_stream(ctx, stm); | |
| 2188 fz_drop_stream(ctx, sub); | |
| 2189 fz_free(ctx, ofsbuf); | |
| 2190 fz_free(ctx, numbuf); | |
| 2191 pdf_unmark_obj(ctx, objstm); | |
| 2192 pdf_drop_obj(ctx, objstm); | |
| 2193 } | |
| 2194 fz_catch(ctx) | |
| 2195 { | |
| 2196 fz_rethrow(ctx); | |
| 2197 } | |
| 2198 return ret_entry; | |
| 2199 } | |
| 2200 | |
| 2201 /* | |
| 2202 * object loading | |
| 2203 */ | |
| 2204 static int | |
| 2205 pdf_obj_read(fz_context *ctx, pdf_document *doc, int64_t *offset, int *nump, pdf_obj **page) | |
| 2206 { | |
| 2207 pdf_lexbuf *buf = &doc->lexbuf.base; | |
| 2208 int num, gen, tok; | |
| 2209 int64_t numofs, genofs, stmofs, tmpofs, newtmpofs; | |
| 2210 int xref_len; | |
| 2211 pdf_xref_entry *entry; | |
| 2212 | |
| 2213 numofs = *offset; | |
| 2214 fz_seek(ctx, doc->file, doc->bias + numofs, SEEK_SET); | |
| 2215 | |
| 2216 /* We expect to read 'num' here */ | |
| 2217 tok = pdf_lex(ctx, doc->file, buf); | |
| 2218 genofs = fz_tell(ctx, doc->file); | |
| 2219 if (tok != PDF_TOK_INT) | |
| 2220 { | |
| 2221 /* Failed! */ | |
| 2222 DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset)); | |
| 2223 *offset = genofs; | |
| 2224 return tok == PDF_TOK_EOF; | |
| 2225 } | |
| 2226 *nump = num = buf->i; | |
| 2227 | |
| 2228 /* We expect to read 'gen' here */ | |
| 2229 tok = pdf_lex(ctx, doc->file, buf); | |
| 2230 tmpofs = fz_tell(ctx, doc->file); | |
| 2231 if (tok != PDF_TOK_INT) | |
| 2232 { | |
| 2233 /* Failed! */ | |
| 2234 DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset)); | |
| 2235 *offset = tmpofs; | |
| 2236 return tok == PDF_TOK_EOF; | |
| 2237 } | |
| 2238 gen = buf->i; | |
| 2239 | |
| 2240 /* We expect to read 'obj' here */ | |
| 2241 do | |
| 2242 { | |
| 2243 tmpofs = fz_tell(ctx, doc->file); | |
| 2244 tok = pdf_lex(ctx, doc->file, buf); | |
| 2245 if (tok == PDF_TOK_OBJ) | |
| 2246 break; | |
| 2247 if (tok != PDF_TOK_INT) | |
| 2248 { | |
| 2249 DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs)); | |
| 2250 *offset = fz_tell(ctx, doc->file); | |
| 2251 return tok == PDF_TOK_EOF; | |
| 2252 } | |
| 2253 DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs)); | |
| 2254 *nump = num = gen; | |
| 2255 numofs = genofs; | |
| 2256 gen = buf->i; | |
| 2257 genofs = tmpofs; | |
| 2258 } | |
| 2259 while (1); | |
| 2260 | |
| 2261 /* Now we read the actual object */ | |
| 2262 xref_len = pdf_xref_len(ctx, doc); | |
| 2263 | |
| 2264 /* When we are reading a progressive file, we typically see: | |
| 2265 * File Header | |
| 2266 * obj m (Linearization params) | |
| 2267 * xref #1 (refers to objects m-n) | |
| 2268 * obj m+1 | |
| 2269 * ... | |
| 2270 * obj n | |
| 2271 * obj 1 | |
| 2272 * ... | |
| 2273 * obj n-1 | |
| 2274 * xref #2 | |
| 2275 * | |
| 2276 * The linearisation params are read elsewhere, hence | |
| 2277 * whenever we read an object it should just go into the | |
| 2278 * previous xref. | |
| 2279 */ | |
| 2280 tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL); | |
| 2281 | |
| 2282 do /* So we can break out of it */ | |
| 2283 { | |
| 2284 if (num <= 0 || num >= xref_len) | |
| 2285 { | |
| 2286 fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen); | |
| 2287 break; | |
| 2288 } | |
| 2289 if (gen != 0) | |
| 2290 { | |
| 2291 fz_warn(ctx, "Unexpected non zero generation number in linearized file"); | |
| 2292 } | |
| 2293 entry = pdf_get_populating_xref_entry(ctx, doc, num); | |
| 2294 if (entry->type != 0) | |
| 2295 { | |
| 2296 DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen)); | |
| 2297 break; | |
| 2298 } | |
| 2299 if (page && *page) | |
| 2300 { | |
| 2301 DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num)); | |
| 2302 if (!entry->obj) | |
| 2303 entry->obj = pdf_keep_obj(ctx, *page); | |
| 2304 | |
| 2305 if (doc->linear_page_refs[doc->linear_page_num] == NULL) | |
| 2306 doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen); | |
| 2307 } | |
| 2308 else | |
| 2309 { | |
| 2310 DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs)); | |
| 2311 } | |
| 2312 entry->type = 'n'; | |
| 2313 entry->gen = gen; // XXX: was 0 | |
| 2314 entry->num = num; | |
| 2315 entry->ofs = numofs; | |
| 2316 entry->stm_ofs = stmofs; | |
| 2317 } | |
| 2318 while (0); | |
| 2319 if (page && *page) | |
| 2320 doc->linear_page_num++; | |
| 2321 | |
| 2322 if (tok == PDF_TOK_ENDOBJ) | |
| 2323 { | |
| 2324 *offset = fz_tell(ctx, doc->file); | |
| 2325 } | |
| 2326 else | |
| 2327 { | |
| 2328 *offset = newtmpofs; | |
| 2329 } | |
| 2330 return 0; | |
| 2331 } | |
| 2332 | |
| 2333 static void | |
| 2334 pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum) | |
| 2335 { | |
| 2336 pdf_obj *page = NULL; | |
| 2337 | |
| 2338 if (!doc->hints_loaded || !doc->linear_page_refs) | |
| 2339 return; | |
| 2340 | |
| 2341 if (doc->linear_page_refs[pagenum]) | |
| 2342 return; | |
| 2343 | |
| 2344 fz_var(page); | |
| 2345 | |
| 2346 fz_try(ctx) | |
| 2347 { | |
| 2348 int num = doc->hint_page[pagenum].number; | |
| 2349 page = pdf_load_object(ctx, doc, num); | |
| 2350 if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type)))) | |
| 2351 { | |
| 2352 /* We have found the page object! */ | |
| 2353 DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num)); | |
| 2354 doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0); | |
| 2355 } | |
| 2356 } | |
| 2357 fz_always(ctx) | |
| 2358 pdf_drop_obj(ctx, page); | |
| 2359 fz_catch(ctx) | |
| 2360 { | |
| 2361 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 2362 /* Swallow the error and proceed as normal */ | |
| 2363 fz_report_error(ctx); | |
| 2364 } | |
| 2365 } | |
| 2366 | |
| 2367 static int | |
| 2368 read_hinted_object(fz_context *ctx, pdf_document *doc, int num) | |
| 2369 { | |
| 2370 /* Try to find the object using our hint table. Find the closest | |
| 2371 * object <= the one we want that has a hint and read forward from | |
| 2372 * there. */ | |
| 2373 int expected = num; | |
| 2374 int curr_pos; | |
| 2375 int64_t start, offset; | |
| 2376 | |
| 2377 while (doc->hint_obj_offsets[expected] == 0 && expected > 0) | |
| 2378 expected--; | |
| 2379 if (expected != num) | |
| 2380 DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num)); | |
| 2381 if (expected == 0) /* No hints found, just bail */ | |
| 2382 return 0; | |
| 2383 | |
| 2384 curr_pos = fz_tell(ctx, doc->file); | |
| 2385 offset = doc->hint_obj_offsets[expected]; | |
| 2386 | |
| 2387 fz_var(expected); | |
| 2388 | |
| 2389 fz_try(ctx) | |
| 2390 { | |
| 2391 int found; | |
| 2392 | |
| 2393 /* Try to read forward from there */ | |
| 2394 do | |
| 2395 { | |
| 2396 start = offset; | |
| 2397 DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset)); | |
| 2398 pdf_obj_read(ctx, doc, &offset, &found, 0); | |
| 2399 DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset)); | |
| 2400 if (found <= expected) | |
| 2401 { | |
| 2402 /* We found the right one (or one earlier than | |
| 2403 * we expected). Update the hints. */ | |
| 2404 doc->hint_obj_offsets[expected] = offset; | |
| 2405 doc->hint_obj_offsets[found] = start; | |
| 2406 doc->hint_obj_offsets[found+1] = offset; | |
| 2407 /* Retry with the next one */ | |
| 2408 expected = found+1; | |
| 2409 } | |
| 2410 else | |
| 2411 { | |
| 2412 /* We found one later than we expected. */ | |
| 2413 doc->hint_obj_offsets[expected] = 0; | |
| 2414 doc->hint_obj_offsets[found] = start; | |
| 2415 doc->hint_obj_offsets[found+1] = offset; | |
| 2416 while (doc->hint_obj_offsets[expected] == 0 && expected > 0) | |
| 2417 expected--; | |
| 2418 if (expected == 0) /* No hints found, we give up */ | |
| 2419 break; | |
| 2420 } | |
| 2421 } | |
| 2422 while (found != num); | |
| 2423 } | |
| 2424 fz_always(ctx) | |
| 2425 { | |
| 2426 fz_seek(ctx, doc->file, curr_pos, SEEK_SET); | |
| 2427 } | |
| 2428 fz_catch(ctx) | |
| 2429 { | |
| 2430 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 2431 /* FIXME: Currently we ignore the hint. Perhaps we should | |
| 2432 * drop back to non-hinted operation here. */ | |
| 2433 doc->hint_obj_offsets[expected] = 0; | |
| 2434 fz_rethrow(ctx); | |
| 2435 } | |
| 2436 return expected != 0; | |
| 2437 } | |
| 2438 | |
| 2439 pdf_obj * | |
| 2440 pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num) | |
| 2441 { | |
| 2442 pdf_xref_entry *x; | |
| 2443 | |
| 2444 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 2445 fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc)); | |
| 2446 | |
| 2447 x = pdf_get_xref_entry_no_null(ctx, doc, num); | |
| 2448 if (x->type == 'n') | |
| 2449 { | |
| 2450 fz_seek(ctx, doc->file, doc->bias + x->ofs, SEEK_SET); | |
| 2451 return pdf_parse_ind_obj(ctx, doc, doc->file, NULL, NULL, NULL, NULL); | |
| 2452 } | |
| 2453 return NULL; | |
| 2454 } | |
| 2455 | |
| 2456 int | |
| 2457 pdf_object_exists(fz_context *ctx, pdf_document *doc, int num) | |
| 2458 { | |
| 2459 pdf_xref_entry *x; | |
| 2460 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 2461 return 0; | |
| 2462 x = pdf_get_xref_entry(ctx, doc, num); | |
| 2463 if (x && (x->type == 'n' || x->type == 'o')) | |
| 2464 return 1; | |
| 2465 return 0; | |
| 2466 } | |
| 2467 | |
| 2468 pdf_xref_entry * | |
| 2469 pdf_cache_object(fz_context *ctx, pdf_document *doc, int num) | |
| 2470 { | |
| 2471 pdf_xref_entry *x; | |
| 2472 int rnum, rgen, try_repair; | |
| 2473 | |
| 2474 fz_var(try_repair); | |
| 2475 | |
| 2476 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 2477 fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc)); | |
| 2478 | |
| 2479 object_updated: | |
| 2480 try_repair = 0; | |
| 2481 rnum = num; | |
| 2482 | |
| 2483 x = pdf_get_xref_entry(ctx, doc, num); | |
| 2484 if (x == NULL) | |
| 2485 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num); | |
| 2486 | |
| 2487 if (x->obj != NULL) | |
| 2488 return x; | |
| 2489 | |
| 2490 if (x->type == 'f') | |
| 2491 { | |
| 2492 x->obj = PDF_NULL; | |
| 2493 } | |
| 2494 else if (x->type == 'n') | |
| 2495 { | |
| 2496 fz_seek(ctx, doc->file, doc->bias + x->ofs, SEEK_SET); | |
| 2497 | |
| 2498 fz_try(ctx) | |
| 2499 { | |
| 2500 x->obj = pdf_parse_ind_obj(ctx, doc, doc->file, | |
| 2501 &rnum, &rgen, &x->stm_ofs, &try_repair); | |
| 2502 } | |
| 2503 fz_catch(ctx) | |
| 2504 { | |
| 2505 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 2506 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 2507 if (!try_repair) | |
| 2508 fz_rethrow(ctx); | |
| 2509 else | |
| 2510 fz_report_error(ctx); | |
| 2511 } | |
| 2512 | |
| 2513 if (!try_repair && rnum != num) | |
| 2514 { | |
| 2515 pdf_drop_obj(ctx, x->obj); | |
| 2516 x->type = 'f'; | |
| 2517 x->ofs = -1; | |
| 2518 x->gen = 0; | |
| 2519 x->num = 0; | |
| 2520 x->stm_ofs = 0; | |
| 2521 x->obj = NULL; | |
| 2522 try_repair = (doc->repair_attempted == 0); | |
| 2523 } | |
| 2524 | |
| 2525 if (try_repair) | |
| 2526 { | |
| 2527 perform_repair: | |
| 2528 fz_try(ctx) | |
| 2529 pdf_repair_xref(ctx, doc); | |
| 2530 fz_catch(ctx) | |
| 2531 { | |
| 2532 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 2533 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 2534 fz_rethrow_if(ctx, FZ_ERROR_REPAIRED); | |
| 2535 fz_report_error(ctx); | |
| 2536 if (rnum == num) | |
| 2537 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot parse object (%d 0 R)", num); | |
| 2538 else | |
| 2539 fz_throw(ctx, FZ_ERROR_FORMAT, "found object (%d 0 R) instead of (%d 0 R)", rnum, num); | |
| 2540 } | |
| 2541 goto object_updated; | |
| 2542 } | |
| 2543 | |
| 2544 if (doc->crypt) | |
| 2545 pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen); | |
| 2546 } | |
| 2547 else if (x->type == 'o') | |
| 2548 { | |
| 2549 if (!x->obj) | |
| 2550 { | |
| 2551 pdf_xref_entry *orig_x = x; | |
| 2552 pdf_xref_entry *ox = x; /* This init is unused, but it shuts warnings up. */ | |
| 2553 orig_x->type = 'O'; /* Mark this node so we know we're recursing. */ | |
| 2554 fz_try(ctx) | |
| 2555 x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num); | |
| 2556 fz_always(ctx) | |
| 2557 { | |
| 2558 /* Most of the time ox == orig_x, but if pdf_load_obj_stm performed a | |
| 2559 * repair, it may not be. It is safe to call pdf_get_xref_entry_no_change | |
| 2560 * here, as it does not try/catch. */ | |
| 2561 ox = pdf_get_xref_entry_no_change(ctx, doc, num); | |
| 2562 /* Bug 706762: ox can be NULL if the object went away during a repair. */ | |
| 2563 if (ox && ox->type == 'O') | |
| 2564 ox->type = 'o'; /* Not recursing any more. */ | |
| 2565 } | |
| 2566 fz_catch(ctx) | |
| 2567 fz_rethrow(ctx); | |
| 2568 if (x == NULL) | |
| 2569 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot load object stream containing object (%d 0 R)", num); | |
| 2570 if (!x->obj) | |
| 2571 { | |
| 2572 x->type = 'f'; | |
| 2573 if (ox) | |
| 2574 ox->type = 'f'; | |
| 2575 if (doc->repair_attempted) | |
| 2576 fz_throw(ctx, FZ_ERROR_FORMAT, "object (%d 0 R) was not found in its object stream", num); | |
| 2577 goto perform_repair; | |
| 2578 } | |
| 2579 } | |
| 2580 } | |
| 2581 else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num)) | |
| 2582 { | |
| 2583 goto object_updated; | |
| 2584 } | |
| 2585 else if (doc->file_length && doc->linear_pos < doc->file_length) | |
| 2586 { | |
| 2587 fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num); | |
| 2588 } | |
| 2589 else | |
| 2590 { | |
| 2591 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num); | |
| 2592 } | |
| 2593 | |
| 2594 pdf_set_obj_parent(ctx, x->obj, num); | |
| 2595 return x; | |
| 2596 } | |
| 2597 | |
| 2598 pdf_obj * | |
| 2599 pdf_load_object(fz_context *ctx, pdf_document *doc, int num) | |
| 2600 { | |
| 2601 pdf_xref_entry *entry = pdf_cache_object(ctx, doc, num); | |
| 2602 return pdf_keep_obj(ctx, entry->obj); | |
| 2603 } | |
| 2604 | |
| 2605 pdf_obj * | |
| 2606 pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref) | |
| 2607 { | |
| 2608 if (pdf_is_indirect(ctx, ref)) | |
| 2609 { | |
| 2610 pdf_document *doc = pdf_get_indirect_document(ctx, ref); | |
| 2611 int num = pdf_to_num(ctx, ref); | |
| 2612 pdf_xref_entry *entry; | |
| 2613 | |
| 2614 if (!doc) | |
| 2615 return NULL; | |
| 2616 if (num <= 0) | |
| 2617 { | |
| 2618 fz_warn(ctx, "invalid indirect reference (%d 0 R)", num); | |
| 2619 return NULL; | |
| 2620 } | |
| 2621 | |
| 2622 fz_try(ctx) | |
| 2623 entry = pdf_cache_object(ctx, doc, num); | |
| 2624 fz_catch(ctx) | |
| 2625 { | |
| 2626 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 2627 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 2628 fz_rethrow_if(ctx, FZ_ERROR_REPAIRED); | |
| 2629 fz_report_error(ctx); | |
| 2630 fz_warn(ctx, "cannot load object (%d 0 R) into cache", num); | |
| 2631 return NULL; | |
| 2632 } | |
| 2633 | |
| 2634 ref = entry->obj; | |
| 2635 } | |
| 2636 return ref; | |
| 2637 } | |
| 2638 | |
| 2639 pdf_obj * | |
| 2640 pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref) | |
| 2641 { | |
| 2642 int sanity = 10; | |
| 2643 | |
| 2644 while (pdf_is_indirect(ctx, ref)) | |
| 2645 { | |
| 2646 if (--sanity == 0) | |
| 2647 { | |
| 2648 fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref)); | |
| 2649 return NULL; | |
| 2650 } | |
| 2651 | |
| 2652 ref = pdf_resolve_indirect(ctx, ref); | |
| 2653 } | |
| 2654 | |
| 2655 return ref; | |
| 2656 } | |
| 2657 | |
| 2658 int | |
| 2659 pdf_count_objects(fz_context *ctx, pdf_document *doc) | |
| 2660 { | |
| 2661 return pdf_xref_len(ctx, doc); | |
| 2662 } | |
| 2663 | |
| 2664 int | |
| 2665 pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj) | |
| 2666 { | |
| 2667 pdf_xref *xref = doc->local_xref; | |
| 2668 pdf_xref_subsec *sub; | |
| 2669 int num; | |
| 2670 | |
| 2671 if (!pdf_is_indirect(ctx, obj)) | |
| 2672 return 0; | |
| 2673 | |
| 2674 if (xref == NULL) | |
| 2675 return 0; /* no local xref present */ | |
| 2676 | |
| 2677 num = pdf_to_num(ctx, obj); | |
| 2678 | |
| 2679 /* Local xrefs only ever have 1 section, and it should be solid. */ | |
| 2680 sub = xref->subsec; | |
| 2681 if (num >= sub->start && num < sub->start + sub->len) | |
| 2682 return sub->table[num - sub->start].type != 0; | |
| 2683 | |
| 2684 return 0; | |
| 2685 } | |
| 2686 | |
| 2687 static int | |
| 2688 pdf_create_local_object(fz_context *ctx, pdf_document *doc) | |
| 2689 { | |
| 2690 /* TODO: reuse free object slots by properly linking free object chains in the ofs field */ | |
| 2691 pdf_xref_entry *entry; | |
| 2692 int num; | |
| 2693 | |
| 2694 num = doc->local_xref->num_objects; | |
| 2695 | |
| 2696 entry = pdf_get_local_xref_entry(ctx, doc, num); | |
| 2697 entry->type = 'f'; | |
| 2698 entry->ofs = -1; | |
| 2699 entry->gen = 0; | |
| 2700 entry->num = num; | |
| 2701 entry->stm_ofs = 0; | |
| 2702 entry->stm_buf = NULL; | |
| 2703 entry->obj = NULL; | |
| 2704 return num; | |
| 2705 } | |
| 2706 | |
| 2707 int | |
| 2708 pdf_create_object(fz_context *ctx, pdf_document *doc) | |
| 2709 { | |
| 2710 /* TODO: reuse free object slots by properly linking free object chains in the ofs field */ | |
| 2711 pdf_xref_entry *entry; | |
| 2712 int num; | |
| 2713 | |
| 2714 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 2715 return pdf_create_local_object(ctx, doc); | |
| 2716 | |
| 2717 num = pdf_xref_len(ctx, doc); | |
| 2718 | |
| 2719 if (num > PDF_MAX_OBJECT_NUMBER) | |
| 2720 fz_throw(ctx, FZ_ERROR_LIMIT, "too many objects stored in pdf"); | |
| 2721 | |
| 2722 entry = pdf_get_incremental_xref_entry(ctx, doc, num); | |
| 2723 entry->type = 'f'; | |
| 2724 entry->ofs = -1; | |
| 2725 entry->gen = 0; | |
| 2726 entry->num = num; | |
| 2727 entry->stm_ofs = 0; | |
| 2728 entry->stm_buf = NULL; | |
| 2729 entry->obj = NULL; | |
| 2730 | |
| 2731 pdf_add_journal_fragment(ctx, doc, num, NULL, NULL, 1); | |
| 2732 | |
| 2733 return num; | |
| 2734 } | |
| 2735 | |
| 2736 static void | |
| 2737 pdf_delete_local_object(fz_context *ctx, pdf_document *doc, int num) | |
| 2738 { | |
| 2739 pdf_xref_entry *x; | |
| 2740 | |
| 2741 if (doc->local_xref == NULL || doc->local_xref_nesting == 0) | |
| 2742 fz_throw(ctx, FZ_ERROR_ARGUMENT, "No local xref to delete from!"); | |
| 2743 | |
| 2744 if (num <= 0 || num >= doc->local_xref->num_objects) | |
| 2745 { | |
| 2746 fz_warn(ctx, "local object out of range (%d 0 R); xref size %d", num, doc->local_xref->num_objects); | |
| 2747 return; | |
| 2748 } | |
| 2749 | |
| 2750 x = pdf_get_local_xref_entry(ctx, doc, num); | |
| 2751 | |
| 2752 fz_drop_buffer(ctx, x->stm_buf); | |
| 2753 pdf_drop_obj(ctx, x->obj); | |
| 2754 | |
| 2755 x->type = 'f'; | |
| 2756 x->ofs = 0; | |
| 2757 x->gen += 1; | |
| 2758 x->num = 0; | |
| 2759 x->stm_ofs = 0; | |
| 2760 x->stm_buf = NULL; | |
| 2761 x->obj = NULL; | |
| 2762 } | |
| 2763 | |
| 2764 void | |
| 2765 pdf_delete_object(fz_context *ctx, pdf_document *doc, int num) | |
| 2766 { | |
| 2767 pdf_xref_entry *x; | |
| 2768 pdf_xref *xref; | |
| 2769 int j; | |
| 2770 | |
| 2771 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 2772 { | |
| 2773 pdf_delete_local_object(ctx, doc, num); | |
| 2774 return; | |
| 2775 } | |
| 2776 | |
| 2777 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 2778 { | |
| 2779 fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc)); | |
| 2780 return; | |
| 2781 } | |
| 2782 | |
| 2783 x = pdf_get_incremental_xref_entry(ctx, doc, num); | |
| 2784 | |
| 2785 fz_drop_buffer(ctx, x->stm_buf); | |
| 2786 pdf_drop_obj(ctx, x->obj); | |
| 2787 | |
| 2788 x->type = 'f'; | |
| 2789 x->ofs = 0; | |
| 2790 x->gen += 1; | |
| 2791 x->num = 0; | |
| 2792 x->stm_ofs = 0; | |
| 2793 x->stm_buf = NULL; | |
| 2794 x->obj = NULL; | |
| 2795 | |
| 2796 /* Currently we've left a 'free' object in the incremental | |
| 2797 * section. This is enough to cause us to think that the | |
| 2798 * document has changes. Check back in the non-incremental | |
| 2799 * sections to see if the last instance of the object there | |
| 2800 * was free (or if this object never appeared). If so, we | |
| 2801 * can mark this object as non-existent in the incremental | |
| 2802 * xref. This is important so we can 'undo' back to emptiness | |
| 2803 * after we save/when we reload a snapshot. */ | |
| 2804 for (j = 1; j < doc->num_xref_sections; j++) | |
| 2805 { | |
| 2806 xref = &doc->xref_sections[j]; | |
| 2807 | |
| 2808 if (num < xref->num_objects) | |
| 2809 { | |
| 2810 pdf_xref_subsec *sub; | |
| 2811 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 2812 { | |
| 2813 pdf_xref_entry *entry; | |
| 2814 | |
| 2815 if (num < sub->start || num >= sub->start + sub->len) | |
| 2816 continue; | |
| 2817 | |
| 2818 entry = &sub->table[num - sub->start]; | |
| 2819 if (entry->type) | |
| 2820 { | |
| 2821 if (entry->type == 'f') | |
| 2822 { | |
| 2823 /* It was free already! */ | |
| 2824 x->type = 0; | |
| 2825 x->gen = 0; | |
| 2826 } | |
| 2827 /* It was a real object. */ | |
| 2828 return; | |
| 2829 } | |
| 2830 } | |
| 2831 } | |
| 2832 } | |
| 2833 /* It never appeared before. */ | |
| 2834 x->type = 0; | |
| 2835 x->gen = 0; | |
| 2836 } | |
| 2837 | |
| 2838 static void | |
| 2839 pdf_update_local_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj) | |
| 2840 { | |
| 2841 pdf_xref_entry *x; | |
| 2842 | |
| 2843 if (doc->local_xref == NULL || doc->local_xref_nesting == 0) | |
| 2844 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update local object without a local xref"); | |
| 2845 | |
| 2846 if (!newobj) | |
| 2847 { | |
| 2848 pdf_delete_local_object(ctx, doc, num); | |
| 2849 return; | |
| 2850 } | |
| 2851 | |
| 2852 x = pdf_get_local_xref_entry(ctx, doc, num); | |
| 2853 | |
| 2854 pdf_drop_obj(ctx, x->obj); | |
| 2855 | |
| 2856 x->type = 'n'; | |
| 2857 x->ofs = 0; | |
| 2858 x->obj = pdf_keep_obj(ctx, newobj); | |
| 2859 | |
| 2860 pdf_set_obj_parent(ctx, newobj, num); | |
| 2861 } | |
| 2862 | |
| 2863 void | |
| 2864 pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj) | |
| 2865 { | |
| 2866 pdf_xref_entry *x; | |
| 2867 | |
| 2868 if (!doc) | |
| 2869 return; | |
| 2870 | |
| 2871 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 2872 { | |
| 2873 pdf_update_local_object(ctx, doc, num, newobj); | |
| 2874 return; | |
| 2875 } | |
| 2876 | |
| 2877 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 2878 { | |
| 2879 fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc)); | |
| 2880 return; | |
| 2881 } | |
| 2882 | |
| 2883 if (!newobj) | |
| 2884 { | |
| 2885 pdf_delete_object(ctx, doc, num); | |
| 2886 return; | |
| 2887 } | |
| 2888 | |
| 2889 x = pdf_get_incremental_xref_entry(ctx, doc, num); | |
| 2890 | |
| 2891 pdf_drop_obj(ctx, x->obj); | |
| 2892 | |
| 2893 x->type = 'n'; | |
| 2894 x->ofs = 0; | |
| 2895 x->obj = pdf_keep_obj(ctx, newobj); | |
| 2896 | |
| 2897 pdf_set_obj_parent(ctx, newobj, num); | |
| 2898 } | |
| 2899 | |
| 2900 void | |
| 2901 pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed) | |
| 2902 { | |
| 2903 int num; | |
| 2904 pdf_xref_entry *x; | |
| 2905 | |
| 2906 if (pdf_is_indirect(ctx, obj)) | |
| 2907 num = pdf_to_num(ctx, obj); | |
| 2908 else | |
| 2909 num = pdf_obj_parent_num(ctx, obj); | |
| 2910 | |
| 2911 /* Write the Length first, as this has the effect of moving the | |
| 2912 * old object into the journal for undo. This also moves the | |
| 2913 * stream buffer with it, keeping it consistent. */ | |
| 2914 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), fz_buffer_storage(ctx, newbuf, NULL)); | |
| 2915 | |
| 2916 if (doc->local_xref && doc->local_xref_nesting > 0) | |
| 2917 { | |
| 2918 x = pdf_get_local_xref_entry(ctx, doc, num); | |
| 2919 } | |
| 2920 else | |
| 2921 { | |
| 2922 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 2923 { | |
| 2924 fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc)); | |
| 2925 return; | |
| 2926 } | |
| 2927 | |
| 2928 x = pdf_get_xref_entry_no_null(ctx, doc, num); | |
| 2929 } | |
| 2930 | |
| 2931 fz_drop_buffer(ctx, x->stm_buf); | |
| 2932 x->stm_buf = fz_keep_buffer(ctx, newbuf); | |
| 2933 | |
| 2934 if (!compressed) | |
| 2935 { | |
| 2936 pdf_dict_del(ctx, obj, PDF_NAME(Filter)); | |
| 2937 pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms)); | |
| 2938 } | |
| 2939 } | |
| 2940 | |
| 2941 int | |
| 2942 pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, size_t size) | |
| 2943 { | |
| 2944 if (!strcmp(key, FZ_META_FORMAT)) | |
| 2945 { | |
| 2946 int version = pdf_version(ctx, doc); | |
| 2947 return 1 + (int)fz_snprintf(buf, size, "PDF %d.%d", version/10, version % 10); | |
| 2948 } | |
| 2949 | |
| 2950 if (!strcmp(key, FZ_META_ENCRYPTION)) | |
| 2951 { | |
| 2952 if (doc->crypt) | |
| 2953 { | |
| 2954 const char *stream_method = pdf_crypt_stream_method(ctx, doc->crypt); | |
| 2955 const char *string_method = pdf_crypt_string_method(ctx, doc->crypt); | |
| 2956 if (stream_method == string_method) | |
| 2957 return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s", | |
| 2958 pdf_crypt_version(ctx, doc->crypt), | |
| 2959 pdf_crypt_revision(ctx, doc->crypt), | |
| 2960 pdf_crypt_length(ctx, doc->crypt), | |
| 2961 pdf_crypt_string_method(ctx, doc->crypt)); | |
| 2962 else | |
| 2963 return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit streams: %s strings: %s", | |
| 2964 pdf_crypt_version(ctx, doc->crypt), | |
| 2965 pdf_crypt_revision(ctx, doc->crypt), | |
| 2966 pdf_crypt_length(ctx, doc->crypt), | |
| 2967 pdf_crypt_stream_method(ctx, doc->crypt), | |
| 2968 pdf_crypt_string_method(ctx, doc->crypt)); | |
| 2969 } | |
| 2970 else | |
| 2971 return 1 + (int)fz_strlcpy(buf, "None", size); | |
| 2972 } | |
| 2973 | |
| 2974 if (strstr(key, "info:") == key) | |
| 2975 { | |
| 2976 pdf_obj *info; | |
| 2977 const char *s; | |
| 2978 int n; | |
| 2979 | |
| 2980 info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)); | |
| 2981 if (!info) | |
| 2982 return -1; | |
| 2983 | |
| 2984 info = pdf_dict_gets(ctx, info, key + 5); | |
| 2985 if (!info) | |
| 2986 return -1; | |
| 2987 | |
| 2988 s = pdf_to_text_string(ctx, info); | |
| 2989 if (strlen(s) <= 0) | |
| 2990 return -1; | |
| 2991 | |
| 2992 n = 1 + (int)fz_strlcpy(buf, s, size); | |
| 2993 return n; | |
| 2994 } | |
| 2995 | |
| 2996 return -1; | |
| 2997 } | |
| 2998 | |
| 2999 void | |
| 3000 pdf_set_metadata(fz_context *ctx, pdf_document *doc, const char *key, const char *value) | |
| 3001 { | |
| 3002 | |
| 3003 pdf_obj *info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)); | |
| 3004 | |
| 3005 pdf_begin_operation(ctx, doc, "Set Metadata"); | |
| 3006 | |
| 3007 fz_try(ctx) | |
| 3008 { | |
| 3009 /* Ensure we have an Info dictionary. */ | |
| 3010 if (!pdf_is_dict(ctx, info)) | |
| 3011 { | |
| 3012 info = pdf_add_new_dict(ctx, doc, 8); | |
| 3013 pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info); | |
| 3014 } | |
| 3015 | |
| 3016 if (!strcmp(key, FZ_META_INFO_TITLE)) | |
| 3017 pdf_dict_put_text_string(ctx, info, PDF_NAME(Title), value); | |
| 3018 else if (!strcmp(key, FZ_META_INFO_AUTHOR)) | |
| 3019 pdf_dict_put_text_string(ctx, info, PDF_NAME(Author), value); | |
| 3020 else if (!strcmp(key, FZ_META_INFO_SUBJECT)) | |
| 3021 pdf_dict_put_text_string(ctx, info, PDF_NAME(Subject), value); | |
| 3022 else if (!strcmp(key, FZ_META_INFO_KEYWORDS)) | |
| 3023 pdf_dict_put_text_string(ctx, info, PDF_NAME(Keywords), value); | |
| 3024 else if (!strcmp(key, FZ_META_INFO_CREATOR)) | |
| 3025 pdf_dict_put_text_string(ctx, info, PDF_NAME(Creator), value); | |
| 3026 else if (!strcmp(key, FZ_META_INFO_PRODUCER)) | |
| 3027 pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), value); | |
| 3028 else if (!strcmp(key, FZ_META_INFO_CREATIONDATE)) | |
| 3029 { | |
| 3030 int64_t time = pdf_parse_date(ctx, value); | |
| 3031 if (time >= 0) | |
| 3032 pdf_dict_put_date(ctx, info, PDF_NAME(CreationDate), time); | |
| 3033 } | |
| 3034 else if (!strcmp(key, FZ_META_INFO_MODIFICATIONDATE)) | |
| 3035 { | |
| 3036 int64_t time = pdf_parse_date(ctx, value); | |
| 3037 if (time >= 0) | |
| 3038 pdf_dict_put_date(ctx, info, PDF_NAME(ModDate), time); | |
| 3039 } | |
| 3040 | |
| 3041 if (!strncmp(key, FZ_META_INFO, strlen(FZ_META_INFO))) | |
| 3042 key += strlen(FZ_META_INFO); | |
| 3043 pdf_dict_put_text_string(ctx, info, pdf_new_name(ctx, key), value); | |
| 3044 pdf_end_operation(ctx, doc); | |
| 3045 } | |
| 3046 fz_catch(ctx) | |
| 3047 { | |
| 3048 pdf_abandon_operation(ctx, doc); | |
| 3049 fz_rethrow(ctx); | |
| 3050 } | |
| 3051 } | |
| 3052 | |
| 3053 static fz_link_dest | |
| 3054 pdf_resolve_link_imp(fz_context *ctx, fz_document *doc_, const char *uri) | |
| 3055 { | |
| 3056 pdf_document *doc = (pdf_document*)doc_; | |
| 3057 return pdf_resolve_link_dest(ctx, doc, uri); | |
| 3058 } | |
| 3059 | |
| 3060 char *pdf_format_link_uri(fz_context *ctx, fz_document *doc, fz_link_dest dest) | |
| 3061 { | |
| 3062 return pdf_new_uri_from_explicit_dest(ctx, dest); | |
| 3063 } | |
| 3064 | |
| 3065 static fz_document * | |
| 3066 as_pdf(fz_context *ctx, fz_document *doc) | |
| 3067 { | |
| 3068 return doc; | |
| 3069 } | |
| 3070 | |
| 3071 /* | |
| 3072 Initializers for the fz_document interface. | |
| 3073 | |
| 3074 The functions are split across two files to allow calls to a | |
| 3075 version of the constructor that does not link in the interpreter. | |
| 3076 The interpreter references the built-in font and cmap resources | |
| 3077 which are quite big. Not linking those into the mutool binary | |
| 3078 saves roughly 6MB of space. | |
| 3079 */ | |
| 3080 | |
| 3081 static fz_colorspace *pdf_document_output_intent_imp(fz_context *ctx, fz_document *doc) | |
| 3082 { | |
| 3083 return pdf_document_output_intent(ctx, (pdf_document*)doc); | |
| 3084 } | |
| 3085 | |
| 3086 int pdf_needs_password_imp(fz_context *ctx, fz_document *doc) | |
| 3087 { | |
| 3088 return pdf_needs_password(ctx, (pdf_document*)doc); | |
| 3089 } | |
| 3090 | |
| 3091 int pdf_authenticate_password_imp(fz_context *ctx, fz_document *doc, const char *pw) | |
| 3092 { | |
| 3093 return pdf_authenticate_password(ctx, (pdf_document*)doc, pw); | |
| 3094 } | |
| 3095 | |
| 3096 int pdf_has_permission_imp(fz_context *ctx, fz_document *doc, fz_permission p) | |
| 3097 { | |
| 3098 return pdf_has_permission(ctx, (pdf_document*)doc, p); | |
| 3099 } | |
| 3100 | |
| 3101 fz_outline_iterator *pdf_new_outline_iterator_imp(fz_context *ctx, fz_document *doc) | |
| 3102 { | |
| 3103 return pdf_new_outline_iterator(ctx, (pdf_document*)doc); | |
| 3104 } | |
| 3105 | |
| 3106 int pdf_lookup_metadata_imp(fz_context *ctx, fz_document *doc, const char *key, char *ptr, size_t size) | |
| 3107 { | |
| 3108 return pdf_lookup_metadata(ctx, (pdf_document*)doc, key, ptr, size); | |
| 3109 } | |
| 3110 | |
| 3111 void pdf_set_metadata_imp(fz_context *ctx, fz_document *doc, const char *key, const char *value) | |
| 3112 { | |
| 3113 pdf_set_metadata(ctx, (pdf_document*)doc, key, value); | |
| 3114 } | |
| 3115 | |
| 3116 void pdf_run_document_structure_imp(fz_context *ctx, fz_document *doc, fz_device *dev, fz_cookie *cookie) | |
| 3117 { | |
| 3118 pdf_run_document_structure(ctx, (pdf_document*)doc, dev, cookie); | |
| 3119 } | |
| 3120 | |
| 3121 #ifndef NDEBUG | |
| 3122 void pdf_verify_name_table_sanity(void); | |
| 3123 #endif | |
| 3124 | |
| 3125 | |
| 3126 static pdf_document * | |
| 3127 pdf_new_document(fz_context *ctx, fz_stream *file) | |
| 3128 { | |
| 3129 pdf_document *doc = fz_new_derived_document(ctx, pdf_document); | |
| 3130 | |
| 3131 #ifndef NDEBUG | |
| 3132 pdf_verify_name_table_sanity(); | |
| 3133 #endif | |
| 3134 | |
| 3135 doc->super.drop_document = pdf_drop_document_imp; | |
| 3136 doc->super.get_output_intent = pdf_document_output_intent_imp; | |
| 3137 doc->super.needs_password = pdf_needs_password_imp; | |
| 3138 doc->super.authenticate_password = pdf_authenticate_password_imp; | |
| 3139 doc->super.has_permission = pdf_has_permission_imp; | |
| 3140 doc->super.outline_iterator = pdf_new_outline_iterator_imp; | |
| 3141 doc->super.resolve_link_dest = pdf_resolve_link_imp; | |
| 3142 doc->super.format_link_uri = pdf_format_link_uri; | |
| 3143 doc->super.count_pages = pdf_count_pages_imp; | |
| 3144 doc->super.load_page = pdf_load_page_imp; | |
| 3145 doc->super.page_label = pdf_page_label_imp; | |
| 3146 doc->super.lookup_metadata = pdf_lookup_metadata_imp; | |
| 3147 doc->super.set_metadata = pdf_set_metadata_imp; | |
| 3148 doc->super.run_structure = pdf_run_document_structure_imp; | |
| 3149 doc->super.as_pdf = as_pdf; | |
| 3150 | |
| 3151 pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE); | |
| 3152 doc->file = fz_keep_stream(ctx, file); | |
| 3153 | |
| 3154 /* Default to PDF-1.7 if the version header is missing and for new documents */ | |
| 3155 doc->version = 17; | |
| 3156 | |
| 3157 return doc; | |
| 3158 } | |
| 3159 | |
| 3160 pdf_document * | |
| 3161 pdf_open_document_with_stream(fz_context *ctx, fz_stream *file) | |
| 3162 { | |
| 3163 pdf_document *doc = pdf_new_document(ctx, file); | |
| 3164 fz_try(ctx) | |
| 3165 { | |
| 3166 pdf_init_document(ctx, doc); | |
| 3167 } | |
| 3168 fz_catch(ctx) | |
| 3169 { | |
| 3170 /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */ | |
| 3171 char message[256]; | |
| 3172 int code; | |
| 3173 fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message); | |
| 3174 fz_drop_document(ctx, &doc->super); | |
| 3175 fz_throw(ctx, code, "%s", message); | |
| 3176 } | |
| 3177 return doc; | |
| 3178 } | |
| 3179 | |
| 3180 /* Uncomment the following to test progressive loading. */ | |
| 3181 /* #define TEST_PROGRESSIVE_HACK */ | |
| 3182 | |
| 3183 pdf_document * | |
| 3184 pdf_open_document(fz_context *ctx, const char *filename) | |
| 3185 { | |
| 3186 fz_stream *file = NULL; | |
| 3187 pdf_document *doc = NULL; | |
| 3188 | |
| 3189 fz_var(file); | |
| 3190 fz_var(doc); | |
| 3191 | |
| 3192 fz_try(ctx) | |
| 3193 { | |
| 3194 file = fz_open_file(ctx, filename); | |
| 3195 #ifdef TEST_PROGRESSIVE_HACK | |
| 3196 file->progressive = 1; | |
| 3197 #endif | |
| 3198 doc = pdf_new_document(ctx, file); | |
| 3199 pdf_init_document(ctx, doc); | |
| 3200 } | |
| 3201 fz_always(ctx) | |
| 3202 { | |
| 3203 fz_drop_stream(ctx, file); | |
| 3204 } | |
| 3205 fz_catch(ctx) | |
| 3206 { | |
| 3207 /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */ | |
| 3208 char message[256]; | |
| 3209 int code; | |
| 3210 fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message); | |
| 3211 fz_drop_document(ctx, &doc->super); | |
| 3212 fz_throw(ctx, code, "%s", message); | |
| 3213 } | |
| 3214 | |
| 3215 #ifdef TEST_PROGRESSIVE_HACK | |
| 3216 if (doc->file_reading_linearly) | |
| 3217 { | |
| 3218 fz_try(ctx) | |
| 3219 pdf_progressive_advance(ctx, doc, doc->linear_page_count-1); | |
| 3220 fz_catch(ctx) | |
| 3221 { | |
| 3222 doc->file_reading_linearly = 0; | |
| 3223 /* swallow the error */ | |
| 3224 } | |
| 3225 } | |
| 3226 #endif | |
| 3227 | |
| 3228 return doc; | |
| 3229 } | |
| 3230 | |
| 3231 static void | |
| 3232 pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum) | |
| 3233 { | |
| 3234 fz_stream *stream = NULL; | |
| 3235 pdf_obj *dict; | |
| 3236 | |
| 3237 fz_var(stream); | |
| 3238 fz_var(dict); | |
| 3239 | |
| 3240 fz_try(ctx) | |
| 3241 { | |
| 3242 int i, j, least_num_page_objs, page_obj_num_bits; | |
| 3243 int least_page_len, page_len_num_bits, shared_hint_offset; | |
| 3244 /* int least_page_offset, page_offset_num_bits; */ | |
| 3245 /* int least_content_stream_len, content_stream_len_num_bits; */ | |
| 3246 int num_shared_obj_num_bits, shared_obj_num_bits; | |
| 3247 /* int numerator_bits, denominator_bits; */ | |
| 3248 int shared; | |
| 3249 int shared_obj_num, shared_obj_offset, shared_obj_count_page1; | |
| 3250 int shared_obj_count_total; | |
| 3251 int least_shared_group_len, shared_group_len_num_bits; | |
| 3252 int max_object_num = pdf_xref_len(ctx, doc); | |
| 3253 | |
| 3254 stream = pdf_open_stream_number(ctx, doc, objnum); | |
| 3255 dict = pdf_get_xref_entry_no_null(ctx, doc, objnum)->obj; | |
| 3256 if (dict == NULL || !pdf_is_dict(ctx, dict)) | |
| 3257 fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint object"); | |
| 3258 | |
| 3259 shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S)); | |
| 3260 | |
| 3261 /* Malloc the structures (use realloc to cope with the fact we | |
| 3262 * may try this several times before enough data is loaded) */ | |
| 3263 doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+1, pdf_hint_page); | |
| 3264 memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1)); | |
| 3265 doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t); | |
| 3266 memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num); | |
| 3267 doc->hint_obj_offsets_max = max_object_num; | |
| 3268 | |
| 3269 /* Read the page object hints table: Header first */ | |
| 3270 least_num_page_objs = fz_read_bits(ctx, stream, 32); | |
| 3271 /* The following is sometimes a lie, but we read this version, | |
| 3272 * as other table values are built from it. In | |
| 3273 * pdf_reference17.pdf, this points to 2 objects before the | |
| 3274 * first pages page object. */ | |
| 3275 doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32); | |
| 3276 if (doc->hint_page[0].offset > doc->hint_object_offset) | |
| 3277 doc->hint_page[0].offset += doc->hint_object_length; | |
| 3278 page_obj_num_bits = fz_read_bits(ctx, stream, 16); | |
| 3279 least_page_len = fz_read_bits(ctx, stream, 32); | |
| 3280 page_len_num_bits = fz_read_bits(ctx, stream, 16); | |
| 3281 /* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32); | |
| 3282 /* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16); | |
| 3283 /* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32); | |
| 3284 /* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16); | |
| 3285 num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16); | |
| 3286 shared_obj_num_bits = fz_read_bits(ctx, stream, 16); | |
| 3287 /* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16); | |
| 3288 /* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16); | |
| 3289 | |
| 3290 /* Item 1: Page object numbers */ | |
| 3291 doc->hint_page[0].number = doc->linear_page1_obj_num; | |
| 3292 /* We don't care about the number of objects in the first page */ | |
| 3293 (void)fz_read_bits(ctx, stream, page_obj_num_bits); | |
| 3294 j = 1; | |
| 3295 for (i = 1; i < doc->linear_page_count; i++) | |
| 3296 { | |
| 3297 int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits); | |
| 3298 | |
| 3299 doc->hint_page[i].number = j; | |
| 3300 j += least_num_page_objs + delta_page_objs; | |
| 3301 } | |
| 3302 doc->hint_page[i].number = j; /* Not a real page object */ | |
| 3303 fz_sync_bits(ctx, stream); | |
| 3304 /* Item 2: Page lengths */ | |
| 3305 j = doc->hint_page[0].offset; | |
| 3306 for (i = 0; i < doc->linear_page_count; i++) | |
| 3307 { | |
| 3308 int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits); | |
| 3309 int old = j; | |
| 3310 | |
| 3311 doc->hint_page[i].offset = j; | |
| 3312 j += least_page_len + delta_page_len; | |
| 3313 if (old <= doc->hint_object_offset && j > doc->hint_object_offset) | |
| 3314 j += doc->hint_object_length; | |
| 3315 } | |
| 3316 doc->hint_page[i].offset = j; | |
| 3317 fz_sync_bits(ctx, stream); | |
| 3318 /* Item 3: Shared references */ | |
| 3319 shared = 0; | |
| 3320 for (i = 0; i < doc->linear_page_count; i++) | |
| 3321 { | |
| 3322 int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits); | |
| 3323 doc->hint_page[i].index = shared; | |
| 3324 shared += num_shared_objs; | |
| 3325 } | |
| 3326 doc->hint_page[i].index = shared; | |
| 3327 doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int); | |
| 3328 memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared); | |
| 3329 fz_sync_bits(ctx, stream); | |
| 3330 /* Item 4: Shared references */ | |
| 3331 for (i = 0; i < shared; i++) | |
| 3332 { | |
| 3333 int ref = fz_read_bits(ctx, stream, shared_obj_num_bits); | |
| 3334 doc->hint_shared_ref[i] = ref; | |
| 3335 } | |
| 3336 /* Skip items 5,6,7 as we don't use them */ | |
| 3337 | |
| 3338 fz_seek(ctx, stream, doc->bias + shared_hint_offset, SEEK_SET); | |
| 3339 | |
| 3340 /* Read the shared object hints table: Header first */ | |
| 3341 shared_obj_num = fz_read_bits(ctx, stream, 32); | |
| 3342 shared_obj_offset = fz_read_bits(ctx, stream, 32); | |
| 3343 if (shared_obj_offset > doc->hint_object_offset) | |
| 3344 shared_obj_offset += doc->hint_object_length; | |
| 3345 shared_obj_count_page1 = fz_read_bits(ctx, stream, 32); | |
| 3346 shared_obj_count_total = fz_read_bits(ctx, stream, 32); | |
| 3347 shared_obj_num_bits = fz_read_bits(ctx, stream, 16); | |
| 3348 least_shared_group_len = fz_read_bits(ctx, stream, 32); | |
| 3349 shared_group_len_num_bits = fz_read_bits(ctx, stream, 16); | |
| 3350 | |
| 3351 /* Sanity check the references in Item 4 above to ensure we | |
| 3352 * don't access out of range with malicious files. */ | |
| 3353 for (i = 0; i < shared; i++) | |
| 3354 { | |
| 3355 if (doc->hint_shared_ref[i] >= shared_obj_count_total) | |
| 3356 { | |
| 3357 fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint stream (shared refs)"); | |
| 3358 } | |
| 3359 } | |
| 3360 | |
| 3361 doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+1, pdf_hint_shared); | |
| 3362 memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1)); | |
| 3363 | |
| 3364 /* Item 1: Shared references */ | |
| 3365 j = doc->hint_page[0].offset; | |
| 3366 for (i = 0; i < shared_obj_count_page1; i++) | |
| 3367 { | |
| 3368 int off = fz_read_bits(ctx, stream, shared_group_len_num_bits); | |
| 3369 int old = j; | |
| 3370 doc->hint_shared[i].offset = j; | |
| 3371 j += off + least_shared_group_len; | |
| 3372 if (old <= doc->hint_object_offset && j > doc->hint_object_offset) | |
| 3373 j += doc->hint_object_length; | |
| 3374 } | |
| 3375 /* FIXME: We would have problems recreating the length of the | |
| 3376 * last page 1 shared reference group. But we'll never need | |
| 3377 * to, so ignore it. */ | |
| 3378 j = shared_obj_offset; | |
| 3379 for (; i < shared_obj_count_total; i++) | |
| 3380 { | |
| 3381 int off = fz_read_bits(ctx, stream, shared_group_len_num_bits); | |
| 3382 int old = j; | |
| 3383 doc->hint_shared[i].offset = j; | |
| 3384 j += off + least_shared_group_len; | |
| 3385 if (old <= doc->hint_object_offset && j > doc->hint_object_offset) | |
| 3386 j += doc->hint_object_length; | |
| 3387 } | |
| 3388 doc->hint_shared[i].offset = j; | |
| 3389 fz_sync_bits(ctx, stream); | |
| 3390 /* Item 2: Signature flags: read these just so we can skip */ | |
| 3391 for (i = 0; i < shared_obj_count_total; i++) | |
| 3392 { | |
| 3393 doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1); | |
| 3394 } | |
| 3395 fz_sync_bits(ctx, stream); | |
| 3396 /* Item 3: Signatures: just skip */ | |
| 3397 for (i = 0; i < shared_obj_count_total; i++) | |
| 3398 { | |
| 3399 if (doc->hint_shared[i].number) | |
| 3400 { | |
| 3401 (void) fz_read_bits(ctx, stream, 128); | |
| 3402 } | |
| 3403 } | |
| 3404 fz_sync_bits(ctx, stream); | |
| 3405 /* Item 4: Shared object object numbers */ | |
| 3406 j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */ | |
| 3407 for (i = 0; i < shared_obj_count_page1; i++) | |
| 3408 { | |
| 3409 doc->hint_shared[i].number = j; | |
| 3410 j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1; | |
| 3411 } | |
| 3412 j = shared_obj_num; | |
| 3413 for (; i < shared_obj_count_total; i++) | |
| 3414 { | |
| 3415 doc->hint_shared[i].number = j; | |
| 3416 j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1; | |
| 3417 } | |
| 3418 doc->hint_shared[i].number = j; | |
| 3419 | |
| 3420 /* Now, actually use the data we have gathered. */ | |
| 3421 for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++) | |
| 3422 { | |
| 3423 if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num) | |
| 3424 doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset; | |
| 3425 } | |
| 3426 for (i = 0; i < doc->linear_page_count; i++) | |
| 3427 { | |
| 3428 if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num) | |
| 3429 doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset; | |
| 3430 } | |
| 3431 } | |
| 3432 fz_always(ctx) | |
| 3433 { | |
| 3434 fz_drop_stream(ctx, stream); | |
| 3435 } | |
| 3436 fz_catch(ctx) | |
| 3437 { | |
| 3438 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 3439 /* Don't try to load hints again */ | |
| 3440 doc->hints_loaded = 1; | |
| 3441 /* We won't use the linearized object anymore. */ | |
| 3442 doc->file_reading_linearly = 0; | |
| 3443 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 3444 /* Any other error becomes a TRYLATER */ | |
| 3445 fz_report_error(ctx); | |
| 3446 fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object"); | |
| 3447 } | |
| 3448 doc->hints_loaded = 1; | |
| 3449 } | |
| 3450 | |
| 3451 static void | |
| 3452 pdf_load_hint_object(fz_context *ctx, pdf_document *doc) | |
| 3453 { | |
| 3454 pdf_lexbuf *buf = &doc->lexbuf.base; | |
| 3455 int64_t curr_pos; | |
| 3456 | |
| 3457 curr_pos = fz_tell(ctx, doc->file); | |
| 3458 fz_seek(ctx, doc->file, doc->bias + doc->hint_object_offset, SEEK_SET); | |
| 3459 fz_try(ctx) | |
| 3460 { | |
| 3461 while (1) | |
| 3462 { | |
| 3463 pdf_obj *page = NULL; | |
| 3464 int num, tok; | |
| 3465 | |
| 3466 tok = pdf_lex(ctx, doc->file, buf); | |
| 3467 if (tok != PDF_TOK_INT) | |
| 3468 break; | |
| 3469 num = buf->i; | |
| 3470 tok = pdf_lex(ctx, doc->file, buf); | |
| 3471 if (tok != PDF_TOK_INT) | |
| 3472 break; | |
| 3473 /* Ignore gen = buf->i */ | |
| 3474 tok = pdf_lex(ctx, doc->file, buf); | |
| 3475 if (tok != PDF_TOK_OBJ) | |
| 3476 break; | |
| 3477 (void)pdf_repair_obj(ctx, doc, buf, NULL, NULL, NULL, NULL, &page, NULL, NULL); | |
| 3478 pdf_load_hints(ctx, doc, num); | |
| 3479 } | |
| 3480 } | |
| 3481 fz_always(ctx) | |
| 3482 { | |
| 3483 fz_seek(ctx, doc->file, curr_pos, SEEK_SET); | |
| 3484 } | |
| 3485 fz_catch(ctx) | |
| 3486 { | |
| 3487 fz_rethrow(ctx); | |
| 3488 } | |
| 3489 } | |
| 3490 | |
| 3491 pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum) | |
| 3492 { | |
| 3493 int curr_pos; | |
| 3494 pdf_obj *page = NULL; | |
| 3495 | |
| 3496 pdf_load_hinted_page(ctx, doc, pagenum); | |
| 3497 | |
| 3498 if (pagenum < 0 || pagenum >= doc->linear_page_count) | |
| 3499 fz_throw(ctx, FZ_ERROR_ARGUMENT, "page load out of range (%d of %d)", pagenum, doc->linear_page_count); | |
| 3500 | |
| 3501 if (doc->linear_pos == doc->file_length) | |
| 3502 return doc->linear_page_refs[pagenum]; | |
| 3503 | |
| 3504 /* Only load hints once, and then only after we have got page 0 */ | |
| 3505 if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset) | |
| 3506 { | |
| 3507 /* Found hint object */ | |
| 3508 pdf_load_hint_object(ctx, doc); | |
| 3509 } | |
| 3510 | |
| 3511 DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos)); | |
| 3512 curr_pos = fz_tell(ctx, doc->file); | |
| 3513 | |
| 3514 fz_var(page); | |
| 3515 | |
| 3516 fz_try(ctx) | |
| 3517 { | |
| 3518 int eof; | |
| 3519 do | |
| 3520 { | |
| 3521 int num; | |
| 3522 eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page); | |
| 3523 pdf_drop_obj(ctx, page); | |
| 3524 page = NULL; | |
| 3525 } | |
| 3526 while (!eof); | |
| 3527 | |
| 3528 { | |
| 3529 pdf_obj *catalog; | |
| 3530 pdf_obj *pages; | |
| 3531 doc->linear_pos = doc->file_length; | |
| 3532 pdf_load_xref(ctx, doc); | |
| 3533 catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); | |
| 3534 pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages)); | |
| 3535 | |
| 3536 if (!pdf_is_dict(ctx, pages)) | |
| 3537 fz_throw(ctx, FZ_ERROR_FORMAT, "missing page tree"); | |
| 3538 break; | |
| 3539 } | |
| 3540 } | |
| 3541 fz_always(ctx) | |
| 3542 { | |
| 3543 fz_seek(ctx, doc->file, curr_pos, SEEK_SET); | |
| 3544 } | |
| 3545 fz_catch(ctx) | |
| 3546 { | |
| 3547 pdf_drop_obj(ctx, page); | |
| 3548 if (fz_caught(ctx) == FZ_ERROR_TRYLATER) | |
| 3549 { | |
| 3550 if (doc->linear_page_refs[pagenum] == NULL) | |
| 3551 { | |
| 3552 /* Still not got a page */ | |
| 3553 fz_rethrow(ctx); | |
| 3554 } | |
| 3555 // TODO: should we really swallow this error? | |
| 3556 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 3557 fz_report_error(ctx); | |
| 3558 } | |
| 3559 else | |
| 3560 fz_rethrow(ctx); | |
| 3561 } | |
| 3562 | |
| 3563 return doc->linear_page_refs[pagenum]; | |
| 3564 } | |
| 3565 | |
| 3566 pdf_document *fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr) | |
| 3567 { | |
| 3568 if (!ptr || !ptr->as_pdf) | |
| 3569 return NULL; | |
| 3570 return (pdf_document *)fz_keep_document(ctx, ptr->as_pdf(ctx, ptr)); | |
| 3571 } | |
| 3572 | |
| 3573 pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr) | |
| 3574 { | |
| 3575 return (pdf_document *)((ptr && ptr->count_pages == pdf_count_pages_imp) ? ptr : NULL); | |
| 3576 } | |
| 3577 | |
| 3578 pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *page) | |
| 3579 { | |
| 3580 if (pdf_document_from_fz_document(ctx, page->doc)) | |
| 3581 return (pdf_page*) page; | |
| 3582 return NULL; | |
| 3583 } | |
| 3584 | |
| 3585 pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc) | |
| 3586 { | |
| 3587 return pdf_document_from_fz_document(ctx, doc); | |
| 3588 } | |
| 3589 | |
| 3590 pdf_obj * | |
| 3591 pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj) | |
| 3592 { | |
| 3593 pdf_document *orig_doc; | |
| 3594 int num; | |
| 3595 | |
| 3596 orig_doc = pdf_get_bound_document(ctx, obj); | |
| 3597 if (orig_doc && orig_doc != doc) | |
| 3598 fz_throw(ctx, FZ_ERROR_ARGUMENT, "tried to add an object belonging to a different document"); | |
| 3599 if (pdf_is_indirect(ctx, obj)) | |
| 3600 return pdf_keep_obj(ctx, obj); | |
| 3601 num = pdf_create_object(ctx, doc); | |
| 3602 pdf_update_object(ctx, doc, num, obj); | |
| 3603 return pdf_new_indirect(ctx, doc, num, 0); | |
| 3604 } | |
| 3605 | |
| 3606 pdf_obj * | |
| 3607 pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj) | |
| 3608 { | |
| 3609 pdf_obj *ind = NULL; | |
| 3610 fz_try(ctx) | |
| 3611 ind = pdf_add_object(ctx, doc, obj); | |
| 3612 fz_always(ctx) | |
| 3613 pdf_drop_obj(ctx, obj); | |
| 3614 fz_catch(ctx) | |
| 3615 fz_rethrow(ctx); | |
| 3616 return ind; | |
| 3617 } | |
| 3618 | |
| 3619 pdf_obj * | |
| 3620 pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial) | |
| 3621 { | |
| 3622 return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial)); | |
| 3623 } | |
| 3624 | |
| 3625 pdf_obj * | |
| 3626 pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial) | |
| 3627 { | |
| 3628 return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial)); | |
| 3629 } | |
| 3630 | |
| 3631 pdf_obj * | |
| 3632 pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed) | |
| 3633 { | |
| 3634 pdf_obj *ind; | |
| 3635 if (!obj) | |
| 3636 ind = pdf_add_new_dict(ctx, doc, 4); | |
| 3637 else | |
| 3638 ind = pdf_add_object(ctx, doc, obj); | |
| 3639 fz_try(ctx) | |
| 3640 pdf_update_stream(ctx, doc, ind, buf, compressed); | |
| 3641 fz_catch(ctx) | |
| 3642 { | |
| 3643 pdf_drop_obj(ctx, ind); | |
| 3644 fz_rethrow(ctx); | |
| 3645 } | |
| 3646 return ind; | |
| 3647 } | |
| 3648 | |
| 3649 pdf_document *pdf_create_document(fz_context *ctx) | |
| 3650 { | |
| 3651 pdf_document *doc; | |
| 3652 pdf_obj *root; | |
| 3653 pdf_obj *pages; | |
| 3654 pdf_obj *trailer = NULL; | |
| 3655 | |
| 3656 fz_var(trailer); | |
| 3657 | |
| 3658 doc = pdf_new_document(ctx, NULL); | |
| 3659 fz_try(ctx) | |
| 3660 { | |
| 3661 doc->file_size = 0; | |
| 3662 doc->startxref = 0; | |
| 3663 doc->num_xref_sections = 0; | |
| 3664 doc->num_incremental_sections = 0; | |
| 3665 doc->xref_base = 0; | |
| 3666 doc->disallow_new_increments = 0; | |
| 3667 pdf_get_populating_xref_entry(ctx, doc, 0); | |
| 3668 | |
| 3669 trailer = pdf_new_dict(ctx, doc, 2); | |
| 3670 pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), 3); | |
| 3671 pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), root = pdf_add_new_dict(ctx, doc, 2)); | |
| 3672 pdf_dict_put(ctx, root, PDF_NAME(Type), PDF_NAME(Catalog)); | |
| 3673 pdf_dict_put_drop(ctx, root, PDF_NAME(Pages), pages = pdf_add_new_dict(ctx, doc, 3)); | |
| 3674 pdf_dict_put(ctx, pages, PDF_NAME(Type), PDF_NAME(Pages)); | |
| 3675 pdf_dict_put_int(ctx, pages, PDF_NAME(Count), 0); | |
| 3676 pdf_dict_put_array(ctx, pages, PDF_NAME(Kids), 1); | |
| 3677 | |
| 3678 /* Set the trailer of the final xref section. */ | |
| 3679 doc->xref_sections[0].trailer = trailer; | |
| 3680 } | |
| 3681 fz_catch(ctx) | |
| 3682 { | |
| 3683 pdf_drop_obj(ctx, trailer); | |
| 3684 fz_drop_document(ctx, &doc->super); | |
| 3685 fz_rethrow(ctx); | |
| 3686 } | |
| 3687 return doc; | |
| 3688 } | |
| 3689 | |
/* NULL-terminated list of file extensions offered by the PDF handler. */
static const char *pdf_extensions[] =
{
	"pdf", "fdf", "pclm", "ai",
	NULL	/* terminator */
};
| 3698 | |
/* NULL-terminated list of MIME types offered by the PDF handler. */
static const char *pdf_mimetypes[] =
{
	"application/pdf", "application/PCLm",
	NULL	/* terminator */
};
| 3705 | |
| 3706 static int | |
| 3707 pdf_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state) | |
| 3708 { | |
| 3709 const char *match = "%PDF-"; | |
| 3710 const char *match2 = "%FDF-"; | |
| 3711 int pos = 0; | |
| 3712 int n = 4096+5; | |
| 3713 int c; | |
| 3714 | |
| 3715 if (state) | |
| 3716 *state = NULL; | |
| 3717 if (free_state) | |
| 3718 *free_state = NULL; | |
| 3719 | |
| 3720 if (stream == NULL) | |
| 3721 return 0; | |
| 3722 | |
| 3723 do | |
| 3724 { | |
| 3725 c = fz_read_byte(ctx, stream); | |
| 3726 if (c == EOF) | |
| 3727 return 0; | |
| 3728 if (c == match[pos] || c == match2[pos]) | |
| 3729 { | |
| 3730 pos++; | |
| 3731 if (pos == 5) | |
| 3732 return 100; | |
| 3733 } | |
| 3734 else | |
| 3735 { | |
| 3736 /* Restart matching, but recheck c against the start. */ | |
| 3737 pos = (c == match[0]); | |
| 3738 } | |
| 3739 } | |
| 3740 while (--n > 0); | |
| 3741 | |
| 3742 return 0; | |
| 3743 } | |
| 3744 | |
| 3745 static fz_document * | |
| 3746 open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state) | |
| 3747 { | |
| 3748 if (file == NULL) | |
| 3749 return NULL; | |
| 3750 return (fz_document *)pdf_open_document_with_stream(ctx, file); | |
| 3751 } | |
| 3752 | |
| 3753 fz_document_handler pdf_document_handler = | |
| 3754 { | |
| 3755 NULL, | |
| 3756 open_document, | |
| 3757 pdf_extensions, | |
| 3758 pdf_mimetypes, | |
| 3759 pdf_recognize_doc_content | |
| 3760 }; | |
| 3761 | |
| 3762 void pdf_mark_xref(fz_context *ctx, pdf_document *doc) | |
| 3763 { | |
| 3764 int x, e; | |
| 3765 | |
| 3766 for (x = 0; x < doc->num_xref_sections; x++) | |
| 3767 { | |
| 3768 pdf_xref *xref = &doc->xref_sections[x]; | |
| 3769 pdf_xref_subsec *sub; | |
| 3770 | |
| 3771 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 3772 { | |
| 3773 for (e = 0; e < sub->len; e++) | |
| 3774 { | |
| 3775 pdf_xref_entry *entry = &sub->table[e]; | |
| 3776 if (entry->obj) | |
| 3777 { | |
| 3778 entry->marked = 1; | |
| 3779 } | |
| 3780 } | |
| 3781 } | |
| 3782 } | |
| 3783 } | |
| 3784 | |
| 3785 void pdf_clear_xref(fz_context *ctx, pdf_document *doc) | |
| 3786 { | |
| 3787 int x, e; | |
| 3788 | |
| 3789 for (x = 0; x < doc->num_xref_sections; x++) | |
| 3790 { | |
| 3791 pdf_xref *xref = &doc->xref_sections[x]; | |
| 3792 pdf_xref_subsec *sub; | |
| 3793 | |
| 3794 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 3795 { | |
| 3796 for (e = 0; e < sub->len; e++) | |
| 3797 { | |
| 3798 pdf_xref_entry *entry = &sub->table[e]; | |
| 3799 /* We cannot drop objects if the stream | |
| 3800 * buffer has been updated */ | |
| 3801 if (entry->obj != NULL && entry->stm_buf == NULL) | |
| 3802 { | |
| 3803 if (pdf_obj_refs(ctx, entry->obj) == 1) | |
| 3804 { | |
| 3805 pdf_drop_obj(ctx, entry->obj); | |
| 3806 entry->obj = NULL; | |
| 3807 } | |
| 3808 } | |
| 3809 } | |
| 3810 } | |
| 3811 } | |
| 3812 } | |
| 3813 | |
| 3814 void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc) | |
| 3815 { | |
| 3816 int x, e; | |
| 3817 | |
| 3818 for (x = 0; x < doc->num_xref_sections; x++) | |
| 3819 { | |
| 3820 pdf_xref *xref = &doc->xref_sections[x]; | |
| 3821 pdf_xref_subsec *sub; | |
| 3822 | |
| 3823 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 3824 { | |
| 3825 for (e = 0; e < sub->len; e++) | |
| 3826 { | |
| 3827 pdf_xref_entry *entry = &sub->table[e]; | |
| 3828 | |
| 3829 /* We cannot drop objects if the stream buffer has | |
| 3830 * been updated */ | |
| 3831 if (entry->obj != NULL && entry->stm_buf == NULL) | |
| 3832 { | |
| 3833 if (!entry->marked && pdf_obj_refs(ctx, entry->obj) == 1) | |
| 3834 { | |
| 3835 pdf_drop_obj(ctx, entry->obj); | |
| 3836 entry->obj = NULL; | |
| 3837 } | |
| 3838 } | |
| 3839 } | |
| 3840 } | |
| 3841 } | |
| 3842 } | |
| 3843 | |
| 3844 int | |
| 3845 pdf_count_versions(fz_context *ctx, pdf_document *doc) | |
| 3846 { | |
| 3847 return doc->num_xref_sections-doc->num_incremental_sections-doc->has_linearization_object; | |
| 3848 } | |
| 3849 | |
| 3850 int | |
| 3851 pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc) | |
| 3852 { | |
| 3853 return doc->num_incremental_sections; | |
| 3854 } | |
| 3855 | |
| 3856 int | |
| 3857 pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc) | |
| 3858 { | |
| 3859 return doc->has_linearization_object; | |
| 3860 } | |
| 3861 | |
| 3862 static int pdf_obj_exists(fz_context *ctx, pdf_document *doc, int i) | |
| 3863 { | |
| 3864 pdf_xref_subsec *sub; | |
| 3865 int j; | |
| 3866 | |
| 3867 if (i < 0) | |
| 3868 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested"); | |
| 3869 | |
| 3870 if (i <= doc->max_xref_len) | |
| 3871 j = doc->xref_index[i]; | |
| 3872 else | |
| 3873 j = 0; | |
| 3874 | |
| 3875 /* We may be accessing an earlier version of the document using xref_base | |
| 3876 * and j may be an index into a later xref section */ | |
| 3877 if (doc->xref_base > j) | |
| 3878 j = doc->xref_base; | |
| 3879 | |
| 3880 /* Find the first xref section where the entry is defined. */ | |
| 3881 for (; j < doc->num_xref_sections; j++) | |
| 3882 { | |
| 3883 pdf_xref *xref = &doc->xref_sections[j]; | |
| 3884 | |
| 3885 if (i < xref->num_objects) | |
| 3886 { | |
| 3887 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 3888 { | |
| 3889 if (i < sub->start || i >= sub->start + sub->len) | |
| 3890 continue; | |
| 3891 | |
| 3892 if (sub->table[i - sub->start].type) | |
| 3893 return 1; | |
| 3894 } | |
| 3895 } | |
| 3896 } | |
| 3897 | |
| 3898 return 0; | |
| 3899 } | |
| 3900 | |
/* Bit flags that are OR-ed into pdf_changes.obj_changes[] entries. */
enum {
	FIELD_CHANGED = 1 << 0,
	FIELD_CHANGE_VALID = 1 << 1,
	FIELD_CHANGE_INVALID = 1 << 2
};
| 3906 | |
| 3907 typedef struct | |
| 3908 { | |
| 3909 int num_obj; | |
| 3910 int obj_changes[FZ_FLEXIBLE_ARRAY]; | |
| 3911 } pdf_changes; | |
| 3912 | |
| 3913 static int | |
| 3914 check_unchanged_between(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *nobj, pdf_obj *oobj) | |
| 3915 { | |
| 3916 int marked = 0; | |
| 3917 int changed = 0; | |
| 3918 | |
| 3919 /* Trivially identical => trivially unchanged. */ | |
| 3920 if (nobj == oobj) | |
| 3921 return 0; | |
| 3922 | |
| 3923 /* Strictly speaking we shouldn't need to call fz_var, | |
| 3924 * but I suspect static analysis tools are not smart | |
| 3925 * enough to figure that out. */ | |
| 3926 fz_var(marked); | |
| 3927 | |
| 3928 if (pdf_is_indirect(ctx, nobj)) | |
| 3929 { | |
| 3930 int o_xref_base = doc->xref_base; | |
| 3931 | |
| 3932 /* Both must be indirect if one is. */ | |
| 3933 if (!pdf_is_indirect(ctx, oobj)) | |
| 3934 { | |
| 3935 changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID; | |
| 3936 return 1; | |
| 3937 } | |
| 3938 | |
| 3939 /* Handle recursing back into ourselves. */ | |
| 3940 if (pdf_obj_marked(ctx, nobj)) | |
| 3941 { | |
| 3942 if (pdf_obj_marked(ctx, oobj)) | |
| 3943 return 0; | |
| 3944 changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID; | |
| 3945 return 1; | |
| 3946 } | |
| 3947 else if (pdf_obj_marked(ctx, oobj)) | |
| 3948 { | |
| 3949 changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID; | |
| 3950 return 1; | |
| 3951 } | |
| 3952 | |
| 3953 nobj = pdf_resolve_indirect_chain(ctx, nobj); | |
| 3954 doc->xref_base = o_xref_base+1; | |
| 3955 fz_try(ctx) | |
| 3956 { | |
| 3957 oobj = pdf_resolve_indirect_chain(ctx, oobj); | |
| 3958 if (oobj != nobj) | |
| 3959 { | |
| 3960 /* Different objects, so lock them */ | |
| 3961 if (!pdf_obj_marked(ctx, nobj) && !pdf_obj_marked(ctx, oobj)) | |
| 3962 { | |
| 3963 (void)pdf_mark_obj(ctx, nobj); | |
| 3964 (void)pdf_mark_obj(ctx, oobj); | |
| 3965 marked = 1; | |
| 3966 } | |
| 3967 } | |
| 3968 } | |
| 3969 fz_always(ctx) | |
| 3970 doc->xref_base = o_xref_base; | |
| 3971 fz_catch(ctx) | |
| 3972 fz_rethrow(ctx); | |
| 3973 | |
| 3974 if (nobj == oobj) | |
| 3975 return 0; /* Trivially identical */ | |
| 3976 } | |
| 3977 | |
| 3978 fz_var(changed); | |
| 3979 | |
| 3980 fz_try(ctx) | |
| 3981 { | |
| 3982 if (pdf_is_dict(ctx, nobj)) | |
| 3983 { | |
| 3984 int i, n = pdf_dict_len(ctx, nobj); | |
| 3985 | |
| 3986 if (!pdf_is_dict(ctx, oobj) || n != pdf_dict_len(ctx, oobj)) | |
| 3987 { | |
| 3988 change_found: | |
| 3989 changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID; | |
| 3990 changed = 1; | |
| 3991 break; | |
| 3992 } | |
| 3993 | |
| 3994 for (i = 0; i < n; i++) | |
| 3995 { | |
| 3996 pdf_obj *key = pdf_dict_get_key(ctx, nobj, i); | |
| 3997 pdf_obj *nval = pdf_dict_get(ctx, nobj, key); | |
| 3998 pdf_obj *oval = pdf_dict_get(ctx, oobj, key); | |
| 3999 | |
| 4000 changed |= check_unchanged_between(ctx, doc, changes, nval, oval); | |
| 4001 } | |
| 4002 } | |
| 4003 else if (pdf_is_array(ctx, nobj)) | |
| 4004 { | |
| 4005 int i, n = pdf_array_len(ctx, nobj); | |
| 4006 | |
| 4007 if (!pdf_is_array(ctx, oobj) || n != pdf_array_len(ctx, oobj)) | |
| 4008 goto change_found; | |
| 4009 | |
| 4010 for (i = 0; i < n; i++) | |
| 4011 { | |
| 4012 pdf_obj *nval = pdf_array_get(ctx, nobj, i); | |
| 4013 pdf_obj *oval = pdf_array_get(ctx, oobj, i); | |
| 4014 | |
| 4015 changed |= check_unchanged_between(ctx, doc, changes, nval, oval); | |
| 4016 } | |
| 4017 } | |
| 4018 else if (pdf_objcmp(ctx, nobj, oobj)) | |
| 4019 goto change_found; | |
| 4020 } | |
| 4021 fz_always(ctx) | |
| 4022 { | |
| 4023 if (marked) | |
| 4024 { | |
| 4025 pdf_unmark_obj(ctx, nobj); | |
| 4026 pdf_unmark_obj(ctx, oobj); | |
| 4027 } | |
| 4028 } | |
| 4029 fz_catch(ctx) | |
| 4030 fz_rethrow(ctx); | |
| 4031 | |
| 4032 return changed; | |
| 4033 } | |
| 4034 | |
/* Growable array of heap-allocated strings. */
typedef struct
{
	int max;	/* allocated capacity of list */
	int len;	/* number of entries in use */
	char **list;	/* the strings themselves */
} char_list;
| 4041 | |
| 4042 /* This structure is used to hold the definition of which fields | |
| 4043 * are locked. */ | |
| 4044 struct pdf_locked_fields | |
| 4045 { | |
| 4046 int p; | |
| 4047 int all; | |
| 4048 char_list includes; | |
| 4049 char_list excludes; | |
| 4050 }; | |
| 4051 | |
| 4052 static void | |
| 4053 free_char_list(fz_context *ctx, char_list *c) | |
| 4054 { | |
| 4055 int i; | |
| 4056 | |
| 4057 if (c == NULL) | |
| 4058 return; | |
| 4059 | |
| 4060 for (i = c->len-1; i >= 0; i--) | |
| 4061 fz_free(ctx, c->list[i]); | |
| 4062 fz_free(ctx, c->list); | |
| 4063 c->len = 0; | |
| 4064 c->max = 0; | |
| 4065 } | |
| 4066 | |
| 4067 void | |
| 4068 pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *fl) | |
| 4069 { | |
| 4070 if (fl == NULL) | |
| 4071 return; | |
| 4072 | |
| 4073 free_char_list(ctx, &fl->includes); | |
| 4074 free_char_list(ctx, &fl->excludes); | |
| 4075 fz_free(ctx, fl); | |
| 4076 } | |
| 4077 | |
| 4078 static void | |
| 4079 char_list_append(fz_context *ctx, char_list *list, const char *s) | |
| 4080 { | |
| 4081 if (list->len == list->max) | |
| 4082 { | |
| 4083 int n = list->max * 2; | |
| 4084 if (n == 0) n = 4; | |
| 4085 | |
| 4086 list->list = fz_realloc_array(ctx, list->list, n, char *); | |
| 4087 list->max = n; | |
| 4088 } | |
| 4089 list->list[list->len] = fz_strdup(ctx, s); | |
| 4090 list->len++; | |
| 4091 } | |
| 4092 | |
| 4093 int | |
| 4094 pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name) | |
| 4095 { | |
| 4096 int i; | |
| 4097 | |
| 4098 if (locked->p == 1) | |
| 4099 { | |
| 4100 /* Permissions were set, and say that field changes are not to be allowed. */ | |
| 4101 return 1; /* Locked */ | |
| 4102 } | |
| 4103 | |
| 4104 if(locked->all) | |
| 4105 { | |
| 4106 /* The only way we might not be unlocked is if | |
| 4107 * we are listed in the excludes. */ | |
| 4108 for (i = 0; i < locked->excludes.len; i++) | |
| 4109 if (!strcmp(locked->excludes.list[i], name)) | |
| 4110 return 0; | |
| 4111 return 1; | |
| 4112 } | |
| 4113 | |
| 4114 /* The only way we can be locked is for us to be in the includes. */ | |
| 4115 for (i = 0; i < locked->includes.len; i++) | |
| 4116 if (strcmp(locked->includes.list[i], name) == 0) | |
| 4117 return 1; | |
| 4118 | |
| 4119 /* Anything else is unlocked */ | |
| 4120 return 0; | |
| 4121 } | |
| 4122 | |
| 4123 /* Unfortunately, in C, there is no legal way to define a function | |
| 4124 * type that returns itself. We therefore have to use a struct | |
| 4125 * wrapper. */ | |
| 4126 typedef struct filter_wrap | |
| 4127 { | |
| 4128 struct filter_wrap (*func)(fz_context *ctx, pdf_obj *dict, pdf_obj *key); | |
| 4129 } filter_wrap; | |
| 4130 | |
| 4131 typedef struct filter_wrap (*filter_fn)(fz_context *ctx, pdf_obj *dict, pdf_obj *key); | |
| 4132 | |
| 4133 #define RETURN_FILTER(f) { filter_wrap rf; rf.func = (f); return rf; } | |
| 4134 | |
| 4135 static filter_wrap filter_simple(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4136 { | |
| 4137 RETURN_FILTER(NULL); | |
| 4138 } | |
| 4139 | |
| 4140 static filter_wrap filter_transformparams(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4141 { | |
| 4142 if (pdf_name_eq(ctx, key, PDF_NAME(Type)) || | |
| 4143 pdf_name_eq(ctx, key, PDF_NAME(P)) || | |
| 4144 pdf_name_eq(ctx, key, PDF_NAME(V)) || | |
| 4145 pdf_name_eq(ctx, key, PDF_NAME(Document)) || | |
| 4146 pdf_name_eq(ctx, key, PDF_NAME(Msg)) || | |
| 4147 pdf_name_eq(ctx, key, PDF_NAME(V)) || | |
| 4148 pdf_name_eq(ctx, key, PDF_NAME(Annots)) || | |
| 4149 pdf_name_eq(ctx, key, PDF_NAME(Form)) || | |
| 4150 pdf_name_eq(ctx, key, PDF_NAME(FormEx)) || | |
| 4151 pdf_name_eq(ctx, key, PDF_NAME(EF)) || | |
| 4152 pdf_name_eq(ctx, key, PDF_NAME(P)) || | |
| 4153 pdf_name_eq(ctx, key, PDF_NAME(Action)) || | |
| 4154 pdf_name_eq(ctx, key, PDF_NAME(Fields))) | |
| 4155 RETURN_FILTER(&filter_simple); | |
| 4156 RETURN_FILTER(NULL); | |
| 4157 } | |
| 4158 | |
| 4159 static filter_wrap filter_reference(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4160 { | |
| 4161 if (pdf_name_eq(ctx, key, PDF_NAME(Type)) || | |
| 4162 pdf_name_eq(ctx, key, PDF_NAME(TransformMethod)) || | |
| 4163 pdf_name_eq(ctx, key, PDF_NAME(DigestMethod)) || | |
| 4164 pdf_name_eq(ctx, key, PDF_NAME(DigestValue)) || | |
| 4165 pdf_name_eq(ctx, key, PDF_NAME(DigestLocation))) | |
| 4166 RETURN_FILTER(&filter_simple); | |
| 4167 if (pdf_name_eq(ctx, key, PDF_NAME(TransformParams))) | |
| 4168 RETURN_FILTER(&filter_transformparams); | |
| 4169 RETURN_FILTER(NULL); | |
| 4170 } | |
| 4171 | |
| 4172 static filter_wrap filter_prop_build_sub(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4173 { | |
| 4174 if (pdf_name_eq(ctx, key, PDF_NAME(Name)) || | |
| 4175 pdf_name_eq(ctx, key, PDF_NAME(Date)) || | |
| 4176 pdf_name_eq(ctx, key, PDF_NAME(R)) || | |
| 4177 pdf_name_eq(ctx, key, PDF_NAME(PreRelease)) || | |
| 4178 pdf_name_eq(ctx, key, PDF_NAME(OS)) || | |
| 4179 pdf_name_eq(ctx, key, PDF_NAME(NonEFontNoWarn)) || | |
| 4180 pdf_name_eq(ctx, key, PDF_NAME(TrustedMode)) || | |
| 4181 pdf_name_eq(ctx, key, PDF_NAME(V)) || | |
| 4182 pdf_name_eq(ctx, key, PDF_NAME(REx)) || | |
| 4183 pdf_name_eq(ctx, key, PDF_NAME(Preview))) | |
| 4184 RETURN_FILTER(&filter_simple); | |
| 4185 RETURN_FILTER(NULL); | |
| 4186 } | |
| 4187 | |
| 4188 static filter_wrap filter_prop_build(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4189 { | |
| 4190 if (pdf_name_eq(ctx, key, PDF_NAME(Filter)) || | |
| 4191 pdf_name_eq(ctx, key, PDF_NAME(PubSec)) || | |
| 4192 pdf_name_eq(ctx, key, PDF_NAME(App)) || | |
| 4193 pdf_name_eq(ctx, key, PDF_NAME(SigQ))) | |
| 4194 RETURN_FILTER(&filter_prop_build_sub); | |
| 4195 RETURN_FILTER(NULL); | |
| 4196 } | |
| 4197 | |
| 4198 static filter_wrap filter_v(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4199 { | |
| 4200 /* Text can point to a stream object */ | |
| 4201 if (pdf_name_eq(ctx, key, PDF_NAME(Length)) && pdf_is_stream(ctx, dict)) | |
| 4202 RETURN_FILTER(&filter_simple); | |
| 4203 /* Sigs point to a dict. */ | |
| 4204 if (pdf_name_eq(ctx, key, PDF_NAME(Type)) || | |
| 4205 pdf_name_eq(ctx, key, PDF_NAME(Filter)) || | |
| 4206 pdf_name_eq(ctx, key, PDF_NAME(SubFilter)) || | |
| 4207 pdf_name_eq(ctx, key, PDF_NAME(Contents)) || | |
| 4208 pdf_name_eq(ctx, key, PDF_NAME(Cert)) || | |
| 4209 pdf_name_eq(ctx, key, PDF_NAME(ByteRange)) || | |
| 4210 pdf_name_eq(ctx, key, PDF_NAME(Changes)) || | |
| 4211 pdf_name_eq(ctx, key, PDF_NAME(Name)) || | |
| 4212 pdf_name_eq(ctx, key, PDF_NAME(M)) || | |
| 4213 pdf_name_eq(ctx, key, PDF_NAME(Location)) || | |
| 4214 pdf_name_eq(ctx, key, PDF_NAME(Reason)) || | |
| 4215 pdf_name_eq(ctx, key, PDF_NAME(ContactInfo)) || | |
| 4216 pdf_name_eq(ctx, key, PDF_NAME(R)) || | |
| 4217 pdf_name_eq(ctx, key, PDF_NAME(V)) || | |
| 4218 pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthTime)) || | |
| 4219 pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthType))) | |
| 4220 RETURN_FILTER(&filter_simple); | |
| 4221 if (pdf_name_eq(ctx, key, PDF_NAME(Reference))) | |
| 4222 RETURN_FILTER(filter_reference); | |
| 4223 if (pdf_name_eq(ctx, key, PDF_NAME(Prop_Build))) | |
| 4224 RETURN_FILTER(filter_prop_build); | |
| 4225 RETURN_FILTER(NULL); | |
| 4226 } | |
| 4227 | |
| 4228 static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key); | |
| 4229 | |
| 4230 static filter_wrap filter_xobject_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4231 { | |
| 4232 /* FIXME: Infinite recursion possible here? */ | |
| 4233 RETURN_FILTER(&filter_appearance); | |
| 4234 } | |
| 4235 | |
| 4236 static filter_wrap filter_font(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4237 { | |
| 4238 /* In the example I've seen the /Name field was dropped, so we'll allow | |
| 4239 * local changes, but none that follow an indirection. */ | |
| 4240 RETURN_FILTER(NULL); | |
| 4241 } | |
| 4242 | |
| 4243 /* FIXME: One idea here is to make filter_font_list and filter_xobject_list | |
| 4244 * only accept NEW objects as changes. Will think about this. */ | |
| 4245 static filter_wrap filter_font_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4246 { | |
| 4247 RETURN_FILTER(&filter_font); | |
| 4248 } | |
| 4249 | |
| 4250 static filter_wrap filter_resources(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4251 { | |
| 4252 if (pdf_name_eq(ctx, key, PDF_NAME(XObject))) | |
| 4253 RETURN_FILTER(&filter_xobject_list); | |
| 4254 if (pdf_name_eq(ctx, key, PDF_NAME(Font))) | |
| 4255 RETURN_FILTER(&filter_font_list); | |
| 4256 RETURN_FILTER(NULL); | |
| 4257 } | |
| 4258 | |
| 4259 static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4260 { | |
| 4261 if (pdf_name_eq(ctx, key, PDF_NAME(Resources))) | |
| 4262 RETURN_FILTER(&filter_resources); | |
| 4263 RETURN_FILTER(NULL); | |
| 4264 } | |
| 4265 | |
| 4266 static filter_wrap filter_ap(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4267 { | |
| 4268 /* Just the /N entry for now. May need to add more later. */ | |
| 4269 if (pdf_name_eq(ctx, key, PDF_NAME(N)) && pdf_is_stream(ctx, pdf_dict_get(ctx, dict, key))) | |
| 4270 RETURN_FILTER(&filter_appearance); | |
| 4271 RETURN_FILTER(NULL); | |
| 4272 } | |
| 4273 | |
| 4274 static filter_wrap filter_xfa(fz_context *ctx, pdf_obj *dict, pdf_obj *key) | |
| 4275 { | |
| 4276 /* Text can point to a stream object */ | |
| 4277 if (pdf_is_stream(ctx, dict)) | |
| 4278 RETURN_FILTER(&filter_simple); | |
| 4279 RETURN_FILTER(NULL); | |
| 4280 } | |
| 4281 | |
| 4282 static void | |
| 4283 filter_changes_accepted(fz_context *ctx, pdf_changes *changes, pdf_obj *obj, filter_fn filter) | |
| 4284 { | |
| 4285 int obj_num; | |
| 4286 | |
| 4287 if (obj == NULL || pdf_obj_marked(ctx, obj)) | |
| 4288 return; | |
| 4289 | |
| 4290 obj_num = pdf_to_num(ctx, obj); | |
| 4291 | |
| 4292 fz_try(ctx) | |
| 4293 { | |
| 4294 if (obj_num != 0) | |
| 4295 { | |
| 4296 (void)pdf_mark_obj(ctx, obj); | |
| 4297 changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID; | |
| 4298 } | |
| 4299 if (filter == NULL) | |
| 4300 break; | |
| 4301 if (pdf_is_dict(ctx, obj)) | |
| 4302 { | |
| 4303 int i, n = pdf_dict_len(ctx, obj); | |
| 4304 | |
| 4305 for (i = 0; i < n; i++) | |
| 4306 { | |
| 4307 pdf_obj *key = pdf_dict_get_key(ctx, obj, i); | |
| 4308 pdf_obj *val = pdf_dict_get_val(ctx, obj, i); | |
| 4309 filter_fn f = (filter(ctx, obj, key)).func; | |
| 4310 if (f != NULL) | |
| 4311 filter_changes_accepted(ctx, changes, val, f); | |
| 4312 } | |
| 4313 } | |
| 4314 else if (pdf_is_array(ctx, obj)) | |
| 4315 { | |
| 4316 int i, n = pdf_array_len(ctx, obj); | |
| 4317 | |
| 4318 for (i = 0; i < n; i++) | |
| 4319 { | |
| 4320 pdf_obj *val = pdf_array_get(ctx, obj, i); | |
| 4321 filter_changes_accepted(ctx, changes, val, filter); | |
| 4322 } | |
| 4323 } | |
| 4324 } | |
| 4325 fz_always(ctx) | |
| 4326 if (obj_num != 0) | |
| 4327 pdf_unmark_obj(ctx, obj); | |
| 4328 fz_catch(ctx) | |
| 4329 fz_rethrow(ctx); | |
| 4330 } | |
| 4331 | |
| 4332 static void | |
| 4333 check_field(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *obj, pdf_locked_fields *locked, const char *name_prefix, pdf_obj *new_v, pdf_obj *old_v) | |
| 4334 { | |
| 4335 pdf_obj *old_obj, *new_obj, *n_v, *o_v; | |
| 4336 int o_xref_base; | |
| 4337 int obj_num; | |
| 4338 char *field_name = NULL; | |
| 4339 | |
| 4340 /* All fields MUST be indirections, either in the Fields array | |
| 4341 * or AcroForms, or in the Kids array of other Fields. */ | |
| 4342 if (!pdf_is_indirect(ctx, obj)) | |
| 4343 return; | |
| 4344 | |
| 4345 obj_num = pdf_to_num(ctx, obj); | |
| 4346 o_xref_base = doc->xref_base; | |
| 4347 new_obj = pdf_resolve_indirect_chain(ctx, obj); | |
| 4348 | |
| 4349 /* Similarly, all fields must be dicts */ | |
| 4350 if (!pdf_is_dict(ctx, new_obj)) | |
| 4351 return; | |
| 4352 | |
| 4353 if (pdf_obj_marked(ctx, obj)) | |
| 4354 return; | |
| 4355 | |
| 4356 fz_var(field_name); | |
| 4357 | |
| 4358 fz_try(ctx) | |
| 4359 { | |
| 4360 int i, len; | |
| 4361 const char *name; | |
| 4362 size_t n; | |
| 4363 pdf_obj *t; | |
| 4364 int is_locked; | |
| 4365 | |
| 4366 (void)pdf_mark_obj(ctx, obj); | |
| 4367 | |
| 4368 /* Do this within the try, so we can catch any problems */ | |
| 4369 doc->xref_base = o_xref_base+1; | |
| 4370 old_obj = pdf_resolve_indirect_chain(ctx, obj); | |
| 4371 | |
| 4372 t = pdf_dict_get(ctx, old_obj, PDF_NAME(T)); | |
| 4373 if (t != NULL) | |
| 4374 { | |
| 4375 name = pdf_dict_get_text_string(ctx, old_obj, PDF_NAME(T)); | |
| 4376 n = strlen(name)+1; | |
| 4377 if (*name_prefix) | |
| 4378 n += 1 + strlen(name_prefix); | |
| 4379 field_name = fz_malloc(ctx, n); | |
| 4380 if (*name_prefix) | |
| 4381 { | |
| 4382 strcpy(field_name, name_prefix); | |
| 4383 strcat(field_name, "."); | |
| 4384 } | |
| 4385 else | |
| 4386 *field_name = 0; | |
| 4387 strcat(field_name, name); | |
| 4388 name_prefix = field_name; | |
| 4389 } | |
| 4390 | |
| 4391 doc->xref_base = o_xref_base; | |
| 4392 | |
| 4393 if (!pdf_is_dict(ctx, old_obj)) | |
| 4394 break; | |
| 4395 | |
| 4396 /* Check V explicitly, allowing for it being inherited. */ | |
| 4397 n_v = pdf_dict_get(ctx, new_obj, PDF_NAME(V)); | |
| 4398 if (n_v == NULL) | |
| 4399 n_v = new_v; | |
| 4400 o_v = pdf_dict_get(ctx, old_obj, PDF_NAME(V)); | |
| 4401 if (o_v == NULL) | |
| 4402 o_v = old_v; | |
| 4403 | |
| 4404 is_locked = pdf_is_field_locked(ctx, locked, name_prefix); | |
| 4405 if (pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Type)), PDF_NAME(Annot)) && | |
| 4406 pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Subtype)), PDF_NAME(Widget))) | |
| 4407 { | |
| 4408 if (is_locked) | |
| 4409 { | |
| 4410 /* If locked, V must not change! */ | |
| 4411 if (check_unchanged_between(ctx, doc, changes, n_v, o_v)) | |
| 4412 changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID; | |
| 4413 } | |
| 4414 else | |
| 4415 { | |
| 4416 /* If not locked, V can change to be filled in! */ | |
| 4417 filter_changes_accepted(ctx, changes, n_v, &filter_v); | |
| 4418 changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID; | |
| 4419 } | |
| 4420 } | |
| 4421 | |
| 4422 /* Check all the fields in the new object are | |
| 4423 * either the same as the old object, or are | |
| 4424 * expected changes. */ | |
| 4425 len = pdf_dict_len(ctx, new_obj); | |
| 4426 for (i = 0; i < len; i++) | |
| 4427 { | |
| 4428 pdf_obj *key = pdf_dict_get_key(ctx, new_obj, i); | |
| 4429 pdf_obj *nval = pdf_dict_get(ctx, new_obj, key); | |
| 4430 pdf_obj *oval = pdf_dict_get(ctx, old_obj, key); | |
| 4431 | |
| 4432 /* Kids arrays shouldn't change. */ | |
| 4433 if (pdf_name_eq(ctx, key, PDF_NAME(Kids))) | |
| 4434 { | |
| 4435 int j, m; | |
| 4436 | |
| 4437 /* Kids must be an array. If it's not, count it as a difference. */ | |
| 4438 if (!pdf_is_array(ctx, nval) || !pdf_is_array(ctx, oval)) | |
| 4439 { | |
| 4440 change_found: | |
| 4441 changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID; | |
| 4442 break; | |
| 4443 } | |
| 4444 m = pdf_array_len(ctx, nval); | |
| 4445 /* Any change in length counts as a difference */ | |
| 4446 if (m != pdf_array_len(ctx, oval)) | |
| 4447 goto change_found; | |
| 4448 for (j = 0; j < m; j++) | |
| 4449 { | |
| 4450 pdf_obj *nkid = pdf_array_get(ctx, nval, j); | |
| 4451 pdf_obj *okid = pdf_array_get(ctx, oval, j); | |
| 4452 /* Kids arrays are supposed to all be indirect. If they aren't, | |
| 4453 * count it as a difference. */ | |
| 4454 if (!pdf_is_indirect(ctx, nkid) || !pdf_is_indirect(ctx, okid)) | |
| 4455 goto change_found; | |
| 4456 /* For now at least, we'll count any change in number as a difference. */ | |
| 4457 if (pdf_to_num(ctx, nkid) != pdf_to_num(ctx, okid)) | |
| 4458 goto change_found; | |
| 4459 check_field(ctx, doc, changes, nkid, locked, name_prefix, n_v, o_v); | |
| 4460 } | |
| 4461 } | |
| 4462 else if (pdf_name_eq(ctx, key, PDF_NAME(V))) | |
| 4463 { | |
| 4464 /* V is checked above */ | |
| 4465 } | |
| 4466 else if (pdf_name_eq(ctx, key, PDF_NAME(AP))) | |
| 4467 { | |
| 4468 /* If we're locked, then nothing can change. If not, | |
| 4469 * we can change to be filled in. */ | |
| 4470 if (is_locked) | |
| 4471 check_unchanged_between(ctx, doc, changes, nval, oval); | |
| 4472 else | |
| 4473 filter_changes_accepted(ctx, changes, nval, &filter_ap); | |
| 4474 } | |
| 4475 /* All other fields can't change */ | |
| 4476 else | |
| 4477 check_unchanged_between(ctx, doc, changes, nval, oval); | |
| 4478 } | |
| 4479 | |
| 4480 /* Now check all the fields in the old object to | |
| 4481 * make sure none were dropped. */ | |
| 4482 len = pdf_dict_len(ctx, old_obj); | |
| 4483 for (i = 0; i < len; i++) | |
| 4484 { | |
| 4485 pdf_obj *key = pdf_dict_get_key(ctx, old_obj, i); | |
| 4486 pdf_obj *nval, *oval; | |
| 4487 | |
| 4488 /* V is checked above */ | |
| 4489 if (pdf_name_eq(ctx, key, PDF_NAME(V))) | |
| 4490 continue; | |
| 4491 | |
| 4492 nval = pdf_dict_get(ctx, new_obj, key); | |
| 4493 oval = pdf_dict_get(ctx, old_obj, key); | |
| 4494 | |
| 4495 if (nval == NULL && oval != NULL) | |
| 4496 changes->obj_changes[pdf_to_num(ctx, nval)] |= FIELD_CHANGE_INVALID; | |
| 4497 } | |
| 4498 changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID; | |
| 4499 | |
| 4500 } | |
| 4501 fz_always(ctx) | |
| 4502 { | |
| 4503 pdf_unmark_obj(ctx, obj); | |
| 4504 fz_free(ctx, field_name); | |
| 4505 doc->xref_base = o_xref_base; | |
| 4506 } | |
| 4507 fz_catch(ctx) | |
| 4508 fz_rethrow(ctx); | |
| 4509 } | |
| 4510 | |
| 4511 static int | |
| 4512 pdf_obj_changed_in_version(fz_context *ctx, pdf_document *doc, int num, int version) | |
| 4513 { | |
| 4514 if (num < 0 || num > doc->max_xref_len) | |
| 4515 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Invalid object number requested"); | |
| 4516 | |
| 4517 return version == doc->xref_index[num]; | |
| 4518 } | |
| 4519 | |
| 4520 /* Fold one lock dictionary (its Action and Fields entries) into the accumulated state in `fields`. A NULL lock is ignored. */ static void | |
| 4521 merge_lock_specification(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *lock) | |
| 4522 { | |
| 4523 pdf_obj *action; | |
| 4524 int i, r, w; | |
| 4525 /* State model: all == 0 means only <includes> are locked; all == 1 means everything except <excludes> is locked. */ | |
| 4526 if (lock == NULL) | |
| 4527 return; | |
| 4528 | |
| 4529 action = pdf_dict_get(ctx, lock, PDF_NAME(Action)); | |
| 4530 | |
| 4531 if (pdf_name_eq(ctx, action, PDF_NAME(All))) | |
| 4532 { | |
| 4533 /* All fields locked means we don't need any stored | |
| 4534 * includes/excludes. */ | |
| 4535 fields->all = 1; | |
| 4536 free_char_list(ctx, &fields->includes); | |
| 4537 free_char_list(ctx, &fields->excludes); | |
| 4538 } | |
| 4539 else | |
| 4540 { | |
| 4541 pdf_obj *f = pdf_dict_get(ctx, lock, PDF_NAME(Fields)); | |
| 4542 int len = pdf_array_len(ctx, f); | |
| 4543 /* An Action that is neither Include nor Exclude matches no branch below and leaves `fields` untouched. */ | |
| 4544 if (pdf_name_eq(ctx, action, PDF_NAME(Include))) | |
| 4545 { | |
| 4546 if (fields->all) | |
| 4547 { | |
| 4548 /* Current state = "All except <excludes> are locked". | |
| 4549 * We need to remove <Fields> from <excludes>. */ | |
| 4550 for (i = 0; i < len; i++) | |
| 4551 { | |
| 4552 const char *s = pdf_array_get_text_string(ctx, f, i); | |
| 4553 /* In-place compaction: r reads, w writes. NOTE(review): dropped entries are only unlinked, not freed - confirm whether the list owns its strings. */ | |
| 4554 for (r = w = 0; r < fields->excludes.len; r++) | |
| 4555 { | |
| 4556 if (strcmp(s, fields->excludes.list[r])) /* keep entries that differ from s */ | |
| 4557 fields->excludes.list[w++] = fields->excludes.list[r]; | |
| 4558 } | |
| 4559 fields->excludes.len = w; | |
| 4560 } | |
| 4561 } | |
| 4562 else | |
| 4563 { | |
| 4564 /* Current state = <includes> are locked. | |
| 4565 * We need to add <Fields> to <includes> (avoiding repetition). */ | |
| 4566 for (i = 0; i < len; i++) | |
| 4567 { | |
| 4568 const char *s = pdf_array_get_text_string(ctx, f, i); | |
| 4569 /* Linear search; r == includes.len afterwards means s was not present. */ | |
| 4570 for (r = 0; r < fields->includes.len; r++) | |
| 4571 { | |
| 4572 if (!strcmp(s, fields->includes.list[r])) | |
| 4573 break; | |
| 4574 } | |
| 4575 if (r == fields->includes.len) | |
| 4576 char_list_append(ctx, &fields->includes, s); | |
| 4577 } | |
| 4578 } | |
| 4579 } | |
| 4580 else if (pdf_name_eq(ctx, action, PDF_NAME(Exclude))) | |
| 4581 { | |
| 4582 if (fields->all) | |
| 4583 { | |
| 4584 /* Current state = "All except <excludes> are locked". | |
| 4585 * We need to remove anything from <excludes> that isn't in <Fields>. */ | |
| 4586 for (r = w = 0; r < fields->excludes.len; r++) | |
| 4587 { | |
| 4588 for (i = 0; i < len; i++) | |
| 4589 { | |
| 4590 const char *s = pdf_array_get_text_string(ctx, f, i); | |
| 4591 if (!strcmp(s, fields->excludes.list[r])) | |
| 4592 break; | |
| 4593 } | |
| 4594 if (i != len) /* we found a match */ | |
| 4595 fields->excludes.list[w++] = fields->excludes.list[r]; | |
| 4596 } | |
| 4597 fields->excludes.len = w; | |
| 4598 } | |
| 4599 else | |
| 4600 { | |
| 4601 /* Current state = <includes> are locked. | |
| 4602 * Set all. <excludes> becomes <Fields> less <includes>. Remove <includes>. */ | |
| 4603 fields->all = 1; | |
| 4604 for (i = 0; i < len; i++) | |
| 4605 { | |
| 4606 const char *s = pdf_array_get_text_string(ctx, f, i); | |
| 4607 for (r = 0; r < fields->includes.len; r++) | |
| 4608 { | |
| 4609 if (!strcmp(s, fields->includes.list[r])) | |
| 4610 break; | |
| 4611 } | |
| 4612 if (r == fields->includes.len) /* s is not already locked via <includes> */ | |
| 4613 char_list_append(ctx, &fields->excludes, s); | |
| 4614 } | |
| 4615 free_char_list(ctx, &fields->includes); | |
| 4616 } | |
| 4617 } | |
| 4618 } | |
| 4619 } | |
| 4620 | |
| 4621 /* Scan the Reference array of signature value `v` and fold each entry's DocMDP or FieldMDP transform into `fields`. Does nothing when v has no Reference entry. */ static void | |
| 4622 find_locked_fields_value(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *v) | |
| 4623 { | |
| 4624 pdf_obj *refs = pdf_dict_get(ctx, v, PDF_NAME(Reference)); | |
| 4625 int idx, count; | |
| 4626 | |
| 4627 if (refs == NULL) | |
| 4628 return; | |
| 4629 | |
| 4630 count = pdf_array_len(ctx, refs); | |
| 4631 for (idx = 0; idx < count; idx++) | |
| 4632 { | |
| 4633 pdf_obj *sigref = pdf_array_get(ctx, refs, idx); | |
| 4634 pdf_obj *method, *params, *kind; | |
| 4635 | |
| 4636 /* A missing Type is accepted; a Type other than SigRef makes us skip the entry. */ | |
| 4637 kind = pdf_dict_get(ctx, sigref, PDF_NAME(Type)); | |
| 4638 if (kind != NULL && !pdf_name_eq(ctx, kind, PDF_NAME(SigRef))) | |
| 4639 continue; | |
| 4640 method = pdf_dict_get(ctx, sigref, PDF_NAME(TransformMethod)); | |
| 4641 params = pdf_dict_get(ctx, sigref, PDF_NAME(TransformParams)); | |
| 4642 if (pdf_name_eq(ctx, method, PDF_NAME(DocMDP))) | |
| 4643 { | |
| 4644 int level = pdf_dict_get_int(ctx, params, PDF_NAME(P)); | |
| 4645 | |
| 4646 /* A level of 0 is replaced by 2 (presumably the default when P is absent - confirm). */ | |
| 4647 if (level == 0) | |
| 4648 level = 2; | |
| 4649 | |
| 4650 /* The first level seen is stored as-is; after that the smallest one wins. */ | |
| 4651 fields->p = (fields->p == 0) ? level : fz_mini(fields->p, level); | |
| 4652 } | |
| 4653 else if (pdf_name_eq(ctx, method, PDF_NAME(FieldMDP))) | |
| 4654 merge_lock_specification(ctx, fields, params); | |
| 4655 } | |
| 4656 } | |
| 4657 | |
| 4658 /* Visit one field and, recursively, its Kids, folding the lock data of signed signature widgets into `fields`. inherit_v and inherit_ft supply V and FT when the field has none of its own. */ static void | |
| 4659 find_locked_fields_aux(fz_context *ctx, pdf_obj *field, pdf_locked_fields *fields, pdf_obj *inherit_v, pdf_obj *inherit_ft) | |
| 4660 { | |
| 4661 int i, n; | |
| 4662 /* Objects whose Type is not Annot are not examined, and neither are their Kids. */ | |
| 4663 if (!pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Type)), PDF_NAME(Annot))) | |
| 4664 return; | |
| 4665 /* A marked object is skipped; presumably it is already being visited further up this recursion. */ | |
| 4666 if (pdf_obj_marked(ctx, field)) | |
| 4667 return; | |
| 4668 | |
| 4669 fz_try(ctx) | |
| 4670 { | |
| 4671 pdf_obj *kids, *v, *ft; | |
| 4672 | |
| 4673 (void)pdf_mark_obj(ctx, field); | |
| 4674 /* Use the field's own V and FT, falling back to the inherited values. */ | |
| 4675 v = pdf_dict_get(ctx, field, PDF_NAME(V)); | |
| 4676 if (v == NULL) | |
| 4677 v = inherit_v; | |
| 4678 ft = pdf_dict_get(ctx, field, PDF_NAME(FT)); | |
| 4679 if (ft == NULL) | |
| 4680 ft = inherit_ft; | |
| 4681 | |
| 4682 /* We are looking for Widget annotations of type Sig that are | |
| 4683 * signed (i.e. have a 'V' field). */ | |
| 4684 if (pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Subtype)), PDF_NAME(Widget)) && | |
| 4685 pdf_name_eq(ctx, ft, PDF_NAME(Sig)) && | |
| 4686 pdf_name_eq(ctx, pdf_dict_get(ctx, v, PDF_NAME(Type)), PDF_NAME(Sig))) | |
| 4687 { | |
| 4688 /* Signed Sig Widgets (i.e. ones with a 'V' field) need | |
| 4689 * to have their lock field respected. */ | |
| 4690 merge_lock_specification(ctx, fields, pdf_dict_get(ctx, field, PDF_NAME(Lock))); | |
| 4691 | |
| 4692 /* Look for DocMDP and FieldMDP entries to see what | |
| 4693 * flavours of alterations are allowed. */ | |
| 4694 find_locked_fields_value(ctx, fields, v); | |
| 4695 } | |
| 4696 | |
| 4697 /* Recurse as required; the effective V and FT become the children's inherited values. */ | |
| 4698 kids = pdf_dict_get(ctx, field, PDF_NAME(Kids)); | |
| 4699 if (kids) | |
| 4700 { | |
| 4701 n = pdf_array_len(ctx, kids); | |
| 4702 for (i = 0; i < n; i++) | |
| 4703 find_locked_fields_aux(ctx, pdf_array_get(ctx, kids, i), fields, v, ft); | |
| 4704 } | |
| 4705 } | |
| 4706 fz_always(ctx) /* clear the mark whether or not an error was thrown */ | |
| 4707 pdf_unmark_obj(ctx, field); | |
| 4708 fz_catch(ctx) | |
| 4709 fz_rethrow(ctx); | |
| 4710 } | |
| 4711 | |
| 4712 /* Build the locked-field summary with doc->xref_base temporarily set to `version`. Returns a newly allocated pdf_locked_fields; if an error is thrown it is dropped here and the error rethrown. */ pdf_locked_fields * | |
| 4713 pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version) | |
| 4714 { | |
| 4715 pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields); | |
| 4716 int o_xref_base = doc->xref_base; | |
| 4717 doc->xref_base = version; | |
| 4718 | |
| 4719 fz_var(fields); | |
| 4720 | |
| 4721 fz_try(ctx) | |
| 4722 { | |
| 4723 pdf_obj *fobj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields"); | |
| 4724 int i, len = pdf_array_len(ctx, fobj); | |
| 4725 /* With no fields we leave the try block early; note this also skips the Root/Perms/DocMDP lookup below. */ | |
| 4726 if (len == 0) | |
| 4727 break; | |
| 4728 | |
| 4729 for (i = 0; i < len; i++) | |
| 4730 find_locked_fields_aux(ctx, pdf_array_get(ctx, fobj, i), fields, NULL, NULL); | |
| 4731 | |
| 4732 /* Add in any DocMDP referenced directly from the Perms dict. */ | |
| 4733 find_locked_fields_value(ctx, fields, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Perms/DocMDP")); | |
| 4734 } | |
| 4735 fz_always(ctx) /* put the caller's xref_base back on success and on error */ | |
| 4736 doc->xref_base = o_xref_base; | |
| 4737 fz_catch(ctx) | |
| 4738 { | |
| 4739 pdf_drop_locked_fields(ctx, fields); | |
| 4740 fz_rethrow(ctx); | |
| 4741 } | |
| 4742 | |
| 4743 return fields; | |
| 4744 } | |
| 4745 | |
| 4746 /* Build the locked-field summary claimed by one signature widget `sig`, from its V/Reference entries and its Lock dictionary. Returns a newly allocated pdf_locked_fields, left empty when sig is not a Widget of field type Sig. */ pdf_locked_fields * | |
| 4747 pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig) | |
| 4748 { | |
| 4749 pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields); | |
| 4750 | |
| 4751 fz_var(fields); | |
| 4752 | |
| 4753 fz_try(ctx) | |
| 4754 { | |
| 4755 pdf_obj *ref; | |
| 4756 int i, len; | |
| 4757 | |
| 4758 /* Ensure it really is a sig */ | |
| 4759 if (!pdf_name_eq(ctx, pdf_dict_get(ctx, sig, PDF_NAME(Subtype)), PDF_NAME(Widget)) || | |
| 4760 !pdf_name_eq(ctx, pdf_dict_get_inheritable(ctx, sig, PDF_NAME(FT)), PDF_NAME(Sig))) | |
| 4761 break; | |
| 4762 /* Note: the loop below merges TransformParams from every Reference entry without looking at its TransformMethod. */ | |
| 4763 /* Check the locking details given in the V (i.e. what the signature value | |
| 4764 * claims to lock). */ | |
| 4765 ref = pdf_dict_getp(ctx, sig, "V/Reference"); | |
| 4766 len = pdf_array_len(ctx, ref); | |
| 4767 for (i = 0; i < len; i++) | |
| 4768 { | |
| 4769 pdf_obj *tp = pdf_dict_get(ctx, pdf_array_get(ctx, ref, i), PDF_NAME(TransformParams)); | |
| 4770 merge_lock_specification(ctx, fields, tp); | |
| 4771 } | |
| 4772 | |
| 4773 /* Also, check the locking details given in the Signature definition. This may | |
| 4774 * not strictly be necessary as it's supposed to be "what the form author told | |
| 4775 * the signature that it should lock". A well-formed signature should lock | |
| 4776 * at least that much (possibly with extra fields locked from the XFA). If the | |
| 4777 * signature doesn't lock as much as it was told to, we should be suspicious | |
| 4778 * of the signing application. It is not clear that this test is actually | |
| 4779 * necessary, or in keeping with what Acrobat does. */ | |
| 4780 merge_lock_specification(ctx, fields, pdf_dict_get(ctx, sig, PDF_NAME(Lock))); | |
| 4781 } | |
| 4782 fz_catch(ctx) | |
| 4783 { | |
| 4784 pdf_drop_locked_fields(ctx, fields); | |
| 4785 fz_rethrow(ctx); | |
| 4786 } | |
| 4787 | |
| 4788 return fields; | |
| 4789 } | |
| 4790 | |
| 4791 /* Check every object changed in xref section `version` against what `locked` permits. Returns 1 when each changed object has been accepted, none is flagged invalid, and every AcroForm Fields entry is an indirect object; otherwise 0. */ static int | |
| 4792 validate_locked_fields(fz_context *ctx, pdf_document *doc, int version, pdf_locked_fields *locked) | |
| 4793 { | |
| 4794 int o_xref_base = doc->xref_base; | |
| 4795 pdf_changes *changes; | |
| 4796 int num_objs; | |
| 4797 int i, n; | |
| 4798 int all_indirects = 1; | |
| 4799 /* One flag word per object number. NOTE(review): the code below relies on the array starting zeroed - confirm fz_malloc_flexible guarantees that. */ | |
| 4800 num_objs = doc->max_xref_len; | |
| 4801 changes = fz_malloc_flexible(ctx, pdf_changes, obj_changes, num_objs); | |
| 4802 changes->num_obj = num_objs; | |
| 4803 | |
| 4804 fz_try(ctx) | |
| 4805 { | |
| 4806 pdf_obj *acroform, *new_acroform, *old_acroform; | |
| 4807 int len, acroform_num; | |
| 4808 | |
| 4809 doc->xref_base = version; | |
| 4810 /* xref_base == version gives the newer state; version+1 gives the state it is compared against. */ | |
| 4811 /* Detect every object that has changed */ | |
| 4812 for (i = 1; i < num_objs; i++) | |
| 4813 { | |
| 4814 if (pdf_obj_changed_in_version(ctx, doc, i, version)) | |
| 4815 changes->obj_changes[i] = FIELD_CHANGED; | |
| 4816 } | |
| 4817 | |
| 4818 /* FIXME: Compare PageTrees and NumberTrees (just to allow for them being regenerated | |
| 4819 * and having produced stuff that represents the same stuff). */ | |
| 4820 | |
| 4821 /* The metadata of a document may be regenerated. Allow for that. */ | |
| 4822 filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Metadata"), &filter_simple); | |
| 4823 | |
| 4824 /* The ModDate of document info may be regenerated. Allow for that. */ | |
| 4825 /* FIXME: We accept all changes in document info, when maybe we ought to just | |
| 4826 * accept ModDate? */ | |
| 4827 filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Info"), &filter_simple); | |
| 4828 | |
| 4829 /* The Encryption dict may be rewritten for the new Xref. */ | |
| 4830 filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Encrypt"), &filter_simple); | |
| 4831 | |
| 4832 /* We have to accept certain changes in the top level AcroForms dict, | |
| 4833 * so get the 2 versions... */ | |
| 4834 acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"); | |
| 4835 acroform_num = pdf_to_num(ctx, acroform); | |
| 4836 new_acroform = pdf_resolve_indirect_chain(ctx, acroform); | |
| 4837 doc->xref_base = version+1; | |
| 4838 old_acroform = pdf_resolve_indirect_chain(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm")); | |
| 4839 doc->xref_base = version; | |
| 4840 n = pdf_dict_len(ctx, new_acroform); | |
| 4841 for (i = 0; i < n; i++) | |
| 4842 { | |
| 4843 pdf_obj *key = pdf_dict_get_key(ctx, new_acroform, i); | |
| 4844 pdf_obj *nval = pdf_dict_get(ctx, new_acroform, key); | |
| 4845 pdf_obj *oval = pdf_dict_get(ctx, old_acroform, key); | |
| 4846 | |
| 4847 if (pdf_name_eq(ctx, key, PDF_NAME(Fields))) | |
| 4848 { | |
| 4849 int j; | |
| 4850 /* Each top-level field is checked individually; a direct (non-indirect) entry makes the overall result 0. */ | |
| 4851 len = pdf_array_len(ctx, nval); | |
| 4852 for (j = 0; j < len; j++) | |
| 4853 { | |
| 4854 pdf_obj *field = pdf_array_get(ctx, nval, j); | |
| 4855 if (!pdf_is_indirect(ctx, field)) | |
| 4856 all_indirects = 0; | |
| 4857 check_field(ctx, doc, changes, field, locked, "", NULL, NULL); | |
| 4858 } | |
| 4859 } | |
| 4860 else if (pdf_name_eq(ctx, key, PDF_NAME(SigFlags))) | |
| 4861 { | |
| 4862 /* Accept this */ | |
| 4863 changes->obj_changes[acroform_num] |= FIELD_CHANGE_VALID; | |
| 4864 } | |
| 4865 else if (pdf_name_eq(ctx, key, PDF_NAME(DR))) | |
| 4866 { | |
| 4867 /* Accept any changes from within the Document Resources */ | |
| 4868 filter_changes_accepted(ctx, changes, nval, &filter_resources); | |
| 4869 } | |
| 4870 else if (pdf_name_eq(ctx, key, PDF_NAME(XFA))) | |
| 4871 { | |
| 4872 /* Allow any changes within the XFA streams. */ | |
| 4873 filter_changes_accepted(ctx, changes, nval, &filter_xfa); | |
| 4874 } | |
| 4875 else if (pdf_objcmp(ctx, nval, oval)) /* any other key must compare equal between the two versions */ | |
| 4876 { | |
| 4877 changes->obj_changes[acroform_num] |= FIELD_CHANGE_INVALID; | |
| 4878 } | |
| 4879 } | |
| 4880 | |
| 4881 /* Allow for any object streams/XRefs to be changed. */ | |
| 4882 doc->xref_base = version+1; | |
| 4883 for (i = 1; i < num_objs; i++) | |
| 4884 { | |
| 4885 pdf_obj *oobj, *otype; | |
| 4886 if (changes->obj_changes[i] != FIELD_CHANGED) /* only objects still unexplained are looked at */ | |
| 4887 continue; | |
| 4888 if (!pdf_obj_exists(ctx, doc, i)) | |
| 4889 { | |
| 4890 /* Not present this version - must be newly created, can't be a change. */ | |
| 4891 changes->obj_changes[i] |= FIELD_CHANGE_VALID; | |
| 4892 continue; | |
| 4893 } | |
| 4894 oobj = pdf_load_object(ctx, doc, i); | |
| 4895 otype = pdf_dict_get(ctx, oobj, PDF_NAME(Type)); | |
| 4896 if (pdf_name_eq(ctx, otype, PDF_NAME(ObjStm)) || | |
| 4897 pdf_name_eq(ctx, otype, PDF_NAME(XRef))) | |
| 4898 { | |
| 4899 changes->obj_changes[i] |= FIELD_CHANGE_VALID; | |
| 4900 } | |
| 4901 pdf_drop_obj(ctx, oobj); | |
| 4902 } | |
| 4903 } | |
| 4904 fz_always(ctx) | |
| 4905 doc->xref_base = o_xref_base; | |
| 4906 fz_catch(ctx) | |
| 4907 { | |
| 4908 fz_free(ctx, changes); | |
| 4909 fz_rethrow(ctx); | |
| 4910 } | |
| 4911 /* The scan starts at 1. NOTE(review): flags stored under index 0 (e.g. via acroform_num when AcroForm is a direct object, presumably num 0) are never examined - confirm this is intended. */ | |
| 4912 for (i = 1; i < num_objs; i++) | |
| 4913 { | |
| 4914 if (changes->obj_changes[i] == FIELD_CHANGED) | |
| 4915 /* Change with no reason */ | |
| 4916 break; | |
| 4917 if (changes->obj_changes[i] & FIELD_CHANGE_INVALID) | |
| 4918 /* Illegal Change */ | |
| 4919 break; | |
| 4920 } | |
| 4921 | |
| 4922 fz_free(ctx, changes); | |
| 4923 /* i == num_objs means the scan above found no offending object. */ | |
| 4924 return (i == num_objs) && all_indirects; | |
| 4925 } | |
| 4926 | |
| 4927 /* Decide whether the changes that produced saved version `version` are permitted by the locks found in version+1. Returns 1 if nothing is locked or validate_locked_fields accepts the changes, else 0. Throws FZ_ERROR_ARGUMENT when version is outside 0 .. pdf_count_versions-1. */ int | |
| 4928 pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version) | |
| 4929 { | |
| 4930 int unsaved_versions = pdf_count_unsaved_versions(ctx, doc); | |
| 4931 int n = pdf_count_versions(ctx, doc); | |
| 4932 pdf_locked_fields *locked = NULL; | |
| 4933 int result; | |
| 4934 | |
| 4935 if (version < 0 || version >= n) | |
| 4936 fz_throw(ctx, FZ_ERROR_ARGUMENT, "There aren't that many changes to find in this document!"); | |
| 4937 | |
| 4938 /* We are wanting to compare version+1 with version to make sure | |
| 4939 * that the only changes made in going to version are conformant | |
| 4940 * with what was allowed in version+1. The production of version | |
| 4941 * might have involved signing a signature field and locking down | |
| 4942 * more fields - this means that taking the list of locked things | |
| 4943 * from version rather than version+1 will give us bad results! */ | |
| 4944 locked = pdf_find_locked_fields(ctx, doc, unsaved_versions+version+1); | |
| 4945 /* The caller's version number is offset by the count of unsaved versions before it is passed on as an xref index. */ | |
| 4946 fz_try(ctx) | |
| 4947 { | |
| 4948 if (!locked->all && locked->includes.len == 0 && locked->p == 0) | |
| 4949 { | |
| 4950 /* If nothing is locked at all, then all changes are permissible. */ | |
| 4951 result = 1; | |
| 4952 } | |
| 4953 else | |
| 4954 result = validate_locked_fields(ctx, doc, unsaved_versions+version, locked); | |
| 4955 } | |
| 4956 fz_always(ctx) | |
| 4957 pdf_drop_locked_fields(ctx, locked); | |
| 4958 fz_catch(ctx) | |
| 4959 fz_rethrow(ctx); | |
| 4960 | |
| 4961 return result; | |
| 4962 } | |
| 4963 | |
| 4964 /* Validate each saved version from the second-highest index down to 0. Returns 0 when every one passes (or there are fewer than two versions), otherwise the failing index plus one. */ int | |
| 4965 pdf_validate_change_history(fz_context *ctx, pdf_document *doc) | |
| 4966 { | |
| 4967 int total = pdf_count_versions(ctx, doc); | |
| 4968 int idx = total - 2; | |
| 4969 | |
| 4970 /* With fewer than 2 versions idx starts below 0, so nothing is checked. */ | |
| 4971 while (idx >= 0) | |
| 4972 { | |
| 4973 if (pdf_validate_changes(ctx, doc, idx) == 0) | |
| 4974 return idx + 1; | |
| 4975 idx--; | |
| 4976 } | |
| 4977 | |
| 4978 return 0; | |
| 4979 } | |
| 4980 | |
| 4981 /* Return the index of the first xref section whose entry for obj's number holds the resolved obj, or -1 when obj is NULL, yields no positive object number, or is not found. */ | |
| 4982 static int | |
| 4983 pdf_find_incremental_update_num_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj) | |
| 4984 { | |
| 4985 pdf_obj *target; | |
| 4986 int num, sec; | |
| 4987 | |
| 4988 if (obj == NULL) | |
| 4989 return -1; | |
| 4990 | |
| 4991 /* Only an indirect reference gives us a usable object number. */ | |
| 4992 num = pdf_to_num(ctx, obj); | |
| 4993 if (num <= 0) | |
| 4994 return -1; | |
| 4995 | |
| 4996 /* Table entries are compared against the resolved object. */ | |
| 4997 target = pdf_resolve_indirect_chain(ctx, obj); | |
| 4998 | |
| 4999 /* Walk the sections in index order; the first match wins. */ | |
| 5000 for (sec = 0; sec < doc->num_xref_sections; sec++) | |
| 5001 { | |
| 5002 pdf_xref *section = &doc->xref_sections[sec]; | |
| 5003 pdf_xref_subsec *part; | |
| 5004 | |
| 5005 if (num >= section->num_objects) | |
| 5006 continue; | |
| 5007 | |
| 5008 for (part = section->subsec; part != NULL; part = part->next) | |
| 5009 { | |
| 5010 /* Skip subsections whose range does not cover num. */ | |
| 5011 if (num < part->start || num >= part->start + part->len) | |
| 5012 continue; | |
| 5013 | |
| 5014 if (part->table[num - part->start].obj == target) | |
| 5015 return sec; | |
| 5016 } | |
| 5017 } | |
| 5018 | |
| 5019 /* No section holds this exact object. */ | |
| 5020 return -1; | |
| 5021 } | |
| 5022 | |
| 5023 /* Return the xref section index in which obj is found, capped at the total number of versions (saved plus unsaved), or -1 when it is not found. */ int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj) | |
| 5024 { | |
| 5025 int found = pdf_find_incremental_update_num_for_obj(ctx, doc, obj); | |
| 5026 int limit; | |
| 5027 | |
| 5028 if (found == -1) | |
| 5029 return -1; | |
| 5030 | |
| 5031 /* The cap is the saved version count plus the unsaved version count. */ | |
| 5032 limit = pdf_count_versions(ctx, doc) + pdf_count_unsaved_versions(ctx, doc); | |
| 5033 | |
| 5034 /* Never report an index beyond that cap. */ | |
| 5035 return (found > limit) ? limit : found; | |
| 5036 } | |
| 5037 | |
| 5038 /* Validate the versions newer than the one holding widget's object against the locks that signature declares. Returns 0 when every examined version passes, otherwise i+1-unsaved_versions for the first failing xref index i. Throws FZ_ERROR_ARGUMENT when widget is not bound to a page. */ int pdf_validate_signature(fz_context *ctx, pdf_annot *widget) | |
| 5039 { | |
| 5040 pdf_document *doc; | |
| 5041 int unsaved_versions, num_versions, version, i; | |
| 5042 pdf_locked_fields *locked = NULL; | |
| 5043 int o_xref_base; | |
| 5044 | |
| 5045 if (!widget->page) | |
| 5046 fz_throw(ctx, FZ_ERROR_ARGUMENT, "annotation not bound to any page"); | |
| 5047 | |
| 5048 doc = widget->page->doc; | |
| 5049 unsaved_versions = pdf_count_unsaved_versions(ctx, doc); | |
| 5050 num_versions = pdf_count_versions(ctx, doc) + unsaved_versions; | |
| 5051 version = pdf_find_version_for_obj(ctx, doc, widget->obj); | |
| 5052 /* NOTE(review): pdf_find_version_for_obj can return -1; that value is used as xref_base below without a check - confirm this is intended. */ | |
| 5053 if (version > num_versions-1) | |
| 5054 version = num_versions-1; | |
| 5055 | |
| 5056 /* Get the locked definition from the object when it was signed. */ | |
| 5057 o_xref_base = doc->xref_base; | |
| 5058 doc->xref_base = version; | |
| 5059 | |
| 5060 fz_var(locked); /* Not really needed, but it stops warnings */ | |
| 5061 | |
| 5062 fz_try(ctx) | |
| 5063 { | |
| 5064 locked = pdf_find_locked_fields_for_sig(ctx, doc, widget->obj); | |
| 5065 for (i = version-1; i >= unsaved_versions; i--) /* step from the signed version towards the newest saved one; unsaved versions are not examined */ | |
| 5066 { | |
| 5067 doc->xref_base = i; | |
| 5068 if (!validate_locked_fields(ctx, doc, i, locked)) | |
| 5069 break; | |
| 5070 } | |
| 5071 } | |
| 5072 fz_always(ctx) | |
| 5073 { | |
| 5074 doc->xref_base = o_xref_base; | |
| 5075 pdf_drop_locked_fields(ctx, locked); | |
| 5076 } | |
| 5077 fz_catch(ctx) | |
| 5078 fz_rethrow(ctx); | |
| 5079 /* If the loop ran to completion i is unsaved_versions-1, which makes the result 0. */ | |
| 5080 return i+1-unsaved_versions; | |
| 5081 } | |
| 5082 | |
| 5083 /* Return 1 if any examined version of the document has a Root/AcroForm with an XFA entry and a Fields array of length 0, else 0. doc->xref_base is restored before returning. */ int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc) | |
| 5084 { | |
| 5085 int num_unsaved_versions = pdf_count_unsaved_versions(ctx, doc); | |
| 5086 int num_versions = pdf_count_versions(ctx, doc); | |
| 5087 int v; | |
| 5088 int o_xref_base = doc->xref_base; | |
| 5089 int pure_xfa = 0; | |
| 5090 | |
| 5091 fz_var(pure_xfa); | |
| 5092 | |
| 5093 fz_try(ctx) | |
| 5094 { | |
| 5095 for(v = num_versions + num_unsaved_versions; !pure_xfa && v >= num_unsaved_versions; v--) /* stops at the first hit */ | |
| 5096 { | |
| 5097 pdf_obj *o; | |
| 5098 doc->xref_base = v; | |
| 5099 o = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"); | |
| 5100 /* If we find a version that had an empty Root/AcroForm/Fields, but had a | |
| 5101 * Root/AcroForm/XFA entry, then we deduce that this was at one time a | |
| 5102 * pure XFA form. */ | |
| 5103 if (pdf_array_len(ctx, pdf_dict_get(ctx, o, PDF_NAME(Fields))) == 0 && | |
| 5104 pdf_dict_get(ctx, o, PDF_NAME(XFA)) != NULL) | |
| 5105 pure_xfa = 1; | |
| 5106 } | |
| 5107 } | |
| 5108 fz_always(ctx) | |
| 5109 doc->xref_base = o_xref_base; | |
| 5110 fz_catch(ctx) | |
| 5111 fz_rethrow(ctx); | |
| 5112 | |
| 5113 return pure_xfa; | |
| 5114 } | |
| 5115 | |
| 5116 /* Allocate a pdf_xref holding one subsection that starts at object 0 and has pdf_xref_len(doc) entries. On allocation failure the partly built structure is freed and the error rethrown. */ pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc) | |
| 5117 { | |
| 5118 int n = pdf_xref_len(ctx, doc); | |
| 5119 pdf_xref *xref = fz_malloc_struct(ctx, pdf_xref); | |
| 5120 | |
| 5121 xref->subsec = NULL; | |
| 5122 xref->num_objects = n; | |
| 5123 xref->trailer = NULL; | |
| 5124 xref->pre_repair_trailer = NULL; | |
| 5125 xref->unsaved_sigs = NULL; | |
| 5126 xref->unsaved_sigs_end = NULL; | |
| 5127 | |
| 5128 fz_try(ctx) | |
| 5129 { | |
| 5130 xref->subsec = fz_malloc_struct(ctx, pdf_xref_subsec); | |
| 5131 xref->subsec->len = n; | |
| 5132 xref->subsec->start = 0; | |
| 5133 xref->subsec->table = fz_malloc_struct_array(ctx, n, pdf_xref_entry); | |
| 5134 xref->subsec->next = NULL; | |
| 5135 } | |
| 5136 fz_catch(ctx) | |
| 5137 { | |
| 5138 fz_free(ctx, xref->subsec); /* still NULL if the subsection allocation itself failed */ | |
| 5139 fz_free(ctx, xref); | |
| 5140 fz_rethrow(ctx); | |
| 5141 } | |
| 5142 | |
| 5143 return xref; | |
| 5144 } | |
| 5145 | |
| 5146 /* Release a local xref: its subsections first, then the structure itself. */ void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref) | |
| 5147 { | |
| 5148 /* A NULL xref is accepted and ignored. */ | |
| 5149 if (xref != NULL) | |
| 5150 { | |
| 5151 pdf_drop_xref_subsec(ctx, xref); | |
| 5152 fz_free(ctx, xref); | |
| 5153 } | |
| 5154 } | |
| 5155 | |
| 5156 /* Purge the document's local resources and local store entries, drop doc->local_xref, and flag the document as needing resynthesis. */ void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc) | |
| 5157 { | |
| 5158 pdf_purge_local_resources(ctx, doc); | |
| 5159 pdf_purge_locals_from_store(ctx, doc); | |
| 5160 pdf_drop_local_xref(ctx, doc->local_xref); | |
| 5161 doc->local_xref = NULL; /* clear the pointer so the dropped xref cannot be reused */ | |
| 5162 doc->resynth_required = 1; | |
| 5163 } | |
| 5164 | |
| 5165 /* Debug aid: write every in-use entry of each incremental xref section, and then of the local xref (if any), to the debug stream returned by fz_stddbg. */ void | |
| 5166 pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc) | |
| 5167 { | |
| 5168 int i, j; | |
| 5169 | |
| 5170 if (doc->num_incremental_sections == 0) | |
| 5171 fz_write_printf(ctx, fz_stddbg(ctx), "No incremental xrefs\n"); | |
| 5172 else | |
| 5173 { | |
| 5174 for (i = 0; i < doc->num_incremental_sections; i++) | |
| 5175 { | |
| 5176 pdf_xref *xref = &doc->xref_sections[i]; | |
| 5177 pdf_xref_subsec *sub; | |
| 5178 | |
| 5179 fz_write_printf(ctx, fz_stddbg(ctx), "Incremental xref:\n"); | |
| 5180 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 5181 { | |
| 5182 fz_write_printf(ctx, fz_stddbg(ctx), " Objects %d->%d\n", sub->start, sub->start + sub->len - 1); | |
| 5183 for (j = 0; j < sub->len; j++) | |
| 5184 { | |
| 5185 pdf_xref_entry *e = &sub->table[j]; | |
| 5186 if (e->type == 0) /* entries with type 0 are not printed */ | |
| 5187 continue; | |
| 5188 fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type); | |
| 5189 pdf_debug_obj(ctx, e->obj); | |
| 5190 fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n"); | |
| 5191 } | |
| 5192 } | |
| 5193 } | |
| 5194 } | |
| 5195 | |
| 5196 if (doc->local_xref == NULL) | |
| 5197 fz_write_printf(ctx, fz_stddbg(ctx), "No local xref\n"); | |
| 5198 else | |
| 5199 { | |
| 5200 /* There is exactly one local xref, so it is dumped once, independent of the number of incremental sections. */ | |
| 5201 { | |
| 5202 pdf_xref *xref = doc->local_xref; | |
| 5203 pdf_xref_subsec *sub; | |
| 5204 | |
| 5205 fz_write_printf(ctx, fz_stddbg(ctx), "Local xref (%sin force):\n", doc->local_xref_nesting == 0 ? "not " : ""); | |
| 5206 for (sub = xref->subsec; sub != NULL; sub = sub->next) | |
| 5207 { | |
| 5208 fz_write_printf(ctx, fz_stddbg(ctx), " Objects %d->%d\n", sub->start, sub->start + sub->len - 1); | |
| 5209 for (j = 0; j < sub->len; j++) | |
| 5210 { | |
| 5211 pdf_xref_entry *e = &sub->table[j]; | |
| 5212 if (e->type == 0) /* entries with type 0 are not printed */ | |
| 5213 continue; | |
| 5214 fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type); | |
| 5215 pdf_debug_obj(ctx, e->obj); | |
| 5216 fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n"); | |
| 5217 } | |
| 5218 } | |
| 5219 } | |
| 5220 } | |
| 5221 | |
| 5222 } | |
| 5223 | |
| 5224 /* Return the Root/Metadata object from the first xref base, starting at the current doc->xref_base and counting upward, whose trailer provides one; NULL if none does. doc->xref_base is restored before returning. */ pdf_obj * | |
| 5225 pdf_metadata(fz_context *ctx, pdf_document *doc) | |
| 5226 { | |
| 5227 int initial = doc->xref_base; | |
| 5228 pdf_obj *obj = NULL; | |
| 5229 | |
| 5230 fz_var(obj); | |
| 5231 | |
| 5232 fz_try(ctx) | |
| 5233 { | |
| 5234 do | |
| 5235 { | |
| 5236 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); | |
| 5237 obj = pdf_dict_get(ctx, root, PDF_NAME(Metadata)); | |
| 5238 if (obj) | |
| 5239 break; | |
| 5240 doc->xref_base++; /* nothing here; try the next xref base */ | |
| 5241 } | |
| 5242 while (doc->xref_base < doc->num_xref_sections); | |
| 5243 } | |
| 5244 fz_always(ctx) | |
| 5245 doc->xref_base = initial; | |
| 5246 fz_catch(ctx) | |
| 5247 fz_rethrow(ctx); | |
| 5248 | |
| 5249 return obj; | |
| 5250 } | |
| 5251 | |
| 5252 /* Return 1 when obj is found in xref section 0 of its bound document and that document has incremental sections; otherwise 0. */ int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj) | |
| 5253 { | |
| 5254 pdf_document *owner = pdf_get_bound_document(ctx, obj); | |
| 5255 | |
| 5256 if (!owner || owner->num_incremental_sections == 0) | |
| 5257 return 0; | |
| 5258 | |
| 5259 /* Only a hit in section index 0 counts. */ | |
| 5260 if (pdf_find_incremental_update_num_for_obj(ctx, owner, obj) != 0) | |
| 5261 return 0; | |
| 5262 return 1; | |
| 5263 } | |
| 5264 | |
| 5265 /* Pass every cached object held in the non-incremental xref sections through pdf_drop_singleton_obj and store the result back in its entry. Does nothing for a NULL doc or after a repair attempt. */ void pdf_minimize_document(fz_context *ctx, pdf_document *doc) | |
| 5266 { | |
| 5267 int sec; | |
| 5268 | |
| 5269 /* Nothing is discarded once a repair has been attempted. */ | |
| 5270 if (doc == NULL || doc->repair_attempted) | |
| 5271 return; | |
| 5272 | |
| 5273 /* Start after the incremental sections; they hold the pending changes. */ | |
| 5274 sec = doc->num_incremental_sections; | |
| 5275 for (; sec < doc->num_xref_sections; sec++) | |
| 5276 { | |
| 5277 pdf_xref_subsec *part = doc->xref_sections[sec].subsec; | |
| 5278 | |
| 5279 while (part != NULL) | |
| 5280 { | |
| 5281 int count = part->len; | |
| 5282 int k; | |
| 5283 | |
| 5284 for (k = 0; k < count; k++) | |
| 5285 { | |
| 5286 pdf_xref_entry *slot = &part->table[k]; | |
| 5287 if (slot->obj != NULL) | |
| 5288 slot->obj = pdf_drop_singleton_obj(ctx, slot->obj); | |
| 5289 } | |
| 5290 part = part->next; | |
| 5291 } | |
| 5292 } | |
| 5293 } | |
| 5294 | |
| 5295 /* Repair the document's xref by delegating to pdf_repair_xref_aux, passing pdf_prime_xref_index as its callback argument. */ void pdf_repair_xref(fz_context *ctx, pdf_document *doc) | |
| 5296 { | |
| 5297 pdf_repair_xref_aux(ctx, doc, pdf_prime_xref_index); | |
| 5298 } | |
