comparison mupdf-source/source/pdf/pdf-write.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "pdf-annot-imp.h"
25
26 #include <zlib.h>
27
28 #include <assert.h>
29 #include <limits.h>
30 #include <string.h>
31
32 #include <stdio.h> /* for debug printing */
33 /* #define DEBUG_HEAP_SORT */
34 /* #define DEBUG_WRITING */
35 /* #define DEBUG_MARK_AND_SWEEP */
36
37 #define SIG_EXTRAS_SIZE (1024)
38
39 #define SLASH_BYTE_RANGE ("/ByteRange")
40 #define SLASH_CONTENTS ("/Contents")
41 #define SLASH_FILTER ("/Filter")
42
43 typedef struct
44 {
45 fz_output *out;
46
47 int do_incremental;
48 int do_tight;
49 int do_ascii;
50 int do_expand;
51 int do_compress;
52 int do_compress_images;
53 int do_compress_fonts;
54 int do_garbage;
55 int do_clean;
56 int do_encrypt;
57 int dont_regenerate_id;
58 int do_snapshot;
59 int do_preserve_metadata;
60 int do_use_objstms;
61 int compression_effort;
62
63 int list_len;
64 int *use_list;
65 int64_t *ofs_list;
66 int *gen_list;
67 int *renumber_map;
68
69 pdf_object_labels *labels;
70 int num_labels;
71 char *obj_labels[100];
72
73 int bias; /* when saving incrementally to a file with garbage before the version marker */
74
75 int crypt_object_number;
76 char opwd_utf8[128];
77 char upwd_utf8[128];
78 int permissions;
79 pdf_crypt *crypt;
80 pdf_obj *crypt_obj;
81 pdf_obj *metadata;
82 } pdf_write_state;
83
84 static void
85 expand_lists(fz_context *ctx, pdf_write_state *opts, int num)
86 {
87 int i;
88
89 /* objects are numbered 0..num and maybe two additional objects for linearization */
90 num += 3;
91 if (num <= opts->list_len)
92 return;
93
94 opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
95 opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
96 opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
97 opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
98
99 for (i = opts->list_len; i < num; i++)
100 {
101 opts->use_list[i] = 0;
102 opts->ofs_list[i] = 0;
103 opts->gen_list[i] = 0;
104 opts->renumber_map[i] = i;
105 }
106 opts->list_len = num;
107 }
108
109 /*
110 * Garbage collect objects not reachable from the trailer.
111 */
112
113 static void bake_stream_length(fz_context *ctx, pdf_document *doc, int num)
114 {
115 if (pdf_obj_num_is_stream(ctx, doc, num))
116 {
117 pdf_obj *len;
118 pdf_obj *obj = NULL;
119 fz_var(obj);
120 fz_try(ctx)
121 {
122 obj = pdf_load_object(ctx, doc, num);
123 len = pdf_dict_get(ctx, obj, PDF_NAME(Length));
124 if (pdf_is_indirect(ctx, len))
125 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_to_int(ctx, len));
126 }
127 fz_always(ctx)
128 pdf_drop_obj(ctx, obj);
129 fz_catch(ctx)
130 fz_rethrow(ctx);
131 }
132 }
133
134 /* Mark a reference. If it's been marked already, return NULL (as no further
135 * processing is required). If it's not, return the resolved object so
136 * that we can continue our recursive marking. If it's a duff reference
137 * return the fact so that we can remove the reference at source.
138 */
139 static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj, int *duff)
140 {
141 int num = pdf_to_num(ctx, obj);
142 int xref_len = pdf_xref_len(ctx, doc);
143
144 if (num <= 0 || num >= xref_len)
145 {
146 *duff = 1;
147 return NULL;
148 }
149 expand_lists(ctx, opts, xref_len);
150 *duff = 0;
151 if (opts->use_list[num])
152 return NULL;
153
154 opts->use_list[num] = 1;
155
156 obj = pdf_resolve_indirect(ctx, obj);
157 if (obj == NULL || pdf_is_null(ctx, obj))
158 {
159 *duff = 1;
160 opts->use_list[num] = 0;
161 }
162
163 return obj;
164 }
165
#ifdef DEBUG_MARK_AND_SWEEP
/* Nesting level of markobj(), kept up to date through DEBUGGING_MARKING. */
static int depth = 0;

/*
 * Print one space for every level of the current marking depth.
 *
 * The depth counter is only read here: it must keep its value so that
 * the depth++ / depth-- pairs in markobj() stay balanced. Padding is
 * produced with a printf field width, so no fixed-size run of spaces
 * has to be indexed.
 */
static void indent(void)
{
	if (depth > 0)
		printf("%*s", depth, "");
}
/* Execute A only in builds that trace marking. */
#define DEBUGGING_MARKING(A) do { A; } while (0)
#else
/* Tracing disabled: A is discarded without being evaluated. */
#define DEBUGGING_MARKING(A) do { } while (0)
#endif
185
186 /* Recursively mark an object. If any references found are duff, then
187 * replace them with nulls. */
188 static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
189 {
190 int i;
191
192 DEBUGGING_MARKING(depth++);
193
194 while (pdf_is_indirect(ctx, obj))
195 {
196 int duff;
197 DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
198 obj = markref(ctx, doc, opts, obj, &duff);
199 if (duff)
200 {
201 DEBUGGING_MARKING(depth--);
202 return 1;
203 }
204 }
205
206 if (pdf_is_dict(ctx, obj))
207 {
208 int n = pdf_dict_len(ctx, obj);
209 for (i = 0; i < n; i++)
210 {
211 DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", i, n, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, i))));
212 if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
213 pdf_dict_put_val_null(ctx, obj, i);
214 }
215 }
216
217 else if (pdf_is_array(ctx, obj))
218 {
219 int n = pdf_array_len(ctx, obj);
220 for (i = 0; i < n; i++)
221 {
222 DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", i, n));
223 if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
224 pdf_array_put(ctx, obj, i, PDF_NULL);
225 }
226 }
227
228 DEBUGGING_MARKING(depth--);
229
230 return 0;
231 }
232
233 /*
234 * Scan for and remove duplicate objects (slow)
235 */
236
237 static int removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
238 {
239 int num, other;
240 int xref_len = pdf_xref_len(ctx, doc);
241 int changed = 0;
242
243 expand_lists(ctx, opts, xref_len);
244 for (num = 1; num < xref_len; num++)
245 {
246 /* Only compare an object to objects preceding it */
247 for (other = 1; other < num; other++)
248 {
249 pdf_obj *a, *b;
250 int newnum;
251
252 if (num == other || num >= opts->list_len || !opts->use_list[num] || !opts->use_list[other])
253 continue;
254
255 /* TODO: resolve indirect references to see if we can omit them */
256
257 a = pdf_get_xref_entry_no_null(ctx, doc, num)->obj;
258 b = pdf_get_xref_entry_no_null(ctx, doc, other)->obj;
259 if (opts->do_garbage >= 4)
260 {
261 if (pdf_objcmp_deep(ctx, a, b))
262 continue;
263 }
264 else
265 {
266 if (pdf_objcmp(ctx, a, b))
267 continue;
268 }
269
270 /* Never common up pages! */
271 if (pdf_name_eq(ctx, pdf_dict_get(ctx, a, PDF_NAME(Type)), PDF_NAME(Page)))
272 continue;
273
274 /* Keep the lowest numbered object */
275 newnum = fz_mini(num, other);
276 opts->renumber_map[num] = newnum;
277 opts->renumber_map[other] = newnum;
278 opts->use_list[fz_maxi(num, other)] = 0;
279
280 /* One duplicate was found, do not look for another */
281 changed = 1;
282 break;
283 }
284 }
285
286 return changed;
287 }
288
289 /*
290 * Renumber objects sequentially so the xref is more compact
291 *
292 * This code assumes that any opts->renumber_map[n] <= n for all n.
293 */
294
295 static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
296 {
297 int num, newnum;
298 int xref_len = pdf_xref_len(ctx, doc);
299
300 /*
301 * Update renumber_map in-place, clustering all used
302 * objects together at low object ids. Objects that
303 * already should be renumbered will have their new
304 * object ids be updated to reflect the compaction.
305 */
306
307 if (xref_len > opts->list_len)
308 expand_lists(ctx, opts, xref_len-1);
309
310 newnum = 1;
311 for (num = 1; num < xref_len; num++)
312 {
313 /* If it's not used, map it to zero */
314 if (!opts->use_list[opts->renumber_map[num]])
315 {
316 opts->renumber_map[num] = 0;
317 }
318 /* If it's not moved, compact it. */
319 else if (opts->renumber_map[num] == num)
320 {
321 opts->renumber_map[num] = newnum++;
322 }
323 /* Otherwise it's used, and moved. We know that it must have
324 * moved down, so the place it's moved to will be in the right
325 * place already. */
326 else
327 {
328 opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
329 }
330 }
331 }
332
333 /*
334 * Update indirect objects according to renumbering established when
335 * removing duplicate objects and compacting the xref.
336 */
337
338 static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
339 {
340 int i;
341 int xref_len = pdf_xref_len(ctx, doc);
342
343 if (pdf_is_dict(ctx, obj))
344 {
345 int n = pdf_dict_len(ctx, obj);
346 for (i = 0; i < n; i++)
347 {
348 pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
349 pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
350 if (pdf_is_indirect(ctx, val))
351 {
352 int o = pdf_to_num(ctx, val);
353 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
354 val = PDF_NULL;
355 else
356 val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
357 pdf_dict_put_drop(ctx, obj, key, val);
358 }
359 else
360 {
361 renumberobj(ctx, doc, opts, val);
362 }
363 }
364 }
365
366 else if (pdf_is_array(ctx, obj))
367 {
368 int n = pdf_array_len(ctx, obj);
369 for (i = 0; i < n; i++)
370 {
371 pdf_obj *val = pdf_array_get(ctx, obj, i);
372 if (pdf_is_indirect(ctx, val))
373 {
374 int o = pdf_to_num(ctx, val);
375 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
376 val = PDF_NULL;
377 else
378 val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
379 pdf_array_put_drop(ctx, obj, i, val);
380 }
381 else
382 {
383 renumberobj(ctx, doc, opts, val);
384 }
385 }
386 }
387 }
388
389 static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
390 {
391 pdf_xref_entry *newxref = NULL;
392 int newlen;
393 int num;
394 int *new_use_list;
395 int xref_len = pdf_xref_len(ctx, doc);
396
397 expand_lists(ctx, opts, xref_len);
398 new_use_list = fz_calloc(ctx, opts->list_len, sizeof(int));
399
400 fz_var(newxref);
401 fz_try(ctx)
402 {
403 /* Apply renumber map to indirect references in all objects in xref */
404 renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
405 for (num = 0; num < xref_len; num++)
406 {
407 pdf_obj *obj;
408 int to = opts->renumber_map[num];
409
410 /* If object is going to be dropped, don't bother renumbering */
411 if (to == 0)
412 continue;
413
414 obj = pdf_get_xref_entry_no_null(ctx, doc, num)->obj;
415
416 if (pdf_is_indirect(ctx, obj))
417 {
418 int o = pdf_to_num(ctx, obj);
419 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
420 obj = PDF_NULL;
421 else
422 obj = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
423 fz_try(ctx)
424 pdf_update_object(ctx, doc, num, obj);
425 fz_always(ctx)
426 pdf_drop_obj(ctx, obj);
427 fz_catch(ctx)
428 fz_rethrow(ctx);
429 }
430 else
431 {
432 renumberobj(ctx, doc, opts, obj);
433 }
434 }
435
436 /* Create new table for the reordered, compacted xref */
437 newxref = Memento_label(fz_malloc_array(ctx, xref_len + 3, pdf_xref_entry), "pdf_xref_entries");
438 newxref[0] = *pdf_get_xref_entry_no_null(ctx, doc, 0);
439
440 /* Move used objects into the new compacted xref */
441 newlen = 0;
442 for (num = 1; num < xref_len; num++)
443 {
444 if (opts->use_list[num])
445 {
446 pdf_xref_entry *e;
447 if (newlen < opts->renumber_map[num])
448 newlen = opts->renumber_map[num];
449 e = pdf_get_xref_entry_no_null(ctx, doc, num);
450 newxref[opts->renumber_map[num]] = *e;
451 if (e->obj)
452 pdf_set_obj_parent(ctx, e->obj, opts->renumber_map[num]);
453 e->obj = NULL;
454 e->stm_buf = NULL;
455 new_use_list[opts->renumber_map[num]] = opts->use_list[num];
456 }
457 else
458 {
459 pdf_xref_entry *e = pdf_get_xref_entry_no_null(ctx, doc, num);
460 pdf_drop_obj(ctx, e->obj);
461 e->obj = NULL;
462 fz_drop_buffer(ctx, e->stm_buf);
463 e->stm_buf = NULL;
464 }
465 }
466
467 pdf_replace_xref(ctx, doc, newxref, newlen + 1);
468 newxref = NULL;
469 }
470 fz_catch(ctx)
471 {
472 fz_free(ctx, newxref);
473 fz_free(ctx, new_use_list);
474 fz_rethrow(ctx);
475 }
476 fz_free(ctx, opts->use_list);
477 opts->use_list = new_use_list;
478
479 for (num = 1; num < xref_len; num++)
480 {
481 opts->renumber_map[num] = num;
482 }
483 }
484
485 /*
486 * Make sure we have loaded objects from object streams.
487 */
488
489 static void preloadobjstms(fz_context *ctx, pdf_document *doc)
490 {
491 pdf_obj *obj;
492 int num;
493 pdf_xref_entry *x = NULL;
494 int load = 1;
495
496 /* If we have attempted a repair, then everything will have been
497 * loaded already. */
498 if (doc->repair_attempted)
499 {
500 /* Bug 707112: But we do need to mark all our 'o' objects as being something else. */
501 load = 0;
502 }
503
504 fz_var(num);
505 fz_var(x);
506
507 /* xref_len may change due to repair, so check it every iteration */
508 for (num = 0; num < pdf_xref_len(ctx, doc); num++)
509 {
510 fz_try(ctx)
511 {
512 for (; num < pdf_xref_len(ctx, doc); num++)
513 {
514 x = pdf_get_xref_entry_no_null(ctx, doc, num);
515 if (x->type == 'o')
516 {
517 if (load)
518 {
519 obj = pdf_load_object(ctx, doc, num);
520 pdf_drop_obj(ctx, obj);
521 }
522 /* The object is no longer an objstm one. It's a regular object
523 * held in memory. Previously we used gen to hold the index of
524 * the obj in the objstm, so reset this to 0. */
525 x->type = 'n';
526 x->gen = 0;
527 }
528 x = NULL;
529 }
530 }
531 fz_catch(ctx)
532 {
533 /* We need to clear the type even in the event of an error, lest we
534 * hit an assert later. Bug 707110. */
535 if (x && x->type == 'o')
536 {
537 x->type = 'f';
538 x->gen = 0;
539 }
540 /* Ignore the error, so we can carry on trying to load. */
541 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
542 fz_report_error(ctx);
543 }
544 }
545 }
546
547 /*
548 * Save streams and objects to the output
549 */
550
551 static int is_bitmap_stream(fz_context *ctx, pdf_obj *obj, size_t len, int *w, int *h)
552 {
553 pdf_obj *bpc;
554 pdf_obj *cs;
555 int stride;
556 if (pdf_dict_get(ctx, obj, PDF_NAME(Subtype)) != PDF_NAME(Image))
557 return 0;
558 *w = pdf_dict_get_int(ctx, obj, PDF_NAME(Width));
559 *h = pdf_dict_get_int(ctx, obj, PDF_NAME(Height));
560 stride = (*w + 7) >> 3;
561 if ((size_t)stride * (*h) != len)
562 return 0;
563 if (pdf_dict_get_bool(ctx, obj, PDF_NAME(ImageMask)))
564 {
565 return 1;
566 }
567 else
568 {
569 bpc = pdf_dict_get(ctx, obj, PDF_NAME(BitsPerComponent));
570 if (!pdf_is_int(ctx, bpc))
571 return 0;
572 if (pdf_to_int(ctx, bpc) != 1)
573 return 0;
574 cs = pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace));
575 if (!pdf_name_eq(ctx, cs, PDF_NAME(DeviceGray)))
576 return 0;
577 return 1;
578 }
579 }
580
/*
 * Classify one character value: returns 0 for newline, carriage return
 * and tab and for values in the range 32..127, and 1 for everything
 * else.
 */
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		return c < 32 || c > 127;
	}
}
587
588 static int isbinarystream(fz_context *ctx, const unsigned char *data, size_t len)
589 {
590 size_t i;
591 for (i = 0; i < len; i++)
592 if (isbinary(data[i]))
593 return 1;
594 return 0;
595 }
596
597 static fz_buffer *hexbuf(fz_context *ctx, const unsigned char *p, size_t n)
598 {
599 static const char hex[17] = "0123456789abcdef";
600 int x = 0;
601 size_t len = n * 2 + (n / 32) + 1;
602 unsigned char *data = Memento_label(fz_malloc(ctx, len), "hexbuf");
603 fz_buffer *buf = fz_new_buffer_from_data(ctx, data, len);
604
605 while (n--)
606 {
607 *data++ = hex[*p >> 4];
608 *data++ = hex[*p & 15];
609 if (++x == 32)
610 {
611 *data++ = '\n';
612 x = 0;
613 }
614 p++;
615 }
616
617 *data++ = '>';
618
619 return buf;
620 }
621
622 static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
623 {
624 pdf_obj *f, *dp, *newf, *newdp;
625
626 newf = newdp = NULL;
627 f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
628 dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
629
630 fz_var(newf);
631 fz_var(newdp);
632
633 fz_try(ctx)
634 {
635 if (pdf_is_name(ctx, f))
636 {
637 newf = pdf_new_array(ctx, doc, 2);
638 pdf_array_push(ctx, newf, PDF_NAME(ASCIIHexDecode));
639 pdf_array_push(ctx, newf, f);
640 f = newf;
641 if (pdf_is_dict(ctx, dp))
642 {
643 newdp = pdf_new_array(ctx, doc, 2);
644 pdf_array_push(ctx, newdp, PDF_NULL);
645 pdf_array_push(ctx, newdp, dp);
646 dp = newdp;
647 }
648 }
649 else if (pdf_is_array(ctx, f))
650 {
651 pdf_array_insert(ctx, f, PDF_NAME(ASCIIHexDecode), 0);
652 if (pdf_is_array(ctx, dp))
653 pdf_array_insert(ctx, dp, PDF_NULL, 0);
654 }
655 else
656 f = PDF_NAME(ASCIIHexDecode);
657
658 pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
659 if (dp)
660 pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
661 }
662 fz_always(ctx)
663 {
664 pdf_drop_obj(ctx, newf);
665 pdf_drop_obj(ctx, newdp);
666 }
667 fz_catch(ctx)
668 fz_rethrow(ctx);
669 }
670
671 static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n, int effort)
672 {
673 fz_buffer *buf;
674 uLongf csize;
675 int t;
676 uLong longN = (uLong)n;
677 unsigned char *data;
678 size_t cap;
679 int mode;
680
681 if (n != (size_t)longN)
682 fz_throw(ctx, FZ_ERROR_LIMIT, "Buffer too large to deflate");
683
684 cap = compressBound(longN);
685 data = Memento_label(fz_malloc(ctx, cap), "pdf_write_deflate");
686 buf = fz_new_buffer_from_data(ctx, data, cap);
687 csize = (uLongf)cap;
688 if (effort == 0)
689 mode = Z_DEFAULT_COMPRESSION;
690 else
691 mode = effort * Z_BEST_COMPRESSION / 100;
692 t = compress2(data, &csize, p, longN, mode);
693 if (t != Z_OK)
694 {
695 fz_drop_buffer(ctx, buf);
696 fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot deflate buffer");
697 }
698 fz_try(ctx)
699 fz_resize_buffer(ctx, buf, csize);
700 fz_catch(ctx)
701 {
702 fz_drop_buffer(ctx, buf);
703 fz_rethrow(ctx);
704 }
705 return buf;
706 }
707
708 static int striphexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
709 {
710 pdf_obj *f, *dp;
711 int is_hex = 0;
712
713 f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
714 dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
715
716 if (pdf_is_array(ctx, f))
717 {
718 /* Remove ASCIIHexDecode from head of filter list */
719 if (pdf_array_get(ctx, f, 0) == PDF_NAME(ASCIIHexDecode))
720 {
721 is_hex = 1;
722 pdf_array_delete(ctx, f, 0);
723 if (pdf_is_array(ctx, dp))
724 pdf_array_delete(ctx, dp, 0);
725 }
726 /* Unpack array if only one filter remains */
727 if (pdf_array_len(ctx, f) == 1)
728 {
729 f = pdf_array_get(ctx, f, 0);
730 pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
731 if (dp)
732 {
733 dp = pdf_array_get(ctx, dp, 0);
734 pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
735 }
736 }
737 /* Remove array if no filters remain */
738 else if (pdf_array_len(ctx, f) == 0)
739 {
740 pdf_dict_del(ctx, dict, PDF_NAME(Filter));
741 pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
742 }
743 }
744 else if (f == PDF_NAME(ASCIIHexDecode))
745 {
746 is_hex = 1;
747 pdf_dict_del(ctx, dict, PDF_NAME(Filter));
748 pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
749 }
750
751 return is_hex;
752 }
753
754 static fz_buffer *unhexbuf(fz_context *ctx, const unsigned char *p, size_t n)
755 {
756 fz_stream *mstm = NULL;
757 fz_stream *xstm = NULL;
758 fz_buffer *out = NULL;
759 fz_var(mstm);
760 fz_var(xstm);
761 fz_try(ctx)
762 {
763 mstm = fz_open_memory(ctx, p, n);
764 xstm = fz_open_ahxd(ctx, mstm);
765 out = fz_read_all(ctx, xstm, n/2);
766 }
767 fz_always(ctx)
768 {
769 fz_drop_stream(ctx, xstm);
770 fz_drop_stream(ctx, mstm);
771 }
772 fz_catch(ctx)
773 fz_rethrow(ctx);
774 return out;
775 }
776
777 static void write_data(fz_context *ctx, void *arg, const unsigned char *data, size_t len)
778 {
779 fz_write_data(ctx, (fz_output *)arg, data, len);
780 }
781
782 static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
783 {
784 fz_buffer *tmp_unhex = NULL, *tmp_comp = NULL, *tmp_hex = NULL, *buf = NULL;
785 pdf_obj *obj = NULL;
786 pdf_obj *dp;
787 size_t len;
788 unsigned char *data;
789 int w, h;
790
791 fz_var(buf);
792 fz_var(tmp_comp);
793 fz_var(tmp_hex);
794 fz_var(obj);
795
796 fz_try(ctx)
797 {
798 buf = pdf_load_raw_stream_number(ctx, doc, num);
799 obj = pdf_copy_dict(ctx, obj_orig);
800
801 len = fz_buffer_storage(ctx, buf, &data);
802
803 if (do_deflate && striphexfilter(ctx, doc, obj))
804 {
805 tmp_unhex = unhexbuf(ctx, data, len);
806 len = fz_buffer_storage(ctx, tmp_unhex, &data);
807 }
808
809 if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
810 {
811 if (is_bitmap_stream(ctx, obj, len, &w, &h))
812 {
813 tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h, (w+7)>>3);
814 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
815 dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
816 pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
817 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
818 }
819 else if (do_deflate == 1)
820 {
821 tmp_comp = deflatebuf(ctx, data, len, opts->compression_effort);
822 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
823 }
824 else
825 {
826 size_t comp_len;
827 int mode = (opts->compression_effort == 0 ? FZ_BROTLI_DEFAULT :
828 FZ_BROTLI_BEST * opts->compression_effort / 100);
829 unsigned char *comp_data = fz_new_brotli_data(ctx, &comp_len, data, len, mode);
830 tmp_comp = fz_new_buffer_from_data(ctx, comp_data, comp_len);
831 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(BrotliDecode));
832 }
833 len = fz_buffer_storage(ctx, tmp_comp, &data);
834 }
835
836 if (opts->do_ascii && isbinarystream(ctx, data, len))
837 {
838 tmp_hex = hexbuf(ctx, data, len);
839 len = fz_buffer_storage(ctx, tmp_hex, &data);
840 addhexfilter(ctx, doc, obj);
841 }
842
843 fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
844
845 if (unenc)
846 {
847 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
848 pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
849 fz_write_string(ctx, opts->out, "\nstream\n");
850 fz_write_data(ctx, opts->out, data, len);
851 }
852 else
853 {
854 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, len));
855 pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen, NULL);
856 fz_write_string(ctx, opts->out, "\nstream\n");
857 pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
858 }
859
860 fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
861 }
862 fz_always(ctx)
863 {
864 fz_drop_buffer(ctx, tmp_unhex);
865 fz_drop_buffer(ctx, tmp_hex);
866 fz_drop_buffer(ctx, tmp_comp);
867 fz_drop_buffer(ctx, buf);
868 pdf_drop_obj(ctx, obj);
869 }
870 fz_catch(ctx)
871 {
872 fz_rethrow(ctx);
873 }
874 }
875
876 static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
877 {
878 fz_buffer *buf = NULL, *tmp_comp = NULL, *tmp_hex = NULL;
879 pdf_obj *obj = NULL;
880 pdf_obj *dp;
881 size_t len;
882 unsigned char *data;
883 int w, h;
884
885 fz_var(buf);
886 fz_var(tmp_comp);
887 fz_var(tmp_hex);
888 fz_var(obj);
889
890 fz_try(ctx)
891 {
892 buf = pdf_load_stream_number(ctx, doc, num);
893 obj = pdf_copy_dict(ctx, obj_orig);
894 pdf_dict_del(ctx, obj, PDF_NAME(Filter));
895 pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
896
897 len = fz_buffer_storage(ctx, buf, &data);
898 if (do_deflate)
899 {
900 if (is_bitmap_stream(ctx, obj, len, &w, &h))
901 {
902 tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h, (w+7)>>3);
903 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
904 dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
905 pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
906 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
907 }
908 else if (do_deflate == 1)
909 {
910 tmp_comp = deflatebuf(ctx, data, len, opts->compression_effort);
911 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
912 }
913 else
914 {
915 size_t comp_len;
916 int mode = (opts->compression_effort == 0 ? FZ_BROTLI_DEFAULT :
917 FZ_BROTLI_BEST * opts->compression_effort / 100);
918 unsigned char *comp_data = fz_new_brotli_data(ctx, &comp_len, data, len, mode);
919 tmp_comp = fz_new_buffer_from_data(ctx, comp_data, comp_len);
920 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(BrotliDecode));
921 }
922 len = fz_buffer_storage(ctx, tmp_comp, &data);
923 }
924
925 if (opts->do_ascii && isbinarystream(ctx, data, len))
926 {
927 tmp_hex = hexbuf(ctx, data, len);
928 len = fz_buffer_storage(ctx, tmp_hex, &data);
929 addhexfilter(ctx, doc, obj);
930 }
931
932 fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
933
934 if (unenc)
935 {
936 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
937 pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
938 fz_write_string(ctx, opts->out, "\nstream\n");
939 fz_write_data(ctx, opts->out, data, len);
940 }
941 else
942 {
943 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)len));
944 pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen, NULL);
945 fz_write_string(ctx, opts->out, "\nstream\n");
946 pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
947 }
948
949 fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
950 }
951 fz_always(ctx)
952 {
953 fz_drop_buffer(ctx, tmp_hex);
954 fz_drop_buffer(ctx, tmp_comp);
955 fz_drop_buffer(ctx, buf);
956 pdf_drop_obj(ctx, obj);
957 }
958 fz_catch(ctx)
959 {
960 fz_rethrow(ctx);
961 }
962 }
963
964 static int is_image_filter(pdf_obj *s)
965 {
966 return
967 s == PDF_NAME(CCITTFaxDecode) || s == PDF_NAME(CCF) ||
968 s == PDF_NAME(DCTDecode) || s == PDF_NAME(DCT) ||
969 s == PDF_NAME(RunLengthDecode) || s == PDF_NAME(RL) ||
970 s == PDF_NAME(JBIG2Decode) ||
971 s == PDF_NAME(JPXDecode);
972 }
973
974 static int filter_implies_image(fz_context *ctx, pdf_obj *o)
975 {
976 if (pdf_is_name(ctx, o))
977 return is_image_filter(o);
978 if (pdf_is_array(ctx, o))
979 {
980 int i, len;
981 len = pdf_array_len(ctx, o);
982 for (i = 0; i < len; i++)
983 if (is_image_filter(pdf_array_get(ctx, o, i)))
984 return 1;
985 }
986 return 0;
987 }
988
989 static int is_jpx_filter(fz_context *ctx, pdf_obj *o)
990 {
991 if (o == PDF_NAME(JPXDecode))
992 return 1;
993 if (pdf_is_array(ctx, o))
994 {
995 int i, len;
996 len = pdf_array_len(ctx, o);
997 for (i = 0; i < len; i++)
998 if (pdf_array_get(ctx, o, i) == PDF_NAME(JPXDecode))
999 return 1;
1000 }
1001 return 0;
1002 }
1003
1004 int pdf_is_image_stream(fz_context *ctx, pdf_obj *obj)
1005 {
1006 pdf_obj *o;
1007 if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(XObject))))
1008 if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Image))))
1009 return 1;
1010 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), filter_implies_image(ctx, o))
1011 return 1;
1012 if (pdf_dict_get(ctx, obj, PDF_NAME(Width)) != NULL && pdf_dict_get(ctx, obj, PDF_NAME(Height)) != NULL)
1013 return 1;
1014 return 0;
1015 }
1016
1017 static int is_font_stream(fz_context *ctx, pdf_obj *obj)
1018 {
1019 pdf_obj *o;
1020 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(Font)))
1021 return 1;
1022 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(FontDescriptor)))
1023 return 1;
1024 if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL)
1025 return 1;
1026 if (pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL)
1027 return 1;
1028 if (pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
1029 return 1;
1030 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Type1C)))
1031 return 1;
1032 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(CIDFontType0C)))
1033 return 1;
1034 return 0;
1035 }
1036
1037 static int is_jpx_stream(fz_context *ctx, pdf_obj *obj)
1038 {
1039 pdf_obj *o;
1040 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), is_jpx_filter(ctx, o))
1041 return 1;
1042 return 0;
1043 }
1044
1045
1046 static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
1047 {
1048 if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(Metadata)))
1049 if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
1050 return 1;
1051 return 0;
1052 }
1053
1054 static void writelabel(fz_context *ctx, void *arg, const char *label)
1055 {
1056 pdf_write_state *opts = arg;
1057 if (opts->num_labels < (int)nelem(opts->obj_labels))
1058 opts->obj_labels[opts->num_labels++] = fz_strdup(ctx, label);
1059 }
1060
/*
 * qsort comparison callback for an array of C strings: each argument
 * points at a char pointer, and the strings are ordered with
 * fz_strverscmp.
 */
static int labelcmp(const void *aa, const void *bb)
{
	const char *const *lhs = aa;
	const char *const *rhs = bb;

	return fz_strverscmp(*lhs, *rhs);
}
1065
1066 static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
1067 {
1068 pdf_obj *obj = NULL;
1069 fz_buffer *buf = NULL;
1070 int do_deflate = 0;
1071 int do_expand = 0;
1072 int skip = 0;
1073 int i;
1074
1075 fz_var(obj);
1076 fz_var(buf);
1077
1078 if (opts->do_encrypt == PDF_ENCRYPT_NONE)
1079 unenc = 1;
1080
1081 fz_try(ctx)
1082 {
1083 obj = pdf_load_object(ctx, doc, num);
1084
1085 /* skip ObjStm and XRef objects */
1086 if (pdf_is_dict(ctx, obj))
1087 {
1088 pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
1089 if (type == PDF_NAME(ObjStm) && !opts->do_use_objstms)
1090 {
1091 if (opts->use_list)
1092 opts->use_list[num] = 0;
1093 skip = 1;
1094 }
1095 if (skip_xrefs && type == PDF_NAME(XRef))
1096 {
1097 if (opts->use_list)
1098 opts->use_list[num] = 0;
1099 skip = 1;
1100 }
1101 }
1102
1103 if (!skip)
1104 {
1105 if (opts->labels)
1106 {
1107 opts->num_labels = 0;
1108 pdf_label_object(ctx, opts->labels, num, writelabel, opts);
1109 if (opts->num_labels == 0)
1110 {
1111 fz_write_string(ctx, opts->out, "% unused\n");
1112 }
1113 else
1114 {
1115 qsort(opts->obj_labels, opts->num_labels, sizeof(char*), labelcmp);
1116 for (i = 0; i < opts->num_labels; ++i)
1117 {
1118 fz_write_printf(ctx, opts->out, "%% %s\n", opts->obj_labels[i]);
1119 fz_free(ctx, opts->obj_labels[i]);
1120 opts->obj_labels[i] = NULL;
1121 }
1122 }
1123 }
1124
1125 if (pdf_obj_num_is_stream(ctx, doc, num))
1126 {
1127 do_deflate = opts->do_compress;
1128 do_expand = opts->do_expand;
1129 if (opts->do_compress_images && pdf_is_image_stream(ctx, obj))
1130 do_deflate = opts->do_compress ? opts->do_compress : 1, do_expand = 0;
1131 if (opts->do_compress_fonts && is_font_stream(ctx, obj))
1132 do_deflate = opts->do_compress ? opts->do_compress : 1, do_expand = 0;
1133 if (is_xml_metadata(ctx, obj))
1134 do_deflate = 0, do_expand = 0;
1135 if (is_jpx_stream(ctx, obj))
1136 do_deflate = 0, do_expand = 0;
1137
1138 if (do_expand)
1139 expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
1140 else
1141 copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
1142 }
1143 else
1144 {
1145 fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
1146 pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen, NULL);
1147 fz_write_string(ctx, opts->out, "\nendobj\n\n");
1148 }
1149 }
1150 }
1151 fz_always(ctx)
1152 {
1153 for (i = 0; i < opts->num_labels; ++i)
1154 {
1155 fz_free(ctx, opts->obj_labels[i]);
1156 opts->obj_labels[i] = NULL;
1157 }
1158 fz_drop_buffer(ctx, buf);
1159 pdf_drop_obj(ctx, obj);
1160 }
1161 fz_catch(ctx)
1162 {
1163 fz_rethrow(ctx);
1164 }
1165 }
1166
1167 static void writexrefsubsect(fz_context *ctx, pdf_write_state *opts, int from, int to)
1168 {
1169 int num;
1170
1171 fz_write_printf(ctx, opts->out, "%d %d\n", from, to - from);
1172 for (num = from; num < to; num++)
1173 {
1174 if (opts->use_list[num])
1175 fz_write_printf(ctx, opts->out, "%010lu %05d n \n", opts->ofs_list[num] - opts->bias, opts->gen_list[num]);
1176 else
1177 fz_write_printf(ctx, opts->out, "%010lu %05d f \n", opts->ofs_list[num] - opts->bias, opts->gen_list[num]);
1178 }
1179 }
1180
1181 static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t startxref)
1182 {
1183 pdf_obj *trailer = NULL;
1184 pdf_obj *obj;
1185
1186 fz_write_string(ctx, opts->out, "xref\n");
1187
1188 if (opts->do_incremental)
1189 {
1190 int subfrom = from;
1191 int subto;
1192
1193 while (subfrom < to)
1194 {
1195 while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
1196 subfrom++;
1197
1198 subto = subfrom;
1199 while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
1200 subto++;
1201
1202 if (subfrom < subto)
1203 writexrefsubsect(ctx, opts, subfrom, subto);
1204
1205 subfrom = subto;
1206 }
1207 }
1208 else
1209 {
1210 writexrefsubsect(ctx, opts, from, to);
1211 }
1212
1213 fz_write_string(ctx, opts->out, "\n");
1214
1215 fz_var(trailer);
1216
1217 fz_try(ctx)
1218 {
1219 if (opts->do_incremental)
1220 {
1221 trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
1222 pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
1223 pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
1224 pdf_dict_del(ctx, trailer, PDF_NAME(XRefStm));
1225 if (!opts->do_snapshot)
1226 doc->startxref = startxref - opts->bias;
1227 }
1228 else
1229 {
1230 trailer = pdf_new_dict(ctx, doc, 5);
1231
1232 pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), to);
1233
1234 if (first)
1235 {
1236 pdf_obj *otrailer = pdf_trailer(ctx, doc);
1237 obj = pdf_dict_get(ctx, otrailer, PDF_NAME(Info));
1238 if (obj)
1239 pdf_dict_put(ctx, trailer, PDF_NAME(Info), obj);
1240
1241 obj = pdf_dict_get(ctx, otrailer, PDF_NAME(Root));
1242 if (obj)
1243 pdf_dict_put(ctx, trailer, PDF_NAME(Root), obj);
1244
1245
1246 obj = pdf_dict_get(ctx, otrailer, PDF_NAME(ID));
1247 if (obj)
1248 pdf_dict_put(ctx, trailer, PDF_NAME(ID), obj);
1249
1250 /* The encryption dictionary is kept in the writer state to handle
1251 the encryption dictionary object being renumbered during repair.*/
1252 if (opts->crypt_obj)
1253 {
1254 /* If the encryption dictionary used to be an indirect reference from the trailer,
1255 store it the same way in the trailer in the saved file. */
1256 if (pdf_is_indirect(ctx, opts->crypt_obj))
1257 pdf_dict_put_indirect(ctx, trailer, PDF_NAME(Encrypt), opts->crypt_object_number);
1258 else
1259 pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), opts->crypt_obj);
1260 }
1261
1262 if (opts->metadata)
1263 pdf_dict_putp(ctx, trailer, "Root/Metadata", opts->metadata);
1264 }
1265 }
1266
1267 fz_write_string(ctx, opts->out, "trailer\n");
1268 /* Trailer is NOT encrypted */
1269 pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
1270 fz_write_string(ctx, opts->out, "\n");
1271
1272 fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref - opts->bias);
1273
1274 doc->last_xref_was_old_style = 1;
1275 }
1276 fz_always(ctx)
1277 {
1278 pdf_drop_obj(ctx, trailer);
1279 }
1280 fz_catch(ctx)
1281 {
1282 fz_rethrow(ctx);
1283 }
1284 }
1285
1286 static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
1287 {
1288 int num;
1289
1290 pdf_array_push_int(ctx, index, from);
1291 pdf_array_push_int(ctx, index, to - from);
1292 for (num = from; num < to; num++)
1293 {
1294 int f1, f2, f3;
1295 pdf_xref_entry *x = pdf_get_xref_entry_no_null(ctx, doc, num);
1296 if (opts->use_list[num] == 0)
1297 {
1298 f1 = 0; /* Free */
1299 f2 = opts->ofs_list[num];
1300 f3 = opts->gen_list[num];
1301 }
1302 else if (x->type == 'o')
1303 {
1304 f1 = 2; /* Object Stream */
1305 f2 = opts->ofs_list[num];
1306 f3 = opts->gen_list[num];
1307 }
1308 else
1309 {
1310 f1 = 1; /* Object */
1311 f2 = opts->ofs_list[num] - opts->bias;
1312 f3 = opts->gen_list[num];
1313 }
1314 fz_append_byte(ctx, fzbuf, f1);
1315 fz_append_byte(ctx, fzbuf, f2>>24);
1316 fz_append_byte(ctx, fzbuf, f2>>16);
1317 fz_append_byte(ctx, fzbuf, f2>>8);
1318 fz_append_byte(ctx, fzbuf, f2);
1319 fz_append_byte(ctx, fzbuf, f3);
1320 }
1321 }
1322
1323 static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t startxref)
1324 {
1325 int num;
1326 pdf_obj *dict = NULL;
1327 pdf_obj *obj;
1328 pdf_obj *w = NULL;
1329 pdf_obj *index;
1330 fz_buffer *fzbuf = NULL;
1331
1332 fz_var(dict);
1333 fz_var(w);
1334 fz_var(fzbuf);
1335 fz_try(ctx)
1336 {
1337 num = pdf_create_object(ctx, doc);
1338 expand_lists(ctx, opts, num);
1339
1340 dict = pdf_new_dict(ctx, doc, 6);
1341 pdf_update_object(ctx, doc, num, dict);
1342
1343 to++;
1344
1345 if (first)
1346 {
1347 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
1348 if (obj)
1349 pdf_dict_put(ctx, dict, PDF_NAME(Info), obj);
1350
1351 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
1352 if (obj)
1353 pdf_dict_put(ctx, dict, PDF_NAME(Root), obj);
1354
1355 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
1356 if (obj)
1357 pdf_dict_put(ctx, dict, PDF_NAME(ID), obj);
1358
1359 /* The encryption dictionary is kept in the writer state to handle
1360 the encryption dictionary object being renumbered during repair.*/
1361 if (opts->crypt_obj)
1362 {
1363 /* If the encryption dictionary used to be an indirect reference from the trailer,
1364 store it the same way in the xref stream in the saved file. */
1365 if (pdf_is_indirect(ctx, opts->crypt_obj))
1366 pdf_dict_put_indirect(ctx, dict, PDF_NAME(Encrypt), opts->crypt_object_number);
1367 else
1368 pdf_dict_put(ctx, dict, PDF_NAME(Encrypt), opts->crypt_obj);
1369 }
1370 }
1371
1372 pdf_dict_put_int(ctx, dict, PDF_NAME(Size), to);
1373
1374 if (opts->do_incremental)
1375 {
1376 pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), doc->startxref);
1377 if (!opts->do_snapshot)
1378 doc->startxref = startxref - opts->bias;
1379 }
1380
1381 pdf_dict_put(ctx, dict, PDF_NAME(Type), PDF_NAME(XRef));
1382
1383 w = pdf_new_array(ctx, doc, 3);
1384 pdf_dict_put(ctx, dict, PDF_NAME(W), w);
1385 pdf_array_push_int(ctx, w, 1);
1386 pdf_array_push_int(ctx, w, 4);
1387 pdf_array_push_int(ctx, w, 1);
1388
1389 index = pdf_new_array(ctx, doc, 2);
1390 pdf_dict_put_drop(ctx, dict, PDF_NAME(Index), index);
1391
1392 /* opts->gen_list[num] is already initialized by fz_calloc. */
1393 opts->use_list[num] = 1;
1394 opts->ofs_list[num] = startxref;
1395
1396 fzbuf = fz_new_buffer(ctx, (1 + 4 + 1) * (to-from));
1397
1398 if (opts->do_incremental)
1399 {
1400 int subfrom = from;
1401 int subto;
1402
1403 while (subfrom < to)
1404 {
1405 while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
1406 subfrom++;
1407
1408 subto = subfrom;
1409 while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
1410 subto++;
1411
1412 if (subfrom < subto)
1413 writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, subfrom, subto);
1414
1415 subfrom = subto;
1416 }
1417 }
1418 else
1419 {
1420 writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, from, to);
1421 }
1422
1423 pdf_update_stream(ctx, doc, dict, fzbuf, 0);
1424
1425 writeobject(ctx, doc, opts, num, 0, 0, 1);
1426 fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref - opts->bias);
1427
1428 if (opts->do_snapshot)
1429 pdf_delete_object(ctx, doc, num);
1430 }
1431 fz_always(ctx)
1432 {
1433 pdf_drop_obj(ctx, dict);
1434 pdf_drop_obj(ctx, w);
1435 fz_drop_buffer(ctx, fzbuf);
1436 }
1437 fz_catch(ctx)
1438 {
1439 fz_rethrow(ctx);
1440 }
1441
1442 doc->last_xref_was_old_style = 0;
1443 }
1444
1445 static void
1446 dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num)
1447 {
1448 pdf_xref_entry *entry = pdf_get_xref_entry_no_null(ctx, doc, num);
1449 int gen = opts->gen_list ? opts->gen_list[num] : 0;
1450 if (entry->type == 'f')
1451 gen = entry->gen;
1452 if (entry->type == 'n')
1453 gen = entry->gen;
1454
1455 /* If we are renumbering, then make sure all generation numbers are
1456 * zero (except object 0 which must be free, and have a gen number of
1457 * 65535). Changing the generation numbers (and indeed object numbers)
1458 * will break encryption - so only do this if we are renumbering
1459 * anyway. */
1460 if (opts->do_garbage >= 2)
1461 gen = (num == 0 ? 65535 : 0);
1462
1463 /* For objects in object streams, the gen number gives us the index of
1464 * the object within the stream. */
1465 if (entry->type == 'o')
1466 gen = entry->gen;
1467
1468 if (opts->gen_list)
1469 opts->gen_list[num] = gen;
1470
1471 if (opts->do_garbage && !opts->use_list[num])
1472 return;
1473
1474 if (entry->type == 'o' && (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num)))
1475 {
1476 assert(opts->do_use_objstms);
1477 opts->ofs_list[num] = entry->ofs;
1478 return;
1479 }
1480
1481 if (entry->type == 'n')
1482 {
1483 if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
1484 {
1485 if (opts->ofs_list)
1486 opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
1487 writeobject(ctx, doc, opts, num, gen, 1, num == opts->crypt_object_number);
1488 }
1489 }
1490 else if (opts->use_list)
1491 opts->use_list[num] = 0;
1492 }
1493
1494 static void
1495 writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1496 {
1497 int num;
1498 int xref_len = pdf_xref_len(ctx, doc);
1499
1500 if (!opts->do_incremental)
1501 {
1502 int version = pdf_version(ctx, doc);
1503 fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", version / 10, version % 10);
1504 fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
1505 }
1506
1507 for (num = 0; num < xref_len; num++)
1508 dowriteobject(ctx, doc, opts, num);
1509 }
1510
#ifdef DEBUG_WRITING
/*
	Debug aid: print the recorded offset and use flag of every object
	number in the xref to stderr, one line per object.
*/
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	int n = 0;

	while (n < pdf_xref_len(ctx, doc))
	{
		fprintf(stderr, "%d@%ld: use=%d\n", n, opts->ofs_list[n], opts->use_list[n]);
		n++;
	}
}
#endif
1522
1523 static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
1524 {
1525 int s;
1526
1527 for (s = 0; s < doc->num_incremental_sections; s++)
1528 {
1529 pdf_xref *xref = &doc->xref_sections[s];
1530
1531 if (xref->unsaved_sigs)
1532 {
1533 /* The ByteRange objects of signatures are initially written out with
1534 * dummy values, and then overwritten later. We need to make sure their
1535 * initial form at least takes enough sufficient file space */
1536 pdf_unsaved_sig *usig;
1537 int n = 0;
1538
1539 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1540 n++;
1541
1542 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1543 {
1544 /* There will be segments of bytes at the beginning, at
1545 * the end and between each consecutive pair of signatures,
1546 * hence n + 1 */
1547 int i;
1548 pdf_obj *byte_range = pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
1549
1550 for (i = 0; i < n+1; i++)
1551 {
1552 pdf_array_push_int(ctx, byte_range, INT_MAX);
1553 pdf_array_push_int(ctx, byte_range, INT_MAX);
1554 }
1555 }
1556 }
1557 }
1558 }
1559
1560 static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1561 {
1562 pdf_obj *byte_range = NULL;
1563 char *buf = NULL, *ptr;
1564 int s;
1565 fz_stream *stm = NULL;
1566
1567 fz_var(byte_range);
1568 fz_var(stm);
1569 fz_var(buf);
1570
1571 fz_try(ctx)
1572 {
1573 for (s = 0; s < doc->num_incremental_sections; s++)
1574 {
1575 pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];
1576
1577 if (xref->unsaved_sigs)
1578 {
1579 pdf_unsaved_sig *usig;
1580 size_t buf_size = 0;
1581 size_t i;
1582 size_t last_end;
1583
1584 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1585 {
1586 size_t size = usig->signer->max_digest_size(ctx, usig->signer);
1587 buf_size = fz_maxz(buf_size, size);
1588 }
1589
1590 buf_size = buf_size * 2 + SIG_EXTRAS_SIZE;
1591
1592 buf = fz_calloc(ctx, buf_size, 1);
1593
1594 stm = fz_stream_from_output(ctx, opts->out);
1595 /* Locate the byte ranges and contents in the saved file */
1596 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1597 {
1598 char *bstr, *cstr, *fstr;
1599 size_t bytes_read;
1600 int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
1601 fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
1602 /* SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size
1603 * could cause an attempt to read off the end of the file. That's not an
1604 * error, but we need to keep track of how many bytes are read and search
1605 * for markers only in defined data */
1606 bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
1607 assert(bytes_read <= buf_size);
1608
1609 bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-1);
1610 cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-1);
1611 fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-1);
1612
1613 if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
1614 fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to determine byte ranges while writing signature");
1615
1616 usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-1 + opts->ofs_list[pnum];
1617 usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
1618 usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-1 + opts->ofs_list[pnum];
1619 usig->contents_end = fstr - buf + opts->ofs_list[pnum];
1620 }
1621
1622 fz_drop_stream(ctx, stm);
1623 stm = NULL;
1624
1625 /* Recreate ByteRange with correct values. */
1626 byte_range = pdf_new_array(ctx, doc, 4);
1627
1628 last_end = 0;
1629 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1630 {
1631 pdf_array_push_int(ctx, byte_range, last_end);
1632 pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
1633 last_end = usig->contents_end;
1634 }
1635 pdf_array_push_int(ctx, byte_range, last_end);
1636 pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);
1637
1638 /* Copy the new ByteRange to the other unsaved signatures */
1639 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1640 pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);
1641
1642 /* Write the byte range into buf, padding with spaces*/
1643 ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, 1, 0);
1644 if (ptr != buf) /* should never happen, since data should fit in buf_size */
1645 fz_free(ctx, ptr);
1646 memset(buf+i, ' ', buf_size-i);
1647
1648 /* Write the byte range to the file */
1649 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1650 {
1651 fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
1652 fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
1653 }
1654
1655 /* Write the digests into the file */
1656 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
1657 pdf_write_digest(ctx, opts->out, byte_range, usig->field, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);
1658
1659 /* delete the unsaved_sigs records */
1660 while ((usig = xref->unsaved_sigs) != NULL)
1661 {
1662 xref->unsaved_sigs = usig->next;
1663 pdf_drop_obj(ctx, usig->field);
1664 pdf_drop_signer(ctx, usig->signer);
1665 fz_free(ctx, usig);
1666 }
1667
1668 xref->unsaved_sigs_end = NULL;
1669
1670 pdf_drop_obj(ctx, byte_range);
1671 byte_range = NULL;
1672
1673 fz_free(ctx, buf);
1674 buf = NULL;
1675 }
1676 }
1677 }
1678 fz_always(ctx)
1679 {
1680 pdf_drop_obj(ctx, byte_range);
1681 }
1682 fz_catch(ctx)
1683 {
1684 fz_drop_stream(ctx, stm);
1685 fz_free(ctx, buf);
1686 fz_rethrow(ctx);
1687 }
1688 }
1689
1690 static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii, int newlines)
1691 {
1692 int n = pdf_count_pages(ctx, doc);
1693 int i;
1694
1695 pdf_filter_options options = { 0 };
1696 pdf_sanitize_filter_options sopts = { 0 };
1697 pdf_filter_factory list[2] = { 0 };
1698
1699 options.recurse = 1;
1700 options.ascii = ascii;
1701 options.newlines = newlines;
1702 options.filters = sanitize ? list : NULL;
1703 list[0].filter = pdf_new_sanitize_filter;
1704 list[0].options = &sopts;
1705
1706 for (i = 0; i < n; i++)
1707 {
1708 pdf_annot *annot;
1709 pdf_page *page = pdf_load_page(ctx, doc, i);
1710
1711 fz_try(ctx)
1712 {
1713 pdf_filter_page_contents(ctx, doc, page, &options);
1714 for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
1715 {
1716 pdf_filter_annot_contents(ctx, doc, annot, &options);
1717 }
1718 }
1719 fz_always(ctx)
1720 fz_drop_page(ctx, &page->super);
1721 fz_catch(ctx)
1722 fz_rethrow(ctx);
1723 }
1724 }
1725
1726 /* Initialise the pdf_write_state, used dynamically during the write, from the static
1727 * pdf_write_options, passed into pdf_save_document */
1728 static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts, pdf_write_state *opts)
1729 {
1730 int xref_len = pdf_xref_len(ctx, doc);
1731
1732 opts->do_incremental = in_opts->do_incremental;
1733 opts->do_ascii = in_opts->do_ascii;
1734 opts->do_tight = !in_opts->do_pretty;
1735 opts->do_expand = in_opts->do_decompress;
1736 opts->do_compress = in_opts->do_compress;
1737 opts->do_compress_images = in_opts->do_compress_images;
1738 opts->do_compress_fonts = in_opts->do_compress_fonts;
1739 opts->do_snapshot = in_opts->do_snapshot;
1740 opts->compression_effort = in_opts->compression_effort;
1741 if (opts->compression_effort < 0)
1742 opts->compression_effort = 0;
1743 else if (opts->compression_effort > 100)
1744 opts->compression_effort = 100;
1745
1746 opts->do_garbage = in_opts->do_garbage;
1747 opts->do_clean = in_opts->do_clean;
1748 opts->do_encrypt = in_opts->do_encrypt;
1749 opts->dont_regenerate_id = in_opts->dont_regenerate_id;
1750 opts->do_preserve_metadata = in_opts->do_preserve_metadata;
1751 opts->do_use_objstms = in_opts->do_use_objstms;
1752
1753 opts->permissions = in_opts->permissions;
1754 memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
1755 memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
1756
1757 /* We deliberately make these arrays long enough to cope with
1758 * 1 to n access rather than 0..n-1, and add space for 2 new
1759 * extra entries that may be required for linearization. */
1760 opts->list_len = 0;
1761 opts->use_list = NULL;
1762 opts->ofs_list = NULL;
1763 opts->gen_list = NULL;
1764 opts->renumber_map = NULL;
1765
1766 expand_lists(ctx, opts, xref_len);
1767 }
1768
1769 /* Free the resources held by the dynamic write options */
1770 static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
1771 {
1772 fz_free(ctx, opts->use_list);
1773 fz_free(ctx, opts->ofs_list);
1774 fz_free(ctx, opts->gen_list);
1775 fz_free(ctx, opts->renumber_map);
1776 pdf_drop_object_labels(ctx, opts->labels);
1777 }
1778
1779 const pdf_write_options pdf_default_write_options = {
1780 0, /* do_incremental */
1781 0, /* do_pretty */
1782 0, /* do_ascii */
1783 0, /* do_compress */
1784 0, /* do_compress_images */
1785 0, /* do_compress_fonts */
1786 0, /* do_decompress */
1787 0, /* do_garbage */
1788 0, /* do_linear */
1789 0, /* do_clean */
1790 0, /* do_sanitize */
1791 0, /* do_appearance */
1792 0, /* do_encrypt */
1793 0, /* dont_regenerate_id */
1794 ~0, /* permissions */
1795 "", /* opwd_utf8[128] */
1796 "", /* upwd_utf8[128] */
1797 0 /* do_snapshot */
1798 };
1799
1800 static const pdf_write_options pdf_snapshot_write_options = {
1801 1, /* do_incremental */
1802 0, /* do_pretty */
1803 0, /* do_ascii */
1804 0, /* do_compress */
1805 0, /* do_compress_images */
1806 0, /* do_compress_fonts */
1807 0, /* do_decompress */
1808 0, /* do_garbage */
1809 0, /* do_linear */
1810 0, /* do_clean */
1811 0, /* do_sanitize */
1812 0, /* do_appearance */
1813 0, /* do_encrypt */
1814 1, /* dont_regenerate_id */
1815 ~0, /* permissions */
1816 "", /* opwd_utf8[128] */
1817 "", /* upwd_utf8[128] */
1818 1 /* do_snapshot */
1819 };
1820
/*
	Human readable summary of the option string understood by
	pdf_parse_write_options. The option names shown here must match the
	names the parser looks up, otherwise users following this text will
	pass options that are silently ignored.
*/
const char *fz_pdf_write_options_usage =
	"PDF output options:\n"
	"\tdecompress: decompress all streams (except compress-fonts/images)\n"
	"\tcompress=yes|flate|brotli: compress all streams, yes defaults to flate\n"
	"\tcompress-fonts: compress embedded fonts\n"
	"\tcompress-images: compress images\n"
	"\tcompression-effort=0|percentage: effort spent compressing, 0 is default, 100 is max effort\n"
	"\tascii: ASCII hex encode binary streams\n"
	"\tpretty: pretty-print objects with indentation\n"
	"\tlabels: print object labels\n"
	"\tlinearize: optimize for web browsers (no longer supported!)\n"
	"\tclean: pretty-print graphics commands in content streams\n"
	"\tsanitize: sanitize graphics commands in content streams\n"
	"\tgarbage: garbage collect unused objects\n"
	"\tor garbage=compact: ... and compact cross reference table\n"
	"\tor garbage=deduplicate: ... and remove duplicate objects\n"
	"\tincremental: write changes as incremental update\n"
	"\tobjstms: use object streams and cross reference streams\n"
	"\tappearance=yes|all: synthesize just missing, or all, annotation/widget appearance streams\n"
	"\tcontinue-on-error: continue saving the document even if there is an error\n"
	"\tdecrypt: write unencrypted document\n"
	"\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
	"\tpermissions=NUMBER: document permissions to grant when encrypting\n"
	"\tuser-password=PASSWORD: password required to read document\n"
	"\towner-password=PASSWORD: password required to edit document\n"
	"\tregenerate-id: (default yes) regenerate document id\n"
	"\n";
1848
1849 pdf_write_options *
1850 pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
1851 {
1852 const char *val;
1853
1854 memset(opts, 0, sizeof *opts);
1855
1856 if (fz_has_option(ctx, args, "decompress", &val))
1857 opts->do_decompress = fz_option_eq(val, "yes");
1858 if (fz_has_option(ctx, args, "compress", &val))
1859 {
1860 if (fz_option_eq(val, "brotli"))
1861 opts->do_compress = 2;
1862 else if (fz_option_eq(val, "flate"))
1863 opts->do_compress = 1;
1864 else
1865 opts->do_compress = fz_option_eq(val, "yes");
1866 }
1867 if (fz_has_option(ctx, args, "compress-fonts", &val))
1868 opts->do_compress_fonts = fz_option_eq(val, "yes");
1869 if (fz_has_option(ctx, args, "compress-images", &val))
1870 opts->do_compress_images = fz_option_eq(val, "yes");
1871 if (fz_has_option(ctx, args, "compression-effort", &val))
1872 opts->compression_effort = fz_atoi(val);
1873 if (fz_has_option(ctx, args, "labels", &val))
1874 opts->do_labels = fz_option_eq(val, "yes");
1875 if (fz_has_option(ctx, args, "ascii", &val))
1876 opts->do_ascii = fz_option_eq(val, "yes");
1877 if (fz_has_option(ctx, args, "pretty", &val))
1878 opts->do_pretty = fz_option_eq(val, "yes");
1879 if (fz_has_option(ctx, args, "linearize", &val))
1880 opts->do_linear = fz_option_eq(val, "yes");
1881 if (fz_has_option(ctx, args, "clean", &val))
1882 opts->do_clean = fz_option_eq(val, "yes");
1883 if (fz_has_option(ctx, args, "sanitize", &val))
1884 opts->do_sanitize = fz_option_eq(val, "yes");
1885 if (fz_has_option(ctx, args, "incremental", &val))
1886 opts->do_incremental = fz_option_eq(val, "yes");
1887 if (fz_has_option(ctx, args, "objstms", &val))
1888 opts->do_use_objstms = fz_option_eq(val, "yes");
1889 if (fz_has_option(ctx, args, "regenerate-id", &val))
1890 opts->dont_regenerate_id = fz_option_eq(val, "no");
1891 if (fz_has_option(ctx, args, "decrypt", &val))
1892 opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
1893 if (fz_has_option(ctx, args, "encrypt", &val))
1894 {
1895 if (fz_option_eq(val, "none") || fz_option_eq(val, "no"))
1896 opts->do_encrypt = PDF_ENCRYPT_NONE;
1897 else if (fz_option_eq(val, "keep"))
1898 opts->do_encrypt = PDF_ENCRYPT_KEEP;
1899 else if (fz_option_eq(val, "rc4-40") || fz_option_eq(val, "yes"))
1900 opts->do_encrypt = PDF_ENCRYPT_RC4_40;
1901 else if (fz_option_eq(val, "rc4-128"))
1902 opts->do_encrypt = PDF_ENCRYPT_RC4_128;
1903 else if (fz_option_eq(val, "aes-128"))
1904 opts->do_encrypt = PDF_ENCRYPT_AES_128;
1905 else if (fz_option_eq(val, "aes-256"))
1906 opts->do_encrypt = PDF_ENCRYPT_AES_256;
1907 else
1908 fz_throw(ctx, FZ_ERROR_ARGUMENT, "unknown encryption in options");
1909 }
1910 if (fz_has_option(ctx, args, "owner-password", &val))
1911 fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
1912 if (fz_has_option(ctx, args, "user-password", &val))
1913 fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
1914 if (fz_has_option(ctx, args, "permissions", &val))
1915 opts->permissions = fz_atoi(val);
1916 else
1917 opts->permissions = ~0;
1918 if (fz_has_option(ctx, args, "garbage", &val))
1919 {
1920 if (fz_option_eq(val, "yes"))
1921 opts->do_garbage = 1;
1922 else if (fz_option_eq(val, "compact"))
1923 opts->do_garbage = 2;
1924 else if (fz_option_eq(val, "deduplicate"))
1925 opts->do_garbage = 3;
1926 else
1927 opts->do_garbage = fz_atoi(val);
1928 }
1929 if (fz_has_option(ctx, args, "appearance", &val))
1930 {
1931 if (fz_option_eq(val, "yes"))
1932 opts->do_appearance = 1;
1933 else if (fz_option_eq(val, "all"))
1934 opts->do_appearance = 2;
1935 }
1936
1937 return opts;
1938 }
1939
1940 int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
1941 {
1942 if (doc->repair_attempted)
1943 return 0;
1944 if (doc->redacted)
1945 return 0;
1946 return 1;
1947 }
1948
1949 static void
1950 prepare_for_save(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts)
1951 {
1952 /* Rewrite (and possibly sanitize) the operator streams */
1953 if (in_opts->do_clean || in_opts->do_sanitize)
1954 {
1955 pdf_begin_operation(ctx, doc, "Clean content streams");
1956 fz_try(ctx)
1957 {
1958 clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii, in_opts->do_pretty);
1959 pdf_end_operation(ctx, doc);
1960 }
1961 fz_catch(ctx)
1962 {
1963 pdf_abandon_operation(ctx, doc);
1964 fz_rethrow(ctx);
1965 }
1966 }
1967
1968 /* When saving a PDF with signatures the file will
1969 first be written once, then the file will have its
1970 digests and byte ranges calculated and and then the
1971 signature dictionary containing them will be updated
1972 both in memory and in the saved file. By setting this
1973 flag we avoid a new xref section from being created when
1974 the signature dictionary is updated. */
1975 doc->save_in_progress = 1;
1976
1977 if (!in_opts->do_snapshot)
1978 presize_unsaved_signature_byteranges(ctx, doc);
1979 }
1980
1981 static pdf_obj *
1982 new_identity(fz_context *ctx, pdf_document *doc)
1983 {
1984 unsigned char rnd[32];
1985 pdf_obj *id;
1986
1987 fz_memrnd(ctx, rnd, nelem(rnd));
1988
1989 id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), 2);
1990 pdf_array_push_string(ctx, id, (char *) rnd + 0, nelem(rnd) / 2);
1991 pdf_array_push_string(ctx, id, (char *) rnd + 16, nelem(rnd) / 2);
1992
1993 return id;
1994 }
1995
1996 static void
1997 change_identity(fz_context *ctx, pdf_document *doc, pdf_obj *id)
1998 {
1999 unsigned char rnd[16];
2000 if (pdf_array_len(ctx, id) >= 2)
2001 {
2002 /* Update second half of ID array with new random data. */
2003 fz_memrnd(ctx, rnd, 16);
2004 pdf_array_put_string(ctx, id, 1, (char *)rnd, 16);
2005 }
2006 }
2007
2008 static void
2009 create_encryption_dictionary(fz_context *ctx, pdf_document *doc, pdf_crypt *crypt)
2010 {
2011 unsigned char *o, *u;
2012 pdf_obj *encrypt;
2013 int r;
2014
2015 r = pdf_crypt_revision(ctx, crypt);
2016
2017 encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), 10);
2018
2019 pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
2020 pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
2021 pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
2022 pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
2023 pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
2024 pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
2025
2026 o = pdf_crypt_owner_password(ctx, crypt);
2027 u = pdf_crypt_user_password(ctx, crypt);
2028
2029 if (r < 4)
2030 {
2031 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
2032 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
2033 }
2034 else if (r == 4)
2035 {
2036 pdf_obj *cf;
2037
2038 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
2039 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
2040
2041 cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
2042 cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
2043 pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
2044 pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
2045 pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 16);
2046 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
2047 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
2048 }
2049 else if (r == 6)
2050 {
2051 unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
2052 unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
2053 pdf_obj *cf;
2054
2055 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
2056 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
2057
2058 cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
2059 cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
2060 pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
2061 pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
2062 pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 32);
2063 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 48);
2064 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 48);
2065 pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, 32);
2066 pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, 32);
2067 pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), 16);
2068 }
2069 }
2070
2071 static void
2072 ensure_initial_incremental_contents(fz_context *ctx, fz_stream *in, fz_output *out, int64_t len)
2073 {
2074 fz_stream *verify;
2075 unsigned char buf0[4096];
2076 unsigned char buf1[4096];
2077 size_t n0, n1;
2078 int64_t off = 0;
2079 int same;
2080
2081 if (!in)
2082 fz_throw(ctx, FZ_ERROR_ARGUMENT, "no input file for incremental write");
2083
2084 verify = fz_stream_from_output(ctx, out);
2085
2086 fz_try(ctx)
2087 {
2088 /* Compare current contents of output file (in case we append) */
2089 if (verify)
2090 {
2091 do
2092 {
2093 int64_t read = sizeof(buf0);
2094 if (off + read > len)
2095 read = len - off;
2096 fz_seek(ctx, in, off, SEEK_SET);
2097 n0 = fz_read(ctx, in, buf0, read);
2098 fz_seek(ctx, verify, off, SEEK_SET);
2099 n1 = fz_read(ctx, verify, buf1, read);
2100 same = (n0 == n1 && !memcmp(buf0, buf1, n0));
2101 off += (int64_t)n0;
2102 }
2103 while (same && n0 > 0 && off < len);
2104
2105 if (same)
2106 {
2107 fz_seek_output(ctx, out, len, SEEK_SET);
2108 fz_truncate_output(ctx, out);
2109 break; /* return from try */
2110 }
2111
2112 fz_seek_output(ctx, out, 0, SEEK_SET);
2113 }
2114
2115 /* Copy old contents into new file */
2116 fz_seek(ctx, in, 0, SEEK_SET);
2117 off = 0;
2118 do
2119 {
2120 int64_t read = sizeof(buf0);
2121 if (off + read > len)
2122 read = len - off;
2123 n0 = fz_read(ctx, in, buf0, read);
2124 if (n0)
2125 fz_write_data(ctx, out, buf0, n0);
2126 off += n0;
2127 }
2128 while (n0 > 0 && off < len);
2129
2130 if (verify)
2131 {
2132 fz_truncate_output(ctx, out);
2133 fz_seek_output(ctx, out, 0, SEEK_END);
2134 }
2135 }
2136 fz_always(ctx)
2137 fz_drop_stream(ctx, verify);
2138 fz_catch(ctx)
2139 fz_rethrow(ctx);
2140 }
2141
/* Number of gathered objects at which an object stream is flushed. */
#define OBJSTM_MAXOBJS 256
/* Gathered byte length above which an object stream is flushed.
 * Parenthesised so the shift stays a single operand wherever the macro is
 * expanded (e.g. OBJSTM_MAXLEN + 1 must not become 1 << 25). */
#define OBJSTM_MAXLEN (1<<24)
2144
2145 typedef struct
2146 {
2147 pdf_write_state *opts;
2148 int n;
2149 int objnum[OBJSTM_MAXOBJS];
2150 size_t len[OBJSTM_MAXOBJS];
2151 fz_buffer *content_buf;
2152 fz_output *content_out;
2153 int root_num;
2154 int info_num;
2155 int sep;
2156 } objstm_gather_data;
2157
2158 static void
2159 flush_gathered(fz_context *ctx, pdf_document *doc, objstm_gather_data *data)
2160 {
2161 pdf_obj *obj;
2162 pdf_obj *ref = NULL;
2163 fz_buffer *newbuf = NULL;
2164 fz_output *out = NULL;
2165 int i;
2166
2167 if (data->n == 0)
2168 return;
2169
2170 obj = pdf_new_dict(ctx, doc, 4);
2171
2172 fz_var(ref);
2173 fz_var(newbuf);
2174 fz_var(out);
2175
2176 fz_try(ctx)
2177 {
2178 size_t pos = 0, first;
2179 int num;
2180 newbuf = fz_new_buffer(ctx, 128);
2181
2182 out = fz_new_output_with_buffer(ctx, newbuf);
2183
2184 for (i = 0; i < data->n; i++)
2185 {
2186 fz_write_printf(ctx, out, "%d %d ", data->objnum[i], pos);
2187 pos += data->len[i];
2188 }
2189
2190 fz_close_output(ctx, out);
2191 first = fz_tell_output(ctx, out);
2192 fz_drop_output(ctx, out);
2193 out = NULL;
2194
2195 pdf_dict_put_int(ctx, obj, PDF_NAME(First), first);
2196 pdf_dict_put_int(ctx, obj, PDF_NAME(N), data->n);
2197 pdf_dict_put(ctx, obj, PDF_NAME(Type), PDF_NAME(ObjStm));
2198
2199 fz_close_output(ctx, data->content_out);
2200 fz_append_buffer(ctx, newbuf, data->content_buf);
2201
2202 doc->xref_base = 0; /* Might have been reset by our caller */
2203 ref = pdf_add_object(ctx, doc, obj);
2204 pdf_update_stream(ctx, doc, ref, newbuf, 0);
2205
2206 num = pdf_to_num(ctx, ref);
2207 expand_lists(ctx, data->opts, num);
2208 data->opts->use_list[num] = 1;
2209
2210 /* Update all the xref entries for the objects to point into this stream. */
2211 for (i = 0; i < data->n; i++)
2212 {
2213 pdf_xref_entry *x = pdf_get_xref_entry_no_null(ctx, doc, data->objnum[i]);
2214 x->ofs = num; /* ofs = which objstm is this in */
2215 x->gen = i; /* gen = nth entry in the objstm */
2216 data->opts->ofs_list[data->objnum[i]] = i;
2217 data->opts->gen_list[data->objnum[i]] = i;
2218 }
2219
2220 data->n = 0;
2221 data->sep = 0;
2222 }
2223 fz_always(ctx)
2224 {
2225 fz_drop_output(ctx, data->content_out);
2226 data->content_out = NULL;
2227 fz_drop_buffer(ctx, data->content_buf);
2228 data->content_buf = NULL;
2229 pdf_drop_obj(ctx, obj);
2230 pdf_drop_obj(ctx, ref);
2231 fz_drop_buffer(ctx, newbuf);
2232 fz_drop_output(ctx, out);
2233 }
2234 fz_catch(ctx)
2235 fz_rethrow(ctx);
2236 }
2237
2238 static void
2239 objstm_gather(fz_context *ctx, pdf_xref_entry *x, int i, pdf_document *doc, objstm_gather_data *data)
2240 {
2241 size_t olen, len;
2242
2243 if (i == data->root_num || i == data->info_num)
2244 return;
2245
2246 /* Ensure the object is loaded! */
2247 if (i == 0)
2248 return; /* pdf_cache_object does not like being called for i == 0 which should be free. */
2249 pdf_cache_object(ctx, doc, i);
2250
2251 /* Both normal objects and stream objects can get put into objstms (because we've already
2252 * unpacked stream objects from objstms earlier!) Stream objects that are non-incremental
2253 * will be left as they are by the later check. */
2254 if ((x->type != 'n' && x->type != 'o') || x->stm_buf != NULL || x->stm_ofs != 0 || x->gen != 0)
2255 return; /* Objects with generation number != 0 cannot be put in objstms */
2256 if (i == data->opts->crypt_object_number)
2257 return; /* Encryption dictionaries can also not be put in objstms */
2258
2259 /* If we are writing incrementally, then only the last one can be gathered. */
2260 if (data->opts->do_incremental && !pdf_obj_is_incremental(ctx, x->obj))
2261 return;
2262
2263 /* FIXME: Can we do a pass through to check for such objects more exactly? */
2264 if (pdf_is_int(ctx, x->obj))
2265 return; /* In case it's a Length value. */
2266 if (pdf_is_indirect(ctx, x->obj))
2267 return; /* Bare indirect references are not allowed. */
2268
2269 if (data->content_buf == NULL)
2270 data->content_buf = fz_new_buffer(ctx, 128);
2271 if (data->content_out == NULL)
2272 data->content_out = fz_new_output_with_buffer(ctx, data->content_buf);
2273
2274 olen = data->content_buf->len;
2275 pdf_print_encrypted_obj(ctx, data->content_out, x->obj, 1, 0, NULL, 0, 0, NULL);
2276 data->objnum[data->n] = i;
2277 len = data->content_buf->len;
2278 data->len[data->n] = len - olen;
2279 x->type = 'o';
2280 x->gen = data->n;
2281 data->n++;
2282 if (data->n == OBJSTM_MAXOBJS || len > OBJSTM_MAXLEN)
2283 flush_gathered(ctx, doc, data);
2284 }
2285
2286 static void
2287 gather_to_objstms(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int xref_len)
2288 {
2289 int count, num;
2290 objstm_gather_data data = { 0 };
2291
2292 data.opts = opts;
2293 data.root_num = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)));
2294 data.info_num = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)));
2295
2296 count = pdf_xref_len(ctx, doc);
2297 for (num = 1; num < count; ++num)
2298 {
2299 pdf_xref_entry *x = pdf_get_xref_entry_no_change(ctx, doc, num);
2300 if (x)
2301 objstm_gather(ctx, x, num, doc, &data);
2302 }
2303
2304 flush_gathered(ctx, doc, &data);
2305 }
2306
2307 static void
2308 unpack_objstm_objs(fz_context *ctx, pdf_document *doc, int xref_len)
2309 {
2310 int num;
2311
2312 /* At this point, all our objects are cached already. Let's change
2313 * all the 'o' objects to be 'n' and get rid of the ObjStm objects
2314 * they all came from. */
2315 for (num = 1; num < xref_len; ++num)
2316 {
2317 pdf_xref_entry *x = pdf_get_xref_entry_no_change(ctx, doc, num);
2318 if (!x || x->type != 'o')
2319 continue;
2320
2321 /* Change the type of the object to 'n'. */
2322 x->type = 'n';
2323 /* This leaves x->ofs etc wrong, but that's OK as the object is
2324 * in memory, and we'll fix it up after the write. */
2325
2326 /* We no longer need the ObjStm that this object came from. */
2327 if (x->ofs != 0)
2328 {
2329 pdf_xref_entry *y = pdf_get_xref_entry_no_change(ctx, doc, x->ofs);
2330 /* The xref entry y for the objstm containing the object identified by
2331 xref entry x above must exist, otherwise that object would not be labelled
2332 'o' in the xref. */
2333 assert(y != NULL);
2334 y->type = 'f';
2335 }
2336 }
2337 }
2338
2339 static void
2340 prepass(fz_context *ctx, pdf_document *doc)
2341 {
2342 int num;
2343
2344 for (num = 1; num < pdf_xref_len(ctx, doc); ++num)
2345 {
2346 if (pdf_object_exists(ctx, doc, num))
2347 {
2348 fz_try(ctx)
2349 pdf_cache_object(ctx, doc, num);
2350 fz_catch(ctx)
2351 fz_report_error(ctx);
2352 }
2353 }
2354 }
2355
2356 static void
2357 pdf_ensure_pages_are_pages(fz_context *ctx, pdf_document *doc)
2358 {
2359 int i;
2360
2361 if (!doc->fwd_page_map)
2362 return;
2363
2364 for (i = 0; i < doc->map_page_count; i++)
2365 {
2366 pdf_obj *type = pdf_dict_get(ctx, doc->fwd_page_map[i], PDF_NAME(Type));
2367 if (type == NULL)
2368 pdf_dict_put(ctx, doc->fwd_page_map[i], PDF_NAME(Type), PDF_NAME(Page));
2369 }
2370 }
2371
2372 static void
2373 do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, const pdf_write_options *in_opts)
2374 {
2375 int lastfree;
2376 int num;
2377 int xref_len;
2378 pdf_obj *id1, *id = NULL;
2379 int changed;
2380 int64_t current_offset;
2381
2382 if (in_opts->do_incremental)
2383 {
2384 ensure_initial_incremental_contents(ctx, doc->file, opts->out, doc->file_size);
2385
2386 /* If no changes, nothing more to write */
2387 if (!pdf_has_unsaved_changes(ctx, doc))
2388 {
2389 doc->save_in_progress = 0;
2390 return;
2391 }
2392
2393 fz_write_string(ctx, opts->out, "\n");
2394 }
2395
2396 pdf_begin_operation(ctx, doc, "Save document");
2397 fz_try(ctx)
2398 {
2399 /* First, we do a prepass across the document to load all the objects
2400 * into memory. We'll end up doing this later on anyway, but by doing
2401 * it here, we force any repairs to happen before writing proper
2402 * starts. */
2403 prepass(ctx, doc);
2404 xref_len = pdf_xref_len(ctx, doc);
2405
2406 initialise_write_state(ctx, doc, in_opts, opts);
2407
2408 if (in_opts->do_labels)
2409 opts->labels = pdf_load_object_labels(ctx, doc);
2410
2411 if (!opts->dont_regenerate_id)
2412 {
2413 /* Update second half of ID array if it exists. */
2414 id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
2415 if (id)
2416 change_identity(ctx, doc, id);
2417 }
2418
2419 /* Remove encryption dictionary if saving without encryption. */
2420 if (opts->do_encrypt == PDF_ENCRYPT_NONE)
2421 {
2422 assert(!in_opts->do_snapshot);
2423 pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
2424 }
2425
2426 /* Keep encryption dictionary if saving with old encryption. */
2427 else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
2428 {
2429 opts->crypt = doc->crypt;
2430 }
2431
2432 /* Create encryption dictionary if saving with new encryption. */
2433 else
2434 {
2435 assert(!opts->do_snapshot);
2436 if (!id)
2437 id = new_identity(ctx, doc);
2438 id1 = pdf_array_get(ctx, id, 0);
2439 opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
2440 create_encryption_dictionary(ctx, doc, opts->crypt);
2441 }
2442
2443 /* Stash Encrypt entry in the writer state, in case a repair pass throws away the old trailer. */
2444 opts->crypt_obj = pdf_keep_obj(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
2445
2446 /* If we're writing a snapshot, we can't be doing garbage
2447 * collection, or linearisation, and must be writing
2448 * incrementally. */
2449 assert(!opts->do_snapshot || opts->do_garbage == 0);
2450
2451 /* Make sure any objects hidden in compressed streams have been loaded */
2452 if (!opts->do_incremental)
2453 {
2454 pdf_ensure_solid_xref(ctx, doc, xref_len);
2455 preloadobjstms(ctx, doc);
2456 }
2457
2458 /* If we're using objstms, then the version must be at least 1.5 */
2459 if (opts->do_use_objstms && pdf_version(ctx, doc) < 15)
2460 {
2461 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
2462 pdf_obj *version = pdf_dict_get(ctx, root, PDF_NAME(Version));
2463 doc->version = 15;
2464 if (opts->do_incremental || version != NULL)
2465 {
2466 pdf_dict_put(ctx, root, PDF_NAME(Version), PDF_NAME(1_5));
2467 }
2468 }
2469
2470 if (opts->do_preserve_metadata)
2471 opts->metadata = pdf_keep_obj(ctx, pdf_metadata(ctx, doc));
2472
2473 xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
2474 expand_lists(ctx, opts, xref_len);
2475
2476 if (opts->do_garbage >= 1)
2477 {
2478 pdf_ensure_pages_are_pages(ctx, doc);
2479 }
2480
2481 do
2482 {
2483 changed = 0;
2484 /* Sweep & mark objects from the trailer */
2485 if (opts->do_garbage >= 1)
2486 {
2487 /* Start by removing indirect /Length attributes on streams */
2488 for (num = 0; num < xref_len; num++)
2489 bake_stream_length(ctx, doc, num);
2490
2491 (void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
2492 }
2493 else
2494 {
2495 for (num = 0; num < xref_len; num++)
2496 opts->use_list[num] = 1;
2497 }
2498
2499 /* Coalesce and renumber duplicate objects */
2500 if (opts->do_garbage >= 3)
2501 changed = removeduplicateobjs(ctx, doc, opts);
2502
2503 /* Compact xref by renumbering and removing unused objects */
2504 if (opts->do_garbage >= 2)
2505 compactxref(ctx, doc, opts);
2506
2507 /* Make renumbering affect all indirect references and update xref */
2508 if (opts->do_garbage >= 2)
2509 renumberobjs(ctx, doc, opts);
2510 }
2511 while (changed);
2512
2513 opts->crypt_object_number = 0;
2514 if (opts->crypt)
2515 {
2516 pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
2517 opts->crypt_object_number = pdf_to_num(ctx, crypt);
2518 }
2519
2520 xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
2521 expand_lists(ctx, opts, xref_len);
2522
2523 /* If we're about to do a non-incremental write, we can't
2524 * afford to leave any objects in ObjStms. We might have
2525 * changed the objects, and we won't know to update the
2526 * stream. So pull all the objects into memory. */
2527 if (!opts->do_incremental)
2528 unpack_objstm_objs(ctx, doc, xref_len);
2529
2530 if (opts->do_use_objstms)
2531 gather_to_objstms(ctx, doc, opts, xref_len);
2532
2533 xref_len = pdf_xref_len(ctx, doc); /* May have changed due to the gather */
2534 expand_lists(ctx, opts, xref_len);
2535
2536 /* Truncate the xref after compacting and renumbering */
2537 if ((opts->do_garbage >= 2) &&
2538 !opts->do_incremental)
2539 {
2540 while (xref_len > 0 && !opts->use_list[xref_len-1])
2541 xref_len--;
2542 }
2543
2544 if (opts->do_incremental)
2545 {
2546 int i;
2547
2548 doc->disallow_new_increments = 1;
2549
2550 for (i = 0; i < doc->num_incremental_sections; i++)
2551 {
2552 doc->xref_base = doc->num_incremental_sections - i - 1;
2553 xref_len = pdf_xref_len(ctx, doc);
2554
2555 writeobjects(ctx, doc, opts);
2556
2557 #ifdef DEBUG_WRITING
2558 dump_object_details(ctx, doc, opts);
2559 #endif
2560
2561 for (num = 0; num < xref_len; num++)
2562 {
2563 if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
2564 {
2565 /* Make unreusable. FIXME: would be better to link to existing free list */
2566 opts->gen_list[num] = 65535;
2567 opts->ofs_list[num] = 0;
2568 }
2569 }
2570
2571 current_offset = fz_tell_output(ctx, opts->out);
2572 if (!doc->last_xref_was_old_style || opts->do_use_objstms)
2573 writexrefstream(ctx, doc, opts, 0, xref_len, 1, current_offset);
2574 else
2575 writexref(ctx, doc, opts, 0, xref_len, 1, current_offset);
2576
2577 doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
2578 }
2579
2580 doc->xref_base = 0;
2581 doc->disallow_new_increments = 0;
2582 }
2583 else
2584 {
2585 writeobjects(ctx, doc, opts);
2586
2587 #ifdef DEBUG_WRITING
2588 dump_object_details(ctx, doc, opts);
2589 #endif
2590
2591 /* Construct linked list of free object slots */
2592 lastfree = 0;
2593 for (num = 0; num < xref_len; num++)
2594 {
2595 if (!opts->use_list[num])
2596 {
2597 opts->gen_list[num]++;
2598 opts->ofs_list[lastfree] = num;
2599 lastfree = num;
2600 }
2601 }
2602 opts->gen_list[0] = 0xffff;
2603
2604 current_offset = fz_tell_output(ctx, opts->out);
2605 if (opts->do_use_objstms)
2606 writexrefstream(ctx, doc, opts, 0, xref_len, 1, current_offset);
2607 else
2608 writexref(ctx, doc, opts, 0, xref_len, 1, current_offset);
2609
2610 doc->xref_sections[0].end_ofs = fz_tell_output(ctx, opts->out);
2611 }
2612
2613 if (!in_opts->do_snapshot)
2614 {
2615 complete_signatures(ctx, doc, opts);
2616 }
2617
2618 pdf_sync_open_pages(ctx, doc);
2619
2620 pdf_end_operation(ctx, doc);
2621 }
2622 fz_always(ctx)
2623 {
2624 finalise_write_state(ctx, opts);
2625 if (opts->crypt != doc->crypt)
2626 pdf_drop_crypt(ctx, opts->crypt);
2627 pdf_drop_obj(ctx, opts->crypt_obj);
2628 pdf_drop_obj(ctx, opts->metadata);
2629 doc->save_in_progress = 0;
2630 }
2631 fz_catch(ctx)
2632 {
2633 pdf_abandon_operation(ctx, doc);
2634 fz_rethrow(ctx);
2635 }
2636 }
2637
2638 int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
2639 {
2640 int s;
2641 for (s = 0; s < doc->num_incremental_sections; s++)
2642 {
2643 pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];
2644
2645 if (xref->unsaved_sigs)
2646 return 1;
2647 }
2648 return 0;
2649 }
2650
2651 void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *in_opts)
2652 {
2653 pdf_write_options opts_defaults = pdf_default_write_options;
2654 pdf_write_state opts = { 0 };
2655
2656 if (!doc || !out)
2657 return;
2658
2659 if (!in_opts)
2660 in_opts = &opts_defaults;
2661
2662 if (in_opts->do_incremental && doc->repair_attempted)
2663 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a repaired file");
2664 if (in_opts->do_incremental && in_opts->do_garbage)
2665 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with garbage collection");
2666 if (in_opts->do_linear)
2667 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Linearisation is no longer supported");
2668 if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
2669 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
2670 if (in_opts->do_snapshot)
2671 {
2672 if (in_opts->do_incremental == 0 ||
2673 in_opts->do_pretty ||
2674 in_opts->do_ascii ||
2675 in_opts->do_compress ||
2676 in_opts->do_compress_images ||
2677 in_opts->do_compress_fonts ||
2678 in_opts->do_decompress ||
2679 in_opts->do_garbage ||
2680 in_opts->do_linear ||
2681 in_opts->do_clean ||
2682 in_opts->do_sanitize ||
2683 in_opts->do_appearance ||
2684 in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
2685 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
2686 }
2687 if (pdf_has_unsaved_sigs(ctx, doc) && !fz_output_supports_stream(ctx, out))
2688 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");
2689
2690 prepare_for_save(ctx, doc, in_opts);
2691
2692 opts.out = out;
2693
2694 do_pdf_save_document(ctx, doc, &opts, in_opts);
2695 }
2696
2697 void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *in_opts)
2698 {
2699 pdf_write_options opts_defaults = pdf_default_write_options;
2700 pdf_write_state opts = { 0 };
2701
2702 if (!doc)
2703 return;
2704
2705 if (!in_opts)
2706 in_opts = &opts_defaults;
2707
2708 if (in_opts->do_incremental && !doc->file)
2709 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a new document");
2710 if (in_opts->do_incremental && doc->repair_attempted)
2711 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a repaired file");
2712 if (in_opts->do_incremental && in_opts->do_garbage)
2713 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with garbage collection");
2714 if (in_opts->do_linear)
2715 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Linearisation is no longer supported");
2716 if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
2717 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
2718 if (in_opts->do_snapshot)
2719 {
2720 if (in_opts->do_incremental == 0 ||
2721 in_opts->do_pretty ||
2722 in_opts->do_ascii ||
2723 in_opts->do_compress ||
2724 in_opts->do_compress_images ||
2725 in_opts->do_compress_fonts ||
2726 in_opts->do_decompress ||
2727 in_opts->do_garbage ||
2728 in_opts->do_clean ||
2729 in_opts->do_sanitize ||
2730 in_opts->do_appearance ||
2731 in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
2732 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
2733 }
2734
2735 if (in_opts->do_appearance > 0)
2736 {
2737 int i, n = pdf_count_pages(ctx, doc);
2738 for (i = 0; i < n; ++i)
2739 {
2740 pdf_page *page = pdf_load_page(ctx, doc, i);
2741 fz_try(ctx)
2742 {
2743 pdf_annot *annot;
2744 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
2745 if (in_opts->do_appearance > 1)
2746 pdf_annot_request_resynthesis(ctx, annot);
2747 else
2748 pdf_annot_request_synthesis(ctx, annot);
2749 for (annot = pdf_first_widget(ctx, page); annot; annot = pdf_next_widget(ctx, annot))
2750 if (in_opts->do_appearance > 1)
2751 pdf_annot_request_resynthesis(ctx, annot);
2752 else
2753 pdf_annot_request_synthesis(ctx, annot);
2754 pdf_update_page(ctx, page);
2755 }
2756 fz_always(ctx)
2757 fz_drop_page(ctx, &page->super);
2758 fz_catch(ctx)
2759 fz_warn(ctx, "could not create annotation appearances");
2760 }
2761 }
2762
2763 if (in_opts->do_incremental)
2764 opts.bias = doc->bias;
2765
2766 prepare_for_save(ctx, doc, in_opts);
2767
2768 if (in_opts->do_incremental)
2769 {
2770 opts.out = fz_new_output_with_path(ctx, filename, 1);
2771 }
2772 else
2773 {
2774 opts.out = fz_new_output_with_path(ctx, filename, 0);
2775 }
2776 fz_try(ctx)
2777 {
2778 do_pdf_save_document(ctx, doc, &opts, in_opts);
2779 fz_close_output(ctx, opts.out);
2780 }
2781 fz_always(ctx)
2782 {
2783 fz_drop_output(ctx, opts.out);
2784 opts.out = NULL;
2785 }
2786 fz_catch(ctx)
2787 {
2788 fz_rethrow(ctx);
2789 }
2790 }
2791
2792 void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename)
2793 {
2794 pdf_save_document(ctx, doc, filename, &pdf_snapshot_write_options);
2795 }
2796
2797 void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out)
2798 {
2799 pdf_write_document(ctx, doc, out, &pdf_snapshot_write_options);
2800 }
2801
2802 char *
2803 pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts)
2804 {
2805 #define ADD_OPT(S) do { if (!first) fz_strlcat(buffer, ",", buffer_len); fz_strlcat(buffer, (S), buffer_len); first = 0; } while (0)
2806
2807 int first = 1;
2808 *buffer = 0;
2809 if (opts->do_decompress)
2810 ADD_OPT("decompress=yes");
2811 if (opts->do_compress)
2812 ADD_OPT("compress=yes");
2813 if (opts->do_compress_fonts)
2814 ADD_OPT("compress-fonts=yes");
2815 if (opts->do_compress_images)
2816 ADD_OPT("compress-images=yes");
2817 if (opts->do_ascii)
2818 ADD_OPT("ascii=yes");
2819 if (opts->do_pretty)
2820 ADD_OPT("pretty=yes");
2821 if (opts->do_linear)
2822 ADD_OPT("linearize=yes");
2823 if (opts->do_clean)
2824 ADD_OPT("clean=yes");
2825 if (opts->do_sanitize)
2826 ADD_OPT("sanitize=yes");
2827 if (opts->do_incremental)
2828 ADD_OPT("incremental=yes");
2829 if (opts->do_encrypt == PDF_ENCRYPT_NONE)
2830 ADD_OPT("decrypt=yes");
2831 else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
2832 ADD_OPT("decrypt=no");
2833 switch(opts->do_encrypt)
2834 {
2835 default:
2836 case PDF_ENCRYPT_UNKNOWN:
2837 break;
2838 case PDF_ENCRYPT_NONE:
2839 ADD_OPT("encrypt=no");
2840 break;
2841 case PDF_ENCRYPT_KEEP:
2842 ADD_OPT("encrypt=keep");
2843 break;
2844 case PDF_ENCRYPT_RC4_40:
2845 ADD_OPT("encrypt=rc4-40");
2846 break;
2847 case PDF_ENCRYPT_RC4_128:
2848 ADD_OPT("encrypt=rc4-128");
2849 break;
2850 case PDF_ENCRYPT_AES_128:
2851 ADD_OPT("encrypt=aes-128");
2852 break;
2853 case PDF_ENCRYPT_AES_256:
2854 ADD_OPT("encrypt=aes-256");
2855 break;
2856 }
2857 if (strlen(opts->opwd_utf8)) {
2858 ADD_OPT("owner-password=");
2859 fz_strlcat(buffer, opts->opwd_utf8, buffer_len);
2860 }
2861 if (strlen(opts->upwd_utf8)) {
2862 ADD_OPT("user-password=");
2863 fz_strlcat(buffer, opts->upwd_utf8, buffer_len);
2864 }
2865 {
2866 char temp[32];
2867 ADD_OPT("permissions=");
2868 fz_snprintf(temp, sizeof(temp), "%d", opts->permissions);
2869 fz_strlcat(buffer, temp, buffer_len);
2870 }
2871 switch(opts->do_garbage)
2872 {
2873 case 0:
2874 break;
2875 case 1:
2876 ADD_OPT("garbage=yes");
2877 break;
2878 case 2:
2879 ADD_OPT("garbage=compact");
2880 break;
2881 case 3:
2882 ADD_OPT("garbage=deduplicate");
2883 break;
2884 default:
2885 {
2886 char temp[32];
2887 fz_snprintf(temp, sizeof(temp), "%d", opts->do_garbage);
2888 ADD_OPT("garbage=");
2889 fz_strlcat(buffer, temp, buffer_len);
2890 break;
2891 }
2892 }
2893 switch(opts->do_appearance)
2894 {
2895 case 1:
2896 ADD_OPT("appearance=yes");
2897 break;
2898 case 2:
2899 ADD_OPT("appearance=all");
2900 break;
2901 }
2902
2903 #undef ADD_OPT
2904
2905 return buffer;
2906 }
2907
2908 typedef struct
2909 {
2910 fz_document_writer super;
2911 pdf_document *pdf;
2912 pdf_write_options opts;
2913 fz_output *out;
2914
2915 fz_rect mediabox;
2916 pdf_obj *resources;
2917 fz_buffer *contents;
2918 } pdf_writer;
2919
2920 static fz_device *
2921 pdf_writer_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
2922 {
2923 pdf_writer *wri = (pdf_writer*)wri_;
2924 wri->mediabox = mediabox; // TODO: handle non-zero x0,y0
2925 return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
2926 }
2927
2928 static void
2929 pdf_writer_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
2930 {
2931 pdf_writer *wri = (pdf_writer*)wri_;
2932 pdf_obj *obj = NULL;
2933
2934 fz_var(obj);
2935
2936 fz_try(ctx)
2937 {
2938 fz_close_device(ctx, dev);
2939 obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, 0, wri->resources, wri->contents);
2940 pdf_insert_page(ctx, wri->pdf, -1, obj);
2941 }
2942 fz_always(ctx)
2943 {
2944 fz_drop_device(ctx, dev);
2945 pdf_drop_obj(ctx, obj);
2946 fz_drop_buffer(ctx, wri->contents);
2947 wri->contents = NULL;
2948 pdf_drop_obj(ctx, wri->resources);
2949 wri->resources = NULL;
2950 }
2951 fz_catch(ctx)
2952 fz_rethrow(ctx);
2953 }
2954
2955 static void
2956 pdf_writer_close_writer(fz_context *ctx, fz_document_writer *wri_)
2957 {
2958 pdf_writer *wri = (pdf_writer*)wri_;
2959 pdf_write_document(ctx, wri->pdf, wri->out, &wri->opts);
2960 fz_close_output(ctx, wri->out);
2961 }
2962
2963 static void
2964 pdf_writer_drop_writer(fz_context *ctx, fz_document_writer *wri_)
2965 {
2966 pdf_writer *wri = (pdf_writer*)wri_;
2967 fz_drop_buffer(ctx, wri->contents);
2968 pdf_drop_obj(ctx, wri->resources);
2969 pdf_drop_document(ctx, wri->pdf);
2970 fz_drop_output(ctx, wri->out);
2971 }
2972
2973 fz_document_writer *
2974 fz_new_pdf_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
2975 {
2976 pdf_writer *wri;
2977
2978 fz_var(wri);
2979
2980 fz_try(ctx)
2981 {
2982 wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
2983 pdf_parse_write_options(ctx, &wri->opts, options);
2984 wri->out = out;
2985 wri->pdf = pdf_create_document(ctx);
2986 }
2987 fz_catch(ctx)
2988 {
2989 fz_drop_output(ctx, out);
2990 pdf_drop_document(ctx, wri->pdf);
2991 fz_free(ctx, wri);
2992 fz_rethrow(ctx);
2993 }
2994
2995 return (fz_document_writer*)wri;
2996 }
2997
2998 fz_document_writer *
2999 fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options)
3000 {
3001 fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.pdf", 0);
3002 return fz_new_pdf_writer_with_output(ctx, out, options);
3003 }
3004
3005 void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out)
3006 {
3007 if (!doc || !out)
3008 return;
3009
3010 if (!doc->journal)
3011 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't write non-existent journal");
3012
3013 pdf_serialise_journal(ctx, doc, out);
3014 }
3015
3016 void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename)
3017 {
3018 fz_output *out;
3019
3020 if (!doc)
3021 return;
3022
3023 out = fz_new_output_with_path(ctx, filename, 0);
3024 fz_try(ctx)
3025 {
3026 pdf_write_journal(ctx, doc, out);
3027 fz_close_output(ctx, out);
3028 }
3029 fz_always(ctx)
3030 fz_drop_output(ctx, out);
3031 fz_catch(ctx)
3032 fz_rethrow(ctx);
3033 }
3034
3035 void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm)
3036 {
3037 pdf_deserialise_journal(ctx, doc, stm);
3038 }
3039
3040 void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename)
3041 {
3042 fz_stream *stm;
3043
3044 if (!doc)
3045 return;
3046
3047 stm = fz_open_file(ctx, filename);
3048 fz_try(ctx)
3049 pdf_read_journal(ctx, doc, stm);
3050 fz_always(ctx)
3051 fz_drop_stream(ctx, stm);
3052 fz_catch(ctx)
3053 fz_rethrow(ctx);
3054 }