comparison mupdf-source/include/mupdf/pdf/document.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #ifndef MUPDF_PDF_DOCUMENT_H
24 #define MUPDF_PDF_DOCUMENT_H
25
26 #include "mupdf/fitz/export.h"
27 #include "mupdf/fitz/document.h"
28 #include "mupdf/fitz/hash.h"
29 #include "mupdf/fitz/stream.h"
30 #include "mupdf/fitz/xml.h"
31 #include "mupdf/pdf/object.h"
32
33 typedef struct pdf_xref pdf_xref;
34 typedef struct pdf_ocg_descriptor pdf_ocg_descriptor;
35
36 typedef struct pdf_page pdf_page;
37 typedef struct pdf_annot pdf_annot;
38 typedef struct pdf_js pdf_js;
39 typedef struct pdf_document pdf_document;
40
41 enum /* Byte sizes of the lexer buffer variants declared below. */
42 {
43 PDF_LEXBUF_SMALL = 256, /* Size of pdf_lexbuf.buffer. */
44 PDF_LEXBUF_LARGE = 65536 /* Combined size of pdf_lexbuf_large's base.buffer and buffer arrays. */
45 };
46
47 typedef struct /* Lexer buffer with a small inline storage area; embedded as `base` in pdf_lexbuf_large. */
48 {
49 size_t size; /* NOTE(review): presumably the capacity currently available via scratch -- confirm in the lexer. */
50 size_t base_size; /* NOTE(review): presumably the capacity of the inline storage -- confirm. */
51 size_t len; /* NOTE(review): presumably the length of the text held in scratch -- confirm. */
52 int64_t i; /* NOTE(review): presumably the value of the last integer token -- confirm. */
53 float f; /* NOTE(review): presumably the value of the last real-number token -- confirm. */
54 char *scratch; /* NOTE(review): presumably points at buffer until more room is needed -- confirm. */
55 char buffer[PDF_LEXBUF_SMALL]; /* Inline storage, PDF_LEXBUF_SMALL bytes. */
56 } pdf_lexbuf;
57
58 typedef struct /* A pdf_lexbuf followed by additional buffer storage. */
59 {
60 pdf_lexbuf base; /* Ends with base.buffer (PDF_LEXBUF_SMALL bytes); NOTE(review): presumably must stay first so the two arrays are used as one region -- confirm. */
61 char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; /* Brings the total buffer storage up to PDF_LEXBUF_LARGE bytes. */
62 } pdf_lexbuf_large;
63
64 /*
65 Document event structures are mostly opaque to the app. Only the type
66 is visible to the app.
67 */
68 typedef struct pdf_doc_event pdf_doc_event;
69
70 /*
71 the type of function via which the app receives
72 document events.
73 */
74 typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_event *evt, void *data);
75
76 /*
77 the type of function via which the app frees
78 the data provided to the event callback pdf_doc_event_cb.
79 */
80 typedef void (pdf_free_doc_event_data_cb)(fz_context *ctx, void *data);
81
82 typedef struct pdf_js_console pdf_js_console;
83
84 /*
85 Callback called when the console is dropped because it
86 is being replaced or the javascript is being disabled
87 by a call to pdf_disable_js().
88 */
89 typedef void (pdf_js_console_drop_cb)(pdf_js_console *console, void *user);
90
91 /*
92 Callback signalling that a piece of javascript is asking
93 the javascript console to be displayed.
94 */
95 typedef void (pdf_js_console_show_cb)(void *user);
96
97 /*
98 Callback signalling that a piece of javascript is asking
99 the javascript console to be hidden.
100 */
101 typedef void (pdf_js_console_hide_cb)(void *user);
102
103 /*
104 Callback signalling that a piece of javascript is asking
105 the javascript console to remove all its contents.
106 */
107 typedef void (pdf_js_console_clear_cb)(void *user);
108
109 /*
110 Callback signalling that a piece of javascript is appending
111 the given message to the javascript console contents.
112 */
113 typedef void (pdf_js_console_write_cb)(void *user, const char *msg);
114
115 /*
116 The callback functions relating to a javascript console.
117 */
118 struct pdf_js_console { /* The pdf_js_console typedef name is already declared earlier in this header; it is not repeated here because redefining a typedef is only permitted from C11 on. */
119 pdf_js_console_drop_cb *drop; /* Called when the console is dropped: it is being replaced, or javascript is disabled via pdf_disable_js(). */
120 pdf_js_console_show_cb *show; /* Javascript asks for the console to be displayed. */
121 pdf_js_console_hide_cb *hide; /* Javascript asks for the console to be hidden. */
122 pdf_js_console_clear_cb *clear; /* Javascript asks for the console to remove all its contents. */
123 pdf_js_console_write_cb *write; /* Javascript appends the given message to the console contents. */
124 };
125
126 /*
127 Retrieve the currently set javascript console, or NULL
128 if none is set.
129 */
130 pdf_js_console *pdf_js_get_console(fz_context *ctx, pdf_document *doc);
131
132 /*
133 Set a new javascript console.
134
135 console: A set of callback functions informing about
136 what pieces of executed js is trying to do
137 to the js console. The caller transfers ownership of
138 console when calling pdf_js_set_console(). Once it and
139 the corresponding user pointer are no longer needed
140 console->drop() will be called passing both the console
141 and the user pointer.
142
143 user: Opaque data that will be passed unchanged to all
144 js console callbacks when called. The caller ensures
145 that this is valid until either the js console is
146 replaced by calling pdf_js_set_console() again with a
147 new console, or pdf_disable_js() is called. In either
148 case the caller ensures that the user data is freed.
149 */
150 void pdf_js_set_console(fz_context *ctx, pdf_document *doc, pdf_js_console *console, void *user);
151
152 /*
153 Open a PDF document.
154
155 Open a PDF document by reading its cross reference table, so
156 MuPDF can locate PDF objects inside the file. Upon a broken
157 cross reference table or other parse errors MuPDF will restart
158 parsing the file from the beginning to try to rebuild a
159 (hopefully correct) cross reference table to allow further
160 processing of the file.
161
162 The returned pdf_document should be used when calling most
163 other PDF functions. Note that it wraps the context, so those
164 functions implicitly get access to the global state in
165 context.
166
167 filename: a path to a file as it would be given to open(2).
168 */
169 pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
170
171 /*
172 Opens a PDF document.
173
174 Same as pdf_open_document, but takes a stream instead of a
175 filename to locate the PDF document to open. Increments the
176 reference count of the stream. See fz_open_file,
177 fz_open_file_w or fz_open_fd for opening a stream, and
178 fz_drop_stream for closing an open stream.
179 */
180 pdf_document *pdf_open_document_with_stream(fz_context *ctx, fz_stream *file);
181
182 /*
183 Closes and frees an opened PDF document.
184
185 The resource store in the context associated with pdf_document
186 is emptied.
187 */
188 void pdf_drop_document(fz_context *ctx, pdf_document *doc);
189
190 pdf_document *pdf_keep_document(fz_context *ctx, pdf_document *doc);
191
192 /*
193 down-cast a fz_document to a pdf_document.
194 Returns NULL if underlying document is not PDF
195 */
196 pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc);
197
198 /*
199 Down-cast generic fitz objects into pdf specific variants.
200 Returns NULL if the objects are not from a PDF document.
201 */
202 pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr);
203 pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr);
204
205 /*
206 Get a pdf_document handle from an fz_document handle.
207
208 This is superficially similar to pdf_document_from_fz_document
209 (and the older pdf_specifics).
210
211 For fz_documents that are actually pdf_documents, this will return
212 a kept version of the same pointer, just cast differently.
213
214 For fz_documents that have a pdf_document representation internally,
215 then you may get a kept version of a different pointer.
216
217 For fz_documents that have no pdf_document representation internally,
218 this will return NULL.
219
220 Note that this returns a kept pointer that the caller is responsible
221 for freeing, unlike pdf_specifics or pdf_document_from_fz_document.
222 */
223 pdf_document *fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr);
224
225 int pdf_needs_password(fz_context *ctx, pdf_document *doc);
226
227 /*
228 Attempt to authenticate a
229 password.
230
231 Returns 0 for failure, non-zero for success.
232
233 In the non-zero case:
234 bit 0 set => no password required
235 bit 1 set => user password authenticated
236 bit 2 set => owner password authenticated
237 */
238 int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw);
239
240 int pdf_has_permission(fz_context *ctx, pdf_document *doc, fz_permission p);
241 int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, size_t size);
242
243 fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc);
244
245 fz_outline_iterator *pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc);
246
247 void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc);
248
249 /*
250 Get the number of layer configurations defined in this document.
251
252 doc: The document in question.
253 */
254 int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc);
255
256 /*
257 Configure visibility of individual layers in this document.
258 */
259 int pdf_count_layers(fz_context *ctx, pdf_document *doc);
260 const char *pdf_layer_name(fz_context *ctx, pdf_document *doc, int layer);
261 int pdf_layer_is_enabled(fz_context *ctx, pdf_document *doc, int layer);
262 void pdf_enable_layer(fz_context *ctx, pdf_document *doc, int layer, int enabled);
263
264 typedef struct /* Descriptive strings for one layer configuration; filled in by pdf_layer_config_info. */
265 {
266 const char *name; /* May be set to NULL if no information is available. */
267 const char *creator; /* May be set to NULL if no information is available. */
268 } pdf_layer_config;
269
270 /*
271 Fetch the name (and optionally creator) of the given layer config.
272
273 doc: The document in question.
274
275 config_num: A value in the 0..n-1 range, where n is the
276 value returned from pdf_count_layer_configs.
277
278 info: Pointer to structure to fill in. Pointers within
279 this structure may be set to NULL if no information is
280 available.
281 */
282 void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info);
283
284 /*
285 Set the current configuration.
286 This updates the visibility of the optional content groups
287 within the document.
288
289 doc: The document in question.
290
291 config_num: A value in the 0..n-1 range, where n is the
292 value returned from pdf_count_layer_configs.
293 */
294 void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num);
295
296 /*
297 Returns the number of entries in the 'UI' for this layer configuration.
298
299 doc: The document in question.
300 */
301 int pdf_count_layer_config_ui(fz_context *ctx, pdf_document *doc);
302
303 /*
304 Select a checkbox/radiobox within the 'UI' for this layer
305 configuration.
306
307 Selecting a UI entry that is a radiobox may disable
308 other UI entries.
309
310 doc: The document in question.
311
312 ui: A value in the 0..m-1 range, where m is the value
313 returned by pdf_count_layer_config_ui.
314 */
315 void pdf_select_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
316
317 /*
318 Deselect a checkbox/radiobox within the 'UI' for this layer configuration.
319
320 doc: The document in question.
321
322 ui: A value in the 0..m-1 range, where m is the value
323 returned by pdf_count_layer_config_ui.
324 */
325 void pdf_deselect_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
326
327 /*
328 Toggle a checkbox/radiobox within the 'UI' for this layer configuration.
329
330 Toggling a UI entry that is a radiobox may disable
331 other UI entries.
332
333 doc: The document in question.
334
335 ui: A value in the 0..m-1 range, where m is the value
336 returned by pdf_count_layer_config_ui.
337 */
338 void pdf_toggle_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
339
340 typedef enum /* Kind of an entry in the layer config UI; stored in pdf_layer_config_ui.type. */
341 {
342 PDF_LAYER_UI_LABEL = 0, /* NOTE(review): presumably a non-selectable text entry -- confirm. */
343 PDF_LAYER_UI_CHECKBOX = 1, /* Entry that can be selected, deselected or toggled. */
344 PDF_LAYER_UI_RADIOBOX = 2 /* Selecting or toggling this kind of entry may disable other UI entries. */
345 } pdf_layer_config_ui_type;
346
347 typedef struct /* Description of one layer config UI entry; filled in by pdf_layer_config_ui_info. */
348 {
349 const char *text; /* NOTE(review): presumably the display text of the entry -- confirm. */
350 int depth; /* NOTE(review): presumably the nesting level of the entry -- confirm. */
351 pdf_layer_config_ui_type type; /* Label, checkbox or radiobox. */
352 int selected; /* NOTE(review): presumably non-zero when the entry is currently selected -- confirm. */
353 int locked; /* NOTE(review): presumably non-zero when the entry cannot be changed -- confirm. */
354 } pdf_layer_config_ui;
355
356 /*
357 Get the info for a given entry in the layer config ui.
358
359 doc: The document in question.
360
361 ui: A value in the 0..m-1 range, where m is the value
362 returned by pdf_count_layer_config_ui.
363
364 info: Pointer to a structure to fill in with information
365 about the requested ui entry.
366 */
367 void pdf_layer_config_ui_info(fz_context *ctx, pdf_document *doc, int ui, pdf_layer_config_ui *info);
368
369 /*
370 Write the current layer config back into the document as the default state.
371 */
372 void pdf_set_layer_config_as_default(fz_context *ctx, pdf_document *doc);
373
374 /*
375 Determine whether changes have been made since the
376 document was opened or last saved.
377 */
378 int pdf_has_unsaved_changes(fz_context *ctx, pdf_document *doc);
379
380 /*
381 Determine if this PDF has been repaired since opening.
382 */
383 int pdf_was_repaired(fz_context *ctx, pdf_document *doc);
384
385 /* Object that can perform the cryptographic operation necessary for document signing */
386 typedef struct pdf_pkcs7_signer pdf_pkcs7_signer;
387
388 /* Unsaved signature fields */
389 typedef struct pdf_unsaved_sig /* One unsaved signature field; nodes are chained through `next`. */
390 {
391 pdf_obj *field; /* NOTE(review): presumably the signature field object -- confirm. */
392 size_t byte_range_start; /* NOTE(review): byte_range_* presumably delimit where the ByteRange entry is written in the output -- confirm. */
393 size_t byte_range_end;
394 size_t contents_start; /* NOTE(review): contents_* presumably delimit where the signature Contents entry is written in the output -- confirm. */
395 size_t contents_end;
396 pdf_pkcs7_signer *signer; /* Performs the cryptographic operation necessary for signing. */
397 struct pdf_unsaved_sig *next; /* Next node in the chain. */
398 } pdf_unsaved_sig;
399
400 typedef struct /* One (page, object) pair; pdf_document.rev_page_map points at these. */
401 {
402 int page; /* NOTE(review): presumably a page index -- confirm. */
403 int object; /* NOTE(review): presumably the object number of that page -- confirm. */
404 } pdf_rev_page_map;
405
406 typedef struct /* Hint entry for one page; pdf_document.hint_page points at these. */
407 {
408 int number; /* Page object number */
409 int64_t offset; /* Offset of page object */
410 int64_t index; /* Index into shared hint_shared_ref */
411 } pdf_hint_page;
412
413 typedef struct /* Hint entry for one shared object reference; pdf_document.hint_shared points at these. */
414 {
415 int number; /* Object number of first object */
416 int64_t offset; /* Offset of first object */
417 } pdf_hint_shared; /* NOTE(review): the hint comment inside struct pdf_document also refers to ->count and ->length, which are not declared here -- confirm. */
418
419 struct pdf_document /* State held for one PDF document; the type returned by pdf_open_document and pdf_create_document. */
420 {
421 fz_document super; /* Embedded generic document, first member. NOTE(review): presumably what makes the fz_document/pdf_document casts (see pdf_specifics) possible -- confirm. */
422
423 fz_stream *file;
424
425 int version;
426 int is_fdf;
427 int bias;
428 int64_t startxref;
429 int64_t file_size;
430 pdf_crypt *crypt;
431 pdf_ocg_descriptor *ocg;
432 fz_colorspace *oi;
433
434 int max_xref_len;
435 int num_xref_sections;
436 int saved_num_xref_sections;
437 int num_incremental_sections;
438 int xref_base;
439 int disallow_new_increments;
440
441 /* The local_xref is only active, if local_xref_nesting >= 0 */
442 pdf_xref *local_xref;
443 int local_xref_nesting;
444
445 pdf_xref *xref_sections;
446 pdf_xref *saved_xref_sections;
447 int *xref_index;
448 int save_in_progress;
449 int last_xref_was_old_style;
450 int has_linearization_object;
451
452 int map_page_count;
453 pdf_rev_page_map *rev_page_map;
454 pdf_obj **fwd_page_map;
455 int page_tree_broken;
456
457 int repair_attempted;
458 int repair_in_progress;
459 int non_structural_change; /* True if we are modifying the document in a way that does not change the (page) structure */
460
461 /* State indicating which file parsing method we are using */
462 int file_reading_linearly;
463 int64_t file_length;
464
465 int linear_page_count;
466 pdf_obj *linear_obj; /* Linearized object (if used) */
467 pdf_obj **linear_page_refs; /* Page objects for linear loading */
468 int linear_page1_obj_num;
469
470 /* The state for the pdf_progressive_advance parser */
471 int64_t linear_pos;
472 int linear_page_num;
473
474 int hint_object_offset;
475 int hint_object_length;
476 int hints_loaded; /* Set to 1 after the hints loading has completed,
477 * whether successful or not! */
478 /* Page n references shared object references:
479 * hint_shared_ref[i]
480 * where
481 * i = s to e-1
482 * s = hint_page[n]->index
483 * e = hint_page[n+1]->index
484 * Shared object reference r accesses objects:
485 * rs to re-1
486 * where
487 * rs = hint_shared[r]->number
488 * re = hint_shared[r]->count + rs
489 * These are guaranteed to lie within the region starting at
490 * hint_shared[r]->offset of length hint_shared[r]->length
491 * NOTE(review): pdf_hint_shared as declared in this header has no count or length member -- confirm these references. */
492 pdf_hint_page *hint_page;
493 int *hint_shared_ref;
494 pdf_hint_shared *hint_shared;
495 int hint_obj_offsets_max;
496 int64_t *hint_obj_offsets;
497
498 pdf_lexbuf_large lexbuf; /* Lexer buffer embedded by value (PDF_LEXBUF_LARGE bytes of buffer storage). */
499
500 pdf_js *js;
501
502 int recalculate;
503 int redacted;
504 int resynth_required;
505
506 pdf_doc_event_cb *event_cb; /* Function via which the app receives document events. */
507 pdf_free_doc_event_data_cb *free_event_data_cb; /* Function via which the app frees the data provided to event_cb. */
508 void *event_cb_data; /* NOTE(review): presumably the data pointer handed to event_cb and free_event_data_cb -- confirm. */
509
510 int num_type3_fonts;
511 int max_type3_fonts;
512 fz_font **type3_fonts;
513
514 struct {
515 fz_hash_table *fonts;
516 fz_hash_table *colorspaces;
517 } resources;
518
519 int orphans_max;
520 int orphans_count;
521 pdf_obj **orphans;
522
523 fz_xml_doc *xfa;
524
525 pdf_journal *journal;
526 };
527
528 pdf_document *pdf_create_document(fz_context *ctx);
529
530 typedef struct pdf_graft_map pdf_graft_map;
531
532 /*
533 Return a deep copied object equivalent to the
534 supplied object, suitable for use within the given document.
535
536 dst: The document in which the returned object is to be used.
537
538 obj: The object to deep copy.
539
540 Note: If grafting multiple objects, you should use a pdf_graft_map
541 to avoid potential duplication of target objects.
542 */
543 pdf_obj *pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_obj *obj);
544
545 /*
546 Prepare a graft map object to allow objects
547 to be deep copied from one document to the given one, avoiding
548 problems with duplicated child objects.
549
550 dst: The document to copy objects to.
551
552 Note: all the source objects must come from the same document.
553 */
554 pdf_graft_map *pdf_new_graft_map(fz_context *ctx, pdf_document *dst);
555
556 pdf_graft_map *pdf_keep_graft_map(fz_context *ctx, pdf_graft_map *map);
557 void pdf_drop_graft_map(fz_context *ctx, pdf_graft_map *map);
558
559 /*
560 Return a deep copied object equivalent
561 to the supplied object, suitable for use within the target
562 document of the map.
563
564 map: A map targeted at the document in which the returned
565 object is to be used.
566
567 obj: The object to be copied.
568
569 Note: Copying multiple objects via the same graft map ensures
570 that any shared children are not copied more than once.
571 */
572 pdf_obj *pdf_graft_mapped_object(fz_context *ctx, pdf_graft_map *map, pdf_obj *obj);
573
574 /*
575 Graft a page (and its resources) from the src document to the
576 destination document of the graft. This involves a deep copy
577 of the objects in question.
578
579 map: A map targeted at the document into which the page should
580 be inserted.
581
582 page_to: The position within the destination document at which
583 the page should be inserted (pages numbered from 0, with -1
584 meaning "at the end").
585
586 src: The document from which the page should be copied.
587
588 page_from: The page number which should be copied from the src
589 document (pages numbered from 0, with -1 meaning "at the end").
590 */
591 void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from);
592 void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from);
593
594 /*
595 Create a device that will record the
596 graphical operations given to it into a sequence of
597 pdf operations, together with a set of resources. This
598 sequence/set pair can then be used as the basis for
599 adding a page to the document (see pdf_add_page).
600 Returns a kept reference.
601
602 doc: The document for which these are intended.
603
604 mediabox: The bbox for the created page.
605
606 presources: Pointer to a place to put the created
607 resources dictionary.
608
609 pcontents: Pointer to a place to put the created
610 contents buffer.
611 */
612 fz_device *pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents);
613
614 /*
615 Create a pdf device. Rendering to the device creates
616 new pdf content. WARNING: this device is work in progress. It doesn't
617 currently support all rendering cases.
618
619 Note that contents must be a stream (dictionary) to be updated (or
620 a reference to a stream). Callers should take care to ensure that it
621 is not an array, and that it is not shared with other objects/pages.
622 */
623 fz_device *pdf_new_pdf_device(fz_context *ctx, pdf_document *doc, fz_matrix topctm, pdf_obj *resources, fz_buffer *contents);
624
625 /*
626 Create a pdf_obj within a document that
627 represents a page, from a previously created resources
628 dictionary and page content stream. This should then be
629 inserted into the document using pdf_insert_page.
630
631 After this call the page exists within the document
632 structure, but is not actually ever displayed as it is
633 not linked into the PDF page tree.
634
635 doc: The document to which to add the page.
636
637 mediabox: The mediabox for the page (should be identical
638 to that used when creating the resources/contents).
639
640 rotate: 0, 90, 180 or 270. The rotation to use for the
641 page.
642
643 resources: The resources dictionary for the new page
644 (typically created by pdf_page_write).
645
646 contents: The page contents for the new page (typically
647 created by pdf_page_write).
648 */
649 pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents);
650
651 /*
652 Insert a page previously created by
653 pdf_add_page into the pages tree of the document.
654
655 doc: The document to insert into.
656
657 at: The page number to insert at (pages numbered from 0).
658 0 <= n <= page_count inserts before page n. Negative numbers
659 or INT_MAX are treated as page count, and insert at the end.
660 0 inserts at the start. All existing pages after the
661 insertion point are shuffled up.
662
663 page: The page to insert.
664 */
665 void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page);
666
667 /*
668 Delete a page from the page tree of
669 a document. This does not remove the page contents
670 or resources from the file.
671
672 doc: The document to operate on.
673
674 number: The page to remove (numbered from 0)
675 */
676 void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number);
677
678 /*
679 Delete a range of pages from the
680 page tree of a document. This does not remove the page
681 contents or resources from the file.
682
683 doc: The document to operate on.
684
685 start, end: The range of pages (numbered from 0)
686 (inclusive, exclusive) to remove. If end is negative or
687 greater than the number of pages in the document, it
688 will be taken to be the end of the document.
689 */
690 void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end);
691
692 /*
693 Get page label (string) from a page number (index).
694 */
695 void pdf_page_label(fz_context *ctx, pdf_document *doc, int page, char *buf, size_t size);
696 void pdf_page_label_imp(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size);
697
698 typedef enum { /* Numbering style passed to pdf_set_page_labels. */
699 PDF_PAGE_LABEL_NONE = 0,
700 PDF_PAGE_LABEL_DECIMAL = 'D', /* The non-zero styles are character codes. NOTE(review): presumably the style names used in PDF page label dictionaries -- confirm. */
701 PDF_PAGE_LABEL_ROMAN_UC = 'R',
702 PDF_PAGE_LABEL_ROMAN_LC = 'r',
703 PDF_PAGE_LABEL_ALPHA_UC = 'A',
704 PDF_PAGE_LABEL_ALPHA_LC = 'a',
705 } pdf_page_label_style;
706
707 void pdf_set_page_labels(fz_context *ctx, pdf_document *doc, int index, pdf_page_label_style style, const char *prefix, int start);
708 void pdf_delete_page_labels(fz_context *ctx, pdf_document *doc, int index);
709
710 fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc);
711 void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang);
712
713 /*
714 In calls to pdf_save_document and pdf_write_document, the following options
715 structure can be used to control aspects of the writing process. This structure
716 may grow in the future, and should be zero-filled to allow forwards compatibility.
717 */
718 typedef struct /* Options accepted by pdf_write_document and pdf_save_document; pdf_default_write_options is the declared default instance. */
719 {
720 int do_incremental; /* Write just the changed objects. */
721 int do_pretty; /* Pretty-print dictionaries and arrays. */
722 int do_ascii; /* ASCII hex encode binary streams. */
723 int do_compress; /* Compress streams. 1 zlib, 2 brotli */
724 int do_compress_images; /* Compress (or leave compressed) image streams. */
725 int do_compress_fonts; /* Compress (or leave compressed) font streams. */
726 int do_decompress; /* Decompress streams (except when compressing images/fonts). */
727 int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */
728 int do_linear; /* Write linearised. */
729 int do_clean; /* Clean content streams. */
730 int do_sanitize; /* Sanitize content streams. */
731 int do_appearance; /* (Re)create appearance streams. */
732 int do_encrypt; /* Encryption method to use: keep, none, rc4-40, etc. */
733 int dont_regenerate_id; /* Don't regenerate ID if set (used for clean) */
734 int permissions; /* Document encryption permissions. */
735 char opwd_utf8[128]; /* Owner password, held in a fixed 128-byte buffer. */
736 char upwd_utf8[128]; /* User password, held in a fixed 128-byte buffer. */
737 int do_snapshot; /* Do not use directly. Use the snapshot functions. */
738 int do_preserve_metadata; /* When cleaning, preserve metadata unchanged. */
739 int do_use_objstms; /* Use objstms if possible */
740 int compression_effort; /* 0 for default. 100 = max, 1 = min. */
741 int do_labels; /* Add labels to each object showing how it can be reached from the Root. */
742 } pdf_write_options;
743
744 FZ_DATA extern const pdf_write_options pdf_default_write_options;
745
746 /*
747 Parse option string into a pdf_write_options struct.
748 Matches the command line options to 'mutool clean':
749 g: garbage collect
750 d, i, f: expand all, fonts, images
751 l: linearize
752 a: ascii hex encode
753 z: deflate
754 c: clean content streams
755 s: sanitize content streams
756 */
757 pdf_write_options *pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args);
758
759 /*
760 Returns true if there are digital signatures waiting to
761 be updated on save.
762 */
763 int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc);
764
765 /*
766 Write out the document to an output stream with all changes finalised.
767 */
768 void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *opts);
769
770 /*
771 Write out the document to a file with all changes finalised.
772 */
773 void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *opts);
774
775 /*
776 Snapshot the document to a file. This does not cause the
777 incremental xref to be finalized, so the document in memory
778 remains (essentially) unchanged.
779 */
780 void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename);
781
782 /*
783 Snapshot the document to an output stream. This does not cause
784 the incremental xref to be finalized, so the document in memory
785 remains (essentially) unchanged.
786 */
787 void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out);
788
789 char *pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts);
790
791 /*
792 Return true if the document can be saved incrementally. Applying
793 redactions or having a repaired document make incremental saving
794 impossible.
795 */
796 int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc);
797
798 /*
799 Write out the journal to an output stream.
800 */
801 void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out);
802
803 /*
804 Write out the journal to a file.
805 */
806 void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename);
807
808 /*
809 Read a journal from a filename. Will do nothing if the journal
810 does not match. Will throw on a corrupted journal.
811 */
812 void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename);
813
814 /*
815 Read a journal from a stream. Will do nothing if the journal
816 does not match. Will throw on a corrupted journal.
817 */
818 void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm);
819
820 /*
821 Minimize the memory used by a document.
822
823 We walk the in memory xref tables, evicting the PDF objects
824 therein that aren't in use.
825
826 This reduces the current memory use, but any subsequent use
827 of these objects will load them back into memory again.
828 */
829 void pdf_minimize_document(fz_context *ctx, pdf_document *doc);
830
831 /*
832 Map a pdf object representing a structure tag through
833 an optional role_map and convert to an fz_structure.
834 */
835 fz_structure pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag);
836
837 /*
838 Run the document structure to a device.
839 */
840 void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie);
841
842 /*
843 Return the count of the associated files on a document.
844 Note, that this is the count of files associated at the document
845 level and does not necessarily include files associated at other
846 levels.
847 */
848 int pdf_count_document_associated_files(fz_context *ctx, pdf_document *doc);
849
850 /*
851 Return a borrowed pointer to the PDF object that represents a
852 given associated file on a document.
853
854 Indexed from 0 to count-1.
855 */
856 pdf_obj *pdf_document_associated_file(fz_context *ctx, pdf_document *doc, int idx);
857
858 /*
859 Return the count of the associated files on a given page.
860 Note, that this is the count of files associated at the page
861 level and does not necessarily include files associated at other
862 levels.
863 */
864 int pdf_count_page_associated_files(fz_context *ctx, pdf_page *page);
865
866 /*
867 Return a borrowed pointer to the PDF object that represents a
868 given associated file on a page.
869
870 Indexed from 0 to count-1.
871 */
872 pdf_obj *pdf_page_associated_file(fz_context *ctx, pdf_page *page, int idx);
873
874
875 /*
876 A structure used to create "labels" for numbered objects.
877 The labels are different ways to reach an object from the trailer
878 and page tree, using the "mutool show" syntax.
879
880 Note: Paths involving "Parent", "P", "Prev", and "Last" are ignored,
881 as these are used for cycles in the structures which we don't care about
882 labeling.
883 */
884 typedef struct pdf_object_labels pdf_object_labels;
885
886 /*
887 Scan the entire object structure to create a directed graph
888 of indirect numbered objects and how they can reach each other.
889 */
890 pdf_object_labels *pdf_load_object_labels(fz_context *ctx, pdf_document *doc);
891
892 void pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g);
893
894 /*
895 Enumerate all the possible labels for a given numbered object.
896 The callback is invoked with a path for each possible way the object
897 can be reached from the PDF trailer.
898 */
899 typedef void (pdf_label_object_fn)(fz_context *ctx, void *arg, const char *label);
900 void pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg);
901
902 #endif