diff mupdf-source/include/mupdf/pdf/document.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/include/mupdf/pdf/document.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,902 @@
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#ifndef MUPDF_PDF_DOCUMENT_H
+#define MUPDF_PDF_DOCUMENT_H
+
+#include "mupdf/fitz/export.h"
+#include "mupdf/fitz/document.h"
+#include "mupdf/fitz/hash.h"
+#include "mupdf/fitz/stream.h"
+#include "mupdf/fitz/xml.h"
+#include "mupdf/pdf/object.h"
+
+typedef struct pdf_xref pdf_xref;
+typedef struct pdf_ocg_descriptor pdf_ocg_descriptor;
+
+typedef struct pdf_page pdf_page;
+typedef struct pdf_annot pdf_annot;
+typedef struct pdf_js pdf_js;
+typedef struct pdf_document pdf_document;
+
+enum
+{
+	PDF_LEXBUF_SMALL = 256, /* Bytes in the buffer embedded in pdf_lexbuf. */
+	PDF_LEXBUF_LARGE = 65536 /* Total buffer bytes provided by pdf_lexbuf_large. */
+};
+
+typedef struct
+{
+	size_t size; /* NOTE(review): presumably the current capacity of scratch - confirm. */
+	size_t base_size; /* NOTE(review): presumably the capacity of the embedded buffer - confirm. */
+	size_t len; /* NOTE(review): presumably the length of the current token text - confirm. */
+	int64_t i; /* NOTE(review): presumably the value of the last integer token - confirm. */
+	float f; /* NOTE(review): presumably the value of the last real token - confirm. */
+	char *scratch; /* NOTE(review): presumably the active text buffer (embedded or heap) - confirm. */
+	char buffer[PDF_LEXBUF_SMALL]; /* Embedded storage; pdf_lexbuf_large adds a second array after this struct. */
+} pdf_lexbuf;
+
+typedef struct
+{
+	pdf_lexbuf base; /* Lexer state plus its PDF_LEXBUF_SMALL-byte buffer. */
+	char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; /* Remaining bytes, so both arrays total PDF_LEXBUF_LARGE. */
+} pdf_lexbuf_large;
+
+/*
+	Document event structures are mostly opaque to the app. Only the type
+	is visible to the app.
+*/
+typedef struct pdf_doc_event pdf_doc_event;
+
+/*
+	the type of function via which the app receives
+	document events.
+*/
+typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_event *evt, void *data);
+
+/*
+	the type of function via which the app frees
+	the data provided to the event callback pdf_doc_event_cb.
+*/
+typedef void (pdf_free_doc_event_data_cb)(fz_context *ctx, void *data);
+
+typedef struct pdf_js_console pdf_js_console;
+
+/*
+	Callback called when the console is dropped because it
+	is being replaced or the javascript is being disabled
+	by a call to pdf_disable_js().
+*/
+typedef void (pdf_js_console_drop_cb)(pdf_js_console *console, void *user);
+
+/*
+	Callback signalling that a piece of javascript is asking
+	the javascript console to be displayed.
+*/
+typedef void (pdf_js_console_show_cb)(void *user);
+
+/*
+	Callback signalling that a piece of javascript is asking
+	the javascript console to be hidden.
+*/
+typedef void (pdf_js_console_hide_cb)(void *user);
+
+/*
+	Callback signalling that a piece of javascript is asking
+	the javascript console to remove all its contents.
+*/
+typedef void (pdf_js_console_clear_cb)(void *user);
+
+/*
+	Callback signalling that a piece of javascript is appending
+	the given message to the javascript console contents.
+*/
+typedef void (pdf_js_console_write_cb)(void *user, const char *msg);
+
+/*
+	The callback functions relating to a javascript console.
+*/
+typedef struct pdf_js_console {
+	pdf_js_console_drop_cb *drop;
+	pdf_js_console_show_cb *show;
+	pdf_js_console_hide_cb *hide;
+	pdf_js_console_clear_cb *clear;
+	pdf_js_console_write_cb *write;
+} pdf_js_console;
+
+/*
+	Retrieve the currently set javascript console, or NULL
+	if none is set.
+*/
+pdf_js_console *pdf_js_get_console(fz_context *ctx, pdf_document *doc);
+
+/*
+	Set a new javascript console.
+
+	console: A set of callback functions informing about
+	what pieces of executed js are trying to do
+	to the js console. The caller transfers ownership of
+	console when calling pdf_js_set_console(). Once it and
+	the corresponding user pointer are no longer needed
+	console->drop() will be called passing both the console
+	and the user pointer.
+
+	user: Opaque data that will be passed unchanged to all
+	js console callbacks when called. The caller ensures
+	that this is valid until either the js console is
+	replaced by calling pdf_js_set_console() again with a
+	new console, or pdf_disable_js() is called. In either
+	case the caller ensures that the user data is freed.
+*/
+void pdf_js_set_console(fz_context *ctx, pdf_document *doc, pdf_js_console *console, void *user);
+
+/*
+	Open a PDF document.
+
+	Open a PDF document by reading its cross reference table, so
+	MuPDF can locate PDF objects inside the file. Upon a broken
+	cross reference table or other parse errors MuPDF will restart
+	parsing the file from the beginning to try to rebuild a
+	(hopefully correct) cross reference table to allow further
+	processing of the file.
+
+	The returned pdf_document should be used when calling most
+	other PDF functions. Note that it wraps the context, so those
+	functions implicitly get access to the global state in
+	context.
+
+	filename: a path to a file as it would be given to open(2).
+*/
+pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
+
+/*
+	Opens a PDF document.
+
+	Same as pdf_open_document, but takes a stream instead of a
+	filename to locate the PDF document to open. Increments the
+	reference count of the stream. See fz_open_file,
+	fz_open_file_w or fz_open_fd for opening a stream, and
+	fz_drop_stream for closing an open stream.
+*/
+pdf_document *pdf_open_document_with_stream(fz_context *ctx, fz_stream *file);
+
+/*
+	Closes and frees an opened PDF document.
+
+	The resource store in the context associated with pdf_document
+	is emptied.
+*/
+void pdf_drop_document(fz_context *ctx, pdf_document *doc);
+
+pdf_document *pdf_keep_document(fz_context *ctx, pdf_document *doc);
+
+/*
+	down-cast a fz_document to a pdf_document.
+	Returns NULL if underlying document is not PDF
+*/
+pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc);
+
+/*
+	Down-cast generic fitz objects into pdf specific variants.
+	Returns NULL if the objects are not from a PDF document.
+*/
+pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr);
+pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr);
+
+/*
+	Get a pdf_document handle from an fz_document handle.
+
+	This is superficially similar to pdf_document_from_fz_document
+	(and the older pdf_specifics).
+
+	For fz_documents that are actually pdf_documents, this will return
+	a kept version of the same pointer, just cast differently.
+
+	For fz_documents that have a pdf_document representation internally,
+	then you may get a kept version of a different pointer.
+
+	For fz_documents that have no pdf_document representation internally,
+	this will return NULL.
+
+	Note that this returns a kept pointer that the caller is responsible
+	for freeing, unlike pdf_specifics or pdf_document_from_fz_document.
+*/
+pdf_document *fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr);
+
+int pdf_needs_password(fz_context *ctx, pdf_document *doc);
+
+/*
+	Attempt to authenticate a
+	password.
+
+	Returns 0 for failure, non-zero for success.
+
+	In the non-zero case:
+		bit 0 set => no password required
+		bit 1 set => user password authenticated
+		bit 2 set => owner password authenticated
+*/
+int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw);
+
+int pdf_has_permission(fz_context *ctx, pdf_document *doc, fz_permission p);
+int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, size_t size);
+
+fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc);
+
+fz_outline_iterator *pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc);
+
+void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc);
+
+/*
+	Get the number of layer configurations defined in this document.
+
+	doc: The document in question.
+*/
+int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc);
+
+/*
+	Configure visibility of individual layers in this document.
+*/
+int pdf_count_layers(fz_context *ctx, pdf_document *doc);
+const char *pdf_layer_name(fz_context *ctx, pdf_document *doc, int layer);
+int pdf_layer_is_enabled(fz_context *ctx, pdf_document *doc, int layer);
+void pdf_enable_layer(fz_context *ctx, pdf_document *doc, int layer, int enabled);
+
+typedef struct
+{
+	const char *name; /* Layer config name; may be NULL if no information is available. */
+	const char *creator; /* Layer config creator (optional); may be NULL if no information is available. */
+} pdf_layer_config;
+
+/*
+	Fetch the name (and optionally creator) of the given layer config.
+
+	doc: The document in question.
+
+	config_num: A value in the 0..n-1 range, where n is the
+	value returned from pdf_count_layer_configs.
+
+	info: Pointer to structure to fill in. Pointers within
+	this structure may be set to NULL if no information is
+	available.
+*/
+void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info);
+
+/*
+	Set the current configuration.
+	This updates the visibility of the optional content groups
+	within the document.
+
+	doc: The document in question.
+
+	config_num: A value in the 0..n-1 range, where n is the
+	value returned from pdf_count_layer_configs.
+*/
+void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num);
+
+/*
+	Returns the number of entries in the 'UI' for this layer configuration.
+
+	doc: The document in question.
+*/
+int pdf_count_layer_config_ui(fz_context *ctx, pdf_document *doc);
+
+/*
+	Select a checkbox/radiobox within the 'UI' for this layer
+	configuration.
+
+	Selecting a UI entry that is a radiobox may disable
+	other UI entries.
+
+	doc: The document in question.
+
+	ui: A value in the 0..m-1 range, where m is the value
+	returned by pdf_count_layer_config_ui.
+*/
+void pdf_select_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
+
+/*
+	Deselect a checkbox/radiobox within the 'UI' for this layer configuration.
+
+	doc: The document in question.
+
+	ui: A value in the 0..m-1 range, where m is the value
+	returned by pdf_count_layer_config_ui.
+*/
+void pdf_deselect_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
+
+/*
+	Toggle a checkbox/radiobox within the 'UI' for this layer configuration.
+
+	Toggling a UI entry that is a radiobox may disable
+	other UI entries.
+
+	doc: The document in question.
+
+	ui: A value in the 0..m-1 range, where m is the value
+	returned by pdf_count_layer_config_ui.
+*/
+void pdf_toggle_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
+
+typedef enum
+{
+	PDF_LAYER_UI_LABEL = 0,
+	PDF_LAYER_UI_CHECKBOX = 1,
+	PDF_LAYER_UI_RADIOBOX = 2
+} pdf_layer_config_ui_type;
+
+typedef struct
+{
+	const char *text;
+	int depth;
+	pdf_layer_config_ui_type type;
+	int selected;
+	int locked;
+} pdf_layer_config_ui;
+
+/*
+	Get the info for a given entry in the layer config ui.
+
+	doc: The document in question.
+
+	ui: A value in the 0..m-1 range, where m is the value
+	returned by pdf_count_layer_config_ui.
+
+	info: Pointer to a structure to fill in with information
+	about the requested ui entry.
+*/
+void pdf_layer_config_ui_info(fz_context *ctx, pdf_document *doc, int ui, pdf_layer_config_ui *info);
+
+/*
+	Write the current layer config back into the document as the default state.
+*/
+void pdf_set_layer_config_as_default(fz_context *ctx, pdf_document *doc);
+
+/*
+	Determine whether changes have been made since the
+	document was opened or last saved.
+*/
+int pdf_has_unsaved_changes(fz_context *ctx, pdf_document *doc);
+
+/*
+	Determine if this PDF has been repaired since opening.
+*/
+int pdf_was_repaired(fz_context *ctx, pdf_document *doc);
+
+/* Object that can perform the cryptographic operation necessary for document signing */
+typedef struct pdf_pkcs7_signer pdf_pkcs7_signer;
+
+/* Unsaved signature fields: one record per field, chained through 'next'. */
+typedef struct pdf_unsaved_sig
+{
+	pdf_obj *field; /* The signature field object this record refers to. */
+	size_t byte_range_start; /* NOTE(review): presumably output offsets of the ByteRange entry - confirm. */
+	size_t byte_range_end;
+	size_t contents_start; /* NOTE(review): presumably output offsets of the Contents entry - confirm. */
+	size_t contents_end;
+	pdf_pkcs7_signer *signer; /* Performs the cryptographic operation necessary for signing. */
+	struct pdf_unsaved_sig *next; /* Next record in the chain. */
+} pdf_unsaved_sig;
+
+typedef struct
+{
+	int page;
+	int object;
+} pdf_rev_page_map;
+
+typedef struct
+{
+	int number; /* Page object number */
+	int64_t offset; /* Offset of page object */
+	int64_t index; /* Index into shared hint_shared_ref */
+} pdf_hint_page;
+
+typedef struct
+{
+	int number; /* Object number of first object */
+	int64_t offset; /* Offset of first object */
+} pdf_hint_shared;
+
+struct pdf_document /* In-memory state of one PDF document. */
+{
+	fz_document super; /* Generic fz_document part. NOTE(review): presumably must stay first so down-casts work - confirm. */
+
+	fz_stream *file;
+
+	int version;
+	int is_fdf;
+	int bias;
+	int64_t startxref;
+	int64_t file_size;
+	pdf_crypt *crypt;
+	pdf_ocg_descriptor *ocg;
+	fz_colorspace *oi;
+
+	int max_xref_len;
+	int num_xref_sections;
+	int saved_num_xref_sections;
+	int num_incremental_sections;
+	int xref_base;
+	int disallow_new_increments;
+
+	/* The local_xref is only active, if local_xref_nesting >= 0 */
+	pdf_xref *local_xref;
+	int local_xref_nesting;
+
+	pdf_xref *xref_sections;
+	pdf_xref *saved_xref_sections;
+	int *xref_index;
+	int save_in_progress;
+	int last_xref_was_old_style;
+	int has_linearization_object;
+
+	int map_page_count;
+	pdf_rev_page_map *rev_page_map;
+	pdf_obj **fwd_page_map;
+	int page_tree_broken;
+
+	int repair_attempted;
+	int repair_in_progress;
+	int non_structural_change; /* True if we are modifying the document in a way that does not change the (page) structure */
+
+	/* State indicating which file parsing method we are using */
+	int file_reading_linearly;
+	int64_t file_length;
+
+	int linear_page_count;
+	pdf_obj *linear_obj; /* Linearized object (if used) */
+	pdf_obj **linear_page_refs; /* Page objects for linear loading */
+	int linear_page1_obj_num;
+
+	/* The state for the pdf_progressive_advance parser */
+	int64_t linear_pos;
+	int linear_page_num;
+
+	int hint_object_offset;
+	int hint_object_length;
+	int hints_loaded; /* Set to 1 after the hints loading has completed,
+			   * whether successful or not! */
+	/* Page n references shared object references:
+	 *   hint_shared_ref[i]
+	 * where
+	 *      i = s to e-1
+	 *	s = hint_page[n].index
+	 *	e = hint_page[n+1].index
+	 * Shared object reference r accesses objects:
+	 *   rs to re-1
+	 * where
+	 *   rs = hint_shared[r].number
+	 *   re = hint_shared[r].count + rs
+	 * These are guaranteed to lie within the region starting at
+	 * hint_shared[r].offset of length hint_shared[r].length
+	 * NOTE(review): pdf_hint_shared declares only number and offset (no count/length) - confirm. */
+	pdf_hint_page *hint_page;
+	int *hint_shared_ref;
+	pdf_hint_shared *hint_shared;
+	int hint_obj_offsets_max;
+	int64_t *hint_obj_offsets;
+
+	pdf_lexbuf_large lexbuf;
+
+	pdf_js *js;
+
+	int recalculate;
+	int redacted;
+	int resynth_required;
+
+	pdf_doc_event_cb *event_cb; /* App callback that receives document events. */
+	pdf_free_doc_event_data_cb *free_event_data_cb; /* Frees event_cb_data. */
+	void *event_cb_data; /* Opaque data passed to event_cb. */
+
+	int num_type3_fonts;
+	int max_type3_fonts;
+	fz_font **type3_fonts;
+
+	struct {
+		fz_hash_table *fonts;
+		fz_hash_table *colorspaces;
+	} resources;
+
+	int orphans_max;
+	int orphans_count;
+	pdf_obj **orphans;
+
+	fz_xml_doc *xfa;
+
+	pdf_journal *journal;
+};
+
+pdf_document *pdf_create_document(fz_context *ctx);
+
+typedef struct pdf_graft_map pdf_graft_map;
+
+/*
+	Return a deep copied object equivalent to the
+	supplied object, suitable for use within the given document.
+
+	dst: The document in which the returned object is to be used.
+
+	obj: The object to deep copy.
+
+	Note: If grafting multiple objects, you should use a pdf_graft_map
+	to avoid potential duplication of target objects.
+*/
+pdf_obj *pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_obj *obj);
+
+/*
+	Prepare a graft map object to allow objects
+	to be deep copied from one document to the given one, avoiding
+	problems with duplicated child objects.
+
+	dst: The document to copy objects to.
+
+	Note: all the source objects must come from the same document.
+*/
+pdf_graft_map *pdf_new_graft_map(fz_context *ctx, pdf_document *dst);
+
+pdf_graft_map *pdf_keep_graft_map(fz_context *ctx, pdf_graft_map *map);
+void pdf_drop_graft_map(fz_context *ctx, pdf_graft_map *map);
+
+/*
+	Return a deep copied object equivalent
+	to the supplied object, suitable for use within the target
+	document of the map.
+
+	map: A map targeted at the document in which the returned
+	object is to be used.
+
+	obj: The object to be copied.
+
+	Note: Copying multiple objects via the same graft map ensures
+	that any shared children are not copied more than once.
+*/
+pdf_obj *pdf_graft_mapped_object(fz_context *ctx, pdf_graft_map *map, pdf_obj *obj);
+
+/*
+	Graft a page (and its resources) from the src document to the
+	destination document of the graft. This involves a deep copy
+	of the objects in question.
+
+	dst / map: The destination document (pdf_graft_page), or a map
+	targeted at it (pdf_graft_mapped_page), to insert the page into.
+
+	page_to: The position within the destination document at which
+	the page should be inserted (pages numbered from 0, with -1
+	meaning "at the end").
+
+	src: The document from which the page should be copied.
+
+	page_from: The page number which should be copied from the src
+	document (pages numbered from 0, with -1 meaning "at the end").
+*/
+void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from);
+void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from);
+
+/*
+	Create a device that will record the
+	graphical operations given to it into a sequence of
+	pdf operations, together with a set of resources. This
+	sequence/set pair can then be used as the basis for
+	adding a page to the document (see pdf_add_page).
+	Returns a kept reference.
+
+	doc: The document for which these are intended.
+
+	mediabox: The bbox for the created page.
+
+	presources: Pointer to a place to put the created
+	resources dictionary.
+
+	pcontents: Pointer to a place to put the created
+	contents buffer.
+*/
+fz_device *pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents);
+
+/*
+	Create a pdf device. Rendering to the device creates
+	new pdf content. WARNING: this device is work in progress. It doesn't
+	currently support all rendering cases.
+
+	Note that contents must be a stream (dictionary) to be updated (or
+	a reference to a stream). Callers should take care to ensure that it
+	is not an array, and that it is not shared with other objects/pages.
+*/
+fz_device *pdf_new_pdf_device(fz_context *ctx, pdf_document *doc, fz_matrix topctm, pdf_obj *resources, fz_buffer *contents);
+
+/*
+	Create a pdf_obj within a document that
+	represents a page, from a previously created resources
+	dictionary and page content stream. This should then be
+	inserted into the document using pdf_insert_page.
+
+	After this call the page exists within the document
+	structure, but is not actually ever displayed as it is
+	not linked into the PDF page tree.
+
+	doc: The document to which to add the page.
+
+	mediabox: The mediabox for the page (should be identical
+	to that used when creating the resources/contents).
+
+	rotate: 0, 90, 180 or 270. The rotation to use for the
+	page.
+
+	resources: The resources dictionary for the new page
+	(typically created by pdf_page_write).
+
+	contents: The page contents for the new page (typically
+	created by pdf_page_write).
+*/
+pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents);
+
+/*
+	Insert a page previously created by
+	pdf_add_page into the pages tree of the document.
+
+	doc: The document to insert into.
+
+	at: The page number to insert at (pages numbered from 0).
+	0 <= n <= page_count inserts before page n. Negative numbers
+	or INT_MAX are treated as page count, and insert at the end.
+	0 inserts at the start. All existing pages after the
+	insertion point are shuffled up.
+
+	page: The page to insert.
+*/
+void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page);
+
+/*
+	Delete a page from the page tree of
+	a document. This does not remove the page contents
+	or resources from the file.
+
+	doc: The document to operate on.
+
+	number: The page to remove (numbered from 0)
+*/
+void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number);
+
+/*
+	Delete a range of pages from the
+	page tree of a document. This does not remove the page
+	contents or resources from the file.
+
+	doc: The document to operate on.
+
+	start, end: The range of pages (numbered from 0)
+	(inclusive, exclusive) to remove. If end is negative or
+	greater than the number of pages in the document, it
+	will be taken to be the end of the document.
+*/
+void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end);
+
+/*
+	Get page label (string) from a page number (index).
+*/
+void pdf_page_label(fz_context *ctx, pdf_document *doc, int page, char *buf, size_t size);
+void pdf_page_label_imp(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size);
+
+typedef enum { /* Page label styles accepted by pdf_set_page_labels; non-zero values are character constants. */
+	PDF_PAGE_LABEL_NONE = 0,
+	PDF_PAGE_LABEL_DECIMAL = 'D', /* NOTE(review): letters look like the PDF page-label /S style names - confirm. */
+	PDF_PAGE_LABEL_ROMAN_UC = 'R', /* NOTE(review): UC/LC presumably mean upper/lower case - confirm. */
+	PDF_PAGE_LABEL_ROMAN_LC = 'r',
+	PDF_PAGE_LABEL_ALPHA_UC = 'A',
+	PDF_PAGE_LABEL_ALPHA_LC = 'a',
+} pdf_page_label_style;
+
+void pdf_set_page_labels(fz_context *ctx, pdf_document *doc, int index, pdf_page_label_style style, const char *prefix, int start);
+void pdf_delete_page_labels(fz_context *ctx, pdf_document *doc, int index);
+
+fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc);
+void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang);
+
+/*
+	In calls to pdf_save_document, the following options structure can be used
+	to control aspects of the writing process. This structure may grow
+	in the future, and should be zero-filled to allow forwards compatibility.
+*/
+typedef struct
+{
+	int do_incremental; /* Write just the changed objects. */
+	int do_pretty; /* Pretty-print dictionaries and arrays. */
+	int do_ascii; /* ASCII hex encode binary streams. */
+	int do_compress; /* Compress streams. 1 zlib, 2 brotli */
+	int do_compress_images; /* Compress (or leave compressed) image streams. */
+	int do_compress_fonts; /* Compress (or leave compressed) font streams. */
+	int do_decompress; /* Decompress streams (except when compressing images/fonts). */
+	int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */
+	int do_linear; /* Write linearised. */
+	int do_clean; /* Clean content streams. */
+	int do_sanitize; /* Sanitize content streams. */
+	int do_appearance; /* (Re)create appearance streams. */
+	int do_encrypt; /* Encryption method to use: keep, none, rc4-40, etc. */
+	int dont_regenerate_id; /* Don't regenerate ID if set (used for clean) */
+	int permissions; /* Document encryption permissions. */
+	char opwd_utf8[128]; /* Owner password. */
+	char upwd_utf8[128]; /* User password. */
+	int do_snapshot; /* Do not use directly. Use the snapshot functions. */
+	int do_preserve_metadata; /* When cleaning, preserve metadata unchanged. */
+	int do_use_objstms; /* Use objstms if possible */
+	int compression_effort; /* 0 for default. 100 = max, 1 = min. */
+	int do_labels; /* Add labels to each object showing how it can be reached from the Root. */
+} pdf_write_options;
+
+FZ_DATA extern const pdf_write_options pdf_default_write_options;
+
+/*
+	Parse option string into a pdf_write_options struct.
+	Matches the command line options to 'mutool clean':
+		g: garbage collect
+		d, i, f: expand all, fonts, images
+		l: linearize
+		a: ascii hex encode
+		z: deflate
+		c: clean content streams
+		s: sanitize content streams
+*/
+pdf_write_options *pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args);
+
+/*
+	Returns true if there are digital signatures waiting to
+	be updated on save.
+*/
+int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc);
+
+/*
+	Write out the document to an output stream with all changes finalised.
+*/
+void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *opts);
+
+/*
+	Write out the document to a file with all changes finalised.
+*/
+void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *opts);
+
+/*
+	Snapshot the document to a file. This does not cause the
+	incremental xref to be finalized, so the document in memory
+	remains (essentially) unchanged.
+*/
+void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename);
+
+/*
+	Snapshot the document to an output stream. This does not cause
+	the incremental xref to be finalized, so the document in memory
+	remains (essentially) unchanged.
+*/
+void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out);
+
+char *pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts);
+
+/*
+	Return true if the document can be saved incrementally. Applying
+	redactions or having a repaired document make incremental saving
+	impossible.
+*/
+int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc);
+
+/*
+	Write out the journal to an output stream.
+*/
+void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out);
+
+/*
+	Write out the journal to a file.
+*/
+void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename);
+
+/*
+	Read a journal from a filename. Will do nothing if the journal
+	does not match. Will throw on a corrupted journal.
+*/
+void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename);
+
+/*
+	Read a journal from a stream. Will do nothing if the journal
+	does not match. Will throw on a corrupted journal.
+*/
+void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm);
+
+/*
+	Minimize the memory used by a document.
+
+	We walk the in memory xref tables, evicting the PDF objects
+	therein that aren't in use.
+
+	This reduces the current memory use, but any subsequent use
+	of these objects will load them back into memory again.
+*/
+void pdf_minimize_document(fz_context *ctx, pdf_document *doc);
+
+/*
+	Map a pdf object representing a structure tag through
+	an optional role_map and convert to an fz_structure.
+*/
+fz_structure pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag);
+
+/*
+	Run the document structure to a device.
+*/
+void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie);
+
+/*
+	Return the count of the associated files on a document.
+	Note, that this is the count of files associated at the document
+	level and does not necessarily include files associated at other
+	levels.
+*/
+int pdf_count_document_associated_files(fz_context *ctx, pdf_document *doc);
+
+/*
+	Return a borrowed pointer to the PDF object that represents a
+	given associated file on a document.
+
+	Indexed from 0 to count-1.
+*/
+pdf_obj *pdf_document_associated_file(fz_context *ctx, pdf_document *doc, int idx);
+
+/*
+	Return the count of the associated files on a given page.
+	Note, that this is the count of files associated at the page
+	level and does not necessarily include files associated at other
+	levels.
+*/
+int pdf_count_page_associated_files(fz_context *ctx, pdf_page *page);
+
+/*
+	Return a borrowed pointer to the PDF object that represents a
+	given associated file on a page.
+
+	Indexed from 0 to count-1.
+*/
+pdf_obj *pdf_page_associated_file(fz_context *ctx, pdf_page *page, int idx);
+
+
+/*
+	A structure used to create "labels" for numbered objects.
+	The labels are different ways to reach an object from the trailer
+	and page tree, using the "mutool show" syntax.
+
+	Note: Paths involving "Parent", "P", "Prev", and "Last" are ignored,
+	as these are used for cycles in the structures which we don't care about
+	labeling.
+*/
+typedef struct pdf_object_labels pdf_object_labels;
+
+/*
+	Scan the entire object structure to create a directed graph
+	of indirect numbered objects and how they can reach each other.
+*/
+pdf_object_labels *pdf_load_object_labels(fz_context *ctx, pdf_document *doc);
+
+void pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g);
+
+/*
+	Enumerate all the possible labels for a given numbered object.
+	The callback is invoked with a path for each possible way the object
+	can be reached from the PDF trailer.
+*/
+typedef void (pdf_label_object_fn)(fz_context *ctx, void *arg, const char *label);
+void pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg);
+
+#endif