diff mupdf-source/include/mupdf/pdf/xref.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/include/mupdf/pdf/xref.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,300 @@
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#ifndef MUPDF_PDF_XREF_H
+#define MUPDF_PDF_XREF_H
+
+#include "mupdf/pdf/document.h"
+
+/*
+	Allocate a slot in the xref table and return a fresh unused object number.
+*/
+int pdf_create_object(fz_context *ctx, pdf_document *doc);
+
+/*
+	Remove object from xref table, marking the slot as free.
+*/
+void pdf_delete_object(fz_context *ctx, pdf_document *doc, int num);
+
+/*
+	Replace object in xref table with the passed in object.
+*/
+void pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *obj);
+
+/*
+	Replace stream contents for object in xref table with the passed in buffer.
+
+	The buffer contents must match the /Filter setting if 'compressed' is true.
+	If 'compressed' is false, the /Filter and /DecodeParms entries are deleted.
+	The /Length entry is updated.
+*/
+void pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *ref, fz_buffer *buf, int compressed);
+
+/*
+	Return true if 'obj' is an indirect reference to an object that is held
+	by the "local" xref section.
+*/
+int pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
+
+pdf_obj *pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
+pdf_obj *pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
+pdf_obj *pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed);
+
+pdf_obj *pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial);
+pdf_obj *pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial);
+
+typedef struct /* One entry in an xref table, describing a single object slot. */
+{
+	char type;		/* 0=unset, 'f'=free, 'n'=in use, 'o'=held in an object stream (objstm) */
+	unsigned char marked;	/* marked to keep alive with pdf_mark_xref */
+	unsigned short gen;	/* generation number, or index within the objstm */
+	int num;		/* original object number (for decryption after renumbering) */
+	int64_t ofs;		/* file offset, or object number of the objstm */
+	int64_t stm_ofs;	/* on-disk stream (presumably its file offset -- TODO confirm) */
+	fz_buffer *stm_buf;	/* in-memory stream (for updated objects) */
+	pdf_obj *obj;		/* stored/cached object */
+} pdf_xref_entry;
+
+typedef struct pdf_xref_subsec /* One subsection of an xref; subsections are linked through 'next'. */
+{
+	struct pdf_xref_subsec *next;	/* next subsection in the chain */
+	int len;	/* NOTE(review): presumably the number of entries in 'table' -- confirm */
+	int start;	/* NOTE(review): presumably the object number of table[0] -- confirm */
+	pdf_xref_entry *table;	/* the entries belonging to this subsection */
+} pdf_xref_subsec;
+
+struct pdf_xref /* One xref: its subsections, its trailer, and any unsaved signatures. */
+{
+	int num_objects;	/* NOTE(review): presumably the number of objects covered by this xref -- confirm */
+	pdf_xref_subsec *subsec;	/* first subsection (further ones are reached via its 'next' pointer) */
+	pdf_obj *trailer;	/* trailer object for this xref */
+	pdf_obj *pre_repair_trailer;	/* NOTE(review): presumably the trailer as it was before a repair -- confirm */
+	pdf_unsaved_sig *unsaved_sigs;	/* unsaved signatures (cf. pdf_xref_store_unsaved_signature) */
+	pdf_unsaved_sig **unsaved_sigs_end;	/* NOTE(review): presumably the append point for unsaved_sigs -- confirm */
+	int64_t end_ofs; /* file offset to end of xref */
+};
+
+/**
+	Retrieve the pdf_xref_entry for a given object.
+
+	This can cause xref reorganisations (solidifications etc) due to
+	repairs, so all held pdf_xref_entries should be considered
+	invalid after this call (other than the returned one).
+*/
+pdf_xref_entry *pdf_cache_object(fz_context *ctx, pdf_document *doc, int num);
+
+int pdf_object_exists(fz_context *ctx, pdf_document *doc, int num);
+
+int pdf_count_objects(fz_context *ctx, pdf_document *doc);
+
+/**
+	Resolve an indirect object (or chain of objects).
+
+	This can cause xref reorganisations (solidifications etc) due to
+	repairs, so all held pdf_xref_entries should be considered
+	invalid after this call.
+*/
+pdf_obj *pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref);
+pdf_obj *pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref);
+
+/**
+	Load a given object.
+
+	This can cause xref reorganisations (solidifications etc) due to
+	repairs, so all held pdf_xref_entries should be considered
+	invalid after this call.
+*/
+pdf_obj *pdf_load_object(fz_context *ctx, pdf_document *doc, int num);
+pdf_obj *pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num);
+
+/*
+	Load raw (compressed but decrypted) contents of a stream into buf.
+*/
+fz_buffer *pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num);
+fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref);
+
+/*
+	Load uncompressed contents of a stream into buf.
+*/
+fz_buffer *pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num);
+fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref);
+
+/*
+	Open a stream for reading the raw (compressed but decrypted) data.
+*/
+fz_stream *pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num);
+fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref);
+
+/*
+	Open a stream for reading uncompressed data.
+	Put the opened file in doc->stream.
+	Using doc->file while a stream is open is a Bad idea.
+*/
+fz_stream *pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num);
+fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref);
+
+/*
+	Construct a filter to decode a stream, without
+	constraining to stream length, and without decryption.
+*/
+fz_stream *pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *params);
+fz_compressed_buffer *pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case);
+void pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *cstm, int indexed, fz_compressed_image *image);
+fz_stream *pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs);
+fz_stream *pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
+
+int pdf_version(fz_context *ctx, pdf_document *doc);
+pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc);
+void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer);
+int pdf_xref_len(fz_context *ctx, pdf_document *doc);
+
+pdf_obj *pdf_metadata(fz_context *ctx, pdf_document *doc);
+
+/*
+	Used while reading the individual xref sections from a file.
+*/
+pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int i);
+
+/*
+	Used after loading a document to access entries.
+
+	This will neither throw nor return NULL, provided it is
+	only asked to return objects in range within a 'solid'
+	xref.
+
+	This may "solidify" the xref (so can cause allocations).
+*/
+pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i);
+
+/*
+	Map a function across all xref entries in a document.
+*/
+void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int i, pdf_document *doc, void *), void *arg);
+
+
+/*
+	Used after loading a document to access entries.
+
+	This will neither throw nor return NULL, provided it is
+	only asked to return objects in range within a 'solid'
+	xref.
+
+	This will never "solidify" the xref, so an entry may not be
+	found (NULL will be returned) for free entries.
+
+	Called with a valid i, this will never try/catch or throw.
+*/
+pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i);
+pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i);
+void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n);
+void pdf_forget_xref(fz_context *ctx, pdf_document *doc);
+pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i);
+
+/*
+	Ensure that an object has been cloned into the incremental xref section.
+*/
+int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num);
+int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num);
+void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer);
+void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field);
+int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj);
+void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num);
+int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj);
+
+void pdf_repair_xref(fz_context *ctx, pdf_document *doc);
+
+/*
+	Ensure that the current populating xref has a single subsection
+	that covers the entire range.
+*/
+void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num);
+void pdf_mark_xref(fz_context *ctx, pdf_document *doc);
+void pdf_clear_xref(fz_context *ctx, pdf_document *doc);
+void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc);
+
+int pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int64_t *stmofsp, int64_t *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int64_t *tmpofs, pdf_obj **root);
+
+pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum);
+
+/*
+	Return the number of versions that there are in a file,
+	i.e. 1 + the number of updates that the file on disc has
+	been through. Internal unsaved changes to the file (such
+	as appearance streams) are ignored. Also, the initial
+	write of a linearized file (which appears as a base file
+	write + an incremental update) is treated as a single
+	version.
+*/
+int pdf_count_versions(fz_context *ctx, pdf_document *doc);
+int pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc);
+int pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version);
+int pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc);
+
+typedef struct pdf_locked_fields pdf_locked_fields;
+int pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name);
+void pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *locked);
+pdf_locked_fields *pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version);
+pdf_locked_fields *pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig);
+
+/*
+	Check the entire history of the document, and return the number of
+	the last version that checked out OK.
+	i.e. 0 = "the entire history checks out OK".
+		  n = "none of the history checked out OK".
+*/
+int pdf_validate_change_history(fz_context *ctx, pdf_document *doc);
+
+/*
+	Find which version of a document the current version of obj
+	was defined in.
+
+	version = 0 = latest, 1 = previous update etc, allowing for
+	the first incremental update in a linearized file being ignored.
+*/
+int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
+
+/*
+	Return the number of updates ago when a signature became invalid,
+	not counting any unsaved changes.
+
+	Thus:
+	 -1 => Has changed in the current unsaved changes.
+	  0 => still valid.
+	  1 => became invalid on the last save
+	  n => became invalid n saves ago
+*/
+int pdf_validate_signature(fz_context *ctx, pdf_annot *widget);
+int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc);
+
+/* Local xrefs - designed for holding stuff that shouldn't be written
+ * back into the actual document, such as synthesized appearance
+ * streams. */
+pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc);
+
+void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref);
+void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc);
+
+/* Debug call to dump the incremental/local xrefs to the
+ * debug channel. */
+void pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc);
+
+#endif