comparison mupdf-source/include/mupdf/pdf/xref.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #ifndef MUPDF_PDF_XREF_H
24 #define MUPDF_PDF_XREF_H
25
26 #include "mupdf/pdf/document.h"
27
28 /*
29 Allocate a slot in the xref table and return a fresh unused object number.
30 */
31 int pdf_create_object(fz_context *ctx, pdf_document *doc);
32
33 /*
34 Remove object from xref table, marking the slot as free.
35 */
36 void pdf_delete_object(fz_context *ctx, pdf_document *doc, int num);
37
38 /*
39 Replace object in xref table with the passed in object.
40 */
41 void pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *obj);
42
43 /*
44 Replace stream contents for object in xref table with the passed in buffer.
45
46 The buffer contents must match the /Filter setting if 'compressed' is true.
47 If 'compressed' is false, the /Filter and /DecodeParms entries are deleted.
48 The /Length entry is updated.
49 */
50 void pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *ref, fz_buffer *buf, int compressed);
51
52 /*
53 Return true if 'obj' is an indirect reference to an object that is held
54 by the "local" xref section.
55 */
56 int pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
57
58 pdf_obj *pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
59 pdf_obj *pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
60 pdf_obj *pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed);
61
62 pdf_obj *pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial);
63 pdf_obj *pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial);
64
65 typedef struct /* One entry of the xref table, describing a single object. */
66 {
67 char type; /* 0 = unset, 'f' = free, 'n' = in use, 'o' = held in an object stream (objstm) */
68 unsigned char marked; /* marked to keep alive with pdf_mark_xref */
69 unsigned short gen; /* generation number, or index within the objstm (presumably when type is 'o' -- confirm) */
70 int num; /* original object number (for decryption after renumbering) */
71 int64_t ofs; /* file offset, or object number of the objstm (presumably when type is 'o' -- confirm) */
72 int64_t stm_ofs; /* on-disk stream (a file offset, judging by the name and type -- confirm) */
73 fz_buffer *stm_buf; /* in-memory stream (for updated objects) */
74 pdf_obj *obj; /* stored/cached object */
75 } pdf_xref_entry;
76
77 typedef struct pdf_xref_subsec /* One subsection of an xref; subsections are chained through 'next'. */
78 {
79 struct pdf_xref_subsec *next; /* next subsection in the chain */
80 int len; /* presumably the number of entries in 'table' -- confirm */
81 int start; /* presumably the object number of the first entry in 'table' -- confirm */
82 pdf_xref_entry *table; /* entries belonging to this subsection */
83 } pdf_xref_subsec;
84
85 struct pdf_xref /* One xref section: its subsections, trailer and unsaved signatures. */
86 {
87 int num_objects; /* NOTE(review): presumably the number of object slots this section covers -- confirm */
88 pdf_xref_subsec *subsec; /* first subsection; further ones are reached via pdf_xref_subsec.next */
89 pdf_obj *trailer; /* trailer object for this section */
90 pdf_obj *pre_repair_trailer; /* presumably the trailer as it was before a repair -- confirm */
91 pdf_unsaved_sig *unsaved_sigs; /* unsaved signatures (see pdf_xref_store_unsaved_signature); presumably a list head -- confirm */
92 pdf_unsaved_sig **unsaved_sigs_end; /* presumably the append point at the tail of unsaved_sigs -- confirm */
93 int64_t end_ofs; /* file offset to end of xref */
94 };
95
96 /**
97 Retrieve the pdf_xref_entry for a given object.
98
99 This can cause xref reorganisations (solidifications etc) due to
100 repairs, so all held pdf_xref_entries should be considered
101 invalid after this call (other than the returned one).
102 */
103 pdf_xref_entry *pdf_cache_object(fz_context *ctx, pdf_document *doc, int num);
104
105 int pdf_object_exists(fz_context *ctx, pdf_document *doc, int num);
106
107 int pdf_count_objects(fz_context *ctx, pdf_document *doc);
108
109 /**
110 Resolve an indirect object (or chain of objects).
111
112 This can cause xref reorganisations (solidifications etc) due to
113 repairs, so all held pdf_xref_entries should be considered
114 invalid after this call.
115 */
116 pdf_obj *pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref);
117 pdf_obj *pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref);
118
119 /**
120 Load a given object.
121
122 This can cause xref reorganisations (solidifications etc) due to
123 repairs, so all held pdf_xref_entries should be considered
124 invalid after this call.
125 */
126 pdf_obj *pdf_load_object(fz_context *ctx, pdf_document *doc, int num);
127 pdf_obj *pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num);
128
129 /*
130 Load raw (compressed but decrypted) contents of a stream into buf.
131 */
132 fz_buffer *pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num);
133 fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref);
134
135 /*
136 Load uncompressed contents of a stream into buf.
137 */
138 fz_buffer *pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num);
139 fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref);
140
141 /*
142 Open a stream for reading the raw (compressed but decrypted) data.
143 */
144 fz_stream *pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num);
145 fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref);
146
147 /*
148 Open a stream for reading uncompressed data.
149 Put the opened file in doc->stream.
150 Using doc->file while a stream is open is a Bad idea.
151 */
152 fz_stream *pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num);
153 fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref);
154
155 /*
156 Construct a filter to decode a stream, without
157 constraining to stream length, and without decryption.
158 */
159 fz_stream *pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *params);
160 fz_compressed_buffer *pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case);
161 void pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *cstm, int indexed, fz_compressed_image *image);
162 fz_stream *pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs);
163 fz_stream *pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
164
165 int pdf_version(fz_context *ctx, pdf_document *doc);
166 pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc);
167 void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer);
168 int pdf_xref_len(fz_context *ctx, pdf_document *doc);
169
170 pdf_obj *pdf_metadata(fz_context *ctx, pdf_document *doc);
171
172 /*
173 Used while reading the individual xref sections from a file.
174 */
175 pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int i);
176
177 /*
178 Used after loading a document to access entries.
179
180 Provided it is only asked to return objects in range within
181 a 'solid' xref, this will never throw anything or return
182 NULL.
183
184 This may "solidify" the xref (so can cause allocations).
185 */
186 pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i);
187
188 /*
189 Map a function across all xref entries in a document.
190 */
191 void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int i, pdf_document *doc, void *), void *arg);
192
193
194 /*
195 Used after loading a document to access entries.
196
197 Provided it is only asked to return objects in range within
198 a 'solid' xref, this will never throw anything or return
199 NULL.
200
201 This will never "solidify" the xref, so no entry may be found
202 (NULL will be returned) for free entries.
203
204 Called with a valid i, this will never try/catch or throw.
205 */
206 pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i);
207 pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i);
208 void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n);
209 void pdf_forget_xref(fz_context *ctx, pdf_document *doc);
210 pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i);
211
212 /*
213 Ensure that an object has been cloned into the incremental xref section.
214 */
215 int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num);
216 int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num);
217 void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer);
218 void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field);
219 int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj);
220 void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num);
221 int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj);
222
223 void pdf_repair_xref(fz_context *ctx, pdf_document *doc);
224
225 /*
226 Ensure that the current populating xref has a single subsection
227 that covers the entire range.
228 */
229 void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num);
230 void pdf_mark_xref(fz_context *ctx, pdf_document *doc);
231 void pdf_clear_xref(fz_context *ctx, pdf_document *doc);
232 void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc);
233
234 int pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int64_t *stmofsp, int64_t *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int64_t *tmpofs, pdf_obj **root);
235
236 pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum);
237
238 /*
239 Return the number of versions that there
240 are in a file, i.e. 1 + the number of updates that
241 the file on disc has been through. Internal
242 unsaved changes to the file (such as appearance streams)
243 are ignored. Also, the initial write of a linearized
244 file (which appears as a base file write + an incremental
245 update) is treated as a single version.
246 */
247 int pdf_count_versions(fz_context *ctx, pdf_document *doc);
248 int pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc);
249 int pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version);
250 int pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc);
251
252 typedef struct pdf_locked_fields pdf_locked_fields;
253 int pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name);
254 void pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *locked);
255 pdf_locked_fields *pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version);
256 pdf_locked_fields *pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig);
257
258 /*
259 Check the entire history of the document, and return the number of
260 the last version that checked out OK.
261 i.e. 0 = "the entire history checks out OK".
262 n = "none of the history checked out OK".
263 */
264 int pdf_validate_change_history(fz_context *ctx, pdf_document *doc);
265
266 /*
267 Find which version of a document the current version of obj
268 was defined in.
269
270 version = 0 = latest, 1 = previous update etc, allowing for
271 the first incremental update in a linearized file being ignored.
272 */
273 int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj);
274
275 /*
276 Return how many updates ago a signature became invalid,
277 not counting any unsaved changes.
278
279 Thus:
280 -1 => Has changed in the current unsaved changes.
281 0 => still valid.
282 1 => became invalid on the last save
283 n => became invalid n saves ago
284 */
285 int pdf_validate_signature(fz_context *ctx, pdf_annot *widget);
286 int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc);
287
288 /* Local xrefs - designed for holding stuff that shouldn't be written
289 * back into the actual document, such as synthesized appearance
290 * streams. */
291 pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc);
292
293 void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref);
294 void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc);
295
296 /* Debug call to dump the incremental/local xrefs to the
297 * debug channel. */
298 void pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc);
299
300 #endif