Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/include/mupdf/pdf/document.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #ifndef MUPDF_PDF_DOCUMENT_H | |
| 24 #define MUPDF_PDF_DOCUMENT_H | |
| 25 | |
| 26 #include "mupdf/fitz/export.h" | |
| 27 #include "mupdf/fitz/document.h" | |
| 28 #include "mupdf/fitz/hash.h" | |
| 29 #include "mupdf/fitz/stream.h" | |
| 30 #include "mupdf/fitz/xml.h" | |
| 31 #include "mupdf/pdf/object.h" | |
| 32 | |
| 33 typedef struct pdf_xref pdf_xref; | |
| 34 typedef struct pdf_ocg_descriptor pdf_ocg_descriptor; | |
| 35 | |
| 36 typedef struct pdf_page pdf_page; | |
| 37 typedef struct pdf_annot pdf_annot; | |
| 38 typedef struct pdf_js pdf_js; | |
| 39 typedef struct pdf_document pdf_document; | |
| 40 | |
| 41 enum | |
| 42 { | |
| 43 PDF_LEXBUF_SMALL = 256, /* number of chars in pdf_lexbuf.buffer */ | |
| 44 PDF_LEXBUF_LARGE = 65536 /* total chars held by pdf_lexbuf_large (base.buffer plus its own buffer) */ | |
| 45 }; | |
| 46 | |
| 47 typedef struct | |
| 48 { | |
| 49 size_t size; | |
| 50 size_t base_size; | |
| 51 size_t len; | |
| 52 int64_t i; | |
| 53 float f; | |
| 54 char *scratch; | |
| 55 char buffer[PDF_LEXBUF_SMALL]; /* inline storage of PDF_LEXBUF_SMALL chars */ | |
| 56 } pdf_lexbuf; | |
| 57 | |
| 58 typedef struct | |
| 59 { | |
| 60 pdf_lexbuf base; /* NOTE(review): presumably kept first so &large.base can be passed where a pdf_lexbuf is expected - confirm */ | |
| 61 char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; /* extra chars; together with base.buffer this totals PDF_LEXBUF_LARGE */ | |
| 62 } pdf_lexbuf_large; | |
| 63 | |
| 64 /* | |
| 65 Document event structures are mostly opaque to the app. Only the type | |
| 66 is visible to the app. | |
| 67 */ | |
| 68 typedef struct pdf_doc_event pdf_doc_event; | |
| 69 | |
| 70 /* | |
| 71 the type of function via which the app receives | |
| 72 document events. | |
| 73 */ | |
| 74 typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_event *evt, void *data); | |
| 75 | |
| 76 /* | |
| 77 the type of function via which the app frees | |
| 78 the data provided to the event callback pdf_doc_event_cb. | |
| 79 */ | |
| 80 typedef void (pdf_free_doc_event_data_cb)(fz_context *ctx, void *data); | |
| 81 | |
| 82 typedef struct pdf_js_console pdf_js_console; | |
| 83 | |
| 84 /* | |
| 85 Callback called when the console is dropped because it | |
| 86 is being replaced or the javascript is being disabled | |
| 87 by a call to pdf_disable_js(). | |
| 88 */ | |
| 89 typedef void (pdf_js_console_drop_cb)(pdf_js_console *console, void *user); | |
| 90 | |
| 91 /* | |
| 92 Callback signalling that a piece of javascript is asking | |
| 93 the javascript console to be displayed. | |
| 94 */ | |
| 95 typedef void (pdf_js_console_show_cb)(void *user); | |
| 96 | |
| 97 /* | |
| 98 Callback signalling that a piece of javascript is asking | |
| 99 the javascript console to be hidden. | |
| 100 */ | |
| 101 typedef void (pdf_js_console_hide_cb)(void *user); | |
| 102 | |
| 103 /* | |
| 104 Callback signalling that a piece of javascript is asking | |
| 105 the javascript console to remove all its contents. | |
| 106 */ | |
| 107 typedef void (pdf_js_console_clear_cb)(void *user); | |
| 108 | |
| 109 /* | |
| 110 Callback signalling that a piece of javascript is appending | |
| 111 the given message to the javascript console contents. | |
| 112 */ | |
| 113 typedef void (pdf_js_console_write_cb)(void *user, const char *msg); | |
| 114 | |
| 115 /* | |
| 116 The callback functions relating to a javascript console. | |
| 117 */ | |
| 118 typedef struct pdf_js_console { | |
| 119 pdf_js_console_drop_cb *drop; | |
| 120 pdf_js_console_show_cb *show; | |
| 121 pdf_js_console_hide_cb *hide; | |
| 122 pdf_js_console_clear_cb *clear; | |
| 123 pdf_js_console_write_cb *write; | |
| 124 } pdf_js_console; | |
| 125 | |
| 126 /* | |
| 127 Retrieve the currently set javascript console, or NULL | |
| 128 if none is set. | |
| 129 */ | |
| 130 pdf_js_console *pdf_js_get_console(fz_context *ctx, pdf_document *doc); | |
| 131 | |
| 132 /* | |
| 133 Set a new javascript console. | |
| 134 | |
| 135 console: A set of callback functions informing about | |
| 136 what pieces of executed js is trying to do | |
| 137 to the js console. The caller transfers ownership of | |
| 138 console when calling pdf_js_set_console(). Once it and | |
| 139 the corresponding user pointer are no longer needed | |
| 140 console->drop() will be called passing both the console | |
| 141 and the user pointer. | |
| 142 | |
| 143 user: Opaque data that will be passed unchanged to all | |
| 144 js console callbacks when called. The caller ensures | |
| 145 that this is valid until either the js console is | |
| 146 replaced by calling pdf_js_set_console() again with a | |
| 147 new console, or pdf_disable_js() is called. In either | |
| 148 case the caller ensures that the user data is freed. | |
| 149 */ | |
| 150 void pdf_js_set_console(fz_context *ctx, pdf_document *doc, pdf_js_console *console, void *user); | |
| 151 | |
| 152 /* | |
| 153 Open a PDF document. | |
| 154 | |
| 155 Open a PDF document by reading its cross reference table, so | |
| 156 MuPDF can locate PDF objects inside the file. Upon a broken | |
| 157 cross reference table or other parse errors MuPDF will restart | |
| 158 parsing the file from the beginning to try to rebuild a | |
| 159 (hopefully correct) cross reference table to allow further | |
| 160 processing of the file. | |
| 161 | |
| 162 The returned pdf_document should be used when calling most | |
| 163 other PDF functions. Note that it wraps the context, so those | |
| 164 functions implicitly get access to the global state in | |
| 165 context. | |
| 166 | |
| 167 filename: a path to a file as it would be given to open(2). | |
| 168 */ | |
| 169 pdf_document *pdf_open_document(fz_context *ctx, const char *filename); | |
| 170 | |
| 171 /* | |
| 172 Opens a PDF document. | |
| 173 | |
| 174 Same as pdf_open_document, but takes a stream instead of a | |
| 175 filename to locate the PDF document to open. Increments the | |
| 176 reference count of the stream. See fz_open_file, | |
| 177 fz_open_file_w or fz_open_fd for opening a stream, and | |
| 178 fz_drop_stream for closing an open stream. | |
| 179 */ | |
| 180 pdf_document *pdf_open_document_with_stream(fz_context *ctx, fz_stream *file); | |
| 181 | |
| 182 /* | |
| 183 Closes and frees an opened PDF document. | |
| 184 | |
| 185 The resource store in the context associated with pdf_document | |
| 186 is emptied. | |
| 187 */ | |
| 188 void pdf_drop_document(fz_context *ctx, pdf_document *doc); | |
| 189 | |
| 190 pdf_document *pdf_keep_document(fz_context *ctx, pdf_document *doc); | |
| 191 | |
| 192 /* | |
| 193 down-cast a fz_document to a pdf_document. | |
| 194 Returns NULL if underlying document is not PDF | |
| 195 */ | |
| 196 pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc); | |
| 197 | |
| 198 /* | |
| 199 Down-cast generic fitz objects into pdf specific variants. | |
| 200 Returns NULL if the objects are not from a PDF document. | |
| 201 */ | |
| 202 pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr); | |
| 203 pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr); | |
| 204 | |
| 205 /* | |
| 206 Get a pdf_document handle from an fz_document handle. | |
| 207 | |
| 208 This is superficially similar to pdf_document_from_fz_document | |
| 209 (and the older pdf_specifics). | |
| 210 | |
| 211 For fz_documents that are actually pdf_documents, this will return | |
| 212 a kept version of the same pointer, just cast differently. | |
| 213 | |
| 214 For fz_documents that have a pdf_document representation internally, | |
| 215 then you may get a kept version of a different pointer. | |
| 216 | |
| 217 For fz_documents that have no pdf_document representation internally, | |
| 218 this will return NULL. | |
| 219 | |
| 220 Note that this returns a kept pointer that the caller is responsible | |
| 221 for freeing, unlike pdf_specifics or pdf_document_from_fz_document. | |
| 222 */ | |
| 223 pdf_document *fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr); | |
| 224 | |
| 225 int pdf_needs_password(fz_context *ctx, pdf_document *doc); | |
| 226 | |
| 227 /* | |
| 228 Attempt to authenticate a | |
| 229 password. | |
| 230 | |
| 231 Returns 0 for failure, non-zero for success. | |
| 232 | |
| 233 In the non-zero case: | |
| 234 bit 0 set => no password required | |
| 235 bit 1 set => user password authenticated | |
| 236 bit 2 set => owner password authenticated | |
| 237 */ | |
| 238 int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw); | |
| 239 | |
| 240 int pdf_has_permission(fz_context *ctx, pdf_document *doc, fz_permission p); | |
| 241 int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, size_t size); | |
| 242 | |
| 243 fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc); | |
| 244 | |
| 245 fz_outline_iterator *pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc); | |
| 246 | |
| 247 void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc); | |
| 248 | |
| 249 /* | |
| 250 Get the number of layer configurations defined in this document. | |
| 251 | |
| 252 doc: The document in question. | |
| 253 */ | |
| 254 int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc); | |
| 255 | |
| 256 /* | |
| 257 Configure visibility of individual layers in this document. | |
| 258 */ | |
| 259 int pdf_count_layers(fz_context *ctx, pdf_document *doc); | |
| 260 const char *pdf_layer_name(fz_context *ctx, pdf_document *doc, int layer); | |
| 261 int pdf_layer_is_enabled(fz_context *ctx, pdf_document *doc, int layer); | |
| 262 void pdf_enable_layer(fz_context *ctx, pdf_document *doc, int layer, int enabled); | |
| 263 | |
| 264 typedef struct | |
| 265 { | |
| 266 const char *name; | |
| 267 const char *creator; | |
| 268 } pdf_layer_config; | |
| 269 | |
| 270 /* | |
| 271 Fetch the name (and optionally creator) of the given layer config. | |
| 272 | |
| 273 doc: The document in question. | |
| 274 | |
| 275 config_num: A value in the 0..n-1 range, where n is the | |
| 276 value returned from pdf_count_layer_configs. | |
| 277 | |
| 278 info: Pointer to structure to fill in. Pointers within | |
| 279 this structure may be set to NULL if no information is | |
| 280 available. | |
| 281 */ | |
| 282 void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info); | |
| 283 | |
| 284 /* | |
| 285 Set the current configuration. | |
| 286 This updates the visibility of the optional content groups | |
| 287 within the document. | |
| 288 | |
| 289 doc: The document in question. | |
| 290 | |
| 291 config_num: A value in the 0..n-1 range, where n is the | |
| 292 value returned from pdf_count_layer_configs. | |
| 293 */ | |
| 294 void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num); | |
| 295 | |
| 296 /* | |
| 297 Returns the number of entries in the 'UI' for this layer configuration. | |
| 298 | |
| 299 doc: The document in question. | |
| 300 */ | |
| 301 int pdf_count_layer_config_ui(fz_context *ctx, pdf_document *doc); | |
| 302 | |
| 303 /* | |
| 304 Select a checkbox/radiobox within the 'UI' for this layer | |
| 305 configuration. | |
| 306 | |
| 307 Selecting a UI entry that is a radiobox may disable | |
| 308 other UI entries. | |
| 309 | |
| 310 doc: The document in question. | |
| 311 | |
| 312 ui: A value in the 0..m-1 range, where m is the value | |
| 313 returned by pdf_count_layer_config_ui. | |
| 314 */ | |
| 315 void pdf_select_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui); | |
| 316 | |
| 317 /* | |
| 318 Deselect a checkbox/radiobox within the 'UI' for this layer configuration. | |
| 319 | |
| 320 doc: The document in question. | |
| 321 | |
| 322 ui: A value in the 0..m-1 range, where m is the value | |
| 323 returned by pdf_count_layer_config_ui. | |
| 324 */ | |
| 325 void pdf_deselect_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui); | |
| 326 | |
| 327 /* | |
| 328 Toggle a checkbox/radiobox within the 'UI' for this layer configuration. | |
| 329 | |
| 330 Toggling a UI entry that is a radiobox may disable | |
| 331 other UI entries. | |
| 332 | |
| 333 doc: The document in question. | |
| 334 | |
| 335 ui: A value in the 0..m-1 range, where m is the value | |
| 336 returned by pdf_count_layer_config_ui. | |
| 337 */ | |
| 338 void pdf_toggle_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui); | |
| 339 | |
| 340 typedef enum | |
| 341 { | |
| 342 PDF_LAYER_UI_LABEL = 0, | |
| 343 PDF_LAYER_UI_CHECKBOX = 1, | |
| 344 PDF_LAYER_UI_RADIOBOX = 2 | |
| 345 } pdf_layer_config_ui_type; | |
| 346 | |
| 347 typedef struct | |
| 348 { | |
| 349 const char *text; | |
| 350 int depth; | |
| 351 pdf_layer_config_ui_type type; | |
| 352 int selected; | |
| 353 int locked; | |
| 354 } pdf_layer_config_ui; | |
| 355 | |
| 356 /* | |
| 357 Get the info for a given entry in the layer config ui. | |
| 358 | |
| 359 doc: The document in question. | |
| 360 | |
| 361 ui: A value in the 0..m-1 range, where m is the value | |
| 362 returned by pdf_count_layer_config_ui. | |
| 363 | |
| 364 info: Pointer to a structure to fill in with information | |
| 365 about the requested ui entry. | |
| 366 */ | |
| 367 void pdf_layer_config_ui_info(fz_context *ctx, pdf_document *doc, int ui, pdf_layer_config_ui *info); | |
| 368 | |
| 369 /* | |
| 370 Write the current layer config back into the document as the default state. | |
| 371 */ | |
| 372 void pdf_set_layer_config_as_default(fz_context *ctx, pdf_document *doc); | |
| 373 | |
| 374 /* | |
| 375 Determine whether changes have been made since the | |
| 376 document was opened or last saved. | |
| 377 */ | |
| 378 int pdf_has_unsaved_changes(fz_context *ctx, pdf_document *doc); | |
| 379 | |
| 380 /* | |
| 381 Determine if this PDF has been repaired since opening. | |
| 382 */ | |
| 383 int pdf_was_repaired(fz_context *ctx, pdf_document *doc); | |
| 384 | |
| 385 /* Object that can perform the cryptographic operation necessary for document signing */ | |
| 386 typedef struct pdf_pkcs7_signer pdf_pkcs7_signer; | |
| 387 | |
| 388 /* Unsaved signature fields */ | |
| 389 typedef struct pdf_unsaved_sig | |
| 390 { | |
| 391 pdf_obj *field; | |
| 392 size_t byte_range_start; | |
| 393 size_t byte_range_end; | |
| 394 size_t contents_start; | |
| 395 size_t contents_end; | |
| 396 pdf_pkcs7_signer *signer; | |
| 397 struct pdf_unsaved_sig *next; | |
| 398 } pdf_unsaved_sig; | |
| 399 | |
| 400 typedef struct | |
| 401 { | |
| 402 int page; | |
| 403 int object; | |
| 404 } pdf_rev_page_map; | |
| 405 | |
| 406 typedef struct | |
| 407 { | |
| 408 int number; /* Page object number */ | |
| 409 int64_t offset; /* Offset of page object */ | |
| 410 int64_t index; /* Index into shared hint_shared_ref */ | |
| 411 } pdf_hint_page; | |
| 412 | |
| 413 typedef struct | |
| 414 { | |
| 415 int number; /* Object number of first object */ | |
| 416 int64_t offset; /* Offset of first object */ | |
| 417 } pdf_hint_shared; | |
| 418 | |
| 419 struct pdf_document | |
| 420 { | |
| 421 fz_document super; | |
| 422 | |
| 423 fz_stream *file; | |
| 424 | |
| 425 int version; | |
| 426 int is_fdf; | |
| 427 int bias; | |
| 428 int64_t startxref; | |
| 429 int64_t file_size; | |
| 430 pdf_crypt *crypt; | |
| 431 pdf_ocg_descriptor *ocg; | |
| 432 fz_colorspace *oi; | |
| 433 | |
| 434 int max_xref_len; | |
| 435 int num_xref_sections; | |
| 436 int saved_num_xref_sections; | |
| 437 int num_incremental_sections; | |
| 438 int xref_base; | |
| 439 int disallow_new_increments; | |
| 440 | |
| 441 /* The local_xref is only active, if local_xref_nesting >= 0 */ | |
| 442 pdf_xref *local_xref; | |
| 443 int local_xref_nesting; | |
| 444 | |
| 445 pdf_xref *xref_sections; | |
| 446 pdf_xref *saved_xref_sections; | |
| 447 int *xref_index; | |
| 448 int save_in_progress; | |
| 449 int last_xref_was_old_style; | |
| 450 int has_linearization_object; | |
| 451 | |
| 452 int map_page_count; | |
| 453 pdf_rev_page_map *rev_page_map; | |
| 454 pdf_obj **fwd_page_map; | |
| 455 int page_tree_broken; | |
| 456 | |
| 457 int repair_attempted; | |
| 458 int repair_in_progress; | |
| 459 int non_structural_change; /* True if we are modifying the document in a way that does not change the (page) structure */ | |
| 460 | |
| 461 /* State indicating which file parsing method we are using */ | |
| 462 int file_reading_linearly; | |
| 463 int64_t file_length; | |
| 464 | |
| 465 int linear_page_count; | |
| 466 pdf_obj *linear_obj; /* Linearized object (if used) */ | |
| 467 pdf_obj **linear_page_refs; /* Page objects for linear loading */ | |
| 468 int linear_page1_obj_num; | |
| 469 | |
| 470 /* The state for the pdf_progressive_advance parser */ | |
| 471 int64_t linear_pos; | |
| 472 int linear_page_num; | |
| 473 | |
| 474 int hint_object_offset; | |
| 475 int hint_object_length; | |
| 476 int hints_loaded; /* Set to 1 after the hints loading has completed, | |
| 477 * whether successful or not! */ | |
| 478 /* Page n references shared object references: | |
| 479 * hint_shared_ref[i] | |
| 480 * where | |
| 481 * i = s to e-1 | |
| 482 * s = hint_page[n]->index | |
| 483 * e = hint_page[n+1]->index | |
| 484 * Shared object reference r accesses objects: | |
| 485 * rs to re-1 | |
| 486 * where | |
| 487 * rs = hint_shared[r]->number | |
| 488 * re = hint_shared[r]->count + rs | |
| 489 * These are guaranteed to lie within the region starting at | |
| 490 * hint_shared[r]->offset of length hint_shared[r]->length | |
| 491 */ | |
| 492 pdf_hint_page *hint_page; | |
| 493 int *hint_shared_ref; | |
| 494 pdf_hint_shared *hint_shared; | |
| 495 int hint_obj_offsets_max; | |
| 496 int64_t *hint_obj_offsets; | |
| 497 | |
| 498 pdf_lexbuf_large lexbuf; | |
| 499 | |
| 500 pdf_js *js; | |
| 501 | |
| 502 int recalculate; | |
| 503 int redacted; | |
| 504 int resynth_required; | |
| 505 | |
| 506 pdf_doc_event_cb *event_cb; | |
| 507 pdf_free_doc_event_data_cb *free_event_data_cb; | |
| 508 void *event_cb_data; | |
| 509 | |
| 510 int num_type3_fonts; | |
| 511 int max_type3_fonts; | |
| 512 fz_font **type3_fonts; | |
| 513 | |
| 514 struct { | |
| 515 fz_hash_table *fonts; | |
| 516 fz_hash_table *colorspaces; | |
| 517 } resources; | |
| 518 | |
| 519 int orphans_max; | |
| 520 int orphans_count; | |
| 521 pdf_obj **orphans; | |
| 522 | |
| 523 fz_xml_doc *xfa; | |
| 524 | |
| 525 pdf_journal *journal; | |
| 526 }; | |
| 527 | |
| 528 pdf_document *pdf_create_document(fz_context *ctx); | |
| 529 | |
| 530 typedef struct pdf_graft_map pdf_graft_map; | |
| 531 | |
| 532 /* | |
| 533 Return a deep copied object equivalent to the | |
| 534 supplied object, suitable for use within the given document. | |
| 535 | |
| 536 dst: The document in which the returned object is to be used. | |
| 537 | |
| 538 obj: The object to deep copy. | |
| 539 | |
| 540 Note: If grafting multiple objects, you should use a pdf_graft_map | |
| 541 to avoid potential duplication of target objects. | |
| 542 */ | |
| 543 pdf_obj *pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_obj *obj); | |
| 544 | |
| 545 /* | |
| 546 Prepare a graft map object to allow objects | |
| 547 to be deep copied from one document to the given one, avoiding | |
| 548 problems with duplicated child objects. | |
| 549 | |
| 550 dst: The document to copy objects to. | |
| 551 | |
| 552 Note: all the source objects must come from the same document. | |
| 553 */ | |
| 554 pdf_graft_map *pdf_new_graft_map(fz_context *ctx, pdf_document *dst); | |
| 555 | |
| 556 pdf_graft_map *pdf_keep_graft_map(fz_context *ctx, pdf_graft_map *map); | |
| 557 void pdf_drop_graft_map(fz_context *ctx, pdf_graft_map *map); | |
| 558 | |
| 559 /* | |
| 560 Return a deep copied object equivalent | |
| 561 to the supplied object, suitable for use within the target | |
| 562 document of the map. | |
| 563 | |
| 564 map: A map targeted at the document in which the returned | |
| 565 object is to be used. | |
| 566 | |
| 567 obj: The object to be copied. | |
| 568 | |
| 569 Note: Copying multiple objects via the same graft map ensures | |
| 570 that any shared children are not copied more than once. | |
| 571 */ | |
| 572 pdf_obj *pdf_graft_mapped_object(fz_context *ctx, pdf_graft_map *map, pdf_obj *obj); | |
| 573 | |
| 574 /* | |
| 575 Graft a page (and its resources) from the src document to the | |
| 576 destination document of the graft. This involves a deep copy | |
| 577 of the objects in question. | |
| 578 | |
| 579 map: A map targeted at the document into which the page should | |
| 580 be inserted. | |
| 581 | |
| 582 page_to: The position within the destination document at which | |
| 583 the page should be inserted (pages numbered from 0, with -1 | |
| 584 meaning "at the end"). | |
| 585 | |
| 586 src: The document from which the page should be copied. | |
| 587 | |
| 588 page_from: The page number which should be copied from the src | |
| 589 document (pages numbered from 0, with -1 meaning "at the end"). | |
| 590 */ | |
| 591 void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from); | |
| 592 void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from); | |
| 593 | |
| 594 /* | |
| 595 Create a device that will record the | |
| 596 graphical operations given to it into a sequence of | |
| 597 pdf operations, together with a set of resources. This | |
| 598 sequence/set pair can then be used as the basis for | |
| 599 adding a page to the document (see pdf_add_page). | |
| 600 Returns a kept reference. | |
| 601 | |
| 602 doc: The document for which these are intended. | |
| 603 | |
| 604 mediabox: The bbox for the created page. | |
| 605 | |
| 606 presources: Pointer to a place to put the created | |
| 607 resources dictionary. | |
| 608 | |
| 609 pcontents: Pointer to a place to put the created | |
| 610 contents buffer. | |
| 611 */ | |
| 612 fz_device *pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents); | |
| 613 | |
| 614 /* | |
| 615 Create a pdf device. Rendering to the device creates | |
| 616 new pdf content. WARNING: this device is work in progress. It doesn't | |
| 617 currently support all rendering cases. | |
| 618 | |
| 619 Note that contents must be a stream (dictionary) to be updated (or | |
| 620 a reference to a stream). Callers should take care to ensure that it | |
| 621 is not an array, and that it is not shared with other objects/pages. | |
| 622 */ | |
| 623 fz_device *pdf_new_pdf_device(fz_context *ctx, pdf_document *doc, fz_matrix topctm, pdf_obj *resources, fz_buffer *contents); | |
| 624 | |
| 625 /* | |
| 626 Create a pdf_obj within a document that | |
| 627 represents a page, from a previously created resources | |
| 628 dictionary and page content stream. This should then be | |
| 629 inserted into the document using pdf_insert_page. | |
| 630 | |
| 631 After this call the page exists within the document | |
| 632 structure, but is not actually ever displayed as it is | |
| 633 not linked into the PDF page tree. | |
| 634 | |
| 635 doc: The document to which to add the page. | |
| 636 | |
| 637 mediabox: The mediabox for the page (should be identical | |
| 638 to that used when creating the resources/contents). | |
| 639 | |
| 640 rotate: 0, 90, 180 or 270. The rotation to use for the | |
| 641 page. | |
| 642 | |
| 643 resources: The resources dictionary for the new page | |
| 644 (typically created by pdf_page_write). | |
| 645 | |
| 646 contents: The page contents for the new page (typically | |
| 647 created by pdf_page_write). | |
| 648 */ | |
| 649 pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents); | |
| 650 | |
| 651 /* | |
| 652 Insert a page previously created by | |
| 653 pdf_add_page into the pages tree of the document. | |
| 654 | |
| 655 doc: The document to insert into. | |
| 656 | |
| 657 at: The page number to insert at (pages numbered from 0). | |
| 658 0 <= n <= page_count inserts before page n. Negative numbers | |
| 659 or INT_MAX are treated as page count, and insert at the end. | |
| 660 0 inserts at the start. All existing pages after the | |
| 661 insertion point are shuffled up. | |
| 662 | |
| 663 page: The page to insert. | |
| 664 */ | |
| 665 void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page); | |
| 666 | |
| 667 /* | |
| 668 Delete a page from the page tree of | |
| 669 a document. This does not remove the page contents | |
| 670 or resources from the file. | |
| 671 | |
| 672 doc: The document to operate on. | |
| 673 | |
| 674 number: The page to remove (numbered from 0) | |
| 675 */ | |
| 676 void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number); | |
| 677 | |
| 678 /* | |
| 679 Delete a range of pages from the | |
| 680 page tree of a document. This does not remove the page | |
| 681 contents or resources from the file. | |
| 682 | |
| 683 doc: The document to operate on. | |
| 684 | |
| 685 start, end: The range of pages (numbered from 0) | |
| 686 (inclusive, exclusive) to remove. If end is negative or | |
| 687 greater than the number of pages in the document, it | |
| 688 will be taken to be the end of the document. | |
| 689 */ | |
| 690 void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end); | |
| 691 | |
| 692 /* | |
| 693 Get page label (string) from a page number (index). | |
| 694 */ | |
| 695 void pdf_page_label(fz_context *ctx, pdf_document *doc, int page, char *buf, size_t size); | |
| 696 void pdf_page_label_imp(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size); | |
| 697 | |
| 698 typedef enum { | |
| 699 PDF_PAGE_LABEL_NONE = 0, | |
| 700 PDF_PAGE_LABEL_DECIMAL = 'D', | |
| 701 PDF_PAGE_LABEL_ROMAN_UC = 'R', | |
| 702 PDF_PAGE_LABEL_ROMAN_LC = 'r', | |
| 703 PDF_PAGE_LABEL_ALPHA_UC = 'A', | |
| 704 PDF_PAGE_LABEL_ALPHA_LC = 'a', | |
| 705 } pdf_page_label_style; | |
| 706 | |
| 707 void pdf_set_page_labels(fz_context *ctx, pdf_document *doc, int index, pdf_page_label_style style, const char *prefix, int start); | |
| 708 void pdf_delete_page_labels(fz_context *ctx, pdf_document *doc, int index); | |
| 709 | |
| 710 fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc); | |
| 711 void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang); | |
| 712 | |
| 713 /* | |
| 714 In calls to pdf_save_document or pdf_write_document, the following options structure can be used | |
| 715 to control aspects of the writing process. This structure may grow | |
| 716 in the future, and should be zero-filled to allow forwards compatibility. | |
| 717 */ | |
| 718 typedef struct | |
| 719 { | |
| 720 int do_incremental; /* Write just the changed objects. */ | |
| 721 int do_pretty; /* Pretty-print dictionaries and arrays. */ | |
| 722 int do_ascii; /* ASCII hex encode binary streams. */ | |
| 723 int do_compress; /* Compress streams. 1 zlib, 2 brotli */ | |
| 724 int do_compress_images; /* Compress (or leave compressed) image streams. */ | |
| 725 int do_compress_fonts; /* Compress (or leave compressed) font streams. */ | |
| 726 int do_decompress; /* Decompress streams (except when compressing images/fonts). */ | |
| 727 int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */ | |
| 728 int do_linear; /* Write linearised. */ | |
| 729 int do_clean; /* Clean content streams. */ | |
| 730 int do_sanitize; /* Sanitize content streams. */ | |
| 731 int do_appearance; /* (Re)create appearance streams. */ | |
| 732 int do_encrypt; /* Encryption method to use: keep, none, rc4-40, etc. */ | |
| 733 int dont_regenerate_id; /* Don't regenerate ID if set (used for clean) */ | |
| 734 int permissions; /* Document encryption permissions. */ | |
| 735 char opwd_utf8[128]; /* Owner password. */ | |
| 736 char upwd_utf8[128]; /* User password. */ | |
| 737 int do_snapshot; /* Do not use directly. Use the snapshot functions. */ | |
| 738 int do_preserve_metadata; /* When cleaning, preserve metadata unchanged. */ | |
| 739 int do_use_objstms; /* Use objstms if possible */ | |
| 740 int compression_effort; /* 0 for default. 100 = max, 1 = min. */ | |
| 741 int do_labels; /* Add labels to each object showing how it can be reached from the Root. */ | |
| 742 } pdf_write_options; | |
| 743 | |
| 744 FZ_DATA extern const pdf_write_options pdf_default_write_options; | |
| 745 | |
| 746 /* | |
| 747 Parse option string into a pdf_write_options struct. | |
| 748 Matches the command line options to 'mutool clean': | |
| 749 g: garbage collect | |
| 750 d, i, f: expand all, fonts, images | |
| 751 l: linearize | |
| 752 a: ascii hex encode | |
| 753 z: deflate | |
| 754 c: clean content streams | |
| 755 s: sanitize content streams | |
| 756 */ | |
| 757 pdf_write_options *pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args); | |
| 758 | |
| 759 /* | |
| 760 Returns true if there are digital signatures waiting to | |
| 761 be updated on save. | |
| 762 */ | |
| 763 int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc); | |
| 764 | |
| 765 /* | |
| 766 Write out the document to an output stream with all changes finalised. | |
| 767 */ | |
| 768 void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *opts); | |
| 769 | |
| 770 /* | |
| 771 Write out the document to a file with all changes finalised. | |
| 772 */ | |
| 773 void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *opts); | |
| 774 | |
| 775 /* | |
| 776 Snapshot the document to a file. This does not cause the | |
| 777 incremental xref to be finalized, so the document in memory | |
| 778 remains (essentially) unchanged. | |
| 779 */ | |
| 780 void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename); | |
| 781 | |
| 782 /* | |
| 783 Snapshot the document to an output stream. This does not cause | |
| 784 the incremental xref to be finalized, so the document in memory | |
| 785 remains (essentially) unchanged. | |
| 786 */ | |
| 787 void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out); | |
| 788 | |
| 789 char *pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts); | |
| 790 | |
| 791 /* | |
| 792 Return true if the document can be saved incrementally. Applying | |
| 793 redactions or having a repaired document make incremental saving | |
| 794 impossible. | |
| 795 */ | |
| 796 int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc); | |
| 797 | |
| 798 /* | |
| 799 Write out the journal to an output stream. | |
| 800 */ | |
| 801 void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out); | |
| 802 | |
| 803 /* | |
| 804 Write out the journal to a file. | |
| 805 */ | |
| 806 void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename); | |
| 807 | |
| 808 /* | |
| 809 Read a journal from a file. Will do nothing if the journal | |
| 810 does not match. Will throw on a corrupted journal. | |
| 811 */ | |
| 812 void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename); | |
| 813 | |
| 814 /* | |
| 815 Read a journal from a stream. Will do nothing if the journal | |
| 816 does not match. Will throw on a corrupted journal. | |
| 817 */ | |
| 818 void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm); | |
| 819 | |
| 820 /* | |
| 821 Minimize the memory used by a document. | |
| 822 | |
| 823 We walk the in memory xref tables, evicting the PDF objects | |
| 824 therein that aren't in use. | |
| 825 | |
| 826 This reduces the current memory use, but any subsequent use | |
| 827 of these objects will load them back into memory again. | |
| 828 */ | |
| 829 void pdf_minimize_document(fz_context *ctx, pdf_document *doc); | |
| 830 | |
| 831 /* | |
| 832 Map a pdf object representing a structure tag through | |
| 833 an optional role_map and convert to an fz_structure. | |
| 834 */ | |
| 835 fz_structure pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag); | |
| 836 | |
| 837 /* | |
| 838 Run the document structure to a device. | |
| 839 */ | |
| 840 void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie); | |
| 841 | |
| 842 /* | |
| 843 Return the count of the associated files on a document. | |
| 844 Note that this is the count of files associated at the document | |
| 845 level and does not necessarily include files associated at other | |
| 846 levels. | |
| 847 */ | |
| 848 int pdf_count_document_associated_files(fz_context *ctx, pdf_document *doc); | |
| 849 | |
| 850 /* | |
| 851 Return a borrowed pointer to the PDF object that represents a | |
| 852 given associated file on a document. | |
| 853 | |
| 854 Indexed from 0 to count-1. | |
| 855 */ | |
| 856 pdf_obj *pdf_document_associated_file(fz_context *ctx, pdf_document *doc, int idx); | |
| 857 | |
| 858 /* | |
| 859 Return the count of the associated files on a given page. | |
| 860 Note that this is the count of files associated at the page | |
| 861 level and does not necessarily include files associated at other | |
| 862 levels. | |
| 863 */ | |
| 864 int pdf_count_page_associated_files(fz_context *ctx, pdf_page *page); | |
| 865 | |
| 866 /* | |
| 867 Return a borrowed pointer to the PDF object that represents a | |
| 868 given associated file on a page. | |
| 869 | |
| 870 Indexed from 0 to count-1. | |
| 871 */ | |
| 872 pdf_obj *pdf_page_associated_file(fz_context *ctx, pdf_page *page, int idx); | |
| 873 | |
| 874 | |
| 875 /* | |
| 876 A structure used to create "labels" for numbered objects. | |
| 877 The labels are different ways to reach an object from the trailer | |
| 878 and page tree, using the "mutool show" syntax. | |
| 879 | |
| 880 Note: Paths involving "Parent", "P", "Prev", and "Last" are ignored, | |
| 881 as these are used for cycles in the structures which we don't care about | |
| 882 labeling. | |
| 883 */ | |
| 884 typedef struct pdf_object_labels pdf_object_labels; | |
| 885 | |
| 886 /* | |
| 887 Scan the entire object structure to create a directed graph | |
| 888 of indirect numbered objects and how they can reach each other. | |
| 889 */ | |
| 890 pdf_object_labels *pdf_load_object_labels(fz_context *ctx, pdf_document *doc); | |
| 891 | |
| 892 void pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g); | |
| 893 | |
| 894 /* | |
| 895 Enumerate all the possible labels for a given numbered object. | |
| 896 The callback is invoked with a path for each possible way the object | |
| 897 can be reached from the PDF trailer. | |
| 898 */ | |
| 899 typedef void (pdf_label_object_fn)(fz_context *ctx, void *arg, const char *label); | |
| 900 void pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg); | |
| 901 | |
| 902 #endif |
