Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/pdf/pdf-outline.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/pdf/pdf-outline.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,600 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" +#include "pdf-annot-imp.h" + +#include <string.h> +#include <math.h> + +/* + The URI encoding format broadly follows that described in + "Parameters for Opening PDF files" from the Adobe Acrobat SDK, + version 8.1, which can, at the time of writing, be found here: + + https://web.archive.org/web/20170921000830/http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_open_parameters.pdf +*/ + +static void +pdf_test_outline(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_mark_bits *marks, pdf_obj *parent, int *fixed) +{ + int parent_diff, prev_diff, last_diff; + pdf_obj *first, *last, *next, *prev; + pdf_obj *expected_parent = parent; + pdf_obj *expected_prev = NULL; + + last = pdf_dict_get(ctx, expected_parent, PDF_NAME(Last)); + + while (dict && pdf_is_dict(ctx, dict)) + { + if (pdf_mark_bits_set(ctx, marks, dict)) + fz_throw(ctx, FZ_ERROR_FORMAT, "Cycle detected in outlines"); + + if (!pdf_is_indirect(ctx, dict)) + fz_throw(ctx, FZ_ERROR_FORMAT, "Non-indirect outline entry discovered"); + + parent = pdf_dict_get(ctx, dict, PDF_NAME(Parent)); + prev = pdf_dict_get(ctx, dict, PDF_NAME(Prev)); + next = pdf_dict_get(ctx, dict, PDF_NAME(Next)); + + parent_diff = pdf_objcmp(ctx, parent, expected_parent); + prev_diff = pdf_objcmp(ctx, prev, expected_prev); + last_diff = next == NULL && pdf_objcmp_resolve(ctx, last, dict); + + if (fixed == NULL) + { + if (parent_diff) + fz_throw(ctx, FZ_ERROR_FORMAT, "Outline parent pointer still bad or missing despite repair"); + if (prev_diff) + fz_throw(ctx, FZ_ERROR_FORMAT, "Outline prev pointer still bad or missing despite repair"); + if (last_diff) + fz_throw(ctx, FZ_ERROR_FORMAT, "Outline last pointer still bad or missing despite repair"); + } + else if (parent_diff || prev_diff || last_diff) + { + if (*fixed == 0) + pdf_begin_operation(ctx, doc, "Repair outline nodes"); + *fixed = 1; + doc->non_structural_change = 1; + fz_try(ctx) + { + if (parent_diff) + { + fz_warn(ctx, "Bad or missing parent pointer in outline tree, repairing"); + pdf_dict_put(ctx, dict, PDF_NAME(Parent), expected_parent); + } + if (prev_diff) + { + fz_warn(ctx, "Bad or missing prev pointer in outline tree, repairing"); + if (expected_prev) + pdf_dict_put(ctx, dict, PDF_NAME(Prev), expected_prev); + else + pdf_dict_del(ctx, dict, PDF_NAME(Prev)); + } + if (last_diff) + { + fz_warn(ctx, "Bad or missing last pointer in outline tree, repairing"); + pdf_dict_put(ctx, expected_parent, PDF_NAME(Last), dict); + } + } + fz_always(ctx) + doc->non_structural_change = 0; + fz_catch(ctx) + fz_rethrow(ctx); + } + + first = pdf_dict_get(ctx, dict, PDF_NAME(First)); + if (first) + pdf_test_outline(ctx, doc, first, marks, dict, fixed); + + expected_prev = dict; + dict = next; + } +} + +fz_outline * +pdf_load_outline(fz_context *ctx, pdf_document *doc) +{ + /* Just appeal to the fz_ level. */ + return fz_load_outline(ctx, (fz_document *)doc); +} + +enum { + MOD_NONE = 0, + MOD_BELOW = 1, + MOD_AFTER = 2 +}; + +typedef struct pdf_outline_iterator { + fz_outline_iterator super; + fz_outline_item item; + pdf_obj *current; + int modifier; +} pdf_outline_iterator; + +static int +pdf_outline_iterator_next(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_obj *next; + + if (iter->modifier != MOD_NONE || iter->current == NULL) + return -1; + next = pdf_dict_get(ctx, iter->current, PDF_NAME(Next)); + if (next == NULL) + { + iter->modifier = MOD_AFTER; + return 1; + } + + iter->modifier = MOD_NONE; + iter->current = next; + return 0; +} + +static int +pdf_outline_iterator_prev(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_obj *prev; + + if (iter->modifier == MOD_BELOW || iter->current == NULL) + return -1; + if (iter->modifier == MOD_AFTER) + { + iter->modifier = MOD_NONE; + return 0; + } + prev = pdf_dict_get(ctx, iter->current, PDF_NAME(Prev)); + if (prev == NULL) + return -1; + + iter->modifier = MOD_NONE; + iter->current = prev; + return 0; +} + +static int +pdf_outline_iterator_up(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_obj *up; + pdf_obj *grandparent; + + if (iter->current == NULL) + return -1; + if (iter->modifier == MOD_BELOW) + { + iter->modifier = MOD_NONE; + return 0; + } + /* The topmost level still has a parent pointer, just one + * that points to the outlines object. We never want to + * allow us to move 'up' onto the outlines object. */ + up = pdf_dict_get(ctx, iter->current, PDF_NAME(Parent)); + if (up == NULL) + /* This should never happen! */ + return -1; + grandparent = pdf_dict_get(ctx, up, PDF_NAME(Parent)); + if (grandparent == NULL) + return -1; + + iter->modifier = MOD_NONE; + iter->current = up; + return 0; +} + +static int +pdf_outline_iterator_down(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_obj *down; + + if (iter->modifier != MOD_NONE || iter->current == NULL) + return -1; + down = pdf_dict_get(ctx, iter->current, PDF_NAME(First)); + if (down == NULL) + { + iter->modifier = MOD_BELOW; + return 1; + } + + iter->modifier = MOD_NONE; + iter->current = down; + return 0; +} + +static void +do_outline_update(fz_context *ctx, pdf_obj *obj, fz_outline_item *item, int is_new_node) +{ + int count; + int open_delta = 0; + pdf_obj *parent; + + /* If the open/closed state changes, update. */ + count = pdf_dict_get_int(ctx, obj, PDF_NAME(Count)); + if ((count < 0 && item->is_open) || (count > 0 && !item->is_open)) + { + pdf_dict_put_int(ctx, obj, PDF_NAME(Count), -count); + open_delta = -count; + } + else if (is_new_node) + open_delta = 1; + + parent = pdf_dict_get(ctx, obj, PDF_NAME(Parent)); + while (parent) + { + pdf_obj *cobj = pdf_dict_get(ctx, parent, PDF_NAME(Count)); + count = pdf_to_int(ctx, cobj); + if (open_delta || cobj == NULL) + pdf_dict_put_int(ctx, parent, PDF_NAME(Count), count > 0 ? count + open_delta : count - open_delta); + if (count < 0) + break; + parent = pdf_dict_get(ctx, parent, PDF_NAME(Parent)); + } + + if (item->title) + pdf_dict_put_text_string(ctx, obj, PDF_NAME(Title), item->title); + else + pdf_dict_del(ctx, obj, PDF_NAME(Title)); + + pdf_dict_del(ctx, obj, PDF_NAME(A)); + pdf_dict_del(ctx, obj, PDF_NAME(C)); + pdf_dict_del(ctx, obj, PDF_NAME(F)); + pdf_dict_del(ctx, obj, PDF_NAME(Dest)); + if (item->uri) + { + pdf_document *doc = pdf_get_bound_document(ctx, obj); + + if (item->uri[0] == '#') + pdf_dict_put_drop(ctx, obj, PDF_NAME(Dest), + pdf_new_dest_from_link(ctx, doc, item->uri, 0)); + else if (!strncmp(item->uri, "file:", 5)) + pdf_dict_put_drop(ctx, obj, PDF_NAME(Dest), + pdf_new_dest_from_link(ctx, doc, item->uri, 1)); + else + pdf_dict_put_drop(ctx, obj, PDF_NAME(A), + pdf_new_action_from_link(ctx, doc, item->uri)); + } + if (item->r != 0 || item->g != 0 || item->b != 0) + { + pdf_obj *color = pdf_dict_put_array(ctx, obj, PDF_NAME(C), 3); + pdf_array_put_real(ctx, color, 0, item->r / 255.0); + pdf_array_put_real(ctx, color, 1, item->g / 255.0); + pdf_array_put_real(ctx, color, 2, item->b / 255.0); + } + if (item->flags != 0) + pdf_dict_put_int(ctx, obj, PDF_NAME(F), item->flags); +} + +static int +pdf_outline_iterator_insert(fz_context *ctx, fz_outline_iterator *iter_, fz_outline_item *item) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_document *doc = (pdf_document *)iter->super.doc; + pdf_obj *obj = NULL; + pdf_obj *prev; + pdf_obj *parent; + pdf_obj *outlines = NULL; + pdf_obj *newoutlines = NULL; + int result = 0; + + fz_var(obj); + fz_var(newoutlines); + + pdf_begin_operation(ctx, doc, "Insert outline item"); + + fz_try(ctx) + { + obj = pdf_add_new_dict(ctx, doc, 4); + + if (iter->modifier == MOD_BELOW) + parent = iter->current; + else if (iter->modifier == MOD_NONE && iter->current == NULL) + { + pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); + outlines = pdf_dict_get(ctx, root, PDF_NAME(Outlines)); + if (outlines == NULL) + { + /* No outlines entry, better make one. */ + newoutlines = outlines = pdf_add_new_dict(ctx, doc, 4); + pdf_dict_put(ctx, root, PDF_NAME(Outlines), outlines); + pdf_dict_put(ctx, outlines, PDF_NAME(Type), PDF_NAME(Outlines)); + } + iter->modifier = MOD_BELOW; + iter->current = outlines; + parent = outlines; + } + else + parent = pdf_dict_get(ctx, iter->current, PDF_NAME(Parent)); + + pdf_dict_put(ctx, obj, PDF_NAME(Parent), parent); + + do_outline_update(ctx, obj, item, 1); + + switch (iter->modifier) + { + case MOD_BELOW: + pdf_dict_put(ctx, iter->current, PDF_NAME(First), obj); + pdf_dict_put(ctx, iter->current, PDF_NAME(Last), obj); + iter->current = obj; + iter->modifier = MOD_AFTER; + result = 1; + break; + case MOD_AFTER: + pdf_dict_put(ctx, obj, PDF_NAME(Prev), iter->current); + pdf_dict_put(ctx, iter->current, PDF_NAME(Next), obj); + pdf_dict_put(ctx, parent, PDF_NAME(Last), obj); + iter->current = obj; + result = 1; + break; + default: + prev = pdf_dict_get(ctx, iter->current, PDF_NAME(Prev)); + if (prev) + { + pdf_dict_put(ctx, prev, PDF_NAME(Next), obj); + pdf_dict_put(ctx, obj, PDF_NAME(Prev), prev); + } + else + pdf_dict_put(ctx, parent, PDF_NAME(First), obj); + pdf_dict_put(ctx, iter->current, PDF_NAME(Prev), obj); + pdf_dict_put(ctx, obj, PDF_NAME(Next), iter->current); + result = 0; + break; + } + pdf_end_operation(ctx, doc); + } + fz_always(ctx) + { + pdf_drop_obj(ctx, obj); + pdf_drop_obj(ctx, newoutlines); + } + fz_catch(ctx) + { + pdf_abandon_operation(ctx, doc); + fz_rethrow(ctx); + } + + return result; +} + +static void +pdf_outline_iterator_update(fz_context *ctx, fz_outline_iterator *iter_, fz_outline_item *item) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_document *doc = (pdf_document *)iter->super.doc; + + if (iter->modifier != MOD_NONE || iter->current == NULL) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update a non-existent outline item!"); + + pdf_begin_operation(ctx, doc, "Update outline item"); + + fz_try(ctx) + { + do_outline_update(ctx, iter->current, item, 0); + pdf_end_operation(ctx, doc); + } + fz_catch(ctx) + { + pdf_abandon_operation(ctx, doc); + fz_rethrow(ctx); + } +} + +static int +pdf_outline_iterator_del(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_document *doc = (pdf_document *)iter->super.doc; + pdf_obj *next, *prev, *parent; + int result = 0; + int count; + + if (iter->modifier != MOD_NONE || iter->current == NULL) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't delete a non-existent outline item!"); + + prev = pdf_dict_get(ctx, iter->current, PDF_NAME(Prev)); + next = pdf_dict_get(ctx, iter->current, PDF_NAME(Next)); + parent = pdf_dict_get(ctx, iter->current, PDF_NAME(Parent)); + count = pdf_dict_get_int(ctx, iter->current, PDF_NAME(Count)); + /* How many nodes visible from above are being removed? */ + if (count > 0) + count++; /* Open children, plus this node. */ + else + count = 1; /* Just this node */ + + pdf_begin_operation(ctx, doc, "Delete outline item"); + + fz_try(ctx) + { + pdf_obj *up = parent; + while (up) + { + int c = pdf_dict_get_int(ctx, up, PDF_NAME(Count)); + pdf_dict_put_int(ctx, up, PDF_NAME(Count), (c > 0 ? c - count : c + count)); + if (c < 0) + break; + up = pdf_dict_get(ctx, up, PDF_NAME(Parent)); + } + + if (prev) + { + if (next) + pdf_dict_put(ctx, prev, PDF_NAME(Next), next); + else + pdf_dict_del(ctx, prev, PDF_NAME(Next)); + } + if (next) + { + if (prev) + pdf_dict_put(ctx, next, PDF_NAME(Prev), prev); + else + { + pdf_dict_put(ctx, parent, PDF_NAME(First), next); + pdf_dict_del(ctx, next, PDF_NAME(Prev)); + } + iter->current = next; + } + else if (prev) + { + iter->current = prev; + pdf_dict_put(ctx, parent, PDF_NAME(Last), prev); + } + else if (parent) + { + iter->current = parent; + iter->modifier = MOD_BELOW; + pdf_dict_del(ctx, parent, PDF_NAME(First)); + pdf_dict_del(ctx, parent, PDF_NAME(Last)); + result = 1; + } + else + { + iter->current = NULL; + result = 1; + } + pdf_end_operation(ctx, doc); + } + fz_catch(ctx) + { + pdf_abandon_operation(ctx, doc); + fz_rethrow(ctx); + } + + return result; +} + +static fz_outline_item * +pdf_outline_iterator_item(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + pdf_obj *obj; + pdf_document *doc = (pdf_document *)iter->super.doc; + + if (iter->modifier != MOD_NONE || iter->current == NULL) + return NULL; + + fz_free(ctx, iter->item.title); + iter->item.title = NULL; + fz_free(ctx, iter->item.uri); + iter->item.uri = NULL; + + obj = pdf_dict_get(ctx, iter->current, PDF_NAME(Title)); + if (obj) + iter->item.title = Memento_label(fz_strdup(ctx, pdf_to_text_string(ctx, obj)), "outline_title"); + obj = pdf_dict_get(ctx, iter->current, PDF_NAME(Dest)); + if (obj) + iter->item.uri = Memento_label(pdf_parse_link_dest(ctx, doc, obj), "outline_uri"); + else + { + obj = pdf_dict_get(ctx, iter->current, PDF_NAME(A)); + if (obj) + iter->item.uri = Memento_label(pdf_parse_link_action(ctx, doc, obj, -1), "outline_uri"); + } + + iter->item.is_open = pdf_dict_get_int(ctx, iter->current, PDF_NAME(Count)) > 0; + + obj = pdf_dict_get(ctx, iter->current, PDF_NAME(C)); + iter->item.r = (int)(0.5 + 255 * pdf_array_get_real(ctx, obj, 0)); + iter->item.g = (int)(0.5 + 255 * pdf_array_get_real(ctx, obj, 1)); + iter->item.b = (int)(0.5 + 255 * pdf_array_get_real(ctx, obj, 2)); + + iter->item.flags = pdf_dict_get_int(ctx, iter->current, PDF_NAME(F)) & 127; + + return &iter->item; +} + +static void +pdf_outline_iterator_drop(fz_context *ctx, fz_outline_iterator *iter_) +{ + pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_; + + if (iter == NULL) + return; + + fz_free(ctx, iter->item.title); + fz_free(ctx, iter->item.uri); +} + +fz_outline_iterator *pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc) +{ + pdf_obj *root, *obj, *first; + pdf_mark_bits *marks; + pdf_outline_iterator *iter = NULL; + int fixed = 0; + + /* Walk the outlines to spot problems that might bite us later + * (in particular, for cycles). */ + marks = pdf_new_mark_bits(ctx, doc); + fz_try(ctx) + { + root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); + obj = pdf_dict_get(ctx, root, PDF_NAME(Outlines)); + first = pdf_dict_get(ctx, obj, PDF_NAME(First)); + if (first) + { + /* cache page tree for fast link destination lookups. This + * will be dropped 'just in time' on writes to the doc. */ + pdf_load_page_tree(ctx, doc); + fz_try(ctx) + { + /* Pass through the outlines once, fixing inconsistencies */ + pdf_test_outline(ctx, doc, first, marks, obj, &fixed); + + if (fixed) + { + /* If a fix was performed, pass through again, + * this time throwing if it's still not correct. */ + pdf_mark_bits_reset(ctx, marks); + pdf_test_outline(ctx, doc, first, marks, obj, NULL); + pdf_end_operation(ctx, doc); + } + } + fz_catch(ctx) + { + if (fixed) + pdf_abandon_operation(ctx, doc); + fz_rethrow(ctx); + } + } + } + fz_always(ctx) + pdf_drop_mark_bits(ctx, marks); + fz_catch(ctx) + fz_rethrow(ctx); + + iter = fz_new_derived_outline_iter(ctx, pdf_outline_iterator, &doc->super); + iter->super.del = pdf_outline_iterator_del; + iter->super.next = pdf_outline_iterator_next; + iter->super.prev = pdf_outline_iterator_prev; + iter->super.up = pdf_outline_iterator_up; + iter->super.down = pdf_outline_iterator_down; + iter->super.insert = pdf_outline_iterator_insert; + iter->super.update = pdf_outline_iterator_update; + iter->super.drop = pdf_outline_iterator_drop; + iter->super.item = pdf_outline_iterator_item; + iter->current = first; + iter->modifier = MOD_NONE; + + return &iter->super; +}
