comparison mupdf-source/source/pdf/pdf-outline.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25 #include "pdf-annot-imp.h"
26
27 #include <string.h>
28 #include <math.h>
29
30 /*
31 The URI encoding format broadly follows that described in
32 "Parameters for Opening PDF files" from the Adobe Acrobat SDK,
33 version 8.1, which can, at the time of writing, be found here:
34
35 https://web.archive.org/web/20170921000830/http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_open_parameters.pdf
36 */
37
38 static void
39 pdf_test_outline(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_mark_bits *marks, pdf_obj *parent, int *fixed)
40 {
41 int parent_diff, prev_diff, last_diff;
42 pdf_obj *first, *last, *next, *prev;
43 pdf_obj *expected_parent = parent;
44 pdf_obj *expected_prev = NULL;
45
46 last = pdf_dict_get(ctx, expected_parent, PDF_NAME(Last));
47
48 while (dict && pdf_is_dict(ctx, dict))
49 {
50 if (pdf_mark_bits_set(ctx, marks, dict))
51 fz_throw(ctx, FZ_ERROR_FORMAT, "Cycle detected in outlines");
52
53 if (!pdf_is_indirect(ctx, dict))
54 fz_throw(ctx, FZ_ERROR_FORMAT, "Non-indirect outline entry discovered");
55
56 parent = pdf_dict_get(ctx, dict, PDF_NAME(Parent));
57 prev = pdf_dict_get(ctx, dict, PDF_NAME(Prev));
58 next = pdf_dict_get(ctx, dict, PDF_NAME(Next));
59
60 parent_diff = pdf_objcmp(ctx, parent, expected_parent);
61 prev_diff = pdf_objcmp(ctx, prev, expected_prev);
62 last_diff = next == NULL && pdf_objcmp_resolve(ctx, last, dict);
63
64 if (fixed == NULL)
65 {
66 if (parent_diff)
67 fz_throw(ctx, FZ_ERROR_FORMAT, "Outline parent pointer still bad or missing despite repair");
68 if (prev_diff)
69 fz_throw(ctx, FZ_ERROR_FORMAT, "Outline prev pointer still bad or missing despite repair");
70 if (last_diff)
71 fz_throw(ctx, FZ_ERROR_FORMAT, "Outline last pointer still bad or missing despite repair");
72 }
73 else if (parent_diff || prev_diff || last_diff)
74 {
75 if (*fixed == 0)
76 pdf_begin_operation(ctx, doc, "Repair outline nodes");
77 *fixed = 1;
78 doc->non_structural_change = 1;
79 fz_try(ctx)
80 {
81 if (parent_diff)
82 {
83 fz_warn(ctx, "Bad or missing parent pointer in outline tree, repairing");
84 pdf_dict_put(ctx, dict, PDF_NAME(Parent), expected_parent);
85 }
86 if (prev_diff)
87 {
88 fz_warn(ctx, "Bad or missing prev pointer in outline tree, repairing");
89 if (expected_prev)
90 pdf_dict_put(ctx, dict, PDF_NAME(Prev), expected_prev);
91 else
92 pdf_dict_del(ctx, dict, PDF_NAME(Prev));
93 }
94 if (last_diff)
95 {
96 fz_warn(ctx, "Bad or missing last pointer in outline tree, repairing");
97 pdf_dict_put(ctx, expected_parent, PDF_NAME(Last), dict);
98 }
99 }
100 fz_always(ctx)
101 doc->non_structural_change = 0;
102 fz_catch(ctx)
103 fz_rethrow(ctx);
104 }
105
106 first = pdf_dict_get(ctx, dict, PDF_NAME(First));
107 if (first)
108 pdf_test_outline(ctx, doc, first, marks, dict, fixed);
109
110 expected_prev = dict;
111 dict = next;
112 }
113 }
114
115 fz_outline *
116 pdf_load_outline(fz_context *ctx, pdf_document *doc)
117 {
118 /* Just appeal to the fz_ level. */
119 return fz_load_outline(ctx, (fz_document *)doc);
120 }
121
/*
	Qualifies how pdf_outline_iterator.current is to be interpreted.
*/
enum
{
	/* The iterator is on the item 'current' itself. */
	MOD_NONE = 0,
	/* The iterator is on the (non-existent) first child of 'current'. */
	MOD_BELOW = 1,
	/* The iterator is just past 'current', which has no next sibling. */
	MOD_AFTER = 2
};
127
128 typedef struct pdf_outline_iterator {
129 fz_outline_iterator super;
130 fz_outline_item item;
131 pdf_obj *current;
132 int modifier;
133 } pdf_outline_iterator;
134
135 static int
136 pdf_outline_iterator_next(fz_context *ctx, fz_outline_iterator *iter_)
137 {
138 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
139 pdf_obj *next;
140
141 if (iter->modifier != MOD_NONE || iter->current == NULL)
142 return -1;
143 next = pdf_dict_get(ctx, iter->current, PDF_NAME(Next));
144 if (next == NULL)
145 {
146 iter->modifier = MOD_AFTER;
147 return 1;
148 }
149
150 iter->modifier = MOD_NONE;
151 iter->current = next;
152 return 0;
153 }
154
155 static int
156 pdf_outline_iterator_prev(fz_context *ctx, fz_outline_iterator *iter_)
157 {
158 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
159 pdf_obj *prev;
160
161 if (iter->modifier == MOD_BELOW || iter->current == NULL)
162 return -1;
163 if (iter->modifier == MOD_AFTER)
164 {
165 iter->modifier = MOD_NONE;
166 return 0;
167 }
168 prev = pdf_dict_get(ctx, iter->current, PDF_NAME(Prev));
169 if (prev == NULL)
170 return -1;
171
172 iter->modifier = MOD_NONE;
173 iter->current = prev;
174 return 0;
175 }
176
177 static int
178 pdf_outline_iterator_up(fz_context *ctx, fz_outline_iterator *iter_)
179 {
180 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
181 pdf_obj *up;
182 pdf_obj *grandparent;
183
184 if (iter->current == NULL)
185 return -1;
186 if (iter->modifier == MOD_BELOW)
187 {
188 iter->modifier = MOD_NONE;
189 return 0;
190 }
191 /* The topmost level still has a parent pointer, just one
192 * that points to the outlines object. We never want to
193 * allow us to move 'up' onto the outlines object. */
194 up = pdf_dict_get(ctx, iter->current, PDF_NAME(Parent));
195 if (up == NULL)
196 /* This should never happen! */
197 return -1;
198 grandparent = pdf_dict_get(ctx, up, PDF_NAME(Parent));
199 if (grandparent == NULL)
200 return -1;
201
202 iter->modifier = MOD_NONE;
203 iter->current = up;
204 return 0;
205 }
206
207 static int
208 pdf_outline_iterator_down(fz_context *ctx, fz_outline_iterator *iter_)
209 {
210 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
211 pdf_obj *down;
212
213 if (iter->modifier != MOD_NONE || iter->current == NULL)
214 return -1;
215 down = pdf_dict_get(ctx, iter->current, PDF_NAME(First));
216 if (down == NULL)
217 {
218 iter->modifier = MOD_BELOW;
219 return 1;
220 }
221
222 iter->modifier = MOD_NONE;
223 iter->current = down;
224 return 0;
225 }
226
227 static void
228 do_outline_update(fz_context *ctx, pdf_obj *obj, fz_outline_item *item, int is_new_node)
229 {
230 int count;
231 int open_delta = 0;
232 pdf_obj *parent;
233
234 /* If the open/closed state changes, update. */
235 count = pdf_dict_get_int(ctx, obj, PDF_NAME(Count));
236 if ((count < 0 && item->is_open) || (count > 0 && !item->is_open))
237 {
238 pdf_dict_put_int(ctx, obj, PDF_NAME(Count), -count);
239 open_delta = -count;
240 }
241 else if (is_new_node)
242 open_delta = 1;
243
244 parent = pdf_dict_get(ctx, obj, PDF_NAME(Parent));
245 while (parent)
246 {
247 pdf_obj *cobj = pdf_dict_get(ctx, parent, PDF_NAME(Count));
248 count = pdf_to_int(ctx, cobj);
249 if (open_delta || cobj == NULL)
250 pdf_dict_put_int(ctx, parent, PDF_NAME(Count), count > 0 ? count + open_delta : count - open_delta);
251 if (count < 0)
252 break;
253 parent = pdf_dict_get(ctx, parent, PDF_NAME(Parent));
254 }
255
256 if (item->title)
257 pdf_dict_put_text_string(ctx, obj, PDF_NAME(Title), item->title);
258 else
259 pdf_dict_del(ctx, obj, PDF_NAME(Title));
260
261 pdf_dict_del(ctx, obj, PDF_NAME(A));
262 pdf_dict_del(ctx, obj, PDF_NAME(C));
263 pdf_dict_del(ctx, obj, PDF_NAME(F));
264 pdf_dict_del(ctx, obj, PDF_NAME(Dest));
265 if (item->uri)
266 {
267 pdf_document *doc = pdf_get_bound_document(ctx, obj);
268
269 if (item->uri[0] == '#')
270 pdf_dict_put_drop(ctx, obj, PDF_NAME(Dest),
271 pdf_new_dest_from_link(ctx, doc, item->uri, 0));
272 else if (!strncmp(item->uri, "file:", 5))
273 pdf_dict_put_drop(ctx, obj, PDF_NAME(Dest),
274 pdf_new_dest_from_link(ctx, doc, item->uri, 1));
275 else
276 pdf_dict_put_drop(ctx, obj, PDF_NAME(A),
277 pdf_new_action_from_link(ctx, doc, item->uri));
278 }
279 if (item->r != 0 || item->g != 0 || item->b != 0)
280 {
281 pdf_obj *color = pdf_dict_put_array(ctx, obj, PDF_NAME(C), 3);
282 pdf_array_put_real(ctx, color, 0, item->r / 255.0);
283 pdf_array_put_real(ctx, color, 1, item->g / 255.0);
284 pdf_array_put_real(ctx, color, 2, item->b / 255.0);
285 }
286 if (item->flags != 0)
287 pdf_dict_put_int(ctx, obj, PDF_NAME(F), item->flags);
288 }
289
290 static int
291 pdf_outline_iterator_insert(fz_context *ctx, fz_outline_iterator *iter_, fz_outline_item *item)
292 {
293 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
294 pdf_document *doc = (pdf_document *)iter->super.doc;
295 pdf_obj *obj = NULL;
296 pdf_obj *prev;
297 pdf_obj *parent;
298 pdf_obj *outlines = NULL;
299 pdf_obj *newoutlines = NULL;
300 int result = 0;
301
302 fz_var(obj);
303 fz_var(newoutlines);
304
305 pdf_begin_operation(ctx, doc, "Insert outline item");
306
307 fz_try(ctx)
308 {
309 obj = pdf_add_new_dict(ctx, doc, 4);
310
311 if (iter->modifier == MOD_BELOW)
312 parent = iter->current;
313 else if (iter->modifier == MOD_NONE && iter->current == NULL)
314 {
315 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
316 outlines = pdf_dict_get(ctx, root, PDF_NAME(Outlines));
317 if (outlines == NULL)
318 {
319 /* No outlines entry, better make one. */
320 newoutlines = outlines = pdf_add_new_dict(ctx, doc, 4);
321 pdf_dict_put(ctx, root, PDF_NAME(Outlines), outlines);
322 pdf_dict_put(ctx, outlines, PDF_NAME(Type), PDF_NAME(Outlines));
323 }
324 iter->modifier = MOD_BELOW;
325 iter->current = outlines;
326 parent = outlines;
327 }
328 else
329 parent = pdf_dict_get(ctx, iter->current, PDF_NAME(Parent));
330
331 pdf_dict_put(ctx, obj, PDF_NAME(Parent), parent);
332
333 do_outline_update(ctx, obj, item, 1);
334
335 switch (iter->modifier)
336 {
337 case MOD_BELOW:
338 pdf_dict_put(ctx, iter->current, PDF_NAME(First), obj);
339 pdf_dict_put(ctx, iter->current, PDF_NAME(Last), obj);
340 iter->current = obj;
341 iter->modifier = MOD_AFTER;
342 result = 1;
343 break;
344 case MOD_AFTER:
345 pdf_dict_put(ctx, obj, PDF_NAME(Prev), iter->current);
346 pdf_dict_put(ctx, iter->current, PDF_NAME(Next), obj);
347 pdf_dict_put(ctx, parent, PDF_NAME(Last), obj);
348 iter->current = obj;
349 result = 1;
350 break;
351 default:
352 prev = pdf_dict_get(ctx, iter->current, PDF_NAME(Prev));
353 if (prev)
354 {
355 pdf_dict_put(ctx, prev, PDF_NAME(Next), obj);
356 pdf_dict_put(ctx, obj, PDF_NAME(Prev), prev);
357 }
358 else
359 pdf_dict_put(ctx, parent, PDF_NAME(First), obj);
360 pdf_dict_put(ctx, iter->current, PDF_NAME(Prev), obj);
361 pdf_dict_put(ctx, obj, PDF_NAME(Next), iter->current);
362 result = 0;
363 break;
364 }
365 pdf_end_operation(ctx, doc);
366 }
367 fz_always(ctx)
368 {
369 pdf_drop_obj(ctx, obj);
370 pdf_drop_obj(ctx, newoutlines);
371 }
372 fz_catch(ctx)
373 {
374 pdf_abandon_operation(ctx, doc);
375 fz_rethrow(ctx);
376 }
377
378 return result;
379 }
380
381 static void
382 pdf_outline_iterator_update(fz_context *ctx, fz_outline_iterator *iter_, fz_outline_item *item)
383 {
384 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
385 pdf_document *doc = (pdf_document *)iter->super.doc;
386
387 if (iter->modifier != MOD_NONE || iter->current == NULL)
388 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update a non-existent outline item!");
389
390 pdf_begin_operation(ctx, doc, "Update outline item");
391
392 fz_try(ctx)
393 {
394 do_outline_update(ctx, iter->current, item, 0);
395 pdf_end_operation(ctx, doc);
396 }
397 fz_catch(ctx)
398 {
399 pdf_abandon_operation(ctx, doc);
400 fz_rethrow(ctx);
401 }
402 }
403
404 static int
405 pdf_outline_iterator_del(fz_context *ctx, fz_outline_iterator *iter_)
406 {
407 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
408 pdf_document *doc = (pdf_document *)iter->super.doc;
409 pdf_obj *next, *prev, *parent;
410 int result = 0;
411 int count;
412
413 if (iter->modifier != MOD_NONE || iter->current == NULL)
414 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't delete a non-existent outline item!");
415
416 prev = pdf_dict_get(ctx, iter->current, PDF_NAME(Prev));
417 next = pdf_dict_get(ctx, iter->current, PDF_NAME(Next));
418 parent = pdf_dict_get(ctx, iter->current, PDF_NAME(Parent));
419 count = pdf_dict_get_int(ctx, iter->current, PDF_NAME(Count));
420 /* How many nodes visible from above are being removed? */
421 if (count > 0)
422 count++; /* Open children, plus this node. */
423 else
424 count = 1; /* Just this node */
425
426 pdf_begin_operation(ctx, doc, "Delete outline item");
427
428 fz_try(ctx)
429 {
430 pdf_obj *up = parent;
431 while (up)
432 {
433 int c = pdf_dict_get_int(ctx, up, PDF_NAME(Count));
434 pdf_dict_put_int(ctx, up, PDF_NAME(Count), (c > 0 ? c - count : c + count));
435 if (c < 0)
436 break;
437 up = pdf_dict_get(ctx, up, PDF_NAME(Parent));
438 }
439
440 if (prev)
441 {
442 if (next)
443 pdf_dict_put(ctx, prev, PDF_NAME(Next), next);
444 else
445 pdf_dict_del(ctx, prev, PDF_NAME(Next));
446 }
447 if (next)
448 {
449 if (prev)
450 pdf_dict_put(ctx, next, PDF_NAME(Prev), prev);
451 else
452 {
453 pdf_dict_put(ctx, parent, PDF_NAME(First), next);
454 pdf_dict_del(ctx, next, PDF_NAME(Prev));
455 }
456 iter->current = next;
457 }
458 else if (prev)
459 {
460 iter->current = prev;
461 pdf_dict_put(ctx, parent, PDF_NAME(Last), prev);
462 }
463 else if (parent)
464 {
465 iter->current = parent;
466 iter->modifier = MOD_BELOW;
467 pdf_dict_del(ctx, parent, PDF_NAME(First));
468 pdf_dict_del(ctx, parent, PDF_NAME(Last));
469 result = 1;
470 }
471 else
472 {
473 iter->current = NULL;
474 result = 1;
475 }
476 pdf_end_operation(ctx, doc);
477 }
478 fz_catch(ctx)
479 {
480 pdf_abandon_operation(ctx, doc);
481 fz_rethrow(ctx);
482 }
483
484 return result;
485 }
486
487 static fz_outline_item *
488 pdf_outline_iterator_item(fz_context *ctx, fz_outline_iterator *iter_)
489 {
490 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
491 pdf_obj *obj;
492 pdf_document *doc = (pdf_document *)iter->super.doc;
493
494 if (iter->modifier != MOD_NONE || iter->current == NULL)
495 return NULL;
496
497 fz_free(ctx, iter->item.title);
498 iter->item.title = NULL;
499 fz_free(ctx, iter->item.uri);
500 iter->item.uri = NULL;
501
502 obj = pdf_dict_get(ctx, iter->current, PDF_NAME(Title));
503 if (obj)
504 iter->item.title = Memento_label(fz_strdup(ctx, pdf_to_text_string(ctx, obj)), "outline_title");
505 obj = pdf_dict_get(ctx, iter->current, PDF_NAME(Dest));
506 if (obj)
507 iter->item.uri = Memento_label(pdf_parse_link_dest(ctx, doc, obj), "outline_uri");
508 else
509 {
510 obj = pdf_dict_get(ctx, iter->current, PDF_NAME(A));
511 if (obj)
512 iter->item.uri = Memento_label(pdf_parse_link_action(ctx, doc, obj, -1), "outline_uri");
513 }
514
515 iter->item.is_open = pdf_dict_get_int(ctx, iter->current, PDF_NAME(Count)) > 0;
516
517 obj = pdf_dict_get(ctx, iter->current, PDF_NAME(C));
518 iter->item.r = (int)(0.5 + 255 * pdf_array_get_real(ctx, obj, 0));
519 iter->item.g = (int)(0.5 + 255 * pdf_array_get_real(ctx, obj, 1));
520 iter->item.b = (int)(0.5 + 255 * pdf_array_get_real(ctx, obj, 2));
521
522 iter->item.flags = pdf_dict_get_int(ctx, iter->current, PDF_NAME(F)) & 127;
523
524 return &iter->item;
525 }
526
527 static void
528 pdf_outline_iterator_drop(fz_context *ctx, fz_outline_iterator *iter_)
529 {
530 pdf_outline_iterator *iter = (pdf_outline_iterator *)iter_;
531
532 if (iter == NULL)
533 return;
534
535 fz_free(ctx, iter->item.title);
536 fz_free(ctx, iter->item.uri);
537 }
538
539 fz_outline_iterator *pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc)
540 {
541 pdf_obj *root, *obj, *first;
542 pdf_mark_bits *marks;
543 pdf_outline_iterator *iter = NULL;
544 int fixed = 0;
545
546 /* Walk the outlines to spot problems that might bite us later
547 * (in particular, for cycles). */
548 marks = pdf_new_mark_bits(ctx, doc);
549 fz_try(ctx)
550 {
551 root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
552 obj = pdf_dict_get(ctx, root, PDF_NAME(Outlines));
553 first = pdf_dict_get(ctx, obj, PDF_NAME(First));
554 if (first)
555 {
556 /* cache page tree for fast link destination lookups. This
557 * will be dropped 'just in time' on writes to the doc. */
558 pdf_load_page_tree(ctx, doc);
559 fz_try(ctx)
560 {
561 /* Pass through the outlines once, fixing inconsistencies */
562 pdf_test_outline(ctx, doc, first, marks, obj, &fixed);
563
564 if (fixed)
565 {
566 /* If a fix was performed, pass through again,
567 * this time throwing if it's still not correct. */
568 pdf_mark_bits_reset(ctx, marks);
569 pdf_test_outline(ctx, doc, first, marks, obj, NULL);
570 pdf_end_operation(ctx, doc);
571 }
572 }
573 fz_catch(ctx)
574 {
575 if (fixed)
576 pdf_abandon_operation(ctx, doc);
577 fz_rethrow(ctx);
578 }
579 }
580 }
581 fz_always(ctx)
582 pdf_drop_mark_bits(ctx, marks);
583 fz_catch(ctx)
584 fz_rethrow(ctx);
585
586 iter = fz_new_derived_outline_iter(ctx, pdf_outline_iterator, &doc->super);
587 iter->super.del = pdf_outline_iterator_del;
588 iter->super.next = pdf_outline_iterator_next;
589 iter->super.prev = pdf_outline_iterator_prev;
590 iter->super.up = pdf_outline_iterator_up;
591 iter->super.down = pdf_outline_iterator_down;
592 iter->super.insert = pdf_outline_iterator_insert;
593 iter->super.update = pdf_outline_iterator_update;
594 iter->super.drop = pdf_outline_iterator_drop;
595 iter->super.item = pdf_outline_iterator_item;
596 iter->current = first;
597 iter->modifier = MOD_NONE;
598
599 return &iter->super;
600 }