comparison mupdf-source/source/pdf/pdf-label.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 #include <stdarg.h>
27 #include <stdlib.h>
28 #include <string.h>
29
typedef struct pdf_object_labels pdf_object_labels;
typedef struct pdf_object_label_node pdf_object_label_node;

/*
	One entry in a per-object list of referrers.

	A node hung on the list for object A records that top-level
	object `num` holds an indirect reference to A, reachable inside
	`num` at the location described by `path`.
*/
struct pdf_object_label_node
{
	/* Number of the top-level object that contains the reference. */
	int num;

	/* Path from that object to the reference, e.g. "/Kids/3". */
	char *path;

	/* Next referrer of the same target, or NULL at the end of the list. */
	pdf_object_label_node *next;
};
39
40 struct pdf_object_labels
41 {
42 fz_pool *pool;
43 int object_count;
44 int root, info, encrypt;
45 unsigned short *pages;
46 char *seen;
47 pdf_object_label_node **nodes;
48 };
49
50 static void
51 add_object_label(fz_context *ctx, pdf_object_labels *g, char *path, int a, int b)
52 {
53 pdf_object_label_node *node, **root;
54
55 node = fz_pool_alloc(ctx, g->pool, sizeof(pdf_object_label_node));
56 node->path = fz_pool_strdup(ctx, g->pool, path);
57 node->num = b;
58
59 root = &g->nodes[a];
60 node->next = *root;
61 *root = node;
62 }
63
64 static void
65 scan_object_label_rec(fz_context *ctx, pdf_object_labels *g, char *root_path, pdf_obj *obj, int top)
66 {
67 char path[100];
68 int i, n;
69 if (pdf_is_indirect(ctx, obj))
70 ;
71 else if (pdf_is_dict(ctx, obj))
72 {
73 n = pdf_dict_len(ctx, obj);
74 for (i = 0; i < n; ++i)
75 {
76 pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
77 pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
78 if (val && key != PDF_NAME(Parent) && key != PDF_NAME(P) && key != PDF_NAME(Prev) && key != PDF_NAME(Last))
79 {
80 if (pdf_is_indirect(ctx, val))
81 {
82 fz_snprintf(path, sizeof path, "%s/%s", root_path, pdf_to_name(ctx, key));
83 add_object_label(ctx, g, path, pdf_to_num(ctx, val), top);
84 }
85 else if (pdf_is_dict(ctx, val) || pdf_is_array(ctx, val))
86 {
87 fz_snprintf(path, sizeof path, "%s/%s", root_path, pdf_to_name(ctx, key));
88 scan_object_label_rec(ctx, g, path, val, top);
89 }
90 }
91 }
92 }
93 else if (pdf_is_array(ctx, obj))
94 {
95 n = pdf_array_len(ctx, obj);
96 for (i = 0; i < n; ++i)
97 {
98 pdf_obj *val = pdf_array_get(ctx, obj, i);
99 if (val)
100 {
101 if (pdf_is_indirect(ctx, val))
102 {
103 fz_snprintf(path, sizeof path, "%s/%d", root_path, i+1);
104 add_object_label(ctx, g, path, pdf_to_num(ctx, val), top);
105 }
106 else if (pdf_is_dict(ctx, val) || pdf_is_array(ctx, val))
107 {
108 fz_snprintf(path, sizeof path, "%s/%d", root_path, i+1);
109 scan_object_label_rec(ctx, g, path, val, top);
110 }
111 }
112 }
113 }
114 }
115
116 static void
117 scan_object_label(fz_context *ctx, pdf_document *doc, pdf_object_labels *g, int num)
118 {
119 pdf_obj *obj = pdf_load_object(ctx, doc, num);
120 fz_try(ctx)
121 scan_object_label_rec(ctx, g, "", obj, num);
122 fz_always(ctx)
123 pdf_drop_obj(ctx, obj);
124 fz_catch(ctx)
125 fz_rethrow(ctx);
126 }
127
128 pdf_object_labels *
129 pdf_load_object_labels(fz_context *ctx, pdf_document *doc)
130 {
131 pdf_object_labels *g = NULL;
132 fz_pool *pool;
133 int i, n, page_count;
134
135 n = pdf_count_objects(ctx, doc);
136
137 pool = fz_new_pool(ctx);
138 fz_try(ctx)
139 {
140 g = fz_pool_alloc(ctx, pool, sizeof(pdf_object_labels));
141 g->pool = pool;
142 g->object_count = n;
143 g->root = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)));
144 g->info = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)));
145 g->encrypt = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
146 g->seen = fz_pool_alloc(ctx, pool, n);
147 g->nodes = fz_pool_alloc(ctx, pool, g->object_count * sizeof(pdf_object_label_node*));
148 g->pages = fz_pool_alloc(ctx, pool, g->object_count * sizeof(unsigned short));
149
150 page_count = pdf_count_pages(ctx, doc);
151 for (i = 0; i < page_count; ++i)
152 g->pages[pdf_to_num(ctx, pdf_lookup_page_obj(ctx, doc, i))] = i+1;
153
154 for (i = 1; i < n; ++i)
155 scan_object_label(ctx, doc, g, i);
156 }
157 fz_catch(ctx)
158 {
159 fz_drop_pool(ctx, pool);
160 }
161 return g;
162 }
163
164 void
165 pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g)
166 {
167 if (g)
168 fz_drop_pool(ctx, g->pool);
169 }
170
/*
	Format a string and place it immediately in front of `path`.

	path_buffer: start of the buffer that `path` points into.
	path:        current start of the path text; the text is built
	             from the end of the buffer towards the front.
	fmt, ...:    printf-style description of the text to prepend.

	Returns the new start of the path. If the formatted text does not
	fit while still leaving 3 spare bytes in front of it, "..." is
	prepended instead and a pointer to that marker is returned.

	Nothing at or after `path` is modified, and no terminator is
	written; the caller is expected to have terminated the buffer.
*/
static char *
prepend(char *path_buffer, char *path, const char *fmt, ...)
{
	char buf[256];
	size_t z;
	size_t room;
	va_list args;

	va_start(args, fmt);
	z = fz_vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	/* If the formatter reports the untruncated length (as vsnprintf
	 * does), only the part that actually landed in buf may be
	 * copied. */
	if (z >= sizeof(buf))
		z = sizeof(buf) - 1;

	/* Compare sizes rather than pointers so that no pointer beyond
	 * the buffer is ever formed. */
	room = (size_t)(path - path_buffer);

	/* We always want to leave ourselves at least 3 chars for
	 * a future "..." */
	if (z + 3 <= room)
	{
		path -= z;
		memcpy(path, buf, z);
		return path;
	}

	/* Just put ... in now. */
	path -= 3;
	memcpy(path, "...", 3);

	return path;
}
199
200 static void
201 find_paths(fz_context *ctx, pdf_object_labels *g, int here, char *path_buffer, char *leaf_path, pdf_label_object_fn *callback, void *arg)
202 {
203 pdf_object_label_node *node;
204 int next;
205 if (here == g->root)
206 {
207 prepend(path_buffer, leaf_path, "trailer/Root");
208 callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Root"));
209 return;
210 }
211 if (here == g->info)
212 {
213 callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Info"));
214 return;
215 }
216 if (here == g->encrypt)
217 {
218 callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Encrypt"));
219 return;
220 }
221 if (g->pages[here])
222 {
223 callback(ctx, arg, prepend(path_buffer, leaf_path, "pages/%d", g->pages[here]));
224 }
225 for (node = g->nodes[here]; node; node = node->next)
226 {
227 next = node->num;
228 if (next < 1 || next >= g->object_count)
229 continue;
230 if (g->seen[next])
231 continue;
232 if (g->pages[next])
233 {
234 callback(ctx, arg, prepend(path_buffer, leaf_path, "pages/%d%s", g->pages[next], node->path));
235 }
236 else
237 {
238 char *p = prepend(path_buffer, leaf_path, "%s", node->path);
239 g->seen[next] = 1;
240 // if we've run out of room in the path buffer, send this and stop.
241 if (p[0] == '.' && p[1] == '.' && p[2] == '.')
242 callback(ctx, arg, p);
243 else
244 find_paths(ctx, g, next, path_buffer, p, callback, arg);
245 g->seen[next] = 0;
246 }
247 }
248 }
249
250 void
251 pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg)
252 {
253 int i;
254 char path[4096];
255
256 if (num < 1 || num >= g->object_count)
257 return;
258 for (i = 1; i < g->object_count; ++i)
259 g->seen[i] = 0;
260 path[sizeof(path)-1] = 0;
261 find_paths(ctx, g, num, path, &path[sizeof(path)-1], callback, arg);
262 }