comparison mupdf-source/source/pdf/pdf-nametree.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 #include <string.h>
27
28 static pdf_obj *
29 pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, const char *needle, pdf_cycle_list *cycle_up)
30 {
31 pdf_cycle_list cycle;
32 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
33 pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
34
35 if (pdf_cycle(ctx, &cycle, cycle_up, node))
36 return NULL;
37
38 if (pdf_is_array(ctx, kids))
39 {
40 int l = 0;
41 int r = pdf_array_len(ctx, kids) - 1;
42
43 while (l <= r)
44 {
45 int m = (l + r) >> 1;
46 pdf_obj *kid = pdf_array_get(ctx, kids, m);
47 pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
48 const char *first = pdf_array_get_text_string(ctx, limits, 0);
49 const char *last = pdf_array_get_text_string(ctx, limits, 1);
50
51 if (!pdf_is_indirect(ctx, kid))
52 {
53 fz_warn(ctx, "non-indirect internal node found in name tree");
54 break;
55 }
56
57 if (strcmp(needle, first) < 0)
58 r = m - 1;
59 else if (strcmp(needle, last) > 0)
60 l = m + 1;
61 else
62 {
63 pdf_obj *obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
64 if (obj)
65 return obj;
66 else
67 break;
68 }
69 }
70
71 /* Spec says names should be sorted (hence the binary search,
72 * above), but Acrobat copes with non-sorted. Drop back to a
73 * simple search if the binary search fails. */
74 r = pdf_array_len(ctx, kids);
75 for (l = 0; l < r; l++)
76 {
77 pdf_obj *obj, *kid = pdf_array_get(ctx, kids, l);
78 if (!pdf_is_indirect(ctx, kid))
79 {
80 fz_warn(ctx, "non-indirect internal node found in name tree");
81 continue;
82 }
83 obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
84 if (obj)
85 return obj;
86 }
87 }
88
89 if (pdf_is_array(ctx, names))
90 {
91 int l = 0;
92 int r = (pdf_array_len(ctx, names) / 2) - 1;
93
94 while (l <= r)
95 {
96 int m = (l + r) >> 1;
97 int c;
98 const char *key = pdf_array_get_text_string(ctx, names, m * 2);
99 pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
100
101 c = strcmp(needle, key);
102 if (c < 0)
103 r = m - 1;
104 else if (c > 0)
105 l = m + 1;
106 else
107 return val;
108 }
109
110 /* Spec says names should be sorted (hence the binary search,
111 * above), but Acrobat copes with non-sorted. Drop back to a
112 * simple search if the binary search fails. */
113 r = pdf_array_len(ctx, names)/2;
114 for (l = 0; l < r; l++)
115 if (!strcmp(needle, pdf_array_get_text_string(ctx, names, l * 2)))
116 return pdf_array_get(ctx, names, l * 2 + 1);
117 }
118
119 return NULL;
120 }
121
122 pdf_obj *
123 pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
124 {
125 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
126 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
127 pdf_obj *tree = pdf_dict_get(ctx, names, which);
128 return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
129 }
130
131 pdf_obj *
132 pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
133 {
134 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
135 pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
136 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
137
138 /* PDF 1.1 has destinations in a dictionary */
139 if (dests)
140 {
141 if (pdf_is_name(ctx, needle))
142 return pdf_dict_get(ctx, dests, needle);
143 else
144 return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
145 }
146
147 /* PDF 1.2 has destinations in a name tree */
148 if (names)
149 {
150 pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
151 return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
152 }
153
154 return NULL;
155 }
156
157 static void
158 pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node, pdf_cycle_list *cycle_up)
159 {
160 pdf_cycle_list cycle;
161 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
162 pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
163 int i;
164
165 if (kids && !pdf_cycle(ctx, &cycle, cycle_up, node))
166 {
167 int len = pdf_array_len(ctx, kids);
168 for (i = 0; i < len; i++)
169 pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i), &cycle);
170 }
171
172 if (names)
173 {
174 int len = pdf_array_len(ctx, names);
175 for (i = 0; i + 1 < len; i += 2)
176 {
177 pdf_obj *key = pdf_array_get(ctx, names, i);
178 pdf_obj *val = pdf_array_get(ctx, names, i + 1);
179 if (pdf_is_string(ctx, key))
180 {
181 key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
182 fz_try(ctx)
183 pdf_dict_put(ctx, dict, key, val);
184 fz_always(ctx)
185 pdf_drop_obj(ctx, key);
186 fz_catch(ctx)
187 fz_rethrow(ctx);
188 }
189 else if (pdf_is_name(ctx, key))
190 {
191 pdf_dict_put(ctx, dict, key, val);
192 }
193 }
194 }
195 }
196
197 /* FIXME: fz_try/fz_catch needed here */
198 pdf_obj *
199 pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
200 {
201 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
202 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
203 pdf_obj *tree = pdf_dict_get(ctx, names, which);
204 if (pdf_is_dict(ctx, tree))
205 {
206 pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
207 pdf_load_name_tree_imp(ctx, dict, doc, tree, NULL);
208 return dict;
209 }
210 return NULL;
211 }
212
213 pdf_obj *
214 pdf_lookup_number_imp(fz_context *ctx, pdf_obj *node, int needle, pdf_cycle_list *cycle_up)
215 {
216 pdf_cycle_list cycle;
217 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
218 pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
219
220 if (pdf_is_array(ctx, kids))
221 {
222 int l = 0;
223 int r = pdf_array_len(ctx, kids) - 1;
224
225 while (l <= r)
226 {
227 int m = (l + r) >> 1;
228 pdf_obj *kid = pdf_array_get(ctx, kids, m);
229 pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
230 int first = pdf_array_get_int(ctx, limits, 0);
231 int last = pdf_array_get_int(ctx, limits, 1);
232
233 if (needle < first)
234 r = m - 1;
235 else if (needle > last)
236 l = m + 1;
237 else
238 {
239 if (pdf_cycle(ctx, &cycle, cycle_up, node))
240 break;
241 return pdf_lookup_number_imp(ctx, kid, needle, &cycle);
242 }
243 }
244 }
245
246 if (pdf_is_array(ctx, nums))
247 {
248 int l = 0;
249 int r = (pdf_array_len(ctx, nums) / 2) - 1;
250
251 while (l <= r)
252 {
253 int m = (l + r) >> 1;
254 int key = pdf_array_get_int(ctx, nums, m * 2);
255 pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
256
257 if (needle < key)
258 r = m - 1;
259 else if (needle > key)
260 l = m + 1;
261 else
262 return val;
263 }
264
265 /* Parallel the nametree lookup above by allowing for non-sorted lists. */
266 r = pdf_array_len(ctx, nums)/2;
267 for (l = 0; l < r; l++)
268 if (needle == pdf_array_get_int(ctx, nums, l * 2))
269 return pdf_array_get(ctx, nums, l * 2 + 1);
270 }
271
272 return NULL;
273 }
274
275 pdf_obj *
276 pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
277 {
278 return pdf_lookup_number_imp(ctx, node, needle, NULL);
279 }
280
281 static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
282 void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
283 void (*leave)(fz_context *, pdf_obj *, void *),
284 void *arg,
285 pdf_obj **inherit_names,
286 pdf_obj **inherit_vals,
287 pdf_cycle_list *cycle_up);
288
289 static void
290 pdf_walk_tree_kid(fz_context *ctx,
291 pdf_obj *obj,
292 pdf_obj *kid_name,
293 void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
294 void (*leave)(fz_context *, pdf_obj *, void *),
295 void *arg,
296 pdf_obj **inherit_names,
297 pdf_obj **inherit_vals,
298 pdf_cycle_list *cycle_up)
299 {
300 pdf_cycle_list cycle;
301 pdf_obj **new_vals = NULL;
302
303 if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
304 return;
305
306 fz_var(new_vals);
307
308 fz_try(ctx)
309 {
310 /* First we run through the names we've been asked to collect
311 * inherited values for updating the values. */
312 if (inherit_names != NULL)
313 {
314 int i, n;
315
316 for (n = 0; inherit_names[n] != NULL; n++);
317
318 for (i = 0; i < n; i++)
319 {
320 pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]);
321 if (v != NULL)
322 {
323 if (new_vals == NULL)
324 {
325 new_vals = fz_malloc_array(ctx, n, pdf_obj *);
326 memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *));
327 inherit_vals = new_vals;
328 }
329 inherit_vals[i] = v;
330 }
331 }
332 }
333
334 if (arrive)
335 arrive(ctx, obj, arg, inherit_vals);
336 pdf_walk_tree_imp(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
337 if (leave)
338 leave(ctx, obj, arg);
339 }
340 fz_always(ctx)
341 fz_free(ctx, new_vals);
342 fz_catch(ctx)
343 fz_rethrow(ctx);
344 }
345
346 static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
347 void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
348 void (*leave)(fz_context *, pdf_obj *, void *),
349 void *arg,
350 pdf_obj **inherit_names,
351 pdf_obj **inherit_vals,
352 pdf_cycle_list *cycle_up)
353 {
354 pdf_cycle_list cycle;
355
356 if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
357 return;
358
359 if (pdf_is_array(ctx, obj))
360 {
361 int i, n = pdf_array_len(ctx, obj);
362 for (i = 0; i < n; i++)
363 pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
364 }
365 else
366 {
367 pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
368 }
369 }
370
371 void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
372 void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
373 void (*leave)(fz_context *, pdf_obj *, void *),
374 void *arg,
375 pdf_obj **inherit_names,
376 pdf_obj **inherit_vals)
377 {
378 pdf_walk_tree_imp(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, NULL);
379 }