comparison mupdf-source/source/fitz/archive.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26
27 enum
28 {
29 FZ_ARCHIVE_HANDLER_MAX = 32
30 };
31
32 struct fz_archive_handler_context
33 {
34 int refs;
35 int count;
36 const fz_archive_handler *handler[FZ_ARCHIVE_HANDLER_MAX];
37 };
38
39 fz_stream *
40 fz_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
41 {
42 fz_stream *stream = fz_try_open_archive_entry(ctx, arch, name);
43
44 if (stream == NULL)
45 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name);
46
47 return stream;
48 }
49
50 fz_stream *
51 fz_try_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
52 {
53 char *local_name;
54 fz_stream *stream = NULL;
55
56 if (arch == NULL || !arch->open_entry)
57 return NULL;
58
59 local_name = fz_cleanname_strdup(ctx, name);
60
61 fz_var(stream);
62
63 fz_try(ctx)
64 stream = arch->open_entry(ctx, arch, local_name);
65 fz_always(ctx)
66 fz_free(ctx, local_name);
67 fz_catch(ctx)
68 fz_rethrow(ctx);
69
70 return stream;
71 }
72
73 fz_buffer *
74 fz_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
75 {
76 fz_buffer *buf = fz_try_read_archive_entry(ctx, arch, name);
77
78 if (buf == NULL)
79 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name);
80
81 return buf;
82 }
83
84 fz_buffer *
85 fz_try_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
86 {
87 char *local_name;
88 fz_buffer *buf = NULL;
89
90 if (arch == NULL || !arch->read_entry || !arch->has_entry || name == NULL)
91 return NULL;
92
93 local_name = fz_cleanname_strdup(ctx, name);
94
95 fz_var(buf);
96
97 fz_try(ctx)
98 {
99 if (!arch->has_entry(ctx, arch, local_name))
100 break;
101 buf = arch->read_entry(ctx, arch, local_name);
102 }
103 fz_always(ctx)
104 fz_free(ctx, local_name);
105 fz_catch(ctx)
106 fz_rethrow(ctx);
107
108 return buf;
109 }
110
111 int
112 fz_has_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
113 {
114 char *local_name;
115 int res = 0;
116
117 if (arch == NULL)
118 return 0;
119 if (!arch->has_entry)
120 return 0;
121
122 local_name = fz_cleanname_strdup(ctx, name);
123
124 fz_var(res);
125
126 fz_try(ctx)
127 res = arch->has_entry(ctx, arch, local_name);
128 fz_always(ctx)
129 fz_free(ctx, local_name);
130 fz_catch(ctx)
131 fz_rethrow(ctx);
132
133 return res;
134 }
135
136 const char *
137 fz_list_archive_entry(fz_context *ctx, fz_archive *arch, int idx)
138 {
139 if (arch == 0)
140 return NULL;
141 if (!arch->list_entry)
142 return NULL;
143
144 return arch->list_entry(ctx, arch, idx);
145 }
146
147 int
148 fz_count_archive_entries(fz_context *ctx, fz_archive *arch)
149 {
150 if (arch == NULL)
151 return 0;
152 if (!arch->count_entries)
153 return 0;
154 return arch->count_entries(ctx, arch);
155 }
156
157 const char *
158 fz_archive_format(fz_context *ctx, fz_archive *arch)
159 {
160 if (arch == NULL)
161 return "undefined";
162 return arch->format;
163 }
164
165 fz_archive *
166 fz_new_archive_of_size(fz_context *ctx, fz_stream *file, int size)
167 {
168 fz_archive *arch;
169 arch = Memento_label(fz_calloc(ctx, 1, size), "fz_archive");
170 arch->refs = 1;
171 arch->file = fz_keep_stream(ctx, file);
172 return arch;
173 }
174
175 fz_archive *
176 fz_try_open_archive_with_stream(fz_context *ctx, fz_stream *file)
177 {
178 fz_archive *arch = NULL;
179 int i;
180
181 if (file == NULL)
182 return NULL;
183
184 for (i = 0; i < ctx->archive->count; i++)
185 {
186 fz_seek(ctx, file, 0, SEEK_SET);
187 if (ctx->archive->handler[i]->recognize(ctx, file))
188 {
189 arch = ctx->archive->handler[i]->open(ctx, file);
190 if (arch)
191 return arch;
192 }
193 }
194
195 return NULL;
196 }
197
198 fz_archive *
199 fz_open_archive_with_stream(fz_context *ctx, fz_stream *file)
200 {
201 fz_archive *arch = fz_try_open_archive_with_stream(ctx, file);
202 if (arch == NULL)
203 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize archive");
204 return arch;
205 }
206
207 fz_archive *
208 fz_open_archive(fz_context *ctx, const char *filename)
209 {
210 fz_stream *file;
211 fz_archive *arch = NULL;
212
213 file = fz_open_file(ctx, filename);
214
215 fz_try(ctx)
216 arch = fz_open_archive_with_stream(ctx, file);
217 fz_always(ctx)
218 fz_drop_stream(ctx, file);
219 fz_catch(ctx)
220 fz_rethrow(ctx);
221
222 return arch;
223 }
224
225 fz_archive *
226 fz_keep_archive(fz_context *ctx, fz_archive *arch)
227 {
228 return (fz_archive *)fz_keep_imp(ctx, arch, &arch->refs);
229 }
230
231 void
232 fz_drop_archive(fz_context *ctx, fz_archive *arch)
233 {
234 if (fz_drop_imp(ctx, arch, &arch->refs))
235 {
236 if (arch->drop_archive)
237 arch->drop_archive(ctx, arch);
238 fz_drop_stream(ctx, arch->file);
239 fz_free(ctx, arch);
240 }
241 }
242
243 /* In-memory archive using a fz_tree holding fz_buffers */
244
245 typedef struct
246 {
247 fz_archive super;
248 fz_tree *tree;
249 } fz_tree_archive;
250
251 static int has_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
252 {
253 fz_tree *tree = ((fz_tree_archive*)arch)->tree;
254 fz_buffer *ent = fz_tree_lookup(ctx, tree, name);
255 return ent != NULL;
256 }
257
258 static fz_buffer *read_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
259 {
260 fz_tree *tree = ((fz_tree_archive*)arch)->tree;
261 fz_buffer *ent = fz_tree_lookup(ctx, tree, name);
262 return fz_keep_buffer(ctx, ent);
263 }
264
265 static fz_stream *open_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
266 {
267 fz_tree *tree = ((fz_tree_archive*)arch)->tree;
268 fz_buffer *ent = fz_tree_lookup(ctx, tree, name);
269 return fz_open_buffer(ctx, ent);
270 }
271
272 static void drop_tree_archive_entry(fz_context *ctx, void *ent)
273 {
274 fz_drop_buffer(ctx, ent);
275 }
276
277 static void drop_tree_archive(fz_context *ctx, fz_archive *arch)
278 {
279 fz_tree *tree = ((fz_tree_archive*)arch)->tree;
280 fz_drop_tree(ctx, tree, drop_tree_archive_entry);
281 }
282
283 fz_archive *
284 fz_new_tree_archive(fz_context *ctx, fz_tree *tree)
285 {
286 fz_tree_archive *arch;
287
288 arch = fz_new_derived_archive(ctx, NULL, fz_tree_archive);
289 arch->super.format = "tree";
290 arch->super.has_entry = has_tree_entry;
291 arch->super.read_entry = read_tree_entry;
292 arch->super.open_entry = open_tree_entry;
293 arch->super.drop_archive = drop_tree_archive;
294 arch->tree = tree;
295
296 return &arch->super;
297 }
298
299 void
300 fz_tree_archive_add_buffer(fz_context *ctx, fz_archive *arch_, const char *name, fz_buffer *buf)
301 {
302 fz_tree_archive *arch = (fz_tree_archive *)arch_;
303
304 if (arch == NULL || arch->super.has_entry != has_tree_entry)
305 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive");
306
307 buf = fz_keep_buffer(ctx, buf);
308
309 fz_try(ctx)
310 arch->tree = fz_tree_insert(ctx, arch->tree, name, buf);
311 fz_catch(ctx)
312 {
313 fz_drop_buffer(ctx, buf);
314 fz_rethrow(ctx);
315 }
316 }
317
318 void
319 fz_tree_archive_add_data(fz_context *ctx, fz_archive *arch_, const char *name, const void *data, size_t size)
320 {
321 fz_tree_archive *arch = (fz_tree_archive *)arch_;
322 fz_buffer *buf;
323
324 if (arch == NULL || arch->super.has_entry != has_tree_entry)
325 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive");
326
327 buf = fz_new_buffer_from_copied_data(ctx, data, size);
328
329 fz_try(ctx)
330 arch->tree = fz_tree_insert(ctx, arch->tree, name, buf);
331 fz_catch(ctx)
332 {
333 fz_drop_buffer(ctx, buf);
334 fz_rethrow(ctx);
335 }
336 }
337
338 typedef struct
339 {
340 fz_archive *arch;
341 char *dir;
342 } multi_archive_entry;
343
344 typedef struct
345 {
346 fz_archive super;
347 int len;
348 int max;
349 multi_archive_entry *sub;
350 } fz_multi_archive;
351
352 static int has_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
353 {
354 fz_multi_archive *arch = (fz_multi_archive *)arch_;
355 int i;
356
357 for (i = arch->len-1; i >= 0; i--)
358 {
359 multi_archive_entry *e = &arch->sub[i];
360 const char *subname = name;
361 if (e->dir)
362 {
363 size_t n = strlen(e->dir);
364 if (strncmp(e->dir, name, n) != 0)
365 continue;
366 subname += n;
367 }
368 if (fz_has_archive_entry(ctx, arch->sub[i].arch, subname))
369 return 1;
370 }
371 return 0;
372 }
373
374 static fz_buffer *read_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
375 {
376 fz_multi_archive *arch = (fz_multi_archive *)arch_;
377 int i;
378 fz_buffer *res = NULL;
379
380 for (i = arch->len-1; i >= 0; i--)
381 {
382 multi_archive_entry *e = &arch->sub[i];
383 const char *subname = name;
384
385 if (e->dir)
386 {
387 size_t n = strlen(e->dir);
388 if (strncmp(e->dir, name, n) != 0)
389 continue;
390 subname += n;
391 }
392
393 res = fz_try_read_archive_entry(ctx, arch->sub[i].arch, subname);
394
395 if (res)
396 break;
397 }
398
399 return res;
400 }
401
402 static fz_stream *open_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
403 {
404 fz_multi_archive *arch = (fz_multi_archive *)arch_;
405 int i;
406 fz_stream *res = NULL;
407
408 for (i = arch->len-1; i >= 0; i--)
409 {
410 multi_archive_entry *e = &arch->sub[i];
411 const char *subname = name;
412
413 if (e->dir)
414 {
415 size_t n = strlen(e->dir);
416 if (strncmp(e->dir, name, n) != 0)
417 continue;
418 subname += n;
419 }
420
421 res = fz_open_archive_entry(ctx, arch->sub[i].arch, subname);
422
423 if (res)
424 break;
425 }
426
427 return res;
428 }
429
430 static void drop_multi_archive(fz_context *ctx, fz_archive *arch_)
431 {
432 fz_multi_archive *arch = (fz_multi_archive *)arch_;
433 int i;
434
435 for (i = arch->len-1; i >= 0; i--)
436 {
437 multi_archive_entry *e = &arch->sub[i];
438 fz_free(ctx, e->dir);
439 fz_drop_archive(ctx, e->arch);
440 }
441 fz_free(ctx, arch->sub);
442 }
443
444 fz_archive *
445 fz_new_multi_archive(fz_context *ctx)
446 {
447 fz_multi_archive *arch;
448
449 arch = fz_new_derived_archive(ctx, NULL, fz_multi_archive);
450 arch->super.format = "multi";
451 arch->super.has_entry = has_multi_entry;
452 arch->super.read_entry = read_multi_entry;
453 arch->super.open_entry = open_multi_entry;
454 arch->super.drop_archive = drop_multi_archive;
455 arch->max = 0;
456 arch->len = 0;
457 arch->sub = NULL;
458
459 return &arch->super;
460 }
461
462 void
463 fz_mount_multi_archive(fz_context *ctx, fz_archive *arch_, fz_archive *sub, const char *path)
464 {
465 fz_multi_archive *arch = (fz_multi_archive *)arch_;
466 char *clean_path = NULL;
467
468 if (arch->super.has_entry != has_multi_entry)
469 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot mount within a non-multi archive");
470
471 if (arch->len == arch->max)
472 {
473 int n = arch->max ? arch->max * 2 : 8;
474
475 arch->sub = fz_realloc(ctx, arch->sub, sizeof(*arch->sub) * n);
476 arch->max = n;
477 }
478
479 /* If we have a path, then strip any trailing slashes, and add just one. */
480 if (path)
481 {
482 clean_path = fz_cleanname_strdup(ctx, path);
483 if (clean_path[0] == '.' && clean_path[1] == 0)
484 {
485 fz_free(ctx, clean_path);
486 clean_path = NULL;
487 }
488 else
489 {
490 /* Do a strcat without doing a strcat to avoid the compiler
491 * complaining at us. We know that n here will be <= n above
492 * so this is safe. */
493 size_t n = strlen(clean_path);
494 clean_path[n] = '/';
495 clean_path[n + 1] = 0;
496 }
497 }
498
499 arch->sub[arch->len].arch = fz_keep_archive(ctx, sub);
500 arch->sub[arch->len].dir = clean_path;
501 arch->len++;
502 }
503
504 static const fz_archive_handler fz_zip_archive_handler =
505 {
506 fz_is_zip_archive,
507 fz_open_zip_archive_with_stream
508 };
509
510 static const fz_archive_handler fz_tar_archive_handler =
511 {
512 fz_is_tar_archive,
513 fz_open_tar_archive_with_stream
514 };
515
516 const fz_archive_handler fz_libarchive_archive_handler =
517 {
518 fz_is_libarchive_archive,
519 fz_open_libarchive_archive_with_stream
520 };
521
522 const fz_archive_handler fz_cfb_archive_handler =
523 {
524 fz_is_cfb_archive,
525 fz_open_cfb_archive_with_stream
526 };
527
528 void fz_new_archive_handler_context(fz_context *ctx)
529 {
530 ctx->archive = fz_malloc_struct(ctx, fz_archive_handler_context);
531 ctx->archive->refs = 1;
532
533 fz_register_archive_handler(ctx, &fz_zip_archive_handler);
534 fz_register_archive_handler(ctx, &fz_tar_archive_handler);
535 #ifdef HAVE_LIBARCHIVE
536 fz_register_archive_handler(ctx, &fz_libarchive_archive_handler);
537 #endif
538 fz_register_archive_handler(ctx, &fz_cfb_archive_handler);
539 }
540
541 fz_archive_handler_context *fz_keep_archive_handler_context(fz_context *ctx)
542 {
543 if (!ctx || !ctx->archive)
544 return NULL;
545 return fz_keep_imp(ctx, ctx->archive, &ctx->archive->refs);
546 }
547
548 void fz_drop_archive_handler_context(fz_context *ctx)
549 {
550 if (!ctx)
551 return;
552
553 if (fz_drop_imp(ctx, ctx->archive, &ctx->archive->refs))
554 {
555 fz_free(ctx, ctx->archive);
556 ctx->archive = NULL;
557 }
558 }
559
560 void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler)
561 {
562 fz_archive_handler_context *ac;
563 int i;
564
565 if (!handler)
566 return;
567
568 ac = ctx->archive;
569 if (ac == NULL)
570 fz_throw(ctx, FZ_ERROR_ARGUMENT, "archive handler list not found");
571
572 for (i = 0; i < ac->count; i++)
573 if (ac->handler[i] == handler)
574 return;
575
576 if (ac->count >= FZ_ARCHIVE_HANDLER_MAX)
577 fz_throw(ctx, FZ_ERROR_LIMIT, "Too many archive handlers");
578
579 ac->handler[ac->count++] = handler;
580 }