Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/archive.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/archive.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,580 @@ +// Copyright (C) 2004-2024 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. 
+ +#include "mupdf/fitz.h" + +#include <string.h> + +enum +{ + FZ_ARCHIVE_HANDLER_MAX = 32 +}; + +struct fz_archive_handler_context +{ + int refs; + int count; + const fz_archive_handler *handler[FZ_ARCHIVE_HANDLER_MAX]; +}; + +fz_stream * +fz_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_stream *stream = fz_try_open_archive_entry(ctx, arch, name); + + if (stream == NULL) + fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name); + + return stream; +} + +fz_stream * +fz_try_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + char *local_name; + fz_stream *stream = NULL; + + if (arch == NULL || !arch->open_entry) + return NULL; + + local_name = fz_cleanname_strdup(ctx, name); + + fz_var(stream); + + fz_try(ctx) + stream = arch->open_entry(ctx, arch, local_name); + fz_always(ctx) + fz_free(ctx, local_name); + fz_catch(ctx) + fz_rethrow(ctx); + + return stream; +} + +fz_buffer * +fz_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_buffer *buf = fz_try_read_archive_entry(ctx, arch, name); + + if (buf == NULL) + fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name); + + return buf; +} + +fz_buffer * +fz_try_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + char *local_name; + fz_buffer *buf = NULL; + + if (arch == NULL || !arch->read_entry || !arch->has_entry || name == NULL) + return NULL; + + local_name = fz_cleanname_strdup(ctx, name); + + fz_var(buf); + + fz_try(ctx) + { + if (!arch->has_entry(ctx, arch, local_name)) + break; + buf = arch->read_entry(ctx, arch, local_name); + } + fz_always(ctx) + fz_free(ctx, local_name); + fz_catch(ctx) + fz_rethrow(ctx); + + return buf; +} + +int +fz_has_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + char *local_name; + int res = 0; + + if (arch == NULL) + return 0; + if (!arch->has_entry) + return 0; + + local_name = fz_cleanname_strdup(ctx, name); + + fz_var(res); + + 
fz_try(ctx) + res = arch->has_entry(ctx, arch, local_name); + fz_always(ctx) + fz_free(ctx, local_name); + fz_catch(ctx) + fz_rethrow(ctx); + + return res; +} + +const char * +fz_list_archive_entry(fz_context *ctx, fz_archive *arch, int idx) +{ + if (arch == 0) + return NULL; + if (!arch->list_entry) + return NULL; + + return arch->list_entry(ctx, arch, idx); +} + +int +fz_count_archive_entries(fz_context *ctx, fz_archive *arch) +{ + if (arch == NULL) + return 0; + if (!arch->count_entries) + return 0; + return arch->count_entries(ctx, arch); +} + +const char * +fz_archive_format(fz_context *ctx, fz_archive *arch) +{ + if (arch == NULL) + return "undefined"; + return arch->format; +} + +fz_archive * +fz_new_archive_of_size(fz_context *ctx, fz_stream *file, int size) +{ + fz_archive *arch; + arch = Memento_label(fz_calloc(ctx, 1, size), "fz_archive"); + arch->refs = 1; + arch->file = fz_keep_stream(ctx, file); + return arch; +} + +fz_archive * +fz_try_open_archive_with_stream(fz_context *ctx, fz_stream *file) +{ + fz_archive *arch = NULL; + int i; + + if (file == NULL) + return NULL; + + for (i = 0; i < ctx->archive->count; i++) + { + fz_seek(ctx, file, 0, SEEK_SET); + if (ctx->archive->handler[i]->recognize(ctx, file)) + { + arch = ctx->archive->handler[i]->open(ctx, file); + if (arch) + return arch; + } + } + + return NULL; +} + +fz_archive * +fz_open_archive_with_stream(fz_context *ctx, fz_stream *file) +{ + fz_archive *arch = fz_try_open_archive_with_stream(ctx, file); + if (arch == NULL) + fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize archive"); + return arch; +} + +fz_archive * +fz_open_archive(fz_context *ctx, const char *filename) +{ + fz_stream *file; + fz_archive *arch = NULL; + + file = fz_open_file(ctx, filename); + + fz_try(ctx) + arch = fz_open_archive_with_stream(ctx, file); + fz_always(ctx) + fz_drop_stream(ctx, file); + fz_catch(ctx) + fz_rethrow(ctx); + + return arch; +} + +fz_archive * +fz_keep_archive(fz_context *ctx, fz_archive *arch) +{ + 
return (fz_archive *)fz_keep_imp(ctx, arch, &arch->refs); +} + +void +fz_drop_archive(fz_context *ctx, fz_archive *arch) +{ + if (fz_drop_imp(ctx, arch, &arch->refs)) + { + if (arch->drop_archive) + arch->drop_archive(ctx, arch); + fz_drop_stream(ctx, arch->file); + fz_free(ctx, arch); + } +} + +/* In-memory archive using a fz_tree holding fz_buffers */ + +typedef struct +{ + fz_archive super; + fz_tree *tree; +} fz_tree_archive; + +static int has_tree_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_tree *tree = ((fz_tree_archive*)arch)->tree; + fz_buffer *ent = fz_tree_lookup(ctx, tree, name); + return ent != NULL; +} + +static fz_buffer *read_tree_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_tree *tree = ((fz_tree_archive*)arch)->tree; + fz_buffer *ent = fz_tree_lookup(ctx, tree, name); + return fz_keep_buffer(ctx, ent); +} + +static fz_stream *open_tree_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_tree *tree = ((fz_tree_archive*)arch)->tree; + fz_buffer *ent = fz_tree_lookup(ctx, tree, name); + return fz_open_buffer(ctx, ent); +} + +static void drop_tree_archive_entry(fz_context *ctx, void *ent) +{ + fz_drop_buffer(ctx, ent); +} + +static void drop_tree_archive(fz_context *ctx, fz_archive *arch) +{ + fz_tree *tree = ((fz_tree_archive*)arch)->tree; + fz_drop_tree(ctx, tree, drop_tree_archive_entry); +} + +fz_archive * +fz_new_tree_archive(fz_context *ctx, fz_tree *tree) +{ + fz_tree_archive *arch; + + arch = fz_new_derived_archive(ctx, NULL, fz_tree_archive); + arch->super.format = "tree"; + arch->super.has_entry = has_tree_entry; + arch->super.read_entry = read_tree_entry; + arch->super.open_entry = open_tree_entry; + arch->super.drop_archive = drop_tree_archive; + arch->tree = tree; + + return &arch->super; +} + +void +fz_tree_archive_add_buffer(fz_context *ctx, fz_archive *arch_, const char *name, fz_buffer *buf) +{ + fz_tree_archive *arch = (fz_tree_archive *)arch_; + + if (arch == NULL || 
arch->super.has_entry != has_tree_entry) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive"); + + buf = fz_keep_buffer(ctx, buf); + + fz_try(ctx) + arch->tree = fz_tree_insert(ctx, arch->tree, name, buf); + fz_catch(ctx) + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } +} + +void +fz_tree_archive_add_data(fz_context *ctx, fz_archive *arch_, const char *name, const void *data, size_t size) +{ + fz_tree_archive *arch = (fz_tree_archive *)arch_; + fz_buffer *buf; + + if (arch == NULL || arch->super.has_entry != has_tree_entry) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive"); + + buf = fz_new_buffer_from_copied_data(ctx, data, size); + + fz_try(ctx) + arch->tree = fz_tree_insert(ctx, arch->tree, name, buf); + fz_catch(ctx) + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } +} + +typedef struct +{ + fz_archive *arch; + char *dir; +} multi_archive_entry; + +typedef struct +{ + fz_archive super; + int len; + int max; + multi_archive_entry *sub; +} fz_multi_archive; + +static int has_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name) +{ + fz_multi_archive *arch = (fz_multi_archive *)arch_; + int i; + + for (i = arch->len-1; i >= 0; i--) + { + multi_archive_entry *e = &arch->sub[i]; + const char *subname = name; + if (e->dir) + { + size_t n = strlen(e->dir); + if (strncmp(e->dir, name, n) != 0) + continue; + subname += n; + } + if (fz_has_archive_entry(ctx, arch->sub[i].arch, subname)) + return 1; + } + return 0; +} + +static fz_buffer *read_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name) +{ + fz_multi_archive *arch = (fz_multi_archive *)arch_; + int i; + fz_buffer *res = NULL; + + for (i = arch->len-1; i >= 0; i--) + { + multi_archive_entry *e = &arch->sub[i]; + const char *subname = name; + + if (e->dir) + { + size_t n = strlen(e->dir); + if (strncmp(e->dir, name, n) != 0) + continue; + subname += n; + } + + res = fz_try_read_archive_entry(ctx, arch->sub[i].arch, subname); + + 
if (res) + break; + } + + return res; +} + +static fz_stream *open_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name) +{ + fz_multi_archive *arch = (fz_multi_archive *)arch_; + int i; + fz_stream *res = NULL; + + for (i = arch->len-1; i >= 0; i--) + { + multi_archive_entry *e = &arch->sub[i]; + const char *subname = name; + + if (e->dir) + { + size_t n = strlen(e->dir); + if (strncmp(e->dir, name, n) != 0) + continue; + subname += n; + } + + res = fz_open_archive_entry(ctx, arch->sub[i].arch, subname); + + if (res) + break; + } + + return res; +} + +static void drop_multi_archive(fz_context *ctx, fz_archive *arch_) +{ + fz_multi_archive *arch = (fz_multi_archive *)arch_; + int i; + + for (i = arch->len-1; i >= 0; i--) + { + multi_archive_entry *e = &arch->sub[i]; + fz_free(ctx, e->dir); + fz_drop_archive(ctx, e->arch); + } + fz_free(ctx, arch->sub); +} + +fz_archive * +fz_new_multi_archive(fz_context *ctx) +{ + fz_multi_archive *arch; + + arch = fz_new_derived_archive(ctx, NULL, fz_multi_archive); + arch->super.format = "multi"; + arch->super.has_entry = has_multi_entry; + arch->super.read_entry = read_multi_entry; + arch->super.open_entry = open_multi_entry; + arch->super.drop_archive = drop_multi_archive; + arch->max = 0; + arch->len = 0; + arch->sub = NULL; + + return &arch->super; +} + +void +fz_mount_multi_archive(fz_context *ctx, fz_archive *arch_, fz_archive *sub, const char *path) +{ + fz_multi_archive *arch = (fz_multi_archive *)arch_; + char *clean_path = NULL; + + if (arch->super.has_entry != has_multi_entry) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot mount within a non-multi archive"); + + if (arch->len == arch->max) + { + int n = arch->max ? arch->max * 2 : 8; + + arch->sub = fz_realloc(ctx, arch->sub, sizeof(*arch->sub) * n); + arch->max = n; + } + + /* If we have a path, then strip any trailing slashes, and add just one. */ + if (path) + { + clean_path = fz_cleanname_strdup(ctx, path); + if (clean_path[0] == '.' 
&& clean_path[1] == 0) + { + fz_free(ctx, clean_path); + clean_path = NULL; + } + else + { + /* Do a strcat without doing a strcat to avoid the compiler + * complaining at us. We know that n here will be <= n above + * so this is safe. */ + size_t n = strlen(clean_path); + clean_path[n] = '/'; + clean_path[n + 1] = 0; + } + } + + arch->sub[arch->len].arch = fz_keep_archive(ctx, sub); + arch->sub[arch->len].dir = clean_path; + arch->len++; +} + +static const fz_archive_handler fz_zip_archive_handler = +{ + fz_is_zip_archive, + fz_open_zip_archive_with_stream +}; + +static const fz_archive_handler fz_tar_archive_handler = +{ + fz_is_tar_archive, + fz_open_tar_archive_with_stream +}; + +const fz_archive_handler fz_libarchive_archive_handler = +{ + fz_is_libarchive_archive, + fz_open_libarchive_archive_with_stream +}; + +const fz_archive_handler fz_cfb_archive_handler = +{ + fz_is_cfb_archive, + fz_open_cfb_archive_with_stream +}; + +void fz_new_archive_handler_context(fz_context *ctx) +{ + ctx->archive = fz_malloc_struct(ctx, fz_archive_handler_context); + ctx->archive->refs = 1; + + fz_register_archive_handler(ctx, &fz_zip_archive_handler); + fz_register_archive_handler(ctx, &fz_tar_archive_handler); +#ifdef HAVE_LIBARCHIVE + fz_register_archive_handler(ctx, &fz_libarchive_archive_handler); +#endif + fz_register_archive_handler(ctx, &fz_cfb_archive_handler); +} + +fz_archive_handler_context *fz_keep_archive_handler_context(fz_context *ctx) +{ + if (!ctx || !ctx->archive) + return NULL; + return fz_keep_imp(ctx, ctx->archive, &ctx->archive->refs); +} + +void fz_drop_archive_handler_context(fz_context *ctx) +{ + if (!ctx) + return; + + if (fz_drop_imp(ctx, ctx->archive, &ctx->archive->refs)) + { + fz_free(ctx, ctx->archive); + ctx->archive = NULL; + } +} + +void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler) +{ + fz_archive_handler_context *ac; + int i; + + if (!handler) + return; + + ac = ctx->archive; + if (ac == NULL) + fz_throw(ctx, 
FZ_ERROR_ARGUMENT, "archive handler list not found"); + + for (i = 0; i < ac->count; i++) + if (ac->handler[i] == handler) + return; + + if (ac->count >= FZ_ARCHIVE_HANDLER_MAX) + fz_throw(ctx, FZ_ERROR_LIMIT, "Too many archive handlers"); + + ac->handler[ac->count++] = handler; +}
