diff mupdf-source/source/fitz/unlibarchive.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/unlibarchive.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,727 @@
+// Copyright (C) 2023-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "mupdf/fitz.h"
+
+#ifdef HAVE_LIBARCHIVE
+
+#ifdef _WIN32
+#include "libarchive/archive.h"
+#include "libarchive/archive_entry.h"
+#else
+#include <archive.h>
+#include <archive_entry.h>
+#endif
+
+typedef struct
+{
+	size_t len;
+	uint8_t name[32];
+} entry_t;
+
+typedef struct
+{
+	fz_archive super;
+
+	struct archive *archive;
+
+	int current_entry_idx;
+
+	int entries_max;
+	int entries_len;
+	entry_t **entries;
+
+	fz_context *ctx; /* safe! */
+	uint8_t block[4096];
+} fz_libarchive_archive;
+
+static la_ssize_t
+libarchive_read(struct archive *a, void *client_data, const void **buf)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
+	size_t z;
+	uint8_t *p;
+	size_t left;
+	fz_context *ctx = arch->ctx;
+	la_ssize_t ret = 0;
+
+	fz_try(ctx)
+	{
+		z = fz_available(arch->ctx, arch->super.file, 1024);
+
+		/* If we're at the EOF, can't read anything! */
+		if (z == 0)
+			break;
+
+		/* If we have at least 1K, then just return the pointer to that
+		 * directly. */
+		if (z >= 1024)
+		{
+			*buf = arch->super.file->rp;
+			arch->super.file->rp += z;
+			ret = (la_ssize_t)z;
+			break;
+		}
+
+		/* If not, let's pull a large enough lump out. */
+
+		left = sizeof(arch->block);
+		p = arch->block;
+		do
+		{
+			memcpy(p, arch->super.file->rp, z);
+			p += z;
+			arch->super.file->rp += z;
+			left -= z;
+			if (left)
+			{
+				z = fz_available(arch->ctx, arch->super.file, left);
+				if (z > left)
+					z = left;
+				if (z == 0)
+					break;
+			}
+		}
+		while (left != 0);
+
+		ret = p - arch->block;
+		*buf = arch->block;
+	}
+	fz_catch(ctx)
+	{
+		/* Ignore error */
+		archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
+		return -1;
+	}
+
+	return ret;
+}
+
+static la_int64_t
+libarchive_skip(struct archive *a, void *client_data, la_int64_t skip)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
+	int64_t pos;
+	fz_context *ctx = arch->ctx;
+
+	fz_try(ctx)
+	{
+		pos = fz_tell(arch->ctx, arch->super.file);
+		fz_seek(arch->ctx, arch->super.file, pos + skip, SEEK_SET);
+		pos = fz_tell(arch->ctx, arch->super.file) - pos;
+	}
+	fz_catch(ctx)
+	{
+		/* Ignore error */
+		archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
+		return -1;
+	}
+
+	return pos;
+}
+
+static la_int64_t
+libarchive_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
+	fz_context *ctx = arch->ctx;
+	int64_t pos;
+
+	fz_try(ctx)
+	{
+		fz_seek(arch->ctx, arch->super.file, offset, whence);
+		pos = fz_tell(arch->ctx, arch->super.file);
+	}
+	fz_catch(ctx)
+	{
+		/* Ignore error */
+		archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
+		return -1;
+	}
+
+	return pos;
+}
+
+static int
+libarchive_close(struct archive *a, void *client_data)
+{
+	/* Nothing to do. Stream is dropped when the fz_archive is closed. */
+	return ARCHIVE_OK;
+}
+
+static int
+libarchive_open(fz_context *ctx, fz_libarchive_archive *arch)
+{
+	int r;
+
+	arch->archive = archive_read_new();
+	archive_read_support_filter_all(arch->archive);
+	archive_read_support_format_all(arch->archive);
+
+	arch->ctx = ctx;
+	r = archive_read_set_seek_callback(arch->archive, libarchive_seek);
+	if (r == ARCHIVE_OK)
+		r = archive_read_open2(arch->archive, arch, NULL, libarchive_read, libarchive_skip, libarchive_close);
+	arch->ctx = NULL;
+	if (r != ARCHIVE_OK)
+	{
+		archive_read_free(arch->archive);
+		arch->archive = NULL;
+	}
+
+	return r != ARCHIVE_OK;
+}
+
+static void
+libarchive_reset(fz_context *ctx, fz_libarchive_archive *arch)
+{
+	if (arch->archive)
+	{
+		archive_read_free(arch->archive);
+		arch->archive = NULL;
+	}
+	fz_seek(ctx, arch->super.file, 0, SEEK_SET);
+	if (libarchive_open(ctx, arch))
+		fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to restart archive traversal!");
+
+	arch->current_entry_idx = 0;
+}
+
+static void
+drop_libarchive_archive(fz_context *ctx, fz_archive *arch_)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
+	int i;
+
+	archive_read_free(arch->archive);
+	for (i = 0; i < arch->entries_len; ++i)
+		fz_free(ctx, arch->entries[i]);
+	fz_free(ctx, arch->entries);
+	arch->archive = NULL;
+}
+
+int
+fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
+{
+	fz_libarchive_archive arch;
+	struct archive_entry *entry;
+	int ret;
+
+	arch.super.file = file;
+	fz_seek(ctx, file, 0, SEEK_SET);
+
+	/* Annoyingly, libarchive can say "sure, I can open this" only to
+	 * then fail when we try to read from it. We therefore need to
+	 * try to read at least 1 entry out to be sure. */
+	ret = libarchive_open(ctx, &arch);
+	if (ret == ARCHIVE_OK)
+	{
+		fz_var(ret);
+
+		fz_try(ctx)
+		{
+			arch.ctx = ctx; /* safe */
+			ret = archive_read_next_header(arch.archive, &entry);
+		}
+		fz_catch(ctx)
+		{
+			archive_read_free(arch.archive);
+			fz_rethrow(ctx);
+		}
+	}
+
+	archive_read_free(arch.archive);
+
+	/* Do NOT return true if we get ARCHIVE_EOF. We will fail to recognise empty
+	 * archives, but the alternative is false positives. */
+	return ret == ARCHIVE_OK;
+}
+
+static int
+lookup_archive_entry(fz_context *ctx, fz_libarchive_archive *arch, const char *name)
+{
+	int idx;
+
+	for (idx = 0; idx < arch->entries_len; idx++)
+	{
+		if (!strcmp(name, (const char *)arch->entries[idx]->name))
+			return idx;
+	}
+
+	return -1;
+}
+
+static int has_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
+	return lookup_archive_entry(ctx, arch, name) != -1;
+}
+
+static const char *list_libarchive_entry(fz_context *ctx, fz_archive *arch_, int idx)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
+	if (idx < 0 || idx >= arch->entries_len)
+		return NULL;
+	return (const char *)arch->entries[idx]->name;
+}
+
+static int count_libarchive_entries(fz_context *ctx, fz_archive *arch_)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
+	return arch->entries_len;
+}
+
+static fz_buffer *
+read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
+{
+	fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
+	fz_buffer *ubuf = NULL;
+	int idx;
+	struct archive_entry *entry;
+	la_ssize_t ret;
+	size_t size;
+
+	idx = lookup_archive_entry(ctx, arch, name);
+	if (idx < 0)
+		return NULL;
+
+	if (arch->current_entry_idx > idx)
+		libarchive_reset(ctx, arch);
+
+	fz_var(ubuf);
+
+	arch->ctx = ctx;
+	fz_try(ctx)
+	{
+		while (arch->current_entry_idx < idx)
+		{
+			int r = archive_read_next_header(arch->archive, &entry);
+			if (r == ARCHIVE_OK)
+				r = archive_read_data_skip(arch->archive);
+			if (r != ARCHIVE_OK)
+				fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to skip over archive entry");
+			arch->current_entry_idx++;
+		}
+
+		/* This is the one we want. */
+		if (archive_read_next_header(arch->archive, &entry) != ARCHIVE_OK)
+			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive entry header");
+
+		arch->current_entry_idx++;
+		size = arch->entries[idx]->len;
+		ubuf = fz_new_buffer(ctx, size);
+		ubuf->len = size;
+
+		ret = archive_read_data(arch->archive, ubuf->data, size);
+		if (ret < 0)
+			fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive data");
+		if ((size_t)ret != size)
+			fz_warn(ctx, "Premature end of data reading archive entry data (%zu vs %zu)", (size_t)ubuf->len, (size_t)size);
+	}
+	fz_always(ctx)
+		arch->ctx = NULL;
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, ubuf);
+		fz_rethrow(ctx);
+	}
+
+	return ubuf;
+}
+
+static fz_stream *
+open_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
+{
+	fz_buffer *buf = read_libarchive_entry(ctx, arch_, name);
+	fz_stream *stm = NULL;
+
+	fz_try(ctx)
+		stm = fz_open_buffer(ctx, buf);
+	fz_always(ctx)
+		fz_drop_buffer(ctx, buf);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+
+	return stm;
+}
+
+fz_archive *
+fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
+{
+	fz_libarchive_archive *arch = fz_new_derived_archive(ctx, file, fz_libarchive_archive);
+	int r;
+	const char *path = NULL;
+	char *free_path = NULL;
+
+	fz_seek(ctx, file, 0, SEEK_SET);
+
+	if (libarchive_open(ctx, arch) != ARCHIVE_OK)
+	{
+		fz_drop_archive(ctx, &arch->super);
+		fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot recognize libarchive archive");
+	}
+
+	arch->super.format = "libarchive";
+	arch->super.count_entries = count_libarchive_entries;
+	arch->super.list_entry = list_libarchive_entry;
+	arch->super.has_entry = has_libarchive_entry;
+	arch->super.read_entry = read_libarchive_entry;
+	arch->super.open_entry = open_libarchive_entry;
+	arch->super.drop_archive = drop_libarchive_archive;
+
+	fz_var(free_path);
+
+	fz_try(ctx)
+	{
+		arch->ctx = ctx;
+		/* Count the archive entries */
+		do
+		{
+			struct archive_entry *entry;
+			size_t z;
+
+			r = archive_read_next_header(arch->archive, &entry);
+			if (r == ARCHIVE_EOF)
+				break;
+
+			if (r != ARCHIVE_OK)
+				fz_throw(ctx, FZ_ERROR_LIBRARY, "Corrupt archive");
+
+			free_path = NULL;
+			path = archive_entry_pathname_utf8(entry);
+			if (!path)
+			{
+				path = free_path = fz_utf8_from_wchar(ctx, archive_entry_pathname_w(entry));
+			}
+			if (!path)
+				continue;
+
+			if (arch->entries_len == arch->entries_max)
+			{
+				int new_max = arch->entries_max * 2;
+				if (new_max == 0)
+					new_max = 32;
+
+				arch->entries = fz_realloc(ctx, arch->entries, sizeof(arch->entries[0]) * new_max);
+				arch->entries_max = new_max;
+			}
+
+			z = strlen(path);
+			arch->entries[arch->entries_len] = fz_malloc(ctx, sizeof(entry_t) - 32 + z + 1);
+			memcpy(&arch->entries[arch->entries_len]->name[0], path, z+1);
+			if (free_path)
+			{
+				fz_free(ctx, free_path);
+				free_path = NULL;
+			}
+			arch->entries[arch->entries_len]->len = archive_entry_size(entry);
+
+			arch->entries_len++;
+		}
+		while (r != ARCHIVE_EOF && r != ARCHIVE_FATAL);
+
+		libarchive_reset(ctx, arch);
+	}
+	fz_always(ctx)
+	{
+		if (free_path)
+			fz_free(ctx, free_path);
+	}
+	fz_catch(ctx)
+	{
+		arch->ctx = NULL;
+		fz_drop_archive(ctx, &arch->super);
+		fz_rethrow(ctx);
+	}
+
+	return &arch->super;
+}
+
+fz_archive *
+fz_open_libarchive_archive(fz_context *ctx, const char *filename)
+{
+	fz_archive *tar = NULL;
+	fz_stream *file;
+
+	file = fz_open_file(ctx, filename);
+
+	fz_try(ctx)
+		tar = fz_open_libarchive_archive_with_stream(ctx, file);
+	fz_always(ctx)
+		fz_drop_stream(ctx, file);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+
+	return tar;
+}
+
+
+/* Universal decomp stream */
+
+typedef struct
+{
+	fz_stream *chain;
+	fz_context *ctx; /* Safe as not persistent. */
+	struct archive *archive;
+	struct archive_entry *entry;
+	uint8_t block[4096];
+} fz_libarchived_state;
+
+static la_ssize_t
+libarchived_read(struct archive *a, void *client_data, const void **buf)
+{
+	fz_libarchived_state *state = (fz_libarchived_state *)client_data;
+	size_t z;
+	uint8_t *p;
+	size_t left;
+	fz_context *ctx = state->ctx;
+	la_ssize_t ret = 0;
+
+	fz_try(ctx)
+	{
+		z = fz_available(ctx, state->chain, 1024);
+
+		/* If we're at the EOF, can't read anything! */
+		if (z == 0)
+			break;
+
+		/* If we have at least 1K, then just return the pointer to that
+		 * directly. */
+		if (z >= 1024)
+		{
+			*buf = state->chain->rp;
+			state->chain->rp += z;
+			ret = (la_ssize_t)z;
+			break;
+		}
+
+		/* If not, let's pull a large enough lump out. */
+
+		left = sizeof(state->block);
+		p = state->block;
+		do
+		{
+			memcpy(p, state->chain->rp, z);
+			p += z;
+			state->chain->rp += z;
+			left -= z;
+			if (left)
+			{
+				z = fz_available(ctx, state->chain, left);
+				if (z > left)
+					z = left;
+				if (z == 0)
+					break;
+			}
+		}
+		while (left != 0);
+
+		ret = p - state->block;
+		*buf = state->block;
+	}
+	fz_catch(ctx)
+	{
+		/* Ignore error */
+		archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
+		return -1;
+	}
+
+	return ret;
+}
+
+static la_int64_t
+libarchived_skip(struct archive *a, void *client_data, la_int64_t skip)
+{
+	fz_libarchived_state *state = (fz_libarchived_state *)client_data;
+	int64_t pos;
+	fz_context *ctx = state->ctx;
+
+	fz_try(ctx)
+	{
+		pos = fz_tell(state->ctx, state->chain);
+		fz_seek(state->ctx, state->chain, pos + skip, SEEK_SET);
+		pos = fz_tell(state->ctx, state->chain) - pos;
+	}
+	fz_catch(ctx)
+	{
+		/* Ignore error */
+		archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
+		return -1;
+	}
+
+	return pos;
+}
+
+static la_int64_t
+libarchived_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
+{
+	fz_libarchived_state *state = (fz_libarchived_state *)client_data;
+	fz_context *ctx = state->ctx;
+	int64_t pos;
+
+	fz_try(ctx)
+	{
+		fz_seek(ctx, state->chain, offset, whence);
+		pos = fz_tell(ctx, state->chain);
+	}
+	fz_catch(ctx)
+	{
+		/* Ignore error */
+		archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
+		return -1;
+	}
+
+	return pos;
+}
+
+static int
+libarchived_close(struct archive *a, void *client_data)
+{
+	/* Nothing to do. Stream is dropped when the fz_stream is dropped. */
+	return ARCHIVE_OK;
+}
+
+static int
+next_libarchived(fz_context *ctx, fz_stream *stm, size_t required)
+{
+	fz_libarchived_state *state = stm->state;
+	la_ssize_t z;
+
+	if (stm->eof)
+		return EOF;
+
+	z = archive_read_data(state->archive, state->block, sizeof(state->block));
+	if (z < 0)
+		fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read compressed data");
+	if (z == 0)
+	{
+		stm->eof = 1;
+		return EOF;
+	}
+
+	stm->rp = state->block;
+	stm->wp = state->block + z;
+
+	return *stm->rp++;
+}
+
+static void
+close_libarchived(fz_context *ctx, void *state_)
+{
+	fz_libarchived_state *state = (fz_libarchived_state *)state_;
+	int code;
+
+	state->ctx = ctx;
+	code = archive_read_free(state->archive);
+	state->ctx = NULL;
+	if (code != ARCHIVE_OK)
+		fz_warn(ctx, "libarchive error: archive_read_free: %d", code);
+
+	fz_drop_stream(ctx, state->chain);
+	fz_free(ctx, state);
+}
+
+fz_stream *
+fz_open_libarchived(fz_context *ctx, fz_stream *chain)
+{
+	fz_libarchived_state *state;
+	int r;
+
+	state = fz_malloc_struct(ctx, fz_libarchived_state);
+
+	state->chain = fz_keep_stream(ctx, chain);
+	state->archive = archive_read_new();
+	archive_read_support_filter_all(state->archive);
+	archive_read_support_format_raw(state->archive);
+
+	state->ctx = ctx;
+	r = archive_read_set_seek_callback(state->archive, libarchived_seek);
+	if (r == ARCHIVE_OK)
+		r = archive_read_open2(state->archive, state, NULL, libarchived_read, libarchived_skip, libarchived_close);
+	if (r != ARCHIVE_OK)
+	{
+		archive_read_free(state->archive);
+		state->ctx = NULL;
+		fz_drop_stream(ctx, state->chain);
+		fz_free(ctx, state);
+		fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
+	}
+
+	r = archive_filter_code(state->archive, 0);
+	if (r == ARCHIVE_FILTER_NONE)
+	{
+		archive_read_free(state->archive);
+		state->ctx = NULL;
+		fz_drop_stream(ctx, state->chain);
+		fz_free(ctx, state);
+		fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
+	}
+
+	/* This is the one we want. */
+	r = archive_read_next_header(state->archive, &state->entry);
+	if (r != ARCHIVE_OK)
+	{
+		archive_read_free(state->archive);
+		state->ctx = NULL;
+		fz_drop_stream(ctx, state->chain);
+		fz_free(ctx, state);
+		fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
+	}
+
+	return fz_new_stream(ctx, state, next_libarchived, close_libarchived);
+}
+
+#else
+
+int
+fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
+{
+	static int warned = 0;
+
+	if (!warned)
+	{
+		warned = 1;
+		fz_warn(ctx, "libarchive support not included");
+	}
+
+	return 0;
+}
+
+fz_archive *
+fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
+{
+	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
+}
+
+fz_archive *
+fz_open_libarchive_archive(fz_context *ctx, const char *filename)
+{
+	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
+}
+
+fz_stream *
+fz_open_libarchived(fz_context *ctx, fz_stream *chain)
+{
+	fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
+}
+
+#endif