diff mupdf-source/thirdparty/extract/src/zip.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/extract/src/zip.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,496 @@
+#include "extract/alloc.h"
+
+#include "mem.h"
+#include "outf.h"
+#include "zip.h"
+
+#include <zlib.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <time.h>
+
+#include "compat_stdint.h"
+
+
+typedef struct
+{
+	int16_t  mtime;
+	int16_t  mdate;
+	int32_t  crc_sum;
+	int32_t  size_compressed;
+	int32_t  size_uncompressed;
+	char    *name;
+	uint32_t offset;
+	uint16_t attr_internal;
+	uint32_t attr_external;
+} extract_zip_cd_file_t;
+
+struct extract_zip_t
+{
+	extract_buffer_t      *buffer;
+	extract_zip_cd_file_t *cd_files;
+	int                    cd_files_num;
+
+	/* errno_ is set to non-zero if any operation fails; avoids need to check
+	after every small output operation. */
+	int                    errno_;
+	int                    eof;
+	uint16_t               compression_method;
+	int                    compress_level;
+
+	/* Defaults for various values in zip file headers etc. */
+	uint16_t               mtime;
+	uint16_t               mdate;
+	uint16_t               version_creator;
+	uint16_t               version_extract;
+	uint16_t               general_purpose_bit_flag;
+	uint16_t               file_attr_internal;
+	uint32_t               file_attr_external;
+	char                  *archive_comment;
+};
+
+int extract_zip_open(extract_buffer_t *buffer, extract_zip_t **o_zip)
+{
+	int              e = -1;
+	extract_zip_t   *zip;
+	extract_alloc_t *alloc = extract_buffer_alloc(buffer);
+
+	if (extract_malloc(alloc, &zip, sizeof(*zip))) goto end;
+
+	zip->cd_files = NULL;
+	zip->cd_files_num = 0;
+	zip->buffer = buffer;
+	zip->errno_ = 0;
+	zip->eof = 0;
+	zip->compression_method = Z_DEFLATED;
+	zip->compress_level = Z_DEFAULT_COMPRESSION;
+
+	/* We could maybe convert current date/time to the ms-dos format required
+	here, but using zeros doesn't seem to make a difference to Word etc. */
+
+	{
+		time_t     t = time(NULL);
+		struct tm *tm;
+		#ifdef _POSIX_SOURCE
+			struct tm   tm_local;
+			tm = gmtime_r(&t, &tm_local);
+		#else
+			tm = gmtime(&t);
+		#endif
+		if (tm)
+		{
+			/* mdate and mtime are in MS DOS format:
+				mtime:
+					bits 0-4: seconds / 2.
+					bits 5-10: minute (0-59).
+					bits 11-15: hour (0-23).
+				mdate:
+					bits 0-4: day of month (1-31).
+					bits 5-8: month (1=jan, 2=feb, etc).
+					bits 9-15: year - 1980.
+			*/
+			zip->mtime = (uint16_t) ((tm->tm_hour << 11) | (tm->tm_min << 5) | (tm->tm_sec / 2));
+			zip->mdate = (uint16_t) (((1900 + tm->tm_year - 1980) << 9) | ((tm->tm_mon + 1) << 5) | tm->tm_mday);
+		}
+		else
+		{
+			outf0("*** gmtime_r() failed");
+			zip->mtime = 0;
+			zip->mdate = 0;
+		}
+	}
+
+	/* These are all copied from command-line zip on unix. */
+	zip->version_creator = (0x3 << 8) + 30; /* 0x3 is unix, 30 means 3.0. */
+	zip->version_extract = 10;              /* 10 means 1.0. */
+	zip->general_purpose_bit_flag = 0;
+	zip->file_attr_internal = 0;
+
+	/* We follow command-line zip which uses 0x81a40000 which is octal
+	0100644:0.  (0100644 is S_IFREG (regular file) plus rw-r-r. See stat(2) for
+	details.) */
+	zip->file_attr_external = (0100644 << 16) + 0;
+	if (extract_strdup(alloc, "Artifex", &zip->archive_comment)) goto end;
+
+	e = 0;
+end:
+
+	if (e) {
+		if (zip) extract_free(alloc, &zip->archive_comment);
+		extract_free(alloc, &zip);
+		*o_zip = NULL;
+	}
+	else {
+		*o_zip = zip;
+	}
+
+	return e;
+}
+
+static int s_native_little_endinesss(void)
+{
+	static const char   a[] = { 1, 2};
+	uint16_t b = *(uint16_t*) a;
+	if (b == 1 + 2*256) {
+		/* Native little-endiness. */
+		return 1;
+	}
+	else if (b == 2 + 1*256) {
+		/* Native big-endiness. */
+		return 0;
+	}
+	/* Would like to call abort() here, but that breaks on AIX/gcc. */
+	assert(0);
+	return 0;
+}
+
+
+/* Allocation fns for zlib. */
+
+static void *s_zalloc(void *opaque, unsigned items, unsigned size)
+{
+	extract_zip_t   *zip = opaque;
+	extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer);
+	void            *ptr;
+
+	if (extract_malloc(alloc, &ptr, items*size)) return NULL;
+
+	return ptr;
+}
+
+static void s_zfree(void *opaque, void *ptr)
+{
+	extract_zip_t   *zip = opaque;
+	extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer);
+
+	extract_free(alloc, &ptr);
+}
+
+
+/* Uses zlib to write raw deflate compressed data to zip->buffer. */
+static int
+s_write_compressed(
+		extract_zip_t *zip,
+		const void    *data,
+		size_t         data_length,
+		size_t        *o_compressed_length)
+{
+	int      ze;
+	z_stream zstream = {0};  /* Initialise to keep Coverity quiet. */
+
+	if (zip->errno_)    return -1;
+	if (zip->eof)       return +1;
+
+	zstream.zalloc = s_zalloc;
+	zstream.zfree = s_zfree;
+	zstream.opaque = zip;
+
+	/* We need to write raw deflate data, so we use deflateInit2() with -ve
+	windowBits. The values we use are deflateInit()'s defaults. */
+	ze = deflateInit2(&zstream,
+			zip->compress_level,
+			Z_DEFLATED,
+			-15 /*windowBits*/,
+			8 /*memLevel*/,
+			Z_DEFAULT_STRATEGY);
+	if (ze != Z_OK)
+	{
+		errno = (ze == Z_MEM_ERROR) ? ENOMEM : EINVAL;
+		zip->errno_ = errno;
+		outf("deflateInit2() failed ze=%i", ze);
+		return -1;
+	}
+
+	/* Set zstream to read from specified data. */
+	zstream.next_in = (void*) data;
+	zstream.avail_in = (unsigned) data_length;
+
+	/* We increment *o_compressed_length gradually so that if we return an
+	error, we still indicate how many butes of compressed data have been
+	written. */
+	if (o_compressed_length)
+	{
+		*o_compressed_length = 0;
+	}
+
+	for(;;)
+	{
+		/* todo: write an extract_buffer_cache() function so we can write
+		directly into output buffer if it has a fn_cache. */
+		unsigned char   buffer[1024];
+		zstream.next_out = &buffer[0];
+		zstream.avail_out = sizeof(buffer);
+		ze = deflate(&zstream, zstream.avail_in ? Z_NO_FLUSH : Z_FINISH);
+		if (ze != Z_STREAM_END && ze != Z_OK)
+		{
+			outf("deflate() failed ze=%i", ze);
+			errno = EIO;
+			zip->errno_ = errno;
+			return -1;
+		}
+		{
+			/* Send the new compressed data to buffer. */
+			size_t  bytes_written;
+			int e = extract_buffer_write(zip->buffer, buffer, zstream.next_out - buffer, &bytes_written);
+			if (o_compressed_length)
+			{
+				*o_compressed_length += bytes_written;
+			}
+			if (e)
+			{
+				if (e == -1)    zip->errno_ = errno;
+				if (e ==  +1)   zip->eof = 1;
+				outf("extract_buffer_write() failed e=%i errno=%i", e, errno);
+				return e;
+			}
+		}
+		if (ze == Z_STREAM_END)
+		{
+			break;
+		}
+	}
+	ze = deflateEnd(&zstream);
+	if (ze != Z_OK)
+	{
+		outf("deflateEnd() failed ze=%i", ze);
+		errno = EIO;
+		zip->errno_ = errno;
+		return -1;
+	}
+	if (o_compressed_length)
+	{
+		assert(*o_compressed_length == (size_t) zstream.total_out);
+	}
+
+	return 0;
+}
+
+/* Writes uncompressed data to zip->buffer. */
+static int s_write(extract_zip_t *zip, const void *data, size_t data_length)
+{
+	size_t actual;
+	int e;
+
+	if (zip->errno_)    return -1;
+	if (zip->eof)       return +1;
+
+	e = extract_buffer_write(zip->buffer, data, data_length, &actual);
+	if (e == -1)    zip->errno_ = errno;
+	if (e == +1)    zip->eof = 1;
+
+	return e;
+}
+
+static int s_write_uint32(extract_zip_t *zip, uint32_t value)
+{
+	if (s_native_little_endinesss()) {
+		return s_write(zip, &value, sizeof(value));
+	}
+	else {
+		unsigned char value2[4] = {
+				(unsigned char) (value >> 0),
+				(unsigned char) (value >> 8),
+				(unsigned char) (value >> 16),
+				(unsigned char) (value >> 24)
+				};
+		return s_write(zip, &value2, sizeof(value2));
+	}
+}
+
+static int s_write_uint16(extract_zip_t *zip, uint16_t value)
+{
+	if (s_native_little_endinesss()) {
+		return s_write(zip, &value, sizeof(value));
+	}
+	else {
+		unsigned char value2[2] = {
+				(unsigned char) (value >> 0),
+				(unsigned char) (value >> 8)
+				};
+		return s_write(zip, &value2, sizeof(value2));
+	}
+}
+
+static int s_write_string(extract_zip_t *zip, const char *text)
+{
+	return s_write(zip, text, strlen(text));
+}
+
+
+int extract_zip_write_file(
+		extract_zip_t *zip,
+		const void    *data,
+		size_t         data_length,
+		const char    *name)
+{
+	int                    e = -1;
+	extract_zip_cd_file_t *cd_file = NULL;
+	extract_alloc_t       *alloc = extract_buffer_alloc(zip->buffer);
+
+	if (data_length > INT_MAX) {
+		assert(0);
+		errno = EINVAL;
+		return -1;
+	}
+	/* Create central directory file header for later. */
+	if (extract_realloc2(
+			alloc,
+			&zip->cd_files,
+			sizeof(extract_zip_cd_file_t) * zip->cd_files_num,
+			sizeof(extract_zip_cd_file_t) * (zip->cd_files_num+1)
+			)) goto end;
+	cd_file = &zip->cd_files[zip->cd_files_num];
+	cd_file->name = NULL;
+
+	cd_file->mtime = zip->mtime;
+	cd_file->mdate = zip->mdate;
+	cd_file->crc_sum = (int32_t) crc32(crc32(0, NULL, 0), data, (int) data_length);
+	cd_file->size_uncompressed = (int) data_length;
+	if (zip->compression_method == 0)
+	{
+		cd_file->size_compressed = cd_file->size_uncompressed;
+	}
+	if (extract_strdup(alloc, name, &cd_file->name)) goto end;
+	cd_file->offset = (int) extract_buffer_pos(zip->buffer);
+	cd_file->attr_internal = zip->file_attr_internal;
+	cd_file->attr_external = zip->file_attr_external;
+	if (!cd_file->name) goto end;
+
+	/* Write local file header. If we are using compression, we set bit 3 of
+	General purpose bit flag and write zeros for crc-32, compressed size and
+	uncompressed size; then we write the actual values in data descriptor after
+	the compressed data. */
+	{
+		const char extra_local[] = "";  /* Modify for testing. */
+		uint16_t general_purpose_bit_flag = zip->general_purpose_bit_flag;
+		if (zip->compression_method)    general_purpose_bit_flag |= 8;
+		s_write_uint32(zip, 0x04034b50);
+		s_write_uint16(zip, zip->version_extract);          /* Version needed to extract (minimum). */
+		s_write_uint16(zip, general_purpose_bit_flag);      /* General purpose bit flag */
+		s_write_uint16(zip, zip->compression_method);       /* Compression method */
+		s_write_uint16(zip, cd_file->mtime);                /* File last modification time */
+		s_write_uint16(zip, cd_file->mdate);                /* File last modification date */
+		if (zip->compression_method)
+		{
+			s_write_uint32(zip, 0);                         /* CRC-32 of uncompressed data */
+			s_write_uint32(zip, 0);                         /* Compressed size */
+			s_write_uint32(zip, 0);                         /* Uncompressed size */
+		}
+		else
+		{
+			s_write_uint32(zip, cd_file->crc_sum);          /* CRC-32 of uncompressed data */
+			s_write_uint32(zip, cd_file->size_compressed);  /* Compressed size */
+			s_write_uint32(zip, cd_file->size_uncompressed);/* Uncompressed size */
+		}
+		s_write_uint16(zip, (uint16_t) strlen(name));       /* File name length (n) */
+		s_write_uint16(zip, sizeof(extra_local)-1);         /* Extra field length (m) */
+		s_write_string(zip, cd_file->name);                 /* File name */
+		s_write(zip, extra_local, sizeof(extra_local)-1);   /* Extra field */
+	}
+
+	if (zip->compression_method)
+	{
+		/* Write compressed data. */
+		size_t  data_length_compressed;
+		s_write_compressed(zip, data, data_length, &data_length_compressed);
+		cd_file->size_compressed = (int) data_length_compressed;
+
+		/* Write data descriptor. */
+		s_write_uint32(zip, 0x08074b50);                    /* Data descriptor signature */
+		s_write_uint32(zip, cd_file->crc_sum);              /* CRC-32 of uncompressed data */
+		s_write_uint32(zip, cd_file->size_compressed);      /* Compressed size */
+		s_write_uint32(zip, cd_file->size_uncompressed);    /* Uncompressed size */
+	}
+	else
+	{
+		s_write(zip, data, data_length);
+	}
+
+	if (zip->errno_)    e = -1;
+	else if (zip->eof)  e = +1;
+	else e = 0;
+
+
+end:
+
+	if (e) {
+		/* Leave zip->cd_files_num unchanged, so calling extract_zip_close()
+		will write out any earlier files. Free cd_file->name to avoid leak. */
+		if (cd_file) extract_free(alloc, &cd_file->name);
+	}
+	else {
+		/* cd_files[zip->cd_files_num] is valid. */
+		zip->cd_files_num += 1;
+	}
+
+	return e;
+}
+
+int extract_zip_close(extract_zip_t **pzip)
+{
+	int              e = -1;
+	size_t           pos;
+	size_t           len;
+	int              i;
+	extract_zip_t   *zip = *pzip;
+	extract_alloc_t *alloc;
+
+	if (!zip) {
+		return 0;
+	}
+	alloc = extract_buffer_alloc(zip->buffer);
+	pos = extract_buffer_pos(zip->buffer);
+	len = 0;
+
+	/* Write Central directory file headers, freeing data as we go. */
+	for (i=0; i<zip->cd_files_num; ++i) {
+		const char extra[] = "";
+		size_t pos2 = extract_buffer_pos(zip->buffer);
+		extract_zip_cd_file_t* cd_file = &zip->cd_files[i];
+		s_write_uint32(zip, 0x02014b50);
+		s_write_uint16(zip, zip->version_creator);              /* Version made by, copied from command-line zip. */
+		s_write_uint16(zip, zip->version_extract);              /* Version needed to extract (minimum). */
+		s_write_uint16(zip, zip->general_purpose_bit_flag);     /* General purpose bit flag */
+		s_write_uint16(zip, zip->compression_method);           /* Compression method */
+		s_write_uint16(zip, cd_file->mtime);                    /* File last modification time */
+		s_write_uint16(zip, cd_file->mdate);                    /* File last modification date */
+		s_write_uint32(zip, cd_file->crc_sum);                  /* CRC-32 of uncompressed data */
+		s_write_uint32(zip, cd_file->size_compressed);          /* Compressed size */
+		s_write_uint32(zip, cd_file->size_uncompressed);        /* Uncompressed size */
+		s_write_uint16(zip, (uint16_t) strlen(cd_file->name));  /* File name length (n) */
+		s_write_uint16(zip, sizeof(extra)-1);                   /* Extra field length (m) */
+		s_write_uint16(zip, 0);                                 /* File comment length (k) */
+		s_write_uint16(zip, 0);                                 /* Disk number where file starts */
+		s_write_uint16(zip, cd_file->attr_internal);            /* Internal file attributes */
+		s_write_uint32(zip, cd_file->attr_external);            /* External file attributes. */
+		s_write_uint32(zip, cd_file->offset);                   /* Offset of local file header. */
+		s_write_string(zip, cd_file->name);                     /* File name */
+		s_write(zip, extra, sizeof(extra)-1);                   /* Extra field */
+		len += extract_buffer_pos(zip->buffer) - pos2;
+		extract_free(alloc, &cd_file->name);
+	}
+	extract_free(alloc, &zip->cd_files);
+
+	/* Write End of central directory record. */
+	s_write_uint32(zip, 0x06054b50);
+	s_write_uint16(zip, 0);                             /* Number of this disk */
+	s_write_uint16(zip, 0);                             /* Disk where central directory starts */
+	s_write_uint16(zip, (uint16_t) zip->cd_files_num);  /* Number of central directory records on this disk */
+	s_write_uint16(zip, (uint16_t) zip->cd_files_num);  /* Total number of central directory records */
+	s_write_uint32(zip, (int) len);                     /* Size of central directory (bytes) */
+	s_write_uint32(zip, (int) pos);                     /* Offset of start of central directory, relative to start of archive */
+
+	s_write_uint16(zip, (uint16_t) strlen(zip->archive_comment));  /* Comment length (n) */
+	s_write_string(zip, zip->archive_comment);
+	extract_free(alloc, &zip->archive_comment);
+
+	if (zip->errno_)    e = -1;
+	else if (zip->eof)  e = +1;
+	else e = 0;
+
+	extract_free(alloc, pzip);
+
+	return e;
+}