Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/untar.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/untar.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,354 @@ +// Copyright (C) 2004-2024 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +#include <string.h> +#include <limits.h> + +#define TYPE_NORMAL_OLD '\0' +#define TYPE_NORMAL '0' +#define TYPE_CONTIGUOUS '7' +#define TYPE_LONG_NAME 'L' + +typedef struct +{ + char *name; + int64_t offset; + int size; +} tar_entry; + +typedef struct +{ + fz_archive super; + + int count; + tar_entry *entries; +} fz_tar_archive; + +static inline int isoctdigit(char c) +{ + return c >= '0' && c <= '7'; +} + +static inline int64_t otoi(const char *s) +{ + int64_t value = 0; + + while (*s && isoctdigit(*s)) + { + value *= 8; + value += (*s) - '0'; + s++; + } + + return value; +} + +static void drop_tar_archive(fz_context *ctx, fz_archive *arch) +{ + fz_tar_archive *tar = (fz_tar_archive *) arch; + int i; + for (i = 0; i < tar->count; ++i) + fz_free(ctx, tar->entries[i].name); + fz_free(ctx, tar->entries); +} + +static int is_zeroed(fz_context *ctx, unsigned char *buf, size_t size) +{ + size_t off; + + for (off = 0; off < size; off++) + if (buf[off] != 0) + return 0; + + return 1; +} + +static void ensure_tar_entries(fz_context *ctx, fz_tar_archive *tar) +{ + fz_stream *file = tar->super.file; + unsigned char record[512]; + char *longname = NULL; + char name[101]; + char octsize[13]; + char typeflag; + int64_t offset, blocks, size; + size_t n; + + tar->count = 0; + + fz_seek(ctx, file, 0, SEEK_SET); + + while (1) + { + offset = fz_tell(ctx, file); + n = fz_read(ctx, file, record, nelem(record)); + if (n == 0) + break; + if (n < nelem(record)) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar record"); + + if (is_zeroed(ctx, record, nelem(record))) + continue; + + memcpy(name, record + 0, nelem(name) - 1); + name[nelem(name) - 1] = '\0'; + + memcpy(octsize, record + 124, nelem(octsize) - 1); + octsize[nelem(octsize) - 1] = '\0'; + + size = otoi(octsize); + if (size > INT_MAX) + fz_throw(ctx, FZ_ERROR_FORMAT, "tar archive entry too large"); + + typeflag = (char) record[156]; + + if (typeflag == TYPE_LONG_NAME) + { + longname = fz_malloc(ctx, size + 1); + fz_try(ctx) + { + n = fz_read(ctx, file, (unsigned char *) longname, size); + if (n < (size_t) size) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar long name entry name"); + longname[size] = '\0'; + } + fz_catch(ctx) + { + fz_free(ctx, longname); + fz_rethrow(ctx); + } + + fz_seek(ctx, file, 512 - (size % 512), 1); + } + + if (typeflag != TYPE_NORMAL_OLD && typeflag != TYPE_NORMAL && + typeflag != TYPE_CONTIGUOUS && typeflag != TYPE_LONG_NAME) + continue; + + blocks = (size + 511) / 512; + fz_seek(ctx, file, blocks * 512, 1); + + tar->entries = fz_realloc_array(ctx, tar->entries, tar->count + 1, tar_entry); + + tar->entries[tar->count].offset = offset; + tar->entries[tar->count].size = size; + if (longname != NULL) + { + tar->entries[tar->count].name = longname; + longname = NULL; + } + else + tar->entries[tar->count].name = fz_strdup(ctx, name); + + tar->count++; + } +} + +static tar_entry *lookup_tar_entry(fz_context *ctx, fz_tar_archive *tar, const char *name) +{ + int i; + for (i = 0; i < tar->count; i++) + if (!fz_strcasecmp(name, tar->entries[i].name)) + return &tar->entries[i]; + return NULL; +} + +static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_tar_archive *tar = (fz_tar_archive *) arch; + fz_stream *file = tar->super.file; + tar_entry *ent; + + ent = lookup_tar_entry(ctx, tar, name); + if (!ent) + return NULL; + + fz_seek(ctx, file, ent->offset + 512, 0); + return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file)); +} + +static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_tar_archive *tar = (fz_tar_archive *) arch; + fz_stream *file = tar->super.file; + fz_buffer *ubuf; + tar_entry *ent; + + ent = lookup_tar_entry(ctx, tar, name); + if (!ent) + return NULL; + + ubuf = fz_new_buffer(ctx, ent->size); + + fz_try(ctx) + { + fz_seek(ctx, file, ent->offset + 512, 0); + ubuf->len = fz_read(ctx, file, ubuf->data, ent->size); + if (ubuf->len != (size_t)ent->size) + fz_throw(ctx, FZ_ERROR_FORMAT, "cannot read entire archive entry"); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, ubuf); + fz_rethrow(ctx); + } + + return ubuf; +} + +static int has_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) +{ + fz_tar_archive *tar = (fz_tar_archive *) arch; + tar_entry *ent = lookup_tar_entry(ctx, tar, name); + return ent != NULL; +} + +static const char *list_tar_entry(fz_context *ctx, fz_archive *arch, int idx) +{ + fz_tar_archive *tar = (fz_tar_archive *) arch; + if (idx < 0 || idx >= tar->count) + return NULL; + return tar->entries[idx].name; +} + +static int count_tar_entries(fz_context *ctx, fz_archive *arch) +{ + fz_tar_archive *tar = (fz_tar_archive *) arch; + return tar->count; +} + +static int isoct(unsigned char *d, int n) +{ + while (--n > 0) + { + unsigned char c = *d++; + if (c < '0' || c > '7') + return 0; + } + return (*d == 0); +} + +static int +check_v7(fz_context *ctx, fz_stream *file) +{ + unsigned char data[512]; + size_t n; + int i; + + fz_seek(ctx, file, 0, SEEK_SET); + n = fz_read(ctx, file, data, nelem(data)); + if (n != nelem(data)) + return 0; + + /* Skip over name. */ + for (i = 0; i < 100 && data[i] != 0; i++); + + /* We want at least 1 byte of name, and a zero terminator. */ + if (i == 0 || i == 100) + return 0; + + /* Skip over a run of zero terminators. */ + for (; i < 100 && data[i] == 0; i++); + + if (i != 100) + return 0; + + return (isoct(data+100, 8) && + isoct(data+108, 8) && + isoct(data+116, 8) && + isoct(data+124, 12) && + isoct(data+136, 12) && + isoct(data+148, 8)); +} + +int +fz_is_tar_archive(fz_context *ctx, fz_stream *file) +{ + const unsigned char gnusignature[6] = { 'u', 's', 't', 'a', 'r', ' ' }; + const unsigned char paxsignature[6] = { 'u', 's', 't', 'a', 'r', '\0' }; + const unsigned char v7signature[6] = { '\0', '\0', '\0', '\0', '\0', '\0' }; + unsigned char data[6]; + size_t n; + + fz_seek(ctx, file, 257, 0); + n = fz_read(ctx, file, data, nelem(data)); + if (n != nelem(data)) + return 0; + if (!memcmp(data, gnusignature, nelem(gnusignature))) + return 1; + if (!memcmp(data, paxsignature, nelem(paxsignature))) + return 1; + if (!memcmp(data, v7signature, nelem(v7signature))) + return check_v7(ctx, file); + + return 0; +} + +fz_archive * +fz_open_tar_archive_with_stream(fz_context *ctx, fz_stream *file) +{ + fz_tar_archive *tar; + + if (!fz_is_tar_archive(ctx, file)) + fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize tar archive"); + + tar = fz_new_derived_archive(ctx, file, fz_tar_archive); + tar->super.format = "tar"; + tar->super.count_entries = count_tar_entries; + tar->super.list_entry = list_tar_entry; + tar->super.has_entry = has_tar_entry; + tar->super.read_entry = read_tar_entry; + tar->super.open_entry = open_tar_entry; + tar->super.drop_archive = drop_tar_archive; + + fz_try(ctx) + { + ensure_tar_entries(ctx, tar); + } + fz_catch(ctx) + { + fz_drop_archive(ctx, &tar->super); + fz_rethrow(ctx); + } + + return &tar->super; +} + +fz_archive * +fz_open_tar_archive(fz_context *ctx, const char *filename) +{ + fz_archive *tar = NULL; + fz_stream *file; + + file = fz_open_file(ctx, filename); + + fz_try(ctx) + tar = fz_open_tar_archive_with_stream(ctx, file); + fz_always(ctx) + fz_drop_stream(ctx, file); + fz_catch(ctx) + fz_rethrow(ctx); + + return tar; +}
