Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/untar.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2024 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 #include <string.h> | |
| 26 #include <limits.h> | |
| 27 | |
/* Values of the typeflag byte (header offset 156) that this reader recognises. */
enum
{
	TYPE_NORMAL_OLD = '\0',	/* regular file, written with a NUL typeflag */
	TYPE_NORMAL = '0',	/* regular file */
	TYPE_CONTIGUOUS = '7',	/* contiguous file, handled like a regular file */
	TYPE_LONG_NAME = 'L'	/* record whose payload is a long file name */
};
| 32 | |
/* One member of the archive, as recorded while scanning the tar stream. */
typedef struct tar_entry
{
	/* Heap-allocated, NUL-terminated member name; freed when the archive is dropped. */
	char *name;
	/* Stream position of the member's 512-byte header; its data starts 512 bytes later. */
	int64_t offset;
	/* Data size in bytes, parsed from the header's octal size field. */
	int size;
} tar_entry;
| 39 | |
| 40 typedef struct | |
| 41 { | |
| 42 fz_archive super; | |
| 43 | |
| 44 int count; | |
| 45 tar_entry *entries; | |
| 46 } fz_tar_archive; | |
| 47 | |
/* Return non-zero when c is an ASCII octal digit ('0' to '7'). */
static inline int isoctdigit(char c)
{
	return c >= '0' && c <= '7';
}

/*
	Parse an octal number from a NUL-terminated tar header field.

	Leading blanks are skipped, because some tar writers pad numeric
	fields with spaces rather than zeros; without this such a field
	would be read as 0. Parsing stops at the first character that is
	not an octal digit (normally the terminating space or NUL).

	Returns the parsed value, or 0 when the field holds no digits.
*/
static inline int64_t otoi(const char *s)
{
	int64_t value = 0;

	while (*s == ' ')
		s++;

	/* NUL is not an octal digit, so this also stops at the end of the string. */
	for (; isoctdigit(*s); s++)
		value = value * 8 + (*s - '0');

	return value;
}
| 66 | |
| 67 static void drop_tar_archive(fz_context *ctx, fz_archive *arch) | |
| 68 { | |
| 69 fz_tar_archive *tar = (fz_tar_archive *) arch; | |
| 70 int i; | |
| 71 for (i = 0; i < tar->count; ++i) | |
| 72 fz_free(ctx, tar->entries[i].name); | |
| 73 fz_free(ctx, tar->entries); | |
| 74 } | |
| 75 | |
| 76 static int is_zeroed(fz_context *ctx, unsigned char *buf, size_t size) | |
| 77 { | |
| 78 size_t off; | |
| 79 | |
| 80 for (off = 0; off < size; off++) | |
| 81 if (buf[off] != 0) | |
| 82 return 0; | |
| 83 | |
| 84 return 1; | |
| 85 } | |
| 86 | |
| 87 static void ensure_tar_entries(fz_context *ctx, fz_tar_archive *tar) | |
| 88 { | |
| 89 fz_stream *file = tar->super.file; | |
| 90 unsigned char record[512]; | |
| 91 char *longname = NULL; | |
| 92 char name[101]; | |
| 93 char octsize[13]; | |
| 94 char typeflag; | |
| 95 int64_t offset, blocks, size; | |
| 96 size_t n; | |
| 97 | |
| 98 tar->count = 0; | |
| 99 | |
| 100 fz_seek(ctx, file, 0, SEEK_SET); | |
| 101 | |
| 102 while (1) | |
| 103 { | |
| 104 offset = fz_tell(ctx, file); | |
| 105 n = fz_read(ctx, file, record, nelem(record)); | |
| 106 if (n == 0) | |
| 107 break; | |
| 108 if (n < nelem(record)) | |
| 109 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar record"); | |
| 110 | |
| 111 if (is_zeroed(ctx, record, nelem(record))) | |
| 112 continue; | |
| 113 | |
| 114 memcpy(name, record + 0, nelem(name) - 1); | |
| 115 name[nelem(name) - 1] = '\0'; | |
| 116 | |
| 117 memcpy(octsize, record + 124, nelem(octsize) - 1); | |
| 118 octsize[nelem(octsize) - 1] = '\0'; | |
| 119 | |
| 120 size = otoi(octsize); | |
| 121 if (size > INT_MAX) | |
| 122 fz_throw(ctx, FZ_ERROR_FORMAT, "tar archive entry too large"); | |
| 123 | |
| 124 typeflag = (char) record[156]; | |
| 125 | |
| 126 if (typeflag == TYPE_LONG_NAME) | |
| 127 { | |
| 128 longname = fz_malloc(ctx, size + 1); | |
| 129 fz_try(ctx) | |
| 130 { | |
| 131 n = fz_read(ctx, file, (unsigned char *) longname, size); | |
| 132 if (n < (size_t) size) | |
| 133 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar long name entry name"); | |
| 134 longname[size] = '\0'; | |
| 135 } | |
| 136 fz_catch(ctx) | |
| 137 { | |
| 138 fz_free(ctx, longname); | |
| 139 fz_rethrow(ctx); | |
| 140 } | |
| 141 | |
| 142 fz_seek(ctx, file, 512 - (size % 512), 1); | |
| 143 } | |
| 144 | |
| 145 if (typeflag != TYPE_NORMAL_OLD && typeflag != TYPE_NORMAL && | |
| 146 typeflag != TYPE_CONTIGUOUS && typeflag != TYPE_LONG_NAME) | |
| 147 continue; | |
| 148 | |
| 149 blocks = (size + 511) / 512; | |
| 150 fz_seek(ctx, file, blocks * 512, 1); | |
| 151 | |
| 152 tar->entries = fz_realloc_array(ctx, tar->entries, tar->count + 1, tar_entry); | |
| 153 | |
| 154 tar->entries[tar->count].offset = offset; | |
| 155 tar->entries[tar->count].size = size; | |
| 156 if (longname != NULL) | |
| 157 { | |
| 158 tar->entries[tar->count].name = longname; | |
| 159 longname = NULL; | |
| 160 } | |
| 161 else | |
| 162 tar->entries[tar->count].name = fz_strdup(ctx, name); | |
| 163 | |
| 164 tar->count++; | |
| 165 } | |
| 166 } | |
| 167 | |
| 168 static tar_entry *lookup_tar_entry(fz_context *ctx, fz_tar_archive *tar, const char *name) | |
| 169 { | |
| 170 int i; | |
| 171 for (i = 0; i < tar->count; i++) | |
| 172 if (!fz_strcasecmp(name, tar->entries[i].name)) | |
| 173 return &tar->entries[i]; | |
| 174 return NULL; | |
| 175 } | |
| 176 | |
| 177 static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 178 { | |
| 179 fz_tar_archive *tar = (fz_tar_archive *) arch; | |
| 180 fz_stream *file = tar->super.file; | |
| 181 tar_entry *ent; | |
| 182 | |
| 183 ent = lookup_tar_entry(ctx, tar, name); | |
| 184 if (!ent) | |
| 185 return NULL; | |
| 186 | |
| 187 fz_seek(ctx, file, ent->offset + 512, 0); | |
| 188 return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file)); | |
| 189 } | |
| 190 | |
| 191 static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 192 { | |
| 193 fz_tar_archive *tar = (fz_tar_archive *) arch; | |
| 194 fz_stream *file = tar->super.file; | |
| 195 fz_buffer *ubuf; | |
| 196 tar_entry *ent; | |
| 197 | |
| 198 ent = lookup_tar_entry(ctx, tar, name); | |
| 199 if (!ent) | |
| 200 return NULL; | |
| 201 | |
| 202 ubuf = fz_new_buffer(ctx, ent->size); | |
| 203 | |
| 204 fz_try(ctx) | |
| 205 { | |
| 206 fz_seek(ctx, file, ent->offset + 512, 0); | |
| 207 ubuf->len = fz_read(ctx, file, ubuf->data, ent->size); | |
| 208 if (ubuf->len != (size_t)ent->size) | |
| 209 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot read entire archive entry"); | |
| 210 } | |
| 211 fz_catch(ctx) | |
| 212 { | |
| 213 fz_drop_buffer(ctx, ubuf); | |
| 214 fz_rethrow(ctx); | |
| 215 } | |
| 216 | |
| 217 return ubuf; | |
| 218 } | |
| 219 | |
| 220 static int has_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 221 { | |
| 222 fz_tar_archive *tar = (fz_tar_archive *) arch; | |
| 223 tar_entry *ent = lookup_tar_entry(ctx, tar, name); | |
| 224 return ent != NULL; | |
| 225 } | |
| 226 | |
| 227 static const char *list_tar_entry(fz_context *ctx, fz_archive *arch, int idx) | |
| 228 { | |
| 229 fz_tar_archive *tar = (fz_tar_archive *) arch; | |
| 230 if (idx < 0 || idx >= tar->count) | |
| 231 return NULL; | |
| 232 return tar->entries[idx].name; | |
| 233 } | |
| 234 | |
| 235 static int count_tar_entries(fz_context *ctx, fz_archive *arch) | |
| 236 { | |
| 237 fz_tar_archive *tar = (fz_tar_archive *) arch; | |
| 238 return tar->count; | |
| 239 } | |
| 240 | |
/*
	Check an n-byte numeric field: the first n-1 bytes must all be octal
	digits and the byte after them must be NUL. Returns 1 on a match,
	0 otherwise.
*/
static int isoct(unsigned char *d, int n)
{
	int i;

	for (i = 1; i < n; i++)
	{
		if (*d < '0' || *d > '7')
			return 0;
		d++;
	}

	return (*d == 0);
}
| 251 | |
| 252 static int | |
| 253 check_v7(fz_context *ctx, fz_stream *file) | |
| 254 { | |
| 255 unsigned char data[512]; | |
| 256 size_t n; | |
| 257 int i; | |
| 258 | |
| 259 fz_seek(ctx, file, 0, SEEK_SET); | |
| 260 n = fz_read(ctx, file, data, nelem(data)); | |
| 261 if (n != nelem(data)) | |
| 262 return 0; | |
| 263 | |
| 264 /* Skip over name. */ | |
| 265 for (i = 0; i < 100 && data[i] != 0; i++); | |
| 266 | |
| 267 /* We want at least 1 byte of name, and a zero terminator. */ | |
| 268 if (i == 0 || i == 100) | |
| 269 return 0; | |
| 270 | |
| 271 /* Skip over a run of zero terminators. */ | |
| 272 for (; i < 100 && data[i] == 0; i++); | |
| 273 | |
| 274 if (i != 100) | |
| 275 return 0; | |
| 276 | |
| 277 return (isoct(data+100, 8) && | |
| 278 isoct(data+108, 8) && | |
| 279 isoct(data+116, 8) && | |
| 280 isoct(data+124, 12) && | |
| 281 isoct(data+136, 12) && | |
| 282 isoct(data+148, 8)); | |
| 283 } | |
| 284 | |
| 285 int | |
| 286 fz_is_tar_archive(fz_context *ctx, fz_stream *file) | |
| 287 { | |
| 288 const unsigned char gnusignature[6] = { 'u', 's', 't', 'a', 'r', ' ' }; | |
| 289 const unsigned char paxsignature[6] = { 'u', 's', 't', 'a', 'r', '\0' }; | |
| 290 const unsigned char v7signature[6] = { '\0', '\0', '\0', '\0', '\0', '\0' }; | |
| 291 unsigned char data[6]; | |
| 292 size_t n; | |
| 293 | |
| 294 fz_seek(ctx, file, 257, 0); | |
| 295 n = fz_read(ctx, file, data, nelem(data)); | |
| 296 if (n != nelem(data)) | |
| 297 return 0; | |
| 298 if (!memcmp(data, gnusignature, nelem(gnusignature))) | |
| 299 return 1; | |
| 300 if (!memcmp(data, paxsignature, nelem(paxsignature))) | |
| 301 return 1; | |
| 302 if (!memcmp(data, v7signature, nelem(v7signature))) | |
| 303 return check_v7(ctx, file); | |
| 304 | |
| 305 return 0; | |
| 306 } | |
| 307 | |
| 308 fz_archive * | |
| 309 fz_open_tar_archive_with_stream(fz_context *ctx, fz_stream *file) | |
| 310 { | |
| 311 fz_tar_archive *tar; | |
| 312 | |
| 313 if (!fz_is_tar_archive(ctx, file)) | |
| 314 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize tar archive"); | |
| 315 | |
| 316 tar = fz_new_derived_archive(ctx, file, fz_tar_archive); | |
| 317 tar->super.format = "tar"; | |
| 318 tar->super.count_entries = count_tar_entries; | |
| 319 tar->super.list_entry = list_tar_entry; | |
| 320 tar->super.has_entry = has_tar_entry; | |
| 321 tar->super.read_entry = read_tar_entry; | |
| 322 tar->super.open_entry = open_tar_entry; | |
| 323 tar->super.drop_archive = drop_tar_archive; | |
| 324 | |
| 325 fz_try(ctx) | |
| 326 { | |
| 327 ensure_tar_entries(ctx, tar); | |
| 328 } | |
| 329 fz_catch(ctx) | |
| 330 { | |
| 331 fz_drop_archive(ctx, &tar->super); | |
| 332 fz_rethrow(ctx); | |
| 333 } | |
| 334 | |
| 335 return &tar->super; | |
| 336 } | |
| 337 | |
| 338 fz_archive * | |
| 339 fz_open_tar_archive(fz_context *ctx, const char *filename) | |
| 340 { | |
| 341 fz_archive *tar = NULL; | |
| 342 fz_stream *file; | |
| 343 | |
| 344 file = fz_open_file(ctx, filename); | |
| 345 | |
| 346 fz_try(ctx) | |
| 347 tar = fz_open_tar_archive_with_stream(ctx, file); | |
| 348 fz_always(ctx) | |
| 349 fz_drop_stream(ctx, file); | |
| 350 fz_catch(ctx) | |
| 351 fz_rethrow(ctx); | |
| 352 | |
| 353 return tar; | |
| 354 } |
