Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/archive.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2024 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 #include <string.h> | |
| 26 | |
/* Number of slots in the handler table of fz_archive_handler_context. */
enum { FZ_ARCHIVE_HANDLER_MAX = 32 };
| 31 | |
| 32 struct fz_archive_handler_context | |
| 33 { | |
| 34 int refs; | |
| 35 int count; | |
| 36 const fz_archive_handler *handler[FZ_ARCHIVE_HANDLER_MAX]; | |
| 37 }; | |
| 38 | |
| 39 fz_stream * | |
| 40 fz_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 41 { | |
| 42 fz_stream *stream = fz_try_open_archive_entry(ctx, arch, name); | |
| 43 | |
| 44 if (stream == NULL) | |
| 45 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name); | |
| 46 | |
| 47 return stream; | |
| 48 } | |
| 49 | |
| 50 fz_stream * | |
| 51 fz_try_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 52 { | |
| 53 char *local_name; | |
| 54 fz_stream *stream = NULL; | |
| 55 | |
| 56 if (arch == NULL || !arch->open_entry) | |
| 57 return NULL; | |
| 58 | |
| 59 local_name = fz_cleanname_strdup(ctx, name); | |
| 60 | |
| 61 fz_var(stream); | |
| 62 | |
| 63 fz_try(ctx) | |
| 64 stream = arch->open_entry(ctx, arch, local_name); | |
| 65 fz_always(ctx) | |
| 66 fz_free(ctx, local_name); | |
| 67 fz_catch(ctx) | |
| 68 fz_rethrow(ctx); | |
| 69 | |
| 70 return stream; | |
| 71 } | |
| 72 | |
| 73 fz_buffer * | |
| 74 fz_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 75 { | |
| 76 fz_buffer *buf = fz_try_read_archive_entry(ctx, arch, name); | |
| 77 | |
| 78 if (buf == NULL) | |
| 79 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name); | |
| 80 | |
| 81 return buf; | |
| 82 } | |
| 83 | |
| 84 fz_buffer * | |
| 85 fz_try_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 86 { | |
| 87 char *local_name; | |
| 88 fz_buffer *buf = NULL; | |
| 89 | |
| 90 if (arch == NULL || !arch->read_entry || !arch->has_entry || name == NULL) | |
| 91 return NULL; | |
| 92 | |
| 93 local_name = fz_cleanname_strdup(ctx, name); | |
| 94 | |
| 95 fz_var(buf); | |
| 96 | |
| 97 fz_try(ctx) | |
| 98 { | |
| 99 if (!arch->has_entry(ctx, arch, local_name)) | |
| 100 break; | |
| 101 buf = arch->read_entry(ctx, arch, local_name); | |
| 102 } | |
| 103 fz_always(ctx) | |
| 104 fz_free(ctx, local_name); | |
| 105 fz_catch(ctx) | |
| 106 fz_rethrow(ctx); | |
| 107 | |
| 108 return buf; | |
| 109 } | |
| 110 | |
| 111 int | |
| 112 fz_has_archive_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 113 { | |
| 114 char *local_name; | |
| 115 int res = 0; | |
| 116 | |
| 117 if (arch == NULL) | |
| 118 return 0; | |
| 119 if (!arch->has_entry) | |
| 120 return 0; | |
| 121 | |
| 122 local_name = fz_cleanname_strdup(ctx, name); | |
| 123 | |
| 124 fz_var(res); | |
| 125 | |
| 126 fz_try(ctx) | |
| 127 res = arch->has_entry(ctx, arch, local_name); | |
| 128 fz_always(ctx) | |
| 129 fz_free(ctx, local_name); | |
| 130 fz_catch(ctx) | |
| 131 fz_rethrow(ctx); | |
| 132 | |
| 133 return res; | |
| 134 } | |
| 135 | |
| 136 const char * | |
| 137 fz_list_archive_entry(fz_context *ctx, fz_archive *arch, int idx) | |
| 138 { | |
| 139 if (arch == 0) | |
| 140 return NULL; | |
| 141 if (!arch->list_entry) | |
| 142 return NULL; | |
| 143 | |
| 144 return arch->list_entry(ctx, arch, idx); | |
| 145 } | |
| 146 | |
| 147 int | |
| 148 fz_count_archive_entries(fz_context *ctx, fz_archive *arch) | |
| 149 { | |
| 150 if (arch == NULL) | |
| 151 return 0; | |
| 152 if (!arch->count_entries) | |
| 153 return 0; | |
| 154 return arch->count_entries(ctx, arch); | |
| 155 } | |
| 156 | |
| 157 const char * | |
| 158 fz_archive_format(fz_context *ctx, fz_archive *arch) | |
| 159 { | |
| 160 if (arch == NULL) | |
| 161 return "undefined"; | |
| 162 return arch->format; | |
| 163 } | |
| 164 | |
| 165 fz_archive * | |
| 166 fz_new_archive_of_size(fz_context *ctx, fz_stream *file, int size) | |
| 167 { | |
| 168 fz_archive *arch; | |
| 169 arch = Memento_label(fz_calloc(ctx, 1, size), "fz_archive"); | |
| 170 arch->refs = 1; | |
| 171 arch->file = fz_keep_stream(ctx, file); | |
| 172 return arch; | |
| 173 } | |
| 174 | |
| 175 fz_archive * | |
| 176 fz_try_open_archive_with_stream(fz_context *ctx, fz_stream *file) | |
| 177 { | |
| 178 fz_archive *arch = NULL; | |
| 179 int i; | |
| 180 | |
| 181 if (file == NULL) | |
| 182 return NULL; | |
| 183 | |
| 184 for (i = 0; i < ctx->archive->count; i++) | |
| 185 { | |
| 186 fz_seek(ctx, file, 0, SEEK_SET); | |
| 187 if (ctx->archive->handler[i]->recognize(ctx, file)) | |
| 188 { | |
| 189 arch = ctx->archive->handler[i]->open(ctx, file); | |
| 190 if (arch) | |
| 191 return arch; | |
| 192 } | |
| 193 } | |
| 194 | |
| 195 return NULL; | |
| 196 } | |
| 197 | |
| 198 fz_archive * | |
| 199 fz_open_archive_with_stream(fz_context *ctx, fz_stream *file) | |
| 200 { | |
| 201 fz_archive *arch = fz_try_open_archive_with_stream(ctx, file); | |
| 202 if (arch == NULL) | |
| 203 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize archive"); | |
| 204 return arch; | |
| 205 } | |
| 206 | |
| 207 fz_archive * | |
| 208 fz_open_archive(fz_context *ctx, const char *filename) | |
| 209 { | |
| 210 fz_stream *file; | |
| 211 fz_archive *arch = NULL; | |
| 212 | |
| 213 file = fz_open_file(ctx, filename); | |
| 214 | |
| 215 fz_try(ctx) | |
| 216 arch = fz_open_archive_with_stream(ctx, file); | |
| 217 fz_always(ctx) | |
| 218 fz_drop_stream(ctx, file); | |
| 219 fz_catch(ctx) | |
| 220 fz_rethrow(ctx); | |
| 221 | |
| 222 return arch; | |
| 223 } | |
| 224 | |
| 225 fz_archive * | |
| 226 fz_keep_archive(fz_context *ctx, fz_archive *arch) | |
| 227 { | |
| 228 return (fz_archive *)fz_keep_imp(ctx, arch, &arch->refs); | |
| 229 } | |
| 230 | |
| 231 void | |
| 232 fz_drop_archive(fz_context *ctx, fz_archive *arch) | |
| 233 { | |
| 234 if (fz_drop_imp(ctx, arch, &arch->refs)) | |
| 235 { | |
| 236 if (arch->drop_archive) | |
| 237 arch->drop_archive(ctx, arch); | |
| 238 fz_drop_stream(ctx, arch->file); | |
| 239 fz_free(ctx, arch); | |
| 240 } | |
| 241 } | |
| 242 | |
| 243 /* In-memory archive using a fz_tree holding fz_buffers */ | |
| 244 | |
| 245 typedef struct | |
| 246 { | |
| 247 fz_archive super; | |
| 248 fz_tree *tree; | |
| 249 } fz_tree_archive; | |
| 250 | |
| 251 static int has_tree_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 252 { | |
| 253 fz_tree *tree = ((fz_tree_archive*)arch)->tree; | |
| 254 fz_buffer *ent = fz_tree_lookup(ctx, tree, name); | |
| 255 return ent != NULL; | |
| 256 } | |
| 257 | |
| 258 static fz_buffer *read_tree_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 259 { | |
| 260 fz_tree *tree = ((fz_tree_archive*)arch)->tree; | |
| 261 fz_buffer *ent = fz_tree_lookup(ctx, tree, name); | |
| 262 return fz_keep_buffer(ctx, ent); | |
| 263 } | |
| 264 | |
| 265 static fz_stream *open_tree_entry(fz_context *ctx, fz_archive *arch, const char *name) | |
| 266 { | |
| 267 fz_tree *tree = ((fz_tree_archive*)arch)->tree; | |
| 268 fz_buffer *ent = fz_tree_lookup(ctx, tree, name); | |
| 269 return fz_open_buffer(ctx, ent); | |
| 270 } | |
| 271 | |
| 272 static void drop_tree_archive_entry(fz_context *ctx, void *ent) | |
| 273 { | |
| 274 fz_drop_buffer(ctx, ent); | |
| 275 } | |
| 276 | |
| 277 static void drop_tree_archive(fz_context *ctx, fz_archive *arch) | |
| 278 { | |
| 279 fz_tree *tree = ((fz_tree_archive*)arch)->tree; | |
| 280 fz_drop_tree(ctx, tree, drop_tree_archive_entry); | |
| 281 } | |
| 282 | |
| 283 fz_archive * | |
| 284 fz_new_tree_archive(fz_context *ctx, fz_tree *tree) | |
| 285 { | |
| 286 fz_tree_archive *arch; | |
| 287 | |
| 288 arch = fz_new_derived_archive(ctx, NULL, fz_tree_archive); | |
| 289 arch->super.format = "tree"; | |
| 290 arch->super.has_entry = has_tree_entry; | |
| 291 arch->super.read_entry = read_tree_entry; | |
| 292 arch->super.open_entry = open_tree_entry; | |
| 293 arch->super.drop_archive = drop_tree_archive; | |
| 294 arch->tree = tree; | |
| 295 | |
| 296 return &arch->super; | |
| 297 } | |
| 298 | |
| 299 void | |
| 300 fz_tree_archive_add_buffer(fz_context *ctx, fz_archive *arch_, const char *name, fz_buffer *buf) | |
| 301 { | |
| 302 fz_tree_archive *arch = (fz_tree_archive *)arch_; | |
| 303 | |
| 304 if (arch == NULL || arch->super.has_entry != has_tree_entry) | |
| 305 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive"); | |
| 306 | |
| 307 buf = fz_keep_buffer(ctx, buf); | |
| 308 | |
| 309 fz_try(ctx) | |
| 310 arch->tree = fz_tree_insert(ctx, arch->tree, name, buf); | |
| 311 fz_catch(ctx) | |
| 312 { | |
| 313 fz_drop_buffer(ctx, buf); | |
| 314 fz_rethrow(ctx); | |
| 315 } | |
| 316 } | |
| 317 | |
| 318 void | |
| 319 fz_tree_archive_add_data(fz_context *ctx, fz_archive *arch_, const char *name, const void *data, size_t size) | |
| 320 { | |
| 321 fz_tree_archive *arch = (fz_tree_archive *)arch_; | |
| 322 fz_buffer *buf; | |
| 323 | |
| 324 if (arch == NULL || arch->super.has_entry != has_tree_entry) | |
| 325 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive"); | |
| 326 | |
| 327 buf = fz_new_buffer_from_copied_data(ctx, data, size); | |
| 328 | |
| 329 fz_try(ctx) | |
| 330 arch->tree = fz_tree_insert(ctx, arch->tree, name, buf); | |
| 331 fz_catch(ctx) | |
| 332 { | |
| 333 fz_drop_buffer(ctx, buf); | |
| 334 fz_rethrow(ctx); | |
| 335 } | |
| 336 } | |
| 337 | |
| 338 typedef struct | |
| 339 { | |
| 340 fz_archive *arch; | |
| 341 char *dir; | |
| 342 } multi_archive_entry; | |
| 343 | |
| 344 typedef struct | |
| 345 { | |
| 346 fz_archive super; | |
| 347 int len; | |
| 348 int max; | |
| 349 multi_archive_entry *sub; | |
| 350 } fz_multi_archive; | |
| 351 | |
| 352 static int has_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name) | |
| 353 { | |
| 354 fz_multi_archive *arch = (fz_multi_archive *)arch_; | |
| 355 int i; | |
| 356 | |
| 357 for (i = arch->len-1; i >= 0; i--) | |
| 358 { | |
| 359 multi_archive_entry *e = &arch->sub[i]; | |
| 360 const char *subname = name; | |
| 361 if (e->dir) | |
| 362 { | |
| 363 size_t n = strlen(e->dir); | |
| 364 if (strncmp(e->dir, name, n) != 0) | |
| 365 continue; | |
| 366 subname += n; | |
| 367 } | |
| 368 if (fz_has_archive_entry(ctx, arch->sub[i].arch, subname)) | |
| 369 return 1; | |
| 370 } | |
| 371 return 0; | |
| 372 } | |
| 373 | |
| 374 static fz_buffer *read_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name) | |
| 375 { | |
| 376 fz_multi_archive *arch = (fz_multi_archive *)arch_; | |
| 377 int i; | |
| 378 fz_buffer *res = NULL; | |
| 379 | |
| 380 for (i = arch->len-1; i >= 0; i--) | |
| 381 { | |
| 382 multi_archive_entry *e = &arch->sub[i]; | |
| 383 const char *subname = name; | |
| 384 | |
| 385 if (e->dir) | |
| 386 { | |
| 387 size_t n = strlen(e->dir); | |
| 388 if (strncmp(e->dir, name, n) != 0) | |
| 389 continue; | |
| 390 subname += n; | |
| 391 } | |
| 392 | |
| 393 res = fz_try_read_archive_entry(ctx, arch->sub[i].arch, subname); | |
| 394 | |
| 395 if (res) | |
| 396 break; | |
| 397 } | |
| 398 | |
| 399 return res; | |
| 400 } | |
| 401 | |
| 402 static fz_stream *open_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name) | |
| 403 { | |
| 404 fz_multi_archive *arch = (fz_multi_archive *)arch_; | |
| 405 int i; | |
| 406 fz_stream *res = NULL; | |
| 407 | |
| 408 for (i = arch->len-1; i >= 0; i--) | |
| 409 { | |
| 410 multi_archive_entry *e = &arch->sub[i]; | |
| 411 const char *subname = name; | |
| 412 | |
| 413 if (e->dir) | |
| 414 { | |
| 415 size_t n = strlen(e->dir); | |
| 416 if (strncmp(e->dir, name, n) != 0) | |
| 417 continue; | |
| 418 subname += n; | |
| 419 } | |
| 420 | |
| 421 res = fz_open_archive_entry(ctx, arch->sub[i].arch, subname); | |
| 422 | |
| 423 if (res) | |
| 424 break; | |
| 425 } | |
| 426 | |
| 427 return res; | |
| 428 } | |
| 429 | |
| 430 static void drop_multi_archive(fz_context *ctx, fz_archive *arch_) | |
| 431 { | |
| 432 fz_multi_archive *arch = (fz_multi_archive *)arch_; | |
| 433 int i; | |
| 434 | |
| 435 for (i = arch->len-1; i >= 0; i--) | |
| 436 { | |
| 437 multi_archive_entry *e = &arch->sub[i]; | |
| 438 fz_free(ctx, e->dir); | |
| 439 fz_drop_archive(ctx, e->arch); | |
| 440 } | |
| 441 fz_free(ctx, arch->sub); | |
| 442 } | |
| 443 | |
| 444 fz_archive * | |
| 445 fz_new_multi_archive(fz_context *ctx) | |
| 446 { | |
| 447 fz_multi_archive *arch; | |
| 448 | |
| 449 arch = fz_new_derived_archive(ctx, NULL, fz_multi_archive); | |
| 450 arch->super.format = "multi"; | |
| 451 arch->super.has_entry = has_multi_entry; | |
| 452 arch->super.read_entry = read_multi_entry; | |
| 453 arch->super.open_entry = open_multi_entry; | |
| 454 arch->super.drop_archive = drop_multi_archive; | |
| 455 arch->max = 0; | |
| 456 arch->len = 0; | |
| 457 arch->sub = NULL; | |
| 458 | |
| 459 return &arch->super; | |
| 460 } | |
| 461 | |
| 462 void | |
| 463 fz_mount_multi_archive(fz_context *ctx, fz_archive *arch_, fz_archive *sub, const char *path) | |
| 464 { | |
| 465 fz_multi_archive *arch = (fz_multi_archive *)arch_; | |
| 466 char *clean_path = NULL; | |
| 467 | |
| 468 if (arch->super.has_entry != has_multi_entry) | |
| 469 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot mount within a non-multi archive"); | |
| 470 | |
| 471 if (arch->len == arch->max) | |
| 472 { | |
| 473 int n = arch->max ? arch->max * 2 : 8; | |
| 474 | |
| 475 arch->sub = fz_realloc(ctx, arch->sub, sizeof(*arch->sub) * n); | |
| 476 arch->max = n; | |
| 477 } | |
| 478 | |
| 479 /* If we have a path, then strip any trailing slashes, and add just one. */ | |
| 480 if (path) | |
| 481 { | |
| 482 clean_path = fz_cleanname_strdup(ctx, path); | |
| 483 if (clean_path[0] == '.' && clean_path[1] == 0) | |
| 484 { | |
| 485 fz_free(ctx, clean_path); | |
| 486 clean_path = NULL; | |
| 487 } | |
| 488 else | |
| 489 { | |
| 490 /* Do a strcat without doing a strcat to avoid the compiler | |
| 491 * complaining at us. We know that n here will be <= n above | |
| 492 * so this is safe. */ | |
| 493 size_t n = strlen(clean_path); | |
| 494 clean_path[n] = '/'; | |
| 495 clean_path[n + 1] = 0; | |
| 496 } | |
| 497 } | |
| 498 | |
| 499 arch->sub[arch->len].arch = fz_keep_archive(ctx, sub); | |
| 500 arch->sub[arch->len].dir = clean_path; | |
| 501 arch->len++; | |
| 502 } | |
| 503 | |
| 504 static const fz_archive_handler fz_zip_archive_handler = | |
| 505 { | |
| 506 fz_is_zip_archive, | |
| 507 fz_open_zip_archive_with_stream | |
| 508 }; | |
| 509 | |
| 510 static const fz_archive_handler fz_tar_archive_handler = | |
| 511 { | |
| 512 fz_is_tar_archive, | |
| 513 fz_open_tar_archive_with_stream | |
| 514 }; | |
| 515 | |
| 516 const fz_archive_handler fz_libarchive_archive_handler = | |
| 517 { | |
| 518 fz_is_libarchive_archive, | |
| 519 fz_open_libarchive_archive_with_stream | |
| 520 }; | |
| 521 | |
| 522 const fz_archive_handler fz_cfb_archive_handler = | |
| 523 { | |
| 524 fz_is_cfb_archive, | |
| 525 fz_open_cfb_archive_with_stream | |
| 526 }; | |
| 527 | |
| 528 void fz_new_archive_handler_context(fz_context *ctx) | |
| 529 { | |
| 530 ctx->archive = fz_malloc_struct(ctx, fz_archive_handler_context); | |
| 531 ctx->archive->refs = 1; | |
| 532 | |
| 533 fz_register_archive_handler(ctx, &fz_zip_archive_handler); | |
| 534 fz_register_archive_handler(ctx, &fz_tar_archive_handler); | |
| 535 #ifdef HAVE_LIBARCHIVE | |
| 536 fz_register_archive_handler(ctx, &fz_libarchive_archive_handler); | |
| 537 #endif | |
| 538 fz_register_archive_handler(ctx, &fz_cfb_archive_handler); | |
| 539 } | |
| 540 | |
| 541 fz_archive_handler_context *fz_keep_archive_handler_context(fz_context *ctx) | |
| 542 { | |
| 543 if (!ctx || !ctx->archive) | |
| 544 return NULL; | |
| 545 return fz_keep_imp(ctx, ctx->archive, &ctx->archive->refs); | |
| 546 } | |
| 547 | |
| 548 void fz_drop_archive_handler_context(fz_context *ctx) | |
| 549 { | |
| 550 if (!ctx) | |
| 551 return; | |
| 552 | |
| 553 if (fz_drop_imp(ctx, ctx->archive, &ctx->archive->refs)) | |
| 554 { | |
| 555 fz_free(ctx, ctx->archive); | |
| 556 ctx->archive = NULL; | |
| 557 } | |
| 558 } | |
| 559 | |
| 560 void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler) | |
| 561 { | |
| 562 fz_archive_handler_context *ac; | |
| 563 int i; | |
| 564 | |
| 565 if (!handler) | |
| 566 return; | |
| 567 | |
| 568 ac = ctx->archive; | |
| 569 if (ac == NULL) | |
| 570 fz_throw(ctx, FZ_ERROR_ARGUMENT, "archive handler list not found"); | |
| 571 | |
| 572 for (i = 0; i < ac->count; i++) | |
| 573 if (ac->handler[i] == handler) | |
| 574 return; | |
| 575 | |
| 576 if (ac->count >= FZ_ARCHIVE_HANDLER_MAX) | |
| 577 fz_throw(ctx, FZ_ERROR_LIMIT, "Too many archive handlers"); | |
| 578 | |
| 579 ac->handler[ac->count++] = handler; | |
| 580 } |
