comparison mupdf-source/source/fitz/unlibarchive.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2023-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #ifdef HAVE_LIBARCHIVE
26
27 #ifdef _WIN32
28 #include "libarchive/archive.h"
29 #include "libarchive/archive_entry.h"
30 #else
31 #include <archive.h>
32 #include <archive_entry.h>
33 #endif
34
/*
	One archive member, as recorded by the scan performed in
	fz_open_libarchive_archive_with_stream.
*/
typedef struct
{
	/* Entry size as reported by archive_entry_size() during the scan. */
	size_t len;
	/* NUL-terminated entry path. The declared size of 32 is nominal only:
	 * each entry_t is allocated with exactly enough room for its own name
	 * (sizeof(entry_t) - 32 + strlen(path) + 1), so the real storage may
	 * be shorter or longer than 32 bytes. */
	uint8_t name[32];
} entry_t;
40
/*
	fz_archive implementation backed by a libarchive read handle.
*/
typedef struct
{
	/* Base archive; super.file is the stream the callbacks read from. */
	fz_archive super;

	/* libarchive read handle; NULL after a failed open/reset. */
	struct archive *archive;

	/* Number of entry headers consumed from 'archive' so far; reset to 0
	 * whenever the handle is reopened by libarchive_reset. */
	int current_entry_idx;

	/* Capacity of 'entries' (grown by doubling, starting at 32). */
	int entries_max;
	/* Number of valid pointers in 'entries'. */
	int entries_len;
	/* Individually allocated entry records, in archive order. */
	entry_t **entries;

	/* Context used by the libarchive callbacks. It is set just before
	 * calling into libarchive and cleared again afterwards. */
	fz_context *ctx; /* safe! */
	/* Staging buffer used by libarchive_read when the stream has fewer
	 * than 1024 bytes immediately available. */
	uint8_t block[4096];
} fz_libarchive_archive;
56
57 static la_ssize_t
58 libarchive_read(struct archive *a, void *client_data, const void **buf)
59 {
60 fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
61 size_t z;
62 uint8_t *p;
63 size_t left;
64 fz_context *ctx = arch->ctx;
65 la_ssize_t ret = 0;
66
67 fz_try(ctx)
68 {
69 z = fz_available(arch->ctx, arch->super.file, 1024);
70
71 /* If we're at the EOF, can't read anything! */
72 if (z == 0)
73 break;
74
75 /* If we have at least 1K, then just return the pointer to that
76 * directly. */
77 if (z >= 1024)
78 {
79 *buf = arch->super.file->rp;
80 arch->super.file->rp += z;
81 ret = (la_ssize_t)z;
82 break;
83 }
84
85 /* If not, let's pull a large enough lump out. */
86
87 left = sizeof(arch->block);
88 p = arch->block;
89 do
90 {
91 memcpy(p, arch->super.file->rp, z);
92 p += z;
93 arch->super.file->rp += z;
94 left -= z;
95 if (left)
96 {
97 z = fz_available(arch->ctx, arch->super.file, left);
98 if (z > left)
99 z = left;
100 if (z == 0)
101 break;
102 }
103 }
104 while (left != 0);
105
106 ret = p - arch->block;
107 *buf = arch->block;
108 }
109 fz_catch(ctx)
110 {
111 /* Ignore error */
112 archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
113 return -1;
114 }
115
116 return ret;
117 }
118
119 static la_int64_t
120 libarchive_skip(struct archive *a, void *client_data, la_int64_t skip)
121 {
122 fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
123 int64_t pos;
124 fz_context *ctx = arch->ctx;
125
126 fz_try(ctx)
127 {
128 pos = fz_tell(arch->ctx, arch->super.file);
129 fz_seek(arch->ctx, arch->super.file, pos + skip, SEEK_SET);
130 pos = fz_tell(arch->ctx, arch->super.file) - pos;
131 }
132 fz_catch(ctx)
133 {
134 /* Ignore error */
135 archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
136 return -1;
137 }
138
139 return pos;
140 }
141
142 static la_int64_t
143 libarchive_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
144 {
145 fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
146 fz_context *ctx = arch->ctx;
147 int64_t pos;
148
149 fz_try(ctx)
150 {
151 fz_seek(arch->ctx, arch->super.file, offset, whence);
152 pos = fz_tell(arch->ctx, arch->super.file);
153 }
154 fz_catch(ctx)
155 {
156 /* Ignore error */
157 archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
158 return -1;
159 }
160
161 return pos;
162 }
163
164 static int
165 libarchive_close(struct archive *a, void *client_data)
166 {
167 /* Nothing to do. Stream is dropped when the fz_archive is closed. */
168 return ARCHIVE_OK;
169 }
170
171 static int
172 libarchive_open(fz_context *ctx, fz_libarchive_archive *arch)
173 {
174 int r;
175
176 arch->archive = archive_read_new();
177 archive_read_support_filter_all(arch->archive);
178 archive_read_support_format_all(arch->archive);
179
180 arch->ctx = ctx;
181 r = archive_read_set_seek_callback(arch->archive, libarchive_seek);
182 if (r == ARCHIVE_OK)
183 r = archive_read_open2(arch->archive, arch, NULL, libarchive_read, libarchive_skip, libarchive_close);
184 arch->ctx = NULL;
185 if (r != ARCHIVE_OK)
186 {
187 archive_read_free(arch->archive);
188 arch->archive = NULL;
189 }
190
191 return r != ARCHIVE_OK;
192 }
193
194 static void
195 libarchive_reset(fz_context *ctx, fz_libarchive_archive *arch)
196 {
197 if (arch->archive)
198 {
199 archive_read_free(arch->archive);
200 arch->archive = NULL;
201 }
202 fz_seek(ctx, arch->super.file, 0, SEEK_SET);
203 if (libarchive_open(ctx, arch))
204 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to restart archive traversal!");
205
206 arch->current_entry_idx = 0;
207 }
208
209 static void
210 drop_libarchive_archive(fz_context *ctx, fz_archive *arch_)
211 {
212 fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
213 int i;
214
215 archive_read_free(arch->archive);
216 for (i = 0; i < arch->entries_len; ++i)
217 fz_free(ctx, arch->entries[i]);
218 fz_free(ctx, arch->entries);
219 arch->archive = NULL;
220 }
221
222 int
223 fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
224 {
225 fz_libarchive_archive arch;
226 struct archive_entry *entry;
227 int ret;
228
229 arch.super.file = file;
230 fz_seek(ctx, file, 0, SEEK_SET);
231
232 /* Annoyingly, libarchive can say "sure, I can open this" only to
233 * then fail when we try to read from it. We therefore need to
234 * try to read at least 1 entry out to be sure. */
235 ret = libarchive_open(ctx, &arch);
236 if (ret == ARCHIVE_OK)
237 {
238 fz_var(ret);
239
240 fz_try(ctx)
241 {
242 arch.ctx = ctx; /* safe */
243 ret = archive_read_next_header(arch.archive, &entry);
244 }
245 fz_catch(ctx)
246 {
247 archive_read_free(arch.archive);
248 fz_rethrow(ctx);
249 }
250 }
251
252 archive_read_free(arch.archive);
253
254 /* Do NOT return true if we get ARCHIVE_EOF. We will fail to recognise empty
255 * archives, but the alternative is false positives. */
256 return ret == ARCHIVE_OK;
257 }
258
259 static int
260 lookup_archive_entry(fz_context *ctx, fz_libarchive_archive *arch, const char *name)
261 {
262 int idx;
263
264 for (idx = 0; idx < arch->entries_len; idx++)
265 {
266 if (!strcmp(name, (const char *)arch->entries[idx]->name))
267 return idx;
268 }
269
270 return -1;
271 }
272
273 static int has_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
274 {
275 fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
276 return lookup_archive_entry(ctx, arch, name) != -1;
277 }
278
279 static const char *list_libarchive_entry(fz_context *ctx, fz_archive *arch_, int idx)
280 {
281 fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
282 if (idx < 0 || idx >= arch->entries_len)
283 return NULL;
284 return (const char *)arch->entries[idx]->name;
285 }
286
287 static int count_libarchive_entries(fz_context *ctx, fz_archive *arch_)
288 {
289 fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
290 return arch->entries_len;
291 }
292
293 static fz_buffer *
294 read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
295 {
296 fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
297 fz_buffer *ubuf = NULL;
298 int idx;
299 struct archive_entry *entry;
300 la_ssize_t ret;
301 size_t size;
302
303 idx = lookup_archive_entry(ctx, arch, name);
304 if (idx < 0)
305 return NULL;
306
307 if (arch->current_entry_idx > idx)
308 libarchive_reset(ctx, arch);
309
310 fz_var(ubuf);
311
312 arch->ctx = ctx;
313 fz_try(ctx)
314 {
315 while (arch->current_entry_idx < idx)
316 {
317 int r = archive_read_next_header(arch->archive, &entry);
318 if (r == ARCHIVE_OK)
319 r = archive_read_data_skip(arch->archive);
320 if (r != ARCHIVE_OK)
321 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to skip over archive entry");
322 arch->current_entry_idx++;
323 }
324
325 /* This is the one we want. */
326 if (archive_read_next_header(arch->archive, &entry) != ARCHIVE_OK)
327 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive entry header");
328
329 arch->current_entry_idx++;
330 size = arch->entries[idx]->len;
331 ubuf = fz_new_buffer(ctx, size);
332 ubuf->len = size;
333
334 ret = archive_read_data(arch->archive, ubuf->data, size);
335 if (ret < 0)
336 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive data");
337 if ((size_t)ret != size)
338 fz_warn(ctx, "Premature end of data reading archive entry data (%zu vs %zu)", (size_t)ubuf->len, (size_t)size);
339 }
340 fz_always(ctx)
341 arch->ctx = NULL;
342 fz_catch(ctx)
343 {
344 fz_drop_buffer(ctx, ubuf);
345 fz_rethrow(ctx);
346 }
347
348 return ubuf;
349 }
350
351 static fz_stream *
352 open_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
353 {
354 fz_buffer *buf = read_libarchive_entry(ctx, arch_, name);
355 fz_stream *stm = NULL;
356
357 fz_try(ctx)
358 stm = fz_open_buffer(ctx, buf);
359 fz_always(ctx)
360 fz_drop_buffer(ctx, buf);
361 fz_catch(ctx)
362 fz_rethrow(ctx);
363
364 return stm;
365 }
366
367 fz_archive *
368 fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
369 {
370 fz_libarchive_archive *arch = fz_new_derived_archive(ctx, file, fz_libarchive_archive);
371 int r;
372 const char *path = NULL;
373 char *free_path = NULL;
374
375 fz_seek(ctx, file, 0, SEEK_SET);
376
377 if (libarchive_open(ctx, arch) != ARCHIVE_OK)
378 {
379 fz_drop_archive(ctx, &arch->super);
380 fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot recognize libarchive archive");
381 }
382
383 arch->super.format = "libarchive";
384 arch->super.count_entries = count_libarchive_entries;
385 arch->super.list_entry = list_libarchive_entry;
386 arch->super.has_entry = has_libarchive_entry;
387 arch->super.read_entry = read_libarchive_entry;
388 arch->super.open_entry = open_libarchive_entry;
389 arch->super.drop_archive = drop_libarchive_archive;
390
391 fz_var(free_path);
392
393 fz_try(ctx)
394 {
395 arch->ctx = ctx;
396 /* Count the archive entries */
397 do
398 {
399 struct archive_entry *entry;
400 size_t z;
401
402 r = archive_read_next_header(arch->archive, &entry);
403 if (r == ARCHIVE_EOF)
404 break;
405
406 if (r != ARCHIVE_OK)
407 fz_throw(ctx, FZ_ERROR_LIBRARY, "Corrupt archive");
408
409 free_path = NULL;
410 path = archive_entry_pathname_utf8(entry);
411 if (!path)
412 {
413 path = free_path = fz_utf8_from_wchar(ctx, archive_entry_pathname_w(entry));
414 }
415 if (!path)
416 continue;
417
418 if (arch->entries_len == arch->entries_max)
419 {
420 int new_max = arch->entries_max * 2;
421 if (new_max == 0)
422 new_max = 32;
423
424 arch->entries = fz_realloc(ctx, arch->entries, sizeof(arch->entries[0]) * new_max);
425 arch->entries_max = new_max;
426 }
427
428 z = strlen(path);
429 arch->entries[arch->entries_len] = fz_malloc(ctx, sizeof(entry_t) - 32 + z + 1);
430 memcpy(&arch->entries[arch->entries_len]->name[0], path, z+1);
431 if (free_path)
432 {
433 fz_free(ctx, free_path);
434 free_path = NULL;
435 }
436 arch->entries[arch->entries_len]->len = archive_entry_size(entry);
437
438 arch->entries_len++;
439 }
440 while (r != ARCHIVE_EOF && r != ARCHIVE_FATAL);
441
442 libarchive_reset(ctx, arch);
443 }
444 fz_always(ctx)
445 {
446 if (free_path)
447 fz_free(ctx, free_path);
448 }
449 fz_catch(ctx)
450 {
451 arch->ctx = NULL;
452 fz_drop_archive(ctx, &arch->super);
453 fz_rethrow(ctx);
454 }
455
456 return &arch->super;
457 }
458
459 fz_archive *
460 fz_open_libarchive_archive(fz_context *ctx, const char *filename)
461 {
462 fz_archive *tar = NULL;
463 fz_stream *file;
464
465 file = fz_open_file(ctx, filename);
466
467 fz_try(ctx)
468 tar = fz_open_libarchive_archive_with_stream(ctx, file);
469 fz_always(ctx)
470 fz_drop_stream(ctx, file);
471 fz_catch(ctx)
472 fz_rethrow(ctx);
473
474 return tar;
475 }
476
477
478 /* Universal decomp stream */
479
480 typedef struct
481 {
482 fz_stream *chain;
483 fz_context *ctx; /* Safe as not persistent. */
484 struct archive *archive;
485 struct archive_entry *entry;
486 uint8_t block[4096];
487 } fz_libarchived_state;
488
489 static la_ssize_t
490 libarchived_read(struct archive *a, void *client_data, const void **buf)
491 {
492 fz_libarchived_state *state = (fz_libarchived_state *)client_data;
493 size_t z;
494 uint8_t *p;
495 size_t left;
496 fz_context *ctx = state->ctx;
497 la_ssize_t ret = 0;
498
499 fz_try(ctx)
500 {
501 z = fz_available(ctx, state->chain, 1024);
502
503 /* If we're at the EOF, can't read anything! */
504 if (z == 0)
505 break;
506
507 /* If we have at least 1K, then just return the pointer to that
508 * directly. */
509 if (z >= 1024)
510 {
511 *buf = state->chain->rp;
512 state->chain->rp += z;
513 ret = (la_ssize_t)z;
514 break;
515 }
516
517 /* If not, let's pull a large enough lump out. */
518
519 left = sizeof(state->block);
520 p = state->block;
521 do
522 {
523 memcpy(p, state->chain->rp, z);
524 p += z;
525 state->chain->rp += z;
526 left -= z;
527 if (left)
528 {
529 z = fz_available(ctx, state->chain, left);
530 if (z > left)
531 z = left;
532 if (z == 0)
533 break;
534 }
535 }
536 while (left != 0);
537
538 ret = p - state->block;
539 *buf = state->block;
540 }
541 fz_catch(ctx)
542 {
543 /* Ignore error */
544 archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
545 return -1;
546 }
547
548 return ret;
549 }
550
551 static la_int64_t
552 libarchived_skip(struct archive *a, void *client_data, la_int64_t skip)
553 {
554 fz_libarchived_state *state = (fz_libarchived_state *)client_data;
555 int64_t pos;
556 fz_context *ctx = state->ctx;
557
558 fz_try(ctx)
559 {
560 pos = fz_tell(state->ctx, state->chain);
561 fz_seek(state->ctx, state->chain, pos + skip, SEEK_SET);
562 pos = fz_tell(state->ctx, state->chain) - pos;
563 }
564 fz_catch(ctx)
565 {
566 /* Ignore error */
567 archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
568 return -1;
569 }
570
571 return pos;
572 }
573
574 static la_int64_t
575 libarchived_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
576 {
577 fz_libarchived_state *state = (fz_libarchived_state *)client_data;
578 fz_context *ctx = state->ctx;
579 int64_t pos;
580
581 fz_try(ctx)
582 {
583 fz_seek(ctx, state->chain, offset, whence);
584 pos = fz_tell(ctx, state->chain);
585 }
586 fz_catch(ctx)
587 {
588 /* Ignore error */
589 archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
590 return -1;
591 }
592
593 return pos;
594 }
595
596 static int
597 libarchived_close(struct archive *a, void *client_data)
598 {
599 /* Nothing to do. Stream is dropped when the fz_stream is dropped. */
600 return ARCHIVE_OK;
601 }
602
603 static int
604 next_libarchived(fz_context *ctx, fz_stream *stm, size_t required)
605 {
606 fz_libarchived_state *state = stm->state;
607 la_ssize_t z;
608
609 if (stm->eof)
610 return EOF;
611
612 z = archive_read_data(state->archive, state->block, sizeof(state->block));
613 if (z < 0)
614 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read compressed data");
615 if (z == 0)
616 {
617 stm->eof = 1;
618 return EOF;
619 }
620
621 stm->rp = state->block;
622 stm->wp = state->block + z;
623
624 return *stm->rp++;
625 }
626
627 static void
628 close_libarchived(fz_context *ctx, void *state_)
629 {
630 fz_libarchived_state *state = (fz_libarchived_state *)state_;
631 int code;
632
633 state->ctx = ctx;
634 code = archive_read_free(state->archive);
635 state->ctx = NULL;
636 if (code != ARCHIVE_OK)
637 fz_warn(ctx, "libarchive error: archive_read_free: %d", code);
638
639 fz_drop_stream(ctx, state->chain);
640 fz_free(ctx, state);
641 }
642
643 fz_stream *
644 fz_open_libarchived(fz_context *ctx, fz_stream *chain)
645 {
646 fz_libarchived_state *state;
647 int r;
648
649 state = fz_malloc_struct(ctx, fz_libarchived_state);
650
651 state->chain = fz_keep_stream(ctx, chain);
652 state->archive = archive_read_new();
653 archive_read_support_filter_all(state->archive);
654 archive_read_support_format_raw(state->archive);
655
656 state->ctx = ctx;
657 r = archive_read_set_seek_callback(state->archive, libarchived_seek);
658 if (r == ARCHIVE_OK)
659 r = archive_read_open2(state->archive, state, NULL, libarchived_read, libarchived_skip, libarchived_close);
660 if (r != ARCHIVE_OK)
661 {
662 archive_read_free(state->archive);
663 state->ctx = NULL;
664 fz_drop_stream(ctx, state->chain);
665 fz_free(ctx, state);
666 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
667 }
668
669 r = archive_filter_code(state->archive, 0);
670 if (r == ARCHIVE_FILTER_NONE)
671 {
672 archive_read_free(state->archive);
673 state->ctx = NULL;
674 fz_drop_stream(ctx, state->chain);
675 fz_free(ctx, state);
676 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
677 }
678
679 /* This is the one we want. */
680 r = archive_read_next_header(state->archive, &state->entry);
681 if (r != ARCHIVE_OK)
682 {
683 archive_read_free(state->archive);
684 state->ctx = NULL;
685 fz_drop_stream(ctx, state->chain);
686 fz_free(ctx, state);
687 fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
688 }
689
690 return fz_new_stream(ctx, state, next_libarchived, close_libarchived);
691 }
692
693 #else
694
695 int
696 fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
697 {
698 static int warned = 0;
699
700 if (!warned)
701 {
702 warned = 1;
703 fz_warn(ctx, "libarchive support not included");
704 }
705
706 return 0;
707 }
708
709 fz_archive *
710 fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
711 {
712 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
713 }
714
715 fz_archive *
716 fz_open_libarchive_archive(fz_context *ctx, const char *filename)
717 {
718 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
719 }
720
721 fz_stream *
722 fz_open_libarchived(fz_context *ctx, fz_stream *chain)
723 {
724 fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
725 }
726
727 #endif