comparison mupdf-source/source/fitz/untar.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26 #include <limits.h>
27
/*
 * Values of the type flag byte (header offset 156) that the entry
 * scanner compares against.
 */
#define TYPE_NORMAL_OLD '\0'	/* NUL type flag */
#define TYPE_NORMAL '0'		/* ASCII zero type flag */
#define TYPE_CONTIGUOUS '7'
#define TYPE_LONG_NAME 'L'	/* record whose data is read as a long entry name */
32
/* One indexed member of the archive. */
typedef struct
{
	/* Heap-allocated, NUL-terminated entry name; released by drop_tar_archive. */
	char *name;
	/* Stream position recorded before the entry's 512-byte header was read. */
	int64_t offset;
	/* Value parsed from the header's octal size field (checked against INT_MAX). */
	int size;
} tar_entry;
39
40 typedef struct
41 {
42 fz_archive super;
43
44 int count;
45 tar_entry *entries;
46 } fz_tar_archive;
47
/* Returns 1 when c is one of the characters '0'..'7', otherwise 0. */
static inline int isoctdigit(char c)
{
	if (c < '0')
		return 0;
	return c <= '7';
}
52
/*
 * Parse the leading run of octal digits in s and return its value.
 * Parsing stops at the first character that is not '0'..'7' (including
 * the terminating NUL); an empty run yields 0.
 */
static inline int64_t otoi(const char *s)
{
	int64_t acc = 0;
	const char *p;

	for (p = s; *p != '\0' && isoctdigit(*p); p++)
		acc = acc * 8 + (*p - '0');

	return acc;
}
66
67 static void drop_tar_archive(fz_context *ctx, fz_archive *arch)
68 {
69 fz_tar_archive *tar = (fz_tar_archive *) arch;
70 int i;
71 for (i = 0; i < tar->count; ++i)
72 fz_free(ctx, tar->entries[i].name);
73 fz_free(ctx, tar->entries);
74 }
75
76 static int is_zeroed(fz_context *ctx, unsigned char *buf, size_t size)
77 {
78 size_t off;
79
80 for (off = 0; off < size; off++)
81 if (buf[off] != 0)
82 return 0;
83
84 return 1;
85 }
86
87 static void ensure_tar_entries(fz_context *ctx, fz_tar_archive *tar)
88 {
89 fz_stream *file = tar->super.file;
90 unsigned char record[512];
91 char *longname = NULL;
92 char name[101];
93 char octsize[13];
94 char typeflag;
95 int64_t offset, blocks, size;
96 size_t n;
97
98 tar->count = 0;
99
100 fz_seek(ctx, file, 0, SEEK_SET);
101
102 while (1)
103 {
104 offset = fz_tell(ctx, file);
105 n = fz_read(ctx, file, record, nelem(record));
106 if (n == 0)
107 break;
108 if (n < nelem(record))
109 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar record");
110
111 if (is_zeroed(ctx, record, nelem(record)))
112 continue;
113
114 memcpy(name, record + 0, nelem(name) - 1);
115 name[nelem(name) - 1] = '\0';
116
117 memcpy(octsize, record + 124, nelem(octsize) - 1);
118 octsize[nelem(octsize) - 1] = '\0';
119
120 size = otoi(octsize);
121 if (size > INT_MAX)
122 fz_throw(ctx, FZ_ERROR_FORMAT, "tar archive entry too large");
123
124 typeflag = (char) record[156];
125
126 if (typeflag == TYPE_LONG_NAME)
127 {
128 longname = fz_malloc(ctx, size + 1);
129 fz_try(ctx)
130 {
131 n = fz_read(ctx, file, (unsigned char *) longname, size);
132 if (n < (size_t) size)
133 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar long name entry name");
134 longname[size] = '\0';
135 }
136 fz_catch(ctx)
137 {
138 fz_free(ctx, longname);
139 fz_rethrow(ctx);
140 }
141
142 fz_seek(ctx, file, 512 - (size % 512), 1);
143 }
144
145 if (typeflag != TYPE_NORMAL_OLD && typeflag != TYPE_NORMAL &&
146 typeflag != TYPE_CONTIGUOUS && typeflag != TYPE_LONG_NAME)
147 continue;
148
149 blocks = (size + 511) / 512;
150 fz_seek(ctx, file, blocks * 512, 1);
151
152 tar->entries = fz_realloc_array(ctx, tar->entries, tar->count + 1, tar_entry);
153
154 tar->entries[tar->count].offset = offset;
155 tar->entries[tar->count].size = size;
156 if (longname != NULL)
157 {
158 tar->entries[tar->count].name = longname;
159 longname = NULL;
160 }
161 else
162 tar->entries[tar->count].name = fz_strdup(ctx, name);
163
164 tar->count++;
165 }
166 }
167
168 static tar_entry *lookup_tar_entry(fz_context *ctx, fz_tar_archive *tar, const char *name)
169 {
170 int i;
171 for (i = 0; i < tar->count; i++)
172 if (!fz_strcasecmp(name, tar->entries[i].name))
173 return &tar->entries[i];
174 return NULL;
175 }
176
177 static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
178 {
179 fz_tar_archive *tar = (fz_tar_archive *) arch;
180 fz_stream *file = tar->super.file;
181 tar_entry *ent;
182
183 ent = lookup_tar_entry(ctx, tar, name);
184 if (!ent)
185 return NULL;
186
187 fz_seek(ctx, file, ent->offset + 512, 0);
188 return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file));
189 }
190
191 static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
192 {
193 fz_tar_archive *tar = (fz_tar_archive *) arch;
194 fz_stream *file = tar->super.file;
195 fz_buffer *ubuf;
196 tar_entry *ent;
197
198 ent = lookup_tar_entry(ctx, tar, name);
199 if (!ent)
200 return NULL;
201
202 ubuf = fz_new_buffer(ctx, ent->size);
203
204 fz_try(ctx)
205 {
206 fz_seek(ctx, file, ent->offset + 512, 0);
207 ubuf->len = fz_read(ctx, file, ubuf->data, ent->size);
208 if (ubuf->len != (size_t)ent->size)
209 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot read entire archive entry");
210 }
211 fz_catch(ctx)
212 {
213 fz_drop_buffer(ctx, ubuf);
214 fz_rethrow(ctx);
215 }
216
217 return ubuf;
218 }
219
220 static int has_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
221 {
222 fz_tar_archive *tar = (fz_tar_archive *) arch;
223 tar_entry *ent = lookup_tar_entry(ctx, tar, name);
224 return ent != NULL;
225 }
226
227 static const char *list_tar_entry(fz_context *ctx, fz_archive *arch, int idx)
228 {
229 fz_tar_archive *tar = (fz_tar_archive *) arch;
230 if (idx < 0 || idx >= tar->count)
231 return NULL;
232 return tar->entries[idx].name;
233 }
234
235 static int count_tar_entries(fz_context *ctx, fz_archive *arch)
236 {
237 fz_tar_archive *tar = (fz_tar_archive *) arch;
238 return tar->count;
239 }
240
/*
 * Check an n-byte field: the first n-1 bytes must be octal digits
 * ('0'..'7') and the byte after them must be NUL.
 * Returns 1 when the field has that shape, 0 otherwise.
 */
static int isoct(unsigned char *d, int n)
{
	int digits = n - 1;
	int i;

	for (i = 0; i < digits; i++)
	{
		if (d[i] < '0' || d[i] > '7')
			return 0;
	}

	return d[i] == 0;
}
251
252 static int
253 check_v7(fz_context *ctx, fz_stream *file)
254 {
255 unsigned char data[512];
256 size_t n;
257 int i;
258
259 fz_seek(ctx, file, 0, SEEK_SET);
260 n = fz_read(ctx, file, data, nelem(data));
261 if (n != nelem(data))
262 return 0;
263
264 /* Skip over name. */
265 for (i = 0; i < 100 && data[i] != 0; i++);
266
267 /* We want at least 1 byte of name, and a zero terminator. */
268 if (i == 0 || i == 100)
269 return 0;
270
271 /* Skip over a run of zero terminators. */
272 for (; i < 100 && data[i] == 0; i++);
273
274 if (i != 100)
275 return 0;
276
277 return (isoct(data+100, 8) &&
278 isoct(data+108, 8) &&
279 isoct(data+116, 8) &&
280 isoct(data+124, 12) &&
281 isoct(data+136, 12) &&
282 isoct(data+148, 8));
283 }
284
285 int
286 fz_is_tar_archive(fz_context *ctx, fz_stream *file)
287 {
288 const unsigned char gnusignature[6] = { 'u', 's', 't', 'a', 'r', ' ' };
289 const unsigned char paxsignature[6] = { 'u', 's', 't', 'a', 'r', '\0' };
290 const unsigned char v7signature[6] = { '\0', '\0', '\0', '\0', '\0', '\0' };
291 unsigned char data[6];
292 size_t n;
293
294 fz_seek(ctx, file, 257, 0);
295 n = fz_read(ctx, file, data, nelem(data));
296 if (n != nelem(data))
297 return 0;
298 if (!memcmp(data, gnusignature, nelem(gnusignature)))
299 return 1;
300 if (!memcmp(data, paxsignature, nelem(paxsignature)))
301 return 1;
302 if (!memcmp(data, v7signature, nelem(v7signature)))
303 return check_v7(ctx, file);
304
305 return 0;
306 }
307
308 fz_archive *
309 fz_open_tar_archive_with_stream(fz_context *ctx, fz_stream *file)
310 {
311 fz_tar_archive *tar;
312
313 if (!fz_is_tar_archive(ctx, file))
314 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize tar archive");
315
316 tar = fz_new_derived_archive(ctx, file, fz_tar_archive);
317 tar->super.format = "tar";
318 tar->super.count_entries = count_tar_entries;
319 tar->super.list_entry = list_tar_entry;
320 tar->super.has_entry = has_tar_entry;
321 tar->super.read_entry = read_tar_entry;
322 tar->super.open_entry = open_tar_entry;
323 tar->super.drop_archive = drop_tar_archive;
324
325 fz_try(ctx)
326 {
327 ensure_tar_entries(ctx, tar);
328 }
329 fz_catch(ctx)
330 {
331 fz_drop_archive(ctx, &tar->super);
332 fz_rethrow(ctx);
333 }
334
335 return &tar->super;
336 }
337
338 fz_archive *
339 fz_open_tar_archive(fz_context *ctx, const char *filename)
340 {
341 fz_archive *tar = NULL;
342 fz_stream *file;
343
344 file = fz_open_file(ctx, filename);
345
346 fz_try(ctx)
347 tar = fz_open_tar_archive_with_stream(ctx, file);
348 fz_always(ctx)
349 fz_drop_stream(ctx, file);
350 fz_catch(ctx)
351 fz_rethrow(ctx);
352
353 return tar;
354 }