comparison mupdf-source/thirdparty/extract/src/zip.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #include "extract/alloc.h"
2
3 #include "mem.h"
4 #include "outf.h"
5 #include "zip.h"
6
7 #include <zlib.h>
8
9 #include <assert.h>
10 #include <errno.h>
11 #include <limits.h>
12 #include <time.h>
13
14 #include "compat_stdint.h"
15
16
/* One central-directory record. It is filled in by extract_zip_write_file()
and written out later by extract_zip_close().

All numeric header fields are unsigned: they are emitted through
s_write_uint16()/s_write_uint32(), and values such as a packed MS-DOS time
with hour >= 16 or a CRC-32 with the top bit set do not fit in the
corresponding signed type. */
typedef struct
{
    uint16_t  mtime;             /* Packed MS-DOS modification time. */
    uint16_t  mdate;             /* Packed MS-DOS modification date. */
    uint32_t  crc_sum;           /* CRC-32 of the uncompressed data. */
    uint32_t  size_compressed;   /* Bytes of file data stored in the archive. */
    uint32_t  size_uncompressed; /* Bytes of file data before compression. */
    char     *name;              /* Allocated copy of the file name. */
    uint32_t  offset;            /* Offset of the local file header. */
    uint16_t  attr_internal;     /* Internal file attributes. */
    uint32_t  attr_external;     /* External file attributes. */
} extract_zip_cd_file_t;
29
30 struct extract_zip_t
31 {
32 extract_buffer_t *buffer;
33 extract_zip_cd_file_t *cd_files;
34 int cd_files_num;
35
36 /* errno_ is set to non-zero if any operation fails; avoids need to check
37 after every small output operation. */
38 int errno_;
39 int eof;
40 uint16_t compression_method;
41 int compress_level;
42
43 /* Defaults for various values in zip file headers etc. */
44 uint16_t mtime;
45 uint16_t mdate;
46 uint16_t version_creator;
47 uint16_t version_extract;
48 uint16_t general_purpose_bit_flag;
49 uint16_t file_attr_internal;
50 uint32_t file_attr_external;
51 char *archive_comment;
52 };
53
54 int extract_zip_open(extract_buffer_t *buffer, extract_zip_t **o_zip)
55 {
56 int e = -1;
57 extract_zip_t *zip;
58 extract_alloc_t *alloc = extract_buffer_alloc(buffer);
59
60 if (extract_malloc(alloc, &zip, sizeof(*zip))) goto end;
61
62 zip->cd_files = NULL;
63 zip->cd_files_num = 0;
64 zip->buffer = buffer;
65 zip->errno_ = 0;
66 zip->eof = 0;
67 zip->compression_method = Z_DEFLATED;
68 zip->compress_level = Z_DEFAULT_COMPRESSION;
69
70 /* We could maybe convert current date/time to the ms-dos format required
71 here, but using zeros doesn't seem to make a difference to Word etc. */
72
73 {
74 time_t t = time(NULL);
75 struct tm *tm;
76 #ifdef _POSIX_SOURCE
77 struct tm tm_local;
78 tm = gmtime_r(&t, &tm_local);
79 #else
80 tm = gmtime(&t);
81 #endif
82 if (tm)
83 {
84 /* mdate and mtime are in MS DOS format:
85 mtime:
86 bits 0-4: seconds / 2.
87 bits 5-10: minute (0-59).
88 bits 11-15: hour (0-23).
89 mdate:
90 bits 0-4: day of month (1-31).
91 bits 5-8: month (1=jan, 2=feb, etc).
92 bits 9-15: year - 1980.
93 */
94 zip->mtime = (uint16_t) ((tm->tm_hour << 11) | (tm->tm_min << 5) | (tm->tm_sec / 2));
95 zip->mdate = (uint16_t) (((1900 + tm->tm_year - 1980) << 9) | ((tm->tm_mon + 1) << 5) | tm->tm_mday);
96 }
97 else
98 {
99 outf0("*** gmtime_r() failed");
100 zip->mtime = 0;
101 zip->mdate = 0;
102 }
103 }
104
105 /* These are all copied from command-line zip on unix. */
106 zip->version_creator = (0x3 << 8) + 30; /* 0x3 is unix, 30 means 3.0. */
107 zip->version_extract = 10; /* 10 means 1.0. */
108 zip->general_purpose_bit_flag = 0;
109 zip->file_attr_internal = 0;
110
111 /* We follow command-line zip which uses 0x81a40000 which is octal
112 0100644:0. (0100644 is S_IFREG (regular file) plus rw-r-r. See stat(2) for
113 details.) */
114 zip->file_attr_external = (0100644 << 16) + 0;
115 if (extract_strdup(alloc, "Artifex", &zip->archive_comment)) goto end;
116
117 e = 0;
118 end:
119
120 if (e) {
121 if (zip) extract_free(alloc, &zip->archive_comment);
122 extract_free(alloc, &zip);
123 *o_zip = NULL;
124 }
125 else {
126 *o_zip = zip;
127 }
128
129 return e;
130 }
131
/* Returns 1 if the machine stores integers least-significant byte first
(little-endian), or 0 if it stores them most-significant byte first.

The check looks at the bytes of a uint16_t through an unsigned char pointer,
which is always permitted. Going the other way (reading a char array through
a uint16_t pointer) would break the aliasing rules and could be misaligned. */
static int s_native_little_endinesss(void)
{
    static const uint16_t probe = 1 + 2*256;
    const unsigned char *bytes = (const unsigned char *) &probe;

    if (bytes[0] == 1 && bytes[1] == 2) {
        /* Native little-endianness. */
        return 1;
    }
    else if (bytes[0] == 2 && bytes[1] == 1) {
        /* Native big-endianness. */
        return 0;
    }
    /* Would like to call abort() here, but that breaks on AIX/gcc. */
    assert(0);
    return 0;
}
148
149
150 /* Allocation fns for zlib. */
151
152 static void *s_zalloc(void *opaque, unsigned items, unsigned size)
153 {
154 extract_zip_t *zip = opaque;
155 extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer);
156 void *ptr;
157
158 if (extract_malloc(alloc, &ptr, items*size)) return NULL;
159
160 return ptr;
161 }
162
163 static void s_zfree(void *opaque, void *ptr)
164 {
165 extract_zip_t *zip = opaque;
166 extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer);
167
168 extract_free(alloc, &ptr);
169 }
170
171
172 /* Uses zlib to write raw deflate compressed data to zip->buffer. */
173 static int
174 s_write_compressed(
175 extract_zip_t *zip,
176 const void *data,
177 size_t data_length,
178 size_t *o_compressed_length)
179 {
180 int ze;
181 z_stream zstream = {0}; /* Initialise to keep Coverity quiet. */
182
183 if (zip->errno_) return -1;
184 if (zip->eof) return +1;
185
186 zstream.zalloc = s_zalloc;
187 zstream.zfree = s_zfree;
188 zstream.opaque = zip;
189
190 /* We need to write raw deflate data, so we use deflateInit2() with -ve
191 windowBits. The values we use are deflateInit()'s defaults. */
192 ze = deflateInit2(&zstream,
193 zip->compress_level,
194 Z_DEFLATED,
195 -15 /*windowBits*/,
196 8 /*memLevel*/,
197 Z_DEFAULT_STRATEGY);
198 if (ze != Z_OK)
199 {
200 errno = (ze == Z_MEM_ERROR) ? ENOMEM : EINVAL;
201 zip->errno_ = errno;
202 outf("deflateInit2() failed ze=%i", ze);
203 return -1;
204 }
205
206 /* Set zstream to read from specified data. */
207 zstream.next_in = (void*) data;
208 zstream.avail_in = (unsigned) data_length;
209
210 /* We increment *o_compressed_length gradually so that if we return an
211 error, we still indicate how many butes of compressed data have been
212 written. */
213 if (o_compressed_length)
214 {
215 *o_compressed_length = 0;
216 }
217
218 for(;;)
219 {
220 /* todo: write an extract_buffer_cache() function so we can write
221 directly into output buffer if it has a fn_cache. */
222 unsigned char buffer[1024];
223 zstream.next_out = &buffer[0];
224 zstream.avail_out = sizeof(buffer);
225 ze = deflate(&zstream, zstream.avail_in ? Z_NO_FLUSH : Z_FINISH);
226 if (ze != Z_STREAM_END && ze != Z_OK)
227 {
228 outf("deflate() failed ze=%i", ze);
229 errno = EIO;
230 zip->errno_ = errno;
231 return -1;
232 }
233 {
234 /* Send the new compressed data to buffer. */
235 size_t bytes_written;
236 int e = extract_buffer_write(zip->buffer, buffer, zstream.next_out - buffer, &bytes_written);
237 if (o_compressed_length)
238 {
239 *o_compressed_length += bytes_written;
240 }
241 if (e)
242 {
243 if (e == -1) zip->errno_ = errno;
244 if (e == +1) zip->eof = 1;
245 outf("extract_buffer_write() failed e=%i errno=%i", e, errno);
246 return e;
247 }
248 }
249 if (ze == Z_STREAM_END)
250 {
251 break;
252 }
253 }
254 ze = deflateEnd(&zstream);
255 if (ze != Z_OK)
256 {
257 outf("deflateEnd() failed ze=%i", ze);
258 errno = EIO;
259 zip->errno_ = errno;
260 return -1;
261 }
262 if (o_compressed_length)
263 {
264 assert(*o_compressed_length == (size_t) zstream.total_out);
265 }
266
267 return 0;
268 }
269
270 /* Writes uncompressed data to zip->buffer. */
271 static int s_write(extract_zip_t *zip, const void *data, size_t data_length)
272 {
273 size_t actual;
274 int e;
275
276 if (zip->errno_) return -1;
277 if (zip->eof) return +1;
278
279 e = extract_buffer_write(zip->buffer, data, data_length, &actual);
280 if (e == -1) zip->errno_ = errno;
281 if (e == +1) zip->eof = 1;
282
283 return e;
284 }
285
286 static int s_write_uint32(extract_zip_t *zip, uint32_t value)
287 {
288 if (s_native_little_endinesss()) {
289 return s_write(zip, &value, sizeof(value));
290 }
291 else {
292 unsigned char value2[4] = {
293 (unsigned char) (value >> 0),
294 (unsigned char) (value >> 8),
295 (unsigned char) (value >> 16),
296 (unsigned char) (value >> 24)
297 };
298 return s_write(zip, &value2, sizeof(value2));
299 }
300 }
301
302 static int s_write_uint16(extract_zip_t *zip, uint16_t value)
303 {
304 if (s_native_little_endinesss()) {
305 return s_write(zip, &value, sizeof(value));
306 }
307 else {
308 unsigned char value2[2] = {
309 (unsigned char) (value >> 0),
310 (unsigned char) (value >> 8)
311 };
312 return s_write(zip, &value2, sizeof(value2));
313 }
314 }
315
316 static int s_write_string(extract_zip_t *zip, const char *text)
317 {
318 return s_write(zip, text, strlen(text));
319 }
320
321
322 int extract_zip_write_file(
323 extract_zip_t *zip,
324 const void *data,
325 size_t data_length,
326 const char *name)
327 {
328 int e = -1;
329 extract_zip_cd_file_t *cd_file = NULL;
330 extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer);
331
332 if (data_length > INT_MAX) {
333 assert(0);
334 errno = EINVAL;
335 return -1;
336 }
337 /* Create central directory file header for later. */
338 if (extract_realloc2(
339 alloc,
340 &zip->cd_files,
341 sizeof(extract_zip_cd_file_t) * zip->cd_files_num,
342 sizeof(extract_zip_cd_file_t) * (zip->cd_files_num+1)
343 )) goto end;
344 cd_file = &zip->cd_files[zip->cd_files_num];
345 cd_file->name = NULL;
346
347 cd_file->mtime = zip->mtime;
348 cd_file->mdate = zip->mdate;
349 cd_file->crc_sum = (int32_t) crc32(crc32(0, NULL, 0), data, (int) data_length);
350 cd_file->size_uncompressed = (int) data_length;
351 if (zip->compression_method == 0)
352 {
353 cd_file->size_compressed = cd_file->size_uncompressed;
354 }
355 if (extract_strdup(alloc, name, &cd_file->name)) goto end;
356 cd_file->offset = (int) extract_buffer_pos(zip->buffer);
357 cd_file->attr_internal = zip->file_attr_internal;
358 cd_file->attr_external = zip->file_attr_external;
359 if (!cd_file->name) goto end;
360
361 /* Write local file header. If we are using compression, we set bit 3 of
362 General purpose bit flag and write zeros for crc-32, compressed size and
363 uncompressed size; then we write the actual values in data descriptor after
364 the compressed data. */
365 {
366 const char extra_local[] = ""; /* Modify for testing. */
367 uint16_t general_purpose_bit_flag = zip->general_purpose_bit_flag;
368 if (zip->compression_method) general_purpose_bit_flag |= 8;
369 s_write_uint32(zip, 0x04034b50);
370 s_write_uint16(zip, zip->version_extract); /* Version needed to extract (minimum). */
371 s_write_uint16(zip, general_purpose_bit_flag); /* General purpose bit flag */
372 s_write_uint16(zip, zip->compression_method); /* Compression method */
373 s_write_uint16(zip, cd_file->mtime); /* File last modification time */
374 s_write_uint16(zip, cd_file->mdate); /* File last modification date */
375 if (zip->compression_method)
376 {
377 s_write_uint32(zip, 0); /* CRC-32 of uncompressed data */
378 s_write_uint32(zip, 0); /* Compressed size */
379 s_write_uint32(zip, 0); /* Uncompressed size */
380 }
381 else
382 {
383 s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */
384 s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */
385 s_write_uint32(zip, cd_file->size_uncompressed);/* Uncompressed size */
386 }
387 s_write_uint16(zip, (uint16_t) strlen(name)); /* File name length (n) */
388 s_write_uint16(zip, sizeof(extra_local)-1); /* Extra field length (m) */
389 s_write_string(zip, cd_file->name); /* File name */
390 s_write(zip, extra_local, sizeof(extra_local)-1); /* Extra field */
391 }
392
393 if (zip->compression_method)
394 {
395 /* Write compressed data. */
396 size_t data_length_compressed;
397 s_write_compressed(zip, data, data_length, &data_length_compressed);
398 cd_file->size_compressed = (int) data_length_compressed;
399
400 /* Write data descriptor. */
401 s_write_uint32(zip, 0x08074b50); /* Data descriptor signature */
402 s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */
403 s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */
404 s_write_uint32(zip, cd_file->size_uncompressed); /* Uncompressed size */
405 }
406 else
407 {
408 s_write(zip, data, data_length);
409 }
410
411 if (zip->errno_) e = -1;
412 else if (zip->eof) e = +1;
413 else e = 0;
414
415
416 end:
417
418 if (e) {
419 /* Leave zip->cd_files_num unchanged, so calling extract_zip_close()
420 will write out any earlier files. Free cd_file->name to avoid leak. */
421 if (cd_file) extract_free(alloc, &cd_file->name);
422 }
423 else {
424 /* cd_files[zip->cd_files_num] is valid. */
425 zip->cd_files_num += 1;
426 }
427
428 return e;
429 }
430
431 int extract_zip_close(extract_zip_t **pzip)
432 {
433 int e = -1;
434 size_t pos;
435 size_t len;
436 int i;
437 extract_zip_t *zip = *pzip;
438 extract_alloc_t *alloc;
439
440 if (!zip) {
441 return 0;
442 }
443 alloc = extract_buffer_alloc(zip->buffer);
444 pos = extract_buffer_pos(zip->buffer);
445 len = 0;
446
447 /* Write Central directory file headers, freeing data as we go. */
448 for (i=0; i<zip->cd_files_num; ++i) {
449 const char extra[] = "";
450 size_t pos2 = extract_buffer_pos(zip->buffer);
451 extract_zip_cd_file_t* cd_file = &zip->cd_files[i];
452 s_write_uint32(zip, 0x02014b50);
453 s_write_uint16(zip, zip->version_creator); /* Version made by, copied from command-line zip. */
454 s_write_uint16(zip, zip->version_extract); /* Version needed to extract (minimum). */
455 s_write_uint16(zip, zip->general_purpose_bit_flag); /* General purpose bit flag */
456 s_write_uint16(zip, zip->compression_method); /* Compression method */
457 s_write_uint16(zip, cd_file->mtime); /* File last modification time */
458 s_write_uint16(zip, cd_file->mdate); /* File last modification date */
459 s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */
460 s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */
461 s_write_uint32(zip, cd_file->size_uncompressed); /* Uncompressed size */
462 s_write_uint16(zip, (uint16_t) strlen(cd_file->name)); /* File name length (n) */
463 s_write_uint16(zip, sizeof(extra)-1); /* Extra field length (m) */
464 s_write_uint16(zip, 0); /* File comment length (k) */
465 s_write_uint16(zip, 0); /* Disk number where file starts */
466 s_write_uint16(zip, cd_file->attr_internal); /* Internal file attributes */
467 s_write_uint32(zip, cd_file->attr_external); /* External file attributes. */
468 s_write_uint32(zip, cd_file->offset); /* Offset of local file header. */
469 s_write_string(zip, cd_file->name); /* File name */
470 s_write(zip, extra, sizeof(extra)-1); /* Extra field */
471 len += extract_buffer_pos(zip->buffer) - pos2;
472 extract_free(alloc, &cd_file->name);
473 }
474 extract_free(alloc, &zip->cd_files);
475
476 /* Write End of central directory record. */
477 s_write_uint32(zip, 0x06054b50);
478 s_write_uint16(zip, 0); /* Number of this disk */
479 s_write_uint16(zip, 0); /* Disk where central directory starts */
480 s_write_uint16(zip, (uint16_t) zip->cd_files_num); /* Number of central directory records on this disk */
481 s_write_uint16(zip, (uint16_t) zip->cd_files_num); /* Total number of central directory records */
482 s_write_uint32(zip, (int) len); /* Size of central directory (bytes) */
483 s_write_uint32(zip, (int) pos); /* Offset of start of central directory, relative to start of archive */
484
485 s_write_uint16(zip, (uint16_t) strlen(zip->archive_comment)); /* Comment length (n) */
486 s_write_string(zip, zip->archive_comment);
487 extract_free(alloc, &zip->archive_comment);
488
489 if (zip->errno_) e = -1;
490 else if (zip->eof) e = +1;
491 else e = 0;
492
493 extract_free(alloc, pzip);
494
495 return e;
496 }