Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/extract/src/zip.c @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | b50eed0cc0ef |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 #include "extract/alloc.h" | |
| 2 | |
| 3 #include "mem.h" | |
| 4 #include "outf.h" | |
| 5 #include "zip.h" | |
| 6 | |
| 7 #include <zlib.h> | |
| 8 | |
| 9 #include <assert.h> | |
| 10 #include <errno.h> | |
| 11 #include <limits.h> | |
| 12 #include <time.h> | |
| 13 | |
| 14 #include "compat_stdint.h" | |
| 15 | |
| 16 | |
/* Information remembered about each file written to the archive, so that the
central directory can be written when the archive is closed.

All numeric members are unsigned: they hold raw zip header values (bit-packed
MS-DOS date/time, CRC-32, 32-bit sizes and offsets) that are written with
s_write_uint16()/s_write_uint32(). Signed members would make storing values
with the top bit set (e.g. a DOS date from 2044 onwards, or roughly half of
all CRCs) implementation-defined. */
typedef struct
{
    uint16_t    mtime;              /* Modification time, MS-DOS format. */
    uint16_t    mdate;              /* Modification date, MS-DOS format. */
    uint32_t    crc_sum;            /* CRC-32 of the uncompressed data. */
    uint32_t    size_compressed;    /* Bytes of file data stored in the archive. */
    uint32_t    size_uncompressed;  /* Bytes of original file data. */
    char       *name;               /* Allocated copy of the file's name. */
    uint32_t    offset;             /* Buffer position of the local file header. */
    uint16_t    attr_internal;      /* Internal file attributes. */
    uint32_t    attr_external;      /* External file attributes. */
} extract_zip_cd_file_t;
| 29 | |
| 30 struct extract_zip_t | |
| 31 { | |
| 32 extract_buffer_t *buffer; | |
| 33 extract_zip_cd_file_t *cd_files; | |
| 34 int cd_files_num; | |
| 35 | |
| 36 /* errno_ is set to non-zero if any operation fails; avoids need to check | |
| 37 after every small output operation. */ | |
| 38 int errno_; | |
| 39 int eof; | |
| 40 uint16_t compression_method; | |
| 41 int compress_level; | |
| 42 | |
| 43 /* Defaults for various values in zip file headers etc. */ | |
| 44 uint16_t mtime; | |
| 45 uint16_t mdate; | |
| 46 uint16_t version_creator; | |
| 47 uint16_t version_extract; | |
| 48 uint16_t general_purpose_bit_flag; | |
| 49 uint16_t file_attr_internal; | |
| 50 uint32_t file_attr_external; | |
| 51 char *archive_comment; | |
| 52 }; | |
| 53 | |
| 54 int extract_zip_open(extract_buffer_t *buffer, extract_zip_t **o_zip) | |
| 55 { | |
| 56 int e = -1; | |
| 57 extract_zip_t *zip; | |
| 58 extract_alloc_t *alloc = extract_buffer_alloc(buffer); | |
| 59 | |
| 60 if (extract_malloc(alloc, &zip, sizeof(*zip))) goto end; | |
| 61 | |
| 62 zip->cd_files = NULL; | |
| 63 zip->cd_files_num = 0; | |
| 64 zip->buffer = buffer; | |
| 65 zip->errno_ = 0; | |
| 66 zip->eof = 0; | |
| 67 zip->compression_method = Z_DEFLATED; | |
| 68 zip->compress_level = Z_DEFAULT_COMPRESSION; | |
| 69 | |
| 70 /* We could maybe convert current date/time to the ms-dos format required | |
| 71 here, but using zeros doesn't seem to make a difference to Word etc. */ | |
| 72 | |
| 73 { | |
| 74 time_t t = time(NULL); | |
| 75 struct tm *tm; | |
| 76 #ifdef _POSIX_SOURCE | |
| 77 struct tm tm_local; | |
| 78 tm = gmtime_r(&t, &tm_local); | |
| 79 #else | |
| 80 tm = gmtime(&t); | |
| 81 #endif | |
| 82 if (tm) | |
| 83 { | |
| 84 /* mdate and mtime are in MS DOS format: | |
| 85 mtime: | |
| 86 bits 0-4: seconds / 2. | |
| 87 bits 5-10: minute (0-59). | |
| 88 bits 11-15: hour (0-23). | |
| 89 mdate: | |
| 90 bits 0-4: day of month (1-31). | |
| 91 bits 5-8: month (1=jan, 2=feb, etc). | |
| 92 bits 9-15: year - 1980. | |
| 93 */ | |
| 94 zip->mtime = (uint16_t) ((tm->tm_hour << 11) | (tm->tm_min << 5) | (tm->tm_sec / 2)); | |
| 95 zip->mdate = (uint16_t) (((1900 + tm->tm_year - 1980) << 9) | ((tm->tm_mon + 1) << 5) | tm->tm_mday); | |
| 96 } | |
| 97 else | |
| 98 { | |
| 99 outf0("*** gmtime_r() failed"); | |
| 100 zip->mtime = 0; | |
| 101 zip->mdate = 0; | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 /* These are all copied from command-line zip on unix. */ | |
| 106 zip->version_creator = (0x3 << 8) + 30; /* 0x3 is unix, 30 means 3.0. */ | |
| 107 zip->version_extract = 10; /* 10 means 1.0. */ | |
| 108 zip->general_purpose_bit_flag = 0; | |
| 109 zip->file_attr_internal = 0; | |
| 110 | |
| 111 /* We follow command-line zip which uses 0x81a40000 which is octal | |
| 112 0100644:0. (0100644 is S_IFREG (regular file) plus rw-r-r. See stat(2) for | |
| 113 details.) */ | |
| 114 zip->file_attr_external = (0100644 << 16) + 0; | |
| 115 if (extract_strdup(alloc, "Artifex", &zip->archive_comment)) goto end; | |
| 116 | |
| 117 e = 0; | |
| 118 end: | |
| 119 | |
| 120 if (e) { | |
| 121 if (zip) extract_free(alloc, &zip->archive_comment); | |
| 122 extract_free(alloc, &zip); | |
| 123 *o_zip = NULL; | |
| 124 } | |
| 125 else { | |
| 126 *o_zip = zip; | |
| 127 } | |
| 128 | |
| 129 return e; | |
| 130 } | |
| 131 | |
/* Returns 1 if this machine stores multi-byte integers least-significant byte
first (little-endian), or 0 if it stores them most-significant byte first
(big-endian). */
static int s_native_little_endinesss(void)
{
    /* Look at the in-memory bytes of a known 16-bit value through an unsigned
    char pointer. Unlike reading a char array through a uint16_t pointer, this
    cannot violate alignment or aliasing rules. */
    const uint16_t probe = 1 + 2*256;
    const unsigned char *bytes = (const unsigned char *) &probe;

    if (bytes[0] == 1 && bytes[1] == 2) {
        /* Native little-endiness. */
        return 1;
    }
    else if (bytes[0] == 2 && bytes[1] == 1) {
        /* Native big-endiness. */
        return 0;
    }
    /* Would like to call abort() here, but that breaks on AIX/gcc. */
    assert(0);
    return 0;
}
| 148 | |
| 149 | |
| 150 /* Allocation fns for zlib. */ | |
| 151 | |
| 152 static void *s_zalloc(void *opaque, unsigned items, unsigned size) | |
| 153 { | |
| 154 extract_zip_t *zip = opaque; | |
| 155 extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer); | |
| 156 void *ptr; | |
| 157 | |
| 158 if (extract_malloc(alloc, &ptr, items*size)) return NULL; | |
| 159 | |
| 160 return ptr; | |
| 161 } | |
| 162 | |
| 163 static void s_zfree(void *opaque, void *ptr) | |
| 164 { | |
| 165 extract_zip_t *zip = opaque; | |
| 166 extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer); | |
| 167 | |
| 168 extract_free(alloc, &ptr); | |
| 169 } | |
| 170 | |
| 171 | |
| 172 /* Uses zlib to write raw deflate compressed data to zip->buffer. */ | |
| 173 static int | |
| 174 s_write_compressed( | |
| 175 extract_zip_t *zip, | |
| 176 const void *data, | |
| 177 size_t data_length, | |
| 178 size_t *o_compressed_length) | |
| 179 { | |
| 180 int ze; | |
| 181 z_stream zstream = {0}; /* Initialise to keep Coverity quiet. */ | |
| 182 | |
| 183 if (zip->errno_) return -1; | |
| 184 if (zip->eof) return +1; | |
| 185 | |
| 186 zstream.zalloc = s_zalloc; | |
| 187 zstream.zfree = s_zfree; | |
| 188 zstream.opaque = zip; | |
| 189 | |
| 190 /* We need to write raw deflate data, so we use deflateInit2() with -ve | |
| 191 windowBits. The values we use are deflateInit()'s defaults. */ | |
| 192 ze = deflateInit2(&zstream, | |
| 193 zip->compress_level, | |
| 194 Z_DEFLATED, | |
| 195 -15 /*windowBits*/, | |
| 196 8 /*memLevel*/, | |
| 197 Z_DEFAULT_STRATEGY); | |
| 198 if (ze != Z_OK) | |
| 199 { | |
| 200 errno = (ze == Z_MEM_ERROR) ? ENOMEM : EINVAL; | |
| 201 zip->errno_ = errno; | |
| 202 outf("deflateInit2() failed ze=%i", ze); | |
| 203 return -1; | |
| 204 } | |
| 205 | |
| 206 /* Set zstream to read from specified data. */ | |
| 207 zstream.next_in = (void*) data; | |
| 208 zstream.avail_in = (unsigned) data_length; | |
| 209 | |
| 210 /* We increment *o_compressed_length gradually so that if we return an | |
| 211 error, we still indicate how many butes of compressed data have been | |
| 212 written. */ | |
| 213 if (o_compressed_length) | |
| 214 { | |
| 215 *o_compressed_length = 0; | |
| 216 } | |
| 217 | |
| 218 for(;;) | |
| 219 { | |
| 220 /* todo: write an extract_buffer_cache() function so we can write | |
| 221 directly into output buffer if it has a fn_cache. */ | |
| 222 unsigned char buffer[1024]; | |
| 223 zstream.next_out = &buffer[0]; | |
| 224 zstream.avail_out = sizeof(buffer); | |
| 225 ze = deflate(&zstream, zstream.avail_in ? Z_NO_FLUSH : Z_FINISH); | |
| 226 if (ze != Z_STREAM_END && ze != Z_OK) | |
| 227 { | |
| 228 outf("deflate() failed ze=%i", ze); | |
| 229 errno = EIO; | |
| 230 zip->errno_ = errno; | |
| 231 return -1; | |
| 232 } | |
| 233 { | |
| 234 /* Send the new compressed data to buffer. */ | |
| 235 size_t bytes_written; | |
| 236 int e = extract_buffer_write(zip->buffer, buffer, zstream.next_out - buffer, &bytes_written); | |
| 237 if (o_compressed_length) | |
| 238 { | |
| 239 *o_compressed_length += bytes_written; | |
| 240 } | |
| 241 if (e) | |
| 242 { | |
| 243 if (e == -1) zip->errno_ = errno; | |
| 244 if (e == +1) zip->eof = 1; | |
| 245 outf("extract_buffer_write() failed e=%i errno=%i", e, errno); | |
| 246 return e; | |
| 247 } | |
| 248 } | |
| 249 if (ze == Z_STREAM_END) | |
| 250 { | |
| 251 break; | |
| 252 } | |
| 253 } | |
| 254 ze = deflateEnd(&zstream); | |
| 255 if (ze != Z_OK) | |
| 256 { | |
| 257 outf("deflateEnd() failed ze=%i", ze); | |
| 258 errno = EIO; | |
| 259 zip->errno_ = errno; | |
| 260 return -1; | |
| 261 } | |
| 262 if (o_compressed_length) | |
| 263 { | |
| 264 assert(*o_compressed_length == (size_t) zstream.total_out); | |
| 265 } | |
| 266 | |
| 267 return 0; | |
| 268 } | |
| 269 | |
| 270 /* Writes uncompressed data to zip->buffer. */ | |
| 271 static int s_write(extract_zip_t *zip, const void *data, size_t data_length) | |
| 272 { | |
| 273 size_t actual; | |
| 274 int e; | |
| 275 | |
| 276 if (zip->errno_) return -1; | |
| 277 if (zip->eof) return +1; | |
| 278 | |
| 279 e = extract_buffer_write(zip->buffer, data, data_length, &actual); | |
| 280 if (e == -1) zip->errno_ = errno; | |
| 281 if (e == +1) zip->eof = 1; | |
| 282 | |
| 283 return e; | |
| 284 } | |
| 285 | |
| 286 static int s_write_uint32(extract_zip_t *zip, uint32_t value) | |
| 287 { | |
| 288 if (s_native_little_endinesss()) { | |
| 289 return s_write(zip, &value, sizeof(value)); | |
| 290 } | |
| 291 else { | |
| 292 unsigned char value2[4] = { | |
| 293 (unsigned char) (value >> 0), | |
| 294 (unsigned char) (value >> 8), | |
| 295 (unsigned char) (value >> 16), | |
| 296 (unsigned char) (value >> 24) | |
| 297 }; | |
| 298 return s_write(zip, &value2, sizeof(value2)); | |
| 299 } | |
| 300 } | |
| 301 | |
| 302 static int s_write_uint16(extract_zip_t *zip, uint16_t value) | |
| 303 { | |
| 304 if (s_native_little_endinesss()) { | |
| 305 return s_write(zip, &value, sizeof(value)); | |
| 306 } | |
| 307 else { | |
| 308 unsigned char value2[2] = { | |
| 309 (unsigned char) (value >> 0), | |
| 310 (unsigned char) (value >> 8) | |
| 311 }; | |
| 312 return s_write(zip, &value2, sizeof(value2)); | |
| 313 } | |
| 314 } | |
| 315 | |
| 316 static int s_write_string(extract_zip_t *zip, const char *text) | |
| 317 { | |
| 318 return s_write(zip, text, strlen(text)); | |
| 319 } | |
| 320 | |
| 321 | |
| 322 int extract_zip_write_file( | |
| 323 extract_zip_t *zip, | |
| 324 const void *data, | |
| 325 size_t data_length, | |
| 326 const char *name) | |
| 327 { | |
| 328 int e = -1; | |
| 329 extract_zip_cd_file_t *cd_file = NULL; | |
| 330 extract_alloc_t *alloc = extract_buffer_alloc(zip->buffer); | |
| 331 | |
| 332 if (data_length > INT_MAX) { | |
| 333 assert(0); | |
| 334 errno = EINVAL; | |
| 335 return -1; | |
| 336 } | |
| 337 /* Create central directory file header for later. */ | |
| 338 if (extract_realloc2( | |
| 339 alloc, | |
| 340 &zip->cd_files, | |
| 341 sizeof(extract_zip_cd_file_t) * zip->cd_files_num, | |
| 342 sizeof(extract_zip_cd_file_t) * (zip->cd_files_num+1) | |
| 343 )) goto end; | |
| 344 cd_file = &zip->cd_files[zip->cd_files_num]; | |
| 345 cd_file->name = NULL; | |
| 346 | |
| 347 cd_file->mtime = zip->mtime; | |
| 348 cd_file->mdate = zip->mdate; | |
| 349 cd_file->crc_sum = (int32_t) crc32(crc32(0, NULL, 0), data, (int) data_length); | |
| 350 cd_file->size_uncompressed = (int) data_length; | |
| 351 if (zip->compression_method == 0) | |
| 352 { | |
| 353 cd_file->size_compressed = cd_file->size_uncompressed; | |
| 354 } | |
| 355 if (extract_strdup(alloc, name, &cd_file->name)) goto end; | |
| 356 cd_file->offset = (int) extract_buffer_pos(zip->buffer); | |
| 357 cd_file->attr_internal = zip->file_attr_internal; | |
| 358 cd_file->attr_external = zip->file_attr_external; | |
| 359 if (!cd_file->name) goto end; | |
| 360 | |
| 361 /* Write local file header. If we are using compression, we set bit 3 of | |
| 362 General purpose bit flag and write zeros for crc-32, compressed size and | |
| 363 uncompressed size; then we write the actual values in data descriptor after | |
| 364 the compressed data. */ | |
| 365 { | |
| 366 const char extra_local[] = ""; /* Modify for testing. */ | |
| 367 uint16_t general_purpose_bit_flag = zip->general_purpose_bit_flag; | |
| 368 if (zip->compression_method) general_purpose_bit_flag |= 8; | |
| 369 s_write_uint32(zip, 0x04034b50); | |
| 370 s_write_uint16(zip, zip->version_extract); /* Version needed to extract (minimum). */ | |
| 371 s_write_uint16(zip, general_purpose_bit_flag); /* General purpose bit flag */ | |
| 372 s_write_uint16(zip, zip->compression_method); /* Compression method */ | |
| 373 s_write_uint16(zip, cd_file->mtime); /* File last modification time */ | |
| 374 s_write_uint16(zip, cd_file->mdate); /* File last modification date */ | |
| 375 if (zip->compression_method) | |
| 376 { | |
| 377 s_write_uint32(zip, 0); /* CRC-32 of uncompressed data */ | |
| 378 s_write_uint32(zip, 0); /* Compressed size */ | |
| 379 s_write_uint32(zip, 0); /* Uncompressed size */ | |
| 380 } | |
| 381 else | |
| 382 { | |
| 383 s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */ | |
| 384 s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */ | |
| 385 s_write_uint32(zip, cd_file->size_uncompressed);/* Uncompressed size */ | |
| 386 } | |
| 387 s_write_uint16(zip, (uint16_t) strlen(name)); /* File name length (n) */ | |
| 388 s_write_uint16(zip, sizeof(extra_local)-1); /* Extra field length (m) */ | |
| 389 s_write_string(zip, cd_file->name); /* File name */ | |
| 390 s_write(zip, extra_local, sizeof(extra_local)-1); /* Extra field */ | |
| 391 } | |
| 392 | |
| 393 if (zip->compression_method) | |
| 394 { | |
| 395 /* Write compressed data. */ | |
| 396 size_t data_length_compressed; | |
| 397 s_write_compressed(zip, data, data_length, &data_length_compressed); | |
| 398 cd_file->size_compressed = (int) data_length_compressed; | |
| 399 | |
| 400 /* Write data descriptor. */ | |
| 401 s_write_uint32(zip, 0x08074b50); /* Data descriptor signature */ | |
| 402 s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */ | |
| 403 s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */ | |
| 404 s_write_uint32(zip, cd_file->size_uncompressed); /* Uncompressed size */ | |
| 405 } | |
| 406 else | |
| 407 { | |
| 408 s_write(zip, data, data_length); | |
| 409 } | |
| 410 | |
| 411 if (zip->errno_) e = -1; | |
| 412 else if (zip->eof) e = +1; | |
| 413 else e = 0; | |
| 414 | |
| 415 | |
| 416 end: | |
| 417 | |
| 418 if (e) { | |
| 419 /* Leave zip->cd_files_num unchanged, so calling extract_zip_close() | |
| 420 will write out any earlier files. Free cd_file->name to avoid leak. */ | |
| 421 if (cd_file) extract_free(alloc, &cd_file->name); | |
| 422 } | |
| 423 else { | |
| 424 /* cd_files[zip->cd_files_num] is valid. */ | |
| 425 zip->cd_files_num += 1; | |
| 426 } | |
| 427 | |
| 428 return e; | |
| 429 } | |
| 430 | |
| 431 int extract_zip_close(extract_zip_t **pzip) | |
| 432 { | |
| 433 int e = -1; | |
| 434 size_t pos; | |
| 435 size_t len; | |
| 436 int i; | |
| 437 extract_zip_t *zip = *pzip; | |
| 438 extract_alloc_t *alloc; | |
| 439 | |
| 440 if (!zip) { | |
| 441 return 0; | |
| 442 } | |
| 443 alloc = extract_buffer_alloc(zip->buffer); | |
| 444 pos = extract_buffer_pos(zip->buffer); | |
| 445 len = 0; | |
| 446 | |
| 447 /* Write Central directory file headers, freeing data as we go. */ | |
| 448 for (i=0; i<zip->cd_files_num; ++i) { | |
| 449 const char extra[] = ""; | |
| 450 size_t pos2 = extract_buffer_pos(zip->buffer); | |
| 451 extract_zip_cd_file_t* cd_file = &zip->cd_files[i]; | |
| 452 s_write_uint32(zip, 0x02014b50); | |
| 453 s_write_uint16(zip, zip->version_creator); /* Version made by, copied from command-line zip. */ | |
| 454 s_write_uint16(zip, zip->version_extract); /* Version needed to extract (minimum). */ | |
| 455 s_write_uint16(zip, zip->general_purpose_bit_flag); /* General purpose bit flag */ | |
| 456 s_write_uint16(zip, zip->compression_method); /* Compression method */ | |
| 457 s_write_uint16(zip, cd_file->mtime); /* File last modification time */ | |
| 458 s_write_uint16(zip, cd_file->mdate); /* File last modification date */ | |
| 459 s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */ | |
| 460 s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */ | |
| 461 s_write_uint32(zip, cd_file->size_uncompressed); /* Uncompressed size */ | |
| 462 s_write_uint16(zip, (uint16_t) strlen(cd_file->name)); /* File name length (n) */ | |
| 463 s_write_uint16(zip, sizeof(extra)-1); /* Extra field length (m) */ | |
| 464 s_write_uint16(zip, 0); /* File comment length (k) */ | |
| 465 s_write_uint16(zip, 0); /* Disk number where file starts */ | |
| 466 s_write_uint16(zip, cd_file->attr_internal); /* Internal file attributes */ | |
| 467 s_write_uint32(zip, cd_file->attr_external); /* External file attributes. */ | |
| 468 s_write_uint32(zip, cd_file->offset); /* Offset of local file header. */ | |
| 469 s_write_string(zip, cd_file->name); /* File name */ | |
| 470 s_write(zip, extra, sizeof(extra)-1); /* Extra field */ | |
| 471 len += extract_buffer_pos(zip->buffer) - pos2; | |
| 472 extract_free(alloc, &cd_file->name); | |
| 473 } | |
| 474 extract_free(alloc, &zip->cd_files); | |
| 475 | |
| 476 /* Write End of central directory record. */ | |
| 477 s_write_uint32(zip, 0x06054b50); | |
| 478 s_write_uint16(zip, 0); /* Number of this disk */ | |
| 479 s_write_uint16(zip, 0); /* Disk where central directory starts */ | |
| 480 s_write_uint16(zip, (uint16_t) zip->cd_files_num); /* Number of central directory records on this disk */ | |
| 481 s_write_uint16(zip, (uint16_t) zip->cd_files_num); /* Total number of central directory records */ | |
| 482 s_write_uint32(zip, (int) len); /* Size of central directory (bytes) */ | |
| 483 s_write_uint32(zip, (int) pos); /* Offset of start of central directory, relative to start of archive */ | |
| 484 | |
| 485 s_write_uint16(zip, (uint16_t) strlen(zip->archive_comment)); /* Comment length (n) */ | |
| 486 s_write_string(zip, zip->archive_comment); | |
| 487 extract_free(alloc, &zip->archive_comment); | |
| 488 | |
| 489 if (zip->errno_) e = -1; | |
| 490 else if (zip->eof) e = +1; | |
| 491 else e = 0; | |
| 492 | |
| 493 extract_free(alloc, pzip); | |
| 494 | |
| 495 return e; | |
| 496 } |
