comparison mupdf-source/source/fitz/uncfb.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2023-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26 #include <limits.h>
27
/* Special values that can appear where a directory entry refers to another
 * entry (the l/r/d links). NOSTREAM marks an absent link. */
#define MAXREGSID 0xfffffffa
#define NOSTREAM 0xffffffff

/* Special values that can appear where a sector number is expected.
 * Anything above MAXREGSECT is not a real sector; ENDOFCHAIN terminates a
 * FAT chain.
 * NOTE(review): DIRSECT, FATSECT and FREESECT are not referenced in the code
 * visible here; MS-CFB names 0xFFFFFFFC "DIFSECT" (a DIFAT sector), which is
 * presumably what DIRSECT denotes - confirm. */
#define MAXREGSECT 0xfffffffa
#define DIRSECT 0xfffffffc
#define FATSECT 0xfffffffd
#define ENDOFCHAIN 0xfffffffe
#define FREESECT 0xffffffff

/* Define DEBUG_DIRENTRIES to dump the directory entries while loading. */
#undef DEBUG_DIRENTRIES
37
/* One directory entry as held in memory. */
typedef struct
{
	/* Heap allocated, NUL terminated name, converted to UTF-8. */
	char *name;
	/* First sector of the entry's data. */
	uint32_t sector;
	/* Length of the entry's data in bytes. */
	uint64_t size;
	/* Node numbers of the left sibling, right sibling and child
	 * respectively (NOSTREAM when absent). */
	uint32_t l;
	uint32_t r;
	uint32_t d;
	/* Multi-purpose flag word: first the object type read from the
	 * file, then a marker recording whether the tree walk reached the
	 * node, and finally the original node number (for debugging). */
	uint32_t t;
} cfb_entry;
49
50 typedef struct
51 {
52 fz_archive super;
53
54 int max;
55 int count;
56 cfb_entry *entries;
57
58 /* Header information from the file */
59 uint16_t major;
60 uint16_t sector_shift;
61 uint32_t num_dir_sectors;
62 uint32_t num_fat_sectors;
63 uint32_t dir_sector0;
64 uint32_t mini_fat_sector0;
65 uint32_t num_mini_fat_sectors;
66 uint32_t difat_sector0;
67 uint32_t num_difat_sectors;
68 uint32_t mini_stream_sector0;
69 uint64_t mini_stream_len;
70 uint32_t difat[109];
71
72 uint32_t fatcache_sector;
73 uint8_t fatcache[4096];
74
75 uint32_t minifatcache_real_sector;
76 uint32_t minifatcache_sector;
77 uint8_t minifatcache[4096];
78
79 } fz_cfb_archive;
80
81 static void
82 read(fz_context *ctx, fz_stream *stm, uint8_t *buf, size_t size)
83 {
84 size_t n = fz_read(ctx, stm, buf, size);
85
86 if (n != size)
87 fz_throw(ctx, FZ_ERROR_FORMAT, "Short read in CFB handling");
88 }
89
/* Decode a little-endian 16 bit value from the 2 bytes at 'b'. */
static uint16_t
get16(const uint8_t *b)
{
	unsigned int lo = b[0];
	unsigned int hi = b[1];

	return (uint16_t)(lo | (hi << 8));
}
95
/* Decode a little-endian 32 bit value from the 4 bytes at 'b'.
 *
 * Each byte is widened to uint32_t before shifting. Shifting the promoted
 * (signed) int left by 24 would be undefined behaviour whenever the top
 * byte is 0x80 or above, which is the case for every special sector value
 * (ENDOFCHAIN, FREESECT, ...). */
static uint32_t
get32(const uint8_t *b)
{
	return (uint32_t)b[0] |
		((uint32_t)b[1] << 8) |
		((uint32_t)b[2] << 16) |
		((uint32_t)b[3] << 24);
}
101
/* Decode a little-endian 64 bit value from the 8 bytes at 'b'. */
static uint64_t
get64(const uint8_t *b)
{
	uint64_t value = 0;
	int i;

	/* Fold the bytes in from the most significant end. */
	for (i = 7; i >= 0; i--)
		value = (value << 8) | b[i];

	return value;
}
114
115 static uint64_t
116 get_len(fz_context *ctx, fz_cfb_archive *cfb, const uint8_t *b)
117 {
118 uint64_t len = get64(b);
119
120 /* In v3 files the top 32bits *should* be zero, but may not be. The
121 * top bit of the lower 32bits should not be set though. */
122 if (cfb->major == 3)
123 {
124 if (len & 0x80000000)
125 fz_throw(ctx, FZ_ERROR_FORMAT, "Illegal length in CFB");
126 len &= 0xFFFFFFFFU;
127 }
128 return len;
129 }
130
131 static void
132 sector_seek(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint32_t offset)
133 {
134 fz_seek(ctx, cfb->super.file, ((sector + (uint64_t)1)<<cfb->sector_shift)+offset, SEEK_SET);
135 }
136
137 static uint32_t
138 read_difat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
139 {
140 uint32_t entries_per_sector;
141 uint32_t sect;
142
143 if (sector < 109)
144 {
145 return cfb->difat[sector];
146 }
147 sector -= 109;
148
149 /* Run down the difat chain until we find the right sector. */
150 entries_per_sector = (1<<(cfb->sector_shift-2)) - 1;
151 sect = cfb->difat_sector0;
152 while (sector > entries_per_sector)
153 {
154 sector_seek(ctx, cfb, sect, entries_per_sector * 4);
155 sect = fz_read_uint32_le(ctx, cfb->super.file);
156 sector -= entries_per_sector;
157 }
158
159 /* Now get the actual entry. */
160 sector_seek(ctx, cfb, sect, sector * 4);
161
162 return fz_read_uint32_le(ctx, cfb->super.file);
163 }
164
165 static uint32_t
166 read_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
167 {
168 uint32_t sector_size = 1<<cfb->sector_shift;
169 /* We want to read the entry for sector 'sector' from the FAT. This
170 * will be in FAT sector 'fatsect'. */
171 uint32_t fatsect = sector>>(cfb->sector_shift-2);
172 /* FAT sector fatsect will be physical sector real_sect. */
173 uint32_t real_sect = read_difat(ctx, cfb, fatsect);
174
175 if (real_sect > MAXREGSECT)
176 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt FAT");
177
178 if (real_sect != cfb->fatcache_sector)
179 {
180 sector_seek(ctx, cfb, real_sect, 0);
181 read(ctx, cfb->super.file, &cfb->fatcache[0], sector_size);
182 cfb->fatcache_sector = real_sect;
183 }
184
185 sector &= (sector_size>>2)-1;
186
187 return get32(&cfb->fatcache[sector*4]);
188 }
189
190 static uint32_t
191 read_mini_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
192 {
193 uint32_t sector_size = 1<<cfb->sector_shift;
194 /* A mini fat sector has lots of mini sector numbers in (each 4 bytes) */
195 uint32_t mini_sectors_in_mini_fat_sector = (1<<(cfb->sector_shift-2));
196 /* We want to read the entry for sector 'sector' from the mini FAT. This
197 * will be in mini FAT sector 'minifatsect'. */
198 uint32_t minifatsect = sector / mini_sectors_in_mini_fat_sector;
199 uint32_t index_within_minifatsect = sector - minifatsect * mini_sectors_in_mini_fat_sector;
200 int cache_valid = 1;
201
202 /* minifatsect is a count of how many sectors we are into the mini fat stream.
203 * minifatsect_real_sector is the physical section that that corresponds to. */
204
205 /* If we're behind our cache position, start from scratch. */
206 if (minifatsect < cfb->minifatcache_sector)
207 {
208 cfb->minifatcache_real_sector = cfb->mini_fat_sector0;
209 cfb->minifatcache_sector = 0;
210 cache_valid = 0;
211 }
212
213 /* Skip forward until we are at the right position. */
214 while (minifatsect != cfb->minifatcache_sector)
215 {
216 cfb->minifatcache_real_sector = read_fat(ctx, cfb, cfb->minifatcache_real_sector);
217 cfb->minifatcache_sector++;
218 cache_valid = 0;
219 }
220
221 /* Prime the cache if we just moved */
222 if (!cache_valid)
223 {
224 sector_seek(ctx, cfb, cfb->minifatcache_real_sector, 0);
225 read(ctx, cfb->super.file, cfb->minifatcache, sector_size);
226 }
227
228 return get32(&cfb->minifatcache[index_within_minifatsect*4]);
229 }
230
231 static void drop_cfb_archive(fz_context *ctx, fz_archive *arch)
232 {
233 fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
234 int i;
235 for (i = 0; i < cfb->count; ++i)
236 fz_free(ctx, cfb->entries[i].name);
237 fz_free(ctx, cfb->entries);
238 }
239
240 static cfb_entry *lookup_cfb_entry(fz_context *ctx, fz_cfb_archive *cfb, const char *name)
241 {
242 int i;
243 for (i = 0; i < cfb->count; i++)
244 if (!fz_strcasecmp(name, cfb->entries[i].name))
245 return &cfb->entries[i];
246 return NULL;
247 }
248
249 typedef struct
250 {
251 fz_cfb_archive *archive;
252 uint32_t first_sector;
253 uint32_t next_sector;
254 uint32_t next_sector_slow;
255 uint32_t next_sector_slow_flag;
256 uint64_t pos_at_next_sector;
257 uint64_t size;
258 fz_stream *mini_stream;
259 uint8_t buffer[4096];
260 } cfb_state;
261
262 static void
263 cfb_close(fz_context *ctx, void *state_)
264 {
265 cfb_state *state = (cfb_state *)state_;
266
267 fz_drop_archive(ctx, &state->archive->super);
268 fz_drop_stream(ctx, state->mini_stream);
269 fz_free(ctx, state);
270 }
271
272 static int
273 cfb_next(fz_context *ctx, fz_stream *stm, size_t required)
274 {
275 cfb_state *state = stm->state;
276 fz_cfb_archive *cfb = state->archive;
277 uint64_t sector_size = ((uint64_t)1)<<cfb->sector_shift;
278 uint64_t desired_sector_pos;
279 uint32_t pos_in_sector;
280 uint32_t this_sector;
281
282 if ((uint64_t)stm->pos >= state->size)
283 stm->eof = 1;
284
285 if (stm->eof)
286 {
287 stm->rp = stm->wp = state->buffer;
288 return EOF;
289 }
290
291 pos_in_sector = stm->pos & (sector_size-1);
292 desired_sector_pos = stm->pos & ~(sector_size-1);
293 if (desired_sector_pos != state->pos_at_next_sector)
294 {
295 state->pos_at_next_sector = 0;
296 state->next_sector = state->first_sector;
297 state->next_sector_slow = state->first_sector;
298 state->next_sector_slow_flag = 0;
299 }
300
301 this_sector = state->next_sector;
302 while (desired_sector_pos >= state->pos_at_next_sector)
303 {
304 this_sector = state->next_sector;
305 state->next_sector = read_fat(ctx, cfb, state->next_sector);
306 state->pos_at_next_sector += sector_size;
307 if (state->next_sector > MAXREGSECT)
308 break;
309
310 state->next_sector_slow_flag = !state->next_sector_slow_flag;
311 if (state->next_sector_slow_flag == 0)
312 state->next_sector_slow = read_fat(ctx, cfb, state->next_sector_slow);
313 if (state->next_sector_slow == state->next_sector)
314 fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
315 }
316 if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
317 fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
318
319 if (this_sector > MAXREGSECT)
320 fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
321 sector_seek(ctx, cfb, this_sector, 0);
322 read(ctx, cfb->super.file, state->buffer, sector_size);
323 stm->rp = state->buffer;
324 stm->wp = stm->rp + sector_size;
325 stm->pos = state->pos_at_next_sector;
326 if ((uint64_t)stm->pos >= state->size)
327 {
328 stm->wp -= (stm->pos - state->size);
329 stm->pos = state->size;
330 }
331 stm->rp += pos_in_sector;
332
333 return *stm->rp++;
334 }
335
336 #define MINI_SECTOR_SHIFT 6
337 #define MINI_SECTOR_SIZE (1<<MINI_SECTOR_SHIFT)
338
339 static int
340 cfb_next_mini(fz_context *ctx, fz_stream *stm, size_t required)
341 {
342 cfb_state *state = stm->state;
343 fz_cfb_archive *cfb = state->archive;
344 uint64_t desired_sector_pos;
345 uint32_t pos_in_sector;
346 uint32_t this_sector;
347
348 if ((uint64_t)stm->pos >= state->size)
349 stm->eof = 1;
350
351 if (stm->eof)
352 {
353 stm->rp = stm->wp = state->buffer;
354 return EOF;
355 }
356
357 /* Whenever we say 'sector' here, we mean 'mini sector'. */
358 pos_in_sector = stm->pos & (MINI_SECTOR_SIZE-1);
359 desired_sector_pos = stm->pos & ~(MINI_SECTOR_SIZE-1);
360 if (desired_sector_pos != state->pos_at_next_sector)
361 {
362 state->pos_at_next_sector = 0;
363 state->next_sector = state->first_sector;
364 state->next_sector_slow = state->first_sector;
365 state->next_sector_slow_flag = 0;
366 }
367
368 this_sector = state->next_sector;
369 while (desired_sector_pos >= state->pos_at_next_sector)
370 {
371 this_sector = state->next_sector;
372 state->next_sector = read_mini_fat(ctx, cfb, state->next_sector);
373 state->pos_at_next_sector += MINI_SECTOR_SIZE;
374 if (state->next_sector > MAXREGSECT)
375 break;
376
377 state->next_sector_slow_flag = !state->next_sector_slow_flag;
378 if (state->next_sector_slow_flag == 0)
379 state->next_sector_slow = read_mini_fat(ctx, cfb, state->next_sector_slow);
380 if (state->next_sector_slow == state->next_sector)
381 fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
382 }
383 if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
384 fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
385
386 if (this_sector > MAXREGSECT)
387 fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
388
389 fz_seek(ctx, state->mini_stream, ((uint64_t)this_sector) * MINI_SECTOR_SIZE, SEEK_SET);
390 read(ctx, state->mini_stream, state->buffer, MINI_SECTOR_SIZE);
391 stm->rp = state->buffer;
392 stm->wp = stm->rp + MINI_SECTOR_SIZE;
393 stm->pos += MINI_SECTOR_SIZE;
394 if ((uint64_t)stm->pos >= state->size)
395 {
396 stm->wp -= (stm->pos - state->size);
397 stm->pos = state->size;
398 }
399 stm->rp += pos_in_sector;
400
401 return *stm->rp++;
402 }
403
404 static void cfb_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
405 {
406 cfb_state *state = stm->state;
407 int64_t pos = stm->pos - (stm->wp - stm->rp);
408 /* Convert to absolute pos */
409 if (whence == 1)
410 {
411 offset += pos; /* Was relative to current pos */
412 }
413 else if (whence == 2)
414 {
415 offset += stm->pos; /* Was relative to end */
416 }
417
418 if (offset < 0)
419 offset = 0;
420 if ((uint64_t)offset > state->size)
421 offset = (int64_t)state->size;
422 stm->pos = offset;
423 stm->rp = stm->wp = state->buffer;
424 }
425
426 static fz_stream *sector_stream(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint64_t size)
427 {
428 fz_stream *stm;
429 cfb_state *state = fz_malloc_struct(ctx, cfb_state);
430
431 state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
432 state->pos_at_next_sector = 0;
433 state->size = size;
434 state->first_sector = sector;
435 state->next_sector = state->first_sector;
436 state->next_sector_slow = state->first_sector;
437 state->next_sector_slow_flag = 0;
438
439 stm = fz_new_stream(ctx, state, cfb_next, cfb_close);
440 stm->seek = cfb_seek;
441
442 return stm;
443 }
444
445 static fz_stream *open_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
446 {
447 fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
448 cfb_entry *ent;
449 fz_stream *stm;
450 cfb_state *state;
451
452 ent = lookup_cfb_entry(ctx, cfb, name);
453 if (!ent)
454 return NULL;
455
456 if (ent->size >= 0x1000)
457 {
458 /* Working from entire sectors */
459 return sector_stream(ctx, cfb, ent->sector, ent->size);
460 }
461
462 /* We're working from the mini stream. */
463 state = fz_malloc_struct(ctx, cfb_state);
464
465 fz_try(ctx)
466 {
467 /* Let's get a stream that gets us the mini stream, and then work from that. */
468 state->mini_stream = sector_stream(ctx, cfb, cfb->mini_stream_sector0, cfb->mini_stream_len);
469 state->first_sector = ent->sector;
470 state->pos_at_next_sector = 0;
471 state->size = ent->size;
472 state->next_sector = state->first_sector;
473 state->next_sector_slow = state->first_sector;
474 state->next_sector_slow_flag = 0;
475 state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
476
477 }
478 fz_catch(ctx)
479 {
480 fz_free(ctx, state);
481 fz_rethrow(ctx);
482 }
483
484 stm = fz_new_stream(ctx, state, cfb_next_mini, cfb_close);
485 stm->seek = cfb_seek;
486 return stm;
487 }
488
489 static fz_buffer *read_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
490 {
491 fz_stream *stm;
492 fz_buffer *buf = NULL;
493
494 stm = open_cfb_entry(ctx, arch, name);
495 if (!stm)
496 return NULL;
497
498 fz_try(ctx)
499 buf = fz_read_all(ctx, stm, 1024);
500 fz_always(ctx)
501 fz_drop_stream(ctx, stm);
502 fz_catch(ctx)
503 fz_rethrow(ctx);
504
505 return buf;
506 }
507
508 static int has_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
509 {
510 fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
511 cfb_entry *ent = lookup_cfb_entry(ctx, cfb, name);
512 return ent != NULL;
513 }
514
515 static const char *list_cfb_entry(fz_context *ctx, fz_archive *arch, int idx)
516 {
517 fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
518 if (idx < 0 || idx >= cfb->count)
519 return NULL;
520 return cfb->entries[idx].name;
521 }
522
523 static int count_cfb_entries(fz_context *ctx, fz_archive *arch)
524 {
525 fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
526 return cfb->count;
527 }
528
529 static const uint8_t sig[8] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
530 static const uint8_t zeros[16] = { 0 };
531
532 int
533 fz_is_cfb_archive(fz_context *ctx, fz_stream *file)
534 {
535 uint8_t data[nelem(sig)];
536 size_t n;
537
538 fz_seek(ctx, file, 0, SEEK_SET);
539 n = fz_read(ctx, file, data, nelem(data));
540 if (n != nelem(data))
541 return 0;
542 if (!memcmp(data, sig, nelem(sig)))
543 return 1;
544
545 return 0;
546 }
547
548 static void
549 expect(fz_context *ctx, fz_stream *file, const uint8_t *pattern, size_t n, const char *msg)
550 {
551 uint8_t buffer[64];
552
553 assert(sizeof(buffer) >= n);
554 read(ctx, file, buffer, n);
555
556 if (memcmp(buffer, pattern, n) != 0)
557 fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB", msg);
558 }
559
560 static void
561 expect16(fz_context *ctx, fz_stream *file, uint16_t v, const char *msg)
562 {
563 uint16_t u;
564
565 u = fz_read_uint16_le(ctx, file);
566 if (u != v)
567 fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%04x != 0x%04x", msg, u, v);
568 }
569
570 static void
571 expect32(fz_context *ctx, fz_stream *file, uint32_t v, const char *msg)
572 {
573 uint32_t u;
574
575 u = fz_read_uint32_le(ctx, file);
576 if (u != v)
577 fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%08x != 0x%08x", msg, u, v);
578 }
579
580 #define REACHED 0xFFFFFFFF
581 #define REACHED_KEEP 0xFFFFFFFE
582
583 static void
584 make_absolute(fz_context *ctx, fz_cfb_archive *cfb, char *prefix, int node, int depth)
585 {
586 uint32_t type;
587
588 /* To avoid recursion where possible. */
589 while (1)
590 {
591 if (node == (int)NOSTREAM)
592 return;
593
594 if (node < 0 || node >= cfb->count)
595 fz_throw(ctx, FZ_ERROR_FORMAT, "Invalid tree");
596
597 if (depth >= 32)
598 fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree too deep");
599
600 type = cfb->entries[node].t;
601 if (type == REACHED || type == REACHED_KEEP)
602 fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree has cycles");
603 cfb->entries[node].t = (type == 2) ? REACHED_KEEP : REACHED;
604
605 if (prefix)
606 {
607 size_t z0 = strlen(prefix);
608 size_t z1 = strlen(cfb->entries[node].name);
609 char *newname = fz_malloc(ctx, z0+z1+2);
610 memcpy(newname, prefix, z0);
611 newname[z0] = '/';
612 memcpy(newname+z0+1, cfb->entries[node].name, z1+1);
613 fz_free(ctx, cfb->entries[node].name);
614 cfb->entries[node].name = newname;
615 }
616
617 if (cfb->entries[node].d == NOSTREAM && cfb->entries[node].r == NOSTREAM)
618 {
619 /* Handle 'l' without recursion, because there is no 'r' or 'd'. */
620 node = cfb->entries[node].l;
621 continue;
622 }
623 make_absolute(ctx, cfb, prefix, cfb->entries[node].l, depth+1);
624 if (cfb->entries[node].d == NOSTREAM)
625 {
626 /* Handle 'r' without recursion, because there is no 'd'. */
627 node = cfb->entries[node].r;
628 continue;
629 }
630 make_absolute(ctx, cfb, prefix, cfb->entries[node].r, depth+1);
631
632 /* Rather than recursing:
633 * make_absolute(ctx, cfb, node == 0 ? NULL : cfb->entries[node].name, cfb->entries[node].d, depth+1);
634 * instead just loop. */
635 prefix = node == 0 ? NULL : cfb->entries[node].name;
636 node = cfb->entries[node].d;
637 }
638
639 }
640
641 static void
642 absolutise_names(fz_context *ctx, fz_cfb_archive *cfb)
643 {
644 make_absolute(ctx, cfb, NULL, 0, 0);
645 }
646
647 static void
648 strip_unused_names(fz_context *ctx, fz_cfb_archive *cfb)
649 {
650 int i, j;
651 int n = cfb->count;
652
653 /* Init i and j so that we always delete the root node. */
654 fz_free(ctx, cfb->entries[0].name);
655 for (i = 1, j = 0; i < n; i++)
656 {
657 if (cfb->entries[i].t == REACHED_KEEP)
658 {
659 if (i != j)
660 cfb->entries[j] = cfb->entries[i];
661 cfb->entries[j].t = i;
662 j++;
663 }
664 else
665 fz_free(ctx, cfb->entries[i].name);
666 }
667 cfb->count = j;
668 }
669
670 fz_archive *
671 fz_open_cfb_archive_with_stream(fz_context *ctx, fz_stream *file)
672 {
673 fz_cfb_archive *cfb;
674 uint8_t buffer[4096];
675 uint32_t sector, slow_sector, slow_sector_flag;
676 int i;
677
678 if (!fz_is_cfb_archive(ctx, file))
679 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize cfb archive");
680
681 cfb = fz_new_derived_archive(ctx, file, fz_cfb_archive);
682 cfb->super.format = "cfb";
683 cfb->super.count_entries = count_cfb_entries;
684 cfb->super.list_entry = list_cfb_entry;
685 cfb->super.has_entry = has_cfb_entry;
686 cfb->super.read_entry = read_cfb_entry;
687 cfb->super.open_entry = open_cfb_entry;
688 cfb->super.drop_archive = drop_cfb_archive;
689
690 fz_try(ctx)
691 {
692 fz_seek(ctx, file, 0, SEEK_SET);
693 /* Read the header */
694 expect(ctx, file, sig, 8, "Bad signature");
695 expect(ctx, file, zeros, 16, "Bad CLSID");
696 /* The minor version is SUPPOSED to be 0x3e, but we don't seem to be
697 * able to rely on this. So just skip it. */
698 (void)fz_read_uint16_le(ctx, file);
699 cfb->major = fz_read_uint16_le(ctx, file);
700 if (cfb->major != 3 && cfb->major != 4)
701 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad major version of CFB: %d", cfb->major);
702 expect16(ctx, file, 0xfffe, "Bad byte order");
703 cfb->sector_shift = fz_read_uint16_le(ctx, file);
704 if ((cfb->major == 3 && cfb->sector_shift != 9) ||
705 (cfb->major == 4 && cfb->sector_shift != 12))
706 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad sector shift: %d", cfb->sector_shift);
707 expect16(ctx, file, 6, "Bad mini section shift");
708 expect(ctx, file, zeros, 6, "Bad padding");
709 cfb->num_dir_sectors = fz_read_uint32_le(ctx, file);
710 cfb->num_fat_sectors = fz_read_uint32_le(ctx, file);
711 cfb->dir_sector0 = fz_read_uint32_le(ctx, file);
712 (void)fz_read_uint32_le(ctx, file); /* Transaction signature number */
713 expect32(ctx, file, 0x1000, "Bad mini stream cutoff size");
714 cfb->mini_fat_sector0 = fz_read_uint32_le(ctx, file);
715 cfb->num_mini_fat_sectors = fz_read_uint32_le(ctx, file);
716 cfb->difat_sector0 = fz_read_uint32_le(ctx, file);
717 cfb->num_difat_sectors = fz_read_uint32_le(ctx, file);
718 for (i = 0; i < 109; i++)
719 cfb->difat[i] = fz_read_uint32_le(ctx, file);
720 cfb->fatcache_sector = (uint32_t)-1;
721 cfb->minifatcache_sector = (uint32_t)-1;
722
723 /* Read the directory entries. */
724 /* On our first pass through, EVERYTHING goes into the entries. */
725 sector = cfb->dir_sector0;
726 slow_sector = sector;
727 slow_sector_flag = 0;
728 do
729 {
730 size_t z = ((size_t)1)<<cfb->sector_shift;
731 size_t off;
732
733 /* Fetch the sector. */
734 fz_seek(ctx, file, ((int64_t)sector+1)<<cfb->sector_shift, SEEK_SET);
735 read(ctx, file, buffer, z);
736
737 for (off = 0; off < z; off += 128)
738 {
739 int count = 0;
740 int type;
741 int namelen = get16(buffer+off+64);
742
743 if (namelen == 0)
744 break;
745
746 /* What flavour of object is this? */
747 type = buffer[off+64+2];
748
749 /* Ensure our entries list is long enough. */
750 if (cfb->max == cfb->count)
751 {
752 int newmax = cfb->max * 2;
753 if (newmax == 0)
754 newmax = 32;
755 cfb->entries = fz_realloc_array(ctx, cfb->entries, newmax, cfb_entry);
756 cfb->max = newmax;
757 }
758
759 /* Count the name length in utf8 encoded bytes, including terminator. */
760 for (i = 0; i < 64; i += 2)
761 {
762 int ucs = get16(buffer+off+i);
763 if (ucs == 0)
764 break;
765 count += fz_runelen(ucs);
766 }
767 if (i+2 != namelen || i == 64)
768 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed name in CFB directory");
769
770 /* Copy the name. */
771 cfb->entries[cfb->count++].name = fz_malloc(ctx, count + 1);
772 count = 0;
773 for (i = 0; i < 64; i += 2)
774 {
775 int ucs = buffer[off+i] + (buffer[off+i+1]<<8);
776 if (ucs == 0)
777 break;
778 count += fz_runetochar(&cfb->entries[cfb->count-1].name[count], ucs);
779 }
780 cfb->entries[cfb->count-1].name[count] = 0;
781
782 cfb->entries[cfb->count-1].sector = get32(buffer+off+128-12);
783 cfb->entries[cfb->count-1].size = get_len(ctx, cfb, buffer+off+128-8);
784 cfb->entries[cfb->count-1].l = get32(buffer+off+68);
785 cfb->entries[cfb->count-1].r = get32(buffer+off+72);
786 cfb->entries[cfb->count-1].d = get32(buffer+off+76);
787 cfb->entries[cfb->count-1].t = type;
788
789 #ifdef DEBUG_DIRENTRIES
790 fz_write_printf(ctx, fz_stddbg(ctx), "%d: ", cfb->count-1);
791 if (type == 1)
792 fz_write_printf(ctx, fz_stddbg(ctx), "(storage) ");
793 else if (type == 2)
794 fz_write_printf(ctx, fz_stddbg(ctx), "(file) ");
795 else if (type == 5)
796 fz_write_printf(ctx, fz_stddbg(ctx), "(root) ");
797 else
798 fz_write_printf(ctx, fz_stddbg(ctx), "(%d?) ", type);
799
800 fz_write_printf(ctx, fz_stddbg(ctx), "%q", cfb->entries[cfb->count-1].name);
801 fz_write_printf(ctx, fz_stddbg(ctx), " @%x+%x\n", cfb->entries[cfb->count-1].sector, cfb->entries[cfb->count-1].size );
802 if (cfb->entries[cfb->count-1].l <= MAXREGSID)
803 fz_write_printf(ctx, fz_stddbg(ctx), "\tleft=%d\n", cfb->entries[cfb->count-1].l);
804 if (cfb->entries[cfb->count-1].r <= MAXREGSID)
805 fz_write_printf(ctx, fz_stddbg(ctx), "\tright=%d\n", cfb->entries[cfb->count-1].r);
806 if (cfb->entries[cfb->count-1].d <= MAXREGSID)
807 fz_write_printf(ctx, fz_stddbg(ctx), "\tchild=%d\n", cfb->entries[cfb->count-1].d);
808 #endif
809
810 /* Type 5 is just for the root. */
811 if (type == 5)
812 {
813 cfb->mini_stream_sector0 = get32(buffer+off+128-12);
814 cfb->mini_stream_len = get_len(ctx, cfb, buffer+off+128-8);
815 }
816 }
817
818 /* To get the next sector, we need to read it from the FAT. */
819 sector = read_fat(ctx, cfb, sector);
820 slow_sector_flag = !slow_sector_flag;
821 if (slow_sector_flag == 0)
822 slow_sector = read_fat(ctx, cfb, slow_sector);
823 if (slow_sector == sector)
824 fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT");
825 }
826 while (sector <= MAXREGSECT);
827
828 absolutise_names(ctx, cfb);
829 strip_unused_names(ctx, cfb);
830
831 #ifdef DEBUG_DIRENTRIES
832 for (i = 0; i < cfb->count; i++)
833 fz_write_printf(ctx, fz_stddbg(ctx), "%d: %s (was %d)\n", i, cfb->entries[i].name, cfb->entries[i].t);
834 #endif
835 }
836 fz_catch(ctx)
837 {
838 fz_drop_archive(ctx, &cfb->super);
839 fz_rethrow(ctx);
840 }
841
842 return &cfb->super;
843 }
844
845 fz_archive *
846 fz_open_cfb_archive(fz_context *ctx, const char *filename)
847 {
848 fz_archive *cfb = NULL;
849 fz_stream *file;
850
851 file = fz_open_file(ctx, filename);
852
853 fz_try(ctx)
854 cfb = fz_open_cfb_archive_with_stream(ctx, file);
855 fz_always(ctx)
856 fz_drop_stream(ctx, file);
857 fz_catch(ctx)
858 fz_rethrow(ctx);
859
860 return cfb;
861 }