diff mupdf-source/source/fitz/uncfb.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/uncfb.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,861 @@
+// Copyright (C) 2023-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "mupdf/fitz.h"
+
+#include <string.h>
+#include <limits.h>
+
+#define MAXREGSID 0xfffffffa
+#define NOSTREAM 0xffffffff
+#define MAXREGSECT 0xfffffffa
+#define DIRSECT 0xfffffffc
+#define FATSECT 0xfffffffd
+#define ENDOFCHAIN 0xfffffffe
+#define FREESECT 0xffffffff
+
+#undef DEBUG_DIRENTRIES
+
+typedef struct
+{
+	char *name;
+	uint32_t sector;
+	uint64_t size;
+	uint32_t l, r, d;
+	/* Flag word used for various different things.
+	 * initially the type, then marked as to whether the DFS reached it
+	 * then finally the original node number for debug. */
+	uint32_t t;
+} cfb_entry;
+
+typedef struct
+{
+	fz_archive super;
+
+	int max;
+	int count;
+	cfb_entry *entries;
+
+	/* Header information from the file */
+	uint16_t major;
+	uint16_t sector_shift;
+	uint32_t num_dir_sectors;
+	uint32_t num_fat_sectors;
+	uint32_t dir_sector0;
+	uint32_t mini_fat_sector0;
+	uint32_t num_mini_fat_sectors;
+	uint32_t difat_sector0;
+	uint32_t num_difat_sectors;
+	uint32_t mini_stream_sector0;
+	uint64_t mini_stream_len;
+	uint32_t difat[109];
+
+	uint32_t fatcache_sector;
+	uint8_t fatcache[4096];
+
+	uint32_t minifatcache_real_sector;
+	uint32_t minifatcache_sector;
+	uint8_t minifatcache[4096];
+
+} fz_cfb_archive;
+
+static void
+read(fz_context *ctx, fz_stream *stm, uint8_t *buf, size_t size)
+{
+	size_t n = fz_read(ctx, stm, buf, size);
+
+	if (n != size)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Short read in CFB handling");
+}
+
+static uint16_t
+get16(const uint8_t *b)
+{
+	return b[0] + (b[1]<<8);
+}
+
+static uint32_t
+get32(const uint8_t *b)
+{
+	return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+}
+
+static uint64_t
+get64(const uint8_t *b)
+{
+	return b[0] +
+		(((uint64_t)b[1])<<8) +
+		(((uint64_t)b[2])<<16) +
+		(((uint64_t)b[3])<<24) +
+		(((uint64_t)b[4])<<32) +
+		(((uint64_t)b[5])<<40) +
+		(((uint64_t)b[6])<<48) +
+		(((uint64_t)b[7])<<56);
+}
+
+static uint64_t
+get_len(fz_context *ctx, fz_cfb_archive *cfb, const uint8_t *b)
+{
+	uint64_t len = get64(b);
+
+	/* In v3 files the top 32bits *should* be zero, but may not be. The
+	 * top bit of the lower 32bits should not be set though. */
+	if (cfb->major == 3)
+	{
+		if (len & 0x80000000)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Illegal length in CFB");
+		len &= 0xFFFFFFFFU;
+	}
+	return len;
+}
+
+static void
+sector_seek(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint32_t offset)
+{
+	fz_seek(ctx, cfb->super.file, ((sector + (uint64_t)1)<<cfb->sector_shift)+offset, SEEK_SET);
+}
+
+static uint32_t
+read_difat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
+{
+	uint32_t entries_per_sector;
+	uint32_t sect;
+
+	if (sector < 109)
+	{
+		return cfb->difat[sector];
+	}
+	sector -= 109;
+
+	/* Run down the difat chain until we find the right sector. */
+	entries_per_sector = (1<<(cfb->sector_shift-2)) - 1;
+	sect = cfb->difat_sector0;
+	while (sector > entries_per_sector)
+	{
+		sector_seek(ctx, cfb, sect, entries_per_sector * 4);
+		sect = fz_read_uint32_le(ctx, cfb->super.file);
+		sector -= entries_per_sector;
+	}
+
+	/* Now get the actual entry. */
+	sector_seek(ctx, cfb, sect, sector * 4);
+
+	return fz_read_uint32_le(ctx, cfb->super.file);
+}
+
+static uint32_t
+read_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
+{
+	uint32_t sector_size = 1<<cfb->sector_shift;
+	/* We want to read the entry for sector 'sector' from the FAT. This
+	 * will be in FAT sector 'fatsect'. */
+	uint32_t fatsect = sector>>(cfb->sector_shift-2);
+	/* FAT sector fatsect will be physical sector real_sect. */
+	uint32_t real_sect = read_difat(ctx, cfb, fatsect);
+
+	if (real_sect > MAXREGSECT)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt FAT");
+
+	if (real_sect != cfb->fatcache_sector)
+	{
+		sector_seek(ctx, cfb, real_sect, 0);
+		read(ctx, cfb->super.file, &cfb->fatcache[0], sector_size);
+		cfb->fatcache_sector = real_sect;
+	}
+
+	sector &= (sector_size>>2)-1;
+
+	return get32(&cfb->fatcache[sector*4]);
+}
+
+static uint32_t
+read_mini_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
+{
+	uint32_t sector_size = 1<<cfb->sector_shift;
+	/* A mini fat sector has lots of mini sector numbers in (each 4 bytes) */
+	uint32_t mini_sectors_in_mini_fat_sector = (1<<(cfb->sector_shift-2));
+	/* We want to read the entry for sector 'sector' from the mini FAT. This
+	 * will be in mini FAT sector 'minifatsect'. */
+	uint32_t minifatsect = sector / mini_sectors_in_mini_fat_sector;
+	uint32_t index_within_minifatsect = sector - minifatsect * mini_sectors_in_mini_fat_sector;
+	int cache_valid = 1;
+
+	/* minifatsect is a count of how many sectors we are into the mini fat stream.
+	 * minifatsect_real_sector is the physical section that that corresponds to. */
+
+	/* If we're behind our cache position, start from scratch. */
+	if (minifatsect < cfb->minifatcache_sector)
+	{
+		cfb->minifatcache_real_sector = cfb->mini_fat_sector0;
+		cfb->minifatcache_sector = 0;
+		cache_valid = 0;
+	}
+
+	/* Skip forward until we are at the right position. */
+	while (minifatsect != cfb->minifatcache_sector)
+	{
+		cfb->minifatcache_real_sector = read_fat(ctx, cfb, cfb->minifatcache_real_sector);
+		cfb->minifatcache_sector++;
+		cache_valid = 0;
+	}
+
+	/* Prime the cache if we just moved */
+	if (!cache_valid)
+	{
+		sector_seek(ctx, cfb, cfb->minifatcache_real_sector, 0);
+		read(ctx, cfb->super.file, cfb->minifatcache, sector_size);
+	}
+
+	return get32(&cfb->minifatcache[index_within_minifatsect*4]);
+}
+
+static void drop_cfb_archive(fz_context *ctx, fz_archive *arch)
+{
+	fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
+	int i;
+	for (i = 0; i < cfb->count; ++i)
+		fz_free(ctx, cfb->entries[i].name);
+	fz_free(ctx, cfb->entries);
+}
+
+static cfb_entry *lookup_cfb_entry(fz_context *ctx, fz_cfb_archive *cfb, const char *name)
+{
+	int i;
+	for (i = 0; i < cfb->count; i++)
+		if (!fz_strcasecmp(name, cfb->entries[i].name))
+			return &cfb->entries[i];
+	return NULL;
+}
+
+typedef struct
+{
+	fz_cfb_archive *archive;
+	uint32_t first_sector;
+	uint32_t next_sector;
+	uint32_t next_sector_slow;
+	uint32_t next_sector_slow_flag;
+	uint64_t pos_at_next_sector;
+	uint64_t size;
+	fz_stream *mini_stream;
+	uint8_t buffer[4096];
+} cfb_state;
+
+static void
+cfb_close(fz_context *ctx, void *state_)
+{
+	cfb_state *state = (cfb_state *)state_;
+
+	fz_drop_archive(ctx, &state->archive->super);
+	fz_drop_stream(ctx, state->mini_stream);
+	fz_free(ctx, state);
+}
+
+static int
+cfb_next(fz_context *ctx, fz_stream *stm, size_t required)
+{
+	cfb_state *state = stm->state;
+	fz_cfb_archive *cfb = state->archive;
+	uint64_t sector_size = ((uint64_t)1)<<cfb->sector_shift;
+	uint64_t desired_sector_pos;
+	uint32_t pos_in_sector;
+	uint32_t this_sector;
+
+	if ((uint64_t)stm->pos >= state->size)
+		stm->eof = 1;
+
+	if (stm->eof)
+	{
+		stm->rp = stm->wp = state->buffer;
+		return EOF;
+	}
+
+	pos_in_sector = stm->pos & (sector_size-1);
+	desired_sector_pos = stm->pos & ~(sector_size-1);
+	if (desired_sector_pos != state->pos_at_next_sector)
+	{
+		state->pos_at_next_sector = 0;
+		state->next_sector = state->first_sector;
+		state->next_sector_slow = state->first_sector;
+		state->next_sector_slow_flag = 0;
+	}
+
+	this_sector = state->next_sector;
+	while (desired_sector_pos >= state->pos_at_next_sector)
+	{
+		this_sector = state->next_sector;
+		state->next_sector = read_fat(ctx, cfb, state->next_sector);
+		state->pos_at_next_sector += sector_size;
+		if (state->next_sector > MAXREGSECT)
+			break;
+
+		state->next_sector_slow_flag = !state->next_sector_slow_flag;
+		if (state->next_sector_slow_flag == 0)
+			state->next_sector_slow = read_fat(ctx, cfb, state->next_sector_slow);
+		if (state->next_sector_slow == state->next_sector)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
+	}
+	if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
+
+	if (this_sector > MAXREGSECT)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
+	sector_seek(ctx, cfb, this_sector, 0);
+	read(ctx, cfb->super.file, state->buffer, sector_size);
+	stm->rp = state->buffer;
+	stm->wp = stm->rp + sector_size;
+	stm->pos = state->pos_at_next_sector;
+	if ((uint64_t)stm->pos >= state->size)
+	{
+		stm->wp -= (stm->pos - state->size);
+		stm->pos = state->size;
+	}
+	stm->rp += pos_in_sector;
+
+	return *stm->rp++;
+}
+
+#define MINI_SECTOR_SHIFT 6
+#define MINI_SECTOR_SIZE (1<<MINI_SECTOR_SHIFT)
+
+static int
+cfb_next_mini(fz_context *ctx, fz_stream *stm, size_t required)
+{
+	cfb_state *state = stm->state;
+	fz_cfb_archive *cfb = state->archive;
+	uint64_t desired_sector_pos;
+	uint32_t pos_in_sector;
+	uint32_t this_sector;
+
+	if ((uint64_t)stm->pos >= state->size)
+		stm->eof = 1;
+
+	if (stm->eof)
+	{
+		stm->rp = stm->wp = state->buffer;
+		return EOF;
+	}
+
+	/* Whenever we say 'sector' here, we mean 'mini sector'. */
+	pos_in_sector = stm->pos & (MINI_SECTOR_SIZE-1);
+	desired_sector_pos = stm->pos & ~(MINI_SECTOR_SIZE-1);
+	if (desired_sector_pos != state->pos_at_next_sector)
+	{
+		state->pos_at_next_sector = 0;
+		state->next_sector = state->first_sector;
+		state->next_sector_slow = state->first_sector;
+		state->next_sector_slow_flag = 0;
+	}
+
+	this_sector = state->next_sector;
+	while (desired_sector_pos >= state->pos_at_next_sector)
+	{
+		this_sector = state->next_sector;
+		state->next_sector = read_mini_fat(ctx, cfb, state->next_sector);
+		state->pos_at_next_sector += MINI_SECTOR_SIZE;
+		if (state->next_sector > MAXREGSECT)
+			break;
+
+		state->next_sector_slow_flag = !state->next_sector_slow_flag;
+		if (state->next_sector_slow_flag == 0)
+			state->next_sector_slow = read_mini_fat(ctx, cfb, state->next_sector_slow);
+		if (state->next_sector_slow == state->next_sector)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
+	}
+	if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
+
+	if (this_sector > MAXREGSECT)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
+
+	fz_seek(ctx, state->mini_stream, ((uint64_t)this_sector) * MINI_SECTOR_SIZE, SEEK_SET);
+	read(ctx, state->mini_stream, state->buffer, MINI_SECTOR_SIZE);
+	stm->rp = state->buffer;
+	stm->wp = stm->rp + MINI_SECTOR_SIZE;
+	stm->pos += MINI_SECTOR_SIZE;
+	if ((uint64_t)stm->pos >= state->size)
+	{
+		stm->wp -= (stm->pos - state->size);
+		stm->pos = state->size;
+	}
+	stm->rp += pos_in_sector;
+
+	return *stm->rp++;
+}
+
+static void cfb_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
+{
+	cfb_state *state = stm->state;
+	int64_t pos = stm->pos - (stm->wp - stm->rp);
+	/* Convert to absolute pos */
+	if (whence == 1)
+	{
+		offset += pos; /* Was relative to current pos */
+	}
+	else if (whence == 2)
+	{
+		offset += stm->pos; /* Was relative to end */
+	}
+
+	if (offset < 0)
+		offset = 0;
+	if ((uint64_t)offset > state->size)
+		offset = (int64_t)state->size;
+	stm->pos = offset;
+	stm->rp = stm->wp = state->buffer;
+}
+
+static fz_stream *sector_stream(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint64_t size)
+{
+	fz_stream *stm;
+	cfb_state *state = fz_malloc_struct(ctx, cfb_state);
+
+	state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
+	state->pos_at_next_sector = 0;
+	state->size = size;
+	state->first_sector = sector;
+	state->next_sector = state->first_sector;
+	state->next_sector_slow = state->first_sector;
+	state->next_sector_slow_flag = 0;
+
+	stm = fz_new_stream(ctx, state, cfb_next, cfb_close);
+	stm->seek = cfb_seek;
+
+	return stm;
+}
+
+static fz_stream *open_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
+{
+	fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
+	cfb_entry *ent;
+	fz_stream *stm;
+	cfb_state *state;
+
+	ent = lookup_cfb_entry(ctx, cfb, name);
+	if (!ent)
+		return NULL;
+
+	if (ent->size >= 0x1000)
+	{
+		/* Working from entire sectors */
+		return sector_stream(ctx, cfb, ent->sector, ent->size);
+	}
+
+	/* We're working from the mini stream. */
+	state = fz_malloc_struct(ctx, cfb_state);
+
+	fz_try(ctx)
+	{
+		/* Let's get a stream that gets us the mini stream, and then work from that. */
+		state->mini_stream = sector_stream(ctx, cfb, cfb->mini_stream_sector0, cfb->mini_stream_len);
+		state->first_sector = ent->sector;
+		state->pos_at_next_sector = 0;
+		state->size = ent->size;
+		state->next_sector = state->first_sector;
+		state->next_sector_slow = state->first_sector;
+		state->next_sector_slow_flag = 0;
+		state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
+
+	}
+	fz_catch(ctx)
+	{
+		fz_free(ctx, state);
+		fz_rethrow(ctx);
+	}
+
+	stm = fz_new_stream(ctx, state, cfb_next_mini, cfb_close);
+	stm->seek = cfb_seek;
+	return stm;
+}
+
+static fz_buffer *read_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
+{
+	fz_stream *stm;
+	fz_buffer *buf = NULL;
+
+	stm = open_cfb_entry(ctx, arch, name);
+	if (!stm)
+		return NULL;
+
+	fz_try(ctx)
+		buf = fz_read_all(ctx, stm, 1024);
+	fz_always(ctx)
+		fz_drop_stream(ctx, stm);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+
+	return buf;
+}
+
+static int has_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
+{
+	fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
+	cfb_entry *ent = lookup_cfb_entry(ctx, cfb, name);
+	return ent != NULL;
+}
+
+static const char *list_cfb_entry(fz_context *ctx, fz_archive *arch, int idx)
+{
+	fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
+	if (idx < 0 || idx >= cfb->count)
+		return NULL;
+	return cfb->entries[idx].name;
+}
+
+static int count_cfb_entries(fz_context *ctx, fz_archive *arch)
+{
+	fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
+	return cfb->count;
+}
+
+static const uint8_t sig[8] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
+static const uint8_t zeros[16] = { 0 };
+
+int
+fz_is_cfb_archive(fz_context *ctx, fz_stream *file)
+{
+	uint8_t data[nelem(sig)];
+	size_t n;
+
+	fz_seek(ctx, file, 0, SEEK_SET);
+	n = fz_read(ctx, file, data, nelem(data));
+	if (n != nelem(data))
+		return 0;
+	if (!memcmp(data, sig, nelem(sig)))
+		return 1;
+
+	return 0;
+}
+
+static void
+expect(fz_context *ctx, fz_stream *file, const uint8_t *pattern, size_t n, const char *msg)
+{
+	uint8_t buffer[64];
+
+	assert(sizeof(buffer) >= n);
+	read(ctx, file, buffer, n);
+
+	if (memcmp(buffer, pattern, n) != 0)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB", msg);
+}
+
+static void
+expect16(fz_context *ctx, fz_stream *file, uint16_t v, const char *msg)
+{
+	uint16_t u;
+
+	u = fz_read_uint16_le(ctx, file);
+	if (u != v)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%04x != 0x%04x", msg, u, v);
+}
+
+static void
+expect32(fz_context *ctx, fz_stream *file, uint32_t v, const char *msg)
+{
+	uint32_t u;
+
+	u = fz_read_uint32_le(ctx, file);
+	if (u != v)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%08x != 0x%08x", msg, u, v);
+}
+
+#define REACHED 0xFFFFFFFF
+#define REACHED_KEEP 0xFFFFFFFE
+
+static void
+make_absolute(fz_context *ctx, fz_cfb_archive *cfb, char *prefix, int node, int depth)
+{
+	uint32_t type;
+
+	/* To avoid recursion where possible. */
+	while (1)
+	{
+		if (node == (int)NOSTREAM)
+			return;
+
+		if (node < 0 || node >= cfb->count)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Invalid tree");
+
+		if (depth >= 32)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree too deep");
+
+		type = cfb->entries[node].t;
+		if (type == REACHED || type == REACHED_KEEP)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree has cycles");
+		cfb->entries[node].t = (type == 2) ? REACHED_KEEP : REACHED;
+
+		if (prefix)
+		{
+			size_t z0 = strlen(prefix);
+			size_t z1 = strlen(cfb->entries[node].name);
+			char *newname = fz_malloc(ctx, z0+z1+2);
+			memcpy(newname, prefix, z0);
+			newname[z0] = '/';
+			memcpy(newname+z0+1, cfb->entries[node].name, z1+1);
+			fz_free(ctx, cfb->entries[node].name);
+			cfb->entries[node].name = newname;
+		}
+
+		if (cfb->entries[node].d == NOSTREAM && cfb->entries[node].r == NOSTREAM)
+		{
+			/* Handle 'l' without recursion, because there is no 'r' or 'd'. */
+			node = cfb->entries[node].l;
+			continue;
+		}
+		make_absolute(ctx, cfb, prefix, cfb->entries[node].l, depth+1);
+		if (cfb->entries[node].d == NOSTREAM)
+		{
+			/* Handle 'r' without recursion, because there is no 'd'. */
+			node = cfb->entries[node].r;
+			continue;
+		}
+		make_absolute(ctx, cfb, prefix, cfb->entries[node].r, depth+1);
+
+		/* Rather than recursing:
+		 *	make_absolute(ctx, cfb, node == 0 ? NULL : cfb->entries[node].name, cfb->entries[node].d, depth+1);
+		 * instead just loop. */
+		prefix = node == 0 ? NULL : cfb->entries[node].name;
+		node = cfb->entries[node].d;
+	}
+
+}
+
+static void
+absolutise_names(fz_context *ctx, fz_cfb_archive *cfb)
+{
+	make_absolute(ctx, cfb, NULL, 0, 0);
+}
+
+static void
+strip_unused_names(fz_context *ctx, fz_cfb_archive *cfb)
+{
+	int i, j;
+	int n = cfb->count;
+
+	/* Init i and j so that we always delete the root node. */
+	fz_free(ctx, cfb->entries[0].name);
+	for (i = 1, j = 0; i < n; i++)
+	{
+		if (cfb->entries[i].t == REACHED_KEEP)
+		{
+			if (i != j)
+				cfb->entries[j] = cfb->entries[i];
+			cfb->entries[j].t = i;
+			j++;
+		}
+		else
+			fz_free(ctx, cfb->entries[i].name);
+	}
+	cfb->count = j;
+}
+
+fz_archive *
+fz_open_cfb_archive_with_stream(fz_context *ctx, fz_stream *file)
+{
+	fz_cfb_archive *cfb;
+	uint8_t buffer[4096];
+	uint32_t sector, slow_sector, slow_sector_flag;
+	int i;
+
+	if (!fz_is_cfb_archive(ctx, file))
+		fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize cfb archive");
+
+	cfb = fz_new_derived_archive(ctx, file, fz_cfb_archive);
+	cfb->super.format = "cfb";
+	cfb->super.count_entries = count_cfb_entries;
+	cfb->super.list_entry = list_cfb_entry;
+	cfb->super.has_entry = has_cfb_entry;
+	cfb->super.read_entry = read_cfb_entry;
+	cfb->super.open_entry = open_cfb_entry;
+	cfb->super.drop_archive = drop_cfb_archive;
+
+	fz_try(ctx)
+	{
+		fz_seek(ctx, file, 0, SEEK_SET);
+		/* Read the header */
+		expect(ctx, file, sig, 8, "Bad signature");
+		expect(ctx, file, zeros, 16, "Bad CLSID");
+		/* The minor version is SUPPOSED to be 0x3e, but we don't seem to be
+		 * able to rely on this. So just skip it. */
+		(void)fz_read_uint16_le(ctx, file);
+		cfb->major = fz_read_uint16_le(ctx, file);
+		if (cfb->major != 3 && cfb->major != 4)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad major version of CFB: %d", cfb->major);
+		expect16(ctx, file, 0xfffe, "Bad byte order");
+		cfb->sector_shift = fz_read_uint16_le(ctx, file);
+		if ((cfb->major == 3 && cfb->sector_shift != 9) ||
+			(cfb->major == 4 && cfb->sector_shift != 12))
+			fz_throw(ctx, FZ_ERROR_FORMAT, "Bad sector shift: %d", cfb->sector_shift);
+		expect16(ctx, file, 6, "Bad mini section shift");
+		expect(ctx, file, zeros, 6, "Bad padding");
+		cfb->num_dir_sectors = fz_read_uint32_le(ctx, file);
+		cfb->num_fat_sectors = fz_read_uint32_le(ctx, file);
+		cfb->dir_sector0 = fz_read_uint32_le(ctx, file);
+		(void)fz_read_uint32_le(ctx, file); /* Transaction signature number */
+		expect32(ctx, file, 0x1000, "Bad mini stream cutoff size");
+		cfb->mini_fat_sector0 = fz_read_uint32_le(ctx, file);
+		cfb->num_mini_fat_sectors = fz_read_uint32_le(ctx, file);
+		cfb->difat_sector0 = fz_read_uint32_le(ctx, file);
+		cfb->num_difat_sectors = fz_read_uint32_le(ctx, file);
+		for (i = 0; i < 109; i++)
+			cfb->difat[i] = fz_read_uint32_le(ctx, file);
+		cfb->fatcache_sector = (uint32_t)-1;
+		cfb->minifatcache_sector = (uint32_t)-1;
+
+		/* Read the directory entries. */
+		/* On our first pass through, EVERYTHING goes into the entries. */
+		sector = cfb->dir_sector0;
+		slow_sector = sector;
+		slow_sector_flag = 0;
+		do
+		{
+			size_t z = ((size_t)1)<<cfb->sector_shift;
+			size_t off;
+
+			/* Fetch the sector. */
+			fz_seek(ctx, file, ((int64_t)sector+1)<<cfb->sector_shift, SEEK_SET);
+			read(ctx, file, buffer, z);
+
+			for (off = 0; off < z; off += 128)
+			{
+				int count = 0;
+				int type;
+				int namelen = get16(buffer+off+64);
+
+				if (namelen == 0)
+					break;
+
+				/* What flavour of object is this? */
+				type = buffer[off+64+2];
+
+				/* Ensure our entries list is long enough. */
+				if (cfb->max == cfb->count)
+				{
+					int newmax = cfb->max * 2;
+					if (newmax == 0)
+						newmax = 32;
+					cfb->entries = fz_realloc_array(ctx, cfb->entries, newmax, cfb_entry);
+					cfb->max = newmax;
+				}
+
+				/* Count the name length in utf8 encoded bytes, including terminator. */
+				for (i = 0; i < 64; i += 2)
+				{
+					int ucs = get16(buffer+off+i);
+					if (ucs == 0)
+						break;
+					count += fz_runelen(ucs);
+				}
+				if (i+2 != namelen || i == 64)
+					fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed name in CFB directory");
+
+				/* Copy the name. */
+				cfb->entries[cfb->count++].name = fz_malloc(ctx, count + 1);
+				count = 0;
+				for (i = 0; i < 64; i += 2)
+				{
+					int ucs = buffer[off+i] + (buffer[off+i+1]<<8);
+					if (ucs == 0)
+						break;
+					count += fz_runetochar(&cfb->entries[cfb->count-1].name[count], ucs);
+				}
+				cfb->entries[cfb->count-1].name[count] = 0;
+
+				cfb->entries[cfb->count-1].sector = get32(buffer+off+128-12);
+				cfb->entries[cfb->count-1].size = get_len(ctx, cfb, buffer+off+128-8);
+				cfb->entries[cfb->count-1].l = get32(buffer+off+68);
+				cfb->entries[cfb->count-1].r = get32(buffer+off+72);
+				cfb->entries[cfb->count-1].d = get32(buffer+off+76);
+				cfb->entries[cfb->count-1].t = type;
+
+#ifdef DEBUG_DIRENTRIES
+				fz_write_printf(ctx, fz_stddbg(ctx), "%d: ", cfb->count-1);
+				if (type == 1)
+					fz_write_printf(ctx, fz_stddbg(ctx), "(storage) ");
+				else if (type == 2)
+					fz_write_printf(ctx, fz_stddbg(ctx), "(file) ");
+				else if (type == 5)
+					fz_write_printf(ctx, fz_stddbg(ctx), "(root) ");
+				else
+					fz_write_printf(ctx, fz_stddbg(ctx), "(%d?) ", type);
+
+				fz_write_printf(ctx, fz_stddbg(ctx), "%q", cfb->entries[cfb->count-1].name);
+				fz_write_printf(ctx, fz_stddbg(ctx), " @%x+%x\n", cfb->entries[cfb->count-1].sector, cfb->entries[cfb->count-1].size );
+				if (cfb->entries[cfb->count-1].l <= MAXREGSID)
+					fz_write_printf(ctx, fz_stddbg(ctx), "\tleft=%d\n", cfb->entries[cfb->count-1].l);
+				if (cfb->entries[cfb->count-1].r <= MAXREGSID)
+					fz_write_printf(ctx, fz_stddbg(ctx), "\tright=%d\n", cfb->entries[cfb->count-1].r);
+				if (cfb->entries[cfb->count-1].d <= MAXREGSID)
+					fz_write_printf(ctx, fz_stddbg(ctx), "\tchild=%d\n", cfb->entries[cfb->count-1].d);
+#endif
+
+				/* Type 5 is just for the root. */
+				if (type == 5)
+				{
+					cfb->mini_stream_sector0 = get32(buffer+off+128-12);
+					cfb->mini_stream_len = get_len(ctx, cfb, buffer+off+128-8);
+				}
+			}
+
+			/* To get the next sector, we need to read it from the FAT. */
+			sector = read_fat(ctx, cfb, sector);
+			slow_sector_flag = !slow_sector_flag;
+			if (slow_sector_flag == 0)
+				slow_sector = read_fat(ctx, cfb, slow_sector);
+			if (slow_sector == sector)
+				fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT");
+		}
+		while (sector <= MAXREGSECT);
+
+		absolutise_names(ctx, cfb);
+		strip_unused_names(ctx, cfb);
+
+#ifdef DEBUG_DIRENTRIES
+		for (i = 0; i < cfb->count; i++)
+			fz_write_printf(ctx, fz_stddbg(ctx), "%d: %s (was %d)\n", i, cfb->entries[i].name, cfb->entries[i].t);
+#endif
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_archive(ctx, &cfb->super);
+		fz_rethrow(ctx);
+	}
+
+	return &cfb->super;
+}
+
+fz_archive *
+fz_open_cfb_archive(fz_context *ctx, const char *filename)
+{
+	fz_archive *cfb = NULL;
+	fz_stream *file;
+
+	file = fz_open_file(ctx, filename);
+
+	fz_try(ctx)
+		cfb = fz_open_cfb_archive_with_stream(ctx, file);
+	fz_always(ctx)
+		fz_drop_stream(ctx, file);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+
+	return cfb;
+}