view mupdf-source/source/html/html-parse.c @ 7:5ab937c03c27

Apply full RELRO to all generated binaries. Also strip the generated binaries.
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 16 Sep 2025 12:37:32 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

// Copyright (C) 2004-2025 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

#include "mupdf/fitz.h"
#include "mupdf/ucdn.h"
#include "html-imp.h"

#include <string.h>
#include <stdio.h>
#include <assert.h>

/* Four small index constants (0..3); presumably Top, Right, Bottom, Left
 * for four-sided CSS properties -- TODO confirm against the users of
 * these names, none of which are visible in this part of the file. */
enum { T, R, B, L };

/* Text direction used when a "dir" attribute holds a value other than
 * "auto", "rtl" or "ltr". */
#define DEFAULT_DIR FZ_BIDI_LTR

/* CSS source text holding the default rules for HTML elements (display
 * type, margins, fonts, list and table roles). The place where this text
 * is parsed and applied is outside the part of the file visible here. */
static const char *html_default_css =
"@page{margin:3em 2em}"
"a{color:#06C;text-decoration:underline}"
"address{display:block;font-style:italic}"
"b{font-weight:bold}"
"bdo{direction:rtl;unicode-bidi:bidi-override}"
"blockquote{display:block;margin:1em 40px}"
"body{display:block;margin:1em}"
"cite{font-style:italic}"
"code{font-family:monospace}"
"dd{display:block;margin:0 0 0 40px}"
"del{text-decoration:line-through}"
"div{display:block}"
"dl{display:block;margin:1em 0}"
"dt{display:block}"
"em{font-style:italic}"
"h1{display:block;font-size:2em;font-weight:bold;margin:0.67em 0;page-break-after:avoid}"
"h2{display:block;font-size:1.5em;font-weight:bold;margin:0.83em 0;page-break-after:avoid}"
"h3{display:block;font-size:1.17em;font-weight:bold;margin:1em 0;page-break-after:avoid}"
"h4{display:block;font-size:1em;font-weight:bold;margin:1.33em 0;page-break-after:avoid}"
"h5{display:block;font-size:0.83em;font-weight:bold;margin:1.67em 0;page-break-after:avoid}"
"h6{display:block;font-size:0.67em;font-weight:bold;margin:2.33em 0;page-break-after:avoid}"
"head{display:none}"
"hr{border-style:solid;border-width:1px;display:block;margin-bottom:0.5em;margin-top:0.5em;text-align:center}"
"html{display:block}"
"i{font-style:italic}"
"ins{text-decoration:underline}"
"kbd{font-family:monospace}"
"li{display:list-item}"
"menu{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ol{display:block;list-style-type:decimal;margin:1em 0;padding:0 0 0 30pt}"
"p{display:block;margin:1em 0}"
"pre{display:block;font-family:monospace;margin:1em 0;white-space:pre}"
"samp{font-family:monospace}"
"script{display:none}"
"small{font-size:0.83em}"
"strong{font-weight:bold}"
"style{display:none}"
"sub{font-size:0.83em;vertical-align:sub}"
"sup{font-size:0.83em;vertical-align:super}"
"table{display:table;border-spacing:2px}"
"tbody{display:table-row-group}"
"td{display:table-cell;padding:1px;background-color:inherit}"
"tfoot{display:table-footer-group}"
"th{display:table-cell;font-weight:bold;padding:1px;text-align:center;background-color:inherit}"
"thead{display:table-header-group}"
"tr{display:table-row}"
"ul{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ul ul{list-style-type:circle}"
"ul ul ul{list-style-type:square}"
"var{font-style:italic}"
"colgroup{display:table-column-group}"
"col{display:table-column}"
"caption{display:block;text-align:center}"
;

/* CSS source text with extra default rules for MOBI content. It only
 * adjusts a handful of elements, so it is presumably layered on top of
 * another default stylesheet -- confirm at the use site, which is not
 * visible here. */
static const char *mobi_default_css =
"pagebreak{display:block;page-break-before:always}"
"dl,ol,ul{margin:0}"
"p{margin:0}"
"blockquote{margin:0 40px}"
"center{display:block;text-align:center}"
"big{font-size:1.17em}"
"strike{text-decoration:line-through}"
;

/* CSS source text holding the default rules for FictionBook (FB2)
 * elements. The place where this text is parsed and applied is outside
 * the part of the file visible here. */
static const char *fb2_default_css =
"@page{margin:3em 2em}"
"FictionBook{display:block;margin:1em}"
"stylesheet,binary{display:none}"
"description>*{display:none}"
"description>title-info{display:block}"
"description>title-info>*{display:none}"
"description>title-info>coverpage{display:block;page-break-before:always;page-break-after:always}"
"body,section,title,subtitle,p,cite,epigraph,text-author,date,poem,stanza,v,empty-line{display:block}"
"image{display:block}"
"p>image{display:inline}"
"table{display:table}"
"tr{display:table-row}"
"th,td{display:table-cell}"
"a{color:#06C;text-decoration:underline}"
"a[type=note]{font-size:small;vertical-align:super}"
"code{white-space:pre;font-family:monospace}"
"emphasis{font-style:italic}"
"strikethrough{text-decoration:line-through}"
"strong{font-weight:bold}"
"sub{font-size:small;vertical-align:sub}"
"sup{font-size:small;vertical-align:super}"
"image{margin:1em 0;text-align:center}"
"cite,poem{margin:1em 2em}"
"subtitle,epigraph,stanza{margin:1em 0}"
"title>p{text-align:center;font-size:x-large}"
"subtitle{text-align:center;font-size:large}"
"p{margin-top:1em;text-align:justify}"
"empty-line{padding-top:1em}"
"p+p{margin-top:0;text-indent:1.5em}"
"empty-line+p{margin-top:0}"
"section>title{page-break-before:always}"
;

/* List of all known HTML tags. MUST stay sorted in strcmp() order, since it
 * is binary searched. A tag found here can be referenced by pointer to the
 * table entry instead of being duplicated. */
static const char *known_html_tags[] = {
	"a", "abbr", "acronym", "address", "annotation-xml", "applet", "area",
	"article", "aside", "audio", "b", "base", "basefont", "bdi", "bdo",
	"bgsound", "big", "blink", "blockquote", "body", "br", "button",
	"canvas", "caption", "center", "cite", "code", "col", "colgroup",
	"data", "datalist", "dd", "del", "desc", "details", "dfn", "dir",
	"div", "dl", "dt", "em", "embed", "fieldset", "figcaption", "figure",
	"font", "footer", "foreignobject", "form", "frame", "frameset", "h1",
	"h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
	"i", "iframe", "image", "img", "input", "ins", "isindex", "kbd",
	"keygen", "label", "legend", "li", "link", "listing", "main",
	"malignmark", "map", "mark", "marquee", "math", "menu", "menuitem",
	"meta", "meter", "mglyph", "mi", "mn", "mo", "ms", "mtext", "multicol",
	"nav", "nextid", "nobr", "noembed", "noframes", "noscript", "object",
	"ol", "optgroup", "option", "output", "p", "param", "plaintext", "pre",
	"progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s", "samp",
	"script", "section", "select", "small", "source", "spacer", "span",
	"strike", "strong", "style", "sub", "summary", "sup", "svg", "table",
	"tbody", "td", "template", "textarea", "tfoot", "th", "thead", "time",
	"title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr", "xmp",
};

/* List of known FictionBook (FB2) tags. MUST stay sorted in strcmp() order;
 * note that the upper-case "FictionBook" sorts before all lower-case names. */
static const char *known_fb2_tags[] = {
	"FictionBook", "a", "binary", "body", "cite", "code", "coverpage",
	"date", "description", "emphasis", "empty-line", "epigraph", "image",
	"p", "poem", "section", "stanza", "strikethrough", "strong",
	"stylesheet", "sub", "subtitle", "sup", "table", "td", "text-author",
	"th", "title", "title-info", "tr", "v",
};

/*
 * Binary search a strcmp()-sorted table of 'count' tag names for 'tag'.
 *
 * Returns the table's own pointer for the matching entry, or NULL if the
 * tag is not in the table.
 */
static const char *search_known_tags(const char *tag, const char *const *table, int count)
{
	int l = 0;
	int r = count - 1;
	while (l <= r)
	{
		int m = l + ((r - l) >> 1);
		int c = strcmp(tag, table[m]);
		if (c < 0)
			r = m - 1;
		else if (c > 0)
			l = m + 1;
		else
			return table[m];
	}
	return NULL;
}

/*
 * Look up 'tag' among the known HTML tags.
 *
 * Returns the statically allocated table entry, or NULL if unknown.
 * The whole table is searched; the upper bound used to be half the
 * element count, which made every tag in the second half unfindable.
 */
static const char *find_known_html_tag(const char *tag)
{
	/* Element count spelled out with sizeof (same value as nelem()). */
	return search_known_tags(tag, known_html_tags,
		(int)(sizeof(known_html_tags) / sizeof(known_html_tags[0])));
}

/*
 * Look up 'tag' among the known FB2 tags.
 *
 * Returns the statically allocated table entry, or NULL if unknown.
 * The whole table is searched (see find_known_html_tag).
 */
static const char *find_known_fb2_tag(const char *tag)
{
	return search_known_tags(tag, known_fb2_tags,
		(int)(sizeof(known_fb2_tags) / sizeof(known_fb2_tags[0])));
}

/* Mutable state threaded through the box generation functions while the
 * document tree is converted into boxes and flow nodes. */
struct genstate
{
	fz_pool *pool; /* pool that boxes, flow nodes and copied strings are allocated from */
	fz_html_font_set *set; /* not used by the code visible in this part of the file */
	fz_archive *zip; /* archive that image files are read from */
	fz_tree *images; /* FB2: images, looked up by the name following '#' in an href */
	fz_xml_doc *xml; /* XML document, passed along when loading an embedded SVG */
	int is_fb2; /* non-zero: use FB2 tag names and l:href/xlink:href attributes */
	const char *base_uri; /* prefix prepended to relative image paths */
	fz_css *css; /* not used by the code visible in this part of the file */
	int at_bol; /* non-zero at the start of a line; pending collapsed space is then dropped */
	fz_html_box *emit_white; /* box owning a pending collapsed space, or NULL if none */
	int last_brk_cls; /* UCDN line break class of the previously seen character */

	int list_counter; /* pre-incremented to number each list-item box */
	int section_depth; /* FB2: limited to 6 and used as heading level for title/subtitle */
	fz_bidi_direction markup_dir; /* direction set from "dir" attributes; copied into new boxes */
	fz_text_language markup_lang; /* language set from "lang" attributes; attached to text */
	char *href; /* link target set by <a> elements; inherited by boxes created while set */

	fz_css_style_splay *styles; /* handed to fz_css_enlist() when a box style is recorded */
};

/* Return 1 if c is a space, tab, carriage return or line feed; 0 otherwise. */
static int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}

/* Return 1 if every character of the NUL-terminated string s is whitespace
 * according to iswhite() (so also for the empty string); 0 otherwise. */
static int is_all_white(const char *s)
{
	for (; *s; ++s)
	{
		if (!iswhite(*s))
			return 0;
	}
	return 1;
}

/* TODO: pool allocator for flow nodes */
/* TODO: store text by pointing to a giant buffer */

/* Walk a list of flow nodes and drop the image held by each FLOW_IMAGE node.
 * The nodes themselves are not freed here. */
static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow)
{
	fz_html_flow *node;

	for (node = flow; node != NULL; node = node->next)
	{
		if (node->type == FLOW_IMAGE)
			fz_drop_image(ctx, node->content.image);
	}
}

/*
 * Allocate a flow node of the given type from the pool and append it to the
 * flow list being built in 'top'.
 *
 * top: the BOX_FLOW box collecting the nodes.
 * inline_box: the box recorded as the node's owner.
 * extras: number of content bytes to reserve (ignored for FLOW_IMAGE, which
 *	always gets a full fz_html_flow).
 *
 * Returns the new node, or NULL (allocating nothing) when 'top' is NULL or
 * is not a flow box.
 */
static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, int type, int extras)
{
	fz_html_flow *node;
	size_t bytes;

	/* Shouldn't happen, but bug 705324. */
	if (!top || top->type != BOX_FLOW)
		return NULL;

	if (type == FLOW_IMAGE)
		bytes = sizeof(fz_html_flow);
	else
		bytes = offsetof(fz_html_flow, content) + extras;

	node = fz_pool_alloc(ctx, pool, bytes);
	node->type = type;
	node->expand = 0;
	node->bidi_level = 0;
	node->markup_lang = 0;
	node->breaks_line = 0;
	node->box = inline_box;

	/* Link the node in at the tail, and move the tail on to its 'next' slot. */
	*top->s.build.flow_tail = node;
	top->s.build.flow_tail = &node->next;
	return node;
}

/* Append a FLOW_SPACE node, marked as expandable, to the flow box 'top'. */
static void add_flow_space(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	fz_html_flow *node = add_flow(ctx, pool, top, inline_box, FLOW_SPACE, 0);

	if (node == NULL)
		return;
	node->expand = 1;
}

/* Append a FLOW_BREAK node to the flow box 'top'. A refused append
 * (add_flow returning NULL) is deliberately ignored. */
static void add_flow_break(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	const int no_extras = 0;

	(void)add_flow(ctx, pool, top, inline_box, FLOW_BREAK, no_extras);
}

/* Append a FLOW_SBREAK node to the flow box 'top'. A refused append
 * (add_flow returning NULL) is deliberately ignored. */
static void add_flow_sbreak(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	const int no_extras = 0;

	(void)add_flow(ctx, pool, top, inline_box, FLOW_SBREAK, no_extras);
}

/* Append a FLOW_SHYPHEN (soft hyphen) node to the flow box 'top'. A refused
 * append (add_flow returning NULL) is deliberately ignored. */
static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	const int no_extras = 0;

	(void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN, no_extras);
}

/*
 * Append a FLOW_WORD node holding a NUL-terminated copy of the bytes in
 * the half-open range [a, b), tagged with the markup language 'lang'.
 * Does nothing if the node cannot be added.
 */
static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, const char *a, const char *b, int lang)
{
	size_t len = b - a;
	fz_html_flow *word;

	/* Reserve room for the text plus its terminator. */
	word = add_flow(ctx, pool, top, inline_box, FLOW_WORD, (int)(len + 1));
	if (!word)
		return;

	memcpy(word->content.text, a, len);
	word->content.text[len] = 0;
	word->markup_lang = lang;
}

/* Append a FLOW_IMAGE node that takes its own reference to 'img'.
 * No reference is taken if the node cannot be added. */
static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, fz_image *img)
{
	fz_html_flow *node = add_flow(ctx, pool, top, inline_box, FLOW_IMAGE, 0);

	if (node == NULL)
		return;
	node->content.image = fz_keep_image(ctx, img);
}

/* Append a FLOW_ANCHOR node to the flow box 'top'. A refused append
 * (add_flow returning NULL) is deliberately ignored. */
static void add_flow_anchor(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	const int no_extras = 0;

	(void)add_flow(ctx, pool, top, inline_box, FLOW_ANCHOR, no_extras);
}

/*
 * Split a FLOW_WORD node in two after 'offset' characters (runes, not bytes).
 *
 * The original node keeps the first 'offset' characters; a new node holding
 * the remainder is allocated from 'pool', inserted directly after it, and
 * returned. With offset == 0 nothing is split and 'flow' itself is returned.
 * If 'offset' reaches past the end of the text, the new node holds an empty
 * string.
 */
fz_html_flow *fz_html_split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
{
	fz_html_flow *tail;
	char *cut;
	size_t remaining;
	size_t tail_bytes;

	assert(flow->type == FLOW_WORD);

	if (offset == 0)
		return flow;

	/* Advance 'cut' to the byte position of the split point. */
	cut = flow->content.text;
	for (remaining = offset; remaining > 0 && *cut; --remaining)
	{
		int rune;
		cut += fz_chartorune(&rune, cut);
	}

	tail_bytes = strlen(cut) + 1;
	tail = fz_pool_alloc(ctx, pool, offsetof(fz_html_flow, content) + tail_bytes);

	/* Copy all fields in front of the content, then the trailing text. */
	memcpy(tail, flow, offsetof(fz_html_flow, content));
	memcpy(tail->content.text, cut, tail_bytes);

	tail->next = flow->next;
	flow->next = tail;

	/* Truncate the original word at the split point. */
	*cut = 0;
	return tail;
}

/*
 * Emit the pending collapsed whitespace recorded in g->emit_white, if any,
 * into the flow box 'flow', and clear the pending state.
 *
 * At the beginning of a line (g->at_bol) the space is discarded. Otherwise
 * it becomes a breakable FLOW_SPACE when the owning box's white-space style
 * allows breaking at spaces, or a one-character word when it does not.
 */
static void flush_space(fz_context *ctx, fz_html_box *flow, int lang, struct genstate *g)
{
	static const char *blank = " ";
	fz_html_box *owner = g->emit_white;
	int breakable;

	if (!owner)
		return;

	breakable = owner->style->white_space & WS_ALLOW_BREAK_SPACE;
	if (!g->at_bol)
	{
		if (breakable)
			add_flow_space(ctx, g->pool, flow, owner);
		else
			add_flow_word(ctx, g->pool, flow, owner, blank, blank + 1, lang);
	}
	g->emit_white = 0;
}

/* Pair-wise lookup table for UAX#14 line breaks.
It is indexed as pairbrk[B][A]: the row B is the line break class of the
preceding character and the column A is the class of the following one.
The column order is the same as the row order (spelled out vertically in
the three header comment lines below).
The linebreak table entries mean:
^ prohibited break
	never break before A and after B, even with one or more spaces in between
% indirect break
	do not break before A, unless one or more spaces follow B
_ direct break
	break allowed before A
*/
static const char *pairbrk[32] =
{
/*	-OCCQGNESIPPNAHIIHBBBZCWHHJJJREEZ- */
/*	-PLPULSXYSROULLDNYAB2WMJ23LVTIBMW- */
/*	-                               J- */
	"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^", /* OP open punctuation */
	"_^^%%^^^^%%____%%%__^^^________%", /* CL close punctuation */
	"_^^%%^^^^%%%%%_%%%__^^^________%", /* CP close parenthesis */
	"^^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* QU quotation */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* GL non-breaking glue */
	"_^^%%%^^^______%%%__^^^________%", /* NS nonstarters */
	"_^^%%%^^^______%%%__^^^________%", /* EX exclamation/interrogation */
	"_^^%%%^^^__%_%_%%%__^^^________%", /* SY symbols allowing break after */
	"_^^%%%^^^__%%%_%%%__^^^________%", /* IS infix numeric separator */
	"%^^%%%^^^__%%%%%%%__^^^%%%%%_%%%", /* PR prefix numeric */
	"%^^%%%^^^__%%%_%%%__^^^________%", /* PO postfix numeric */
	"%^^%%%^^^%%%%%_%%%__^^^________%", /* NU numeric */
	"%^^%%%^^^%%%%%_%%%__^^^________%", /* AL ordinary alphabetic and symbol characters */
	"%^^%%%^^^%%%%%_%%%__^^^________%", /* HL hebrew letter */
	"_^^%%%^^^_%____%%%__^^^________%", /* ID ideographic */
	"_^^%%%^^^______%%%__^^^________%", /* IN inseparable characters */
	"_^^%_%^^^__%___%%%__^^^________%", /* HY hyphens */
	"_^^%_%^^^______%%%__^^^________%", /* BA break after */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* BB break before */
	"_^^%%%^^^______%%%_^^^^________%", /* B2 break opportunity before and after */
	"____________________^___________", /* ZW zero width space */
	"%^^%%%^^^%_%%%_%%%__^^^________%", /* CM combining mark */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* WJ word joiner */
	"_^^%%%^^^_%____%%%__^^^___%%___%", /* H2 hangul leading/vowel syllable */
	"_^^%%%^^^_%____%%%__^^^____%___%", /* H3 hangul leading/vowel/trailing syllable */
	"_^^%%%^^^_%____%%%__^^^%%%%____%", /* JL hangul leading jamo */
	"_^^%%%^^^_%____%%%__^^^___%%___%", /* JV hangul vowel jamo */
	"_^^%%%^^^_%____%%%__^^^____%___%", /* JT hangul trailing jamo */
	"_^^%%%^^^______%%%__^^^_____%__%", /* RI regional indicator */
	"_^^%%%^^^_%____%%%__^^^_______%%", /* EB emoji base */
	"_^^%%%^^^_%____%%%__^^^________%", /* EM emoji modifier */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* ZWJ zero width joiner */
};

/*
 * Walk up from 'flow' to the nearest enclosing box of type BOX_FLOW
 * (possibly 'flow' itself) and return it.
 *
 * Callers were written assuming such a box always exists, but bug 705324
 * shows that it may not. Without a reproducer file, the best we can do is
 * warn and return the topmost box reached (which is NOT a flow box), so
 * that we don't crash walking off the top of the tree.
 */
static fz_html_box *
find_flow_encloser(fz_context *ctx, fz_html_box *flow)
{
	fz_html_box *box;

	for (box = flow; box->type != BOX_FLOW; box = box->up)
	{
		if (!box->up)
		{
			fz_warn(ctx, "Flow encloser not found. Please report this file!");
			break;
		}
	}
	return box;
}

/*
 * Convert a run of text into flow nodes appended to the flow box enclosing
 * 'box'.
 *
 * box: the box that owns the text; its white-space style selects whether
 *	whitespace collapses, whether lines may break at spaces, and whether
 *	newlines force a line break.
 * text: NUL-terminated UTF-8 text.
 * lang: markup language recorded on every word node.
 * g: generator state; at_bol, emit_white and last_brk_cls are read and
 *	updated so that whitespace and break handling carries over between calls.
 */
static void generate_text(fz_context *ctx, fz_html_box *box, const char *text, int lang, struct genstate *g)
{
	fz_html_box *flow;
	fz_pool *pool = g->pool;
	int collapse = box->style->white_space & WS_COLLAPSE;
	int bsp = box->style->white_space & WS_ALLOW_BREAK_SPACE;
	int bnl = box->style->white_space & WS_FORCE_BREAK_NEWLINE;

	static const char *space = " ";

	flow = find_flow_encloser(ctx, box);
	if (flow == NULL)
		return;

	while (*text)
	{
		/* Case 1: a newline that must be honoured as a hard line break. */
		if (bnl && (*text == '\n' || *text == '\r'))
		{
			/* Treat CR LF as a single newline. */
			if (text[0] == '\r' && text[1] == '\n')
				text += 2;
			else
				text += 1;
			add_flow_break(ctx, pool, flow, box);
			g->at_bol = 1;
		}
		/* Case 2: whitespace. */
		else if (iswhite(*text))
		{
			if (collapse)
			{
				/* Swallow the whole run and remember one pending space, which is
				 * only emitted (by flush_space) once more content follows.
				 * When newlines force breaks they must be left for case 1. */
				if (bnl)
					while (*text == ' ' || *text == '\t')
						++text;
				else
					while (iswhite(*text))
						++text;
				g->emit_white = box;
			}
			else
			{
				/* Preserved whitespace: emit one node per whitespace character. */
				// TODO: tabs
				if (bsp)
					add_flow_space(ctx, pool, flow, box);
				else
					add_flow_word(ctx, pool, flow, box, space, space+1, lang);
				++text;
			}
			g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
		}
		/* Case 3: a run of non-whitespace characters. */
		else
		{
			/* 'mark' is the start of the word piece not yet emitted;
			 * 'prev' is the start of the character just decoded. */
			const char *prev, *mark = text;
			int c;

			flush_space(ctx, flow, lang, g);

			/* No soft break before the first character on a line. */
			if (g->at_bol)
				g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;

			while (*text && !iswhite(*text))
			{
				prev = text;
				text += fz_chartorune(&c, text);
				if (c == 0xAD) /* soft hyphen */
				{
					/* Emit the text before the hyphen; the hyphen itself is
					 * represented by its own node and not copied into a word. */
					if (mark != prev)
						add_flow_word(ctx, pool, flow, box, mark, prev, lang);
					add_flow_shyphen(ctx, pool, flow, box);
					mark = text;
					g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */
				}
				else if (bsp) /* allow soft breaks */
				{
					int this_brk_cls = ucdn_get_resolved_linebreak_class(c);
					/* Only classes covered by the pair table take part. */
					if (this_brk_cls <= UCDN_LINEBREAK_CLASS_ZWJ)
					{
						int brk = pairbrk[g->last_brk_cls][this_brk_cls];

						/* we handle spaces elsewhere, so ignore these classes */
						if (brk == '@') brk = '^';
						if (brk == '#') brk = '^';
						if (brk == '%') brk = '^';

						if (brk == '_')
						{
							/* Break allowed before this character: end the
							 * current word piece and start a new one here. */
							if (mark != prev)
								add_flow_word(ctx, pool, flow, box, mark, prev, lang);
							add_flow_sbreak(ctx, pool, flow, box);
							mark = prev;
						}

						g->last_brk_cls = this_brk_cls;
					}
				}
			}
			/* Emit whatever is left of the run. */
			if (mark != text)
				add_flow_word(ctx, pool, flow, box, mark, text, lang);

			g->at_bol = 0;
		}
	}
}

/* If 'src' is a base64 data URI for a JPEG, PNG or GIF image, return a
 * pointer to the start of its base64 payload; otherwise return NULL. */
static const char *html_image_data_uri_payload(const char *src)
{
	static const char jpeg_prefix[] = "data:image/jpeg;base64,";
	static const char png_prefix[] = "data:image/png;base64,";
	static const char gif_prefix[] = "data:image/gif;base64,";

	if (strncmp(src, jpeg_prefix, sizeof jpeg_prefix - 1) == 0)
		return src + (sizeof jpeg_prefix - 1);
	if (strncmp(src, png_prefix, sizeof png_prefix - 1) == 0)
		return src + (sizeof png_prefix - 1);
	if (strncmp(src, gif_prefix, sizeof gif_prefix - 1) == 0)
		return src + (sizeof gif_prefix - 1);
	return NULL;
}

/*
 * Load the image named by an HTML 'src' attribute.
 *
 * A base64 data URI (jpeg/png/gif) is decoded in place; anything else is
 * treated as a path relative to 'base_uri', URL-decoded, cleaned, and read
 * from the archive 'zip'. When SVG support is enabled, a source containing
 * ".svg" is loaded as SVG.
 *
 * Returns a new image reference, or NULL after logging a warning if the
 * image cannot be loaded. Errors are swallowed, never propagated.
 */
static fz_image *load_html_image(fz_context *ctx, fz_archive *zip, const char *base_uri, const char *src)
{
	char path[2048];
	fz_buffer *data = NULL;
	fz_image *result = NULL;

	fz_var(result);
	fz_var(data);

	fz_try(ctx)
	{
		const char *payload = html_image_data_uri_payload(src);
		if (payload)
		{
			data = fz_new_buffer_from_base64(ctx, payload, 0);
		}
		else
		{
			fz_strlcpy(path, base_uri, sizeof path);
			fz_strlcat(path, "/", sizeof path);
			fz_strlcat(path, src, sizeof path);
			fz_urldecode(path);
			fz_cleanname(path);
			data = fz_read_archive_entry(ctx, zip, path);
		}
#if FZ_ENABLE_SVG
		if (strstr(src, ".svg"))
			result = fz_new_image_from_svg(ctx, data, base_uri, zip);
		else
#endif
			result = fz_new_image_from_buffer(ctx, data);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, data);
	}
	fz_catch(ctx)
	{
		fz_ignore_error(ctx);
		fz_warn(ctx, "html: cannot load image src='%s'", src);
	}

	return result;
}

/*
 * Create an image from an SVG element embedded in the document.
 *
 * Returns a new image reference, or NULL if loading fails (a warning is
 * logged and the error swallowed) or if SVG support is compiled out.
 */
static fz_image *load_svg_image(fz_context *ctx, fz_archive *zip, const char *base_uri,
	fz_xml_doc *xmldoc, fz_xml *node)
{
	fz_image *svg = NULL;
#if FZ_ENABLE_SVG
	fz_try(ctx)
	{
		svg = fz_new_image_from_svg_xml(ctx, xmldoc, node, base_uri, zip);
	}
	fz_catch(ctx)
	{
		fz_ignore_error(ctx);
		fz_warn(ctx, "html: cannot load embedded svg document");
	}
#endif
	return svg;
}

/*
 * Emit flow content for an image owned by 'box'.
 *
 * Pending whitespace is flushed first. A NULL 'img' produces the
 * placeholder word "[image]"; otherwise the image node is emitted between
 * two soft breaks. This function consumes the caller's reference to 'img':
 * it is dropped here whether or not an error is thrown (errors are
 * rethrown).
 */
static void generate_image(fz_context *ctx, fz_html_box *box, fz_image *img, struct genstate *g)
{
	static const char placeholder[] = "[image]";
	fz_html_box *flow = find_flow_encloser(ctx, box);

	flush_space(ctx, flow, 0, g);

	if (img == NULL)
	{
		add_flow_word(ctx, g->pool, flow, box, placeholder, placeholder + (sizeof placeholder - 1), 0);
		g->at_bol = 0;
		return;
	}

	fz_try(ctx)
	{
		add_flow_sbreak(ctx, g->pool, flow, box);
		add_flow_image(ctx, g->pool, flow, box, img);
		add_flow_sbreak(ctx, g->pool, flow, box);
	}
	fz_always(ctx)
		fz_drop_image(ctx, img);
	fz_catch(ctx)
		fz_rethrow(ctx);

	g->at_bol = 0;
}

/* Release the images referenced from a box, its following siblings and all
 * of their descendants. Siblings are handled iteratively and children
 * recursively. The boxes themselves are not freed here. */
static void fz_drop_html_box(fz_context *ctx, fz_html_box *box)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->next)
	{
		if (cur->type == BOX_FLOW)
			fz_drop_html_flow(ctx, cur->u.flow.head);
		fz_drop_html_box(ctx, cur->down);
	}
}

/* Destructor for an fz_html, taking it as its fz_storable base (presumably
 * installed as the storable's drop callback where the fz_html is created --
 * not visible here). Releases the images referenced from the box tree,
 * then the pool. */
static void fz_drop_html_imp(fz_context *ctx, fz_storable *stor)
{
	fz_html *html = (fz_html *)stor;
	fz_drop_html_box(ctx, html->tree.root);
	/* The pool is dropped only after the walk over the box tree. */
	fz_drop_pool(ctx, html->tree.pool);
}

/* Destructor for an fz_story, taking it as its fz_storable base (presumably
 * installed as the storable's drop callback where the story is created --
 * not visible here). Releases everything the story owns: user CSS, font
 * set, DOM, images referenced from the box tree, warnings buffer, archive
 * and finally the pool. */
static void fz_drop_story_imp(fz_context *ctx, fz_storable *stor)
{
	fz_story *story = (fz_story *)stor;
	fz_free(ctx, story->user_css);
	fz_drop_html_font_set(ctx, story->font_set);
	fz_drop_xml(ctx, story->dom);
	fz_drop_html_box(ctx, story->tree.root);
	fz_drop_buffer(ctx, story->warnings);
	fz_drop_archive(ctx, story->zip);
	/* The pool must be the last thing dropped. */
	fz_drop_pool(ctx, story->tree.pool);
}

/* Drop a structure derived from an html_tree. The exact things
 * freed here will depend upon the drop function with which it
 * was created. */
/* Drops one reference to the tree's storable, bracketed by
 * fz_defer_reap_start/fz_defer_reap_end so that any reaping is deferred
 * until the drop has completed. */
static void
fz_drop_html_tree(fz_context *ctx, fz_html_tree *tree)
{
	fz_defer_reap_start(ctx);
	fz_drop_storable(ctx, &tree->storable);
	fz_defer_reap_end(ctx);
}

/*
 * Drop a reference to an fz_html.
 *
 * html: the document to release; NULL is accepted and ignored, matching
 *	fz_drop_story. The explicit check avoids forming the address of a
 *	member of a NULL pointer.
 */
void fz_drop_html(fz_context *ctx, fz_html *html)
{
	if (!html)
		return;

	fz_drop_html_tree(ctx, &html->tree);
}

/* Drop a reference to a story. A NULL story is accepted and ignored. */
void fz_drop_story(fz_context *ctx, fz_story *story)
{
	if (story)
		fz_drop_html_tree(ctx, &story->tree);
}

/*
 * Take a new reference to an fz_html.
 *
 * html: the document to keep; NULL is accepted, in which case NULL is
 *	returned without forming the address of a member of a NULL pointer.
 *
 * Returns the pointer yielded by fz_keep_storable for the document's
 * storable.
 */
fz_html *fz_keep_html(fz_context *ctx, fz_html *html)
{
	if (!html)
		return NULL;

	return fz_keep_storable(ctx, &html->tree.storable);
}

/*
 * Allocate and initialise a box of the given type from the generator's pool.
 *
 * node: the XML element the box is created for, or NULL for an anonymous
 *	box (which gets the tag "#anon").
 * style: the computed style; it is recorded via fz_css_enlist and the box
 *	stores the returned pointer.
 *
 * The tag points into the known-tag tables when possible and is otherwise
 * copied into the pool. An <a> element additionally takes its id from a
 * "name" attribute when it has no "id", and updates g->href from its link
 * attribute; every box created while g->href is set inherits that href.
 *
 * The new box is not linked into the tree; use append_box for that.
 */
static fz_html_box *new_box(fz_context *ctx, struct genstate *g, fz_xml *node, int type, fz_css_style *style)
{
	const char *tag = fz_xml_tag(node);
	const char *id = fz_xml_att(node, "id");
	fz_html_box *box;
	size_t size;

	/* Only allocate the part of the trailing union that this type uses. */
	size = offsetof(fz_html_box, u);
	if (type == BOX_FLOW)
		size += sizeof(box->u.flow);
	else if (type != BOX_INLINE)
		size += sizeof(box->u.block);
	box = fz_pool_alloc(ctx, g->pool, size);

	box->type = type;
	box->is_first_flow = 0;
	box->markup_dir = g->markup_dir;
	box->heading = 0;
	box->list_item = 0;

	box->style = fz_css_enlist(ctx, style, &g->styles, g->pool);

	if (!tag)
	{
		box->tag = "#anon";
	}
	else
	{
		const char *known = find_known_html_tag(tag);
		if (!known && g->is_fb2)
			known = find_known_fb2_tag(tag);
		if (known)
			box->tag = known;
		else
			box->tag = fz_pool_strdup(ctx, g->pool, tag);
	}

	if (id)
		box->id = fz_pool_strdup(ctx, g->pool, id);

	if (tag && strcmp(tag, "a") == 0)
	{
		const char *href;

		// Support deprecated anchor syntax with id in "name" instead of "id" attribute.
		if (!id)
		{
			const char *name = fz_xml_att(node, "name");
			if (name)
				box->id = fz_pool_strdup(ctx, g->pool, name);
		}

		if (!g->is_fb2)
		{
			href = fz_xml_att(node, "href");
		}
		else
		{
			href = fz_xml_att(node, "l:href");
			if (!href)
				href = fz_xml_att(node, "xlink:href");
		}
		if (href)
			g->href = fz_pool_strdup(ctx, g->pool, href);
	}

	if (g->href)
		box->href = g->href;

	if (type == BOX_FLOW)
	{
		/* Start with an empty flow list whose tail is the head slot. */
		box->u.flow.head = NULL;
		box->s.build.flow_tail = &box->u.flow.head;
	}

	return box;
}

/* Link 'child' in as the last child of 'parent', updating the parent's
 * first-child and build-time last-child pointers. */
static void append_box(fz_context *ctx, fz_html_box *parent, fz_html_box *child)
{
	fz_html_box *tail = parent->s.build.last_child;

	child->up = parent;
	if (parent->down == NULL)
		parent->down = child;
	if (tail != NULL)
		tail->next = child;
	parent->s.build.last_child = child;
}

/* Return the nearest box, starting at 'box' itself and walking upwards,
 * that is a block or a table cell. There is no NULL check on the way up,
 * so the caller must guarantee that such an ancestor exists. */
static fz_html_box *find_block_context(fz_context *ctx, fz_html_box *box)
{
	for (;;)
	{
		if (box->type == BOX_BLOCK || box->type == BOX_TABLE_CELL)
			return box;
		box = box->up;
	}
}

/* Return the nearest BOX_TABLE at or above 'box', i.e. the box a table row
 * should be appended to. Warns and returns NULL if there is none. */
static fz_html_box *find_table_row_context(fz_context *ctx, fz_html_box *box)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->up)
	{
		if (cur->type == BOX_TABLE)
			return cur;
	}

	fz_warn(ctx, "table-row not inside table element");
	return NULL;
}

/* Return the nearest BOX_TABLE_ROW at or above 'box', i.e. the box a table
 * cell should be appended to. Warns and returns NULL if there is none. */
static fz_html_box *find_table_cell_context(fz_context *ctx, fz_html_box *box)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->up)
	{
		if (cur->type == BOX_TABLE_ROW)
			return cur;
	}

	fz_warn(ctx, "table-cell not inside table-row element");
	return NULL;
}

/*
 * Return the box that inline content should be appended to.
 *
 * A flow or inline box is returned as is. Otherwise the closest enclosing
 * block (or table cell) is located; if its last child is a flow box, content
 * continues there, and if not, a new flow box with the default style is
 * created, appended to the block and returned. Creating a new flow box also
 * puts the generator at the beginning of a line.
 */
static fz_html_box *find_inline_context(fz_context *ctx, struct genstate *g, fz_html_box *box)
{
	fz_css_style default_style;
	fz_html_box *block;
	fz_html_box *tail;
	fz_html_box *flow;

	if (box->type == BOX_INLINE || box->type == BOX_FLOW)
		return box;

	// Inline content outside any flow/inline context:
	// locate the closest block level box to hold it.
	block = find_block_context(ctx, box);

	// Keep filling the last flow box if the block ends in one.
	tail = block->s.build.last_child;
	if (tail && tail->type == BOX_FLOW)
		return tail;

	// Otherwise a fresh flow box is needed.
	// TODO: null style instead of default for flow box?
	fz_default_css_style(ctx, &default_style);
	flow = new_box(ctx, g, NULL, BOX_FLOW, &default_style);
	flow->is_first_flow = !block->down;
	g->at_bol = 1;

	append_box(ctx, block, flow);
	return flow;
}

static void gen2_children(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *root_node, fz_css_match *root_match);

/*
 * Generate flow content for an XML text node found inside 'root_box'.
 *
 * Whitespace-only text under a collapsing white-space style is not emitted;
 * it is just recorded as a pending space. Other text is emitted through
 * generate_text, wrapped in an anonymous inline box when 'root_box' is not
 * itself inline.
 */
static void gen2_text(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node)
{
	const char *text = fz_xml_text(node);
	fz_html_box *target = root_box;

	if ((root_box->style->white_space & WS_COLLAPSE) && is_all_white(text))
	{
		g->emit_white = root_box;
		return;
	}

	if (root_box->type != BOX_INLINE)
	{
		/* Anonymous inline box, styled like the enclosing box ... */
		fz_css_style anon_style = *root_box->style;
		fz_html_box *context;

		// ... except that the font size must not be scaled a second time.
		anon_style.font_size.value = 1;
		anon_style.font_size.unit = N_SCALE;

		context = find_inline_context(ctx, g, root_box);
		target = new_box(ctx, g, NULL, BOX_INLINE, &anon_style);
		append_box(ctx, context, target);
	}

	generate_text(ctx, target, text, g->markup_lang, g);
}

/* Create an inline box for 'node' in the inline context of 'root_box' and
 * return it. If the box has an id, an anchor node is also added to the
 * enclosing flow. */
static fz_html_box *gen2_inline(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
{
	fz_html_box *parent = find_inline_context(ctx, g, root_box);
	fz_html_box *inline_box = new_box(ctx, g, node, BOX_INLINE, style);

	append_box(ctx, parent, inline_box);

	if (inline_box->id)
		add_flow_anchor(ctx, g->pool, find_flow_encloser(ctx, inline_box), inline_box);

	return inline_box;
}

/*
 * Emit a forced line break for 'node' (a <br>, per the original comments).
 *
 * Inside an inline box the break is attributed to that box. Otherwise a new
 * inline box is created to own it, styled like the containing box but with
 * the font size reset to a scale of 1. The generator is left at the
 * beginning of a line.
 */
static void gen2_break(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node)
{
	fz_html_box *br_box = root_box;

	if (root_box->type != BOX_INLINE)
	{
		fz_css_style br_style = *root_box->style;
		fz_html_box *parent;

		/* Make sure not to recursively multiply font sizes. */
		br_style.font_size.value = 1;
		br_style.font_size.unit = N_SCALE;

		br_box = new_box(ctx, g, node, BOX_INLINE, &br_style);
		parent = find_inline_context(ctx, g, root_box);
		append_box(ctx, parent, br_box);
	}

	add_flow_break(ctx, g->pool, find_flow_encloser(ctx, br_box), br_box);
	g->at_bol = 1;
}

/* Create a BOX_BLOCK for 'node', append it to the nearest block context at
 * or above 'root_box', and return it. */
static fz_html_box *gen2_block(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
{
	fz_html_box *parent = find_block_context(ctx, root_box);
	fz_html_box *block = new_box(ctx, g, node, BOX_BLOCK, style);

	append_box(ctx, parent, block);
	return block;
}

/* Create a BOX_TABLE for 'node', append it to the nearest block context at
 * or above 'root_box', and return it. */
static fz_html_box *gen2_table(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
{
	fz_html_box *parent = find_block_context(ctx, root_box);
	fz_html_box *table = new_box(ctx, g, node, BOX_TABLE, style);

	append_box(ctx, parent, table);
	return table;
}

/* Create a BOX_TABLE_ROW for 'node' inside the enclosing table and return
 * it. A row that is not inside a table is generated as a plain block. */
static fz_html_box *gen2_table_row(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
{
	fz_html_box *table = find_table_row_context(ctx, root_box);
	fz_html_box *row;

	if (table == NULL)
		return gen2_block(ctx, g, root_box, node, style);

	row = new_box(ctx, g, node, BOX_TABLE_ROW, style);
	append_box(ctx, table, row);
	return row;
}

/* Create a BOX_TABLE_CELL for 'node' inside the enclosing table row and
 * return it. A cell that is not inside a row is generated as a plain block. */
static fz_html_box *gen2_table_cell(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
{
	fz_html_box *row = find_table_cell_context(ctx, root_box);
	fz_html_box *cell;

	if (row == NULL)
		return gen2_block(ctx, g, root_box, node, style);

	cell = new_box(ctx, g, node, BOX_TABLE_CELL, style);
	append_box(ctx, row, cell);
	return cell;
}

/*
 * Create the boxes for an image element and emit the image into the flow.
 *
 * For inline and inline-block display, a single inline box for 'node' holds
 * the image. For any other display value, a block box for 'node' is created
 * first and the image goes into an anonymous inline box inside it.
 *
 * 'img' may be NULL; the caller's reference to it is handed on to
 * generate_image.
 */
static void gen2_image_common(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_image *img, int display, fz_css_style *style)
{
	fz_html_box *parent;
	fz_html_box *holder;
	fz_xml *holder_node = node;

	if (display != DIS_INLINE && display != DIS_INLINE_BLOCK)
	{
		fz_html_box *block;

		parent = find_block_context(ctx, root_box);
		block = new_box(ctx, g, node, BOX_BLOCK, style);
		append_box(ctx, parent, block);

		/* The inline holder lives inside the new block and is anonymous. */
		root_box = block;
		holder_node = NULL;
	}

	parent = find_inline_context(ctx, g, root_box);
	holder = new_box(ctx, g, holder_node, BOX_INLINE, style);
	append_box(ctx, parent, holder);
	generate_image(ctx, holder, img, g);
}

/*
 * Generate an image for an HTML element carrying a "src" attribute.
 * Elements without "src" produce nothing.
 *
 * Positive "width"/"height" attributes override the corresponding style
 * values in a local copy of the style; a '%' anywhere in the attribute
 * selects a percentage, otherwise the number is a length.
 */
static void gen2_image_html(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style)
{
	const char *src = fz_xml_att(node, "src");
	const char *w_att;
	const char *h_att;
	fz_css_style img_style;
	fz_image *img;
	int w, h;

	if (!src)
		return;

	img_style = *style;
	w_att = fz_xml_att(node, "width");
	h_att = fz_xml_att(node, "height");

	if (w_att)
	{
		w = fz_atoi(w_att);
		if (w > 0)
		{
			img_style.width.value = w;
			img_style.width.unit = strchr(w_att, '%') ? N_PERCENT : N_LENGTH;
		}
	}
	if (h_att)
	{
		h = fz_atoi(h_att);
		if (h > 0)
		{
			img_style.height.value = h;
			img_style.height.unit = strchr(h_att, '%') ? N_PERCENT : N_LENGTH;
		}
	}

	img = load_html_image(ctx, g->zip, g->base_uri, src);
	gen2_image_common(ctx, g, root_box, node, img, display, &img_style);
}

/* Generate an image for an FB2 element whose "l:href" (or, failing that,
 * "xlink:href") attribute is a "#name" reference. The name is looked up in
 * g->images and a new reference to the result is handed on. Elements
 * without such a reference produce nothing. */
static void gen2_image_fb2(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style)
{
	const char *src = fz_xml_att(node, "l:href");
	fz_image *img;

	if (!src)
		src = fz_xml_att(node, "xlink:href");
	if (!src || src[0] != '#')
		return;

	img = fz_tree_lookup(ctx, g->images, src + 1);
	gen2_image_common(ctx, g, root_box, node, fz_keep_image(ctx, img), display, style);
}

/* Generate an image from an SVG element embedded in the document. If the
 * SVG cannot be loaded, a NULL image is passed on. */
static void gen2_image_svg(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style)
{
	fz_image *svg;

	svg = load_svg_image(ctx, g->zip, g->base_uri, g->xml, node);
	gen2_image_common(ctx, g, root_box, node, svg, display, style);
}

/*
 * Map a tag name to a heading level.
 *
 * Returns 1..6 for "h1".."h6". In FB2 documents, "title" and "subtitle"
 * return the current section depth, limited to at most 6. Every other tag
 * returns 0.
 */
static int get_heading_from_tag(fz_context *ctx, struct genstate *g, const char *tag)
{
	/* Exactly two characters: 'h' followed by a digit from 1 to 6. */
	if (tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6' && tag[2] == 0)
		return tag[1] - '0';

	if (g->is_fb2 && (strcmp(tag, "title") == 0 || strcmp(tag, "subtitle") == 0))
		return fz_mini(g->section_depth, 6);

	return 0;
}

/*
	Generate the box for one element node, then generate boxes for its
	children inside it.

	The "dir" and "lang" attributes update g->markup_dir and
	g->markup_lang for the duration of this element; those two fields and
	g->href are saved on entry and restored before returning.

	Elements whose display is DIS_NONE generate nothing (and return before
	any generator state is touched).
*/
static void gen2_tag(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node,
	fz_css_match *match, int display, fz_css_style *style)
{
	fz_html_box *this_box;
	const char *tag;
	const char *lang_att;
	const char *dir_att;

	/* Inherited markup state to restore once this element is done. */
	int save_markup_dir = g->markup_dir;
	int save_markup_lang = g->markup_lang;
	char *save_href = g->href;

	if (display == DIS_NONE)
		return;

	tag = fz_xml_tag(node);

	dir_att = fz_xml_att(node, "dir");
	if (dir_att)
	{
		if (!strcmp(dir_att, "auto"))
			g->markup_dir = FZ_BIDI_NEUTRAL;
		else if (!strcmp(dir_att, "rtl"))
			g->markup_dir = FZ_BIDI_RTL;
		else if (!strcmp(dir_att, "ltr"))
			g->markup_dir = FZ_BIDI_LTR;
		else
			g->markup_dir = DEFAULT_DIR; /* unrecognised value */
	}

	lang_att = fz_xml_att(node, "lang");
	if (lang_att)
		g->markup_lang = fz_text_language_from_string(lang_att);

	/* Create the box that matches the computed display type. */
	switch (display)
	{
	case DIS_INLINE_BLOCK:
		// TODO handle inline block as a flow node
		this_box = gen2_block(ctx, g, root_box, node, style);
		break;

	case DIS_BLOCK:
		this_box = gen2_block(ctx, g, root_box, node, style);
		this_box->heading = get_heading_from_tag(ctx, g, tag);
		break;

	case DIS_LIST_ITEM:
		this_box = gen2_block(ctx, g, root_box, node, style);
		/* Items are numbered from 1 within the enclosing list. */
		this_box->list_item = ++g->list_counter;
		break;

	// TODO: https://www.w3.org/TR/CSS2/tables.html#anonymous-boxes
	//
	// The table generation code should insert and create anonymous boxes
	// for any missing child/parent elements.
	//
	// MISSING CHILDREN:
	// 1: Wrap consecutive BLOCK found in a TABLE in an anon TABLE_ROW.
	// 2: Wrap consecutive BLOCK found in a TABLE_ROW in an anon TABLE_CELL.
	//
	// MISSING PARENTS:
	// 1: Wrap consecutive TABLE_CELL found outside TABLE_ROW in an anon TABLE_ROW
	// 2: Wrap consecutive TABLE_ROW found outside TABLE in an anon TABLE
	//
	// For now we ignore this and treat any such elements that are out of
	// context as plain block elements.

	case DIS_TABLE:
		this_box = gen2_table(ctx, g, root_box, node, style);
		break;
	case DIS_TABLE_GROUP:
		// no box for table-row-group elements
		this_box = root_box;
		break;
	case DIS_TABLE_ROW:
		this_box = gen2_table_row(ctx, g, root_box, node, style);
		break;
	case DIS_TABLE_CELL:
		this_box = gen2_table_cell(ctx, g, root_box, node, style);
		break;

	case DIS_INLINE:
	default:
		this_box = gen2_inline(ctx, g, root_box, node, style);
		break;
	}

	if (tag && (!strcmp(tag, "ol") || !strcmp(tag, "ul") || !strcmp(tag, "dl")))
	{
		/* Each list numbers its own items; restore the outer count afterwards. */
		int save_list_counter = g->list_counter;
		g->list_counter = 0;
		gen2_children(ctx, g, this_box, node, match);
		g->list_counter = save_list_counter;
	}
	else if (tag && !strcmp(tag, "section"))
	{
		/* Track nesting depth; get_heading_from_tag uses it for FB2 titles. */
		int save_section_depth = g->section_depth;
		g->section_depth++;
		gen2_children(ctx, g, this_box, node, match);
		g->section_depth = save_section_depth;
	}
	else
	{
		gen2_children(ctx, g, this_box, node, match);
	}

	g->markup_dir = save_markup_dir;
	g->markup_lang = save_markup_lang;
	g->href = save_href;
}

/*
	Generate boxes for every child of root_node inside root_box.

	Text nodes become text flow. Element nodes are matched against the
	stylesheet and then dispatched by tag name: <br>, <img>, FictionBook
	<image> and <svg> have dedicated generators; everything else goes
	through gen2_tag.
*/
static void gen2_children(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *root_node, fz_css_match *root_match)
{
	fz_css_match match;
	fz_css_style style;
	fz_xml *child;
	int display;

	for (child = fz_xml_down(root_node); child; child = fz_xml_next(child))
	{
		const char *tag = fz_xml_tag(child);

		/* Nodes without a tag name are text. */
		if (!tag)
		{
			gen2_text(ctx, g, root_box, child);
			continue;
		}

		fz_match_css(ctx, &match, root_match, g->css, child);
		fz_apply_css_style(ctx, g->set, &style, &match);
		display = fz_get_css_match_display(&match);

		if (!strcmp(tag, "br"))
			gen2_break(ctx, g, root_box, child);
		else if (!strcmp(tag, "img"))
			gen2_image_html(ctx, g, root_box, child, display, &style);
		else if (g->is_fb2 && !strcmp(tag, "image"))
			gen2_image_fb2(ctx, g, root_box, child, display, &style);
		else if (!strcmp(tag, "svg"))
			gen2_image_svg(ctx, g, root_box, child, display, &style);
		else
			gen2_tag(ctx, g, root_box, child, &match, display, &style);
	}
}

/*
	Concatenate the text of all direct text children of root into one
	newly allocated, NUL-terminated string. Children without text are
	skipped. The caller owns the result and must release it with fz_free.
*/
static char *concat_text(fz_context *ctx, fz_xml *root)
{
	fz_xml *child;
	size_t total = 0;
	char *out, *dst;

	/* First pass: measure. */
	for (child = fz_xml_down(root); child; child = fz_xml_next(child))
	{
		const char *text = fz_xml_text(child);
		if (text)
			total += strlen(text);
	}

	/* One extra byte for the terminator. */
	out = Memento_label(fz_malloc(ctx, total + 1), "concat_html");

	/* Second pass: copy. */
	dst = out;
	for (child = fz_xml_down(root); child; child = fz_xml_next(child))
	{
		const char *text = fz_xml_text(child);
		if (text)
		{
			size_t len = strlen(text);
			memcpy(dst, text, len);
			dst += len;
		}
	}
	*dst = 0;

	return out;
}

/*
	Load and parse an external stylesheet referenced by a <link> element.

	href is resolved against base_uri, URL-decoded and normalised, then
	read from the archive. Font faces declared by the stylesheet are
	resolved relative to the stylesheet's own directory. Failures are
	reported and downgraded to a warning, except FZ_ERROR_SYSTEM which is
	rethrown.
*/
static void
html_load_css_link(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root, const char *href)
{
	char css_path[2048];
	char css_dir[2048];
	fz_buffer *contents = NULL;

	fz_var(contents);

	/* css_path = cleaned-up "<base_uri>/<href>" */
	fz_strlcpy(css_path, base_uri, sizeof css_path);
	fz_strlcat(css_path, "/", sizeof css_path);
	fz_strlcat(css_path, href, sizeof css_path);
	fz_urldecode(css_path);
	fz_cleanname(css_path);

	fz_dirname(css_dir, css_path, sizeof css_dir);

	fz_try(ctx)
	{
		contents = fz_read_archive_entry(ctx, zip, css_path);
		fz_parse_css(ctx, css, fz_string_from_buffer(ctx, contents), css_path);
		fz_add_css_font_faces(ctx, set, zip, css_dir, css);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, contents);
	}
	fz_catch(ctx)
	{
		fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
		fz_report_error(ctx);
		fz_warn(ctx, "ignoring stylesheet %s", css_path);
	}
}

/*
	Load the document's own stylesheets from <html><head>.

	Handles <link rel="stylesheet"> (with type "text/css" or no type at
	all) and inline <style> elements. A broken inline stylesheet is
	reported and ignored, except for FZ_ERROR_SYSTEM which is rethrown.
*/
static void
html_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
{
	fz_xml *head, *node;

	head = fz_xml_find_down(fz_xml_find(root, "html"), "head");

	for (node = fz_xml_down(head); node; node = fz_xml_next(node))
	{
		if (fz_xml_is_tag(node, "link"))
		{
			char *rel, *type, *href;

			rel = fz_xml_att(node, "rel");
			if (!rel || fz_strcasecmp(rel, "stylesheet"))
				continue;

			/* A missing type is accepted; a present one must be CSS. */
			type = fz_xml_att(node, "type");
			if (type && strcmp(type, "text/css"))
				continue;

			href = fz_xml_att(node, "href");
			if (href)
				html_load_css_link(ctx, set, zip, base_uri, css, root, href);
		}
		else if (fz_xml_is_tag(node, "style"))
		{
			char *source = concat_text(ctx, node);
			fz_try(ctx)
			{
				fz_parse_css(ctx, css, source, "<style>");
				fz_add_css_font_faces(ctx, set, zip, base_uri, css);
			}
			fz_always(ctx)
			{
				fz_free(ctx, source);
			}
			fz_catch(ctx)
			{
				fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
				fz_report_error(ctx);
				fz_warn(ctx, "ignoring inline stylesheet");
			}
		}
	}
}

/*
	Load the inline stylesheet of a FictionBook document
	(<FictionBook><stylesheet>), if there is one.

	A broken stylesheet is reported and ignored, except for
	FZ_ERROR_SYSTEM which is rethrown.
*/
static void
fb2_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
{
	fz_xml *fictionbook, *stylesheet;

	fictionbook = fz_xml_find(root, "FictionBook");
	stylesheet = fz_xml_find_down(fictionbook, "stylesheet");
	if (stylesheet)
	{
		char *s = concat_text(ctx, stylesheet);
		fz_try(ctx)
		{
			fz_parse_css(ctx, css, s, "<stylesheet>");
			fz_add_css_font_faces(ctx, set, zip, base_uri, css);
		}
		/* Free in fz_always so the text is released even when the
		 * catch block rethrows. */
		fz_always(ctx)
			fz_free(ctx, s);
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
			fz_report_error(ctx);
			fz_warn(ctx, "ignoring inline stylesheet");
		}
	}
}

/*
	Decode every <binary> element of a FictionBook document into an image.

	Returns a tree mapping each element's "id" attribute to the decoded
	fz_image (NULL if there are none). The tree owns the image references;
	the caller releases it with fz_drop_tree and fz_drop_image. Elements
	without an id are skipped with a warning.

	If decoding or insertion fails, everything built so far is released
	before the error is rethrown.
*/
static fz_tree *
load_fb2_images(fz_context *ctx, fz_xml *root)
{
	fz_xml *fictionbook, *binary;
	fz_tree *images = NULL;

	fz_var(images);

	fictionbook = fz_xml_find(root, "FictionBook");
	for (binary = fz_xml_find_down(fictionbook, "binary"); binary; binary = fz_xml_find_next(binary, "binary"))
	{
		const char *id = fz_xml_att(binary, "id");
		char *b64 = NULL;
		fz_buffer *buf = NULL;
		fz_image *img = NULL;

		fz_var(b64);
		fz_var(buf);
		fz_var(img);

		if (id == NULL)
		{
			fz_warn(ctx, "Skipping image with no id");
			continue;
		}

		fz_try(ctx)
		{
			b64 = concat_text(ctx, binary);
			buf = fz_new_buffer_from_base64(ctx, b64, strlen(b64));
			img = fz_new_image_from_buffer(ctx, buf);
			images = fz_tree_insert(ctx, images, id, img);
			/* The tree now owns the image reference. */
			img = NULL;
		}
		fz_always(ctx)
		{
			fz_drop_buffer(ctx, buf);
			fz_free(ctx, b64);
		}
		fz_catch(ctx)
		{
			/* The caller never sees the partial tree, so release it
			 * (and any image not yet inserted) here. */
			fz_drop_image(ctx, img);
			fz_drop_tree(ctx, images, (void(*)(fz_context*,void*))fz_drop_image);
			fz_rethrow(ctx);
		}
	}

	return images;
}

/* Growable array of Unicode code points, reused across bidi runs. */
typedef struct
{
	uint32_t *data; /* heap storage; NULL until first grown */
	size_t cap; /* allocated capacity, in code points */
	size_t len; /* number of code points currently stored */
} uni_buf;

/* State handed to fragment_cb through fz_bidi_fragment_text. */
typedef struct
{
	fz_context *ctx;
	fz_pool *pool; /* pool used when a flow node has to be split */
	fz_html_flow *flow; /* next flow node to receive a bidi level */
	uni_buf *buffer;
} bidi_data;

/*
	Callback for fz_bidi_fragment_text: apply the bidi level and script of
	one fragment to the flow nodes it covers.

	fragment_len is measured in code points. Flow nodes are consumed from
	data->flow until the fragment is used up; a text node that extends
	beyond the fragment is split so that the fragment boundary falls
	between two nodes. On return data->flow points at the first node after
	the fragment.
*/
static void fragment_cb(const uint32_t *fragment,
			size_t fragment_len,
			int bidi_level,
			int script,
			void *arg)
{
	bidi_data *data = (bidi_data *)arg;

	/* We are guaranteed that fragmentOffset will be at the beginning
	 * of flow. */
	while (fragment_len > 0)
	{
		size_t len;

		if (data->flow->type == FLOW_SPACE)
		{
			/* A space contributes exactly one code point. */
			len = 1;
		}
		else if (data->flow->type == FLOW_BREAK || data->flow->type == FLOW_SBREAK ||
				data->flow->type == FLOW_SHYPHEN || data->flow->type == FLOW_ANCHOR)
		{
			/* These nodes contribute no text to the buffer. */
			len = 0;
		}
		else
		{
			/* Must be text */
			len = fz_utflen(data->flow->content.text);
			if (len > fragment_len)
			{
				/* We need to split this flow box */
				(void)fz_html_split_flow(data->ctx, data->pool, data->flow, fragment_len);
				len = fz_utflen(data->flow->content.text);
			}
		}

		/* This flow box is entirely contained within this fragment. */
		data->flow->bidi_level = bidi_level;
		data->flow->script = script;
		data->flow = data->flow->next;
		fragment_len -= len;
	}
}

/*
	Run bidi analysis over a list of flow nodes.

	The list is processed in runs: a run ends where the parity of the
	markup-assigned bidi level changes, or after a FLOW_BREAK or
	FLOW_IMAGE node. The text of each run is decoded into buffer and
	passed to fz_bidi_fragment_text, which calls fragment_cb to write the
	resulting levels and scripts back to the flow nodes.

	bidi_dir is the initial base direction; it is passed by address to
	fz_bidi_fragment_text for every run and its final value is returned.
*/
static fz_bidi_direction
detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction bidi_dir, fz_html_flow *flow)
{
	fz_html_flow *end = flow;
	bidi_data data;

	while (end)
	{
		unsigned int level = end->bidi_level;

		/* Gather the text from the flow up into a single buffer (at
		 * least, as much of it as has the same direction markup). */
		buffer->len = 0;
		while (end && (level & 1) == (end->bidi_level & 1))
		{
			size_t len = 0;
			const char *text = "";
			int broken = 0;

			switch (end->type)
			{
			case FLOW_WORD:
				len = fz_utflen(end->content.text);
				text = end->content.text;
				break;
			case FLOW_SPACE:
				len = 1;
				text = " ";
				break;
			case FLOW_SHYPHEN:
			case FLOW_SBREAK:
				break;
			case FLOW_BREAK:
			case FLOW_IMAGE:
				/* Ends the current run; the node itself adds no text. */
				broken = 1;
				break;
			}

			end = end->next;

			if (broken)
				break;

			/* Make sure the buffer is large enough */
			if (buffer->len + len > buffer->cap)
			{
				size_t newcap = buffer->cap;
				if (newcap < 128)
					newcap = 128; /* Sensible small default */

				/* Grow by half until the new text fits. */
				while (newcap < buffer->len + len)
					newcap = (newcap * 3) / 2;

				buffer->data = fz_realloc_array(ctx, buffer->data, newcap, uint32_t);
				buffer->cap = newcap;
			}

			/* Expand the utf8 text into Unicode and store it in the buffer */
			while (*text)
			{
				int rune;
				text += fz_chartorune(&rune, text);
				buffer->data[buffer->len++] = rune;
			}
		}

		/* Detect directionality for the buffer */
		data.ctx = ctx;
		data.pool = pool;
		data.flow = flow;
		data.buffer = buffer;
		fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, fragment_cb, &data, 0 /* Flags */);
		/* The next run starts where this one stopped. */
		flow = end;
	}
	return bidi_dir;
}

/*
	Walk a box and all of its following siblings and descendants, running
	bidi detection on every flow box and storing the resulting direction
	in the box's markup_dir.
*/
static void
detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_html_box *box)
{
	for (; box; box = box->next)
	{
		if (box->type == BOX_FLOW)
		{
			box->markup_dir = detect_flow_directionality(ctx, pool, buffer,
				box->markup_dir, box->u.flow.head);
		}
		/* Recurse into children; siblings are handled by the loop. */
		detect_box_directionality(ctx, pool, buffer, box->down);
	}
}

/*
	Run bidi detection over a whole box tree, using one scratch code point
	buffer that is freed before returning (also on error).
*/
static void
detect_directionality(fz_context *ctx, fz_pool *pool, fz_html_box *box)
{
	uni_buf scratch = { NULL };

	fz_try(ctx)
	{
		detect_box_directionality(ctx, pool, &scratch, box);
	}
	fz_always(ctx)
	{
		fz_free(ctx, scratch.data);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}

/*
	Parse a buffer into an XML document.

	try_xml only: strict XML parser.
	try_html5 only: HTML5 parser.
	both: strict XML first, falling back to the HTML5 parser when the XML
	parser reports a syntax error (any other error is rethrown).

	At least one of the two flags must be set.
*/
static fz_xml_doc *
parse_to_xml(fz_context *ctx, fz_buffer *buf, int try_xml, int try_html5)
{
	fz_xml_doc *doc;

	if (!try_xml)
	{
		assert(try_html5);
		return fz_parse_xml_from_html5(ctx, buf);
	}

	if (!try_html5)
		return fz_parse_xml(ctx, buf, 1);

	/* Both requested: XML first, HTML5 as the fallback. */
	fz_try(ctx)
	{
		doc = fz_parse_xml(ctx, buf, 1);
	}
	fz_catch(ctx)
	{
		if (fz_caught(ctx) != FZ_ERROR_SYNTAX)
			fz_rethrow(ctx);
		fz_report_error(ctx);
		fz_warn(ctx, "syntax error in XHTML; retrying using HTML5 parser");
		doc = fz_parse_xml_from_html5(ctx, buf);
	}

	return doc;
}

/*
	Give root the background colour of from, and make from's background
	transparent. Both boxes get freshly enlisted styles; the styles they
	previously pointed to are not modified.
*/
static void move_background_color_style_up(fz_context *ctx, struct genstate *g, fz_html_box *root, fz_html_box *from)
{
	fz_css_color none = { 0, 0, 0, 0 };
	fz_css_style root_style, from_style;

	memcpy(&from_style, from->style, sizeof from_style);
	memcpy(&root_style, root->style, sizeof root_style);

	root_style.background_color = from_style.background_color;
	from_style.background_color = none;

	root->style = fz_css_enlist(ctx, &root_style, &g->styles, g->pool);
	from->style = fz_css_enlist(ctx, &from_style, &g->styles, g->pool);
}

/*
	If the root box has no background colour of its own (alpha is zero),
	take the one from the <html> box, or failing that from the <body> box,
	so that it ends up on the root box.
*/
static void move_background_color_up(fz_context *ctx, struct genstate *g, fz_html_box *root)
{
	fz_html_box *html_box, *body_box;

	/* Root already has a visible background: nothing to do. */
	if (root->style->background_color.a != 0)
		return;

	html_box = root->down;
	if (!html_box || strcmp(html_box->tag, "html") != 0)
		return;
	if (html_box->style->background_color.a != 0)
	{
		move_background_color_style_up(ctx, g, root, html_box);
		return;
	}

	body_box = html_box->down;
	if (!body_box || strcmp(body_box->tag, "body") != 0)
		return;
	if (body_box->style->background_color.a != 0)
		move_background_color_style_up(ctx, g, root, body_box);
}

/*
	Convert a parsed XML/HTML document into a box tree.

	set, zip, base_uri: font set, archive and base location used to
		resolve stylesheets, fonts and images.
	user_css: optional extra stylesheet applied after the document's own.
	xml: the parsed document.
	tree: receives the root box; all boxes are allocated from tree->pool.
	rtitle: if non-NULL, receives the document title (or NULL if there is
		none). The string is allocated from tree->pool, so it lives as
		long as the tree and must not be freed by the caller.
	try_fictionbook: treat the document as FictionBook if it has a
		<FictionBook> root.
	is_mobi: add the MOBI default stylesheet.

	Errors while loading styles are reported and the styles ignored
	(except FZ_ERROR_TRYLATER and FZ_ERROR_SYSTEM, which are rethrown).
	Errors while generating boxes are rethrown with *rtitle reset to NULL.
*/
static void
xml_to_boxes(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, const char *user_css,
	fz_xml_doc *xml, fz_html_tree *tree, char **rtitle, int try_fictionbook, int is_mobi)
{
	fz_xml *root, *node;
	char *title;

	fz_css_match root_match, match;
	struct genstate g = {0};

	g.pool = NULL;
	g.set = set;
	g.zip = zip;
	g.images = NULL;
	g.xml = xml;
	g.is_fb2 = 0;
	g.base_uri = base_uri;
	g.css = NULL;
	g.at_bol = 0;
	g.emit_white = 0;
	g.last_brk_cls = UCDN_LINEBREAK_CLASS_OP;
	g.list_counter = 0;
	g.section_depth = 0;
	g.markup_dir = FZ_BIDI_LTR;
	g.markup_lang = FZ_LANG_UNSET;
	g.href = NULL;
	g.styles = NULL;

	if (rtitle)
		*rtitle = NULL;

	root = fz_xml_root(g.xml);
	g.css = fz_new_css(ctx);

#ifndef NDEBUG
	if (fz_atoi(getenv("FZ_DEBUG_XML")))
		fz_debug_xml(root, 0);
#endif

	/* Stage 1: assemble the stylesheet (defaults, document, user). */
	fz_try(ctx)
	{
		if (try_fictionbook && fz_xml_find(root, "FictionBook"))
		{
			g.is_fb2 = 1;
			fz_parse_css(ctx, g.css, fb2_default_css, "<default:fb2>");
			if (fz_use_document_css(ctx))
				fb2_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root);
			g.images = load_fb2_images(ctx, root);
		}
		else if (is_mobi)
		{
			g.is_fb2 = 0;
			fz_parse_css(ctx, g.css, html_default_css, "<default:html>");
			fz_parse_css(ctx, g.css, mobi_default_css, "<default:mobi>");
			if (fz_use_document_css(ctx))
				html_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root);
		}
		else
		{
			g.is_fb2 = 0;
			fz_parse_css(ctx, g.css, html_default_css, "<default:html>");
			if (fz_use_document_css(ctx))
				html_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root);
		}

		if (user_css)
		{
			fz_parse_css(ctx, g.css, user_css, "<user>");
			fz_add_css_font_faces(ctx, g.set, g.zip, ".", g.css);
		}
	}
	fz_catch(ctx)
	{
		fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image);
		fz_drop_css(ctx, g.css);
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
		fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
		fz_report_error(ctx);
		fz_warn(ctx, "ignoring styles");
		/* Carry on with an empty stylesheet and no images. */
		g.css = fz_new_css(ctx);
		g.images = NULL;
	}

#ifndef NDEBUG
	if (fz_atoi(getenv("FZ_DEBUG_CSS")))
		fz_debug_css(ctx, g.css);
#endif

	/* Stage 2: generate boxes, detect text direction, extract the title. */
	fz_try(ctx)
	{
		fz_css_style style;
		int display;

		fz_match_css_at_page(ctx, &root_match, g.css);
		fz_apply_css_style(ctx, g.set, &style, &root_match);

		g.pool = tree->pool;
		g.markup_dir = DEFAULT_DIR;
		g.markup_lang = FZ_LANG_UNSET;

		// Create root node
		tree->root = new_box(ctx, &g, NULL, BOX_BLOCK, &style);
		// TODO: transfer page margins out of this hacky box

		tree->root->tag = ":root";
		tree->root->s.layout.em = 0;
		tree->root->s.layout.x = 0;
		tree->root->s.layout.y = 0;
		tree->root->s.layout.w = 0;
		tree->root->s.layout.b = 0;

		// Create document node (html).
		fz_match_css(ctx, &match, &root_match, g.css, root);
		fz_apply_css_style(ctx, g.set, &style, &match);
		display = fz_get_css_match_display(&match);
		gen2_tag(ctx, &g, tree->root, root, &match, display, &style);

		detect_directionality(ctx, g.pool, tree->root);

		if (g.is_fb2)
		{
			node = fz_xml_find(root, "FictionBook");
			node = fz_xml_find_down(node, "description");
			node = fz_xml_find_down(node, "title-info");
			node = fz_xml_find_down(node, "book-title");
			if (rtitle)
			{
				title = fz_xml_text(fz_xml_down(node));
				if (title)
					*rtitle = fz_pool_strdup(ctx, g.pool, title);
			}
		}
		else
		{
			node = fz_xml_find(root, "html");
			node = fz_xml_find_down(node, "head");
			node = fz_xml_find_down(node, "title");
			if (rtitle)
			{
				title = fz_xml_text(fz_xml_down(node));
				if (title)
					*rtitle = fz_pool_strdup(ctx, g.pool, title);
			}

			// Move html or body background-color to :root.
			move_background_color_up(ctx, &g, tree->root);
		}
	}
	fz_always(ctx)
	{
		fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image);
		fz_drop_css(ctx, g.css);
	}
	fz_catch(ctx)
	{
		/* The title was allocated from the tree's pool, which owns the
		 * memory; it must not be handed to fz_free. Just forget it. */
		if (rtitle)
			*rtitle = NULL;
		fz_rethrow(ctx);
	}
}

/* CSS font sizes used by patch_mobi_html for <font size="1"> to
 * <font size="7">; index 0 corresponds to size "1". */
static const char *mobi_font_size[7] = {
	"0.67em",
	"0.83em",
	"1em",
	"1.17em",
	"1.33em",
	"1.5em",
	"1.67em",
};

/*
	Rewrite MOBI-specific presentational attributes into a form the
	HTML/CSS code understands, for node, its following siblings and all
	their descendants.

	- <font size=N>: adds style="font-size:..." using mobi_font_size.
	  Absolute sizes "1".."7" and relative sizes "+1".."+6" / "-1".."-6"
	  are mapped; any other value is copied into the style unchanged.
	- Other elements: "height", "width" and "align" attributes become
	  margin-top, text-indent and text-align declarations in a style
	  attribute.
	- <img recindex=X>: adds src=X.

	Attributes are added with fz_xml_add_att using the given pool.
*/
static void
patch_mobi_html(fz_context *ctx, fz_pool *pool, fz_xml *node)
{
	fz_xml *down;
	char buf[500];
	while (node)
	{
		char *tag = fz_xml_tag(node);
		if (tag)
		{
			// Read MOBI attributes, convert to inline CSS style
			if (!strcmp(tag, "font"))
			{
				const char *size = fz_xml_att(node, "size");
				if (size)
				{
					/* Relative sizes are resolved against size 3 (1em). */
					if (!strcmp(size, "1")) size = mobi_font_size[0];
					else if (!strcmp(size, "2")) size = mobi_font_size[1];
					else if (!strcmp(size, "3")) size = mobi_font_size[2];
					else if (!strcmp(size, "4")) size = mobi_font_size[3];
					else if (!strcmp(size, "5")) size = mobi_font_size[4];
					else if (!strcmp(size, "6")) size = mobi_font_size[5];
					else if (!strcmp(size, "7")) size = mobi_font_size[6];
					else if (!strcmp(size, "+1")) size = mobi_font_size[3];
					else if (!strcmp(size, "+2")) size = mobi_font_size[4];
					else if (!strcmp(size, "+3")) size = mobi_font_size[5];
					else if (!strcmp(size, "+4")) size = mobi_font_size[6];
					else if (!strcmp(size, "+5")) size = mobi_font_size[6];
					else if (!strcmp(size, "+6")) size = mobi_font_size[6];
					else if (!strcmp(size, "-1")) size = mobi_font_size[1];
					else if (!strcmp(size, "-2")) size = mobi_font_size[0];
					else if (!strcmp(size, "-3")) size = mobi_font_size[0];
					else if (!strcmp(size, "-4")) size = mobi_font_size[0];
					else if (!strcmp(size, "-5")) size = mobi_font_size[0];
					else if (!strcmp(size, "-6")) size = mobi_font_size[0];
					fz_snprintf(buf, sizeof buf, "font-size:%s", size);
					fz_xml_add_att(ctx, pool, node, "style", buf);
				}
			}
			else
			{
				char *height = fz_xml_att(node, "height");
				char *width = fz_xml_att(node, "width");
				char *align = fz_xml_att(node, "align");
				if (height || width || align)
				{
					/* Build the declaration list; fz_strlcat truncates at sizeof buf. */
					buf[0] = 0;
					if (height)
					{
						fz_strlcat(buf, "margin-top:", sizeof buf);
						fz_strlcat(buf, height, sizeof buf);
						fz_strlcat(buf, ";", sizeof buf);
					}
					if (width)
					{
						fz_strlcat(buf, "text-indent:", sizeof buf);
						fz_strlcat(buf, width, sizeof buf);
						fz_strlcat(buf, ";", sizeof buf);
					}
					if (align)
					{
						fz_strlcat(buf, "text-align:", sizeof buf);
						fz_strlcat(buf, align, sizeof buf);
						fz_strlcat(buf, ";", sizeof buf);
					}
					fz_xml_add_att(ctx, pool, node, "style", buf);
				}
				if (!strcmp(tag, "img"))
				{
					char *recindex = fz_xml_att(node, "recindex");
					if (recindex)
						fz_xml_add_att(ctx, pool, node, "src", recindex);
				}
			}
		}

		/* Children are handled recursively, siblings by the loop. */
		down = fz_xml_down(node);
		if (down)
			patch_mobi_html(ctx, pool, down);

		node = fz_xml_next(node);
	}
}

/*
	Parse a buffer of HTML/XHTML/FB2 and build its box tree.

	The buffer is parsed according to try_xml/try_html5 (see
	parse_to_xml), optionally patched for MOBI, and converted to boxes in
	tree. If rtitle is non-NULL it receives the document title (or NULL).
	The intermediate XML document is always dropped before returning,
	whether or not an error is thrown.
*/
static void
fz_parse_html_tree(fz_context *ctx,
	fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css,
	int try_xml, int try_html5, fz_html_tree *tree, char **rtitle, int try_fictionbook, int patch_mobi)
{
	fz_xml_doc *xml;

	if (rtitle)
		*rtitle = NULL;

	xml = parse_to_xml(ctx, buf, try_xml, try_html5);

	fz_try(ctx)
	{
		/* Patching adds attributes and so may throw; keep it inside
		 * the try so that the document is still dropped. */
		if (patch_mobi)
			patch_mobi_html(ctx, xml->u.doc.pool, xml);
		xml_to_boxes(ctx, set, zip, base_uri, user_css, xml, tree, rtitle, try_fictionbook, patch_mobi);
	}
	fz_always(ctx)
		fz_drop_xml(ctx, xml);
	fz_catch(ctx)
		fz_rethrow(ctx);
}

/* Allocate a new TYPE through fz_new_html_tree_of_size, using DROP as the
 * storable drop function, and label the allocation with the type name. */
#define fz_new_derived_html_tree(CTX, TYPE, DROP) \
 ((TYPE *)Memento_label(fz_new_html_tree_of_size(CTX, sizeof(TYPE), DROP), #TYPE))

/*
	Create a new pool and allocate an html tree structure of the given
	size from it. The tree is initialised as a storable with one reference
	and the given drop function, and records the pool it lives in. If the
	allocation fails the pool is dropped and the error rethrown.
*/
static fz_html_tree *
fz_new_html_tree_of_size(fz_context *ctx, size_t size, fz_store_drop_fn *drop)
{
	fz_html_tree *tree;
	fz_pool *pool;

	pool = fz_new_pool(ctx);

	fz_try(ctx)
	{
		tree = fz_pool_alloc(ctx, pool, size);
		tree->pool = pool;
		FZ_INIT_STORABLE(tree, 1, drop);
	}
	fz_catch(ctx)
	{
		fz_drop_pool(ctx, pool);
		fz_rethrow(ctx);
	}

	return tree;
}

/*
	Parse a buffer into a new fz_html.

	The layout fields start at zero. FictionBook detection is always
	enabled. On failure the partially built html is dropped and the error
	rethrown.
*/
fz_html *
fz_parse_html(fz_context *ctx,
	fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css,
	int try_xml, int try_html5, int patch_mobi)
{
	fz_html *result = fz_new_derived_html_tree(ctx, fz_html, fz_drop_html_imp);

	result->layout_em = 0;
	result->layout_w = 0;
	result->layout_h = 0;

	fz_try(ctx)
	{
		fz_parse_html_tree(ctx, set, zip, base_uri, buf, user_css,
			try_xml, try_html5, &result->tree, &result->title, 1, patch_mobi);
	}
	fz_catch(ctx)
	{
		fz_drop_html(ctx, result);
		fz_rethrow(ctx);
	}

	return result;
}

/* State needed to redirect warnings into a buffer and undo that later. */
typedef struct
{
	int saved; /* non-zero once a redirect has been installed */
	fz_warning_cb *old; /* previous warning callback */
	void *arg; /* previous callback's user argument */
	fz_buffer *buffer; /* where redirected warnings are appended */
	fz_context *ctx; /* context used by warn_to_buffer */
} warning_save;

static void
warn_to_buffer(void *user, const char *message)
{
	warning_save *save = (warning_save *)user;
	fz_context *ctx = save->ctx;

	fz_try(ctx)
	{
		fz_append_string(ctx, save->buffer, message);
		fz_append_byte(ctx, save->buffer, '\n');
	}
	fz_catch(ctx)
	{
		/* Silently swallow the error. */
		fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
		fz_report_error(ctx);
	}
}

/*
	Start collecting warnings in buf. The current warning callback and its
	argument are remembered in save so restore_warnings can put them back.
	Pending warnings are flushed to the old callback first.
*/
static void
redirect_warnings_to_buffer(fz_context *ctx, fz_buffer *buf, warning_save *save)
{
	save->saved = 1;
	save->ctx = ctx;
	save->buffer = buf;
	save->old = fz_warning_callback(ctx, &save->arg);

	fz_flush_warnings(ctx);
	fz_set_warning_callback(ctx, warn_to_buffer, save);
}

/*
	Undo redirect_warnings_to_buffer: flush pending warnings to the buffer
	and reinstall the previous callback. Does nothing if no redirect was
	ever installed through save.
*/
static void
restore_warnings(fz_context *ctx, warning_save *save)
{
	if (save->saved)
	{
		fz_flush_warnings(ctx);
		fz_set_warning_callback(ctx, save->old, save->arg);
	}
}

/*
	Create a new story from a buffer of HTML.

	buf: HTML source; NULL is treated as an empty document.
	user_css: optional stylesheet (copied); may be NULL.
	em: font size used for layout.
	zip: archive used to resolve resources; a reference is kept.

	The source is parsed with the HTML5 parser straight away; conversion
	to boxes is deferred. Warnings emitted while parsing are collected in
	the story's warning buffer. On failure the story is dropped and the
	error rethrown.
*/
fz_story *
fz_new_story(fz_context *ctx, fz_buffer *buf, const char *user_css, float em, fz_archive *zip)
{
	fz_story *story = fz_new_derived_html_tree(ctx, fz_story, fz_drop_story_imp);
	warning_save saved = { 0 };
	fz_buffer *local_buffer = NULL;

	fz_var(local_buffer);
	fz_var(saved);

	fz_try(ctx)
	{
		/* Allocate the stand-in buffer inside the try so that the
		 * story is released if the allocation fails. */
		if (buf == NULL)
		{
			local_buffer = fz_new_buffer(ctx, 0);
			buf = local_buffer;
		}

		story->zip = fz_keep_archive(ctx, zip);
		story->font_set = fz_new_html_font_set(ctx);
		story->em = em;
		story->user_css = user_css ? fz_strdup(ctx, user_css) : NULL;
		story->warnings = fz_new_buffer(ctx, 128);
		redirect_warnings_to_buffer(ctx, story->warnings, &saved);
		story->dom = parse_to_xml(ctx, buf, 0, 1);
	}
	fz_always(ctx)
	{
		restore_warnings(ctx, &saved);
		fz_drop_buffer(ctx, local_buffer);
	}
	fz_catch(ctx)
	{
		fz_drop_html_tree(ctx, &story->tree);
		fz_rethrow(ctx);
	}

	return story;
}

/*
	Parse a buffer as XHTML: the strict XML parser is tried first, with
	the HTML5 parser as fallback. No MOBI patching is applied.
*/
fz_html *
fz_parse_xhtml(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css)
{
	const int try_xml = 1;
	const int try_html5 = 1;
	const int patch_mobi = 0;

	return fz_parse_html(ctx, set, zip, base_uri, buf, user_css, try_xml, try_html5, patch_mobi);
}

/* Write level tab characters to stdout (nothing if level <= 0). */
static void indent(int level)
{
	for (; level > 0; level--)
		putchar('\t');
}

/*
	Debug helper: print a list of flow nodes to stdout, one per line,
	indented by level tabs. Whenever the owning box changes, a "@style"
	line describing that box's font is printed first. A line containing
	"*" follows every node that has breaks_line set.
*/
static void
fz_debug_html_flow(fz_context *ctx, fz_html_flow *flow, int level)
{
	/* Box whose style was printed most recently. */
	fz_html_box *sbox = NULL;
	while (flow)
	{
		if (flow->box != sbox) {
			sbox = flow->box;
			indent(level);
#ifndef NDEBUG
			printf("@style <%s> em=%g font='%s'", sbox->tag, sbox->s.layout.em, fz_font_name(ctx, sbox->style->font));
#else
			printf("@style em=%g font='%s'", sbox->s.layout.em, fz_font_name(ctx, sbox->style->font));
#endif
			if (fz_font_is_serif(ctx, sbox->style->font))
				printf(" serif");
			else
				printf(" sans");
			if (fz_font_is_monospaced(ctx, sbox->style->font))
				printf(" monospaced");
			if (fz_font_is_bold(ctx, sbox->style->font))
				printf(" bold");
			if (fz_font_is_italic(ctx, sbox->style->font))
				printf(" italic");
			if (sbox->style->small_caps)
				printf(" small-caps");
			printf("\n");
		}

		indent(level);
		switch (flow->type) {
		case FLOW_WORD: printf("word "); break;
		case FLOW_SPACE: printf("space"); break;
		case FLOW_SBREAK: printf("sbrk "); break;
		case FLOW_SHYPHEN: printf("shy  "); break;
		case FLOW_BREAK: printf("break"); break;
		case FLOW_IMAGE: printf("image"); break;
		case FLOW_ANCHOR: printf("anchor"); break;
		}
		// printf(" y=%g x=%g w=%g", flow->y, flow->x, flow->w);
		if (flow->type == FLOW_IMAGE)
			printf(" h=%g", flow->h);
		if (flow->type == FLOW_WORD)
			printf(" text='%s'", flow->content.text);
		printf("\n");
		if (flow->breaks_line) {
			indent(level);
			printf("*\n");
		}

		flow = flow->next;
	}
}

/*
	Map an HTML tag name to the corresponding document structure type.
	The comparison is case-sensitive. Returns FZ_STRUCTURE_INVALID for
	tags that have no mapping. tag must not be NULL.
*/
fz_structure fz_html_tag_to_structure(const char *tag)
{
	static const struct
	{
		const char *name;
		fz_structure type;
	} tag_map[] = {
		{ "body", FZ_STRUCTURE_DOCUMENT },
		{ "div", FZ_STRUCTURE_DIV },
		{ "span", FZ_STRUCTURE_SPAN },
		{ "blockquote", FZ_STRUCTURE_BLOCKQUOTE },
		{ "p", FZ_STRUCTURE_P },
		{ "h1", FZ_STRUCTURE_H1 },
		{ "h2", FZ_STRUCTURE_H2 },
		{ "h3", FZ_STRUCTURE_H3 },
		{ "h4", FZ_STRUCTURE_H4 },
		{ "h5", FZ_STRUCTURE_H5 },
		{ "h6", FZ_STRUCTURE_H6 },
		{ "ol", FZ_STRUCTURE_LIST },
		{ "ul", FZ_STRUCTURE_LIST },
		{ "dl", FZ_STRUCTURE_LIST },
		{ "li", FZ_STRUCTURE_LISTITEM },
		{ "table", FZ_STRUCTURE_TABLE },
		{ "tr", FZ_STRUCTURE_TR },
		{ "th", FZ_STRUCTURE_TH },
		{ "td", FZ_STRUCTURE_TD },
		{ "thead", FZ_STRUCTURE_THEAD },
		{ "tbody", FZ_STRUCTURE_TBODY },
		{ "tfoot", FZ_STRUCTURE_TFOOT },
	};
	size_t i;

	for (i = 0; i < sizeof tag_map / sizeof tag_map[0]; i++)
	{
		if (!strcmp(tag, tag_map[i].name))
			return tag_map[i].type;
	}
	return FZ_STRUCTURE_INVALID;
}

/*
	Debug helper: print a box, its following siblings and all their
	descendants to stdout, indented by level tabs. For each box this
	prints its type and tag, selected attributes, the margins of block and
	table boxes, then its children, and finally (for flow boxes) its flow
	content.
*/
static void
fz_debug_html_box(fz_context *ctx, fz_html_box *box, int level)
{
	while (box)
	{
		indent(level);
		printf("box ");
		switch (box->type) {
		case BOX_BLOCK: printf("block"); break;
		case BOX_FLOW: printf("flow"); break;
		case BOX_INLINE: printf("inline"); break;
		case BOX_TABLE: printf("table"); break;
		case BOX_TABLE_ROW: printf("table-row"); break;
		case BOX_TABLE_CELL: printf("table-cell"); break;
		}

		printf(" <%s>", box->tag);
		// printf(" em=%g", box->em);
		// printf(" x=%g y=%g w=%g b=%g", box->x, box->y, box->w, box->b);

		if (box->is_first_flow)
			printf(" is-first-flow");
		if (box->list_item)
			printf(" list=%d", box->list_item);
		if (box->id)
			printf(" id=(%s)", box->id);
		if (box->href)
			printf(" href=(%s)", box->href);
		printf("\n");

		if (box->type == BOX_BLOCK || box->type == BOX_TABLE) {
			indent(level+1);
			printf(">margin=(%g %g %g %g)\n", box->u.block.margin[0], box->u.block.margin[1], box->u.block.margin[2], box->u.block.margin[3]);
			//indent(level+1);
			//printf(">padding=(%g %g %g %g)\n", box->u.block.padding[0], box->u.block.padding[1], box->u.block.padding[2], box->u.block.padding[3]);
			//indent(level+1);
			//printf(">border=(%g %g %g %g)\n", box->u.block.border[0], box->u.block.border[1], box->u.block.border[2], box->u.block.border[3]);
		}

		/* Children first, then this box's own flow content. */
		if (box->down)
			fz_debug_html_box(ctx, box->down, level + 1);
		if (box->type == BOX_FLOW) {
			indent(level+1);
			printf("flow\n");
			fz_debug_html_flow(ctx, box->u.flow.head, level + 2);
		}

		box = box->next;
	}
}

/* Print a box tree to stdout for debugging, starting at indent level 0. */
void
fz_debug_html(fz_context *ctx, fz_html_box *box)
{
	fz_debug_html_box(ctx, box, 0);
}

/* Size of an fz_html as reported by its pool; 0 for a NULL html. */
static size_t
fz_html_size(fz_context *ctx, fz_html *html)
{
	if (html == NULL)
		return 0;
	return fz_pool_size(ctx, html->tree.pool);
}

/* Magic to make html storable. */
/* Store key: identifies the parsed html for one chapter of one document. */
typedef struct {
	int refs; /* reference count, managed by fz_keep_imp/fz_drop_imp */
	void *doc; /* owning document; compared by address only */
	int chapter_num;
} fz_html_key;

/*
	Store callback: fill in the hash for an fz_html_key from its document
	pointer and chapter number. Always returns 1.
*/
static int
fz_make_hash_html_key(fz_context *ctx, fz_store_hash *hash, void *key_)
{
	const fz_html_key *html_key = key_;

	hash->u.pi.ptr = html_key->doc;
	hash->u.pi.i = html_key->chapter_num;

	return 1;
}

/* Store callback: take a new reference to an fz_html_key. */
static void *
fz_keep_html_key(fz_context *ctx, void *key_)
{
	fz_html_key *html_key = key_;

	return fz_keep_imp(ctx, html_key, &html_key->refs);
}

/* Store callback: drop a reference to an fz_html_key, freeing it when
 * the last reference goes away. */
static void
fz_drop_html_key(fz_context *ctx, void *key_)
{
	fz_html_key *html_key = key_;

	if (!fz_drop_imp(ctx, html_key, &html_key->refs))
		return;

	fz_free(ctx, html_key);
}

/* Store callback: returns non-zero when two keys refer to the same
 * document and chapter. */
static int
fz_cmp_html_key(fz_context *ctx, void *k0_, void *k1_)
{
	const fz_html_key *a = k0_;
	const fz_html_key *b = k1_;

	if (a->doc != b->doc)
		return 0;
	return a->chapter_num == b->chapter_num;
}

/* Store callback: write a printable description of a key into s
 * (at most n bytes). */
static void
fz_format_html_key(fz_context *ctx, char *s, size_t n, void *key_)
{
	const fz_html_key *html_key = key_;

	fz_snprintf(s, n, "(html doc=%p, ch=%d)", html_key->doc, html_key->chapter_num);
}

/* Store type for parsed html: a name followed by the key callbacks
 * defined above (hash, keep, drop, compare, format). The final slot is
 * left NULL. */
static const fz_store_type fz_html_store_type =
{
	"fz_html",
	fz_make_hash_html_key,
	fz_keep_html_key,
	fz_drop_html_key,
	fz_cmp_html_key,
	fz_format_html_key,
	NULL
};

/*
	Put a parsed html into the store under the key (doc, chapter).

	Returns the html the caller should use from now on: if fz_store_item
	hands back a different html for the key, the passed-in html is dropped
	and that one is returned instead; otherwise html itself is returned.
	Errors raised while storing are swallowed, in which case html is
	returned unchanged.
*/
fz_html *fz_store_html(fz_context *ctx, fz_html *html, void *doc, int chapter)
{
	fz_html_key *key = NULL;
	fz_html *other_html;

	/* Stick the parsed html in the store */
	fz_var(key);

	fz_try(ctx)
	{
		key = fz_malloc_struct(ctx, fz_html_key);
		key->refs = 1;
		key->doc = doc;
		key->chapter_num = chapter;
		other_html = fz_store_item(ctx, key, html, fz_html_size(ctx, html), &fz_html_store_type);
		if (other_html)
		{
			fz_drop_html(ctx, html);
			html = other_html;
		}
	}
	fz_always(ctx)
		/* Release our own reference to the key. */
		fz_drop_html_key(ctx, key);
	fz_catch(ctx)
	{
		/* Do nothing */
	}

	return html;
}

/*
	Look up the stored html for (doc, chapter). Returns whatever
	fz_find_item returns for that key.
*/
fz_html *fz_find_html(fz_context *ctx, void *doc, int chapter)
{
	/* Temporary stack key, used only for the lookup. */
	fz_html_key lookup;

	lookup.doc = doc;
	lookup.chapter_num = chapter;
	lookup.refs = 1;

	return fz_find_item(ctx, &fz_drop_html_imp, &lookup, &fz_html_store_type);
}

/* Store filter: selects the entries whose key belongs to document doc. */
static int
html_filter_store(fz_context *ctx, void *doc, void *key_)
{
	const fz_html_key *html_key = key_;

	return html_key->doc == doc;
}

/* Run fz_filter_store over the html store entries with html_filter_store,
 * which selects the entries whose key belongs to doc. */
void fz_purge_stored_html(fz_context *ctx, void *doc)
{
	fz_filter_store(ctx, html_filter_store, doc, &fz_html_store_type);
}

/*
	Convert a story's DOM into its box tree, if that has not happened yet.

	Warnings raised during the conversion are appended to story->warnings.
	The DOM is dropped and story->dom set to NULL afterwards, whether or
	not the conversion succeeded, so later calls return immediately.
	Errors are rethrown.
*/
static void
convert_to_boxes(fz_context *ctx, fz_story *story)
{
	warning_save saved = { 0 };

	/* Already converted (or there never was a DOM). */
	if (story->dom == NULL)
		return;

	fz_var(saved);

	fz_try(ctx)
	{
		redirect_warnings_to_buffer(ctx, story->warnings, &saved);
		xml_to_boxes(ctx, story->font_set, story->zip, ".", story->user_css, story->dom, &story->tree, NULL, 0, 0);
	}
	fz_always(ctx)
	{
		fz_drop_xml(ctx, story->dom);
		story->dom = NULL;
		restore_warnings(ctx, &saved);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}

/* Place a story into a rectangle; same as fz_place_story_flags with
 * flags set to 0. */
int fz_place_story(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled)
{
	return fz_place_story_flags(ctx, story, where, filled, 0);
}

/*
	Lay out as much of the story as fits into the rectangle where,
	continuing from the current placement position.

	filled: if non-NULL, receives the area occupied by the root box
		including its margin, border and padding (the empty rectangle
		if story is NULL or already complete).
	flags: stored in the restart state that is passed to the layout.

	Returns 0 if story is NULL or complete;
	FZ_HTML_RESTART_REASON_NONE if the layout did not record a restart
	point; FZ_HTML_RESTART_REASON_LINE_WIDTH if that was the recorded
	reason; otherwise FZ_HTML_RESTART_REASON_LINE_HEIGHT.
*/
int fz_place_story_flags(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled, int flags)
{
	float w, h;

	if (filled)
		*filled = fz_empty_rect;

	if (story == NULL || story->complete)
		return 0;

	/* Convert from XML to box model on the first attempt to place.
	 * The DOM is unusable from here on in. */
	convert_to_boxes(ctx, story);

	w = where.x1 - where.x0;
	h = where.y1 - where.y0;
	/* Confusingly, we call the layout using restart_draw, not restart_place,
	 * because we don't want to destroy the current values in restart_place
	 * in case we have to retry later. This means the values are left in
	 * the correct struct though! */
	story->restart_draw.start = story->restart_place.start;
	story->restart_draw.start_flow = story->restart_place.start_flow;
	story->restart_draw.end = NULL;
	story->restart_draw.end_flow = NULL;
	story->restart_draw.reason = FZ_HTML_RESTART_REASON_NONE;
	story->restart_draw.flags = flags;
	story->bbox = where;
	fz_restartable_layout_html(ctx, &story->tree, where.x0, where.y0, w, h, story->em, &story->restart_draw);
	/* Reset the start position to the placement position after layout. */
	story->restart_draw.start = story->restart_place.start;
	story->restart_draw.start_flow = story->restart_place.start_flow;

	if (filled)
	{
		fz_html_box *b = story->tree.root;
		filled->x0 = b->s.layout.x - b->u.block.margin[L] - b->u.block.border[L] - b->u.block.padding[L];
		filled->x1 = b->s.layout.w + b->u.block.margin[R] + b->u.block.border[R] + b->u.block.padding[R] + b->s.layout.x;
		filled->y0 = b->s.layout.y - b->u.block.margin[T] - b->u.block.border[T] - b->u.block.padding[T];
		filled->y1 = b->s.layout.b + b->u.block.margin[B] + b->u.block.border[B] + b->u.block.padding[B];
	}

#ifndef NDEBUG
	if (fz_atoi(getenv("FZ_DEBUG_HTML")))
		fz_debug_html(ctx, story->tree.root);
#endif

	if (story->restart_draw.end == NULL)
		return FZ_HTML_RESTART_REASON_NONE;
	if (story->restart_draw.reason == FZ_HTML_RESTART_REASON_LINE_WIDTH)
		return FZ_HTML_RESTART_REASON_LINE_WIDTH;
	return FZ_HTML_RESTART_REASON_LINE_HEIGHT;
}

/*
	Return the warnings collected for a story as a NUL-terminated string,
	or NULL if story is NULL or no warnings were recorded.

	The DOM is converted to boxes first (if not done already) so that
	warnings from that stage are included. The returned pointer refers to
	the story's own warning buffer.
*/
const char *
fz_story_warnings(fz_context *ctx, fz_story *story)
{
	unsigned char *bytes;
	size_t len;

	if (story == NULL)
		return NULL;

	convert_to_boxes(ctx, story);
	fz_terminate_buffer(ctx, story->warnings);

	len = fz_buffer_storage(ctx, story->warnings, &bytes);
	return len == 0 ? NULL : (const char *)bytes;
}