view mupdf-source/source/fitz/stext-device.c @ 46:7ee69f120f19 default tip

>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 17:17:30 +0200
parents aa33339d6b8a
children
line wrap: on
line source

// Copyright (C) 2004-2025 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

#include "mupdf/fitz.h"

#include "glyphbox.h"

#include <float.h>
#include <string.h>

/* Simple layout structure */

/*
 * Create an empty layout block. The block is allocated from a freshly
 * created pool, which it then records; if the allocation throws, the
 * pool is dropped again before the error is rethrown.
 */
fz_layout_block *fz_new_layout(fz_context *ctx)
{
	fz_pool *pool = fz_new_pool(ctx);
	fz_layout_block *layout;

	fz_try(ctx)
	{
		layout = fz_pool_alloc(ctx, pool, sizeof (*layout));
		layout->head = NULL;
		/* With no lines yet, the tail pointer refers to the head itself. */
		layout->tailp = &layout->head;
		layout->pool = pool;
	}
	fz_catch(ctx)
	{
		fz_drop_pool(ctx, pool);
		fz_rethrow(ctx);
	}

	return layout;
}

/* Release a layout block by dropping the pool it records. NULL is ignored. */
void fz_drop_layout(fz_context *ctx, fz_layout_block *block)
{
	if (block == NULL)
		return;

	fz_drop_pool(ctx, block->pool);
}

/*
 * Append a new line (with no characters yet) to the end of the layout
 * block's line list, and make it the line that subsequently added
 * characters are chained onto.
 */
void fz_add_layout_line(fz_context *ctx, fz_layout_block *block, float x, float y, float font_size, const char *p)
{
	fz_layout_line *entry = fz_pool_alloc(ctx, block->pool, sizeof (*entry));

	entry->p = p;
	entry->font_size = font_size;
	entry->x = x;
	entry->y = y;
	entry->next = NULL;
	entry->text = NULL;

	/* Link the new line onto the tail of the list. */
	*block->tailp = entry;
	block->tailp = &entry->next;

	/* Characters added from now on start this line's text list. */
	block->text_tailp = &entry->text;
}

/*
 * Append a character record to the text list that block->text_tailp
 * currently refers to (set up by fz_add_layout_line).
 */
void fz_add_layout_char(fz_context *ctx, fz_layout_block *block, float x, float advance, const char *p)
{
	fz_layout_char *entry = fz_pool_alloc(ctx, block->pool, sizeof (*entry));

	entry->p = p;
	entry->advance = advance;
	entry->x = x;
	entry->next = NULL;

	/* Chain onto the tail and move the tail pointer along. */
	*block->text_tailp = entry;
	block->text_tailp = &entry->next;
}

/* Extract text into blocks and lines. */

/* Thresholds used by fz_add_stext_char_imp when deciding whether a
 * character continues the current line, needs a space before it, or
 * starts a new line/block. They are compared against distances that
 * have been divided by the text size. */

/* Baseline offsets larger than this start a new block. */
#define PARAGRAPH_DIST 1.5f
/* In-line gaps smaller than this (in magnitude) are ignored. */
#define SPACE_DIST 0.15f
/* In-line gaps at or beyond this (in magnitude) start a new line. */
#define SPACE_MAX_DIST 0.8f
/* Baseline offsets smaller than this (in magnitude) stay on the same line. */
#define BASE_MAX_DIST 0.8f
/* A repeat of the previous character ending within this distance of the pen is dropped as fake bold. */
#define FAKE_BOLD_MAX_DIST 0.1f

/* We keep a stack of the different metatexts that apply at any
 * given point (normally none!). Whenever we get some content
 * with a metatext in force, we really want to update the bounds
 * for that metatext. But running along the whole list each time
 * would be painful. So we just update the bounds for dev->metatext
 * and rely on metatext_bounds() propagating it upwards 'just in
 * time' for us to use metatexts other than the latest one. This
 * also means we need to propagate bounds upwards when we pop
 * a metatext.
 *
 * Why do we need bounds at all? Well, suppose we get:
 *    /Span <</ActualText (c) >> BDC /Im0 Do EMC
 * Then where on the page do we put 'c' ? By collecting the
 * bounds, we can place 'c' wherever the image was.
 */
/* One entry in the stack of metatexts currently in force. */
typedef struct metatext_t
{
	/* Kind of metatext (e.g. FZ_METATEXT_ACTUALTEXT). */
	fz_metatext type;
	/* The metatext string. For actualtext this is shortened in place as
	 * parts of it are matched against extracted text. */
	char *text;
	/* Bounds accumulated for this metatext; see metatext_bounds(). */
	fz_rect bounds;
	/* Next entry down the stack, or NULL. */
	struct metatext_t *prev;
} metatext_t;

/* One entry in the device's list of 'rects' seen during processing.
 * NOTE(review): presumably describes a stroke-like rectangle as a segment
 * from 'from' to 'to' with the given thickness - confirm against the code
 * that fills this in (not visible in this part of the file). */
typedef struct
{
	fz_point from;
	fz_point to;
	float thickness;
} rect_details;

/* State carried by the structured text device while a page is built. */
typedef struct
{
	/* Base device; the fz_device pointers given to the callbacks are cast
	 * back to this structure. */
	fz_device super;
	/* The page being populated. */
	fz_stext_page *page;
	int id;
	/* pen: end point of the most recently processed character.
	 * start: start point of the character that opened the current line. */
	fz_point pen, start;
	/* Start point of the most recently processed character. */
	fz_point lag_pen;
	/* Matrix supplied with the most recently processed character. */
	fz_matrix trm;
	/* Set when a new text object begins; cleared once a character from it
	 * has been processed. */
	int new_obj;
	/* Code point and bidi value of the previous character. */
	int lastchar;
	int lastbidi;
	/* FZ_STEXT_* option bits consulted during extraction. */
	int flags;
	/* Packed colour stored on each character that is added. */
	int color;
	/* Non-zero if the previous glyph was treated as a fake-bold overprint. */
	int last_was_fake_bold;
	/* The most recent fz_text handled by the text callbacks (a reference is kept). */
	const fz_text *lasttext;
	fz_stext_options opts;

	/* Top of the stack of metatexts currently in force (NULL if none). */
	metatext_t *metatext;

	/* Store the last values we saw. We need this for flushing the actualtext. */
	struct
	{
		/* Set once at least one text item has been recorded here. */
		int valid;
		/* Non-zero if the most recent text item was skipped by clipping. */
		int clipped;
		fz_matrix trm;
		int wmode;
		int bidi_level;
		/* A reference to the font is kept. */
		fz_font *font;
		int flags;
	} last;

	/* The list of 'rects' seen during processing (if we're collecting styles). */
	/* NOTE(review): rect_max/rect_len look like capacity and used count of
	 * 'rects' - confirm against the code that maintains them. */
	int rect_max;
	int rect_len;
	rect_details *rects;
} fz_stext_device;

/* Human readable summary of the structured text options, one option per
 * line, each preceded by a tab, and terminated by a blank line. */
const char *fz_stext_options_usage =
	"Text output options:\n"
	"\tpreserve-images: keep images in output\n"
	"\tpreserve-ligatures: do not expand ligatures into constituent characters\n"
	"\tpreserve-spans: do not merge spans on the same line\n"
	"\tpreserve-whitespace: do not convert all whitespace into space characters\n"
	"\tinhibit-spaces: don't add spaces between gaps in the text\n"
	"\tparagraph-break: break blocks at paragraph boundaries\n"
	"\tdehyphenate: attempt to join up hyphenated words\n"
	"\tignore-actualtext: do not apply ActualText replacements\n"
	"\tuse-cid-for-unknown-unicode: use character code if unicode mapping fails\n"
	"\tuse-gid-for-unknown-unicode: use glyph index if unicode mapping fails\n"
	"\taccurate-bboxes: calculate char bboxes from the outlines\n"
	"\taccurate-ascenders: calculate ascender/descender from font glyphs\n"
	"\taccurate-side-bearings: expand char bboxes to completely include width of glyphs\n"
	"\tcollect-styles: attempt to detect text features (fake bold, strikeout, underlined etc)\n"
	"\tclip: do not include text that is completely clipped\n"
	"\tclip-rect=x0:y0:x1:y1 specify clipping rectangle within which to collect content\n"
	"\tstructured: collect structure markup\n"
	"\tvectors: include vector bboxes in output\n"
	"\tsegment: attempt to segment the page\n"
	"\ttable-hunt: hunt for tables within a (segmented) page\n"
	"\n";

/* Return the innermost metatext of type FZ_METATEXT_ACTUALTEXT that is
 * currently in force, or NULL if there is none. dev must not be NULL. */
static metatext_t *
find_actualtext(fz_stext_device *dev)
{
	metatext_t *mt;

	for (mt = dev->metatext; mt != NULL; mt = mt->prev)
		if (mt->type == FZ_METATEXT_ACTUALTEXT)
			break;

	return mt;
}

/* Return a pointer to the bounds of the given metatext, first folding
 * the bounds of every more recently pushed metatext into the entry
 * beneath it. mt must be on dev's metatext stack; neither mt nor dev
 * may be NULL. */
static fz_rect *
metatext_bounds(metatext_t *mt, fz_stext_device *dev)
{
	metatext_t *inner;

	for (inner = dev->metatext; inner != mt; inner = inner->prev)
	{
		metatext_t *outer = inner->prev;

		outer->bounds = fz_union_rect(outer->bounds, inner->bounds);
	}

	return &mt->bounds;
}

/* Return the (up to date) bounds of the actualtext currently in force,
 * or NULL when no actualtext applies. dev must not be NULL. */
static fz_rect *
actualtext_bounds(fz_stext_device *dev)
{
	metatext_t *actual = find_actualtext(dev);

	return actual ? metatext_bounds(actual, dev) : NULL;
}

/*
 * Create an empty structured text page covering the given mediabox.
 * The page is allocated from a new pool which it then records; the pool
 * is dropped again if the allocation throws.
 */
fz_stext_page *
fz_new_stext_page(fz_context *ctx, fz_rect mediabox)
{
	fz_stext_page *result = NULL;
	fz_pool *pool = fz_new_pool(ctx);

	fz_try(ctx)
	{
		result = fz_pool_alloc(ctx, pool, sizeof(*result));
		result->first_block = NULL;
		result->last_block = NULL;
		result->mediabox = mediabox;
		result->pool = pool;
	}
	fz_catch(ctx)
	{
		fz_drop_pool(ctx, pool);
		fz_rethrow(ctx);
	}

	return result;
}

/*
 * Drop the references held by a chain of blocks (following 'next'):
 * the image of each image block, the font of every character in each
 * text block, and - recursively - everything below each struct block.
 * The blocks themselves are not freed here.
 */
static void
drop_run(fz_context *ctx, fz_stext_block *block)
{
	fz_stext_line *line;
	fz_stext_char *ch;
	while (block)
	{
		switch (block->type)
		{
		case FZ_STEXT_BLOCK_IMAGE:
			fz_drop_image(ctx, block->u.i.image);
			break;
		case FZ_STEXT_BLOCK_TEXT:
			for (line = block->u.t.first_line; line; line = line->next)
				for (ch = line->first_char; ch; ch = ch->next)
					fz_drop_font(ctx, ch->font);
			break;
		case FZ_STEXT_BLOCK_STRUCT:
			/* check_for_fake_bold treats a NULL 'down' as possible, so
			 * be equally defensive here rather than dereferencing it. */
			if (block->u.s.down != NULL)
				drop_run(ctx, block->u.s.down->first_block);
			break;
		default:
			break;
		}
		block = block->next;
	}
}

/* Drop a structured text page: release the references held by its
 * blocks, then drop the pool recorded in the page. NULL is ignored. */
void
fz_drop_stext_page(fz_context *ctx, fz_stext_page *page)
{
	if (page == NULL)
		return;

	drop_run(ctx, page->first_block);
	fz_drop_pool(ctx, page->pool);
}

/*
 * This adds a new block at the end of the page. This should not be used
 * to add 'struct' blocks to the page as those have to be added internally,
 * with more complicated pointer setup.
 *
 * If a structure is currently open (page->last_struct is set), the block
 * is appended to that structure's block list; otherwise it is appended to
 * the page's top level list. The block's bbox starts out empty and its
 * type is left for the caller to set.
 */
static fz_stext_block *
add_block_to_page(fz_context *ctx, fz_stext_page *page)
{
	fz_stext_block *block = fz_pool_alloc(ctx, page->pool, sizeof *page->first_block);
	block->bbox = fz_empty_rect; /* Fixes bug 703267. */
	block->prev = page->last_block;
	if (page->last_struct)
	{
		if (page->last_struct->last_block)
		{
			/* Append after the structure's current last block. */
			block->prev = page->last_struct->last_block;
			block->prev->next = block;
			page->last_struct->last_block = block;
		}
		else
			/* First block inside this structure. */
			/* NOTE(review): block->prev is left as page->last_block here,
			 * i.e. it may point at a block outside this structure's own
			 * list - confirm that is intended. */
			page->last_struct->last_block = page->last_struct->first_block = block;
	}
	else if (!page->last_block)
	{
		/* First top level block on the page. */
		page->last_block = block;
		if (!page->first_block)
			page->first_block = block;
	}
	else
	{
		/* Append after the page's current last block. */
		page->last_block->next = block;
		page->last_block = block;
	}
	return block;
}

/* Append a new block to the page and mark it as a text block. */
static fz_stext_block *
add_text_block_to_page(fz_context *ctx, fz_stext_page *page)
{
	fz_stext_block *text_block = add_block_to_page(ctx, page);

	text_block->type = FZ_STEXT_BLOCK_TEXT;

	return text_block;
}

/* Append a new image block to the page. The block keeps its own
 * reference to the image, records the transform, and takes as its bbox
 * the unit rectangle transformed by ctm. */
static fz_stext_block *
add_image_block_to_page(fz_context *ctx, fz_stext_page *page, fz_matrix ctm, fz_image *image)
{
	fz_stext_block *image_block = add_block_to_page(ctx, page);

	image_block->type = FZ_STEXT_BLOCK_IMAGE;
	image_block->u.i.transform = ctm;
	image_block->u.i.image = fz_keep_image(ctx, image);
	image_block->bbox = fz_transform_rect(fz_unit_rect, ctm);

	return image_block;
}

/* Append a new, empty line to a text block, recording its direction
 * vector and writing mode. The bidi argument is accepted but not used. */
static fz_stext_line *
add_line_to_block(fz_context *ctx, fz_stext_page *page, fz_stext_block *block, const fz_point *dir, int wmode, int bidi)
{
	fz_stext_line *fresh = fz_pool_alloc(ctx, page->pool, sizeof *block->u.t.first_line);
	fz_stext_line *tail = block->u.t.last_line;

	fresh->prev = tail;

	/* Either start the list, or hang the new line off the old tail. */
	if (block->u.t.first_line == NULL)
		block->u.t.first_line = fresh;
	else
		tail->next = fresh;
	block->u.t.last_line = fresh;

	fresh->wmode = wmode;
	fresh->dir = *dir;

	return fresh;
}

/* Sentinel 'glyph' values understood by add_char_to_line:
 * NON_ACCURATE_GLYPH_ADDED_SPACE: a space being added in accurate mode
 * (zero ascender/descender offsets are used).
 * NON_ACCURATE_GLYPH: use the font's ascender/descender rather than the
 * bounds of a particular glyph. */
#define NON_ACCURATE_GLYPH_ADDED_SPACE (-2)
#define NON_ACCURATE_GLYPH (-1)

/*
 * Append a character to the end of a line and compute its quad.
 *
 * trm: matrix used to transform the ascender/descender offsets.
 * font: font for the character; a reference is kept on the char.
 * c: character code stored on the char.
 * glyph: glyph id, or one of the NON_ACCURATE_* sentinels above.
 * p, q: start and end points of the character; p becomes its origin.
 * synthetic: non-zero to set FZ_STEXT_SYNTHETIC on the char.
 * flags: initial flags for the char (FZ_STEXT_BOLD is added if the font
 *	is flagged bold).
 * dev_flags: device option bits; only FZ_STEXT_ACCURATE_SIDE_BEARINGS is
 *	looked at here.
 *
 * The quad is built from two offset vectors, 'a' and 'd':
 *	ll = p + d, ul = p + a, lr = q + d, ur = q + a.
 */
static fz_stext_char *
add_char_to_line(fz_context *ctx, fz_stext_page *page, fz_stext_line *line, fz_matrix trm, fz_font *font, float size, int c, int glyph, fz_point *p, fz_point *q, int bidi, int color, int synthetic, int flags, int dev_flags)
{
	fz_stext_char *ch = fz_pool_alloc(ctx, page->pool, sizeof *line->first_char);
	fz_point a, d;

	/* Link onto the end of the line's character list. */
	if (!line->first_char)
		line->first_char = line->last_char = ch;
	else
	{
		line->last_char->next = ch;
		line->last_char = ch;
	}

	ch->c = c;
	ch->argb = color;
	ch->bidi = bidi;
	ch->origin = *p;
	ch->size = size;
	ch->font = fz_keep_font(ctx, font);
	ch->flags = flags | (synthetic ? FZ_STEXT_SYNTHETIC : 0);
	if (font->flags.is_bold)
		ch->flags |= FZ_STEXT_BOLD;

	if (line->wmode == 0)
	{
		fz_rect bounds;
		int bounded = 0;
		a.x = 0;
		d.x = 0;
		if (glyph == NON_ACCURATE_GLYPH_ADDED_SPACE)
		{
			/* Added space, in accurate mode. */
			a.y = d.y = 0;
		}
		else if (glyph == NON_ACCURATE_GLYPH)
		{
			/* Non accurate mode. */
			a.y = fz_font_ascender(ctx, font);
			d.y = fz_font_descender(ctx, font);
		}
		else
		{
			/* Any glyph in accurate mode */
			bounds = fz_bound_glyph(ctx, font, glyph, fz_identity);
			bounded = 1;
			a.y = bounds.y1;
			d.y = bounds.y0;
		}
		if (dev_flags & FZ_STEXT_ACCURATE_SIDE_BEARINGS)
		{
			/* NOTE(review): this branch is also reached for the negative
			 * NON_ACCURATE_* sentinels, which are then passed to
			 * fz_bound_glyph as the glyph id; and the second test below
			 * compares and assigns d.y (a vertical offset) against
			 * bounds.x1 (a horizontal extent). Both look suspicious -
			 * confirm the intended behaviour before relying on it. */
			if (!bounded)
				bounds = fz_bound_glyph(ctx, font, glyph, fz_identity);
			if (a.x > bounds.x0)
				a.x = bounds.x0;
			if (d.y < bounds.x1)
				d.y = bounds.x1;
		}
	}
	else
	{
		/* Vertical writing mode: fixed offsets. */
		a.x = 1;
		d.x = 0;
		a.y = 0;
		d.y = 0;
	}
	/* Transform the offsets as vectors (no translation). */
	a = fz_transform_vector(a, trm);
	d = fz_transform_vector(d, trm);

	ch->quad.ll = fz_make_point(p->x + d.x, p->y + d.y);
	ch->quad.ul = fz_make_point(p->x + a.x, p->y + a.y);
	ch->quad.lr = fz_make_point(q->x + d.x, q->y + d.y);
	ch->quad.ur = fz_make_point(q->x + a.x, q->y + a.y);

	return ch;
}

/*
 * Unlink the final character of a line and drop its font reference.
 * Nothing happens if line is NULL, if the line is empty, or if the line
 * holds only a single character.
 */
static void
remove_last_char(fz_context *ctx, fz_stext_line *line)
{
	fz_stext_char *before_last = NULL;
	fz_stext_char *last;

	if (line == NULL || line->first_char == NULL)
		return;

	/* Walk to the final character, remembering the one before it. */
	for (last = line->first_char; last->next != NULL; last = last->next)
		before_last = last;

	/* A line with just one character is left untouched. */
	if (before_last == NULL)
		return;

	/* The node is pool allocated, so only its font reference needs
	 * releasing; the memory goes away with the pool. */
	fz_drop_font(ctx, last->font);
	before_last->next = NULL;
	line->last_char = before_last;
}

/* Reverse, in place, the chain of characters from curr up to (but not
 * including) tail. Returns the new first character of the reversed
 * chain, whose last character now links to tail. If curr == tail,
 * tail is returned. */
static fz_stext_char *reverse_bidi_span(fz_stext_char *curr, fz_stext_char *tail)
{
	fz_stext_char *reversed = tail;

	while (curr != tail)
	{
		fz_stext_char *rest = curr->next;

		curr->next = reversed;
		reversed = curr;
		curr = rest;
	}

	return reversed;
}

/* Reverse, in place, every run of two or more consecutive characters on
 * the line that have a non-zero bidi value, then leave line->last_char
 * pointing at the final character of the reordered list. */
static void reverse_bidi_line(fz_stext_line *line)
{
	fz_stext_char *a, *b, **prev;
	/* prev is the link (first_char or some ->next) that points at 'a'. */
	prev = &line->first_char;
	for (a = line->first_char; a; a = a->next)
	{
		if (a->bidi)
		{
			/* Find b, the last character of the run starting at a. */
			b = a;
			while (b->next && b->next->bidi)
				b = b->next;
			/* After reversal 'a' is the last character of the run and
			 * a->next is whatever followed the run, so the loop step
			 * below skips the whole run at once. */
			if (a != b)
				*prev = reverse_bidi_span(a, b->next);
		}
		prev = &a->next;
		line->last_char = a;
	}
}

/* Return 1 if c is one of the recognised hyphen characters, 0 otherwise. */
static int is_hyphen(int c)
{
	switch (c)
	{
	case '-':    /* hyphen-minus */
	case 0xAD:   /* soft hyphen */
	case 0x2010: /* hyphen */
	case 0x2011: /* non-breaking hyphen */
		return 1;
	default:
		return 0;
	}
}

/* Return the dot product of the two vectors a and b. */
static float
vec_dot(const fz_point *a, const fz_point *b)
{
	return a->x * b->x + a->y * b->y;
}

/*
 * Decide whether a synthetic space may follow the given character.
 * Never after a space; otherwise only after code points below 0x700
 * (basic latin, greek, cyrillic, hebrew, arabic) or in 0x2000..0x20CF
 * (general punctuation, superscripts and subscripts, currency symbols).
 */
static int may_add_space(int lastchar)
{
	if (lastchar == ' ')
		return 0;
	if (lastchar < 0x700)
		return 1;
	return lastchar >= 0x2000 && lastchar <= 0x20CF;
}

/* Two coordinates count as 'close' when they differ by less than
 * size / FAKEBOLD_THRESHOLD_RECIP. */
#define FAKEBOLD_THRESHOLD_RECIP 10

/* Return non-zero if a and b are within the fake-bold threshold of one
 * another for text of the given size. */
static int
close(float a, float b, float size)
{
	float gap = a - b;

	if (gap < 0)
		gap = -gap;

	return FAKEBOLD_THRESHOLD_RECIP * gap < size;
}

/*
 * Return 1 if two fonts are to be treated as the same font: either they
 * are the same object, or they have the same name and identical 16 byte
 * digests. Return 0 otherwise.
 */
static int
font_equiv(fz_context *ctx, fz_font *f, fz_font *g)
{
	unsigned char digest_f[16];
	unsigned char digest_g[16];

	if (f == g)
		return 1;

	/* Differently named fonts never match; skip the digests. */
	if (strcmp(f->name, g->name))
		return 0;

	fz_font_digest(ctx, f, digest_f);
	fz_font_digest(ctx, g, digest_g);

	return !memcmp(digest_f, digest_g, sizeof digest_f);
}

/*
 * Look through a chain of blocks (recursing into struct blocks) for a
 * character that the new glyph appears to be overprinting: same code c,
 * origin 'close' to p in both x and y for the given size, and an
 * equivalent font.
 *
 * Returns 1 if the new glyph should be treated as an overprint of an
 * existing character (whose flags have been updated accordingly), or 0
 * if no acceptable match was found.
 */
static int
check_for_fake_bold(fz_context *ctx, fz_stext_block *block, fz_font *font, int c, fz_point p, float size, int flags)
{
	fz_stext_line *line;
	fz_stext_char *ch;

	for (; block != NULL; block = block->next)
	{
		if (block->type == FZ_STEXT_BLOCK_STRUCT)
		{
			if (block->u.s.down != NULL && check_for_fake_bold(ctx, block->u.s.down->first_block, font, c, p, size, flags))
				return 1;
		}
		else if (block->type == FZ_STEXT_BLOCK_TEXT)
		{
			for (line = block->u.t.first_line; line != NULL; line = line->next)
			{
				/* pr trails one character behind ch on the current line. */
				fz_stext_char *pr = NULL;
				for (ch = line->first_char; ch != NULL; ch = ch->next)
				{
					/* Not perfect, but it'll do! */
					if (ch->c == c && close(ch->origin.x, p.x, size) && close(ch->origin.y, p.y, size) && font_equiv(ctx, ch->font, font))
					{
						/* If we were filled before, and we are stroking now... */
						if ((ch->flags & (FZ_STEXT_FILLED | FZ_STEXT_STROKED)) == FZ_STEXT_FILLED &&
							(flags & (FZ_STEXT_FILLED | FZ_STEXT_STROKED)) == FZ_STEXT_STROKED)
						{
							/* Update this to be filled + stroked, but don't specifically mark it as fake bold. */
							ch->flags |= flags;
							return 1;
						}
						/* Overlaying spaces is tricksy. How can that count as boldening when it doesn't mark? We only accept these
						 * as boldening if either the char before, or the char after were also boldened. */
						/* Note that the new flags are merged into the matched
						 * character even if a space is then not accepted below. */
						ch->flags |= flags;

						if (c == ' ')
						{
							if ((pr && (pr->flags & FZ_STEXT_BOLD) != 0) ||
								(ch->next && (ch->next->flags & FZ_STEXT_BOLD) != 0))
							{
								/* OK, we can be bold. */
								ch->flags |= FZ_STEXT_BOLD;
								return 1;
							}
							/* Ignore this and keep going */
						}
						else
						{
							ch->flags |= FZ_STEXT_BOLD;
							return 1;
						}
					}
					pr = ch;
				}
			}
		}
	}

	return 0;
}

/*
 * Add one character to the page, deciding whether it continues the
 * current line, needs a synthetic space inserted before it, or starts a
 * new line or a new block.
 *
 * font, c, glyph: the font, character code and glyph id. A negative
 *	glyph marks a character with no glyph of its own (e.g. the later
 *	parts of an expanded ligature); if there is a current line, such
 *	characters are added at the pen without moving it.
 * trm: matrix for the glyph; trm.e/trm.f give its origin.
 * adv: the glyph's advance.
 * wmode: 0 for horizontal writing, otherwise vertical.
 * bidi: only bit 0 (RTL-ness) is kept on entry.
 * force_new_line: non-zero to start a new line regardless.
 * flags: flags to store on the character.
 *
 * Except on the two early returns, the device's record of the last
 * character (lastchar, lastbidi, lag_pen, pen, trm) is updated and
 * new_obj is cleared on the way out.
 */
static void
fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix trm, float adv, int wmode, int bidi, int force_new_line, int flags)
{
	fz_stext_page *page = dev->page;
	fz_stext_block *cur_block;
	fz_stext_line *cur_line;

	int new_para = 0;
	int new_line = 1;
	int add_space = 0;
	fz_point dir, ndir, p, q;
	float size;
	fz_point delta;
	float spacing = 0;
	float base_offset = 0;
	float dist;

	/* Preserve RTL-ness only (and ignore level) so we can use bit 2 as "visual" tag for reordering pass. */
	bidi = bidi & 1;

	/* dir = direction vector for motion. ndir = normalised(dir) */
	if (wmode == 0)
	{
		dir.x = 1;
		dir.y = 0;
	}
	else
	{
		dir.x = 0;
		dir.y = -1;
	}
	dir = fz_transform_vector(dir, trm);
	ndir = fz_normalize_vector(dir);

	size = fz_matrix_expansion(trm);

	/* We need to identify where glyphs 'start' (p) and 'stop' (q).
	 * Each glyph holds its 'start' position, and the next glyph in the
	 * span (or span->max if there is no next glyph) holds its 'end'
	 * position.
	 *
	 * For both horizontal and vertical motion, trm->{e,f} gives the
	 * origin (usually the bottom left) of the glyph.
	 *
	 * In horizontal mode:
	 *   + p is bottom left.
	 *   + q is the bottom right
	 * In vertical mode:
	 *   + p is top left (where it advanced from)
	 *   + q is bottom left
	 */
	if (wmode == 0)
	{
		p.x = trm.e;
		p.y = trm.f;
		q.x = trm.e + adv * dir.x;
		q.y = trm.f + adv * dir.y;
	}
	else
	{
		p.x = trm.e - adv * dir.x;
		p.y = trm.f - adv * dir.y;
		q.x = trm.e;
		q.y = trm.f;
	}

	if ((dev->opts.flags & FZ_STEXT_COLLECT_STYLES) != 0)
	{
		if (glyph == -1)
		{
			/* A glyph-less follower of a character that was dropped as
			 * fake bold is dropped too. */
			if (dev->last_was_fake_bold)
				goto move_pen_and_exit;
		}
		else if (check_for_fake_bold(ctx, page->first_block, font, c, p, size, flags))
		{
			dev->last_was_fake_bold = 1;
			goto move_pen_and_exit;
		}
		dev->last_was_fake_bold = 0;
	}

	/* Find current position to enter new text. */
	cur_block = page->last_struct ? page->last_struct->last_block : page->last_block;
	if (cur_block && cur_block->type != FZ_STEXT_BLOCK_TEXT)
		cur_block = NULL;
	cur_line = cur_block ? cur_block->u.t.last_line : NULL;

	if (cur_line && glyph < 0)
	{
		/* Don't advance pen or break lines for no-glyph characters in a cluster */
		add_char_to_line(ctx, page, cur_line, trm, font, size, c, (dev->flags & FZ_STEXT_ACCURATE_BBOXES) ? glyph : NON_ACCURATE_GLYPH, &dev->pen, &dev->pen, bidi, dev->color, 0, flags, dev->flags);
		dev->lastbidi = bidi;
		dev->lastchar = c;
		return;
	}

	if (cur_line == NULL || cur_line->wmode != wmode || vec_dot(&ndir, &cur_line->dir) < 0.999f)
	{
		/* If the matrix has changed rotation, or the wmode is different (or if we don't have a line at all),
		 * then we can't append to the current block/line. */
		new_para = 1;
		new_line = 1;
	}
	else
	{
		/* Detect fake bold where text is printed twice in the same place. */
		/* Largely supplanted by the check_for_fake_bold mechanism above,
		 * but we leave this in for backward compatibility as it's cheap,
		 * and works even when FZ_STEXT_COLLECT_STYLES is not set. */
		dist = hypotf(q.x - dev->pen.x, q.y - dev->pen.y) / size;
		if (dist < FAKE_BOLD_MAX_DIST && c == dev->lastchar)
			return;

		/* Calculate how far we've moved since the last character. */
		delta.x = p.x - dev->pen.x;
		delta.y = p.y - dev->pen.y;

		/* The transform has not changed, so we know we're in the same
		 * direction. Calculate 2 distances; how far off the previous
		 * baseline we are, together with how far along the baseline
		 * we are from the expected position. */
		spacing = (ndir.x * delta.x + ndir.y * delta.y) / size;
		base_offset = (-ndir.y * delta.x + ndir.x * delta.y) / size;

		/* Only a small amount off the baseline - we'll take this */
		if (fabsf(base_offset) < BASE_MAX_DIST)
		{
			/* If mixed LTR and RTL content */
			if ((bidi & 1) != (dev->lastbidi & 1))
			{
				/* Ignore jumps within line when switching between LTR and RTL text. */
				new_line = 0;
			}

			/* RTL */
			else if (bidi & 1)
			{
				/* Measured from the start (not the end) of the previous character. */
				fz_point logical_delta = fz_make_point(p.x - dev->lag_pen.x, p.y - dev->lag_pen.y);
				float logical_spacing = (ndir.x * logical_delta.x + ndir.y * logical_delta.y) / size + adv;

				/* If the pen is where we would have been if we
				 * had advanced backwards from the previous
				 * character by this character's advance, we
				 * are probably seeing characters emitted in
				 * logical order.
				 */
				if (fabsf(logical_spacing) < SPACE_DIST)
				{
					new_line = 0;
				}

				/* However, if the pen has advanced to where we would expect it
				 * in an LTR context, we're seeing them emitted in visual order
				 * and should flag them for reordering!
				 */
				else if (fabsf(spacing) < SPACE_DIST)
				{
					bidi = 3; /* mark line as visual */
					new_line = 0;
				}

				/* And any other small jump could be a missing space. */
				else if (logical_spacing < 0 && logical_spacing > -SPACE_MAX_DIST)
				{
					if (wmode == 0 && may_add_space(dev->lastchar))
						add_space = 1;
					new_line = 0;
				}
				else if (spacing < 0 && spacing > -SPACE_MAX_DIST)
				{
					/* Motion is in line, but negative. We've probably got overlapping
					 * chars here. Live with it. */
					new_line = 0;
				}
				else if (spacing > 0 && spacing < SPACE_MAX_DIST)
				{
					bidi = 3; /* mark line as visual */
					if (wmode == 0 && may_add_space(dev->lastchar))
						add_space = 1;
					new_line = 0;
				}

				else
				{
					/* Motion is large and unexpected (probably a new table column). */
					new_line = 1;
				}
			}

			/* LTR or neutral character */
			else
			{
				if (fabsf(spacing) < SPACE_DIST)
				{
					/* Motion is in line and small enough to ignore. */
					new_line = 0;
				}
				else if (spacing < 0 && spacing > -SPACE_MAX_DIST)
				{
					/* Motion is in line, but negative. We've probably got overlapping
					 * chars here. Live with it. */
					new_line = 0;
				}
				else if (spacing > 0 && spacing < SPACE_MAX_DIST)
				{
					/* Motion is forward in line and large enough to warrant us adding a space. */
					if (wmode == 0 && may_add_space(dev->lastchar))
						add_space = 1;
					new_line = 0;
				}
				else
				{
					/* Motion is large and unexpected (probably a new table column). */
					new_line = 1;
				}
			}
		}

		/* Enough for a new line, but not enough for a new paragraph */
		else if (fabsf(base_offset) <= PARAGRAPH_DIST)
		{
			/* Check indent to spot text-indent style paragraphs */
			if (wmode == 0 && cur_line && dev->new_obj)
				if ((p.x - dev->start.x) > 0.5f)
					new_para = 1;
			new_line = 1;
		}

		/* Way off the baseline - open a new paragraph */
		else
		{
			new_para = 1;
			new_line = 1;
		}
	}

	/* Start a new block (but only at the beginning of a text object) */
	if (new_para || !cur_block)
	{
		cur_block = add_text_block_to_page(ctx, page);
		cur_line = cur_block->u.t.last_line;
	}

	/* When dehyphenating, a line break straight after a hyphen is undone:
	 * the hyphen is removed and the text carries on along the same line. */
	if (new_line && (dev->flags & FZ_STEXT_DEHYPHENATE) && is_hyphen(dev->lastchar))
	{
		remove_last_char(ctx, cur_line);
		new_line = 0;
	}

	/* Start a new line */
	if (new_line || !cur_line || force_new_line)
	{
		cur_line = add_line_to_block(ctx, page, cur_block, &ndir, wmode, bidi);
		dev->start = p;
	}

	/* Add synthetic space */
	if (add_space && !(dev->flags & FZ_STEXT_INHIBIT_SPACES))
		add_char_to_line(ctx, page, cur_line, trm, font, size, ' ', (dev->flags & FZ_STEXT_ACCURATE_BBOXES) ? NON_ACCURATE_GLYPH_ADDED_SPACE : NON_ACCURATE_GLYPH, &dev->pen, &p, bidi, dev->color, 1, flags, dev->flags);

	add_char_to_line(ctx, page, cur_line, trm, font, size, c, (dev->flags & FZ_STEXT_ACCURATE_BBOXES) ? glyph : NON_ACCURATE_GLYPH, &p, &q, bidi, dev->color, 0, flags, dev->flags);

move_pen_and_exit:
	/* Remember this character so that the next one can be positioned
	 * relative to it. */
	dev->lastchar = c;
	dev->lastbidi = bidi;
	dev->lag_pen = p;
	dev->pen = q;

	dev->new_obj = 0;
	dev->trm = trm;
}

/*
 * Front end to fz_add_stext_char_imp. Characters with code -1 are
 * dropped. Unless the corresponding 'preserve' options are set, latin
 * ligatures are expanded into their component letters and the various
 * unicode space characters are all converted to a plain space.
 */
static void
fz_add_stext_char(fz_context *ctx,
	fz_stext_device *dev,
	fz_font *font,
	int c,
	int glyph,
	fz_matrix trm,
	float adv,
	int wmode,
	int bidi,
	int force_new_line,
	int flags)
{
	const char *parts = NULL;

	/* ignore when one unicode character maps to multiple glyphs */
	if (c == -1)
		return;

	if (dev->flags & FZ_STEXT_ACCURATE_ASCENDERS)
		fz_calculate_font_ascender_descender(ctx, font);

	if (!(dev->flags & FZ_STEXT_PRESERVE_LIGATURES))
	{
		switch (c)
		{
		case 0xFB00: parts = "ff"; break;
		case 0xFB01: parts = "fi"; break;
		case 0xFB02: parts = "fl"; break;
		case 0xFB03: parts = "ffi"; break;
		case 0xFB04: parts = "ffl"; break;
		case 0xFB05: /* long st */
		case 0xFB06: parts = "st"; break;
		default: break;
		}

		if (parts != NULL)
		{
			/* The first letter carries the glyph, its advance and any
			 * forced line break; the rest are glyph-less followers. */
			fz_add_stext_char_imp(ctx, dev, font, *parts, glyph, trm, adv, wmode, bidi, force_new_line, flags);
			while (*++parts)
				fz_add_stext_char_imp(ctx, dev, font, *parts, -1, trm, 0, wmode, bidi, 0, flags);
			return;
		}
	}

	if (!(dev->flags & FZ_STEXT_PRESERVE_WHITESPACE))
	{
		/* tab, space, no-break space, ogham space mark, mongolian vowel
		 * separator, en quad .. hair space, narrow no-break space,
		 * medium mathematical space, ideographic space. */
		if (c == 0x0009 || c == 0x0020 || c == 0x00A0 ||
			c == 0x1680 || c == 0x180E ||
			(c >= 0x2000 && c <= 0x200A) ||
			c == 0x202F || c == 0x205F || c == 0x3000)
			c = ' ';
	}

	fz_add_stext_char_imp(ctx, dev, font, c, glyph, trm, adv, wmode, bidi, force_new_line, flags);
}

/*
 * Work out the rectangle that content must touch to be collected.
 * With FZ_STEXT_CLIP this is the device's current scissor intersected
 * with the page mediabox (otherwise infinite); with FZ_STEXT_CLIP_RECT
 * the result is further intersected with the clip rect from the options.
 */
static fz_rect
current_clip(fz_context *ctx, fz_stext_device *dev)
{
	fz_rect clip;

	if (dev->flags & FZ_STEXT_CLIP)
		clip = fz_intersect_rect(fz_device_current_scissor(ctx, &dev->super), dev->page->mediabox);
	else
		clip = fz_infinite_rect;

	if (dev->flags & FZ_STEXT_CLIP_RECT)
		clip = fz_intersect_rect(clip, dev->opts.clip);

	return clip;
}

/*
 * Send items [start, end) of a text span through to the page.
 *
 * For each item that is not entirely clipped away, the device's 'last'
 * record (matrix, bidi level, wmode, font, flags) is updated, and the
 * item's unicode value is passed on. Items whose unicode value is the
 * replacement character can instead be given their cid or gid, depending
 * on the device options; only those particular characters are tagged with
 * FZ_STEXT_UNICODE_IS_CID / FZ_STEXT_UNICODE_IS_GID.
 *
 * flags: the base flags for every character in the span.
 */
static void
do_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_matrix ctm, int start, int end, int flags)
{
	fz_font *font = span->font;
	fz_matrix tm = span->trm;
	float adv;
	int unicode;
	int i;

	for (i = start; i < end; i++)
	{
		/* Flags for this character alone. The 'unicode is cid/gid' bits
		 * must not leak into later characters (or into dev->last.flags),
		 * so the span-wide 'flags' value is never modified. */
		int char_flags = flags;

		if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
		{
			fz_rect r = current_clip(ctx, dev);
			if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r))
			{
				dev->last.clipped = 1;
				continue;
			}
		}
		dev->last.clipped = 0;

		/* Calculate new pen location and delta */
		tm.e = span->items[i].x;
		tm.f = span->items[i].y;
		dev->last.trm = fz_concat(tm, ctm);
		dev->last.bidi_level = span->bidi_level;
		dev->last.wmode = span->wmode;
		if (font != dev->last.font)
		{
			fz_drop_font(ctx, dev->last.font);
			dev->last.font = fz_keep_font(ctx, font);
		}
		dev->last.valid = 1;
		dev->last.flags = flags;

		/* Calculate bounding box and new pen position based on font metrics */
		if (span->items[i].gid >= 0)
			adv = span->items[i].adv;
		else
			adv = 0;

		unicode = span->items[i].ucs;
		if (unicode == FZ_REPLACEMENT_CHARACTER)
		{
			if (dev->flags & FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE)
			{
				unicode = span->items[i].cid;
				char_flags |= FZ_STEXT_UNICODE_IS_CID;
			}
			else if (dev->flags & FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE)
			{
				unicode = span->items[i].gid;
				char_flags |= FZ_STEXT_UNICODE_IS_GID;
			}
		}

		/* Send the chars we have through. */
		fz_add_stext_char(ctx, dev, font,
			unicode,
			span->items[i].gid,
			dev->last.trm,
			adv,
			dev->last.wmode,
			dev->last.bidi_level,
			(i == 0) && (dev->flags & FZ_STEXT_PRESERVE_SPANS),
			char_flags);
	}
}

/* Return the rune at (zero based) position idx of a utf8 string, or -1
 * if the string ends at or before that position. */
static int
rune_index(const char *utf8, size_t idx)
{
	int rune;

	for (;;)
	{
		utf8 += fz_chartorune(&rune, utf8);
		if (rune == 0)
			return -1;
		if (idx-- == 0)
			return rune;
	}
}

/*
 * Send runes from the given actualtext string through as glyph-less
 * characters, using the position, font and flags remembered in
 * dev->last.
 *
 * i is the index given to the first rune sent and counts up from there.
 * If end is negative everything up to the end of the string is sent;
 * otherwise sending stops once i reaches end (or the string runs out).
 * Nothing is sent for an empty string, or if clipping is enabled and the
 * most recent text item was clipped.
 */
static void
flush_actualtext(fz_context *ctx, fz_stext_device *dev, const char *actualtext, int i, int end)
{
	if (actualtext[0] == 0)
		return;

	if ((dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) && dev->last.clipped)
		return;

	for (; end < 0 || i < end; i++)
	{
		int rune;

		actualtext += fz_chartorune(&rune, actualtext);
		if (rune == 0)
			break;

		fz_add_stext_char(ctx, dev, dev->last.font,
			rune,
			-1,
			dev->last.trm,
			0,
			dev->last.wmode,
			dev->last.bidi_level,
			(i == 0) && (dev->flags & FZ_STEXT_PRESERVE_SPANS),
			dev->last.flags);
	}
}

/*
 * Send a text span through while an actualtext is in force.
 *
 * The span's own characters are compared against the actualtext held in
 * mt->text. A matching prefix (and, where found, a matching postfix) is
 * sent through unchanged by do_extract; the glyphs in between are sent
 * with runes taken, in order, from the unmatched middle of the actualtext.
 *
 * On return mt->text has been shortened to whatever actualtext is still
 * to be sent (empty if a postfix was matched, as everything has then been
 * flushed).
 */
static void
do_extract_within_actualtext(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_matrix ctm, metatext_t *mt, int flags)
{
	/* We are within an actualtext block. This means we can't just add the chars
	 * as they are. We need to add the chars as they are meant to be. Sadly the
	 * actualtext mechanism doesn't help us at all with positioning. */
	fz_font *font = span->font;
	fz_matrix tm = span->trm;
	float adv;
	int start, i, end;
	int remaining;
	char *actualtext = mt->text;
	/* z counts runes, always relative to the current 'actualtext' pointer. */
	size_t z = fz_utflen(actualtext);

	/* If actualtext is empty, nothing to do! */
	if (z == 0)
		return;

	/* Now, we HOPE that the creator of a PDF will minimise the actual text
	 * differences, so that we'll get:
	 *   "Politicians <Actualtext="lie">fib</ActualText>, always."
	 * rather than:
	 *   "<Actualtext="Politicians lie, always">Politicians fib, always.</ActualText>
	 * but experience with PDF files tells us that this won't always be the case.
	 *
	 * We try to minimise the actualtext section here, just in case.
	 */

	/* Spot a matching prefix and send it. */
	for (start = 0; start < span->len; start++)
	{
		int rune;
		int len = fz_chartorune(&rune, actualtext);
		if (span->items[start].ucs != rune || rune == 0)
			break;
		actualtext += len; z--;
	}
	if (start != 0)
		do_extract(ctx, dev, span, ctm, 0, start, flags);

	if (start == span->len)
	{
		/* The prefix has consumed all this object. Just shorten the actualtext and we'll
		 * catch the rest next time. */
		z = strlen(actualtext)+1;
		memmove(mt->text, actualtext, z);
		return;
	}

	/* Spot a matching postfix. Can't send it til the end.
	 * The prefix may have used up the whole of the actualtext (z == 0)
	 * while glyphs remain in the span, so z must be checked here: both to
	 * stop z-1 wrapping around, and to stop the -1 that rune_index returns
	 * at the end of the string from 'matching' an item whose ucs is -1. */
	for (end = span->len; end > start && z > 0; end--)
	{
		/* Nasty n^2 algo here, cos backtracking through utf8 is not trivial. It'll do. */
		int rune = rune_index(actualtext, z-1);
		if (span->items[end-1].ucs != rune)
			break;
		z--;
	}
	/* So we can send end -> span->len at the end. */

	/* So we have at least SOME chars that don't match. */
	/* Now, do the difficult bit in the middle.*/
	/* items[start..end] have to be sent with the first z runes of what
	 * remains of the actualtext. */
	for (i = start; i < end; i++)
	{
		fz_text_item *item = &span->items[i];
		int rune = -1;

		if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
		{
			fz_rect r = current_clip(ctx, dev);
			if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r))
			{
				dev->last.clipped = 1;
				continue;
			}
		}
		dev->last.clipped = 0;

		/* z is relative to the end of the prefix, so compare it with the
		 * item's position relative to 'start', not its absolute index.
		 * Items beyond the available runes are sent with rune -1, which
		 * fz_add_stext_char ignores. */
		if ((size_t)(i - start) < z)
			actualtext += fz_chartorune(&rune, actualtext);

		/* Calculate new pen location and delta */
		tm.e = item->x;
		tm.f = item->y;
		dev->last.trm = fz_concat(tm, ctm);
		dev->last.bidi_level = span->bidi_level;
		dev->last.wmode = span->wmode;
		if (font != dev->last.font)
		{
			fz_drop_font(ctx, dev->last.font);
			dev->last.font = fz_keep_font(ctx, font);
		}
		dev->last.valid = 1;
		dev->last.flags = flags;

		/* Calculate bounding box and new pen position based on font metrics */
		if (item->gid >= 0)
			adv = item->adv;
		else
			adv = 0;

		fz_add_stext_char(ctx, dev, font,
			rune,
			span->items[i].gid,
			dev->last.trm,
			adv,
			dev->last.wmode,
			dev->last.bidi_level,
			(i == 0) && (dev->flags & FZ_STEXT_PRESERVE_SPANS),
			flags);
	}

	/* If we haven't spotted a postfix by this point, then don't force ourselves to output
	 * any more of the actualtext at this point. We might get a new text object that matches
	 * more of it. */
	if (end == span->len)
	{
		/* Shorten actualtext and exit. */
		z = strlen(actualtext)+1;
		memmove(mt->text, actualtext, z);
		return;
	}

	/* We found a matching postfix. It seems likely that this is going to be the only
	 * text object we get, so send any remaining actualtext now.
	 * What is left of the actualtext is the unsent middle followed by the
	 * postfix, and the postfix is sent from the span below. The postfix
	 * length is a number of runes, so the remainder must be measured in
	 * runes too (not bytes), or multi-byte text would be sent twice. */
	remaining = (int)fz_utflen(actualtext) - (span->len - end);
	if (remaining < 0)
		remaining = 0;
	flush_actualtext(ctx, dev, actualtext, i, i + remaining);

	/* Send the postfix */
	if (end != span->len)
		do_extract(ctx, dev, span, ctm, end, span->len, flags);

	mt->text[0] = 0;
}

/* Extract one text span into the stext device.
 *
 * Empty spans are ignored. If an ActualText metatext is currently in
 * force (and the options do not ask for ActualText to be ignored) the
 * span is reconciled against it; otherwise the whole span is extracted
 * directly. 'flags' is passed through unchanged to the extractor. */
static void
fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_matrix ctm, int flags)
{
	metatext_t *actual = NULL;

	/* Nothing to extract from an empty span. */
	if (span->len == 0)
		return;

	/* Only look for an enclosing actualtext when we honour them. */
	if ((dev->opts.flags & FZ_STEXT_IGNORE_ACTUALTEXT) == 0)
		actual = find_actualtext(dev);

	if (actual == NULL)
	{
		do_extract(ctx, dev, span, ctm, 0, span->len, flags);
		return;
	}

	do_extract_within_actualtext(ctx, dev, span, ctm, actual, flags);
}

/* Pack a color and an alpha into a single 32 bit value.
 *
 * The color is converted to device RGB. Each of alpha, red, green and blue
 * is scaled by 255, has 0.5 added before the integer conversion, and is
 * clamped to 0..255. The result holds alpha in bits 24-31, red in bits
 * 16-23, green in bits 8-15 and blue in bits 0-7. */
static uint32_t hexrgba_from_color(fz_context *ctx, fz_colorspace *colorspace, const float *color, float alpha)
{
	float rgb[3];
	uint32_t a, r, g, b;

	fz_convert_color(ctx, colorspace, color, fz_device_rgb(ctx), rgb, NULL, fz_default_color_params);

	/* Widen to unsigned BEFORE shifting. Shifting the int value 255 left
	 * by 24 would move a set bit into the sign bit of a 32 bit int, which
	 * is undefined behaviour. */
	a = (uint32_t)fz_clampi(alpha * 255 + 0.5f, 0, 255);
	r = (uint32_t)fz_clampi(rgb[0] * 255 + 0.5f, 0, 255);
	g = (uint32_t)fz_clampi(rgb[1] * 255 + 0.5f, 0, 255);
	b = (uint32_t)fz_clampi(rgb[2] * 255 + 0.5f, 0, 255);

	return (a << 24) | (r << 16) | (g << 8) | b;
}

/* Handle filled text: record the fill color, then extract every span of
 * the text object flagged as FZ_STEXT_FILLED.
 *
 * A text object identical to the previous one is skipped, unless styles
 * are being collected. The text object is then remembered as 'lasttext'. */
static void
fz_stext_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm,
	fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_text_span *span;
	int collecting_styles = (tdev->opts.flags & FZ_STEXT_COLLECT_STYLES) != 0;

	/* A repeat of the last text object adds nothing unless we want styles. */
	if (!collecting_styles && text == tdev->lasttext)
		return;

	tdev->color = hexrgba_from_color(ctx, colorspace, color, alpha);
	tdev->new_obj = 1;

	span = text->head;
	while (span != NULL)
	{
		fz_stext_extract(ctx, tdev, span, ctm, FZ_STEXT_FILLED);
		span = span->next;
	}

	/* Swap our remembered text object for this one. */
	fz_drop_text(ctx, tdev->lasttext);
	tdev->lasttext = fz_keep_text(ctx, text);
}

/* Handle stroked text: record the stroke color, then extract every span
 * of the text object flagged as FZ_STEXT_STROKED. The stroke state itself
 * is not used.
 *
 * A text object identical to the previous one is skipped, unless styles
 * are being collected. The text object is then remembered as 'lasttext'. */
static void
fz_stext_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
	fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_text_span *span;
	int collecting_styles = (tdev->opts.flags & FZ_STEXT_COLLECT_STYLES) != 0;

	/* A repeat of the last text object adds nothing unless we want styles. */
	if (!collecting_styles && text == tdev->lasttext)
		return;

	tdev->color = hexrgba_from_color(ctx, colorspace, color, alpha);
	tdev->new_obj = 1;

	span = text->head;
	while (span != NULL)
	{
		fz_stext_extract(ctx, tdev, span, ctm, FZ_STEXT_STROKED);
		span = span->next;
	}

	/* Swap our remembered text object for this one. */
	fz_drop_text(ctx, tdev->lasttext);
	tdev->lasttext = fz_keep_text(ctx, text);
}

/* Handle text used as a clip: extract every span flagged as
 * FZ_STEXT_FILLED | FZ_STEXT_CLIPPED, with the recorded color set to 0.
 * The scissor argument is not used.
 *
 * A text object identical to the previous one is skipped, unless styles
 * are being collected. The text object is then remembered as 'lasttext'. */
static void
fz_stext_clip_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, fz_rect scissor)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_text_span *span;
	int collecting_styles = (tdev->opts.flags & FZ_STEXT_COLLECT_STYLES) != 0;

	/* A repeat of the last text object adds nothing unless we want styles. */
	if (!collecting_styles && text == tdev->lasttext)
		return;

	tdev->color = 0;
	tdev->new_obj = 1;

	span = text->head;
	while (span != NULL)
	{
		fz_stext_extract(ctx, tdev, span, ctm, FZ_STEXT_FILLED | FZ_STEXT_CLIPPED);
		span = span->next;
	}

	/* Swap our remembered text object for this one. */
	fz_drop_text(ctx, tdev->lasttext);
	tdev->lasttext = fz_keep_text(ctx, text);
}

/* Handle stroked text used as a clip: extract every span flagged as
 * FZ_STEXT_STROKED | FZ_STEXT_CLIPPED, with the recorded color set to 0.
 * Neither the stroke state nor the scissor is used.
 *
 * A text object identical to the previous one is skipped, unless styles
 * are being collected. The text object is then remembered as 'lasttext'. */
static void
fz_stext_clip_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_text_span *span;
	int collecting_styles = (tdev->opts.flags & FZ_STEXT_COLLECT_STYLES) != 0;

	/* A repeat of the last text object adds nothing unless we want styles. */
	if (!collecting_styles && text == tdev->lasttext)
		return;

	tdev->color = 0;
	tdev->new_obj = 1;

	span = text->head;
	while (span != NULL)
	{
		fz_stext_extract(ctx, tdev, span, ctm, FZ_STEXT_STROKED | FZ_STEXT_CLIPPED);
		span = span->next;
	}

	/* Swap our remembered text object for this one. */
	fz_drop_text(ctx, tdev->lasttext);
	tdev->lasttext = fz_keep_text(ctx, text);
}

/* Handle text that is neither filled, stroked nor clipped: extract every
 * span with no style flags, with the recorded color set to 0.
 *
 * A text object identical to the previous one is skipped, unless styles
 * are being collected. The text object is then remembered as 'lasttext'. */
static void
fz_stext_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_text_span *span;
	int collecting_styles = (tdev->opts.flags & FZ_STEXT_COLLECT_STYLES) != 0;

	/* A repeat of the last text object adds nothing unless we want styles. */
	if (!collecting_styles && text == tdev->lasttext)
		return;

	tdev->color = 0;
	tdev->new_obj = 1;

	span = text->head;
	while (span != NULL)
	{
		fz_stext_extract(ctx, tdev, span, ctm, 0);
		span = span->next;
	}

	/* Swap our remembered text object for this one. */
	fz_drop_text(ctx, tdev->lasttext);
	tdev->lasttext = fz_keep_text(ctx, text);
}

/* Open a new metatext scope and push it onto the device's metatext stack.
 *
 * When the new scope is an ActualText, any ActualText already in force has
 * its remaining text flushed first, and the 'last' text position is
 * invalidated. The new entry takes a private copy of 'text' (or NULL if
 * none was given) and starts with empty bounds. */
static void
fz_stext_begin_metatext(fz_context *ctx, fz_device *dev, fz_metatext meta, const char *text)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	metatext_t *enclosing = find_actualtext(tdev);
	metatext_t *entry;

	if (meta == FZ_METATEXT_ACTUALTEXT)
	{
		/* Finish off the outer actualtext before nesting a new one. */
		if (enclosing != NULL)
			flush_actualtext(ctx, tdev, enclosing->text, 0, -1);
		tdev->last.valid = 0;
	}

	entry = fz_malloc_struct(ctx, metatext_t);

	/* Link the entry in before filling it, exactly as callers expect. */
	entry->prev = tdev->metatext;
	tdev->metatext = entry;
	entry->type = meta;
	if (text)
		entry->text = fz_strdup(ctx, text);
	else
		entry->text = NULL;
	entry->bounds = fz_empty_rect;
}

/* Remove the innermost metatext entry from the device's stack and free it.
 * The bounds gathered for the removed entry are merged into its parent
 * (if there is one). Does nothing if the stack is empty. */
static void
pop_metatext(fz_context *ctx, fz_stext_device *dev)
{
	metatext_t *top = dev->metatext;
	metatext_t *parent;
	fz_rect top_bounds;

	if (top == NULL)
		return;

	/* Take what we need from the entry before it is freed. */
	parent = top->prev;
	top_bounds = top->bounds;

	fz_free(ctx, top->text);
	fz_free(ctx, top);

	dev->metatext = parent;
	if (parent != NULL)
		parent->bounds = fz_union_rect(parent->bounds, top_bounds);
}

/* Close the innermost metatext scope.
 *
 * Non-ActualText scopes are simply popped. For an ActualText scope, any
 * text that has not yet been sent is flushed:
 *  - at the 'last' text position, if we have a valid one;
 *  - otherwise across the bounds of the non-text content collected while
 *    the scope was open (images, shades, paths);
 *  - otherwise it cannot be positioned, and the scope is popped with a
 *    warning (the warning is suppressed when clipping is enabled in the
 *    stext options).
 *
 * A mismatched call (empty stack) is silently ignored. */
static void
fz_stext_end_metatext(fz_context *ctx, fz_device *dev)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_font *myfont = NULL;

	if (!tdev->metatext)
		return; /* Mismatched pop. Live with it. */

	if (tdev->metatext->type != FZ_METATEXT_ACTUALTEXT)
	{
		/* We only deal with ActualText here. Just pop anything else off,
		 * and we're done. */
		pop_metatext(ctx, tdev);
		return;
	}

	/* If we have a 'last' text position, send the content after that. */
	if (tdev->last.valid)
	{
		flush_actualtext(ctx, tdev, tdev->metatext->text, 0, -1);
		pop_metatext(ctx, tdev);
		tdev->last.valid = 0;
		return;
	}

	if (fz_is_empty_rect(tdev->metatext->bounds))
	{
		/* No text position and no enclosing content: nowhere to put the text.
		 * The clip flags are stext options, so they must be read from the
		 * stext device's options rather than from the generic fz_device
		 * flags word, whose bits have unrelated meanings. */
		if ((tdev->opts.flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) == 0)
			fz_warn(ctx, "Actualtext with no position. Text may be lost or mispositioned.");
		pop_metatext(ctx, tdev);
		return;
	}

	/* We have collected a rectangle for content that encloses the actual text,
	 * so send the content there: build a matrix mapping the unit square
	 * onto those bounds. */
	tdev->last.trm.a = tdev->metatext->bounds.x1 - tdev->metatext->bounds.x0;
	tdev->last.trm.b = 0;
	tdev->last.trm.c = 0;
	tdev->last.trm.d = tdev->metatext->bounds.y1 - tdev->metatext->bounds.y0;
	tdev->last.trm.e = tdev->metatext->bounds.x0;
	tdev->last.trm.f = tdev->metatext->bounds.y0;

	fz_var(myfont);

	fz_try(ctx)
	{
		/* The flush needs a font; borrow Helvetica if we have never seen one. */
		if (tdev->last.font == NULL)
		{
			myfont = fz_new_base14_font(ctx, "Helvetica");
			tdev->last.font = myfont;
		}
		flush_actualtext(ctx, tdev, tdev->metatext->text, 0, -1);
		pop_metatext(ctx, tdev);
	}
	fz_always(ctx)
	{
		/* Undo the temporary font loan so the device never owns it. */
		if (myfont)
		{
			tdev->last.font = NULL;
			fz_drop_font(ctx, myfont);
		}
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}


/* Images and shadings */

/* Handle an image being painted.
 *
 * If an actualtext is in force, its bounds grow to include the area the
 * image covers (the unit square transformed by ctm). The image itself is
 * only recorded on the page when images are being preserved and the alpha
 * is at least 50%. */
static void
fz_stext_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_rect *bounds = actualtext_bounds(tdev);

	if (bounds != NULL)
	{
		static const fz_rect unit_square = { 0, 0, 1, 1 };
		fz_rect covered = fz_transform_rect(unit_square, ctm);

		*bounds = fz_union_rect(*bounds, covered);
	}

	/* An alpha below 50% is probably a watermark or effect or something,
	 * so such images are skipped even when preserving images. */
	if ((tdev->opts.flags & FZ_STEXT_PRESERVE_IMAGES) != 0 && alpha >= 0.5f)
		add_image_block_to_page(ctx, tdev->page, ctm, img);
}

/* Handle an image mask being painted. The mask is treated exactly like an
 * ordinary image: the colorspace and color arguments are not used, and
 * everything else is forwarded to fz_stext_fill_image. */
static void
fz_stext_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm,
		fz_colorspace *cspace, const float *color, float alpha, fz_color_params color_params)
{
	fz_stext_fill_image(ctx, dev, img, ctm, alpha, color_params);
}

/* Render a shade into a new RGB image.
 *
 * The shade is bounded under *in_out_ctm, intersected with 'scissor', and
 * painted into a pixmap covering the resulting integer bbox. The pixmap
 * has an alpha channel unless the shade supplies its own background.
 *
 * On success *in_out_ctm is replaced by the matrix that places the returned
 * image: scaled by the pixmap's width and height and translated to the
 * pixmap's origin. On error the exception is rethrown and *in_out_ctm is
 * left untouched.
 *
 * Returns a new image reference that the caller must drop. */
static fz_image *
fz_new_image_from_shade(fz_context *ctx, fz_shade *shade, fz_matrix *in_out_ctm, fz_color_params color_params, fz_rect scissor)
{
	fz_matrix ctm = *in_out_ctm;
	fz_matrix placement;
	fz_pixmap *pix;
	fz_image *img = NULL;
	fz_rect bounds;
	fz_irect bbox;

	bounds = fz_bound_shade(ctx, shade, ctm);
	bounds = fz_intersect_rect(bounds, scissor);
	bbox = fz_irect_from_rect(bounds);

	pix = fz_new_pixmap_with_bbox(ctx, fz_device_rgb(ctx), bbox, NULL, !shade->use_background);

	/* Capture the placement now, while we still own a reference to the
	 * pixmap. Reading these fields after fz_drop_pixmap would only be safe
	 * for as long as the image happens to keep the pixmap alive. */
	placement.a = pix->w;
	placement.b = 0;
	placement.c = 0;
	placement.d = pix->h;
	placement.e = pix->x;
	placement.f = pix->y;

	fz_try(ctx)
	{
		if (shade->use_background)
			fz_fill_pixmap_with_color(ctx, pix, shade->colorspace, shade->background, color_params);
		else
			fz_clear_pixmap(ctx, pix);
		fz_paint_shade(ctx, shade, NULL, ctm, pix, color_params, bbox, NULL, NULL);
		img = fz_new_image_from_pixmap(ctx, pix, NULL);
	}
	fz_always(ctx)
		fz_drop_pixmap(ctx, pix);
	fz_catch(ctx)
		fz_rethrow(ctx);

	*in_out_ctm = placement;
	return img;
}

/* Handle a shade being painted.
 *
 * When images are not being preserved, the only effect is to grow the
 * bounds of any actualtext in force by the shade's bounds. Otherwise the
 * shade is rendered to an image (limited to the current scissor, the
 * optional clip rectangle from the stext options, and the page mediabox)
 * and handed to fz_stext_fill_image. */
static void
fz_stext_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shade, fz_matrix ctm, float alpha, fz_color_params color_params)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_rect *bounds = actualtext_bounds(tdev);
	fz_matrix local_ctm;
	fz_rect scissor;
	fz_image *image;

	/* If we aren't keeping images there is no need to generate the entire
	 * image; at most we update the actualtext bounds. */
	if ((tdev->opts.flags & FZ_STEXT_PRESERVE_IMAGES) == 0)
	{
		if (bounds)
			*bounds = fz_union_rect(*bounds, fz_bound_shade(ctx, shade, ctm));
		return;
	}

	local_ctm = ctm;
	scissor = fz_device_current_scissor(ctx, dev);
	/* FZ_STEXT_CLIP_RECT is an stext option that travels with opts.clip,
	 * so test it on the stext options rather than on the generic
	 * fz_device flags word. */
	if (tdev->opts.flags & FZ_STEXT_CLIP_RECT)
		scissor = fz_intersect_rect(scissor, tdev->opts.clip);
	scissor = fz_intersect_rect(scissor, tdev->page->mediabox);
	image = fz_new_image_from_shade(ctx, shade, &local_ctm, color_params, scissor);
	fz_try(ctx)
		fz_stext_fill_image(ctx, dev, image, local_ctm, alpha, color_params);
	fz_always(ctx)
		fz_drop_image(ctx, image);
	fz_catch(ctx)
		fz_rethrow(ctx);
}

/* Walk a chain of blocks (descending into structure blocks) and, for each
 * text block:
 *  - recompute every line's bbox as the union of its characters' quads;
 *  - grow the block's bbox to include every line's bbox;
 *  - reverse the characters of any line containing a char with bidi == 3.
 * A line with no characters keeps whatever bbox it already had. */
static void
fixup_bboxes_and_bidi(fz_context *ctx, fz_stext_block *block)
{
	for ( ; block != NULL; block = block->next)
	{
		fz_stext_line *line;

		/* Structure blocks hold their content one level down. */
		if (block->type == FZ_STEXT_BLOCK_STRUCT && block->u.s.down)
			fixup_bboxes_and_bidi(ctx, block->u.s.down->first_block);

		if (block->type != FZ_STEXT_BLOCK_TEXT)
			continue;

		line = block->u.t.first_line;
		while (line != NULL)
		{
			fz_stext_char *ch = line->first_char;
			int needs_reorder = 0;

			if (ch != NULL)
			{
				/* The first char seeds the bbox; the rest extend it. */
				line->bbox = fz_rect_from_quad(ch->quad);
				if (ch->bidi == 3)
					needs_reorder = 1;

				for (ch = ch->next; ch != NULL; ch = ch->next)
				{
					line->bbox = fz_union_rect(line->bbox, fz_rect_from_quad(ch->quad));
					if (ch->bidi == 3)
						needs_reorder = 1;
				}
			}

			block->bbox = fz_union_rect(block->bbox, line->bbox);
			if (needs_reorder)
				reverse_bidi_line(line);

			line = line->next;
		}
	}
}

/* Slide point *a along the straight line through *a and b until its
 * x coordinate equals 'x', adjusting y by linear interpolation.
 * The caller must ensure b.x != a->x, as the interpolation divides by
 * that difference. */
static void
advance_to_x(fz_point *a, fz_point b, float x)
{
	a->y += (b.y - a->y) * (x - a->x) / (b.x - a->x);
	a->x = x;
}

/* Slide point *a along the straight line through *a and b until its
 * y coordinate equals 'y', adjusting x by linear interpolation.
 * The caller must ensure b.y != a->y, as the interpolation divides by
 * that difference. */
static void
advance_to_y(fz_point *a, fz_point b, float y)
{
	a->x += (b.x - a->x) * (y - a->y) / (b.y - a->y);
	a->y = y;
}

/* Decide whether the line from a to b touches rectangle r.
 *
 * Lines lying wholly to one side of the rectangle are rejected at once.
 * Otherwise point a is pulled along the line onto each rectangle edge it
 * lies beyond, in turn, and the result is whether the adjusted point ends
 * up inside r. Returns non-zero for a crossing, 0 otherwise. */
static int
line_crosses_rect(fz_point a, fz_point b, fz_rect r)
{
	/* Both ends beyond the same edge: cannot possibly cross. */
	if ((a.x < r.x0 && b.x < r.x0) ||
		(a.x > r.x1 && b.x > r.x1) ||
		(a.y < r.y0 && b.y < r.y0) ||
		(a.y > r.y1 && b.y > r.y1))
		return 0;

	/* Each step works on the point as moved by the previous steps, so the
	 * order of these tests matters. */
	if (a.x < r.x0)
		advance_to_x(&a, b, r.x0);
	if (a.x > r.x1)
		advance_to_x(&a, b, r.x1);
	if (a.y < r.y0)
		advance_to_y(&a, b, r.y0);
	if (a.y > r.y1)
		advance_to_y(&a, b, r.y1);

	return fz_is_point_inside_rect(a, r);
}

/* Return the absolute value of the 2D cross product of (origin - p) with
 * dir. When dir is a unit vector this is the perpendicular distance from
 * p to the line through origin with direction dir. */
static float
calculate_ascent(fz_point p, fz_point origin, fz_point dir)
{
	return fabsf((origin.x-p.x)*dir.y - (origin.y-p.y)*dir.x);
}

/* Create us a rect from the given quad, but extend it downwards
 * to allow for underlines that pass under the glyphs, and (for glyphs
 * with little or no ascent) upwards so that strikeouts can be spotted.
 *
 * quad:   the character's quad.
 * dir:    the direction of the line the character is on (treated as a
 *         unit vector).
 * origin: the character's origin on the baseline.
 * size:   the character's size.
 *
 * Returns the bounding rect of the adjusted quad. */
static fz_rect expanded_rect_from_quad(fz_quad quad, fz_point dir, fz_point origin, float size)
{
	/* Consider the two rects from A and g respectively.
	 *
	 * ul +------+ ur   or
	 *    |  /\  |         ul +------+ ur
	 *    | /__\ |            | /''\ |
	 *    |/    \|            |(    ||
	 * ll +------+ lr         | ''''||
	 *                        |  ''' | <-expected underline level
	 *                     ll +------+ lr
	 *
	 * So an underline won't cross A's rect, but will cross g's.
	 * We want to make a rect that includes a suitable amount of
	 * space underneath. The information we have available to us
	 * is summed up here:
	 *
	 *  ul +---------+ ur
	 *     |         |
	 *     | origin  |
	 *     |+----------> dir
	 *     |         |
	 *  ll +---------+ lr
	 *
	 * Consider the distance from ul to the line that passes through
	 * the origin with direction dir. Similarly, consider the distance
	 * from ur to the same line. This can be thought of as the 'ascent'
	 * of this character.
	 *
	 * We'd like the distance from ul to ll to be greater than this, so
	 * as to ensure we cover the possible location where an underline
	 * might reasonably go.
	 *
	 * If we have a line (l) through point A with direction vector u,
	 * the distance between point P and line(l) is:
	 *
	 * d(P,l) = || AP x u || / || u ||
	 *
	 * where x is the cross product.
	 *
	 * For us, because || dir || = 1:
	 *
	 * d(ul, origin) = || (origin-ul) x dir ||
	 *
	 * The cross product is only defined in 3 (or 7!) dimensions, so
	 * extend both vectors into 3d by defining a 0 z component.
	 *
	 * (origin-ul) x dir = [ (origin.y - ul.y) . 0     - 0                 . dir.y ]
	 *                     [ 0                 . dir.x - (origin.x - ul.x) . 0     ]
	 *                     [ (origin.x - ul.x) . dir.y - (origin.y - ul.y) . dir.x ]
	 *
	 * So d(ul, origin) = abs(D) where D = (origin.x-ul.x).dir.y - (origin.y-ul.y).dir.x
	 */
	/* The ascent is the average of the distances of the two top corners from the baseline. */
	float ascent = (calculate_ascent(quad.ul, origin, dir) + calculate_ascent(quad.ur, origin, dir)) / 2;
	/* The height is the average length of the quad's left and right edges. */
	fz_point left = { quad.ll.x - quad.ul.x, quad.ll.y - quad.ul.y };
	fz_point right = { quad.lr.x - quad.ur.x, quad.lr.y - quad.ur.y };
	float height = (hypotf(left.x, left.y) + hypotf(right.x, right.y))/2;
	int neg = 0;
	float extra_rise = 0;

	/* Spaces will have 0 ascent. underscores will have small ascent.
	 * We want a sane ascent to be able to spot strikeouts, but not
	 * so big that it incorporates lines above the text, like borders. */
	if (ascent < 0.75*size)
		extra_rise = 0.75*size - ascent;

	/* We'd like height to be at least ascent + 1/4 size */
	/* NOTE(review): height is an average of hypotf() results, which are
	 * never negative, so the 'neg' handling here looks unreachable. */
	if (height < 0)
		neg = 1, height = -height;
	if (height < ascent + size * 0.25f)
		height = ascent + size * 0.25f;

	/* From here on, height is how far the bottom edge moves: the part of
	 * the (possibly enlarged) height that is not accounted for by the ascent. */
	height -= ascent;
	if (neg)
		height = -height;
	/* (-dir.y, dir.x) is dir rotated by a quarter turn. The bottom corners
	 * move along it by 'height'; the top corners move the opposite way by
	 * 'extra_rise'. */
	quad.ll.x += - height * dir.y;
	quad.ll.y +=   height * dir.x;
	quad.lr.x += - height * dir.y;
	quad.lr.y +=   height * dir.x;
	quad.ul.x -= - extra_rise * dir.y;
	quad.ul.y -=   extra_rise * dir.x;
	quad.ur.x -= - extra_rise * dir.y;
	quad.ur.y -=   extra_rise * dir.x;

	return fz_rect_from_quad(quad);
}

/* Approximate float equality: returns non-zero when a and b differ by
 * less than EPSILON, and 0 otherwise. */
static int feq(float a,float b)
{
#define EPSILON 0.00001
	float diff = a - b;
	float magnitude = (diff < 0) ? -diff : diff;

	return magnitude < EPSILON;
}

/* Test one candidate line (from -> to, with unit direction 'dir' and the
 * given thickness) against every character in a chain of text blocks,
 * flagging the characters it crosses as struck out or underlined.
 *
 * Only text lines running parallel to the candidate (in either sense) are
 * considered. Characters smaller than four times the thickness are left
 * alone. A crossing above the baseline is a strikeout; otherwise it is an
 * underline. */
static void
check_strikeout(fz_context *ctx, fz_stext_block *block, fz_point from, fz_point to, fz_point dir, float thickness)
{
	fz_stext_line *line;
	fz_stext_char *ch;

	for ( ; block != NULL; block = block->next)
	{
		if (block->type != FZ_STEXT_BLOCK_TEXT)
			continue;

		for (line = block->u.t.first_line; line != NULL; line = line->next)
		{
			int same_way = feq(line->dir.x, dir.x) && feq(line->dir.y, dir.y);
			int opposite_way = feq(line->dir.x, -dir.x) && feq(line->dir.y, -dir.y);

			/* The candidate must run along the text, forwards or backwards. */
			if (!same_way && !opposite_way)
				continue;

			/* Each character is tested individually rather than using the line's bbox. */
			for (ch = line->first_char; ch != NULL; ch = ch->next)
			{
				fz_rect reach;
				fz_point up;
				float dx, dy, dot;

				/* If the thickness is more than a 1/4 of the size, it's a
				 * highlight, not a line! */
				if (ch->size < thickness*4)
					continue;

				reach = expanded_rect_from_quad(ch->quad, line->dir, ch->origin, ch->size);
				if (!line_crosses_rect(from, to, reach))
					continue;

				/* The baseline runs from ch->origin along line->dir; 'up'
				 * is that direction turned through a right angle. */
				up.x = line->dir.y;
				up.y = -line->dir.x;

				/* Displacement of the candidate from the baseline, measured along 'up'. */
				dx = from.x - ch->origin.x;
				dy = from.y - ch->origin.y;
				dot = dx * up.x + dy * up.y;

				ch->flags |= (dot > 0) ? FZ_STEXT_STRIKEOUT : FZ_STEXT_UNDERLINE;
			}
		}
	}
}

/* Run every thin rectangle stored on the device against the text of the
 * page, so that characters it crosses get marked as struck out or
 * underlined. */
static void
check_rects_for_strikeout(fz_context *ctx, fz_stext_device *tdev, fz_stext_page *page)
{
	int count = tdev->rect_len;
	int idx;

	for (idx = 0; idx < count; ++idx)
	{
		fz_point start = tdev->rects[idx].from;
		fz_point finish = tdev->rects[idx].to;
		float thickness = tdev->rects[idx].thickness;
		fz_point heading;

		/* Unit vector along the rectangle's centre line. */
		heading.x = finish.x - start.x;
		heading.y = finish.y - start.y;
		heading = fz_normalize_vector(heading);

		check_strikeout(ctx, page->first_block, start, finish, heading, thickness);
	}
}

/* Finish off the page when the device is closed: fix up bounding boxes
 * and bidi ordering, then run whichever optional post-processing passes
 * the options ask for, in a fixed order (styles, segmentation, paragraph
 * breaking, table hunting). */
static void
fz_stext_close_device(fz_context *ctx, fz_device *dev)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_stext_page *page = tdev->page;
	int wanted = tdev->opts.flags;

	fixup_bboxes_and_bidi(ctx, page->first_block);

	if ((wanted & FZ_STEXT_COLLECT_STYLES) != 0)
		check_rects_for_strikeout(ctx, tdev, page);

	/* TODO: smart sorting of blocks and lines in reading order */
	/* TODO: unicode NFC normalization */

	if ((wanted & FZ_STEXT_SEGMENT) != 0)
		fz_segment_stext_page(ctx, page);

	if ((wanted & FZ_STEXT_PARAGRAPH_BREAK) != 0)
		fz_paragraph_break(ctx, page);

	if ((wanted & FZ_STEXT_TABLE_HUNT) != 0)
		fz_table_hunt(ctx, page);
}

/* Release everything the stext device itself holds: the remembered text
 * object, the remembered font, any metatext entries still on the stack,
 * and the list of candidate strikeout/underline rectangles. */
static void
fz_stext_drop_device(fz_context *ctx, fz_device *dev)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_drop_text(ctx, tdev->lasttext);
	fz_drop_font(ctx, tdev->last.font);
	/* Unbalanced begin_metatext calls may have left entries behind. */
	while (tdev->metatext)
		pop_metatext(ctx, tdev);

	fz_free(ctx, tdev->rects);
}

/* Parse a rectangle written as four ':'-separated numbers, in the order
 * x0:y0:x1:y1.
 *
 * Each of the first three fields must be non-empty and followed by a
 * ':'. On success the rectangle is stored in *rp and 1 is returned;
 * otherwise *rp is left untouched and 0 is returned. */
static int
val_is_rect(const char *val, fz_rect *rp)
{
	float field[4];
	int i;

	/* The first three fields each end at a colon. */
	for (i = 0; i < 3; i++)
	{
		const char *colon = strchr(val, ':');

		if (colon == NULL || colon == val)
			return 0;
		field[i] = fz_atof(val);
		val = colon + 1;
	}

	/* Whatever remains is the final field. */
	field[3] = fz_atof(val);

	rp->x0 = field[0];
	rp->y0 = field[1];
	rp->x1 = field[2];
	rp->y1 = field[3];

	return 1;
}

/* Fill in *opts from an option string.
 *
 * opts is cleared first. Each recognised boolean option sets its flag only
 * when given the value "yes". Clipping (FZ_STEXT_CLIP) is on by default and
 * is switched off by "clip=no" (or the deprecated "mediabox-clip=no").
 * "clip-rect=x0:y0:x1:y1" sets opts->clip and FZ_STEXT_CLIP_RECT.
 * "resolution" sets opts->scale relative to 96ppi; the default scale is 1.
 *
 * Returns opts. */
fz_stext_options *
fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string)
{
	/* Options that simply set a flag when given as "yes". */
	static const struct
	{
		const char *name;
		int flag;
	} yes_options[] =
	{
		{ "preserve-ligatures", FZ_STEXT_PRESERVE_LIGATURES },
		{ "preserve-whitespace", FZ_STEXT_PRESERVE_WHITESPACE },
		{ "preserve-images", FZ_STEXT_PRESERVE_IMAGES },
		{ "inhibit-spaces", FZ_STEXT_INHIBIT_SPACES },
		{ "dehyphenate", FZ_STEXT_DEHYPHENATE },
		{ "preserve-spans", FZ_STEXT_PRESERVE_SPANS },
		{ "structured", FZ_STEXT_COLLECT_STRUCTURE },
		{ "use-cid-for-unknown-unicode", FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE },
		{ "use-gid-for-unknown-unicode", FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE },
		{ "accurate-bboxes", FZ_STEXT_ACCURATE_BBOXES },
		{ "vectors", FZ_STEXT_COLLECT_VECTORS },
		{ "ignore-actualtext", FZ_STEXT_IGNORE_ACTUALTEXT },
		{ "segment", FZ_STEXT_SEGMENT },
		{ "paragraph-break", FZ_STEXT_PARAGRAPH_BREAK },
		{ "table-hunt", FZ_STEXT_TABLE_HUNT },
		{ "collect-styles", FZ_STEXT_COLLECT_STYLES },
		{ "accurate-ascenders", FZ_STEXT_ACCURATE_ASCENDERS },
		{ "accurate-side-bearings", FZ_STEXT_ACCURATE_SIDE_BEARINGS },
	};
	const char *val;
	size_t i;

	memset(opts, 0, sizeof *opts);

	for (i = 0; i < sizeof yes_options / sizeof yes_options[0]; i++)
	{
		if (fz_has_option(ctx, string, yes_options[i].name, &val) && fz_option_eq(val, "yes"))
			opts->flags |= yes_options[i].flag;
	}

	/* Clipping is on unless explicitly refused. The flag is CLEARED (not
	 * toggled) so that giving both "mediabox-clip=no" and "clip=no" cannot
	 * switch clipping back on. */
	opts->flags |= FZ_STEXT_CLIP;
	if (fz_has_option(ctx, string, "mediabox-clip", &val))
	{
		fz_warn(ctx, "The 'mediabox-clip' option has been deprecated. Use 'clip' instead.");
		if (fz_option_eq(val, "no"))
			opts->flags &= ~FZ_STEXT_CLIP;
	}
	if (fz_has_option(ctx, string, "clip", &val) && fz_option_eq(val, "no"))
		opts->flags &= ~FZ_STEXT_CLIP;
	if (fz_has_option(ctx, string, "clip-rect", &val) && val_is_rect(val, &opts->clip))
		opts->flags |= FZ_STEXT_CLIP_RECT;

	opts->scale = 1;
	if (fz_has_option(ctx, string, "resolution", &val))
		opts->scale = fz_atof(val) / 96.0f; /* HTML base resolution is 96ppi */

	return opts;
}

/* State gathered while walking a path to decide whether it is a
 * four-cornered shape. */
typedef struct
{
	int fail; /* Set to 1 once the path can no longer be such a shape. */
	int count; /* Number of entries of corners[] filled in so far. */
	fz_point corners[4]; /* The points seen, in path order. */
} is_rect_data;

/* Record (x, y) as the next corner. If all four corner slots are already
 * in use, the shape is marked as failed instead. */
static void
stash_point(is_rect_data *rd, float x, float y)
{
	int slot = rd->count;

	if (slot >= 4)
	{
		/* A fifth point: this cannot be a simple four-cornered shape. */
		rd->fail = 1;
		return;
	}

	rd->corners[slot].x = x;
	rd->corners[slot].y = y;
	rd->count = slot + 1;
}

/* Path walker callback for a moveto. Only a moveto that opens the path is
 * acceptable; any later one marks the shape as failed. */
static void
is_rect_moveto(fz_context *ctx, void *arg, float x, float y)
{
	is_rect_data *rd = arg;

	if (rd->fail)
		return;

	if (rd->count == 0)
		stash_point(rd, x, y);
	else
		rd->fail = 1;
}

/* Path walker callback for a lineto. Each line end is recorded as a
 * corner. Once four corners are known, a further line that returns exactly
 * to the first corner merely closes the shape and is ignored; any other
 * fifth point marks the shape as failed (via stash_point). */
static void
is_rect_lineto(fz_context *ctx, void *arg, float x, float y)
{
	is_rect_data *rd = arg;
	if (rd->fail)
		return;

	/* The closing line goes back to the START point, so both coordinates
	 * must be compared against corners[0]. Comparing y against corners[1]
	 * only works for rectangles whose first edge is horizontal, and would
	 * silently swallow a line to a point that is not the start. */
	if (rd->count == 4 && rd->corners[0].x == x && rd->corners[0].y == y)
		return;

	stash_point(rd, x, y);
}

/* Path walker callback for a curveto. Any curve marks the shape as
 * failed; the control points are not examined. */
static void
is_rect_curveto(fz_context *ctx, void *arg, float x1, float y1, float x2, float y2, float x3, float y3)
{
	is_rect_data *rd = arg;
	rd->fail = 1;
}

/* Path walker callback for a closepath. A shape closed after three points
 * gets its first point repeated as the fourth corner; a shape closed with
 * any other number of points than four is marked as failed. */
static void
is_rect_closepath(fz_context *ctx, void *arg)
{
	is_rect_data *rd = arg;

	if (rd->fail)
		return;

	switch (rd->count)
	{
	case 3:
		/* Pad out to four corners by repeating the start point. */
		stash_point(rd, rd->corners[0].x, rd->corners[0].y);
		break;
	case 4:
		break;
	default:
		rd->fail = 1;
		break;
	}
}

/* Decide whether a path is (plausibly) a single rectangle, possibly
 * rotated, such as might be used to draw a strikeout or underline.
 *
 * The path must consist of one subpath of straight lines giving four
 * corners; a plain two point line is accepted as a zero thickness
 * rectangle. The corners are transformed by ctm before being examined.
 *
 * On success returns 1, with *from and *to set to the midpoints of the
 * two short ends (i.e. the ends of the centre line along the long axis)
 * and *thickness set to the length of the short side. Returns 0, leaving
 * the outputs untouched, if the path is not such a rectangle. */
static int
is_path_rect(fz_context *ctx, const fz_path *path, fz_point *from, fz_point *to, float *thickness, fz_matrix ctm)
{
	float d01, d01x, d01y, d03, d03x, d03y, d32x, d32y;
	is_rect_data rd = { 0 };
	static const fz_path_walker walker =
	{
		is_rect_moveto, is_rect_lineto, is_rect_curveto, is_rect_closepath
	};
	int i;

	fz_walk_path(ctx, path, &walker, &rd);

	if (rd.fail)
		return 0;

	/* A single line: treat it as a rectangle of zero thickness by
	 * walking back along it (0, 1, 1, 0). */
	if (rd.count == 2)
	{
		stash_point(&rd, rd.corners[1].x, rd.corners[1].y);
		stash_point(&rd, rd.corners[0].x, rd.corners[0].y);
	}

	/* Empty paths, lone movetos and unclosed three point paths leave some
	 * corner slots unset. Reject them here rather than analysing corners
	 * that were never part of the path. */
	if (rd.count != 4)
		return 0;

	for (i = 0 ; i < 4; i++)
	{
		fz_point p = fz_transform_point(rd.corners[i], ctm);

		rd.corners[i].x = p.x;
		rd.corners[i].y = p.y;
	}

	/* So we have a 4 cornered path. Hopefully something like:
	 * 0---------1
	 * |         |
	 * 3---------2
	 * but it might be:
	 * 0---------3
	 * |         |
	 * 1---------2
	*/
	while (1)
	{
		/* Squared lengths of the two edges leaving corner 0. */
		d01x = rd.corners[1].x - rd.corners[0].x;
		d01y = rd.corners[1].y - rd.corners[0].y;
		d01 = d01x * d01x + d01y * d01y;
		d03x = rd.corners[3].x - rd.corners[0].x;
		d03y = rd.corners[3].y - rd.corners[0].y;
		d03 = d03x * d03x + d03y * d03y;
		if(d01 < d03)
		{
			/* We are the latter case. Transpose it. Going round again
			 * recomputes the deltas for the new ordering, after which
			 * edge 0-1 is the longer one and the loop ends. */
			fz_point p = rd.corners[1];
			rd.corners[1] = rd.corners[3];
			rd.corners[3] = p;
		}
		else
			break;
	}
	d32x = rd.corners[2].x - rd.corners[3].x;
	d32y = rd.corners[2].y - rd.corners[3].y;

	/* So d32x and d01x need to be the same for this to be a strikeout,
	 * i.e. the two long edges must be equal and parallel. */
	if (!feq(d32x, d01x) || !feq(d32y, d01y))
		return 0;

	/* We are plausibly a rectangle. */
	*thickness = sqrtf(d03x * d03x + d03y * d03y);

	from->x = (rd.corners[0].x + rd.corners[3].x)/2;
	from->y = (rd.corners[0].y + rd.corners[3].y)/2;
	to->x = (rd.corners[1].x + rd.corners[2].x)/2;
	to->y = (rd.corners[1].y + rd.corners[2].y)/2;

	return 1;
}

/* If the path is a (possibly rotated) rectangle, remember its centre line
 * and thickness on the device as a candidate strikeout or underline.
 * The list of candidates starts at 32 entries and doubles whenever it
 * fills up. */
static void
check_for_strikeout(fz_context *ctx, fz_stext_device *tdev, fz_stext_page *page, const fz_path *path, fz_matrix ctm)
{
	float thickness;
	fz_point from, to;
	int slot;

	/* Anything that is not a rectangle is of no interest here. */
	if (!is_path_rect(ctx, path, &from, &to, &thickness, ctm))
		return;

	/* Make room in the device's list if it is full. */
	if (tdev->rect_len == tdev->rect_max)
	{
		int grown = (tdev->rect_max == 0) ? 32 : tdev->rect_max * 2;

		tdev->rects = fz_realloc(ctx, tdev->rects, sizeof(*tdev->rects) * grown);
		tdev->rect_max = grown;
	}

	slot = tdev->rect_len++;
	tdev->rects[slot].from = from;
	tdev->rects[slot].to = to;
	tdev->rects[slot].thickness = thickness;
}

/* Append a vector block to the page, covering 'bbox' and carrying the
 * given vector flags and packed color. */
static void
add_vector(fz_context *ctx, fz_stext_page *page, fz_rect bbox, uint32_t flags, uint32_t argb)
{
	fz_stext_block *vector_block = add_block_to_page(ctx, page);

	vector_block->type = FZ_STEXT_BLOCK_VECTOR;
	vector_block->u.v.argb = argb;
	vector_block->u.v.flags = flags;
	vector_block->bbox = bbox;
}

/* State used while walking a path to split it into rectangles and
 * 'everything else'. */
typedef struct
{
	fz_matrix ctm; /* Transform applied to every path point. */
	uint32_t argb; /* Packed color for the vector blocks produced. */
	uint32_t flags; /* Base flags for the vector blocks produced. */
	fz_stext_page *page; /* Page that the vector blocks are added to. */
	fz_rect leftovers; /* Bounds of all points not belonging to a rectangle. */
	fz_rect pending; /* Most recent rectangle found, not yet added to the page. */
	int count; /* Points held in p[] for the current subpath, or -1 once the subpath cannot be a rectangle. */
	fz_point p[5]; /* Transformed points of the current subpath. */
} split_path_data;

/* Decide what to do with the points gathered for the subpath that has
 * just ended.
 *
 * If they describe an axis-aligned line or rectangle, its bounds become
 * the new 'pending' rectangle; any rectangle that was already pending is
 * added to the page first, flagged as continuing. Otherwise the points
 * are merged into 'leftovers'. Nothing happens when count is negative
 * (the subpath has already been ruled out and its points accounted for). */
static void
maybe_rect(fz_context *ctx, split_path_data *sp)
{
	int rect = 0;
	int i;

	if (sp->count >= 0)
	{
		if (sp->count == 3)
		{
			/* Allow for "moveto A, lineto B, lineto A, close" */
			/* NOTE(review): this accepts a match in x OR y, which is
			 * looser than the "back to A" case described above -
			 * confirm that is intended. */
			if (feq(sp->p[0].x, sp->p[2].x) || feq(sp->p[0].y, sp->p[2].y))
				sp->count = 2;
		}
		if (sp->count == 2)
		{
			/* A horizontal or vertical line. */
			if (feq(sp->p[0].x, sp->p[1].x) || feq(sp->p[0].y, sp->p[1].y))
				rect = 1; /* Count that as a rect */
		}
		else if (sp->count == 4 || sp->count == 5)
		{
			/* Four corners (plus, possibly, the start point repeated):
			 * accept either winding of an axis-aligned rectangle. */
			if (feq(sp->p[0].x, sp->p[1].x) && feq(sp->p[2].x, sp->p[3].x) && feq(sp->p[0].y, sp->p[3].y) && feq(sp->p[1].y, sp->p[2].y))
				rect = 1;
			else if (feq(sp->p[0].x, sp->p[3].x) && feq(sp->p[1].x, sp->p[2].x) && feq(sp->p[0].y, sp->p[1].y) && feq(sp->p[2].y, sp->p[3].y))
				rect = 1;
		}
		if (rect)
		{
			fz_rect bounds;

			bounds.x0 = bounds.x1 = sp->p[0].x;
			bounds.y0 = bounds.y1 = sp->p[0].y;
			for (i = 1; i < sp->count; i++)
				bounds = fz_include_point_in_rect(bounds, sp->p[i]);
			/* Only one rectangle is held back at a time; send the previous one now. */
			if (fz_is_valid_rect(sp->pending))
				add_vector(ctx, sp->page, sp->pending, sp->flags | FZ_STEXT_VECTOR_IS_RECTANGLE | FZ_STEXT_VECTOR_CONTINUES, sp->argb);
			sp->pending = bounds;
			return;
		}

		/* Not a rectangle: the points just contribute to the leftovers. */
		for (i = 0; i < sp->count; i++)
			sp->leftovers = fz_include_point_in_rect(sp->leftovers, sp->p[i]);
	}
}

/* Path walker callback for a moveto: deal with whatever the previous
 * subpath gathered, then start a new subpath at the transformed point. */
static void
split_move(fz_context *ctx, void *arg, float x, float y)
{
	split_path_data *sp = (split_path_data *)arg;
	fz_point start = fz_transform_point_xy(x, y, sp->ctm);

	/* Finish off the subpath we were in before starting afresh. */
	maybe_rect(ctx, sp);

	sp->count = 1;
	sp->p[0] = start;
}

/* Path walker callback for a lineto.
 *
 * While the current subpath might still be a rectangle, the transformed
 * point is stored (repeated points are dropped, and a fifth point is
 * only accepted if it returns to the start). As soon as the subpath
 * cannot be a rectangle, the stored points and all further points are
 * merged into 'leftovers' and count is set to -1. */
static void
split_line(fz_context *ctx, void *arg, float x, float y)
{
	split_path_data *sp = (split_path_data *)arg;
	fz_point p = fz_transform_point_xy(x, y, sp->ctm);
	int i;

	if (sp->count >= 0)
	{
		/* Check for lines to the same point. There is only a previous
		 * point to compare with once something has been stored: count is
		 * 0 for a lineto at the very start of a path or straight after a
		 * closepath, and reading p[count-1] then would be out of bounds. */
		if (sp->count > 0 && feq(sp->p[sp->count-1].x, p.x) && feq(sp->p[sp->count-1].y, p.y))
			return;
		/* If we're still maybe a rect, just record the point. */
		if (sp->count < 4)
		{
			sp->p[sp->count++] = p;
			return;
		}
		/* Check for close line? */
		if (sp->count == 4)
		{
			if (feq(sp->p[0].x, p.x) && feq(sp->p[0].y, p.y))
			{
				/* We've just drawn a line back to the start point. */
				/* Needless saving of point, but it makes the logic
				 * easier elsewhere. */
				sp->p[sp->count++] = p;
				return;
			}
		}
		/* We can no longer be a rect. Output the points we had saved. */
		for (i = 0; i < sp->count; i++)
			sp->leftovers = fz_include_point_in_rect(sp->leftovers, sp->p[i]);
		/* Remember we're not a rect. */
		sp->count = -1;
	}
	/* Roll this point into the non-rect bounds. */
	sp->leftovers = fz_include_point_in_rect(sp->leftovers, p);
}

/* Path walker callback for a curveto. A curve rules the current subpath
 * out as a rectangle: any points stored so far, and the curve's three
 * transformed points, are all merged into 'leftovers'. */
static void
split_curve(fz_context *ctx, void *arg, float x1, float y1, float x2, float y2, float x3, float y3)
{
	split_path_data *sp = (split_path_data *)arg;
	fz_point curve_pts[3];
	int i;

	curve_pts[0] = fz_transform_point_xy(x1, y1, sp->ctm);
	curve_pts[1] = fz_transform_point_xy(x2, y2, sp->ctm);
	curve_pts[2] = fz_transform_point_xy(x3, y3, sp->ctm);

	if (sp->count >= 0)
	{
		/* Hand over the points we were holding, and note that this
		 * subpath is no longer a rectangle candidate. */
		for (i = 0; i < sp->count; i++)
			sp->leftovers = fz_include_point_in_rect(sp->leftovers, sp->p[i]);
		sp->count = -1;
	}

	for (i = 0; i < 3; i++)
		sp->leftovers = fz_include_point_in_rect(sp->leftovers, curve_pts[i]);
}

/* Path walker callback for a closepath: deal with the points gathered for
 * the subpath being closed, then reset to having no stored points. */
static void
split_close(fz_context *ctx, void *arg)
{
	split_path_data *sp = (split_path_data *)arg;

	maybe_rect(ctx, sp);
	sp->count = 0;
}


/* Path walker that splits a path into rectangles and leftovers. It is
 * handed to fz_walk_path together with a split_path_data. */
static const
fz_path_walker split_path_rects =
{
	split_move,
	split_line,
	split_curve,
	split_close
};

/* Add vector blocks to the page describing a painted path.
 *
 * The path is split into axis-aligned rectangles, each of which gets its
 * own vector block flagged FZ_STEXT_VECTOR_IS_RECTANGLE, and everything
 * else, which is summarised by a single block covering its bounds. Every
 * block from the same path except the last is flagged
 * FZ_STEXT_VECTOR_CONTINUES. 'stroke' being non-zero marks the blocks as
 * stroked. */
static void
add_vectors_from_path(fz_context *ctx, fz_stext_page *page, const fz_path *path, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params cp, int stroke)
{
	int have_leftovers;
	split_path_data sp;

	sp.ctm = ctm;
	sp.argb = hexrgba_from_color(ctx, cs, color, alpha);
	sp.flags = stroke ? FZ_STEXT_VECTOR_IS_STROKED : 0;
	sp.page = page;
	sp.count = 0;
	sp.leftovers = fz_empty_rect;
	sp.pending = fz_empty_rect;
	fz_walk_path(ctx, path, &split_path_rects, &sp);

	/* Account for a final subpath that was never closed. */
	maybe_rect(ctx, &sp);

	/* This must be sampled AFTER the final maybe_rect: a trailing subpath
	 * that is not a rectangle only reaches the leftovers there. Sampling
	 * earlier would drop that subpath and omit the CONTINUES flag. */
	have_leftovers = fz_is_valid_rect(sp.leftovers);

	if (fz_is_valid_rect(sp.pending))
		add_vector(ctx, page, sp.pending, sp.flags | FZ_STEXT_VECTOR_IS_RECTANGLE | (have_leftovers ? FZ_STEXT_VECTOR_CONTINUES : 0), sp.argb);
	if (have_leftovers)
		add_vector(ctx, page, sp.leftovers, sp.flags, sp.argb);
}

/* Handle a filled path.
 *
 * The path's bounds are merged into the bounds of any actualtext in
 * force. Depending on the device flags, the path is also examined as a
 * possible strikeout/underline, and/or recorded on the page as vector
 * blocks. */
static void
fz_stext_fill_path(fz_context *ctx, fz_device *dev, const fz_path *path, int even_odd, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params cp)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_stext_page *page = tdev->page;
	fz_rect painted_area = fz_bound_path(ctx, path, NULL, ctm);
	fz_rect *actual_bounds = actualtext_bounds(tdev);

	/* Content drawn within an actualtext helps to position its text. */
	if (actual_bounds)
		*actual_bounds = fz_union_rect(*actual_bounds, painted_area);

	if ((tdev->flags & FZ_STEXT_COLLECT_STYLES) != 0)
		check_for_strikeout(ctx, tdev, page, path, ctm);

	if ((tdev->flags & FZ_STEXT_COLLECT_VECTORS) != 0)
		add_vectors_from_path(ctx, page, path, ctm, cs, color, alpha, cp, 0);
}

/* Handle a stroked path.
 *
 * The path's bounds (allowing for the stroke state) are merged into the
 * bounds of any actualtext in force. Depending on the device flags, the
 * path is also examined as a possible strikeout/underline, and/or recorded
 * on the page as stroked vector blocks. */
static void
fz_stext_stroke_path(fz_context *ctx, fz_device *dev, const fz_path *path, const fz_stroke_state *ss, fz_matrix ctm, fz_colorspace *cs, const float *color, float alpha, fz_color_params cp)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_stext_page *page = tdev->page;
	fz_rect painted_area = fz_bound_path(ctx, path, ss, ctm);
	fz_rect *actual_bounds = actualtext_bounds(tdev);

	/* Content drawn within an actualtext helps to position its text. */
	if (actual_bounds)
		*actual_bounds = fz_union_rect(*actual_bounds, painted_area);

	if ((tdev->flags & FZ_STEXT_COLLECT_STYLES) != 0)
		check_for_strikeout(ctx, tdev, page, path, ctm);

	if ((tdev->flags & FZ_STEXT_COLLECT_VECTORS) != 0)
		add_vectors_from_path(ctx, page, path, ctm, cs, color, alpha, cp, 1);
}

/* Create a new, empty structure node hanging beneath 'block'.
 *
 * The node is allocated from the page's pool, with room for a copy of the
 * raw tag name (a NULL 'raw' is treated as the empty string). Its parent
 * is the page's current last_struct, and its 'up' pointer refers back to
 * 'block'. */
static void
new_stext_struct(fz_context *ctx, fz_stext_page *page, fz_stext_block *block, fz_structure standard, const char *raw)
{
	const char *tag = (raw != NULL) ? raw : "";
	size_t tag_len = strlen(tag);
	fz_stext_struct *node;

	node = fz_pool_alloc(ctx, page->pool, sizeof(*node) + tag_len);

	node->up = block;
	node->parent = page->last_struct;
	node->standard = standard;
	node->first_block = NULL;
	node->last_block = NULL;
	/* Copy the tag name together with its terminator. */
	memcpy(node->raw, tag, tag_len+1);

	block->u.s.down = node;
}

/*
 * Device callback: open a structure element.
 *
 * standard/raw identify the type of the structure, and idx is the index
 * it should have among the struct blocks at the current level. If a
 * struct block with that index already exists at this level we descend
 * into it again; otherwise a new struct block is created and spliced
 * into the block list so that struct blocks stay in index order.
 *
 * On return page->last_struct is the structure that has been entered,
 * and page->last_block is the last content block within it (NULL for a
 * freshly created structure).
 */
static void
fz_stext_begin_structure(fz_context *ctx, fz_device *dev, fz_structure standard, const char *raw, int idx)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_stext_page *page = tdev->page;
	fz_stext_block *block, *le, *gt, *newblock;

	/* Normalise here as well as in new_stext_struct, because raw is
	 * passed to strcmp below. */
	if (raw == NULL)
		raw = "";

	/* Find a pointer to the last block. */
	if (page->last_block)
	{
		block = page->last_block;
	}
	else if (page->last_struct)
	{
		block = page->last_struct->last_block;
	}
	else
	{
		block = page->first_block;
	}

	/* So block is somewhere in the content chain. Let's try and find:
	 *   le = the nearest struct block with index <= idx, at or before block.
	 *   gt = the struct block with the smallest index > idx seen so far.
	 * Search backwards to start with.
	 */
	gt = NULL;
	le = block;
	while (le)
	{
		if (le->type == FZ_STEXT_BLOCK_STRUCT)
		{
			if (le->u.s.index > idx)
				gt = le;
			if (le->u.s.index <= idx)
				break;
		}
		le = le->prev;
	}
	/* The following loop copes with finding gt (the first struct block with an index
	 * at least as high as the one we want) if we haven't found it already. The while
	 * loop in here was designed to cope with 'block' being in the middle of a list.
	 * In fact, the way the code is currently, block will always be at the end of a
	 * list, so the while won't do anything. But I'm loath to remove it in case we
	 * ever change this code to start from wherever we did the last insertion.
	 * When the loop runs off the end, gt is NULL and block is the tail of the list. */
	if (gt == NULL)
	{
		gt = block;
		while (gt)
		{
			if (gt->type == FZ_STEXT_BLOCK_STRUCT)
			{
				if (gt->u.s.index <= idx)
					le = gt;
				if (gt->u.s.index >= idx)
					break;
			}
			block = gt;
			gt = gt->next;
		}
	}

	if (le && le->u.s.index == idx)
	{
		/* We want to move down into the le block. Does it have a struct
		 * attached yet? */
		if (le->u.s.down == NULL)
		{
			/* No. We need to create a new struct node. */
			new_stext_struct(ctx, page, le, standard, raw);
		}
		else if (le->u.s.down->standard != standard || strcmp(raw, le->u.s.down->raw) != 0)
		{
			/* Yes, but it doesn't match the one we expect! */
			fz_warn(ctx, "Mismatched structure type!");
		}
		page->last_struct = le->u.s.down;
		page->last_block = le->u.s.down->last_block;

		return;
	}

	/* We are going to need to create a new block. Create a complete unlinked one here. */
	newblock = fz_pool_alloc(ctx, page->pool, sizeof *page->first_block);
	newblock->bbox = fz_empty_rect;
	newblock->prev = NULL;
	newblock->next = NULL;
	newblock->type = FZ_STEXT_BLOCK_STRUCT;
	newblock->u.s.index = idx;
	newblock->u.s.down = NULL;
	/* If this throws, we leak newblock but it's within the pool, so it doesn't matter. */
	new_stext_struct(ctx, page, newblock, standard, raw);

	/* So now we just need to link it in somewhere. */
	if (gt)
	{
		/* Link it in before gt. */
		newblock->prev = gt->prev;
		if (gt->prev)
		{
			gt->prev->next = newblock;
		}
		else if (page->last_struct)
		{
			/* gt was the first block of the enclosing structure, so
			 * the structure's head pointer must move to newblock or
			 * it would never be reached when walking forwards. */
			page->last_struct->first_block = newblock;
		}
		else
		{
			/* gt was the first block of the page; same reasoning. */
			page->first_block = newblock;
		}
		gt->prev = newblock;
		newblock->next = gt;
	}
	else if (block)
	{
		/* Link it in at the end of the list (i.e. after 'block') */
		newblock->prev = block;
		block->next = newblock;
		/* Keep the enclosing structure's tail pointer up to date; it is
		 * used as the place to resume from once the new structure is
		 * closed again. At page level nothing is needed, as the tail is
		 * found by walking from page->first_block. */
		if (page->last_struct)
			page->last_struct->last_block = newblock;
	}
	else if (page->last_struct)
	{
		/* We have no blocks at all at this level. */
		page->last_struct->first_block = newblock;
		page->last_struct->last_block = newblock;
	}
	else
	{
		/* We have no blocks at ANY level. */
		page->first_block = newblock;
	}
	/* Wherever we linked it in, that's where we want to continue adding content. */
	page->last_struct = newblock->u.s.down;
	page->last_block = NULL;
}

/*
 * Device callback: close the structure element that is currently open.
 *
 * The parent structure becomes current again, and page->last_block is
 * moved to the last block at that level. If no structure is open a
 * warning is issued and nothing changes.
 */
static void
fz_stext_end_structure(fz_context *ctx, fz_device *dev)
{
	fz_stext_device *tdev = (fz_stext_device*)dev;
	fz_stext_page *page = tdev->page;
	fz_stext_struct *closing = page->last_struct;
	fz_stext_block *tail;

	if (!closing)
	{
		fz_warn(ctx, "Structure out of sync");
		return;
	}

	page->last_struct = closing->parent;

	if (page->last_struct)
	{
		/* Still inside a structure: it records its own last block. */
		page->last_block = page->last_struct->last_block;
		return;
	}

	/* Back at page level: the last block has to be found by walking
	 * the page's block list from the start. */
	tail = page->first_block;
	while (tail->next)
		tail = tail->next;
	page->last_block = tail;
}

/*
 * Create a device that collects structured text into the given page.
 *
 * page: the structured text page that the device populates.
 * opts: optional; when non-NULL its flags select which optional
 *       callbacks get installed, and the whole options struct is copied
 *       into the device. When NULL, the flags and options are left as
 *       fz_new_derived_device returned them (presumably zeroed -
 *       confirm against the allocator).
 *
 * Structure callbacks are only installed for FZ_STEXT_COLLECT_STRUCTURE,
 * and path callbacks only for FZ_STEXT_COLLECT_VECTORS or
 * FZ_STEXT_COLLECT_STYLES. Unless FZ_STEXT_PRESERVE_IMAGES is set, the
 * FZ_DONT_DECODE_IMAGES hint is added to the device.
 *
 * Returns the new device as a generic fz_device pointer.
 */
fz_device *
fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options *opts)
{
	fz_stext_device *dev = fz_new_derived_device(ctx, fz_stext_device);
	fz_device *base = &dev->super;

	/* Lifetime. */
	base->close_device = fz_stext_close_device;
	base->drop_device = fz_stext_drop_device;

	/* Text. */
	base->fill_text = fz_stext_fill_text;
	base->stroke_text = fz_stext_stroke_text;
	base->clip_text = fz_stext_clip_text;
	base->clip_stroke_text = fz_stext_clip_stroke_text;
	base->ignore_text = fz_stext_ignore_text;
	base->begin_metatext = fz_stext_begin_metatext;
	base->end_metatext = fz_stext_end_metatext;

	/* Shadings and images. */
	base->fill_shade = fz_stext_fill_shade;
	base->fill_image = fz_stext_fill_image;
	base->fill_image_mask = fz_stext_fill_image_mask;

	/* Everything that depends on the caller's options. */
	if (opts)
	{
		dev->opts = *opts;
		dev->flags = opts->flags;

		if (opts->flags & FZ_STEXT_COLLECT_STRUCTURE)
		{
			base->begin_structure = fz_stext_begin_structure;
			base->end_structure = fz_stext_end_structure;
		}

		if (opts->flags & (FZ_STEXT_COLLECT_VECTORS | FZ_STEXT_COLLECT_STYLES))
		{
			base->fill_path = fz_stext_fill_path;
			base->stroke_path = fz_stext_stroke_path;
		}
	}

	if (!(dev->flags & FZ_STEXT_PRESERVE_IMAGES))
		base->hints |= FZ_DONT_DECODE_IMAGES;

	/* Initial extraction state. */
	dev->page = page;
	dev->pen.x = 0;
	dev->pen.y = 0;
	dev->trm = fz_identity;
	dev->lastchar = ' ';
	dev->lasttext = NULL;
	dev->lastbidi = 0;
	dev->last_was_fake_bold = 1;

	/* No rectangles collected yet. */
	dev->rects = NULL;
	dev->rect_len = 0;
	dev->rect_max = 0;

	return base;
}