diff mupdf-source/source/fitz/warp.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/warp.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,2313 @@
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "mupdf/fitz.h"
+
+#include "pixmap-imp.h"
+
+#include <string.h>
+
+/* Define TIMINGS to get timing information dumped to stdout. */
+#undef TIMINGS
+
+/* Define WARP_DEBUG to get debugging output (and PNGs saved). Note
+ * that this will affect timings! */
+#undef WARP_DEBUG
+
+/* Define WARP_SPEW_DEBUG to get even more debug output (and PNGs). */
+#undef WARP_SPEW_DEBUG
+
+/* One reference suggested doing histogram equalisation. */
+#define DO_HISTEQ
+
+/* Define DETECT_DOCUMENT_RGB, and edge detection on RGB documents will
+ * look for edges in just the R,G,B planes as well as the grey plane. */
+#undef DETECT_DOCUMENT_RGB
+
+#undef SLOW_INTERPOLATION
+#undef SLOW_WARPING
+
+#ifdef WARP_DEBUG
+static void
+debug_printf(fz_context *ctx, const char *fmt, ...)
+{
+	char text[1024];
+	va_list list;
+	va_start(list, fmt);
+	vsnprintf(text, sizeof(text), fmt, list);
+	va_end(list);
+
+#ifdef _WIN32
+	fz_write_string(ctx, fz_stdods(ctx), text);
+#endif
+	fputs(text, stderr);
+}
+#else
+#define debug_printf(CTX, FMT, ...) do {} while (0)
+#endif
+
+#if defined(TIMINGS) && defined(_WIN32)
+
+#include "windows.h"
+
+#define MAX_TIMERS 256
+static struct {
+	int started;
+	int stackptr;
+	int stack[MAX_TIMERS];
+	const char *name[MAX_TIMERS];
+	LARGE_INTEGER time[MAX_TIMERS];
+} timer;
+#define START_TIME() \
+	do {\
+		int i = timer.stack[timer.stackptr++] = timer.started++;\
+		QueryPerformanceCounter(&timer.time[i]);\
+	} while (0)
+#define END_TIME(NAME) \
+	do {\
+		LARGE_INTEGER end;\
+		int i;\
+		QueryPerformanceCounter(&end);\
+		i = timer.stack[--timer.stackptr];\
+		timer.time[i].QuadPart = end.QuadPart - timer.time[i].QuadPart;\
+		timer.name[i] = NAME;\
+	} while (0)
+#define DUMP_TIMES() \
+	do {\
+		int i;\
+		LARGE_INTEGER freq;\
+		QueryPerformanceFrequency(&freq);\
+		float f = freq.QuadPart;\
+		for (i = 0; i < timer.started; i++)\
+			debug_printf(ctx, "%s: %g\n", timer.name[i], (int)1000*timer.time[i].QuadPart/f);\
+	} while (0)
+#else
+#define START_TIME() do {} while(0)
+#define END_TIME(NAME) do {} while(0)
+#define DUMP_TIMES() do {} while(0)
+#endif
+
+typedef struct
+{
+	int x;
+	int y;
+} fz_ipoint;
+
+typedef struct
+{
+	int i;
+	int f;
+	int di;
+	int df;
+} fz_bresenham_core;
+
+typedef struct
+{
+	fz_bresenham_core c;
+	int n;
+} fz_bresenham;
+
+typedef struct
+{
+	fz_bresenham_core x;
+	fz_bresenham_core y;
+	int n;
+} fz_ipoint_bresenham;
+
+typedef struct
+{
+	fz_bresenham_core sx;
+	fz_bresenham_core sy;
+	fz_bresenham_core ex;
+	fz_bresenham_core ey;
+	int n;
+} fz_ipoint2_bresenham;
+
+static inline fz_bresenham_core
+init_bresenham_core(int start, int end, int n)
+{
+	fz_bresenham_core b;
+	int delta = end-start;
+
+	b.di = n == 0 ? 0 : delta/n;
+	b.df = delta - n*b.di; /* 0 <= b.df < n */
+	if (b.df < 0)
+		b.di--, b.df += n;
+	/* Starts with bi.i = start, bi.f = n, and then does a half
+	 * step. */
+	b.i = start + (b.di>>1);
+	b.f = n - (((b.di & 1) * n + b.df)>>1);
+
+	return b;
+}
+
+#ifdef CURRENTLY_UNUSED
+static inline fz_bresenham
+init_bresenham(int start, int end, int n)
+{
+	fz_bresenham b;
+
+	b.c = init_bresenham_core(start, end, n);
+	b.n = n;
+
+	return b;
+}
+
+static inline void
+step(fz_bresenham *b)
+{
+	step_core(&b->c, b->n);
+}
+#endif
+
+static inline void
+step_core(fz_bresenham_core *b, int n)
+{
+	b->i += b->di;
+	b->f -= b->df;
+	if (b->f <= 0)
+	{
+		b->f += n;
+		b->i++;
+	}
+}
+
+static inline fz_ipoint_bresenham
+init_ip_bresenham(fz_ipoint start, fz_ipoint end, int n)
+{
+	fz_ipoint_bresenham b;
+
+	b.x = init_bresenham_core(start.x, end.x, n);
+	b.y = init_bresenham_core(start.y, end.y, n);
+	b.n = n;
+
+	return b;
+}
+
+static inline void
+step_ip(fz_ipoint_bresenham *b)
+{
+	step_core(&b->x, b->n);
+	step_core(&b->y, b->n);
+}
+
+static inline fz_ipoint
+current_ip(const fz_ipoint_bresenham *b)
+{
+	fz_ipoint ip;
+
+	ip.x = b->x.i;
+	ip.y = b->y.i;
+
+	return ip;
+}
+
+static inline fz_ipoint2_bresenham
+init_ip2_bresenham(fz_ipoint ss, fz_ipoint se, fz_ipoint es, fz_ipoint ee, int n)
+{
+	fz_ipoint2_bresenham b;
+
+	b.sx = init_bresenham_core(ss.x, se.x, n);
+	b.sy = init_bresenham_core(ss.y, se.y, n);
+	b.ex = init_bresenham_core(es.x, ee.x, n);
+	b.ey = init_bresenham_core(es.y, ee.y, n);
+	b.n = n;
+
+	return b;
+}
+
+static inline void
+step_ip2(fz_ipoint2_bresenham *b)
+{
+	step_core(&b->sx, b->n);
+	step_core(&b->sy, b->n);
+	step_core(&b->ex, b->n);
+	step_core(&b->ey, b->n);
+}
+
+static inline fz_ipoint
+start_ip(const fz_ipoint2_bresenham *b)
+{
+	fz_ipoint ip;
+
+	ip.x = b->sx.i;
+	ip.y = b->sy.i;
+
+	return ip;
+}
+
+static fz_forceinline fz_ipoint
+end_ip(const fz_ipoint2_bresenham *b)
+{
+	fz_ipoint ip;
+
+	ip.x = b->ex.i;
+	ip.y = b->ey.i;
+
+	return ip;
+}
+
+static void
+interp_n(unsigned char *d, const unsigned char *s0,
+	const unsigned char *s1, int f, int n)
+{
+	do
+	{
+		int a = *s0++;
+		int b = *s1++ - a;
+		*d++ = ((a<<8) + b*f + 128)>>8;
+	}
+	while (--n);
+}
+
+static void
+interp2_n(unsigned char *d, const unsigned char *s0,
+	const unsigned char *s1, const unsigned char *s2,
+	int f0, int f1, int n)
+{
+	do
+	{
+		int a = *s0++;
+		int b = *s1++ - a;
+		int c;
+		a = (a<<8) + b*f0;
+		c = (*s2++<<8) - a;
+		*d++ = ((a<<8) + c*f1 + (1<<15))>>16;
+	}
+	while (--n);
+}
+
+static inline void
+copy_pixel(unsigned char *d, const fz_pixmap *src, fz_ipoint p)
+{
+	int u = p.x>>8;
+	int v = p.y>>8;
+	int fu = p.x & 255;
+	int fv = p.y & 255;
+	int n = src->n;
+	const unsigned char *s;
+	ptrdiff_t stride = src->stride;
+
+	if (u < 0)
+		u = 0, fu = 0;
+	else if (u >= src->w-1)
+		u = src->w-1, fu = 0;
+
+	if (v < 0)
+		v = 0, fv = 0;
+	else if (v >= src->h-1)
+		v = src->h-1, fv = 0;
+
+	s = &src->samples[u * n + v * stride];
+
+#ifdef SLOW_INTERPOLATION
+	{
+		int i;
+
+		for (i = 0; i < n; i++)
+		{
+			int v0 = s[0];
+			int v1 = s[n];
+			int v2 = s[stride];
+			int v3 = s[stride+n];
+			int v01, v23, v;
+			v01 = (v0<<8) + (v1-v0)*fu;
+			v23 = (v2<<8) + (v3-v2)*fu;
+			v   = (v01<<8) + (v23-v01)*fv;
+			assert(v >= 0 && v < (1<<24)-32768);
+			*d++ = (v + 32768)>>16;
+			s++;
+		}
+		return;
+	}
+#else
+	if (fu == 0)
+	{
+		if (fv == 0)
+		{
+			/* Copy single pixel */
+			memcpy(d, s, n);
+			return;
+		}
+		/* interpolate y pixels */
+		interp_n(d, s, s + stride, fv, n);
+		return;
+	}
+	if (fv == 0)
+	{
+		/* interpolate x pixels */
+		interp_n(d, s, s+n, fu, n);
+		return;
+	}
+
+	if (fu <= fv)
+	{
+		/* Top half of the trapezoid. */
+		interp2_n(d, s, s+stride, s+stride+n, fv, fu, n);
+	}
+	else
+	{
+		/* Bottom half of the trapezoid. */
+		interp2_n(d, s, s+n, s+stride+n, fu, fv, n);
+	}
+#endif
+}
+
+static void
+warp_core(unsigned char *d, int n, int width, int height, int stride,
+	const fz_ipoint corner[4], const fz_pixmap *src)
+{
+	fz_ipoint2_bresenham row_bres;
+	int x;
+
+	/* We have a bresenham pair for how to move the start
+	 * and end of the row each y step. */
+	row_bres = init_ip2_bresenham(corner[0], corner[3],
+					corner[1], corner[2], height);
+	stride -= width * n;
+
+#ifdef SLOW_WARPING
+	{
+		int h;
+		for (h = 0 ; h < height ; h++)
+		{
+			int sx = corner[0].x + (corner[3].x - corner[0].x)*h/height;
+			int sy = corner[0].y + (corner[3].y - corner[0].y)*h/height;
+			int ex = corner[1].x + (corner[2].x - corner[1].x)*h/height;
+			int ey = corner[1].y + (corner[2].y - corner[1].y)*h/height;
+			for (x = 0; x < width; x++)
+			{
+				fz_ipoint p;
+				p.x = sx + (ex-sx)*x/width;
+				p.y = sy + (ey-sy)*x/width;
+				copy_pixel(d, src, p);
+				d += n;
+			}
+			d += stride;
+		}
+	}
+#else
+	for (; height > 0; height--)
+	{
+		/* We have a bresenham for how to move the
+		 * current pixel across the row. */
+		fz_ipoint_bresenham pix_bres;
+		pix_bres = init_ip_bresenham(start_ip(&row_bres),
+					end_ip(&row_bres),
+					width);
+		for (x = width; x > 0; x--)
+		{
+			/* Copy pixel */
+			copy_pixel(d, src, current_ip(&pix_bres));
+			d += n;
+			step_ip(&pix_bres);
+		}
+
+		/* step to the next line. */
+		step_ip2(&row_bres);
+		d += stride;
+	}
+#endif
+}
+
+/*
+	points are clockwise from NW.
+
+	This performs simple affine warping.
+*/
+fz_pixmap *
+fz_warp_pixmap(fz_context *ctx, fz_pixmap *src, fz_quad points, int width, int height)
+{
+	fz_pixmap *dst;
+
+	if (src == NULL)
+		return NULL;
+
+	if (width >= (1<<24) || width < 0 || height >= (1<<24) || height < 0)
+		fz_throw(ctx, FZ_ERROR_LIMIT, "Bad width/height");
+
+	dst = fz_new_pixmap(ctx, src->colorspace, width, height,
+			src->seps, src->alpha);
+	dst->xres = src->xres;
+	dst->yres = src->yres;
+
+	fz_try(ctx)
+	{
+		unsigned char *d = dst->samples;
+		int n = dst->n;
+		fz_ipoint corner[4];
+
+		/* Find the corner texture positions as fixed point */
+		corner[0].x = (int)(points.ul.x * 256 + 128);
+		corner[0].y = (int)(points.ul.y * 256 + 128);
+		corner[1].x = (int)(points.ur.x * 256 + 128);
+		corner[1].y = (int)(points.ur.y * 256 + 128);
+		corner[2].x = (int)(points.ll.x * 256 + 128);
+		corner[2].y = (int)(points.ll.y * 256 + 128);
+		corner[3].x = (int)(points.lr.x * 256 + 128);
+		corner[3].y = (int)(points.lr.y * 256 + 128);
+
+		warp_core(d, n, width, height, width * n, corner, src);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_pixmap(ctx, dst);
+		fz_rethrow(ctx);
+	}
+
+	return dst;
+}
+
+static float
+dist(fz_point a, fz_point b)
+{
+	float x = a.x-b.x;
+	float y = a.y-b.y;
+
+	return sqrtf(x*x+y*y);
+}
+
+/* Again, affine warping, but this time where the destination width/height
+ * are chosen automatically. */
+fz_pixmap *
+fz_autowarp_pixmap(fz_context *ctx, fz_pixmap *src, fz_quad points)
+{
+	float w0 = dist(points.ur, points.ul);
+	float w1 = dist(points.ll, points.lr);
+	float h0 = dist(points.lr, points.ul);
+	float h1 = dist(points.ll, points.ur);
+	int w = (w0+w1+0.5)/2;
+	int h = (h0+h1+0.5)/2;
+
+	return fz_warp_pixmap(ctx, src, points, w, h);
+}
+
+/*
+	Corner detection: We shall steal the algorithm from the Dropbox
+	Document Scanner, as described here:
+
+	https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning
+
+	A good reference to the steps involved in the canny edge
+	detection process can be found here:
+
+	https://towardsdatascience.com/canny-edge-detection-step-by-step-in-python-computer-vision-b49c3a2d8123
+
+	This involves:
+	* Canny Edge Detection
+	* * Greyscale conversion
+	* * Noise reduction
+	* * Gradient Calculation
+	* * Non-maximum suppression
+	* * Double threshold
+	* * Edge tracking by Hysteresis
+	* Hough Transform to fix possible edges
+	* Computing intersections and scoring quads
+
+	We modify the gradient calculation with a simple scale to ensure we fill the range.
+*/
+
+#ifdef DO_HISTEQ
+static void
+histeq(fz_pixmap *im)
+{
+	uint32_t count[256];
+	unsigned char tbl[256];
+	int i;
+	unsigned char *s = im->samples;
+	int n = im->w*im->h;
+	int sigma;
+	int den;
+
+	memset(count, 0, sizeof(count));
+	for (i = n; i > 0; i--)
+		count[*s++]++;
+
+	/* Rather than doing a pure histogram equalisation, we bend
+	 * the table so that 0 always stays as 0, and 255 always stays
+	 * as 255. */
+	sigma = (count[0]>>1) - count[0];
+	den = sigma + n - (count[255]>>1);
+	for (i = 0; i < 256; i++)
+	{
+		int v = count[i];
+		sigma += v - (v>>1);
+		tbl[i] = (int)(255.0f * sigma / den + 0.5f);
+		sigma += (v>>1);
+	}
+
+	s = im->samples;
+	for (i = n; i > 0; i--)
+		*s = tbl[*s], s++;
+}
+#endif
+
+/* The first functions apply a 5x5 gauss filter to blur the greyscale
+ * image and remove noise. The gauss filter is a convolution with
+ * weights:
+ *
+ * 2  4  5  4  2
+ * 4  9 12  9  4
+ * 5 12 15 12  5
+ * 4  9 12  9  4
+ * 2  4  5  4  2
+ *
+ * As you can see, there are 3 distinct lines of weights within that
+ * matrix. We walk each row of source pixels once, calculating each of
+ * those convolutions, storing the result in a rolling buffer of 5x3xw
+ * entries. Then we sum columns from the buffer to give us the results.
+ */
+
+/* Read across a row of pixels of width w, from s, performing
+ * the horizontal portion of the convolutions, storing the results
+ * in 3 lines of a buffer, starting at d, &d[w], &d[2*w].
+ *
+ * d[0*w] uses weights (5 12 15 12  5)
+ * d[1*w] uses weights (4  9 12  9  4)
+ * d[2*w] uses weights (2  4 5   4  2)
+ */
+static void
+gauss5row(uint16_t *d, const unsigned char *s, int w)
+{
+	int i;
+	int s0 = s[0];
+	int s1 = s[1];
+	int s2 = s[2];
+	int s3 = s[3];
+
+	s += 4;
+
+	d[2*w] = 11*s0 +  4*s1 +  2*s2;
+	d[1*w] = 25*s0 +  9*s1 +  4*s2;
+	*d++   = 32*s0 + 12*s1 +  5*s2;
+
+	d[2*w] =  6*s0 +  5*s1 +  4*s2 +  2*s3;
+	d[1*w] = 13*s0 + 12*s1 +  9*s2 +  4*s3;
+	*d++   = 17*s0 + 15*s1 + 12*s2 +  5*s3;
+
+	for (i = w - 4; i > 0; i--)
+	{
+		int d2 = 2*s0 +  4*s1 +  5*s2 +  4*s3;
+		int d1 = 4*s0 +  9*s1 + 12*s2 +  9*s3;
+		int d0 = 5*s0 + 12*s1 + 15*s2 + 12*s3;
+		s0 = s1;
+		s1 = s2;
+		s2 = s3;
+		s3 = *s++;
+		d[2*w] = d2 + 2*s3;
+		d[1*w] = d1 + 4*s3;
+		*d++   = d0 + 5*s3;
+	}
+
+	d[2*w] =  2*s0 +  4*s1 +  5*s2 +  6*s3;
+	d[1*w] =  4*s0 +  9*s1 + 12*s2 + 13*s3;
+	*d++   =  5*s0 + 12*s1 + 15*s2 + 17*s3;
+
+	d[2*w] =  2*s1 +  4*s2 + 11*s3;
+	d[1*w] =  4*s1 +  9*s2 + 25*s3;
+	*d     =  5*s1 + 12*s2 + 32*s3;
+}
+
+/* Calculate the results for row y of the image, of width w, by
+ * summing results from the temporary buffer s, and writing into the
+ * original pixmap at d. */
+static void
+gauss5col(unsigned char *d, const uint16_t *s, int y, int w)
+{
+	const uint16_t *s0, *s1, *s2, *s3, *s4;
+	y *= 3;
+	s0 = &s[((y+ 9+2)%15)*w];
+	s1 = &s[((y+12+1)%15)*w];
+	s2 = &s[( y      %15)*w];
+	s3 = &s[((y+ 3+1)%15)*w];
+	s4 = &s[((y+ 6+2)%15)*w];
+
+	for (; w > 0; w--)
+		*d++ = (*s0++ + *s1++ + *s2++ + *s3++ + *s4++ + 79)/159;
+}
+
+static void
+gauss5x5(fz_context *ctx, fz_pixmap *src)
+{
+	int w = src->w;
+	int h = src->h;
+	uint16_t *buf;
+	unsigned char *s = src->samples;
+	int y;
+
+	if (w < 5 || h < 5)
+		fz_throw(ctx, FZ_ERROR_ARGUMENT, "Pixmap too small");
+
+	buf = fz_malloc(ctx, w*3*5 * sizeof(uint16_t));
+
+	gauss5row(&buf[0*3*w], &s[0*w], w);
+	gauss5row(&buf[1*3*w], &s[1*w], w);
+	memcpy(&buf[3*3*w], buf, w*3 * sizeof(uint16_t)); /* row -2 */
+	memcpy(&buf[4*3*w], buf, w*3 * sizeof(uint16_t)); /* row -1 */
+	for (y = 2; y < h; y++)
+	{
+		gauss5row(&buf[(y%5)*3*w], &s[2*w], w);
+
+		gauss5col(s, buf, y-2, w);
+		s += w;
+	}
+	for (; y < h+2; y++)
+	{
+		memcpy(&buf[(y%5)*3*w], &buf[((y+4)%5)*3*w], 3*w*sizeof(uint16_t));
+		gauss5col(s, buf, y-2, w);
+		s += w;
+	}
+
+	fz_free(ctx, buf);
+}
+
+#ifdef DETECT_DOCUMENT_RGB
+/* Variant of the above that works on a single plane from rgb data */
+static void
+gauss5row3(uint16_t *d, const unsigned char *s, int w)
+{
+	int i;
+	int s0 = s[0];
+	int s1 = s[3];
+	int s2 = s[6];
+	int s3 = s[9];
+
+	s += 4*3;
+
+	d[2*w] = 11*s0 +  4*s1 +  2*s2;
+	d[1*w] = 25*s0 +  9*s1 +  4*s2;
+	*d++   = 32*s0 + 12*s1 +  5*s2;
+
+	d[2*w] =  6*s0 +  5*s1 +  4*s2 +  2*s3;
+	d[1*w] = 13*s0 + 12*s1 +  9*s2 +  4*s3;
+	*d++   = 17*s0 + 15*s1 + 12*s2 +  5*s3;
+
+	for (i = w - 4; i > 0; i--)
+	{
+		int d2 = 2*s0 +  4*s1 +  5*s2 +  4*s3;
+		int d1 = 4*s0 +  9*s1 + 12*s2 +  9*s3;
+		int d0 = 5*s0 + 12*s1 + 15*s2 + 12*s3;
+		s0 = s1;
+		s1 = s2;
+		s2 = s3;
+		s3 = *s; s += 3;
+		d[2*w] = d2 + 2*s3;
+		d[1*w] = d1 + 4*s3;
+		*d++   = d0 + 5*s3;
+	}
+
+	d[2*w] =  2*s0 +  4*s1 +  5*s2 +  6*s3;
+	d[1*w] =  4*s0 +  9*s1 + 12*s2 + 13*s3;
+	*d++   =  5*s0 + 12*s1 + 15*s2 + 17*s3;
+
+	d[2*w] =  2*s1 +  4*s2 + 11*s3;
+	d[1*w] =  4*s1 +  9*s2 + 25*s3;
+	*d     =  5*s1 + 12*s2 + 32*s3;
+}
+
+static void
+gauss5x5_3(fz_context *ctx, fz_pixmap *dst, const fz_pixmap *src, int comp)
+{
+	int w = src->w;
+	int h = src->h;
+	uint16_t *buf;
+	unsigned char *s = src->samples + comp;
+	unsigned char *d = dst->samples;
+	int y;
+
+	if (w < 5 || h < 5)
+		fz_throw(ctx, FZ_ERROR_ARGUMENT, "Pixmap too small");
+
+	buf = fz_malloc(ctx, w*3*5 * sizeof(uint16_t));
+
+	gauss5row3(&buf[0*3*w], s, w);
+	s += w*3;
+	gauss5row3(&buf[1*3*w], s, w);
+	s += w*3;
+	memcpy(&buf[3*3*w], buf, w*3 * sizeof(uint16_t)); /* row -2 */
+	memcpy(&buf[4*3*w], buf, w*3 * sizeof(uint16_t)); /* row -1 */
+	for (y = 2; y < h; y++)
+	{
+		gauss5row3(&buf[((y*3)%15)*w], s, w);
+
+		gauss5col(d, buf, y-2, w);
+		s += w*3;
+		d += w;
+	}
+	for (; y < h+2; y++)
+	{
+		memcpy(&buf[(y%5)*3*w], &buf[((y+4)%5)*3*w], 3*w*sizeof(uint16_t));
+		gauss5col(d, buf, y-2, w);
+		d += w;
+	}
+
+	fz_free(ctx, buf);
+}
+#endif
+
+/* The next set of functions perform the gradient calculation.
+ * We convolve with Sobel kernels Kx and Ky respectively:
+ *
+ * Kx =  -1  0  1    Ky =  1  2  1
+ *       -2  0  2          0  0  0
+ *       -1  0  1         -1 -2 -1
+ *
+ * We do this by using a rolling temporary buffer of int16_t's to hold
+ * 3 pairs of lines of weights scaled by (1 0 1) and (1 2 1).
+ *
+ * We can then sum entries from those lines to calculate kx and ky for
+ * each pixel in the image.
+ *
+ * Then by examining the values of x and y, we can figure out the
+ * "direction" of the edge (horizontal, vertical, or either diagonal),
+ * and the magnitude of the difference across that edge. These get
+ * encoded back into the original image storage using the 2 bottom bits
+ * for direction, and the top 6 bits for magnitude.
+ */
+
+static void
+pregradrow(int16_t *d, const unsigned char *s, int w)
+{
+	int i;
+	unsigned char s0 = *s++;
+	unsigned char s1 = *s++;
+
+	d[w] = 3*s0 + s1;
+	*d++ = s1 - s0;
+	for (i = w-2; i > 0; i--)
+	{
+		int s2 = *s++;
+		d[w] = s0 + 2*s1 + s2;
+		*d++ = s2 - s0;
+		s0 = s1;
+		s1 = s2;
+	}
+	d[w] = s0 + 3*s1;
+	*d   = s1 - s0;
+}
+
+static void
+pregradcol(unsigned char *d, const int16_t *buf, int y, int w, uint32_t *max)
+{
+	const int16_t *s0 = &buf[((y+2)%3)*w*2];
+	const int16_t *s1 = &buf[((y  )%3)*w*2];
+	const int16_t *s2 = &buf[((y+1)%3)*w*2];
+	int i;
+
+	for (i = w; i > 0; i--)
+	{
+		uint32_t ax, ay, mag;
+		int x;
+		y = s0[w] - s2[w];
+		x = *s0++ + 2 * *s1++ + *s2++;
+		ax = x >= 0 ? x : -x;
+		ay = y >= 0 ? y : -y;
+		/* x and y are now both in the range -1020..1020 */
+		/* Now we calculate slope and gradient.
+		 *   angle = atan2(y, x);
+		 *   intensity = hypot(x, y);
+		 * But wait, we don't need that accuracy. We only need
+		 * to distinguish 4 directions...
+		 *
+		 * -22.5 < angle <=  22.5 = 0
+		 *  22.5 < angle <=  67.5 = 1
+		 *  67.5 < angle <= 112.5 = 2
+		 * 115.5 < angle <= 157.5 = 3
+		 * (and the reflections)
+		 *
+		 * 7 0 1   (x positive right, y positive downwards)
+		 * 6 * 2
+		 * 5 4 3
+		 *
+		 * tan(22.5)*65536 = 27146.
+		 * And for magnitude, we consider the magnitude just
+		 * along the 8 directions we've picked. So
+		 * 65536/SQR(2) = 46341.
+		 * We want magnitude in the 0...63 range.
+		 */
+		if (ax<<16 < 27146*ay)
+		{
+			/* angle = 0 */
+			mag = ay<<16; /* 0 to 1020<<16 */
+		}
+		else if (ay<<16 < ax*27146)
+		{
+			/* angle = 2 */
+			mag = ax<<16; /* 0 to 1020<<16 */
+		}
+		else
+		{
+			/* angle = 1 or 3 */
+			mag = (46341*(ax+ay));
+		}
+		if (mag > *max)
+			*max = mag;
+	}
+}
+
+static uint32_t
+pregrad(fz_context *ctx, fz_pixmap *src)
+{
+	int w = src->w;
+	int h = src->h;
+	unsigned char *s = src->samples;
+	int16_t *buf = fz_malloc(ctx, w*2*3*sizeof(int16_t));
+	int y;
+	uint32_t max = 0;
+
+	pregradrow(buf, s, w); /* Line 0 */
+	memcpy(&buf[w*2*2], buf, w*2*sizeof(int16_t)); /* Line 1 */
+	s += w;
+	for (y = 1; y < h-1; y++)
+	{
+		pregradrow(&buf[(y%3)*w*2], s, w);
+		pregradcol(s-w, buf, y-1, w, &max);
+		s += w;
+	}
+	memcpy(&buf[((y+1)%3)*w*2], &buf[(y%3)*w*2], w*2*sizeof(int16_t)); /* Line h */
+	pregradcol(s-w, buf, h-2, w, &max);
+	pregradcol(s, buf, h-1, w, &max);
+
+	fz_free(ctx, buf);
+
+	if (max == 0)
+		return 1;
+	else
+		return 0x7FFFFFFFU/max;
+}
+
+
+static void
+gradrow(int16_t *d, const unsigned char *s, int w)
+{
+	int i;
+	unsigned char s0 = *s++;
+	unsigned char s1 = *s++;
+
+	d[w] = 3*s0 + s1;
+	*d++ = s1 - s0;
+	for (i = w-2; i > 0; i--)
+	{
+		int s2 = *s++;
+		d[w] = s0 + 2*s1 + s2;
+		*d++ = s2 - s0;
+		s0 = s1;
+		s1 = s2;
+	}
+	d[w] = s0 + 3*s1;
+	*d   = s1 - s0;
+}
+
+static void
+gradcol(unsigned char *d, const int16_t *buf, int y, int w, int scale)
+{
+	const int16_t *s0 = &buf[((y+2)%3)*w*2];
+	const int16_t *s1 = &buf[((y  )%3)*w*2];
+	const int16_t *s2 = &buf[((y+1)%3)*w*2];
+	int i;
+
+	for (i = w; i > 0; i--)
+	{
+		uint32_t ax, ay, mag, scaled;
+		int angle, x;
+		y = s0[w] - s2[w];
+		x = *s0++ + 2 * *s1++ + *s2++;
+		ax = x >= 0 ? x : -x;
+		ay = y >= 0 ? y : -y;
+		/* x and y are now both in the range -1020..1020 */
+		/* Now we calculate slope and gradient.
+		 *   angle = atan2(y, x);
+		 *   intensity = hypot(x, y);
+		 * But wait, we don't need that accuracy. We only need
+		 * to distinguish 4 directions...
+		 *
+		 * -22.5 < angle <=  22.5 = 0
+		 *  22.5 < angle <=  67.5 = 1
+		 *  67.5 < angle <= 112.5 = 2
+		 * 115.5 < angle <= 157.5 = 3
+		 * (and the reflections)
+		 *
+		 * 7 0 1   (x positive right, y positive downwards)
+		 * 6 * 2
+		 * 5 4 3
+		 *
+		 * tan(22.5)*65536 = 27146.
+		 * And for magnitude, we consider the magnitude just
+		 * along the 8 directions we've picked. So
+		 * 65536/SQR(2) = 46341.
+		 * We want magnitude in the 0...63 range.
+		 */
+		if (ax<<16 < 27146*ay)
+		{
+			angle = 0;
+			mag = ay<<16; /* 0 to 1020<<16 */
+		}
+		else if (ay<<16 < ax*27146)
+		{
+			angle = 2;
+			mag = ax<<16; /* 0 to 1020<<16 */
+		}
+		else
+		{
+			/* 1 or 3 */
+			angle = (x^y) >= 0 ? 3 : 1;
+			mag = (46341*(ax+ay));
+		}
+		scaled = (mag * scale)>>25;
+		assert(scaled >= 0 && scaled <= 63);
+		*d++ = (scaled<<2) | angle;
+	}
+}
+
+static void
+grad(fz_context *ctx, fz_pixmap *src, uint32_t scale)
+{
+	int w = src->w;
+	int h = src->h;
+	unsigned char *s = src->samples;
+	int16_t *buf = fz_malloc(ctx, w*2*3*sizeof(int16_t));
+	int y;
+
+	gradrow(buf, s, w); /* Line 0 */
+	memcpy(&buf[w*2*2], buf, w*2*sizeof(int16_t)); /* Line 1 */
+	s += w;
+	for (y = 1; y < h-1; y++)
+	{
+		gradrow(&buf[(y%3)*w*2], s, w);
+		gradcol(s-w, buf, y-1, w, scale);
+		s += w;
+	}
+	memcpy(&buf[((y+1)%3)*w*2], &buf[(y%3)*w*2], w*2*sizeof(int16_t)); /* Line h */
+	gradcol(s-w, buf, h-2, w, scale);
+	gradcol(s, buf, h-1, w, scale);
+
+	fz_free(ctx, buf);
+}
+
+#ifdef DETECT_DOCUMENT_RGB
+static void
+combine_grad(fz_pixmap *grey, const fz_pixmap *r, const fz_pixmap *g, const fz_pixmap *b)
+{
+	int n;
+	unsigned char *sd = grey->samples;
+	const unsigned char *sr = r->samples;
+	const unsigned char *sg = g->samples;
+	const unsigned char *sb = b->samples;
+
+	for (n = g->w * g->h; n > 0; n--)
+	{
+		unsigned char vg = *sg++;
+		unsigned char vr = *sr++;
+		unsigned char vb = *sb++;
+		unsigned char vd = *sd++;
+		if (vr > vg)
+			vg = vr;
+		if (vb > vg)
+			vg = vb;
+		if (vg > vd)
+			sd[-1] = vg;
+	}
+}
+#endif
+
+/* Next, we perform Non-Maximum Suppression and Double Thresholding,
+ * both in the same phase.
+ *
+ * We walk the image, looking at the magnitude of the edges. Edges below
+ * the 'weak' threshold are discarded. Otherwise, neighbouring pixels in
+ * the direction of the edge are considered; if other pixels are stronger
+ * then this pixel is discarded. If not, we classify ourself as either
+ * 'strong' or 'weak'.
+ */
+#define WEAK_EDGE 64
+#define STRONG_EDGE 128
+static void
+nonmax(fz_context *ctx, fz_pixmap *dst, const fz_pixmap *src, int pass)
+{
+	int w = src->w;
+	int h = src->h;
+	const unsigned char *s0 = src->samples;
+	const unsigned char *s1 = s0;
+	const unsigned char *s2 = s0+w;
+	unsigned char *d = dst->samples;
+	int x, y;
+	/* thresholds are in the 0 to 63 range.
+	 * WEAK is typically 0.1ish, STRONG 0.3ish
+	 */
+	int weak = 6 - pass;
+	int strong = 12 - pass*2;
+
+	/* On entry, pixels have the angle in the bottom 2 bits and the magnitude in the rest. */
+	/* On exit, strong pixels have bit 7 set, weak pixels have bit 6, others are 0.
+	 * strong and weak pixels have the angle in bits 4 and 5. */
+
+	for (y = h-1; y >= 0;)
+	{
+		int lastmag;
+		int ang = *s1++;
+		int mag = ang>>2;
+		int q, r;
+
+		/* Pixel 0 */
+		if (mag <= weak)
+		{
+			/* Not even a weak edge. We'll never keep it. */
+			*d++ = 0;
+			s0++;
+			s2++;
+		}
+		else
+		{
+			ang &= 3;
+			switch (ang)
+			{
+				default:
+				case 0:
+					q = (*s0++)>>2;
+					r = (*s2++)>>2;
+					break;
+				case 1:
+					q = (*++s0)>>2;
+					r = 0;
+					s2++;
+					break;
+				case 2:
+					s0++;
+					s2++;
+					q = 0;
+					r = *s1>>2;
+					break;
+				case 3:
+					q = 0;
+					s0++;
+					r = (*++s2)>>2;
+					break;
+			}
+			if (mag < q || mag < r)
+			{
+				/* Neighbouring edges are stronger.
+				 * Lose this one. */
+				*d++ = 0;
+			}
+			else if (mag < strong)
+			{
+				/* Weak edge. */
+				*d++ = WEAK_EDGE | (ang<<4);
+			}
+			else
+			{
+				/* Strong edge */
+				*d++ = STRONG_EDGE | (ang<<4);
+			}
+		}
+		lastmag = mag;
+		for (x = w-2; x > 0; x--)
+		{
+			ang = *s1++;
+			mag = ang>>2;
+			if (mag <= weak)
+			{
+				/* Not even a weak edge. We'll never keep it. */
+				*d++ = 0;
+				s0++;
+				s2++;
+			}
+			else
+			{
+				ang &= 3;
+				switch (ang)
+				{
+					default:
+					case 0:
+						q = (*s0++)>>2;
+						r = (*s2++)>>2;
+						break;
+					case 1:
+						q = (*++s0)>>2;
+						r = ((s2++)[-1])>>2;
+						break;
+					case 2:
+						s0++;
+						s2++;
+						q = lastmag;
+						r = *s1>>2;
+						break;
+					case 3:
+						q = ((s0++)[-1])>>2;
+						r = (*++s2)>>2;
+						break;
+				}
+				if (mag < q || mag < r)
+				{
+					/* Neighbouring edges are stronger.
+					 * Lose this one. */
+					*d++ = 0;
+				}
+				else if (mag < strong)
+				{
+					/* Weak edge. */
+					*d++ = WEAK_EDGE | (ang<<4);
+				}
+				else
+				{
+					/* Strong edge */
+					*d++ = STRONG_EDGE | (ang<<4);
+				}
+			}
+			lastmag = mag;
+		}
+		/* Pixel w-1 */
+		ang = *s1++;
+		mag = ang>>2;
+		if (mag <= weak)
+		{
+			/* Not even a weak edge. We'll never keep it. */
+			*d++ = 0;
+			s0++;
+			s2++;
+			lastmag = 0;
+		}
+		else
+		{
+			ang &= 3;
+			switch (ang)
+			{
+				default:
+				case 0:
+					q = (*s0++)>>2;
+					r = (*s2++)>>2;
+					break;
+				case 1:
+					q = 0;
+					s0++;
+					r = ((s2++)[-1])>>2;
+					break;
+				case 2:
+					s0++;
+					s2++;
+					q = 0;
+					r = *s1>>2;
+					break;
+				case 3:
+					q = ((s0++)[-1])>>2;
+					r = 0;
+					s2++;
+					break;
+			}
+			if (mag < q || mag < r)
+			{
+				/* Neighbouring edges are stronger.
+				 * Lose this one. */
+				*d++ = 0;
+			}
+			else if (mag < strong)
+			{
+				/* Weak edge. */
+				*d++ = WEAK_EDGE | (ang<<4);
+			}
+			else
+			{
+				/* Strong edge */
+				*d++ = STRONG_EDGE | (ang<<4);
+			}
+		}
+		s0 = s1-w;
+		if (--y == 0)
+			s2 = s1;
+	}
+}
+
+/* Next, we have the hysteresis phase. Here we bump any 'weak' pixel
+ * that has at least one strong pixel around it up to being a 'strong'
+ * pixel.
+ *
+ * On entry, strong pixels have bit 7 set, weak pixels have bit 6 set,
+ * the angle is in bits 4 and 5, everything else is 0.
+ *
+ * We walk the rows of the image, and for each pixel we set bit 0 to be
+ * the logical OR of all bit 7's of itself, and its horizontally
+ * neighbouring pixels.
+ *
+ * Once we have done the first 2 rows like that, we can combine the
+ * operation of generating the bottom bits for row i, with the
+ * calculation of row i-1. Any given pixel on row i-1 should be promoted
+ * to 'strong' if it was 'weak', and if the logical OR of the matching
+ * pixels in row i, i-1 or i-2 has bit 0 set.
+ *
+ * At the end of this process any pixel with bit 7 set is 'strong'.
+ * Bits 4 and 5 still have the angle.
+ */
+
+static void
+hysteresis(fz_context *ctx, fz_pixmap *src)
+{
+	int w = src->w;
+	int h = src->h;
+	unsigned char *s0 = src->samples;
+	unsigned char *s1 = s0;
+	unsigned char *s2 = s0;
+	unsigned char v0, v1, v2, r0, r1, r2;
+	int x, y;
+
+	/* On entry, strong pixels have bit 7 set, weak pixels have bit 6, others are 0.
+	 * strong and weak pixels have the angle in bits 4 and 5. */
+
+	/* We make the bottom bit in every pixel be 1 iff the pixel
+	 * or the ones to either side of it are 'strong'. */
+
+	/* First row - just do the bottom bit. */
+	/* Pixel 0 */
+	v0 = *s0++;
+	v1 = *s0++;
+	s0[-2] = v0 | ((v0 | v1)>>7);
+	/* Middle pixels */
+	for (x = w-2; x > 0; x--)
+	{
+		v2 = *s0++;
+		s0[-2] = v1 | ((v0 | v1 | v2)>>7);
+		v0 = v1;
+		v1 = v2;
+	}
+	/* Pixel w-1 */
+	s0[-1] = v1 | ((v0 | v1)>>7);
+	assert(s0 == src->samples + w);
+
+	/* Second row - do the "bottom bit" for the second row, and
+	 * perform hysteresis on the top row. */
+	/* Pixel 0 */
+	v0 = *s0++;
+	v1 = *s0++;
+	r0 = v0 | ((v0 | v1)>>7);
+	s0[-2] = r0;
+	r1 = *s1++;
+	if ((r1>>6) & (r0 | r1))
+		s1[-1] |= 128;
+	/* Middle pixels */
+	for (x = w-2; x > 0; x--)
+	{
+		v2 = *s0++;
+		r0 = v1 | ((v0 | v1 | v2)>>7);
+		s0[-2] = r0;
+		r1 = *s1++;
+		if ((r1>>6) & (r0 | r1))
+			s1[-1] |= 128;
+		v0 = v1;
+		v1 = v2;
+	}
+	/* Pixel w-1 */
+	r0 = v1 | ((v0 | v1)>>7);
+	s0[-1] = r0;
+	r1 = *s1++;
+	if ((r1>>6) & (r0 | r1))
+		s1[-1] |= 128;
+	assert(s0 == s1 + w);
+
+	/* Now we get into the swing of things. We do the "bottom bit"
+	 * for row n+1, and do the actual processing for row n. */
+	for (y = h-4; y > 0; y--)
+	{
+		/* Pixel 0 */
+		v0 = *s0++;
+		v1 = *s0++;
+		r0 = v0 | ((v0 | v1)>>7);
+		s0[-2] = r0;
+		r1 = *s1++;
+		r2 = *s2++;
+		if ((r1>>6) & (r0 | r1 | r2))
+			s1[-1] |= 128;
+		/* Middle pixels */
+		for (x = w-2; x > 0; x--)
+		{
+			v2 = *s0++;
+			r0 = v1 | ((v0 | v1 | v2)>>7);
+			s0[-2] = r0;
+			r1 = *s1++;
+			r2 = *s2++;
+			if ((r1>>6) & (r0 | r1 | r2))
+				s1[-1] |= 128;
+			v0 = v1;
+			v1 = v2;
+		}
+		/* Pixel w-1 */
+		r0 = v1 | ((v0 | v1)>>7);
+		s0[-1] = r0;
+		r1 = *s1++;
+		r2 = *s2++;
+		if ((r1>>6) & (r0 | r1 | r2))
+			s1[-1] |= 128;
+		assert(s0 == s1 + w);
+		assert(s1 == s2 + w);
+	}
+
+	/* Final 2 rows together */
+	/* Pixel 0 */
+	v0 = *s0++;
+	v1 = *s0++;
+	r0 = v0 | ((v0 | v1)>>7);
+	r1 = *s1++;
+	r2 = *s2++;
+	if ((r1>>6) & (r0 | r1 | r2))
+		s1[-1] |= 128;
+	if ((r0>>6) & (r0 | r1))
+		s0[-2] |= 128;
+	/* Middle pixels */
+	for (x = w-2; x > 0; x--)
+	{
+		v2 = *s0++;
+		r0 = v1 | ((v0 | v1 | v2)>>7);
+		r1 = *s1++;
+		r2 = *s2++;
+		if ((r1>>6) & (r0 | r1 | r2))
+			s1[-1] |= 128;
+		if ((r0>>6) & (r0 | r1))
+			s0[-2] |= 128;
+		v0 = v1;
+		v1 = v2;
+	}
+	/* Pixel w-1 */
+	r0 = v1 | ((v0 | v1)>>7);
+	r1 = *s1;
+	r2 = *s2;
+	if ((r1>>6) & (r0 | r1 | r2))
+		s1[0] |= 128;
+	if ((r0>>6) & (r0 | r1))
+		s0[-1] |= 128;
+}
+
+#ifdef WARP_DEBUG
+/* A simple function to keep just bit 7 of the image. This 'cleans' the
+ * pixmap so that a grey version can be saved out for visual checking. */
+static void
+clean(fz_context *ctx, fz_pixmap *src)
+{
+	int w = src->w;
+	int h = src->h;
+	unsigned char *s = src->samples;
+
+	for (w = w*h; w > 0; w--)
+		*s = *s & 128, s++;
+}
+#endif
+
+#define SINTABLE_SHIFT 14
+static int16_t sintable[270];
+#define costable (&sintable[90])
+
+/* We have collected an array of edge data.
+ * For each pixel, we know whether there is a 'strong' edge
+ * there, and if so, in which of 4 directions it runs.
+ *
+ * We want to convert this into hough space.
+ *
+ * The literature describes points in Hough space as having the
+ * form (r, theta). The value of any given point point in hough
+ * space is the "strength" of the line described by:
+ *   x.cos(theta) + y.sin(theta) = r
+ *
+ *  |     \
+ *  |     /\
+ *  |    /\/\
+ *  |  r/    \
+ *  |  /      \
+ *  | /        \
+ *  |/theta     \
+ * -+--------------
+ *
+ * i.e. r is the shortest distance from the origin to the line,
+ * and theta gives us the angle of that shortest line.
+ *
+ * But, we are using angles of theta from the vertical, so we need
+ * a different formulation:
+ *
+ *  |     \
+ *  |     /\
+ *  |    /\/\
+ *  |  r/    \  t = theta
+ *  |  /      \
+ *  |t/        \
+ *  |/          \
+ * -+--------------
+ *
+ * So we're using 90-theta. cos(90-theta) = sin(theta),
+ * and sin(90-theta) = cos(theta).
+ *
+ * So: x.sin(theta) + y.cos(theta) = r (for theta measured
+ * clockwise from the y axis).
+ *
+ * We've been collecting angles according to their position in one
+ * of 4 octants:
+ *
+ * Ang 0 = close to a horizontal edge (-22.5 to 22.5 degrees)
+ * Ang 1 = close to diagonal edge (top left to bottom right) (22.5 to 67.5 degrees)
+ * Ang 2 = close to a vertical edge (67.5 to 112.5 degrees)
+ * Ang 3 = close to diagonal edge (bottom left to top right) (112.5 to 157.5 degrees)
+ *
+ * The other 4 octants mirror onto these.
+ *
+ * So, for each point in our (x,y) pixmap we whether we have a strong
+ * pixel or not. If we have such a pixel, then we know that an edge
+ * passes through that point, and which of those 4 octants it is in.
+ *
+ * We therefore consider all the possible angles within that octant,
+ * and add a 'vote' for each of those lines into our hough transformed
+ * space.
+ */
+
+
+static void
+mark_hough(uint32_t *d, int x, int y, int maxlen, int reduce, int ang)
+{
+	int theta;
+	int stride = (maxlen*2)>>reduce;
+	int minang, maxang;
+
+	switch (ang)
+	{
+		/* The angles are really 22.5, 67.5 etc, but we are working in ints
+		 * and specifying maxang as the first one greater than the one we
+		 * want to mark. */
+		default:
+		case 0:
+			/* Vertical boundary. Lines through this boundary
+			 * go horizontally. So the perpendicular to them
+			 * is vertical. */
+			minang = 0; maxang = 23;
+			break;
+		case 1:
+			/* NE boundary */
+			minang = 23; maxang = 68;
+			break;
+		case 2:
+			/* Horizontal boundary. */
+			minang = 68; maxang = 113;
+			break;
+		case 3:
+			/* SE boundary */
+			minang = 113; maxang = 158;
+			break;
+		case 4:
+			/* For debugging: */
+			minang = 0; maxang = 180;
+			break;
+	}
+
+	d += minang * stride;
+	while (1)
+	{
+		for (theta = minang; theta < maxang; theta++)
+		{
+			int p = (x*sintable[theta] + y*costable[theta])>>SINTABLE_SHIFT;
+			int v = (maxlen + p)>>reduce;
+
+			d[v]++;
+			d += stride;
+		}
+		if (ang != 0)
+			break;
+		ang = 4;
+		minang = 158;
+		d += (minang - maxang) * stride;
+		maxang = 180;
+	}
+}
+
+#ifdef WARP_DEBUG
+static void
+save_hough_debug(fz_context *ctx, uint32_t *hough, int stride)
+{
+	uint32_t scale;
+	uint32_t maxval;
+	uint32_t *p;
+	int y;
+	fz_pixmap *dst = fz_new_pixmap(ctx, NULL, stride, 180, NULL, 0);
+	unsigned char *d = dst->samples;
+	/* Make the image of the hough space (for debugging) */
+	maxval = 1; /* Avoid possible division by zero */
+	p = hough;
+	for (y = 180*stride; y > 0; y--)
+	{
+		uint32_t v = *p++;
+		if (v > maxval)
+			maxval = v;
+	}
+
+	scale = 0xFFFFFFFFU/maxval;
+	p = hough;
+	for (y = 180*stride; y > 0; y--)
+	{
+		*d++ = (scale * *p++)>>24;
+	}
+	fz_save_pixmap_as_png(ctx, dst, "hough.png");
+	fz_drop_pixmap(ctx, dst);
+}
+#endif
+
+static uint32_t *do_hough(fz_context *ctx, const fz_pixmap *src, int stride, int maxlen, int reduce)
+{
+	int w = src->w;
+	int h = src->h;
+	int x, y;
+	const unsigned char *s = src->samples;
+	uint32_t *hough = fz_calloc(ctx, sizeof(uint32_t), 180*stride);
+
+	START_TIME();
+	/* Construct the hough space representation. */
+	for (y = 0; y < h; y++)
+	{
+		for (x = 0; x < w; x++)
+		{
+			unsigned char v = *s++;
+			if (v & 128)
+				mark_hough(hough, x, y, maxlen, reduce, (v>>4)&3);
+		}
+	}
+	END_TIME("Building hough");
+
+#ifdef WARP_DEBUG
+	save_hough_debug(ctx, hough, stride);
+#endif
+
+	return hough;
+}
+
+typedef struct
+{
+	int ang;
+	int dis;
+	int strength;
+	int x0;
+	int y0;
+	int x1;
+	int y1;
+} hough_edge_t;
+
+typedef struct
+{
+	int e0;
+	int e1;
+	int score;
+	float x;
+	float y;
+} hough_point_t;
+
+static int
+intersect(hough_point_t *pt, hough_edge_t *edges, int e0, int e1)
+{
+	int x1 = edges[e0].x0;
+	int y1 = edges[e0].y0;
+	int x2 = edges[e0].x1;
+	int y2 = edges[e0].y1;
+	int x3 = edges[e1].x0;
+	int y3 = edges[e1].y0;
+	int x4 = edges[e1].x1;
+	int y4 = edges[e1].y1;
+	int d = (x1-x2)*(y3-y4)-(y1-y2)*(x3-x4);
+	int t = (x1-x3)*(y3-y4)-(y1-y3)*(x3-x4);
+	float ft;
+
+	if (d < 0)
+	{
+		if (t < d || t > 0)
+			return 0;
+	}
+	else
+	{
+		if (t < 0 || t > d)
+			return 0;
+	}
+
+	pt->e0 = e0;
+	pt->e1 = e1;
+	pt->score = edges[e0].strength + edges[e1].strength;
+	ft = t / (float)d;
+	pt->x = x1 + ft*(x2-x1);
+	pt->y = y1 + ft*(y2-y1);
+
+	return 1;
+}
+
+typedef struct
+{
+	int w;
+	int h;
+	int edge[4];
+	int point[4];
+	int best_score;
+	int best_edge[4];
+	int best_point[4];
+} hough_route_t;
+
+/* To test convexity, we use the dot product:
+ *      B
+ *     / \
+ *    /   C
+ *   A
+ *
+ * The dot product of vectors u and v is:
+ *    dot(u,v) = |u|.|v|.cos(theta)
+ * Helpfully, cos(theta) > 0 for  -90 < theta < 90.
+ * So if we can form perp(AB) = the vector given by rotating
+ * AB 90 degrees clockwise, we can then look at the sign of:
+ *    dot(perp(AB),BC)
+ * If we do that for each corner of the polygon, and the signs
+ * of the results for all of them are the same, we have convexity.
+ *
+ * If any of the dot products are zero, the edges are parallel
+ * (i.e. the same edge). This should never happen.
+ *
+ * If AB = (x,y), then perp(AB) = (y,-x)
+ */
+static void
+score_corner(hough_point_t *points, hough_route_t *route, int p0, int *score, int *sign)
+{
+	float x0, x1, x2, y0, y1, y2, ux, uy, vx, vy, dot;
+	int s, len;
+	float costheta;
+
+	/* If we've already decided this is duff, then bale. */
+	if (*score < 0)
+		return;
+
+	x0 = points[route->point[p0]].x;
+	y0 = points[route->point[p0]].y;
+	x1 = points[route->point[(p0+1)&3]].x;
+	y1 = points[route->point[(p0+1)&3]].y;
+	x2 = points[route->point[(p0+2)&3]].x;
+	y2 = points[route->point[(p0+2)&3]].y;
+
+	ux = y1-y0;  /* u = Perp(p0 to p1)*/
+	uy = x0-x1;
+	vx = x2-x1;  /* v = p1 to p2 */
+	vy = y2-y1;
+
+	dot = ux*vx + uy*vy;
+	if (dot == 0)
+	{
+		*score = -1;
+		return;
+	}
+
+	s = (dot > 0) ? 1 : -1;
+	if (*sign == 0)
+		*sign = s;
+	else if (*sign != s)
+	{
+		*score = -1;
+		return;
+	}
+
+	len = sqrt(ux*ux + uy*uy) * sqrt(vx*vx + vy*vy);
+	costheta = dot / (float)len;
+	if (costheta < 0)
+		costheta = -costheta;
+	if (costheta < 0.7)
+	{
+		*score = -1;
+		return;
+	}
+	costheta *= costheta;
+
+	*score += points[route->point[(p0+1)%3]].score * costheta;
+}
+
+/* route points to 8 ints:
+ * 2*i   = edge number
+ * 2*i+1 = point number
+ */
+static int
+score_route(hough_point_t *points, hough_route_t *route)
+{
+	int score = 0;
+	int sign = 0;
+
+	score_corner(points, route, 0, &score, &sign);
+	score_corner(points, route, 1, &score, &sign);
+	score_corner(points, route, 2, &score, &sign);
+	score_corner(points, route, 3, &score, &sign);
+
+	return score;
+}
+
+static float
+score_by_area(const hough_point_t *points, const hough_route_t *route)
+{
+	float double_area_of_quad =
+		points[route->point[0]].x * points[route->point[1]].y +
+		points[route->point[1]].x * points[route->point[2]].y +
+		points[route->point[2]].x * points[route->point[3]].y +
+		points[route->point[3]].x * points[route->point[0]].y -
+		points[route->point[1]].x * points[route->point[0]].y -
+		points[route->point[2]].x * points[route->point[1]].y -
+		points[route->point[3]].x * points[route->point[2]].y -
+		points[route->point[0]].x * points[route->point[3]].y;
+	float double_area = route->w * (float)route->h * 2;
+	if (double_area_of_quad < 0)
+		double_area_of_quad = -double_area_of_quad;
+	/* Anything larger than a quarter of the screen is acceptable. */
+	if (double_area_of_quad*4 > double_area)
+		return 1;
+	/* Anything smaller than a 16th of the screen is unacceptable in all circumstances. */
+	if (double_area_of_quad*16 < double_area)
+		return 0;
+
+	/* Otherwise, scale the score down by how much it's less than a quarter of the screen. */
+	return (double_area_of_quad*4)/double_area;
+}
+
+/* The first n+1 edges of the route are filled in, as are the first n
+ * points.
+ * 2*i   = edge number
+ * 2*i+1 = point number
+ */
+static void
+find_route(fz_context *ctx, hough_point_t *points, int num_points, hough_route_t *route, int n)
+{
+	int i;
+
+	for (i = 0; i < num_points; i++)
+	{
+		/* If this point continues our route (e0 == route->edge[n])
+		 * then the next point in the route might be e1. */
+		int e0 = points[i].e0;
+		int e1 = points[i].e1;
+		if (e0 == route->edge[n])
+		{
+		}
+		else if (e1 == route->edge[n])
+		{
+			int t = e0; e0 = e1; e1 = t;
+		}
+		else
+			continue; /* Doesn't fit. Keep searching. */
+
+		/* If we've found 3 points already, then this is the
+		 * fourth. */
+		if (n == 3)
+		{
+			int score = 0;
+
+			/* If we haven't looped back to our first edge,
+			 * no good. Keep searching. */
+			if (route->edge[0] != e1)
+				continue;
+			route->point[3] = i;
+
+			score = score_route(points, route);
+
+			if (score > 0)
+				score *= score_by_area(points, route);
+
+#ifdef WARP_DEBUG
+			debug_printf(ctx, "Found route: (point=%d %d %d %d) (edge %d %d %d %d) score=%d\n",
+				route->point[0], route->point[1], route->point[2], route->point[3],
+				route->edge[0], route->edge[1], route->edge[2], route->edge[3],
+				score);
+#endif
+
+			/* We want our route to be convex */
+			if (score < 0)
+				continue;
+
+			/* Score the route */
+			if (route->best_score < score)
+			{
+				route->best_score = score;
+				memcpy(route->best_edge, route->edge, sizeof(*route->edge)*4);
+				memcpy(route->best_point, route->point, sizeof(*route->point)*4);
+			}
+		}
+		else
+		{
+			int j;
+			for (j = 0; j < n && route->edge[j] != e1; j++);
+			/* If we're about to loop back to any of the
+			 * previous edges, keep searching. */
+			if (j != n)
+				continue;
+
+			/* Possible. Extend the route, and recurse. */
+			route->point[n] = i;
+			route->edge[n+1] = e1;
+			find_route(ctx, points, num_points, route, n+1);
+		}
+	}
+}
+
+#define MAX_EDGES 32
+#define BLOT_ANG 10
+#define BLOT_DIS 10
+static int
+make_hough(fz_context *ctx, const fz_pixmap *src, fz_quad *corners)
+{
+	int w = src->w;
+	int h = src->h;
+	int maxlen = (int)(sqrtf(w*w + h*h) + 0.5);
+	uint32_t *hough;
+	int x, y;
+	int reduce;
+	int stride;
+	hough_edge_t edge[MAX_EDGES];
+	hough_point_t points[MAX_EDGES * MAX_EDGES];
+	hough_route_t route;
+	int num_edges, num_points;
+
+	/* costable could (should) be statically inited. */
+	{
+		int i;
+		for (i = 0; i < 270; i++)
+		{
+			float theta = i*M_PI/180;
+			sintable[i] = (int16_t)((1<<SINTABLE_SHIFT)*sinf(theta) + 0.5f);
+		}
+	}
+
+	/* Figure out a suitable scale for the data. */
+	reduce = 0;
+	while ((maxlen*2>>reduce) > 720 && reduce < 16)
+		reduce++;
+
+	stride = (maxlen*2)>>reduce;
+	hough = do_hough(ctx, src, stride, maxlen, reduce);
+
+	/* We want to find the top n edges that aren't too close to
+	 * one another. */
+	for (x = 0; x < MAX_EDGES; x++)
+	{
+		int ang, dis;
+		int minang, maxang, mindis, maxdis;
+		int where = 0;
+		uint32_t *p = hough;
+		uint32_t maxval = 0;
+		for (y = 180*stride; y > 0; y--)
+		{
+			uint32_t v = *p++;
+			if (v > maxval)
+				maxval = v, where = y;
+		}
+		if (where == 0)
+			break;
+		where = 180*stride - where;
+		ang = edge[x].ang = where/stride;
+		dis = edge[x].dis = where - edge[x].ang*stride;
+		edge[x].strength = hough[where];
+		/* We don't want to find any other maxima that are too
+		 * close to this one, so we 'blot out' stuff around this
+		 * maxima. */
+#ifdef WARP_DEBUG
+		debug_printf(ctx, "Maxima %d: dist=%d ang=%d strength=%d\n",
+			x, (dis<<reduce)-maxlen, ang-90, edge[x].strength);
+#endif
+		minang = ang - BLOT_ANG;
+		if (minang < 0)
+			minang = 0;
+		maxang = ang + BLOT_ANG;
+		if (maxang > 180)
+			maxang = 180;
+		mindis = dis - BLOT_DIS;
+		if (mindis < 0)
+			mindis = 0;
+		maxdis = dis + BLOT_DIS;
+		if (maxdis > stride)
+			maxdis = stride;
+		p = hough + minang*stride + mindis;
+		maxdis = (maxdis-mindis)*sizeof(uint32_t);
+		for (y = maxang-minang; y > 0; y--)
+		{
+			memset(p, 0, maxdis);
+			p += stride;
+		}
+#ifdef WARP_DEBUG
+		//save_hough_debug(ctx, hough, stride);
+#endif
+	}
+	num_edges = x;
+	if (num_edges == 0)
+		return 0;
+
+	/* Find edges in terms of lines. */
+	for (x = 0; x < num_edges; x++)
+	{
+		int ang = edge[x].ang;
+		int dis = edge[x].dis;
+		int p = (dis<<reduce) - maxlen;
+		if (ang < 45 || ang > 135)
+		{
+			/* Mostly horizontal line */
+			edge[x].x0 = 0;
+			edge[x].x1 = w;
+			edge[x].y0 = ((p<<SINTABLE_SHIFT) - edge[x].x0*sintable[ang])/costable[ang];
+			edge[x].y1 = ((p<<SINTABLE_SHIFT) - edge[x].x1*sintable[ang])/costable[ang];
+		}
+		else
+		{
+			/* Mostly vertical line */
+			edge[x].y0 = 0;
+			edge[x].y1 = h;
+			edge[x].x0 = ((p<<SINTABLE_SHIFT) - edge[x].y0*costable[ang])/sintable[ang];
+			edge[x].x1 = ((p<<SINTABLE_SHIFT) - edge[x].y1*costable[ang])/sintable[ang];
+		}
+	}
+
+	/* Find the points of intersection */
+	num_points = 0;
+	for (x = 0; x < num_edges-1; x++)
+		for (y = x+1; y < num_edges; y++)
+			num_points += intersect(&points[num_points],
+						edge, x, y);
+
+#ifdef WARP_DEBUG
+	{
+		debug_printf(ctx, "%d edges, %d points\n", num_edges, num_points);
+		for (x = 0; x < num_points; x++)
+		{
+			debug_printf(ctx, "p%d: %d %d (score %d, %d+%d)\n", x,
+				(int)points[x].x, (int)points[x].y, points[x].score,
+				points[x].e0, points[x].e1);
+		}
+	}
+#endif
+
+	/* Now, go looking for 'routes' A->B->C->D->A */
+	{
+		int i;
+		route.w = src->w;
+		route.h = src->h;
+		route.best_score = -1;
+		for (i = 0; i < num_points; i++)
+		{
+			route.edge[0] = points[i].e0;
+			route.point[0] = i;
+			route.edge[1] = points[i].e1;
+			find_route(ctx, points, num_points, &route, 1);
+		}
+
+#ifdef WARP_DEBUG
+		if (route.best_score >= 0)
+		{
+			debug_printf(ctx, "Score: %d, Edges=%d->%d->%d->%d, Points=%d->%d->%d->%d\n",
+				route.best_score,
+				route.best_edge[0],
+				route.best_edge[1],
+				route.best_edge[2],
+				route.best_edge[3],
+				route.best_point[0],
+				route.best_point[1],
+				route.best_point[2],
+				route.best_point[3]);
+			debug_printf(ctx, "(%d,%d)->(%d,%d)->(%d,%d)->(%d,%d)\n",
+				(int)points[route.best_point[0]].x,
+				(int)points[route.best_point[0]].y,
+				(int)points[route.best_point[1]].x,
+				(int)points[route.best_point[1]].y,
+				(int)points[route.best_point[2]].x,
+				(int)points[route.best_point[2]].y,
+				(int)points[route.best_point[3]].x,
+				(int)points[route.best_point[3]].y);
+		}
+#endif
+	}
+
+#ifdef WARP_DEBUG
+	/* Mark up the src (again, for debugging) */
+	{
+		fz_device *dev = fz_new_draw_device(ctx, fz_identity, src);
+		fz_stroke_state *stroke = fz_new_stroke_state(ctx);
+		float col = 1;
+		fz_color_params params = { FZ_RI_PERCEPTUAL };
+#ifdef WARP_SPEW_DEBUG
+		for (x = 0; x < num_edges; x++)
+		{
+			char text[64];
+			fz_path *path = fz_new_path(ctx);
+			fz_moveto(ctx, path, edge[x].x0, edge[x].y0);
+			fz_lineto(ctx, path, edge[x].x1, edge[x].y1);
+			fz_stroke_path(ctx, dev, path, stroke, fz_identity, fz_device_gray(ctx), &col, 1, params);
+			fz_drop_path(ctx, path);
+			debug_printf(ctx, "%d %d -> %d %d\n", edge[x].x0, edge[x].y0, edge[x].x1, edge[x].y1);
+			sprintf(text, "line%d.png", x);
+			fz_save_pixmap_as_png(ctx, src, text);
+		}
+#endif
+
+		stroke->linewidth *= 4;
+		if (route.best_score >= 0)
+		{
+			fz_path *path = fz_new_path(ctx);
+			fz_moveto(ctx, path, points[route.best_point[0]].x, points[route.best_point[0]].y);
+			fz_lineto(ctx, path, points[route.best_point[1]].x, points[route.best_point[1]].y);
+			fz_lineto(ctx, path, points[route.best_point[2]].x, points[route.best_point[2]].y);
+			fz_lineto(ctx, path, points[route.best_point[3]].x, points[route.best_point[3]].y);
+			fz_closepath(ctx, path);
+			fz_stroke_path(ctx, dev, path, stroke, fz_identity, fz_device_gray(ctx), &col, 1, params);
+			fz_drop_path(ctx, path);
+		}
+
+		fz_drop_stroke_state(ctx, stroke);
+		fz_close_device(ctx, dev);
+		fz_drop_device(ctx, dev);
+	}
+#endif
+
+	fz_free(ctx, hough);
+
+	if (route.best_score == -1)
+		return 0;
+
+	if (corners)
+	{
+		corners->ul.x = points[route.best_point[0]].x;
+		corners->ul.y = points[route.best_point[0]].y;
+		corners->ur.x = points[route.best_point[1]].x;
+		corners->ur.y = points[route.best_point[1]].y;
+		corners->ll.x = points[route.best_point[2]].x;
+		corners->ll.y = points[route.best_point[2]].y;
+		corners->lr.x = points[route.best_point[3]].x;
+		corners->lr.y = points[route.best_point[3]].y;
+	}
+
+	/* Discard any possible matches that aren't at least 1/8 of the pixmap. */
+	{
+		fz_rect r = fz_empty_rect;
+		if (corners)
+		{
+			r = fz_include_point_in_rect(r, corners->ul);
+			r = fz_include_point_in_rect(r, corners->ur);
+			r = fz_include_point_in_rect(r, corners->ll);
+			r = fz_include_point_in_rect(r, corners->lr);
+		}
+		if ((r.x1 - r.x0) * (r.y1 - r.y0) * 8 < (src->w * src->h))
+			return 0;
+	}
+
+	return 1;
+}
+
+#define DOC_DETECT_MAXDIM 500
+int
+fz_detect_document(fz_context *ctx, fz_quad *points, fz_pixmap *orig_src)
+{
+	fz_color_params p = {FZ_RI_PERCEPTUAL };
+	fz_pixmap *grey = NULL;
+	fz_pixmap *src = NULL;
+#ifdef DETECT_DOCUMENT_RGB
+	fz_pixmap *r = NULL;
+	fz_pixmap *g = NULL;
+	fz_pixmap *b = NULL;
+#endif
+	fz_pixmap *processed = NULL;
+	int i;
+	int found = 0;
+
+	/* Gauss function has std deviation of ~sqr(2), so a variance of
+	 * 2. Apply it twice and we get twice that. So:
+	 *	n	stddev	variance
+	 *	1	1.4	2
+	 *	2	2	4
+	 *	3	2.8	8
+	 *	4	4	16
+	 *	5	5.6	32
+	 *	6	8	64
+	 *	7	11.3	128
+	 *	8	16	256
+	 *	9	22.6	512
+	 *	10	32	1024
+	 *	11	45	2048
+	 *
+	 * The stddev is also known as the "width" by some sources.
+	 *
+	 * Our testing indicates that a width of 30ish works well for a
+	 * image with it's major axis being ~3k pixels. So figure on a
+	 * width of about 1% of the major axis dimension.
+	 *
+	 * By subsampling the incoming image, we can reduce the work we
+	 * do in the gauss phase, as we get to use a smaller width.
+	 */
+	int n = 10; /* Based on DOC_DETECT_MAXDIM */
+	int l2factor = 0;
+
+	fz_var(src);
+	fz_var(grey);
+#ifdef DETECT_DOCUMENT_RGB
+	fz_var(r);
+	fz_var(g);
+	fz_var(b);
+#endif
+	fz_var(processed);
+
+	fz_try(ctx)
+	{
+		START_TIME();
+		{
+			int maxdim = orig_src->w > orig_src->h ? orig_src->w : orig_src->h;
+			while (maxdim > DOC_DETECT_MAXDIM)
+				maxdim >>= 1, l2factor++;
+			START_TIME();
+			if (l2factor == 0)
+				src = fz_keep_pixmap(ctx, orig_src);
+			else
+			{
+				src = fz_clone_pixmap(ctx, orig_src);
+				fz_subsample_pixmap(ctx, src, l2factor);
+			}
+			END_TIME("subsample");
+		}
+		START_TIME();
+		if (src->n == 1 && src->alpha == 0)
+		{
+			if (src == orig_src)
+				grey = fz_clone_pixmap(ctx, src);
+			else
+				grey = fz_keep_pixmap(ctx, src);
+		}
+		else
+		{
+			grey = fz_convert_pixmap(ctx, src,
+				fz_default_gray(ctx, NULL), NULL, NULL, p, 0);
+		}
+		END_TIME("clone");
+		START_TIME();
+		for (i = 0; i < n; i++)
+			gauss5x5(ctx, grey);
+		END_TIME("gauss grey");
+#ifdef WARP_DEBUG
+		fz_save_pixmap_as_png(ctx, grey, "gauss.png");
+#endif
+#ifdef DO_HISTEQ
+		START_TIME();
+		histeq(grey);
+		END_TIME("histeq grey");
+#ifdef WARP_DEBUG
+		fz_save_pixmap_as_png(ctx, grey, "hist.png");
+#endif
+#endif
+		START_TIME();
+		grad(ctx, grey, pregrad(ctx, grey));
+		END_TIME("grad grey");
+#ifdef WARP_DEBUG
+		fz_save_pixmap_as_png(ctx, grey, "grad.png");
+#endif
+
+#ifdef DETECT_DOCUMENT_RGB
+		if (src->n == 3 && src->alpha == 0)
+		{
+			START_TIME();
+			r = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0);
+			g = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0);
+			b = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0);
+			gauss5x5_3(ctx, r, src, 0);
+			for (i = 1; i < n; i++)
+				gauss5x5(ctx, r);
+			gauss5x5_3(ctx, g, src, 1);
+			for (i = 1; i < n; i++)
+				gauss5x5(ctx, g);
+			gauss5x5_3(ctx, b, src, 2);
+			for (i = 1; i < n; i++)
+				gauss5x5(ctx, b);
+#ifdef DO_HISTEQ
+			histeq(r);
+			histeq(g);
+			histeq(b);
+#endif
+			grad(ctx, r);
+			grad(ctx, g);
+			grad(ctx, b);
+#ifdef WARP_DEBUG
+			fz_save_pixmap_as_png(ctx, r, "r.png");
+			fz_save_pixmap_as_png(ctx, g, "g.png");
+			fz_save_pixmap_as_png(ctx, b, "b.png");
+#endif
+			combine_grad(grey, r, g, b);
+			END_TIME("rgb");
+			fz_drop_pixmap(ctx, r);
+			fz_drop_pixmap(ctx, g);
+			fz_drop_pixmap(ctx, b);
+			r = NULL;
+			g = NULL;
+			b = NULL;
+		}
+#ifdef WARP_DEBUG
+		fz_save_pixmap_as_png(ctx, grey, "combined.png");
+#endif
+#endif
+		processed = fz_new_pixmap(ctx, fz_device_gray(ctx), grey->w, grey->h, NULL, 0);
+
+		/* Do multiple passes if required, dropping the thresholds for
+		 * strong/weak pixels each time until we find a suitable result. */
+		for (i = 0; i < 6; i++)
+		{
+			START_TIME();
+			nonmax(ctx, processed, grey, i);
+			END_TIME("nonmax");
+#ifdef WARP_DEBUG
+			fz_save_pixmap_as_png(ctx, processed, "nonmax.png");
+#endif
+			START_TIME();
+			hysteresis(ctx, processed);
+			END_TIME("hysteresis");
+#ifdef WARP_DEBUG
+			fz_save_pixmap_as_png(ctx, processed, "hysteresis.png");
+#endif
+			START_TIME();
+			found = make_hough(ctx, processed, points);
+			END_TIME("total hough");
+			END_TIME("Total time");
+			DUMP_TIMES();
+#ifdef WARP_DEBUG
+			clean(ctx, processed);
+			fz_save_pixmap_as_png(ctx, processed, "out.png");
+#endif
+			if (found)
+				break;
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_drop_pixmap(ctx, src);
+#ifdef DETECT_DOCUMENT_RGB
+		fz_drop_pixmap(ctx, r);
+		fz_drop_pixmap(ctx, g);
+		fz_drop_pixmap(ctx, b);
+#endif
+		fz_drop_pixmap(ctx, grey);
+		fz_drop_pixmap(ctx, processed);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	if (found && points)
+	{
+		float f = (1<<l2factor);
+		float r = f/2;
+		points->ul.x = points->ul.x *f + r;
+		points->ul.y = points->ul.y *f + r;
+		points->ur.x = points->ur.x *f + r;
+		points->ur.y = points->ur.y *f + r;
+		points->ll.x = points->ll.x *f + r;
+		points->ll.y = points->ll.y *f + r;
+		points->lr.x = points->lr.x *f + r;
+		points->lr.y = points->lr.y *f + r;
+	}
+
+	return found;
+}