Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/warp.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/warp.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,2313 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +#include "pixmap-imp.h" + +#include <string.h> + +/* Define TIMINGS to get timing information dumped to stdout. */ +#undef TIMINGS + +/* Define WARP_DEBUG to get debugging output (and PNGs saved). Note + * that this will affect timings! */ +#undef WARP_DEBUG + +/* Define WARP_SPEW_DEBUG to get even more debug output (and PNGs). */ +#undef WARP_SPEW_DEBUG + +/* One reference suggested doing histogram equalisation. */ +#define DO_HISTEQ + +/* Define DETECT_DOCUMENT_RGB, and edge detection on RGB documents will + * look for edges in just the R,G,B planes as well as the grey plane. */ +#undef DETECT_DOCUMENT_RGB + +#undef SLOW_INTERPOLATION +#undef SLOW_WARPING + +#ifdef WARP_DEBUG +static void +debug_printf(fz_context *ctx, const char *fmt, ...) +{ + char text[1024]; + va_list list; + va_start(list, fmt); + vsnprintf(text, sizeof(text), fmt, list); + va_end(list); + +#ifdef _WIN32 + fz_write_string(ctx, fz_stdods(ctx), text); +#endif + fputs(text, stderr); +} +#else +#define debug_printf(CTX, FMT, ...) do {} while (0) +#endif + +#if defined(TIMINGS) && defined(_WIN32) + +#include "windows.h" + +#define MAX_TIMERS 256 +static struct { + int started; + int stackptr; + int stack[MAX_TIMERS]; + const char *name[MAX_TIMERS]; + LARGE_INTEGER time[MAX_TIMERS]; +} timer; +#define START_TIME() \ + do {\ + int i = timer.stack[timer.stackptr++] = timer.started++;\ + QueryPerformanceCounter(&timer.time[i]);\ + } while (0) +#define END_TIME(NAME) \ + do {\ + LARGE_INTEGER end;\ + int i;\ + QueryPerformanceCounter(&end);\ + i = timer.stack[--timer.stackptr];\ + timer.time[i].QuadPart = end.QuadPart - timer.time[i].QuadPart;\ + timer.name[i] = NAME;\ + } while (0) +#define DUMP_TIMES() \ + do {\ + int i;\ + LARGE_INTEGER freq;\ + QueryPerformanceFrequency(&freq);\ + float f = freq.QuadPart;\ + for (i = 0; i < timer.started; i++)\ + debug_printf(ctx, "%s: %g\n", timer.name[i], (int)1000*timer.time[i].QuadPart/f);\ + } while (0) +#else +#define START_TIME() do {} while(0) +#define END_TIME(NAME) do {} while(0) +#define DUMP_TIMES() do {} while(0) +#endif + +typedef struct +{ + int x; + int y; +} fz_ipoint; + +typedef struct +{ + int i; + int f; + int di; + int df; +} fz_bresenham_core; + +typedef struct +{ + fz_bresenham_core c; + int n; +} fz_bresenham; + +typedef struct +{ + fz_bresenham_core x; + fz_bresenham_core y; + int n; +} fz_ipoint_bresenham; + +typedef struct +{ + fz_bresenham_core sx; + fz_bresenham_core sy; + fz_bresenham_core ex; + fz_bresenham_core ey; + int n; +} fz_ipoint2_bresenham; + +static inline fz_bresenham_core +init_bresenham_core(int start, int end, int n) +{ + fz_bresenham_core b; + int delta = end-start; + + b.di = n == 0 ? 0 : delta/n; + b.df = delta - n*b.di; /* 0 <= b.df < n */ + if (b.df < 0) + b.di--, b.df += n; + /* Starts with bi.i = start, bi.f = n, and then does a half + * step. */ + b.i = start + (b.di>>1); + b.f = n - (((b.di & 1) * n + b.df)>>1); + + return b; +} + +#ifdef CURRENTLY_UNUSED +static inline fz_bresenham +init_bresenham(int start, int end, int n) +{ + fz_bresenham b; + + b.c = init_bresenham_core(start, end, n); + b.n = n; + + return b; +} + +static inline void +step(fz_bresenham *b) +{ + step_core(&b->c, b->n); +} +#endif + +static inline void +step_core(fz_bresenham_core *b, int n) +{ + b->i += b->di; + b->f -= b->df; + if (b->f <= 0) + { + b->f += n; + b->i++; + } +} + +static inline fz_ipoint_bresenham +init_ip_bresenham(fz_ipoint start, fz_ipoint end, int n) +{ + fz_ipoint_bresenham b; + + b.x = init_bresenham_core(start.x, end.x, n); + b.y = init_bresenham_core(start.y, end.y, n); + b.n = n; + + return b; +} + +static inline void +step_ip(fz_ipoint_bresenham *b) +{ + step_core(&b->x, b->n); + step_core(&b->y, b->n); +} + +static inline fz_ipoint +current_ip(const fz_ipoint_bresenham *b) +{ + fz_ipoint ip; + + ip.x = b->x.i; + ip.y = b->y.i; + + return ip; +} + +static inline fz_ipoint2_bresenham +init_ip2_bresenham(fz_ipoint ss, fz_ipoint se, fz_ipoint es, fz_ipoint ee, int n) +{ + fz_ipoint2_bresenham b; + + b.sx = init_bresenham_core(ss.x, se.x, n); + b.sy = init_bresenham_core(ss.y, se.y, n); + b.ex = init_bresenham_core(es.x, ee.x, n); + b.ey = init_bresenham_core(es.y, ee.y, n); + b.n = n; + + return b; +} + +static inline void +step_ip2(fz_ipoint2_bresenham *b) +{ + step_core(&b->sx, b->n); + step_core(&b->sy, b->n); + step_core(&b->ex, b->n); + step_core(&b->ey, b->n); +} + +static inline fz_ipoint +start_ip(const fz_ipoint2_bresenham *b) +{ + fz_ipoint ip; + + ip.x = b->sx.i; + ip.y = b->sy.i; + + return ip; +} + +static fz_forceinline fz_ipoint +end_ip(const fz_ipoint2_bresenham *b) +{ + fz_ipoint ip; + + ip.x = b->ex.i; + ip.y = b->ey.i; + + return ip; +} + +static void +interp_n(unsigned char *d, const unsigned char *s0, + const unsigned char *s1, int f, int n) +{ + do + { + int a = *s0++; + int b = *s1++ - a; + *d++ = ((a<<8) + b*f + 128)>>8; + } + while (--n); +} + +static void +interp2_n(unsigned char *d, const unsigned char *s0, + const unsigned char *s1, const unsigned char *s2, + int f0, int f1, int n) +{ + do + { + int a = *s0++; + int b = *s1++ - a; + int c; + a = (a<<8) + b*f0; + c = (*s2++<<8) - a; + *d++ = ((a<<8) + c*f1 + (1<<15))>>16; + } + while (--n); +} + +static inline void +copy_pixel(unsigned char *d, const fz_pixmap *src, fz_ipoint p) +{ + int u = p.x>>8; + int v = p.y>>8; + int fu = p.x & 255; + int fv = p.y & 255; + int n = src->n; + const unsigned char *s; + ptrdiff_t stride = src->stride; + + if (u < 0) + u = 0, fu = 0; + else if (u >= src->w-1) + u = src->w-1, fu = 0; + + if (v < 0) + v = 0, fv = 0; + else if (v >= src->h-1) + v = src->h-1, fv = 0; + + s = &src->samples[u * n + v * stride]; + +#ifdef SLOW_INTERPOLATION + { + int i; + + for (i = 0; i < n; i++) + { + int v0 = s[0]; + int v1 = s[n]; + int v2 = s[stride]; + int v3 = s[stride+n]; + int v01, v23, v; + v01 = (v0<<8) + (v1-v0)*fu; + v23 = (v2<<8) + (v3-v2)*fu; + v = (v01<<8) + (v23-v01)*fv; + assert(v >= 0 && v < (1<<24)-32768); + *d++ = (v + 32768)>>16; + s++; + } + return; + } +#else + if (fu == 0) + { + if (fv == 0) + { + /* Copy single pixel */ + memcpy(d, s, n); + return; + } + /* interpolate y pixels */ + interp_n(d, s, s + stride, fv, n); + return; + } + if (fv == 0) + { + /* interpolate x pixels */ + interp_n(d, s, s+n, fu, n); + return; + } + + if (fu <= fv) + { + /* Top half of the trapezoid. */ + interp2_n(d, s, s+stride, s+stride+n, fv, fu, n); + } + else + { + /* Bottom half of the trapezoid. */ + interp2_n(d, s, s+n, s+stride+n, fu, fv, n); + } +#endif +} + +static void +warp_core(unsigned char *d, int n, int width, int height, int stride, + const fz_ipoint corner[4], const fz_pixmap *src) +{ + fz_ipoint2_bresenham row_bres; + int x; + + /* We have a bresenham pair for how to move the start + * and end of the row each y step. */ + row_bres = init_ip2_bresenham(corner[0], corner[3], + corner[1], corner[2], height); + stride -= width * n; + +#ifdef SLOW_WARPING + { + int h; + for (h = 0 ; h < height ; h++) + { + int sx = corner[0].x + (corner[3].x - corner[0].x)*h/height; + int sy = corner[0].y + (corner[3].y - corner[0].y)*h/height; + int ex = corner[1].x + (corner[2].x - corner[1].x)*h/height; + int ey = corner[1].y + (corner[2].y - corner[1].y)*h/height; + for (x = 0; x < width; x++) + { + fz_ipoint p; + p.x = sx + (ex-sx)*x/width; + p.y = sy + (ey-sy)*x/width; + copy_pixel(d, src, p); + d += n; + } + d += stride; + } + } +#else + for (; height > 0; height--) + { + /* We have a bresenham for how to move the + * current pixel across the row. */ + fz_ipoint_bresenham pix_bres; + pix_bres = init_ip_bresenham(start_ip(&row_bres), + end_ip(&row_bres), + width); + for (x = width; x > 0; x--) + { + /* Copy pixel */ + copy_pixel(d, src, current_ip(&pix_bres)); + d += n; + step_ip(&pix_bres); + } + + /* step to the next line. */ + step_ip2(&row_bres); + d += stride; + } +#endif +} + +/* + points are clockwise from NW. + + This performs simple affine warping. +*/ +fz_pixmap * +fz_warp_pixmap(fz_context *ctx, fz_pixmap *src, fz_quad points, int width, int height) +{ + fz_pixmap *dst; + + if (src == NULL) + return NULL; + + if (width >= (1<<24) || width < 0 || height >= (1<<24) || height < 0) + fz_throw(ctx, FZ_ERROR_LIMIT, "Bad width/height"); + + dst = fz_new_pixmap(ctx, src->colorspace, width, height, + src->seps, src->alpha); + dst->xres = src->xres; + dst->yres = src->yres; + + fz_try(ctx) + { + unsigned char *d = dst->samples; + int n = dst->n; + fz_ipoint corner[4]; + + /* Find the corner texture positions as fixed point */ + corner[0].x = (int)(points.ul.x * 256 + 128); + corner[0].y = (int)(points.ul.y * 256 + 128); + corner[1].x = (int)(points.ur.x * 256 + 128); + corner[1].y = (int)(points.ur.y * 256 + 128); + corner[2].x = (int)(points.ll.x * 256 + 128); + corner[2].y = (int)(points.ll.y * 256 + 128); + corner[3].x = (int)(points.lr.x * 256 + 128); + corner[3].y = (int)(points.lr.y * 256 + 128); + + warp_core(d, n, width, height, width * n, corner, src); + } + fz_catch(ctx) + { + fz_drop_pixmap(ctx, dst); + fz_rethrow(ctx); + } + + return dst; +} + +static float +dist(fz_point a, fz_point b) +{ + float x = a.x-b.x; + float y = a.y-b.y; + + return sqrtf(x*x+y*y); +} + +/* Again, affine warping, but this time where the destination width/height + * are chosen automatically. */ +fz_pixmap * +fz_autowarp_pixmap(fz_context *ctx, fz_pixmap *src, fz_quad points) +{ + float w0 = dist(points.ur, points.ul); + float w1 = dist(points.ll, points.lr); + float h0 = dist(points.lr, points.ul); + float h1 = dist(points.ll, points.ur); + int w = (w0+w1+0.5)/2; + int h = (h0+h1+0.5)/2; + + return fz_warp_pixmap(ctx, src, points, w, h); +} + +/* + Corner detection: We shall steal the algorithm from the Dropbox + Document Scanner, as described here: + + https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning + + A good reference to the steps involved in the canny edge + detection process can be found here: + + https://towardsdatascience.com/canny-edge-detection-step-by-step-in-python-computer-vision-b49c3a2d8123 + + This involves: + * Canny Edge Detection + * * Greyscale conversion + * * Noise reduction + * * Gradient Calculation + * * Non-maximum suppression + * * Double threshold + * * Edge tracking by Hysteresis + * Hough Transform to fix possible edges + * Computing intersections and scoring quads + + We modify the gradient calculation with a simple scale to ensure we fill the range. +*/ + +#ifdef DO_HISTEQ +static void +histeq(fz_pixmap *im) +{ + uint32_t count[256]; + unsigned char tbl[256]; + int i; + unsigned char *s = im->samples; + int n = im->w*im->h; + int sigma; + int den; + + memset(count, 0, sizeof(count)); + for (i = n; i > 0; i--) + count[*s++]++; + + /* Rather than doing a pure histogram equalisation, we bend + * the table so that 0 always stays as 0, and 255 always stays + * as 255. */ + sigma = (count[0]>>1) - count[0]; + den = sigma + n - (count[255]>>1); + for (i = 0; i < 256; i++) + { + int v = count[i]; + sigma += v - (v>>1); + tbl[i] = (int)(255.0f * sigma / den + 0.5f); + sigma += (v>>1); + } + + s = im->samples; + for (i = n; i > 0; i--) + *s = tbl[*s], s++; +} +#endif + +/* The first functions apply a 5x5 gauss filter to blur the greyscale + * image and remove noise. The gauss filter is a convolution with + * weights: + * + * 2 4 5 4 2 + * 4 9 12 9 4 + * 5 12 15 12 5 + * 4 9 12 9 4 + * 2 4 5 4 2 + * + * As you can see, there are 3 distinct lines of weights within that + * matrix. We walk each row of source pixels once, calculating each of + * those convolutions, storing the result in a rolling buffer of 5x3xw + * entries. Then we sum columns from the buffer to give us the results. + */ + +/* Read across a row of pixels of width w, from s, performing + * the horizontal portion of the convolutions, storing the results + * in 3 lines of a buffer, starting at d, &d[w], &d[2*w]. + * + * d[0*w] uses weights (5 12 15 12 5) + * d[1*w] uses weights (4 9 12 9 4) + * d[2*w] uses weights (2 4 5 4 2) + */ +static void +gauss5row(uint16_t *d, const unsigned char *s, int w) +{ + int i; + int s0 = s[0]; + int s1 = s[1]; + int s2 = s[2]; + int s3 = s[3]; + + s += 4; + + d[2*w] = 11*s0 + 4*s1 + 2*s2; + d[1*w] = 25*s0 + 9*s1 + 4*s2; + *d++ = 32*s0 + 12*s1 + 5*s2; + + d[2*w] = 6*s0 + 5*s1 + 4*s2 + 2*s3; + d[1*w] = 13*s0 + 12*s1 + 9*s2 + 4*s3; + *d++ = 17*s0 + 15*s1 + 12*s2 + 5*s3; + + for (i = w - 4; i > 0; i--) + { + int d2 = 2*s0 + 4*s1 + 5*s2 + 4*s3; + int d1 = 4*s0 + 9*s1 + 12*s2 + 9*s3; + int d0 = 5*s0 + 12*s1 + 15*s2 + 12*s3; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = *s++; + d[2*w] = d2 + 2*s3; + d[1*w] = d1 + 4*s3; + *d++ = d0 + 5*s3; + } + + d[2*w] = 2*s0 + 4*s1 + 5*s2 + 6*s3; + d[1*w] = 4*s0 + 9*s1 + 12*s2 + 13*s3; + *d++ = 5*s0 + 12*s1 + 15*s2 + 17*s3; + + d[2*w] = 2*s1 + 4*s2 + 11*s3; + d[1*w] = 4*s1 + 9*s2 + 25*s3; + *d = 5*s1 + 12*s2 + 32*s3; +} + +/* Calculate the results for row y of the image, of width w, by + * summing results from the temporary buffer s, and writing into the + * original pixmap at d. */ +static void +gauss5col(unsigned char *d, const uint16_t *s, int y, int w) +{ + const uint16_t *s0, *s1, *s2, *s3, *s4; + y *= 3; + s0 = &s[((y+ 9+2)%15)*w]; + s1 = &s[((y+12+1)%15)*w]; + s2 = &s[( y %15)*w]; + s3 = &s[((y+ 3+1)%15)*w]; + s4 = &s[((y+ 6+2)%15)*w]; + + for (; w > 0; w--) + *d++ = (*s0++ + *s1++ + *s2++ + *s3++ + *s4++ + 79)/159; +} + +static void +gauss5x5(fz_context *ctx, fz_pixmap *src) +{ + int w = src->w; + int h = src->h; + uint16_t *buf; + unsigned char *s = src->samples; + int y; + + if (w < 5 || h < 5) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "Pixmap too small"); + + buf = fz_malloc(ctx, w*3*5 * sizeof(uint16_t)); + + gauss5row(&buf[0*3*w], &s[0*w], w); + gauss5row(&buf[1*3*w], &s[1*w], w); + memcpy(&buf[3*3*w], buf, w*3 * sizeof(uint16_t)); /* row -2 */ + memcpy(&buf[4*3*w], buf, w*3 * sizeof(uint16_t)); /* row -1 */ + for (y = 2; y < h; y++) + { + gauss5row(&buf[(y%5)*3*w], &s[2*w], w); + + gauss5col(s, buf, y-2, w); + s += w; + } + for (; y < h+2; y++) + { + memcpy(&buf[(y%5)*3*w], &buf[((y+4)%5)*3*w], 3*w*sizeof(uint16_t)); + gauss5col(s, buf, y-2, w); + s += w; + } + + fz_free(ctx, buf); +} + +#ifdef DETECT_DOCUMENT_RGB +/* Variant of the above that works on a single plane from rgb data */ +static void +gauss5row3(uint16_t *d, const unsigned char *s, int w) +{ + int i; + int s0 = s[0]; + int s1 = s[3]; + int s2 = s[6]; + int s3 = s[9]; + + s += 4*3; + + d[2*w] = 11*s0 + 4*s1 + 2*s2; + d[1*w] = 25*s0 + 9*s1 + 4*s2; + *d++ = 32*s0 + 12*s1 + 5*s2; + + d[2*w] = 6*s0 + 5*s1 + 4*s2 + 2*s3; + d[1*w] = 13*s0 + 12*s1 + 9*s2 + 4*s3; + *d++ = 17*s0 + 15*s1 + 12*s2 + 5*s3; + + for (i = w - 4; i > 0; i--) + { + int d2 = 2*s0 + 4*s1 + 5*s2 + 4*s3; + int d1 = 4*s0 + 9*s1 + 12*s2 + 9*s3; + int d0 = 5*s0 + 12*s1 + 15*s2 + 12*s3; + s0 = s1; + s1 = s2; + s2 = s3; + s3 = *s; s += 3; + d[2*w] = d2 + 2*s3; + d[1*w] = d1 + 4*s3; + *d++ = d0 + 5*s3; + } + + d[2*w] = 2*s0 + 4*s1 + 5*s2 + 6*s3; + d[1*w] = 4*s0 + 9*s1 + 12*s2 + 13*s3; + *d++ = 5*s0 + 12*s1 + 15*s2 + 17*s3; + + d[2*w] = 2*s1 + 4*s2 + 11*s3; + d[1*w] = 4*s1 + 9*s2 + 25*s3; + *d = 5*s1 + 12*s2 + 32*s3; +} + +static void +gauss5x5_3(fz_context *ctx, fz_pixmap *dst, const fz_pixmap *src, int comp) +{ + int w = src->w; + int h = src->h; + uint16_t *buf; + unsigned char *s = src->samples + comp; + unsigned char *d = dst->samples; + int y; + + if (w < 5 || h < 5) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "Pixmap too small"); + + buf = fz_malloc(ctx, w*3*5 * sizeof(uint16_t)); + + gauss5row3(&buf[0*3*w], s, w); + s += w*3; + gauss5row3(&buf[1*3*w], s, w); + s += w*3; + memcpy(&buf[3*3*w], buf, w*3 * sizeof(uint16_t)); /* row -2 */ + memcpy(&buf[4*3*w], buf, w*3 * sizeof(uint16_t)); /* row -1 */ + for (y = 2; y < h; y++) + { + gauss5row3(&buf[((y*3)%15)*w], s, w); + + gauss5col(d, buf, y-2, w); + s += w*3; + d += w; + } + for (; y < h+2; y++) + { + memcpy(&buf[(y%5)*3*w], &buf[((y+4)%5)*3*w], 3*w*sizeof(uint16_t)); + gauss5col(d, buf, y-2, w); + d += w; + } + + fz_free(ctx, buf); +} +#endif + +/* The next set of functions perform the gradient calculation. + * We convolve with Sobel kernels Kx and Ky respectively: + * + * Kx = -1 0 1 Ky = 1 2 1 + * -2 0 2 0 0 0 + * -1 0 1 -1 -2 -1 + * + * We do this by using a rolling temporary buffer of int16_t's to hold + * 3 pairs of lines of weights scaled by (1 0 1) and (1 2 1). + * + * We can then sum entries from those lines to calculate kx and ky for + * each pixel in the image. + * + * Then by examining the values of x and y, we can figure out the + * "direction" of the edge (horizontal, vertical, or either diagonal), + * and the magnitude of the difference across that edge. These get + * encoded back into the original image storage using the 2 bottom bits + * for direction, and the top 6 bits for magnitude. + */ + +static void +pregradrow(int16_t *d, const unsigned char *s, int w) +{ + int i; + unsigned char s0 = *s++; + unsigned char s1 = *s++; + + d[w] = 3*s0 + s1; + *d++ = s1 - s0; + for (i = w-2; i > 0; i--) + { + int s2 = *s++; + d[w] = s0 + 2*s1 + s2; + *d++ = s2 - s0; + s0 = s1; + s1 = s2; + } + d[w] = s0 + 3*s1; + *d = s1 - s0; +} + +static void +pregradcol(unsigned char *d, const int16_t *buf, int y, int w, uint32_t *max) +{ + const int16_t *s0 = &buf[((y+2)%3)*w*2]; + const int16_t *s1 = &buf[((y )%3)*w*2]; + const int16_t *s2 = &buf[((y+1)%3)*w*2]; + int i; + + for (i = w; i > 0; i--) + { + uint32_t ax, ay, mag; + int x; + y = s0[w] - s2[w]; + x = *s0++ + 2 * *s1++ + *s2++; + ax = x >= 0 ? x : -x; + ay = y >= 0 ? y : -y; + /* x and y are now both in the range -1020..1020 */ + /* Now we calculate slope and gradient. + * angle = atan2(y, x); + * intensity = hypot(x, y); + * But wait, we don't need that accuracy. We only need + * to distinguish 4 directions... + * + * -22.5 < angle <= 22.5 = 0 + * 22.5 < angle <= 67.5 = 1 + * 67.5 < angle <= 112.5 = 2 + * 115.5 < angle <= 157.5 = 3 + * (and the reflections) + * + * 7 0 1 (x positive right, y positive downwards) + * 6 * 2 + * 5 4 3 + * + * tan(22.5)*65536 = 27146. + * And for magnitude, we consider the magnitude just + * along the 8 directions we've picked. So + * 65536/SQR(2) = 46341. + * We want magnitude in the 0...63 range. + */ + if (ax<<16 < 27146*ay) + { + /* angle = 0 */ + mag = ay<<16; /* 0 to 1020<<16 */ + } + else if (ay<<16 < ax*27146) + { + /* angle = 2 */ + mag = ax<<16; /* 0 to 1020<<16 */ + } + else + { + /* angle = 1 or 3 */ + mag = (46341*(ax+ay)); + } + if (mag > *max) + *max = mag; + } +} + +static uint32_t +pregrad(fz_context *ctx, fz_pixmap *src) +{ + int w = src->w; + int h = src->h; + unsigned char *s = src->samples; + int16_t *buf = fz_malloc(ctx, w*2*3*sizeof(int16_t)); + int y; + uint32_t max = 0; + + pregradrow(buf, s, w); /* Line 0 */ + memcpy(&buf[w*2*2], buf, w*2*sizeof(int16_t)); /* Line 1 */ + s += w; + for (y = 1; y < h-1; y++) + { + pregradrow(&buf[(y%3)*w*2], s, w); + pregradcol(s-w, buf, y-1, w, &max); + s += w; + } + memcpy(&buf[((y+1)%3)*w*2], &buf[(y%3)*w*2], w*2*sizeof(int16_t)); /* Line h */ + pregradcol(s-w, buf, h-2, w, &max); + pregradcol(s, buf, h-1, w, &max); + + fz_free(ctx, buf); + + if (max == 0) + return 1; + else + return 0x7FFFFFFFU/max; +} + + +static void +gradrow(int16_t *d, const unsigned char *s, int w) +{ + int i; + unsigned char s0 = *s++; + unsigned char s1 = *s++; + + d[w] = 3*s0 + s1; + *d++ = s1 - s0; + for (i = w-2; i > 0; i--) + { + int s2 = *s++; + d[w] = s0 + 2*s1 + s2; + *d++ = s2 - s0; + s0 = s1; + s1 = s2; + } + d[w] = s0 + 3*s1; + *d = s1 - s0; +} + +static void +gradcol(unsigned char *d, const int16_t *buf, int y, int w, int scale) +{ + const int16_t *s0 = &buf[((y+2)%3)*w*2]; + const int16_t *s1 = &buf[((y )%3)*w*2]; + const int16_t *s2 = &buf[((y+1)%3)*w*2]; + int i; + + for (i = w; i > 0; i--) + { + uint32_t ax, ay, mag, scaled; + int angle, x; + y = s0[w] - s2[w]; + x = *s0++ + 2 * *s1++ + *s2++; + ax = x >= 0 ? x : -x; + ay = y >= 0 ? y : -y; + /* x and y are now both in the range -1020..1020 */ + /* Now we calculate slope and gradient. + * angle = atan2(y, x); + * intensity = hypot(x, y); + * But wait, we don't need that accuracy. We only need + * to distinguish 4 directions... + * + * -22.5 < angle <= 22.5 = 0 + * 22.5 < angle <= 67.5 = 1 + * 67.5 < angle <= 112.5 = 2 + * 115.5 < angle <= 157.5 = 3 + * (and the reflections) + * + * 7 0 1 (x positive right, y positive downwards) + * 6 * 2 + * 5 4 3 + * + * tan(22.5)*65536 = 27146. + * And for magnitude, we consider the magnitude just + * along the 8 directions we've picked. So + * 65536/SQR(2) = 46341. + * We want magnitude in the 0...63 range. + */ + if (ax<<16 < 27146*ay) + { + angle = 0; + mag = ay<<16; /* 0 to 1020<<16 */ + } + else if (ay<<16 < ax*27146) + { + angle = 2; + mag = ax<<16; /* 0 to 1020<<16 */ + } + else + { + /* 1 or 3 */ + angle = (x^y) >= 0 ? 3 : 1; + mag = (46341*(ax+ay)); + } + scaled = (mag * scale)>>25; + assert(scaled >= 0 && scaled <= 63); + *d++ = (scaled<<2) | angle; + } +} + +static void +grad(fz_context *ctx, fz_pixmap *src, uint32_t scale) +{ + int w = src->w; + int h = src->h; + unsigned char *s = src->samples; + int16_t *buf = fz_malloc(ctx, w*2*3*sizeof(int16_t)); + int y; + + gradrow(buf, s, w); /* Line 0 */ + memcpy(&buf[w*2*2], buf, w*2*sizeof(int16_t)); /* Line 1 */ + s += w; + for (y = 1; y < h-1; y++) + { + gradrow(&buf[(y%3)*w*2], s, w); + gradcol(s-w, buf, y-1, w, scale); + s += w; + } + memcpy(&buf[((y+1)%3)*w*2], &buf[(y%3)*w*2], w*2*sizeof(int16_t)); /* Line h */ + gradcol(s-w, buf, h-2, w, scale); + gradcol(s, buf, h-1, w, scale); + + fz_free(ctx, buf); +} + +#ifdef DETECT_DOCUMENT_RGB +static void +combine_grad(fz_pixmap *grey, const fz_pixmap *r, const fz_pixmap *g, const fz_pixmap *b) +{ + int n; + unsigned char *sd = grey->samples; + const unsigned char *sr = r->samples; + const unsigned char *sg = g->samples; + const unsigned char *sb = b->samples; + + for (n = g->w * g->h; n > 0; n--) + { + unsigned char vg = *sg++; + unsigned char vr = *sr++; + unsigned char vb = *sb++; + unsigned char vd = *sd++; + if (vr > vg) + vg = vr; + if (vb > vg) + vg = vb; + if (vg > vd) + sd[-1] = vg; + } +} +#endif + +/* Next, we perform Non-Maximum Suppression and Double Thresholding, + * both in the same phase. + * + * We walk the image, looking at the magnitude of the edges. Edges below + * the 'weak' threshold are discarded. Otherwise, neighbouring pixels in + * the direction of the edge are considered; if other pixels are stronger + * then this pixel is discarded. If not, we classify ourself as either + * 'strong' or 'weak'. + */ +#define WEAK_EDGE 64 +#define STRONG_EDGE 128 +static void +nonmax(fz_context *ctx, fz_pixmap *dst, const fz_pixmap *src, int pass) +{ + int w = src->w; + int h = src->h; + const unsigned char *s0 = src->samples; + const unsigned char *s1 = s0; + const unsigned char *s2 = s0+w; + unsigned char *d = dst->samples; + int x, y; + /* thresholds are in the 0 to 63 range. + * WEAK is typically 0.1ish, STRONG 0.3ish + */ + int weak = 6 - pass; + int strong = 12 - pass*2; + + /* On entry, pixels have the angle in the bottom 2 bits and the magnitude in the rest. */ + /* On exit, strong pixels have bit 7 set, weak pixels have bit 6, others are 0. + * strong and weak pixels have the angle in bits 4 and 5. */ + + for (y = h-1; y >= 0;) + { + int lastmag; + int ang = *s1++; + int mag = ang>>2; + int q, r; + + /* Pixel 0 */ + if (mag <= weak) + { + /* Not even a weak edge. We'll never keep it. */ + *d++ = 0; + s0++; + s2++; + } + else + { + ang &= 3; + switch (ang) + { + default: + case 0: + q = (*s0++)>>2; + r = (*s2++)>>2; + break; + case 1: + q = (*++s0)>>2; + r = 0; + s2++; + break; + case 2: + s0++; + s2++; + q = 0; + r = *s1>>2; + break; + case 3: + q = 0; + s0++; + r = (*++s2)>>2; + break; + } + if (mag < q || mag < r) + { + /* Neighbouring edges are stronger. + * Lose this one. */ + *d++ = 0; + } + else if (mag < strong) + { + /* Weak edge. */ + *d++ = WEAK_EDGE | (ang<<4); + } + else + { + /* Strong edge */ + *d++ = STRONG_EDGE | (ang<<4); + } + } + lastmag = mag; + for (x = w-2; x > 0; x--) + { + ang = *s1++; + mag = ang>>2; + if (mag <= weak) + { + /* Not even a weak edge. We'll never keep it. */ + *d++ = 0; + s0++; + s2++; + } + else + { + ang &= 3; + switch (ang) + { + default: + case 0: + q = (*s0++)>>2; + r = (*s2++)>>2; + break; + case 1: + q = (*++s0)>>2; + r = ((s2++)[-1])>>2; + break; + case 2: + s0++; + s2++; + q = lastmag; + r = *s1>>2; + break; + case 3: + q = ((s0++)[-1])>>2; + r = (*++s2)>>2; + break; + } + if (mag < q || mag < r) + { + /* Neighbouring edges are stronger. + * Lose this one. */ + *d++ = 0; + } + else if (mag < strong) + { + /* Weak edge. */ + *d++ = WEAK_EDGE | (ang<<4); + } + else + { + /* Strong edge */ + *d++ = STRONG_EDGE | (ang<<4); + } + } + lastmag = mag; + } + /* Pixel w-1 */ + ang = *s1++; + mag = ang>>2; + if (mag <= weak) + { + /* Not even a weak edge. We'll never keep it. */ + *d++ = 0; + s0++; + s2++; + lastmag = 0; + } + else + { + ang &= 3; + switch (ang) + { + default: + case 0: + q = (*s0++)>>2; + r = (*s2++)>>2; + break; + case 1: + q = 0; + s0++; + r = ((s2++)[-1])>>2; + break; + case 2: + s0++; + s2++; + q = 0; + r = *s1>>2; + break; + case 3: + q = ((s0++)[-1])>>2; + r = 0; + s2++; + break; + } + if (mag < q || mag < r) + { + /* Neighbouring edges are stronger. + * Lose this one. */ + *d++ = 0; + } + else if (mag < strong) + { + /* Weak edge. */ + *d++ = WEAK_EDGE | (ang<<4); + } + else + { + /* Strong edge */ + *d++ = STRONG_EDGE | (ang<<4); + } + } + s0 = s1-w; + if (--y == 0) + s2 = s1; + } +} + +/* Next, we have the hysteresis phase. Here we bump any 'weak' pixel + * that has at least one strong pixel around it up to being a 'strong' + * pixel. + * + * On entry, strong pixels have bit 7 set, weak pixels have bit 6 set, + * the angle is in bits 4 and 5, everything else is 0. + * + * We walk the rows of the image, and for each pixel we set bit 0 to be + * the logical OR of all bit 7's of itself, and its horizontally + * neighbouring pixels. + * + * Once we have done the first 2 rows like that, we can combine the + * operation of generating the bottom bits for row i, with the + * calculation of row i-1. Any given pixel on row i-1 should be promoted + * to 'strong' if it was 'weak', and if the logical OR of the matching + * pixels in row i, i-1 or i-2 has bit 0 set. + * + * At the end of this process any pixel with bit 7 set is 'strong'. + * Bits 4 and 5 still have the angle. + */ + +static void +hysteresis(fz_context *ctx, fz_pixmap *src) +{ + int w = src->w; + int h = src->h; + unsigned char *s0 = src->samples; + unsigned char *s1 = s0; + unsigned char *s2 = s0; + unsigned char v0, v1, v2, r0, r1, r2; + int x, y; + + /* On entry, strong pixels have bit 7 set, weak pixels have bit 6, others are 0. + * strong and weak pixels have the angle in bits 4 and 5. */ + + /* We make the bottom bit in every pixel be 1 iff the pixel + * or the ones to either side of it are 'strong'. */ + + /* First row - just do the bottom bit. */ + /* Pixel 0 */ + v0 = *s0++; + v1 = *s0++; + s0[-2] = v0 | ((v0 | v1)>>7); + /* Middle pixels */ + for (x = w-2; x > 0; x--) + { + v2 = *s0++; + s0[-2] = v1 | ((v0 | v1 | v2)>>7); + v0 = v1; + v1 = v2; + } + /* Pixel w-1 */ + s0[-1] = v1 | ((v0 | v1)>>7); + assert(s0 == src->samples + w); + + /* Second row - do the "bottom bit" for the second row, and + * perform hysteresis on the top row. */ + /* Pixel 0 */ + v0 = *s0++; + v1 = *s0++; + r0 = v0 | ((v0 | v1)>>7); + s0[-2] = r0; + r1 = *s1++; + if ((r1>>6) & (r0 | r1)) + s1[-1] |= 128; + /* Middle pixels */ + for (x = w-2; x > 0; x--) + { + v2 = *s0++; + r0 = v1 | ((v0 | v1 | v2)>>7); + s0[-2] = r0; + r1 = *s1++; + if ((r1>>6) & (r0 | r1)) + s1[-1] |= 128; + v0 = v1; + v1 = v2; + } + /* Pixel w-1 */ + r0 = v1 | ((v0 | v1)>>7); + s0[-1] = r0; + r1 = *s1++; + if ((r1>>6) & (r0 | r1)) + s1[-1] |= 128; + assert(s0 == s1 + w); + + /* Now we get into the swing of things. We do the "bottom bit" + * for row n+1, and do the actual processing for row n. */ + for (y = h-4; y > 0; y--) + { + /* Pixel 0 */ + v0 = *s0++; + v1 = *s0++; + r0 = v0 | ((v0 | v1)>>7); + s0[-2] = r0; + r1 = *s1++; + r2 = *s2++; + if ((r1>>6) & (r0 | r1 | r2)) + s1[-1] |= 128; + /* Middle pixels */ + for (x = w-2; x > 0; x--) + { + v2 = *s0++; + r0 = v1 | ((v0 | v1 | v2)>>7); + s0[-2] = r0; + r1 = *s1++; + r2 = *s2++; + if ((r1>>6) & (r0 | r1 | r2)) + s1[-1] |= 128; + v0 = v1; + v1 = v2; + } + /* Pixel w-1 */ + r0 = v1 | ((v0 | v1)>>7); + s0[-1] = r0; + r1 = *s1++; + r2 = *s2++; + if ((r1>>6) & (r0 | r1 | r2)) + s1[-1] |= 128; + assert(s0 == s1 + w); + assert(s1 == s2 + w); + } + + /* Final 2 rows together */ + /* Pixel 0 */ + v0 = *s0++; + v1 = *s0++; + r0 = v0 | ((v0 | v1)>>7); + r1 = *s1++; + r2 = *s2++; + if ((r1>>6) & (r0 | r1 | r2)) + s1[-1] |= 128; + if ((r0>>6) & (r0 | r1)) + s0[-2] |= 128; + /* Middle pixels */ + for (x = w-2; x > 0; x--) + { + v2 = *s0++; + r0 = v1 | ((v0 | v1 | v2)>>7); + r1 = *s1++; + r2 = *s2++; + if ((r1>>6) & (r0 | r1 | r2)) + s1[-1] |= 128; + if ((r0>>6) & (r0 | r1)) + s0[-2] |= 128; + v0 = v1; + v1 = v2; + } + /* Pixel w-1 */ + r0 = v1 | ((v0 | v1)>>7); + r1 = *s1; + r2 = *s2; + if ((r1>>6) & (r0 | r1 | r2)) + s1[0] |= 128; + if ((r0>>6) & (r0 | r1)) + s0[-1] |= 128; +} + +#ifdef WARP_DEBUG +/* A simple function to keep just bit 7 of the image. This 'cleans' the + * pixmap so that a grey version can be saved out for visual checking. */ +static void +clean(fz_context *ctx, fz_pixmap *src) +{ + int w = src->w; + int h = src->h; + unsigned char *s = src->samples; + + for (w = w*h; w > 0; w--) + *s = *s & 128, s++; +} +#endif + +#define SINTABLE_SHIFT 14 +static int16_t sintable[270]; +#define costable (&sintable[90]) + +/* We have collected an array of edge data. + * For each pixel, we know whether there is a 'strong' edge + * there, and if so, in which of 4 directions it runs. + * + * We want to convert this into hough space. + * + * The literature describes points in Hough space as having the + * form (r, theta). The value of any given point point in hough + * space is the "strength" of the line described by: + * x.cos(theta) + y.sin(theta) = r + * + * | \ + * | /\ + * | /\/\ + * | r/ \ + * | / \ + * | / \ + * |/theta \ + * -+-------------- + * + * i.e. r is the shortest distance from the origin to the line, + * and theta gives us the angle of that shortest line. + * + * But, we are using angles of theta from the vertical, so we need + * a different formulation: + * + * | \ + * | /\ + * | /\/\ + * | r/ \ t = theta + * | / \ + * |t/ \ + * |/ \ + * -+-------------- + * + * So we're using 90-theta. cos(90-theta) = sin(theta), + * and sin(90-theta) = cos(theta). + * + * So: x.sin(theta) + y.cos(theta) = r (for theta measured + * clockwise from the y axis). + * + * We've been collecting angles according to their position in one + * of 4 octants: + * + * Ang 0 = close to a horizontal edge (-22.5 to 22.5 degrees) + * Ang 1 = close to diagonal edge (top left to bottom right) (22.5 to 67.5 degrees) + * Ang 2 = close to a vertical edge (67.5 to 112.5 degrees) + * Ang 3 = close to diagonal edge (bottom left to top right) (112.5 to 157.5 degrees) + * + * The other 4 octants mirror onto these. + * + * So, for each point in our (x,y) pixmap we whether we have a strong + * pixel or not. If we have such a pixel, then we know that an edge + * passes through that point, and which of those 4 octants it is in. + * + * We therefore consider all the possible angles within that octant, + * and add a 'vote' for each of those lines into our hough transformed + * space. + */ + + +static void +mark_hough(uint32_t *d, int x, int y, int maxlen, int reduce, int ang) +{ + int theta; + int stride = (maxlen*2)>>reduce; + int minang, maxang; + + switch (ang) + { + /* The angles are really 22.5, 67.5 etc, but we are working in ints + * and specifying maxang as the first one greater than the one we + * want to mark. */ + default: + case 0: + /* Vertical boundary. Lines through this boundary + * go horizontally. So the perpendicular to them + * is vertical. */ + minang = 0; maxang = 23; + break; + case 1: + /* NE boundary */ + minang = 23; maxang = 68; + break; + case 2: + /* Horizontal boundary. */ + minang = 68; maxang = 113; + break; + case 3: + /* SE boundary */ + minang = 113; maxang = 158; + break; + case 4: + /* For debugging: */ + minang = 0; maxang = 180; + break; + } + + d += minang * stride; + while (1) + { + for (theta = minang; theta < maxang; theta++) + { + int p = (x*sintable[theta] + y*costable[theta])>>SINTABLE_SHIFT; + int v = (maxlen + p)>>reduce; + + d[v]++; + d += stride; + } + if (ang != 0) + break; + ang = 4; + minang = 158; + d += (minang - maxang) * stride; + maxang = 180; + } +} + +#ifdef WARP_DEBUG +static void +save_hough_debug(fz_context *ctx, uint32_t *hough, int stride) +{ + uint32_t scale; + uint32_t maxval; + uint32_t *p; + int y; + fz_pixmap *dst = fz_new_pixmap(ctx, NULL, stride, 180, NULL, 0); + unsigned char *d = dst->samples; + /* Make the image of the hough space (for debugging) */ + maxval = 1; /* Avoid possible division by zero */ + p = hough; + for (y = 180*stride; y > 0; y--) + { + uint32_t v = *p++; + if (v > maxval) + maxval = v; + } + + scale = 0xFFFFFFFFU/maxval; + p = hough; + for (y = 180*stride; y > 0; y--) + { + *d++ = (scale * *p++)>>24; + } + fz_save_pixmap_as_png(ctx, dst, "hough.png"); + fz_drop_pixmap(ctx, dst); +} +#endif + +static uint32_t *do_hough(fz_context *ctx, const fz_pixmap *src, int stride, int maxlen, int reduce) +{ + int w = src->w; + int h = src->h; + int x, y; + const unsigned char *s = src->samples; + uint32_t *hough = fz_calloc(ctx, sizeof(uint32_t), 180*stride); + + START_TIME(); + /* Construct the hough space representation. */ + for (y = 0; y < h; y++) + { + for (x = 0; x < w; x++) + { + unsigned char v = *s++; + if (v & 128) + mark_hough(hough, x, y, maxlen, reduce, (v>>4)&3); + } + } + END_TIME("Building hough"); + +#ifdef WARP_DEBUG + save_hough_debug(ctx, hough, stride); +#endif + + return hough; +} + +typedef struct +{ + int ang; + int dis; + int strength; + int x0; + int y0; + int x1; + int y1; +} hough_edge_t; + +typedef struct +{ + int e0; + int e1; + int score; + float x; + float y; +} hough_point_t; + +static int +intersect(hough_point_t *pt, hough_edge_t *edges, int e0, int e1) +{ + int x1 = edges[e0].x0; + int y1 = edges[e0].y0; + int x2 = edges[e0].x1; + int y2 = edges[e0].y1; + int x3 = edges[e1].x0; + int y3 = edges[e1].y0; + int x4 = edges[e1].x1; + int y4 = edges[e1].y1; + int d = (x1-x2)*(y3-y4)-(y1-y2)*(x3-x4); + int t = (x1-x3)*(y3-y4)-(y1-y3)*(x3-x4); + float ft; + + if (d < 0) + { + if (t < d || t > 0) + return 0; + } + else + { + if (t < 0 || t > d) + return 0; + } + + pt->e0 = e0; + pt->e1 = e1; + pt->score = edges[e0].strength + edges[e1].strength; + ft = t / (float)d; + pt->x = x1 + ft*(x2-x1); + pt->y = y1 + ft*(y2-y1); + + return 1; +} + +typedef struct +{ + int w; + int h; + int edge[4]; + int point[4]; + int best_score; + int best_edge[4]; + int best_point[4]; +} hough_route_t; + +/* To test convexity, we use the dot product: + * B + * / \ + * / C + * A + * + * The dot product of vectors u and v is: + * dot(u,v) = |u|.|v|.cos(theta) + * Helpfully, cos(theta) > 0 for -90 < theta < 90. + * So if we can form perp(AB) = the vector given by rotating + * AB 90 degrees clockwise, we can then look at the sign of: + * dot(perp(AB),BC) + * If we do that for each corner of the polygon, and the signs + * of the results for all of them are the same, we have convexity. + * + * If any of the dot products are zero, the edges are parallel + * (i.e. the same edge). This should never happen. + * + * If AB = (x,y), then perp(AB) = (y,-x) + */ +static void +score_corner(hough_point_t *points, hough_route_t *route, int p0, int *score, int *sign) +{ + float x0, x1, x2, y0, y1, y2, ux, uy, vx, vy, dot; + int s, len; + float costheta; + + /* If we've already decided this is duff, then bale. */ + if (*score < 0) + return; + + x0 = points[route->point[p0]].x; + y0 = points[route->point[p0]].y; + x1 = points[route->point[(p0+1)&3]].x; + y1 = points[route->point[(p0+1)&3]].y; + x2 = points[route->point[(p0+2)&3]].x; + y2 = points[route->point[(p0+2)&3]].y; + + ux = y1-y0; /* u = Perp(p0 to p1)*/ + uy = x0-x1; + vx = x2-x1; /* v = p1 to p2 */ + vy = y2-y1; + + dot = ux*vx + uy*vy; + if (dot == 0) + { + *score = -1; + return; + } + + s = (dot > 0) ? 1 : -1; + if (*sign == 0) + *sign = s; + else if (*sign != s) + { + *score = -1; + return; + } + + len = sqrt(ux*ux + uy*uy) * sqrt(vx*vx + vy*vy); + costheta = dot / (float)len; + if (costheta < 0) + costheta = -costheta; + if (costheta < 0.7) + { + *score = -1; + return; + } + costheta *= costheta; + + *score += points[route->point[(p0+1)%3]].score * costheta; +} + +/* route points to 8 ints: + * 2*i = edge number + * 2*i+1 = point number + */ +static int +score_route(hough_point_t *points, hough_route_t *route) +{ + int score = 0; + int sign = 0; + + score_corner(points, route, 0, &score, &sign); + score_corner(points, route, 1, &score, &sign); + score_corner(points, route, 2, &score, &sign); + score_corner(points, route, 3, &score, &sign); + + return score; +} + +static float +score_by_area(const hough_point_t *points, const hough_route_t *route) +{ + float double_area_of_quad = + points[route->point[0]].x * points[route->point[1]].y + + points[route->point[1]].x * points[route->point[2]].y + + points[route->point[2]].x * points[route->point[3]].y + + points[route->point[3]].x * points[route->point[0]].y - + points[route->point[1]].x * points[route->point[0]].y - + points[route->point[2]].x * points[route->point[1]].y - + points[route->point[3]].x * points[route->point[2]].y - + points[route->point[0]].x * points[route->point[3]].y; + float double_area = route->w * (float)route->h * 2; + if (double_area_of_quad < 0) + double_area_of_quad = -double_area_of_quad; + /* Anything larger than a quarter of the screen is acceptable. */ + if (double_area_of_quad*4 > double_area) + return 1; + /* Anything smaller than a 16th of the screen is unacceptable in all circumstances. */ + if (double_area_of_quad*16 < double_area) + return 0; + + /* Otherwise, scale the score down by how much it's less than a quarter of the screen. */ + return (double_area_of_quad*4)/double_area; +} + +/* The first n+1 edges of the route are filled in, as are the first n + * points. + * 2*i = edge number + * 2*i+1 = point number + */ +static void +find_route(fz_context *ctx, hough_point_t *points, int num_points, hough_route_t *route, int n) +{ + int i; + + for (i = 0; i < num_points; i++) + { + /* If this point continues our route (e0 == route->edge[n]) + * then the next point in the route might be e1. */ + int e0 = points[i].e0; + int e1 = points[i].e1; + if (e0 == route->edge[n]) + { + } + else if (e1 == route->edge[n]) + { + int t = e0; e0 = e1; e1 = t; + } + else + continue; /* Doesn't fit. Keep searching. */ + + /* If we've found 3 points already, then this is the + * fourth. */ + if (n == 3) + { + int score = 0; + + /* If we haven't looped back to our first edge, + * no good. Keep searching. */ + if (route->edge[0] != e1) + continue; + route->point[3] = i; + + score = score_route(points, route); + + if (score > 0) + score *= score_by_area(points, route); + +#ifdef WARP_DEBUG + debug_printf(ctx, "Found route: (point=%d %d %d %d) (edge %d %d %d %d) score=%d\n", + route->point[0], route->point[1], route->point[2], route->point[3], + route->edge[0], route->edge[1], route->edge[2], route->edge[3], + score); +#endif + + /* We want our route to be convex */ + if (score < 0) + continue; + + /* Score the route */ + if (route->best_score < score) + { + route->best_score = score; + memcpy(route->best_edge, route->edge, sizeof(*route->edge)*4); + memcpy(route->best_point, route->point, sizeof(*route->point)*4); + } + } + else + { + int j; + for (j = 0; j < n && route->edge[j] != e1; j++); + /* If we're about to loop back to any of the + * previous edges, keep searching. */ + if (j != n) + continue; + + /* Possible. Extend the route, and recurse. */ + route->point[n] = i; + route->edge[n+1] = e1; + find_route(ctx, points, num_points, route, n+1); + } + } +} + +#define MAX_EDGES 32 +#define BLOT_ANG 10 +#define BLOT_DIS 10 +static int +make_hough(fz_context *ctx, const fz_pixmap *src, fz_quad *corners) +{ + int w = src->w; + int h = src->h; + int maxlen = (int)(sqrtf(w*w + h*h) + 0.5); + uint32_t *hough; + int x, y; + int reduce; + int stride; + hough_edge_t edge[MAX_EDGES]; + hough_point_t points[MAX_EDGES * MAX_EDGES]; + hough_route_t route; + int num_edges, num_points; + + /* costable could (should) be statically inited. */ + { + int i; + for (i = 0; i < 270; i++) + { + float theta = i*M_PI/180; + sintable[i] = (int16_t)((1<<SINTABLE_SHIFT)*sinf(theta) + 0.5f); + } + } + + /* Figure out a suitable scale for the data. */ + reduce = 0; + while ((maxlen*2>>reduce) > 720 && reduce < 16) + reduce++; + + stride = (maxlen*2)>>reduce; + hough = do_hough(ctx, src, stride, maxlen, reduce); + + /* We want to find the top n edges that aren't too close to + * one another. */ + for (x = 0; x < MAX_EDGES; x++) + { + int ang, dis; + int minang, maxang, mindis, maxdis; + int where = 0; + uint32_t *p = hough; + uint32_t maxval = 0; + for (y = 180*stride; y > 0; y--) + { + uint32_t v = *p++; + if (v > maxval) + maxval = v, where = y; + } + if (where == 0) + break; + where = 180*stride - where; + ang = edge[x].ang = where/stride; + dis = edge[x].dis = where - edge[x].ang*stride; + edge[x].strength = hough[where]; + /* We don't want to find any other maxima that are too + * close to this one, so we 'blot out' stuff around this + * maxima. */ +#ifdef WARP_DEBUG + debug_printf(ctx, "Maxima %d: dist=%d ang=%d strength=%d\n", + x, (dis<<reduce)-maxlen, ang-90, edge[x].strength); +#endif + minang = ang - BLOT_ANG; + if (minang < 0) + minang = 0; + maxang = ang + BLOT_ANG; + if (maxang > 180) + maxang = 180; + mindis = dis - BLOT_DIS; + if (mindis < 0) + mindis = 0; + maxdis = dis + BLOT_DIS; + if (maxdis > stride) + maxdis = stride; + p = hough + minang*stride + mindis; + maxdis = (maxdis-mindis)*sizeof(uint32_t); + for (y = maxang-minang; y > 0; y--) + { + memset(p, 0, maxdis); + p += stride; + } +#ifdef WARP_DEBUG + //save_hough_debug(ctx, hough, stride); +#endif + } + num_edges = x; + if (num_edges == 0) + return 0; + + /* Find edges in terms of lines. */ + for (x = 0; x < num_edges; x++) + { + int ang = edge[x].ang; + int dis = edge[x].dis; + int p = (dis<<reduce) - maxlen; + if (ang < 45 || ang > 135) + { + /* Mostly horizontal line */ + edge[x].x0 = 0; + edge[x].x1 = w; + edge[x].y0 = ((p<<SINTABLE_SHIFT) - edge[x].x0*sintable[ang])/costable[ang]; + edge[x].y1 = ((p<<SINTABLE_SHIFT) - edge[x].x1*sintable[ang])/costable[ang]; + } + else + { + /* Mostly vertical line */ + edge[x].y0 = 0; + edge[x].y1 = h; + edge[x].x0 = ((p<<SINTABLE_SHIFT) - edge[x].y0*costable[ang])/sintable[ang]; + edge[x].x1 = ((p<<SINTABLE_SHIFT) - edge[x].y1*costable[ang])/sintable[ang]; + } + } + + /* Find the points of intersection */ + num_points = 0; + for (x = 0; x < num_edges-1; x++) + for (y = x+1; y < num_edges; y++) + num_points += intersect(&points[num_points], + edge, x, y); + +#ifdef WARP_DEBUG + { + debug_printf(ctx, "%d edges, %d points\n", num_edges, num_points); + for (x = 0; x < num_points; x++) + { + debug_printf(ctx, "p%d: %d %d (score %d, %d+%d)\n", x, + (int)points[x].x, (int)points[x].y, points[x].score, + points[x].e0, points[x].e1); + } + } +#endif + + /* Now, go looking for 'routes' A->B->C->D->A */ + { + int i; + route.w = src->w; + route.h = src->h; + route.best_score = -1; + for (i = 0; i < num_points; i++) + { + route.edge[0] = points[i].e0; + route.point[0] = i; + route.edge[1] = points[i].e1; + find_route(ctx, points, num_points, &route, 1); + } + +#ifdef WARP_DEBUG + if (route.best_score >= 0) + { + debug_printf(ctx, "Score: %d, Edges=%d->%d->%d->%d, Points=%d->%d->%d->%d\n", + route.best_score, + route.best_edge[0], + route.best_edge[1], + route.best_edge[2], + route.best_edge[3], + route.best_point[0], + route.best_point[1], + route.best_point[2], + route.best_point[3]); + debug_printf(ctx, "(%d,%d)->(%d,%d)->(%d,%d)->(%d,%d)\n", + (int)points[route.best_point[0]].x, + (int)points[route.best_point[0]].y, + (int)points[route.best_point[1]].x, + (int)points[route.best_point[1]].y, + (int)points[route.best_point[2]].x, + (int)points[route.best_point[2]].y, + (int)points[route.best_point[3]].x, + (int)points[route.best_point[3]].y); + } +#endif + } + +#ifdef WARP_DEBUG + /* Mark up the src (again, for debugging) */ + { + fz_device *dev = fz_new_draw_device(ctx, fz_identity, src); + fz_stroke_state *stroke = fz_new_stroke_state(ctx); + float col = 1; + fz_color_params params = { FZ_RI_PERCEPTUAL }; +#ifdef WARP_SPEW_DEBUG + for (x = 0; x < num_edges; x++) + { + char text[64]; + fz_path *path = fz_new_path(ctx); + fz_moveto(ctx, path, edge[x].x0, edge[x].y0); + fz_lineto(ctx, path, edge[x].x1, edge[x].y1); + fz_stroke_path(ctx, dev, path, stroke, fz_identity, fz_device_gray(ctx), &col, 1, params); + fz_drop_path(ctx, path); + debug_printf(ctx, "%d %d -> %d %d\n", edge[x].x0, edge[x].y0, edge[x].x1, edge[x].y1); + sprintf(text, "line%d.png", x); + fz_save_pixmap_as_png(ctx, src, text); + } +#endif + + stroke->linewidth *= 4; + if (route.best_score >= 0) + { + fz_path *path = fz_new_path(ctx); + fz_moveto(ctx, path, points[route.best_point[0]].x, points[route.best_point[0]].y); + fz_lineto(ctx, path, points[route.best_point[1]].x, points[route.best_point[1]].y); + fz_lineto(ctx, path, points[route.best_point[2]].x, points[route.best_point[2]].y); + fz_lineto(ctx, path, points[route.best_point[3]].x, points[route.best_point[3]].y); + fz_closepath(ctx, path); + fz_stroke_path(ctx, dev, path, stroke, fz_identity, fz_device_gray(ctx), &col, 1, params); + fz_drop_path(ctx, path); + } + + fz_drop_stroke_state(ctx, stroke); + fz_close_device(ctx, dev); + fz_drop_device(ctx, dev); + } +#endif + + fz_free(ctx, hough); + + if (route.best_score == -1) + return 0; + + if (corners) + { + corners->ul.x = points[route.best_point[0]].x; + corners->ul.y = points[route.best_point[0]].y; + corners->ur.x = points[route.best_point[1]].x; + corners->ur.y = points[route.best_point[1]].y; + corners->ll.x = points[route.best_point[2]].x; + corners->ll.y = points[route.best_point[2]].y; + corners->lr.x = points[route.best_point[3]].x; + corners->lr.y = points[route.best_point[3]].y; + } + + /* Discard any possible matches that aren't at least 1/8 of the pixmap. */ + { + fz_rect r = fz_empty_rect; + if (corners) + { + r = fz_include_point_in_rect(r, corners->ul); + r = fz_include_point_in_rect(r, corners->ur); + r = fz_include_point_in_rect(r, corners->ll); + r = fz_include_point_in_rect(r, corners->lr); + } + if ((r.x1 - r.x0) * (r.y1 - r.y0) * 8 < (src->w * src->h)) + return 0; + } + + return 1; +} + +#define DOC_DETECT_MAXDIM 500 +int +fz_detect_document(fz_context *ctx, fz_quad *points, fz_pixmap *orig_src) +{ + fz_color_params p = {FZ_RI_PERCEPTUAL }; + fz_pixmap *grey = NULL; + fz_pixmap *src = NULL; +#ifdef DETECT_DOCUMENT_RGB + fz_pixmap *r = NULL; + fz_pixmap *g = NULL; + fz_pixmap *b = NULL; +#endif + fz_pixmap *processed = NULL; + int i; + int found = 0; + + /* Gauss function has std deviation of ~sqr(2), so a variance of + * 2. Apply it twice and we get twice that. So: + * n stddev variance + * 1 1.4 2 + * 2 2 4 + * 3 2.8 8 + * 4 4 16 + * 5 5.6 32 + * 6 8 64 + * 7 11.3 128 + * 8 16 256 + * 9 22.6 512 + * 10 32 1024 + * 11 45 2048 + * + * The stddev is also known as the "width" by some sources. + * + * Our testing indicates that a width of 30ish works well for a + * image with it's major axis being ~3k pixels. So figure on a + * width of about 1% of the major axis dimension. + * + * By subsampling the incoming image, we can reduce the work we + * do in the gauss phase, as we get to use a smaller width. + */ + int n = 10; /* Based on DOC_DETECT_MAXDIM */ + int l2factor = 0; + + fz_var(src); + fz_var(grey); +#ifdef DETECT_DOCUMENT_RGB + fz_var(r); + fz_var(g); + fz_var(b); +#endif + fz_var(processed); + + fz_try(ctx) + { + START_TIME(); + { + int maxdim = orig_src->w > orig_src->h ? orig_src->w : orig_src->h; + while (maxdim > DOC_DETECT_MAXDIM) + maxdim >>= 1, l2factor++; + START_TIME(); + if (l2factor == 0) + src = fz_keep_pixmap(ctx, orig_src); + else + { + src = fz_clone_pixmap(ctx, orig_src); + fz_subsample_pixmap(ctx, src, l2factor); + } + END_TIME("subsample"); + } + START_TIME(); + if (src->n == 1 && src->alpha == 0) + { + if (src == orig_src) + grey = fz_clone_pixmap(ctx, src); + else + grey = fz_keep_pixmap(ctx, src); + } + else + { + grey = fz_convert_pixmap(ctx, src, + fz_default_gray(ctx, NULL), NULL, NULL, p, 0); + } + END_TIME("clone"); + START_TIME(); + for (i = 0; i < n; i++) + gauss5x5(ctx, grey); + END_TIME("gauss grey"); +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, grey, "gauss.png"); +#endif +#ifdef DO_HISTEQ + START_TIME(); + histeq(grey); + END_TIME("histeq grey"); +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, grey, "hist.png"); +#endif +#endif + START_TIME(); + grad(ctx, grey, pregrad(ctx, grey)); + END_TIME("grad grey"); +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, grey, "grad.png"); +#endif + +#ifdef DETECT_DOCUMENT_RGB + if (src->n == 3 && src->alpha == 0) + { + START_TIME(); + r = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0); + g = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0); + b = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0); + gauss5x5_3(ctx, r, src, 0); + for (i = 1; i < n; i++) + gauss5x5(ctx, r); + gauss5x5_3(ctx, g, src, 1); + for (i = 1; i < n; i++) + gauss5x5(ctx, g); + gauss5x5_3(ctx, b, src, 2); + for (i = 1; i < n; i++) + gauss5x5(ctx, b); +#ifdef DO_HISTEQ + histeq(r); + histeq(g); + histeq(b); +#endif + grad(ctx, r); + grad(ctx, g); + grad(ctx, b); +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, r, "r.png"); + fz_save_pixmap_as_png(ctx, g, "g.png"); + fz_save_pixmap_as_png(ctx, b, "b.png"); +#endif + combine_grad(grey, r, g, b); + END_TIME("rgb"); + fz_drop_pixmap(ctx, r); + fz_drop_pixmap(ctx, g); + fz_drop_pixmap(ctx, b); + r = NULL; + g = NULL; + b = NULL; + } +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, grey, "combined.png"); +#endif +#endif + processed = fz_new_pixmap(ctx, fz_device_gray(ctx), grey->w, grey->h, NULL, 0); + + /* Do multiple passes if required, dropping the thresholds for + * strong/weak pixels each time until we find a suitable result. */ + for (i = 0; i < 6; i++) + { + START_TIME(); + nonmax(ctx, processed, grey, i); + END_TIME("nonmax"); +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, processed, "nonmax.png"); +#endif + START_TIME(); + hysteresis(ctx, processed); + END_TIME("hysteresis"); +#ifdef WARP_DEBUG + fz_save_pixmap_as_png(ctx, processed, "hysteresis.png"); +#endif + START_TIME(); + found = make_hough(ctx, processed, points); + END_TIME("total hough"); + END_TIME("Total time"); + DUMP_TIMES(); +#ifdef WARP_DEBUG + clean(ctx, processed); + fz_save_pixmap_as_png(ctx, processed, "out.png"); +#endif + if (found) + break; + } + } + fz_always(ctx) + { + fz_drop_pixmap(ctx, src); +#ifdef DETECT_DOCUMENT_RGB + fz_drop_pixmap(ctx, r); + fz_drop_pixmap(ctx, g); + fz_drop_pixmap(ctx, b); +#endif + fz_drop_pixmap(ctx, grey); + fz_drop_pixmap(ctx, processed); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + if (found && points) + { + float f = (1<<l2factor); + float r = f/2; + points->ul.x = points->ul.x *f + r; + points->ul.y = points->ul.y *f + r; + points->ur.x = points->ur.x *f + r; + points->ur.y = points->ur.y *f + r; + points->ll.x = points->ll.x *f + r; + points->ll.y = points->ll.y *f + r; + points->lr.x = points->lr.x *f + r; + points->lr.y = points->lr.y *f + r; + } + + return found; +}
