Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/source/fitz/warp.c @ 32:72c1b70d4f5c
Also apply -Werror=implicit-function-declaration
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 21 Sep 2025 15:10:12 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
// Copyright (C) 2004-2025 Artifex Software, Inc. // // This file is part of MuPDF. // // MuPDF is free software: you can redistribute it and/or modify it under the // terms of the GNU Affero General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) // any later version. // // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more // details. // // You should have received a copy of the GNU Affero General Public License // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> // // Alternative licensing terms are available from the licensor. // For commercial licensing, see <https://www.artifex.com/> or contact // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, // CA 94129, USA, for further information. #include "mupdf/fitz.h" #include "pixmap-imp.h" #include <string.h> /* Define TIMINGS to get timing information dumped to stdout. */ #undef TIMINGS /* Define WARP_DEBUG to get debugging output (and PNGs saved). Note * that this will affect timings! */ #undef WARP_DEBUG /* Define WARP_SPEW_DEBUG to get even more debug output (and PNGs). */ #undef WARP_SPEW_DEBUG /* One reference suggested doing histogram equalisation. */ #define DO_HISTEQ /* Define DETECT_DOCUMENT_RGB, and edge detection on RGB documents will * look for edges in just the R,G,B planes as well as the grey plane. */ #undef DETECT_DOCUMENT_RGB #undef SLOW_INTERPOLATION #undef SLOW_WARPING #ifdef WARP_DEBUG static void debug_printf(fz_context *ctx, const char *fmt, ...) { char text[1024]; va_list list; va_start(list, fmt); vsnprintf(text, sizeof(text), fmt, list); va_end(list); #ifdef _WIN32 fz_write_string(ctx, fz_stdods(ctx), text); #endif fputs(text, stderr); } #else #define debug_printf(CTX, FMT, ...) do {} while (0) #endif #if defined(TIMINGS) && defined(_WIN32) #include "windows.h" #define MAX_TIMERS 256 static struct { int started; int stackptr; int stack[MAX_TIMERS]; const char *name[MAX_TIMERS]; LARGE_INTEGER time[MAX_TIMERS]; } timer; #define START_TIME() \ do {\ int i = timer.stack[timer.stackptr++] = timer.started++;\ QueryPerformanceCounter(&timer.time[i]);\ } while (0) #define END_TIME(NAME) \ do {\ LARGE_INTEGER end;\ int i;\ QueryPerformanceCounter(&end);\ i = timer.stack[--timer.stackptr];\ timer.time[i].QuadPart = end.QuadPart - timer.time[i].QuadPart;\ timer.name[i] = NAME;\ } while (0) #define DUMP_TIMES() \ do {\ int i;\ LARGE_INTEGER freq;\ QueryPerformanceFrequency(&freq);\ float f = freq.QuadPart;\ for (i = 0; i < timer.started; i++)\ debug_printf(ctx, "%s: %g\n", timer.name[i], (int)1000*timer.time[i].QuadPart/f);\ } while (0) #else #define START_TIME() do {} while(0) #define END_TIME(NAME) do {} while(0) #define DUMP_TIMES() do {} while(0) #endif typedef struct { int x; int y; } fz_ipoint; typedef struct { int i; int f; int di; int df; } fz_bresenham_core; typedef struct { fz_bresenham_core c; int n; } fz_bresenham; typedef struct { fz_bresenham_core x; fz_bresenham_core y; int n; } fz_ipoint_bresenham; typedef struct { fz_bresenham_core sx; fz_bresenham_core sy; fz_bresenham_core ex; fz_bresenham_core ey; int n; } fz_ipoint2_bresenham; static inline fz_bresenham_core init_bresenham_core(int start, int end, int n) { fz_bresenham_core b; int delta = end-start; b.di = n == 0 ? 0 : delta/n; b.df = delta - n*b.di; /* 0 <= b.df < n */ if (b.df < 0) b.di--, b.df += n; /* Starts with bi.i = start, bi.f = n, and then does a half * step. */ b.i = start + (b.di>>1); b.f = n - (((b.di & 1) * n + b.df)>>1); return b; } #ifdef CURRENTLY_UNUSED static inline fz_bresenham init_bresenham(int start, int end, int n) { fz_bresenham b; b.c = init_bresenham_core(start, end, n); b.n = n; return b; } static inline void step(fz_bresenham *b) { step_core(&b->c, b->n); } #endif static inline void step_core(fz_bresenham_core *b, int n) { b->i += b->di; b->f -= b->df; if (b->f <= 0) { b->f += n; b->i++; } } static inline fz_ipoint_bresenham init_ip_bresenham(fz_ipoint start, fz_ipoint end, int n) { fz_ipoint_bresenham b; b.x = init_bresenham_core(start.x, end.x, n); b.y = init_bresenham_core(start.y, end.y, n); b.n = n; return b; } static inline void step_ip(fz_ipoint_bresenham *b) { step_core(&b->x, b->n); step_core(&b->y, b->n); } static inline fz_ipoint current_ip(const fz_ipoint_bresenham *b) { fz_ipoint ip; ip.x = b->x.i; ip.y = b->y.i; return ip; } static inline fz_ipoint2_bresenham init_ip2_bresenham(fz_ipoint ss, fz_ipoint se, fz_ipoint es, fz_ipoint ee, int n) { fz_ipoint2_bresenham b; b.sx = init_bresenham_core(ss.x, se.x, n); b.sy = init_bresenham_core(ss.y, se.y, n); b.ex = init_bresenham_core(es.x, ee.x, n); b.ey = init_bresenham_core(es.y, ee.y, n); b.n = n; return b; } static inline void step_ip2(fz_ipoint2_bresenham *b) { step_core(&b->sx, b->n); step_core(&b->sy, b->n); step_core(&b->ex, b->n); step_core(&b->ey, b->n); } static inline fz_ipoint start_ip(const fz_ipoint2_bresenham *b) { fz_ipoint ip; ip.x = b->sx.i; ip.y = b->sy.i; return ip; } static fz_forceinline fz_ipoint end_ip(const fz_ipoint2_bresenham *b) { fz_ipoint ip; ip.x = b->ex.i; ip.y = b->ey.i; return ip; } static void interp_n(unsigned char *d, const unsigned char *s0, const unsigned char *s1, int f, int n) { do { int a = *s0++; int b = *s1++ - a; *d++ = ((a<<8) + b*f + 128)>>8; } while (--n); } static void interp2_n(unsigned char *d, const unsigned char *s0, const unsigned char *s1, const unsigned char *s2, int f0, int f1, int n) { do { int a = *s0++; int b = *s1++ - a; int c; a = (a<<8) + b*f0; c = (*s2++<<8) - a; *d++ = ((a<<8) + c*f1 + (1<<15))>>16; } while (--n); } static inline void copy_pixel(unsigned char *d, const fz_pixmap *src, fz_ipoint p) { int u = p.x>>8; int v = p.y>>8; int fu = p.x & 255; int fv = p.y & 255; int n = src->n; const unsigned char *s; ptrdiff_t stride = src->stride; if (u < 0) u = 0, fu = 0; else if (u >= src->w-1) u = src->w-1, fu = 0; if (v < 0) v = 0, fv = 0; else if (v >= src->h-1) v = src->h-1, fv = 0; s = &src->samples[u * n + v * stride]; #ifdef SLOW_INTERPOLATION { int i; for (i = 0; i < n; i++) { int v0 = s[0]; int v1 = s[n]; int v2 = s[stride]; int v3 = s[stride+n]; int v01, v23, v; v01 = (v0<<8) + (v1-v0)*fu; v23 = (v2<<8) + (v3-v2)*fu; v = (v01<<8) + (v23-v01)*fv; assert(v >= 0 && v < (1<<24)-32768); *d++ = (v + 32768)>>16; s++; } return; } #else if (fu == 0) { if (fv == 0) { /* Copy single pixel */ memcpy(d, s, n); return; } /* interpolate y pixels */ interp_n(d, s, s + stride, fv, n); return; } if (fv == 0) { /* interpolate x pixels */ interp_n(d, s, s+n, fu, n); return; } if (fu <= fv) { /* Top half of the trapezoid. */ interp2_n(d, s, s+stride, s+stride+n, fv, fu, n); } else { /* Bottom half of the trapezoid. */ interp2_n(d, s, s+n, s+stride+n, fu, fv, n); } #endif } static void warp_core(unsigned char *d, int n, int width, int height, int stride, const fz_ipoint corner[4], const fz_pixmap *src) { fz_ipoint2_bresenham row_bres; int x; /* We have a bresenham pair for how to move the start * and end of the row each y step. */ row_bres = init_ip2_bresenham(corner[0], corner[3], corner[1], corner[2], height); stride -= width * n; #ifdef SLOW_WARPING { int h; for (h = 0 ; h < height ; h++) { int sx = corner[0].x + (corner[3].x - corner[0].x)*h/height; int sy = corner[0].y + (corner[3].y - corner[0].y)*h/height; int ex = corner[1].x + (corner[2].x - corner[1].x)*h/height; int ey = corner[1].y + (corner[2].y - corner[1].y)*h/height; for (x = 0; x < width; x++) { fz_ipoint p; p.x = sx + (ex-sx)*x/width; p.y = sy + (ey-sy)*x/width; copy_pixel(d, src, p); d += n; } d += stride; } } #else for (; height > 0; height--) { /* We have a bresenham for how to move the * current pixel across the row. */ fz_ipoint_bresenham pix_bres; pix_bres = init_ip_bresenham(start_ip(&row_bres), end_ip(&row_bres), width); for (x = width; x > 0; x--) { /* Copy pixel */ copy_pixel(d, src, current_ip(&pix_bres)); d += n; step_ip(&pix_bres); } /* step to the next line. */ step_ip2(&row_bres); d += stride; } #endif } /* points are clockwise from NW. This performs simple affine warping. */ fz_pixmap * fz_warp_pixmap(fz_context *ctx, fz_pixmap *src, fz_quad points, int width, int height) { fz_pixmap *dst; if (src == NULL) return NULL; if (width >= (1<<24) || width < 0 || height >= (1<<24) || height < 0) fz_throw(ctx, FZ_ERROR_LIMIT, "Bad width/height"); dst = fz_new_pixmap(ctx, src->colorspace, width, height, src->seps, src->alpha); dst->xres = src->xres; dst->yres = src->yres; fz_try(ctx) { unsigned char *d = dst->samples; int n = dst->n; fz_ipoint corner[4]; /* Find the corner texture positions as fixed point */ corner[0].x = (int)(points.ul.x * 256 + 128); corner[0].y = (int)(points.ul.y * 256 + 128); corner[1].x = (int)(points.ur.x * 256 + 128); corner[1].y = (int)(points.ur.y * 256 + 128); corner[2].x = (int)(points.ll.x * 256 + 128); corner[2].y = (int)(points.ll.y * 256 + 128); corner[3].x = (int)(points.lr.x * 256 + 128); corner[3].y = (int)(points.lr.y * 256 + 128); warp_core(d, n, width, height, width * n, corner, src); } fz_catch(ctx) { fz_drop_pixmap(ctx, dst); fz_rethrow(ctx); } return dst; } static float dist(fz_point a, fz_point b) { float x = a.x-b.x; float y = a.y-b.y; return sqrtf(x*x+y*y); } /* Again, affine warping, but this time where the destination width/height * are chosen automatically. */ fz_pixmap * fz_autowarp_pixmap(fz_context *ctx, fz_pixmap *src, fz_quad points) { float w0 = dist(points.ur, points.ul); float w1 = dist(points.ll, points.lr); float h0 = dist(points.lr, points.ul); float h1 = dist(points.ll, points.ur); int w = (w0+w1+0.5)/2; int h = (h0+h1+0.5)/2; return fz_warp_pixmap(ctx, src, points, w, h); } /* Corner detection: We shall steal the algorithm from the Dropbox Document Scanner, as described here: https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning A good reference to the steps involved in the canny edge detection process can be found here: https://towardsdatascience.com/canny-edge-detection-step-by-step-in-python-computer-vision-b49c3a2d8123 This involves: * Canny Edge Detection * * Greyscale conversion * * Noise reduction * * Gradient Calculation * * Non-maximum suppression * * Double threshold * * Edge tracking by Hysteresis * Hough Transform to fix possible edges * Computing intersections and scoring quads We modify the gradient calculation with a simple scale to ensure we fill the range. */ #ifdef DO_HISTEQ static void histeq(fz_pixmap *im) { uint32_t count[256]; unsigned char tbl[256]; int i; unsigned char *s = im->samples; int n = im->w*im->h; int sigma; int den; memset(count, 0, sizeof(count)); for (i = n; i > 0; i--) count[*s++]++; /* Rather than doing a pure histogram equalisation, we bend * the table so that 0 always stays as 0, and 255 always stays * as 255. */ sigma = (count[0]>>1) - count[0]; den = sigma + n - (count[255]>>1); for (i = 0; i < 256; i++) { int v = count[i]; sigma += v - (v>>1); tbl[i] = (int)(255.0f * sigma / den + 0.5f); sigma += (v>>1); } s = im->samples; for (i = n; i > 0; i--) *s = tbl[*s], s++; } #endif /* The first functions apply a 5x5 gauss filter to blur the greyscale * image and remove noise. The gauss filter is a convolution with * weights: * * 2 4 5 4 2 * 4 9 12 9 4 * 5 12 15 12 5 * 4 9 12 9 4 * 2 4 5 4 2 * * As you can see, there are 3 distinct lines of weights within that * matrix. We walk each row of source pixels once, calculating each of * those convolutions, storing the result in a rolling buffer of 5x3xw * entries. Then we sum columns from the buffer to give us the results. */ /* Read across a row of pixels of width w, from s, performing * the horizontal portion of the convolutions, storing the results * in 3 lines of a buffer, starting at d, &d[w], &d[2*w]. * * d[0*w] uses weights (5 12 15 12 5) * d[1*w] uses weights (4 9 12 9 4) * d[2*w] uses weights (2 4 5 4 2) */ static void gauss5row(uint16_t *d, const unsigned char *s, int w) { int i; int s0 = s[0]; int s1 = s[1]; int s2 = s[2]; int s3 = s[3]; s += 4; d[2*w] = 11*s0 + 4*s1 + 2*s2; d[1*w] = 25*s0 + 9*s1 + 4*s2; *d++ = 32*s0 + 12*s1 + 5*s2; d[2*w] = 6*s0 + 5*s1 + 4*s2 + 2*s3; d[1*w] = 13*s0 + 12*s1 + 9*s2 + 4*s3; *d++ = 17*s0 + 15*s1 + 12*s2 + 5*s3; for (i = w - 4; i > 0; i--) { int d2 = 2*s0 + 4*s1 + 5*s2 + 4*s3; int d1 = 4*s0 + 9*s1 + 12*s2 + 9*s3; int d0 = 5*s0 + 12*s1 + 15*s2 + 12*s3; s0 = s1; s1 = s2; s2 = s3; s3 = *s++; d[2*w] = d2 + 2*s3; d[1*w] = d1 + 4*s3; *d++ = d0 + 5*s3; } d[2*w] = 2*s0 + 4*s1 + 5*s2 + 6*s3; d[1*w] = 4*s0 + 9*s1 + 12*s2 + 13*s3; *d++ = 5*s0 + 12*s1 + 15*s2 + 17*s3; d[2*w] = 2*s1 + 4*s2 + 11*s3; d[1*w] = 4*s1 + 9*s2 + 25*s3; *d = 5*s1 + 12*s2 + 32*s3; } /* Calculate the results for row y of the image, of width w, by * summing results from the temporary buffer s, and writing into the * original pixmap at d. */ static void gauss5col(unsigned char *d, const uint16_t *s, int y, int w) { const uint16_t *s0, *s1, *s2, *s3, *s4; y *= 3; s0 = &s[((y+ 9+2)%15)*w]; s1 = &s[((y+12+1)%15)*w]; s2 = &s[( y %15)*w]; s3 = &s[((y+ 3+1)%15)*w]; s4 = &s[((y+ 6+2)%15)*w]; for (; w > 0; w--) *d++ = (*s0++ + *s1++ + *s2++ + *s3++ + *s4++ + 79)/159; } static void gauss5x5(fz_context *ctx, fz_pixmap *src) { int w = src->w; int h = src->h; uint16_t *buf; unsigned char *s = src->samples; int y; if (w < 5 || h < 5) fz_throw(ctx, FZ_ERROR_ARGUMENT, "Pixmap too small"); buf = fz_malloc(ctx, w*3*5 * sizeof(uint16_t)); gauss5row(&buf[0*3*w], &s[0*w], w); gauss5row(&buf[1*3*w], &s[1*w], w); memcpy(&buf[3*3*w], buf, w*3 * sizeof(uint16_t)); /* row -2 */ memcpy(&buf[4*3*w], buf, w*3 * sizeof(uint16_t)); /* row -1 */ for (y = 2; y < h; y++) { gauss5row(&buf[(y%5)*3*w], &s[2*w], w); gauss5col(s, buf, y-2, w); s += w; } for (; y < h+2; y++) { memcpy(&buf[(y%5)*3*w], &buf[((y+4)%5)*3*w], 3*w*sizeof(uint16_t)); gauss5col(s, buf, y-2, w); s += w; } fz_free(ctx, buf); } #ifdef DETECT_DOCUMENT_RGB /* Variant of the above that works on a single plane from rgb data */ static void gauss5row3(uint16_t *d, const unsigned char *s, int w) { int i; int s0 = s[0]; int s1 = s[3]; int s2 = s[6]; int s3 = s[9]; s += 4*3; d[2*w] = 11*s0 + 4*s1 + 2*s2; d[1*w] = 25*s0 + 9*s1 + 4*s2; *d++ = 32*s0 + 12*s1 + 5*s2; d[2*w] = 6*s0 + 5*s1 + 4*s2 + 2*s3; d[1*w] = 13*s0 + 12*s1 + 9*s2 + 4*s3; *d++ = 17*s0 + 15*s1 + 12*s2 + 5*s3; for (i = w - 4; i > 0; i--) { int d2 = 2*s0 + 4*s1 + 5*s2 + 4*s3; int d1 = 4*s0 + 9*s1 + 12*s2 + 9*s3; int d0 = 5*s0 + 12*s1 + 15*s2 + 12*s3; s0 = s1; s1 = s2; s2 = s3; s3 = *s; s += 3; d[2*w] = d2 + 2*s3; d[1*w] = d1 + 4*s3; *d++ = d0 + 5*s3; } d[2*w] = 2*s0 + 4*s1 + 5*s2 + 6*s3; d[1*w] = 4*s0 + 9*s1 + 12*s2 + 13*s3; *d++ = 5*s0 + 12*s1 + 15*s2 + 17*s3; d[2*w] = 2*s1 + 4*s2 + 11*s3; d[1*w] = 4*s1 + 9*s2 + 25*s3; *d = 5*s1 + 12*s2 + 32*s3; } static void gauss5x5_3(fz_context *ctx, fz_pixmap *dst, const fz_pixmap *src, int comp) { int w = src->w; int h = src->h; uint16_t *buf; unsigned char *s = src->samples + comp; unsigned char *d = dst->samples; int y; if (w < 5 || h < 5) fz_throw(ctx, FZ_ERROR_ARGUMENT, "Pixmap too small"); buf = fz_malloc(ctx, w*3*5 * sizeof(uint16_t)); gauss5row3(&buf[0*3*w], s, w); s += w*3; gauss5row3(&buf[1*3*w], s, w); s += w*3; memcpy(&buf[3*3*w], buf, w*3 * sizeof(uint16_t)); /* row -2 */ memcpy(&buf[4*3*w], buf, w*3 * sizeof(uint16_t)); /* row -1 */ for (y = 2; y < h; y++) { gauss5row3(&buf[((y*3)%15)*w], s, w); gauss5col(d, buf, y-2, w); s += w*3; d += w; } for (; y < h+2; y++) { memcpy(&buf[(y%5)*3*w], &buf[((y+4)%5)*3*w], 3*w*sizeof(uint16_t)); gauss5col(d, buf, y-2, w); d += w; } fz_free(ctx, buf); } #endif /* The next set of functions perform the gradient calculation. * We convolve with Sobel kernels Kx and Ky respectively: * * Kx = -1 0 1 Ky = 1 2 1 * -2 0 2 0 0 0 * -1 0 1 -1 -2 -1 * * We do this by using a rolling temporary buffer of int16_t's to hold * 3 pairs of lines of weights scaled by (1 0 1) and (1 2 1). * * We can then sum entries from those lines to calculate kx and ky for * each pixel in the image. * * Then by examining the values of x and y, we can figure out the * "direction" of the edge (horizontal, vertical, or either diagonal), * and the magnitude of the difference across that edge. These get * encoded back into the original image storage using the 2 bottom bits * for direction, and the top 6 bits for magnitude. */ static void pregradrow(int16_t *d, const unsigned char *s, int w) { int i; unsigned char s0 = *s++; unsigned char s1 = *s++; d[w] = 3*s0 + s1; *d++ = s1 - s0; for (i = w-2; i > 0; i--) { int s2 = *s++; d[w] = s0 + 2*s1 + s2; *d++ = s2 - s0; s0 = s1; s1 = s2; } d[w] = s0 + 3*s1; *d = s1 - s0; } static void pregradcol(unsigned char *d, const int16_t *buf, int y, int w, uint32_t *max) { const int16_t *s0 = &buf[((y+2)%3)*w*2]; const int16_t *s1 = &buf[((y )%3)*w*2]; const int16_t *s2 = &buf[((y+1)%3)*w*2]; int i; for (i = w; i > 0; i--) { uint32_t ax, ay, mag; int x; y = s0[w] - s2[w]; x = *s0++ + 2 * *s1++ + *s2++; ax = x >= 0 ? x : -x; ay = y >= 0 ? y : -y; /* x and y are now both in the range -1020..1020 */ /* Now we calculate slope and gradient. * angle = atan2(y, x); * intensity = hypot(x, y); * But wait, we don't need that accuracy. We only need * to distinguish 4 directions... * * -22.5 < angle <= 22.5 = 0 * 22.5 < angle <= 67.5 = 1 * 67.5 < angle <= 112.5 = 2 * 115.5 < angle <= 157.5 = 3 * (and the reflections) * * 7 0 1 (x positive right, y positive downwards) * 6 * 2 * 5 4 3 * * tan(22.5)*65536 = 27146. * And for magnitude, we consider the magnitude just * along the 8 directions we've picked. So * 65536/SQR(2) = 46341. * We want magnitude in the 0...63 range. */ if (ax<<16 < 27146*ay) { /* angle = 0 */ mag = ay<<16; /* 0 to 1020<<16 */ } else if (ay<<16 < ax*27146) { /* angle = 2 */ mag = ax<<16; /* 0 to 1020<<16 */ } else { /* angle = 1 or 3 */ mag = (46341*(ax+ay)); } if (mag > *max) *max = mag; } } static uint32_t pregrad(fz_context *ctx, fz_pixmap *src) { int w = src->w; int h = src->h; unsigned char *s = src->samples; int16_t *buf = fz_malloc(ctx, w*2*3*sizeof(int16_t)); int y; uint32_t max = 0; pregradrow(buf, s, w); /* Line 0 */ memcpy(&buf[w*2*2], buf, w*2*sizeof(int16_t)); /* Line 1 */ s += w; for (y = 1; y < h-1; y++) { pregradrow(&buf[(y%3)*w*2], s, w); pregradcol(s-w, buf, y-1, w, &max); s += w; } memcpy(&buf[((y+1)%3)*w*2], &buf[(y%3)*w*2], w*2*sizeof(int16_t)); /* Line h */ pregradcol(s-w, buf, h-2, w, &max); pregradcol(s, buf, h-1, w, &max); fz_free(ctx, buf); if (max == 0) return 1; else return 0x7FFFFFFFU/max; } static void gradrow(int16_t *d, const unsigned char *s, int w) { int i; unsigned char s0 = *s++; unsigned char s1 = *s++; d[w] = 3*s0 + s1; *d++ = s1 - s0; for (i = w-2; i > 0; i--) { int s2 = *s++; d[w] = s0 + 2*s1 + s2; *d++ = s2 - s0; s0 = s1; s1 = s2; } d[w] = s0 + 3*s1; *d = s1 - s0; } static void gradcol(unsigned char *d, const int16_t *buf, int y, int w, int scale) { const int16_t *s0 = &buf[((y+2)%3)*w*2]; const int16_t *s1 = &buf[((y )%3)*w*2]; const int16_t *s2 = &buf[((y+1)%3)*w*2]; int i; for (i = w; i > 0; i--) { uint32_t ax, ay, mag, scaled; int angle, x; y = s0[w] - s2[w]; x = *s0++ + 2 * *s1++ + *s2++; ax = x >= 0 ? x : -x; ay = y >= 0 ? y : -y; /* x and y are now both in the range -1020..1020 */ /* Now we calculate slope and gradient. * angle = atan2(y, x); * intensity = hypot(x, y); * But wait, we don't need that accuracy. We only need * to distinguish 4 directions... * * -22.5 < angle <= 22.5 = 0 * 22.5 < angle <= 67.5 = 1 * 67.5 < angle <= 112.5 = 2 * 115.5 < angle <= 157.5 = 3 * (and the reflections) * * 7 0 1 (x positive right, y positive downwards) * 6 * 2 * 5 4 3 * * tan(22.5)*65536 = 27146. * And for magnitude, we consider the magnitude just * along the 8 directions we've picked. So * 65536/SQR(2) = 46341. * We want magnitude in the 0...63 range. */ if (ax<<16 < 27146*ay) { angle = 0; mag = ay<<16; /* 0 to 1020<<16 */ } else if (ay<<16 < ax*27146) { angle = 2; mag = ax<<16; /* 0 to 1020<<16 */ } else { /* 1 or 3 */ angle = (x^y) >= 0 ? 3 : 1; mag = (46341*(ax+ay)); } scaled = (mag * scale)>>25; assert(scaled >= 0 && scaled <= 63); *d++ = (scaled<<2) | angle; } } static void grad(fz_context *ctx, fz_pixmap *src, uint32_t scale) { int w = src->w; int h = src->h; unsigned char *s = src->samples; int16_t *buf = fz_malloc(ctx, w*2*3*sizeof(int16_t)); int y; gradrow(buf, s, w); /* Line 0 */ memcpy(&buf[w*2*2], buf, w*2*sizeof(int16_t)); /* Line 1 */ s += w; for (y = 1; y < h-1; y++) { gradrow(&buf[(y%3)*w*2], s, w); gradcol(s-w, buf, y-1, w, scale); s += w; } memcpy(&buf[((y+1)%3)*w*2], &buf[(y%3)*w*2], w*2*sizeof(int16_t)); /* Line h */ gradcol(s-w, buf, h-2, w, scale); gradcol(s, buf, h-1, w, scale); fz_free(ctx, buf); } #ifdef DETECT_DOCUMENT_RGB static void combine_grad(fz_pixmap *grey, const fz_pixmap *r, const fz_pixmap *g, const fz_pixmap *b) { int n; unsigned char *sd = grey->samples; const unsigned char *sr = r->samples; const unsigned char *sg = g->samples; const unsigned char *sb = b->samples; for (n = g->w * g->h; n > 0; n--) { unsigned char vg = *sg++; unsigned char vr = *sr++; unsigned char vb = *sb++; unsigned char vd = *sd++; if (vr > vg) vg = vr; if (vb > vg) vg = vb; if (vg > vd) sd[-1] = vg; } } #endif /* Next, we perform Non-Maximum Suppression and Double Thresholding, * both in the same phase. * * We walk the image, looking at the magnitude of the edges. Edges below * the 'weak' threshold are discarded. Otherwise, neighbouring pixels in * the direction of the edge are considered; if other pixels are stronger * then this pixel is discarded. If not, we classify ourself as either * 'strong' or 'weak'. */ #define WEAK_EDGE 64 #define STRONG_EDGE 128 static void nonmax(fz_context *ctx, fz_pixmap *dst, const fz_pixmap *src, int pass) { int w = src->w; int h = src->h; const unsigned char *s0 = src->samples; const unsigned char *s1 = s0; const unsigned char *s2 = s0+w; unsigned char *d = dst->samples; int x, y; /* thresholds are in the 0 to 63 range. * WEAK is typically 0.1ish, STRONG 0.3ish */ int weak = 6 - pass; int strong = 12 - pass*2; /* On entry, pixels have the angle in the bottom 2 bits and the magnitude in the rest. */ /* On exit, strong pixels have bit 7 set, weak pixels have bit 6, others are 0. * strong and weak pixels have the angle in bits 4 and 5. */ for (y = h-1; y >= 0;) { int lastmag; int ang = *s1++; int mag = ang>>2; int q, r; /* Pixel 0 */ if (mag <= weak) { /* Not even a weak edge. We'll never keep it. */ *d++ = 0; s0++; s2++; } else { ang &= 3; switch (ang) { default: case 0: q = (*s0++)>>2; r = (*s2++)>>2; break; case 1: q = (*++s0)>>2; r = 0; s2++; break; case 2: s0++; s2++; q = 0; r = *s1>>2; break; case 3: q = 0; s0++; r = (*++s2)>>2; break; } if (mag < q || mag < r) { /* Neighbouring edges are stronger. * Lose this one. */ *d++ = 0; } else if (mag < strong) { /* Weak edge. */ *d++ = WEAK_EDGE | (ang<<4); } else { /* Strong edge */ *d++ = STRONG_EDGE | (ang<<4); } } lastmag = mag; for (x = w-2; x > 0; x--) { ang = *s1++; mag = ang>>2; if (mag <= weak) { /* Not even a weak edge. We'll never keep it. */ *d++ = 0; s0++; s2++; } else { ang &= 3; switch (ang) { default: case 0: q = (*s0++)>>2; r = (*s2++)>>2; break; case 1: q = (*++s0)>>2; r = ((s2++)[-1])>>2; break; case 2: s0++; s2++; q = lastmag; r = *s1>>2; break; case 3: q = ((s0++)[-1])>>2; r = (*++s2)>>2; break; } if (mag < q || mag < r) { /* Neighbouring edges are stronger. * Lose this one. */ *d++ = 0; } else if (mag < strong) { /* Weak edge. */ *d++ = WEAK_EDGE | (ang<<4); } else { /* Strong edge */ *d++ = STRONG_EDGE | (ang<<4); } } lastmag = mag; } /* Pixel w-1 */ ang = *s1++; mag = ang>>2; if (mag <= weak) { /* Not even a weak edge. We'll never keep it. */ *d++ = 0; s0++; s2++; lastmag = 0; } else { ang &= 3; switch (ang) { default: case 0: q = (*s0++)>>2; r = (*s2++)>>2; break; case 1: q = 0; s0++; r = ((s2++)[-1])>>2; break; case 2: s0++; s2++; q = 0; r = *s1>>2; break; case 3: q = ((s0++)[-1])>>2; r = 0; s2++; break; } if (mag < q || mag < r) { /* Neighbouring edges are stronger. * Lose this one. */ *d++ = 0; } else if (mag < strong) { /* Weak edge. */ *d++ = WEAK_EDGE | (ang<<4); } else { /* Strong edge */ *d++ = STRONG_EDGE | (ang<<4); } } s0 = s1-w; if (--y == 0) s2 = s1; } } /* Next, we have the hysteresis phase. Here we bump any 'weak' pixel * that has at least one strong pixel around it up to being a 'strong' * pixel. * * On entry, strong pixels have bit 7 set, weak pixels have bit 6 set, * the angle is in bits 4 and 5, everything else is 0. * * We walk the rows of the image, and for each pixel we set bit 0 to be * the logical OR of all bit 7's of itself, and its horizontally * neighbouring pixels. * * Once we have done the first 2 rows like that, we can combine the * operation of generating the bottom bits for row i, with the * calculation of row i-1. Any given pixel on row i-1 should be promoted * to 'strong' if it was 'weak', and if the logical OR of the matching * pixels in row i, i-1 or i-2 has bit 0 set. * * At the end of this process any pixel with bit 7 set is 'strong'. * Bits 4 and 5 still have the angle. */ static void hysteresis(fz_context *ctx, fz_pixmap *src) { int w = src->w; int h = src->h; unsigned char *s0 = src->samples; unsigned char *s1 = s0; unsigned char *s2 = s0; unsigned char v0, v1, v2, r0, r1, r2; int x, y; /* On entry, strong pixels have bit 7 set, weak pixels have bit 6, others are 0. * strong and weak pixels have the angle in bits 4 and 5. */ /* We make the bottom bit in every pixel be 1 iff the pixel * or the ones to either side of it are 'strong'. */ /* First row - just do the bottom bit. */ /* Pixel 0 */ v0 = *s0++; v1 = *s0++; s0[-2] = v0 | ((v0 | v1)>>7); /* Middle pixels */ for (x = w-2; x > 0; x--) { v2 = *s0++; s0[-2] = v1 | ((v0 | v1 | v2)>>7); v0 = v1; v1 = v2; } /* Pixel w-1 */ s0[-1] = v1 | ((v0 | v1)>>7); assert(s0 == src->samples + w); /* Second row - do the "bottom bit" for the second row, and * perform hysteresis on the top row. */ /* Pixel 0 */ v0 = *s0++; v1 = *s0++; r0 = v0 | ((v0 | v1)>>7); s0[-2] = r0; r1 = *s1++; if ((r1>>6) & (r0 | r1)) s1[-1] |= 128; /* Middle pixels */ for (x = w-2; x > 0; x--) { v2 = *s0++; r0 = v1 | ((v0 | v1 | v2)>>7); s0[-2] = r0; r1 = *s1++; if ((r1>>6) & (r0 | r1)) s1[-1] |= 128; v0 = v1; v1 = v2; } /* Pixel w-1 */ r0 = v1 | ((v0 | v1)>>7); s0[-1] = r0; r1 = *s1++; if ((r1>>6) & (r0 | r1)) s1[-1] |= 128; assert(s0 == s1 + w); /* Now we get into the swing of things. We do the "bottom bit" * for row n+1, and do the actual processing for row n. */ for (y = h-4; y > 0; y--) { /* Pixel 0 */ v0 = *s0++; v1 = *s0++; r0 = v0 | ((v0 | v1)>>7); s0[-2] = r0; r1 = *s1++; r2 = *s2++; if ((r1>>6) & (r0 | r1 | r2)) s1[-1] |= 128; /* Middle pixels */ for (x = w-2; x > 0; x--) { v2 = *s0++; r0 = v1 | ((v0 | v1 | v2)>>7); s0[-2] = r0; r1 = *s1++; r2 = *s2++; if ((r1>>6) & (r0 | r1 | r2)) s1[-1] |= 128; v0 = v1; v1 = v2; } /* Pixel w-1 */ r0 = v1 | ((v0 | v1)>>7); s0[-1] = r0; r1 = *s1++; r2 = *s2++; if ((r1>>6) & (r0 | r1 | r2)) s1[-1] |= 128; assert(s0 == s1 + w); assert(s1 == s2 + w); } /* Final 2 rows together */ /* Pixel 0 */ v0 = *s0++; v1 = *s0++; r0 = v0 | ((v0 | v1)>>7); r1 = *s1++; r2 = *s2++; if ((r1>>6) & (r0 | r1 | r2)) s1[-1] |= 128; if ((r0>>6) & (r0 | r1)) s0[-2] |= 128; /* Middle pixels */ for (x = w-2; x > 0; x--) { v2 = *s0++; r0 = v1 | ((v0 | v1 | v2)>>7); r1 = *s1++; r2 = *s2++; if ((r1>>6) & (r0 | r1 | r2)) s1[-1] |= 128; if ((r0>>6) & (r0 | r1)) s0[-2] |= 128; v0 = v1; v1 = v2; } /* Pixel w-1 */ r0 = v1 | ((v0 | v1)>>7); r1 = *s1; r2 = *s2; if ((r1>>6) & (r0 | r1 | r2)) s1[0] |= 128; if ((r0>>6) & (r0 | r1)) s0[-1] |= 128; } #ifdef WARP_DEBUG /* A simple function to keep just bit 7 of the image. This 'cleans' the * pixmap so that a grey version can be saved out for visual checking. */ static void clean(fz_context *ctx, fz_pixmap *src) { int w = src->w; int h = src->h; unsigned char *s = src->samples; for (w = w*h; w > 0; w--) *s = *s & 128, s++; } #endif #define SINTABLE_SHIFT 14 static int16_t sintable[270]; #define costable (&sintable[90]) /* We have collected an array of edge data. * For each pixel, we know whether there is a 'strong' edge * there, and if so, in which of 4 directions it runs. * * We want to convert this into hough space. * * The literature describes points in Hough space as having the * form (r, theta). The value of any given point point in hough * space is the "strength" of the line described by: * x.cos(theta) + y.sin(theta) = r * * | \ * | /\ * | /\/\ * | r/ \ * | / \ * | / \ * |/theta \ * -+-------------- * * i.e. r is the shortest distance from the origin to the line, * and theta gives us the angle of that shortest line. * * But, we are using angles of theta from the vertical, so we need * a different formulation: * * | \ * | /\ * | /\/\ * | r/ \ t = theta * | / \ * |t/ \ * |/ \ * -+-------------- * * So we're using 90-theta. cos(90-theta) = sin(theta), * and sin(90-theta) = cos(theta). * * So: x.sin(theta) + y.cos(theta) = r (for theta measured * clockwise from the y axis). * * We've been collecting angles according to their position in one * of 4 octants: * * Ang 0 = close to a horizontal edge (-22.5 to 22.5 degrees) * Ang 1 = close to diagonal edge (top left to bottom right) (22.5 to 67.5 degrees) * Ang 2 = close to a vertical edge (67.5 to 112.5 degrees) * Ang 3 = close to diagonal edge (bottom left to top right) (112.5 to 157.5 degrees) * * The other 4 octants mirror onto these. * * So, for each point in our (x,y) pixmap we whether we have a strong * pixel or not. If we have such a pixel, then we know that an edge * passes through that point, and which of those 4 octants it is in. * * We therefore consider all the possible angles within that octant, * and add a 'vote' for each of those lines into our hough transformed * space. */ static void mark_hough(uint32_t *d, int x, int y, int maxlen, int reduce, int ang) { int theta; int stride = (maxlen*2)>>reduce; int minang, maxang; switch (ang) { /* The angles are really 22.5, 67.5 etc, but we are working in ints * and specifying maxang as the first one greater than the one we * want to mark. */ default: case 0: /* Vertical boundary. Lines through this boundary * go horizontally. So the perpendicular to them * is vertical. */ minang = 0; maxang = 23; break; case 1: /* NE boundary */ minang = 23; maxang = 68; break; case 2: /* Horizontal boundary. */ minang = 68; maxang = 113; break; case 3: /* SE boundary */ minang = 113; maxang = 158; break; case 4: /* For debugging: */ minang = 0; maxang = 180; break; } d += minang * stride; while (1) { for (theta = minang; theta < maxang; theta++) { int p = (x*sintable[theta] + y*costable[theta])>>SINTABLE_SHIFT; int v = (maxlen + p)>>reduce; d[v]++; d += stride; } if (ang != 0) break; ang = 4; minang = 158; d += (minang - maxang) * stride; maxang = 180; } } #ifdef WARP_DEBUG static void save_hough_debug(fz_context *ctx, uint32_t *hough, int stride) { uint32_t scale; uint32_t maxval; uint32_t *p; int y; fz_pixmap *dst = fz_new_pixmap(ctx, NULL, stride, 180, NULL, 0); unsigned char *d = dst->samples; /* Make the image of the hough space (for debugging) */ maxval = 1; /* Avoid possible division by zero */ p = hough; for (y = 180*stride; y > 0; y--) { uint32_t v = *p++; if (v > maxval) maxval = v; } scale = 0xFFFFFFFFU/maxval; p = hough; for (y = 180*stride; y > 0; y--) { *d++ = (scale * *p++)>>24; } fz_save_pixmap_as_png(ctx, dst, "hough.png"); fz_drop_pixmap(ctx, dst); } #endif static uint32_t *do_hough(fz_context *ctx, const fz_pixmap *src, int stride, int maxlen, int reduce) { int w = src->w; int h = src->h; int x, y; const unsigned char *s = src->samples; uint32_t *hough = fz_calloc(ctx, sizeof(uint32_t), 180*stride); START_TIME(); /* Construct the hough space representation. */ for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { unsigned char v = *s++; if (v & 128) mark_hough(hough, x, y, maxlen, reduce, (v>>4)&3); } } END_TIME("Building hough"); #ifdef WARP_DEBUG save_hough_debug(ctx, hough, stride); #endif return hough; } typedef struct { int ang; int dis; int strength; int x0; int y0; int x1; int y1; } hough_edge_t; typedef struct { int e0; int e1; int score; float x; float y; } hough_point_t; static int intersect(hough_point_t *pt, hough_edge_t *edges, int e0, int e1) { int x1 = edges[e0].x0; int y1 = edges[e0].y0; int x2 = edges[e0].x1; int y2 = edges[e0].y1; int x3 = edges[e1].x0; int y3 = edges[e1].y0; int x4 = edges[e1].x1; int y4 = edges[e1].y1; int d = (x1-x2)*(y3-y4)-(y1-y2)*(x3-x4); int t = (x1-x3)*(y3-y4)-(y1-y3)*(x3-x4); float ft; if (d < 0) { if (t < d || t > 0) return 0; } else { if (t < 0 || t > d) return 0; } pt->e0 = e0; pt->e1 = e1; pt->score = edges[e0].strength + edges[e1].strength; ft = t / (float)d; pt->x = x1 + ft*(x2-x1); pt->y = y1 + ft*(y2-y1); return 1; } typedef struct { int w; int h; int edge[4]; int point[4]; int best_score; int best_edge[4]; int best_point[4]; } hough_route_t; /* To test convexity, we use the dot product: * B * / \ * / C * A * * The dot product of vectors u and v is: * dot(u,v) = |u|.|v|.cos(theta) * Helpfully, cos(theta) > 0 for -90 < theta < 90. * So if we can form perp(AB) = the vector given by rotating * AB 90 degrees clockwise, we can then look at the sign of: * dot(perp(AB),BC) * If we do that for each corner of the polygon, and the signs * of the results for all of them are the same, we have convexity. * * If any of the dot products are zero, the edges are parallel * (i.e. the same edge). This should never happen. * * If AB = (x,y), then perp(AB) = (y,-x) */ static void score_corner(hough_point_t *points, hough_route_t *route, int p0, int *score, int *sign) { float x0, x1, x2, y0, y1, y2, ux, uy, vx, vy, dot; int s, len; float costheta; /* If we've already decided this is duff, then bale. */ if (*score < 0) return; x0 = points[route->point[p0]].x; y0 = points[route->point[p0]].y; x1 = points[route->point[(p0+1)&3]].x; y1 = points[route->point[(p0+1)&3]].y; x2 = points[route->point[(p0+2)&3]].x; y2 = points[route->point[(p0+2)&3]].y; ux = y1-y0; /* u = Perp(p0 to p1)*/ uy = x0-x1; vx = x2-x1; /* v = p1 to p2 */ vy = y2-y1; dot = ux*vx + uy*vy; if (dot == 0) { *score = -1; return; } s = (dot > 0) ? 1 : -1; if (*sign == 0) *sign = s; else if (*sign != s) { *score = -1; return; } len = sqrt(ux*ux + uy*uy) * sqrt(vx*vx + vy*vy); costheta = dot / (float)len; if (costheta < 0) costheta = -costheta; if (costheta < 0.7) { *score = -1; return; } costheta *= costheta; *score += points[route->point[(p0+1)%3]].score * costheta; } /* route points to 8 ints: * 2*i = edge number * 2*i+1 = point number */ static int score_route(hough_point_t *points, hough_route_t *route) { int score = 0; int sign = 0; score_corner(points, route, 0, &score, &sign); score_corner(points, route, 1, &score, &sign); score_corner(points, route, 2, &score, &sign); score_corner(points, route, 3, &score, &sign); return score; } static float score_by_area(const hough_point_t *points, const hough_route_t *route) { float double_area_of_quad = points[route->point[0]].x * points[route->point[1]].y + points[route->point[1]].x * points[route->point[2]].y + points[route->point[2]].x * points[route->point[3]].y + points[route->point[3]].x * points[route->point[0]].y - points[route->point[1]].x * points[route->point[0]].y - points[route->point[2]].x * points[route->point[1]].y - points[route->point[3]].x * points[route->point[2]].y - points[route->point[0]].x * points[route->point[3]].y; float double_area = route->w * (float)route->h * 2; if (double_area_of_quad < 0) double_area_of_quad = -double_area_of_quad; /* Anything larger than a quarter of the screen is acceptable. */ if (double_area_of_quad*4 > double_area) return 1; /* Anything smaller than a 16th of the screen is unacceptable in all circumstances. */ if (double_area_of_quad*16 < double_area) return 0; /* Otherwise, scale the score down by how much it's less than a quarter of the screen. */ return (double_area_of_quad*4)/double_area; } /* The first n+1 edges of the route are filled in, as are the first n * points. * 2*i = edge number * 2*i+1 = point number */ static void find_route(fz_context *ctx, hough_point_t *points, int num_points, hough_route_t *route, int n) { int i; for (i = 0; i < num_points; i++) { /* If this point continues our route (e0 == route->edge[n]) * then the next point in the route might be e1. */ int e0 = points[i].e0; int e1 = points[i].e1; if (e0 == route->edge[n]) { } else if (e1 == route->edge[n]) { int t = e0; e0 = e1; e1 = t; } else continue; /* Doesn't fit. Keep searching. */ /* If we've found 3 points already, then this is the * fourth. */ if (n == 3) { int score = 0; /* If we haven't looped back to our first edge, * no good. Keep searching. */ if (route->edge[0] != e1) continue; route->point[3] = i; score = score_route(points, route); if (score > 0) score *= score_by_area(points, route); #ifdef WARP_DEBUG debug_printf(ctx, "Found route: (point=%d %d %d %d) (edge %d %d %d %d) score=%d\n", route->point[0], route->point[1], route->point[2], route->point[3], route->edge[0], route->edge[1], route->edge[2], route->edge[3], score); #endif /* We want our route to be convex */ if (score < 0) continue; /* Score the route */ if (route->best_score < score) { route->best_score = score; memcpy(route->best_edge, route->edge, sizeof(*route->edge)*4); memcpy(route->best_point, route->point, sizeof(*route->point)*4); } } else { int j; for (j = 0; j < n && route->edge[j] != e1; j++); /* If we're about to loop back to any of the * previous edges, keep searching. */ if (j != n) continue; /* Possible. Extend the route, and recurse. */ route->point[n] = i; route->edge[n+1] = e1; find_route(ctx, points, num_points, route, n+1); } } } #define MAX_EDGES 32 #define BLOT_ANG 10 #define BLOT_DIS 10 static int make_hough(fz_context *ctx, const fz_pixmap *src, fz_quad *corners) { int w = src->w; int h = src->h; int maxlen = (int)(sqrtf(w*w + h*h) + 0.5); uint32_t *hough; int x, y; int reduce; int stride; hough_edge_t edge[MAX_EDGES]; hough_point_t points[MAX_EDGES * MAX_EDGES]; hough_route_t route; int num_edges, num_points; /* costable could (should) be statically inited. */ { int i; for (i = 0; i < 270; i++) { float theta = i*M_PI/180; sintable[i] = (int16_t)((1<<SINTABLE_SHIFT)*sinf(theta) + 0.5f); } } /* Figure out a suitable scale for the data. */ reduce = 0; while ((maxlen*2>>reduce) > 720 && reduce < 16) reduce++; stride = (maxlen*2)>>reduce; hough = do_hough(ctx, src, stride, maxlen, reduce); /* We want to find the top n edges that aren't too close to * one another. */ for (x = 0; x < MAX_EDGES; x++) { int ang, dis; int minang, maxang, mindis, maxdis; int where = 0; uint32_t *p = hough; uint32_t maxval = 0; for (y = 180*stride; y > 0; y--) { uint32_t v = *p++; if (v > maxval) maxval = v, where = y; } if (where == 0) break; where = 180*stride - where; ang = edge[x].ang = where/stride; dis = edge[x].dis = where - edge[x].ang*stride; edge[x].strength = hough[where]; /* We don't want to find any other maxima that are too * close to this one, so we 'blot out' stuff around this * maxima. */ #ifdef WARP_DEBUG debug_printf(ctx, "Maxima %d: dist=%d ang=%d strength=%d\n", x, (dis<<reduce)-maxlen, ang-90, edge[x].strength); #endif minang = ang - BLOT_ANG; if (minang < 0) minang = 0; maxang = ang + BLOT_ANG; if (maxang > 180) maxang = 180; mindis = dis - BLOT_DIS; if (mindis < 0) mindis = 0; maxdis = dis + BLOT_DIS; if (maxdis > stride) maxdis = stride; p = hough + minang*stride + mindis; maxdis = (maxdis-mindis)*sizeof(uint32_t); for (y = maxang-minang; y > 0; y--) { memset(p, 0, maxdis); p += stride; } #ifdef WARP_DEBUG //save_hough_debug(ctx, hough, stride); #endif } num_edges = x; if (num_edges == 0) return 0; /* Find edges in terms of lines. */ for (x = 0; x < num_edges; x++) { int ang = edge[x].ang; int dis = edge[x].dis; int p = (dis<<reduce) - maxlen; if (ang < 45 || ang > 135) { /* Mostly horizontal line */ edge[x].x0 = 0; edge[x].x1 = w; edge[x].y0 = ((p<<SINTABLE_SHIFT) - edge[x].x0*sintable[ang])/costable[ang]; edge[x].y1 = ((p<<SINTABLE_SHIFT) - edge[x].x1*sintable[ang])/costable[ang]; } else { /* Mostly vertical line */ edge[x].y0 = 0; edge[x].y1 = h; edge[x].x0 = ((p<<SINTABLE_SHIFT) - edge[x].y0*costable[ang])/sintable[ang]; edge[x].x1 = ((p<<SINTABLE_SHIFT) - edge[x].y1*costable[ang])/sintable[ang]; } } /* Find the points of intersection */ num_points = 0; for (x = 0; x < num_edges-1; x++) for (y = x+1; y < num_edges; y++) num_points += intersect(&points[num_points], edge, x, y); #ifdef WARP_DEBUG { debug_printf(ctx, "%d edges, %d points\n", num_edges, num_points); for (x = 0; x < num_points; x++) { debug_printf(ctx, "p%d: %d %d (score %d, %d+%d)\n", x, (int)points[x].x, (int)points[x].y, points[x].score, points[x].e0, points[x].e1); } } #endif /* Now, go looking for 'routes' A->B->C->D->A */ { int i; route.w = src->w; route.h = src->h; route.best_score = -1; for (i = 0; i < num_points; i++) { route.edge[0] = points[i].e0; route.point[0] = i; route.edge[1] = points[i].e1; find_route(ctx, points, num_points, &route, 1); } #ifdef WARP_DEBUG if (route.best_score >= 0) { debug_printf(ctx, "Score: %d, Edges=%d->%d->%d->%d, Points=%d->%d->%d->%d\n", route.best_score, route.best_edge[0], route.best_edge[1], route.best_edge[2], route.best_edge[3], route.best_point[0], route.best_point[1], route.best_point[2], route.best_point[3]); debug_printf(ctx, "(%d,%d)->(%d,%d)->(%d,%d)->(%d,%d)\n", (int)points[route.best_point[0]].x, (int)points[route.best_point[0]].y, (int)points[route.best_point[1]].x, (int)points[route.best_point[1]].y, (int)points[route.best_point[2]].x, (int)points[route.best_point[2]].y, (int)points[route.best_point[3]].x, (int)points[route.best_point[3]].y); } #endif } #ifdef WARP_DEBUG /* Mark up the src (again, for debugging) */ { fz_device *dev = fz_new_draw_device(ctx, fz_identity, src); fz_stroke_state *stroke = fz_new_stroke_state(ctx); float col = 1; fz_color_params params = { FZ_RI_PERCEPTUAL }; #ifdef WARP_SPEW_DEBUG for (x = 0; x < num_edges; x++) { char text[64]; fz_path *path = fz_new_path(ctx); fz_moveto(ctx, path, edge[x].x0, edge[x].y0); fz_lineto(ctx, path, edge[x].x1, edge[x].y1); fz_stroke_path(ctx, dev, path, stroke, fz_identity, fz_device_gray(ctx), &col, 1, params); fz_drop_path(ctx, path); debug_printf(ctx, "%d %d -> %d %d\n", edge[x].x0, edge[x].y0, edge[x].x1, edge[x].y1); sprintf(text, "line%d.png", x); fz_save_pixmap_as_png(ctx, src, text); } #endif stroke->linewidth *= 4; if (route.best_score >= 0) { fz_path *path = fz_new_path(ctx); fz_moveto(ctx, path, points[route.best_point[0]].x, points[route.best_point[0]].y); fz_lineto(ctx, path, points[route.best_point[1]].x, points[route.best_point[1]].y); fz_lineto(ctx, path, points[route.best_point[2]].x, points[route.best_point[2]].y); fz_lineto(ctx, path, points[route.best_point[3]].x, points[route.best_point[3]].y); fz_closepath(ctx, path); fz_stroke_path(ctx, dev, path, stroke, fz_identity, fz_device_gray(ctx), &col, 1, params); fz_drop_path(ctx, path); } fz_drop_stroke_state(ctx, stroke); fz_close_device(ctx, dev); fz_drop_device(ctx, dev); } #endif fz_free(ctx, hough); if (route.best_score == -1) return 0; if (corners) { corners->ul.x = points[route.best_point[0]].x; corners->ul.y = points[route.best_point[0]].y; corners->ur.x = points[route.best_point[1]].x; corners->ur.y = points[route.best_point[1]].y; corners->ll.x = points[route.best_point[2]].x; corners->ll.y = points[route.best_point[2]].y; corners->lr.x = points[route.best_point[3]].x; corners->lr.y = points[route.best_point[3]].y; } /* Discard any possible matches that aren't at least 1/8 of the pixmap. */ { fz_rect r = fz_empty_rect; if (corners) { r = fz_include_point_in_rect(r, corners->ul); r = fz_include_point_in_rect(r, corners->ur); r = fz_include_point_in_rect(r, corners->ll); r = fz_include_point_in_rect(r, corners->lr); } if ((r.x1 - r.x0) * (r.y1 - r.y0) * 8 < (src->w * src->h)) return 0; } return 1; } #define DOC_DETECT_MAXDIM 500 int fz_detect_document(fz_context *ctx, fz_quad *points, fz_pixmap *orig_src) { fz_color_params p = {FZ_RI_PERCEPTUAL }; fz_pixmap *grey = NULL; fz_pixmap *src = NULL; #ifdef DETECT_DOCUMENT_RGB fz_pixmap *r = NULL; fz_pixmap *g = NULL; fz_pixmap *b = NULL; #endif fz_pixmap *processed = NULL; int i; int found = 0; /* Gauss function has std deviation of ~sqr(2), so a variance of * 2. Apply it twice and we get twice that. So: * n stddev variance * 1 1.4 2 * 2 2 4 * 3 2.8 8 * 4 4 16 * 5 5.6 32 * 6 8 64 * 7 11.3 128 * 8 16 256 * 9 22.6 512 * 10 32 1024 * 11 45 2048 * * The stddev is also known as the "width" by some sources. * * Our testing indicates that a width of 30ish works well for a * image with it's major axis being ~3k pixels. So figure on a * width of about 1% of the major axis dimension. * * By subsampling the incoming image, we can reduce the work we * do in the gauss phase, as we get to use a smaller width. */ int n = 10; /* Based on DOC_DETECT_MAXDIM */ int l2factor = 0; fz_var(src); fz_var(grey); #ifdef DETECT_DOCUMENT_RGB fz_var(r); fz_var(g); fz_var(b); #endif fz_var(processed); fz_try(ctx) { START_TIME(); { int maxdim = orig_src->w > orig_src->h ? orig_src->w : orig_src->h; while (maxdim > DOC_DETECT_MAXDIM) maxdim >>= 1, l2factor++; START_TIME(); if (l2factor == 0) src = fz_keep_pixmap(ctx, orig_src); else { src = fz_clone_pixmap(ctx, orig_src); fz_subsample_pixmap(ctx, src, l2factor); } END_TIME("subsample"); } START_TIME(); if (src->n == 1 && src->alpha == 0) { if (src == orig_src) grey = fz_clone_pixmap(ctx, src); else grey = fz_keep_pixmap(ctx, src); } else { grey = fz_convert_pixmap(ctx, src, fz_default_gray(ctx, NULL), NULL, NULL, p, 0); } END_TIME("clone"); START_TIME(); for (i = 0; i < n; i++) gauss5x5(ctx, grey); END_TIME("gauss grey"); #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, grey, "gauss.png"); #endif #ifdef DO_HISTEQ START_TIME(); histeq(grey); END_TIME("histeq grey"); #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, grey, "hist.png"); #endif #endif START_TIME(); grad(ctx, grey, pregrad(ctx, grey)); END_TIME("grad grey"); #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, grey, "grad.png"); #endif #ifdef DETECT_DOCUMENT_RGB if (src->n == 3 && src->alpha == 0) { START_TIME(); r = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0); g = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0); b = fz_new_pixmap(ctx, NULL, src->w, src->h, NULL, 0); gauss5x5_3(ctx, r, src, 0); for (i = 1; i < n; i++) gauss5x5(ctx, r); gauss5x5_3(ctx, g, src, 1); for (i = 1; i < n; i++) gauss5x5(ctx, g); gauss5x5_3(ctx, b, src, 2); for (i = 1; i < n; i++) gauss5x5(ctx, b); #ifdef DO_HISTEQ histeq(r); histeq(g); histeq(b); #endif grad(ctx, r); grad(ctx, g); grad(ctx, b); #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, r, "r.png"); fz_save_pixmap_as_png(ctx, g, "g.png"); fz_save_pixmap_as_png(ctx, b, "b.png"); #endif combine_grad(grey, r, g, b); END_TIME("rgb"); fz_drop_pixmap(ctx, r); fz_drop_pixmap(ctx, g); fz_drop_pixmap(ctx, b); r = NULL; g = NULL; b = NULL; } #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, grey, "combined.png"); #endif #endif processed = fz_new_pixmap(ctx, fz_device_gray(ctx), grey->w, grey->h, NULL, 0); /* Do multiple passes if required, dropping the thresholds for * strong/weak pixels each time until we find a suitable result. */ for (i = 0; i < 6; i++) { START_TIME(); nonmax(ctx, processed, grey, i); END_TIME("nonmax"); #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, processed, "nonmax.png"); #endif START_TIME(); hysteresis(ctx, processed); END_TIME("hysteresis"); #ifdef WARP_DEBUG fz_save_pixmap_as_png(ctx, processed, "hysteresis.png"); #endif START_TIME(); found = make_hough(ctx, processed, points); END_TIME("total hough"); END_TIME("Total time"); DUMP_TIMES(); #ifdef WARP_DEBUG clean(ctx, processed); fz_save_pixmap_as_png(ctx, processed, "out.png"); #endif if (found) break; } } fz_always(ctx) { fz_drop_pixmap(ctx, src); #ifdef DETECT_DOCUMENT_RGB fz_drop_pixmap(ctx, r); fz_drop_pixmap(ctx, g); fz_drop_pixmap(ctx, b); #endif fz_drop_pixmap(ctx, grey); fz_drop_pixmap(ctx, processed); } fz_catch(ctx) { fz_rethrow(ctx); } if (found && points) { float f = (1<<l2factor); float r = f/2; points->ul.x = points->ul.x *f + r; points->ul.y = points->ul.y *f + r; points->ur.x = points->ur.x *f + r; points->ur.y = points->ur.y *f + r; points->ll.x = points->ll.x *f + r; points->ll.y = points->ll.y *f + r; points->lr.x = points->lr.x *f + r; points->lr.y = points->lr.y *f + r; } return found; }
