diff mupdf-source/source/fitz/draw-scale-simple.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: the expanded directory no longer carries a version number.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/draw-scale-simple.c Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,1897 @@
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+/*
+This code does smooth scaling of a pixmap.
+
+This function returns a new pixmap representing the area starting at (0,0)
+given by taking the source pixmap src, scaling it to width w, and height h,
+and then positioning it at (frac(x),frac(y)).
+
+This is a cut-down version of draw_scale.c that only copes with filters
+that return values strictly in the 0..1 range, and uses bytes for
+intermediate results rather than ints.
+*/
+
+#include "mupdf/fitz.h"
+
+#include "draw-imp.h"
+#include "pixmap-imp.h"
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+/* Do we special case handling of single pixel high/wide images? The
+ * 'purest' handling is given by not special casing them, but certain
+ * files that use such images 'stack' them to give full images. Not
+ * special casing them results in them being fainter and giving noticeable
+ * rounding errors.
+ */
+#define SINGLE_PIXEL_SPECIALS
+
+/*
+Consider a row of source samples, src, of width src_w, positioned at x,
+scaled to width dst_w.
+
+src[i] is centred at: x + (i + 0.5)*dst_w/src_w
+
+Therefore the distance between the centre of the jth output pixel and
+the centre of the ith source sample is:
+
+dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
+
+When scaling up, therefore:
+
+dst[j] = SUM(filter(dist[j,i]) * src[i])
+    (for all ints i)
+
+This can be simplified by noticing that filters are only non zero within
+a given filter width (henceforth called W). So:
+
+dst[j] = SUM(filter(dist[j,i]) * src[i])
+    (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
+
+When scaling down, each filtered source sample is stretched to be wider
+to avoid aliasing issues. This effectively reduces the distance between
+centres.
+
+dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
+    (where F = dst_w/src_w)
+    (for ints i, s.t. (j-W)/F < i < (j+W)/F)
+
+*/
+
+typedef struct fz_scale_filter
+{
+    int width;
+    float (*fn)(struct fz_scale_filter *, float);
+} fz_scale_filter;
+
+/* Image scale filters */
+
+static float
+triangle(fz_scale_filter *filter, float f)
+{
+    if (f >= 1)
+        return 0;
+    return 1-f;
+}
+
+static float
+box(fz_scale_filter *filter, float f)
+{
+    if (f >= 0.5f)
+        return 0;
+    return 1;
+}
+
+static float
+simple(fz_scale_filter *filter, float x)
+{
+    if (x >= 1)
+        return 0;
+    return 1 + (2*x - 3)*x*x;
+}
+
+fz_scale_filter fz_scale_filter_box = { 1, box };
+fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
+fz_scale_filter fz_scale_filter_simple = { 1, simple };
+
+/*
+We build ourselves a set of tables to contain the precalculated weights
+for a given set of scale settings.
+
+The first dst_w entries in index are the index into index of the
+sets of weights for each destination pixel.
+
+Each of the sets of weights is a set of values consisting of:
+    the minimum source pixel index used for this destination pixel
+    the number of weights used for this destination pixel
+    the weights themselves
+
+So to calculate dst[i] we do the following:
+
+    weights = &index[index[i]];
+    min = *weights++;
+    len = *weights++;
+    dst[i] = 0;
+    while (--len > 0)
+        dst[i] += src[min++] * *weights++
+
+In addition, we guarantee that at the end of this process weights will now
+point to the weights value for dst pixel i+1.
+
+In the simplest version of this algorithm, we would scale the whole image
+horizontally first into a temporary buffer, then scale that temporary
+buffer again vertically to give us our result. Using such a simple
+algorithm would mean that we could use the same style of weights for both
+horizontal and vertical scaling.
+
+Unfortunately, this would also require a large temporary buffer,
+particularly in the case where we are scaling up.
+
+We therefore modify the algorithm as follows; we scale scanlines from the
+source image horizontally into a temporary buffer, until we have all the
+contributors for a given output scanline. We then produce that output
+scanline from the temporary buffer. In this way we restrict the height
+of the temporary buffer to a small fraction of the final size.
+
+Unfortunately, this means that the pseudo code for recombining a
+scanline of fully scaled pixels is as follows:
+
+    weights = &index[index[y]];
+    min = *weights++;
+    len = *weights++;
+    for (x=0 to dst_w)
+        min2 = min
+        len2 = len
+        weights2 = weights
+        dst[x] = 0;
+        while (--len2 > 0)
+            dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
+
+i.e. it requires a % operation for every source pixel - this is typically
+expensive.
+
+To avoid this, we alter the order in which vertical weights are stored,
+so that they are ordered in the same order as the temporary buffer lines
+would appear. This simplifies the algorithm to:
+
+    weights = &index[index[y]];
+    min = *weights++;
+    len = *weights++;
+    for (x=0 to dst_w)
+        min2 = 0
+        len2 = len
+        weights2 = weights
+        dst[x] = 0;
+        while (--len2 > 0)
+            dst[x] += temp[i][min2++] * *weights2++
+
+This means that len may be larger than it needs to be (due to the
+possible inclusion of a zero weight row or two), but in practice this
+is only an increase of 1 or 2 at worst.
+
+We implement this by generating the weights as normal (but ensuring we
+leave enough space) and then reordering afterwards.
+
+*/
+
+/* This structure is accessed from ARM code - bear this in mind before
+ * altering it!
*/ +typedef struct +{ + int flip; /* true if outputting reversed */ + int count; /* number of output pixels we have records for in this table */ + int max_len; /* Maximum number of weights for any one output pixel */ + int n; /* number of components (src->n) */ + int new_line; /* True if no weights for the current output pixel */ + int patch_l; /* How many output pixels we skip over */ + int index[FZ_FLEXIBLE_ARRAY]; +} fz_weights; + +struct fz_scale_cache +{ + int src_w; + float x; + float dst_w; + fz_scale_filter *filter; + int vertical; + int dst_w_int; + int patch_l; + int patch_r; + int n; + int flip; + fz_weights *weights; +}; + +static fz_weights * +new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l) +{ + int max_len; + fz_weights *weights; + + if (src_w > dst_w) + { + /* Scaling down, so there will be a maximum of + * 2*filterwidth*src_w/dst_w src pixels + * contributing to each dst pixel. */ + max_len = (int)ceilf((2 * filter->width * src_w)/dst_w); + if (max_len > src_w) + max_len = src_w; + } + else + { + /* Scaling up, so there will be a maximum of + * 2*filterwidth src pixels contributing to each dst pixel. + */ + max_len = 2 * filter->width; + } + /* We need the size of the struct, + * plus patch_w*sizeof(int) for the index + * plus (2+max_len)*sizeof(int) for the weights + * plus room for an extra set of weights for reordering. + */ + weights = fz_malloc_flexible(ctx, fz_weights, index, (max_len+3) * (patch_w+1)); + if (!weights) + return NULL; + weights->count = -1; + weights->max_len = max_len; + weights->index[0] = patch_w; + weights->n = n; + weights->patch_l = patch_l; + weights->flip = flip; + return weights; +} + +/* j is destination pixel in the patch_l..patch_l+patch_w range */ +static void +init_weights(fz_weights *weights, int j) +{ + int index; + + j -= weights->patch_l; + assert(weights->count == j-1); + weights->count++; + weights->new_line = 1; + if (j == 0) + index = weights->index[0]; + else + { + index = weights->index[j-1]; + index += 2 + weights->index[index+1]; + } + weights->index[j] = index; /* row pointer */ + weights->index[index] = 0; /* min */ + weights->index[index+1] = 0; /* len */ +} + +static void +insert_weight(fz_weights *weights, int j, int i, int weight) +{ + int min, len, index; + + /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */ + j -= weights->patch_l; + if (weights->new_line) + { + /* New line */ + weights->new_line = 0; + index = weights->index[j]; /* row pointer */ + weights->index[index] = i; /* min */ + weights->index[index+1] = 0; /* len */ + } + index = weights->index[j]; + min = weights->index[index++]; + len = weights->index[index++]; + while (i < min) + { + /* This only happens in rare cases, but we need to insert + * one earlier. In exceedingly rare cases we may need to + * insert more than one earlier. 
*/ + int k; + + for (k = len; k > 0; k--) + { + weights->index[index+k] = weights->index[index+k-1]; + } + weights->index[index] = 0; + min--; + len++; + weights->index[index-2] = min; + weights->index[index-1] = len; + } + if (i-min >= len) + { + /* The usual case */ + while (i-min >= ++len) + { + weights->index[index+len-1] = 0; + } + assert(len-1 == i-min); + weights->index[index+i-min] = weight; + weights->index[index-1] = len; + assert(len <= weights->max_len); + } + else + { + /* Infrequent case */ + weights->index[index+i-min] += weight; + } +} + +static void +add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter, + float x, float F, float G, int src_w, float dst_w) +{ + float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w); + float f; + int weight; + + dist *= G; + if (dist < 0) + dist = -dist; + f = filter->fn(filter, dist)*F; + weight = (int)(256*f+0.5f); + + /* Ensure i is in range */ + if (i < 0 || i >= src_w) + return; + if (weight != 0) + insert_weight(weights, j, i, weight); +} + +static void +reorder_weights(fz_weights *weights, int j, int src_w) +{ + int idx = weights->index[j - weights->patch_l]; + int min = weights->index[idx++]; + int len = weights->index[idx++]; + int max = weights->max_len; + int tmp = idx+max; + int i, off; + + /* Copy into the temporary area */ + memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len); + + /* Pad out if required */ + assert(len <= max); + assert(min+len <= src_w); + off = 0; + if (len < max) + { + memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len)); + len = max; + if (min + len > src_w) + { + off = min + len - src_w; + min = src_w - len; + weights->index[idx-2] = min; + } + weights->index[idx-1] = len; + } + + /* Copy back into the proper places */ + for (i = 0; i < len; i++) + { + weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i]; + } +} + +/* Due to rounding and edge effects, the sums for the weights sometimes don't + * add up to 256. This causes visible rendering effects. Therefore, we take + * pains to ensure that they 1) never exceed 256, and 2) add up to exactly + * 256 for all pixels that are completely covered. See bug #691629. */ +static void +check_weights(fz_weights *weights, int j, int w, float x, float wf) +{ + int idx, len; + int sum = 0; + int max = -256; + int maxidx = 0; + int i; + + idx = weights->index[j - weights->patch_l]; + idx++; /* min */ + len = weights->index[idx++]; + + for(i=0; i < len; i++) + { + int v = weights->index[idx++]; + sum += v; + if (v > max) + { + max = v; + maxidx = idx; + } + } + /* If we aren't the first or last pixel, OR if the sum is too big + * then adjust it. */ + if (((j != 0) && (j != w-1)) || (sum > 256)) + weights->index[maxidx-1] += 256-sum; + /* Otherwise, if we are the first pixel, and it's fully covered, then + * adjust it. */ + else if ((j == 0) && (x < 0.0001f) && (sum != 256)) + weights->index[maxidx-1] += 256-sum; + /* Finally, if we are the last pixel, and it's fully covered, then + * adjust it. 
*/ + else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256)) + weights->index[maxidx-1] += 256-sum; +} + +static int +window_fix(int l, int *rp, float window, float centre) +{ + int r = *rp; + while (centre - l > window) + l++; + while (r - centre > window) + r--; + *rp = r; + return l; +} + +static fz_weights * +make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache) +{ + fz_weights *weights; + float F, G; + float window; + int j; + + if (cache) + { + if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w && + cache->filter == filter && cache->vertical == vertical && + cache->dst_w_int == dst_w_int && + cache->patch_l == patch_l && cache->patch_r == patch_r && + cache->n == n && cache->flip == flip) + { + return cache->weights; + } + cache->src_w = src_w; + cache->x = x; + cache->dst_w = dst_w; + cache->filter = filter; + cache->vertical = vertical; + cache->dst_w_int = dst_w_int; + cache->patch_l = patch_l; + cache->patch_r = patch_r; + cache->n = n; + cache->flip = flip; + fz_free(ctx, cache->weights); + cache->weights = NULL; + } + + if (dst_w < src_w) + { + /* Scaling down */ + F = dst_w / src_w; + G = 1; + } + else + { + /* Scaling up */ + F = 1; + G = src_w / dst_w; + } + window = filter->width / F; + weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l); + if (!weights) + return NULL; + for (j = patch_l; j < patch_r; j++) + { + /* find the position of the centre of dst[j] in src space */ + float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f; + int l, r; + l = ceilf(centre - window); + r = floorf(centre + window); + + /* Now, due to the vagaries of floating point, if centre is large, l + * and r can actually end up further than 2*window apart. All we care + * about in this case is that we don't crash! We want a cheap correction + * that avoids the assert and doesn't cost too much in the normal case. + * This should do. */ + if (r - l > 2 * window) + l = window_fix(l, &r, window, centre); + + init_weights(weights, j); + for (; l <= r; l++) + { + add_weight(weights, j, l, filter, x, F, G, src_w, dst_w); + } + if (weights->new_line) + { + /* In very rare cases (bug 706764) we might not actually + * have generated any non-zero weights for this destination + * pixel. Just use the central pixel. 
*/ + int src_x = floorf(centre); + if (src_x >= src_w) + src_x = src_w-1; + if (src_x < 0) + src_x = 0; + insert_weight(weights, j, src_x, 1); + } + check_weights(weights, j, dst_w_int, x, dst_w); + if (vertical) + { + reorder_weights(weights, j, src_w); + } + } + weights->count++; /* weights->count = dst_w_int now */ + if (cache) + { + cache->weights = weights; + } + return weights; +} + +static void +scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + const int *contrib = &weights->index[weights->index[0]]; + int len, i, j, n; + const unsigned char *min; + int tmp[FZ_MAX_COLORS]; + int *t = tmp; + + n = weights->n; + for (j = 0; j < n; j++) + tmp[j] = 128; + if (weights->flip) + { + dst += (weights->count-1)*n; + for (i=weights->count; i > 0; i--) + { + min = &src[n * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + for (j = n; j > 0; j--) + *t++ += *min++ * *contrib; + t -= n; + contrib++; + } + for (j = n; j > 0; j--) + { + *dst++ = (unsigned char)(*t>>8); + *t++ = 128; + } + t -= n; + dst -= n*2; + } + } + else + { + for (i=weights->count; i > 0; i--) + { + min = &src[n * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + for (j = n; j > 0; j--) + *t++ += *min++ * *contrib; + t -= n; + contrib++; + } + for (j = n; j > 0; j--) + { + *dst++ = (unsigned char)(*t>>8); + *t++ = 128; + } + t -= n; + } + } +} + +#ifdef ARCH_ARM + +static void +scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +__attribute__((naked)); + +static void +scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +__attribute__((naked)); + +static void +scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +__attribute__((naked)); + +static void +scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +__attribute__((naked)); + +static void +scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) +__attribute__((naked)); + +static void +scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) +__attribute__((naked)); + +static void +scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + asm volatile( + ENTER_ARM + ".syntax unified\n" + "stmfd r13!,{r4-r7,r9,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 5f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3 @ dst += count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = a = 128 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "subs r9, r9, #1 @ len-- \n" + "blt 3f @ while (len >= 0) \n" + "2: @ { \n" + "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" + "ldrbgt r7, [r4], #1 @ r7 = *min++ \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "mlagt r5, r6, r7, r5 @ g += r6 * 
r7 \n" + "subs r9, r9, #2 @ r9 = len -= 2 \n" + "mla r5, r12,r14,r5 @ g += r14 * r12 \n" + "bge 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "strb r5,[r0, #-1]! @ *--dst=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" + "5:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "6:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = a = 128 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "subs r9, r9, #1 @ len-- \n" + "blt 9f @ while (len > 0) \n" + "7: @ { \n" + "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" + "ldrbgt r7, [r4], #1 @ r7 = *min++ \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n" + "subs r9, r9, #2 @ r9 = len -= 2 \n" + "mla r5, r12,r14,r5 @ a += r14 * r12 \n" + "bge 7b @ } \n" + "9: \n" + "mov r5, r5, LSR #8 @ a >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 6b @ \n" + "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r6,r9-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #1 @ dst += 2*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = g = 128 \n" + "mov r6, #128 @ r6 = a = 128 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r14,[r2], #4 @ r14 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "mov r6, r6, lsr #8 @ a >>= 8 \n" + "strb r5, [r0, #-2]! 
@ *--dst=a \n" + "strb r6, [r0, #1] @ *--dst=g \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = g = 128 \n" + "mov r6, #128 @ r6 = a = 128 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r14,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "mov r6, r6, lsr #8 @ a >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=g \n" + "strb r6, [r0], #1 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #1 @ \n" + "add r0, r0, r3 @ dst += 3*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = r = 128 \n" + "mov r6, #128 @ r6 = g = 128 \n" + "add r7, r1, r4, LSL #1 @ \n" + "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" + "mov r7, #128 @ r7 = b = 128 \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r14,[r2], #4 @ r14 = *contrib++ \n" + "ldrb r8, [r4], #1 @ r8 = *min++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r8, r5 @ r += r8 * r14 \n" + "mla r6, r14,r11,r6 @ g += r11 * r14 \n" + "mla r7, r14,r12,r7 @ b += r12 * r14 \n" + "bgt 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ r >>= 8 \n" + "mov r6, r6, lsr #8 @ g >>= 8 \n" + "mov r7, r7, lsr #8 @ b >>= 8 \n" + "strb r5, [r0, #-3]! 
@ *--dst=r \n" + "strb r6, [r0, #1] @ *--dst=g \n" + "strb r7, [r0, #2] @ *--dst=b \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = r = 128 \n" + "mov r6, #128 @ r6 = g = 128 \n" + "add r7, r1, r4, LSL #1 @ r7 = min = &src[2*r4] \n" + "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" + "mov r7, #128 @ r7 = b = 128 \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r14,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r8, [r4], #1 @ r8 = *min++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r8, r5 @ r += r8 * r14 \n" + "mla r6, r14,r11,r6 @ g += r11 * r14 \n" + "mla r7, r14,r12,r7 @ b += r12 * r14 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r5, r5, lsr #8 @ r >>= 8 \n" + "mov r6, r6, lsr #8 @ g >>= 8 \n" + "mov r7, r7, lsr #8 @ b >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=r \n" + "strb r6, [r0], #1 @ *dst++=g \n" + "strb r7, [r0], #1 @ *dst++=b \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "ldr r5,=0x00800080 @ r5 = rounding \n" + "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #2 @ dst += 4*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r7, r5 @ r7 = b = rounding \n" + "mov r8, r5 @ r8 = a = rounding \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r11,[r4], #4 @ r11 = *min++ \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "and r12,r6, r11 @ r12 = __22__00 \n" + "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" + "mla r7, r10,r12,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "bgt 2b @ } \n" + "3: \n" + "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" + "bic r8, r8, r6 @ r8 = 33__11__ \n" + "orr r7, r7, r8 @ r7 = 33221100 \n" + "str r7, [r0, #-4]! 
@ *--dst=r \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + "4: \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r7, r5 @ r7 = b = rounding \n" + "mov r8, r5 @ r8 = a = rounding \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r11,[r4], #4 @ r11 = *min++ \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "and r12,r6, r11 @ r12 = __22__00 \n" + "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" + "mla r7, r10,r12,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "bgt 6b @ } \n" + "7: \n" + "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" + "bic r8, r8, r6 @ r8 = 33__11__ \n" + "orr r7, r7, r8 @ r7 = 33221100 \n" + "str r7, [r0], #4 @ *dst++=r \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = &weights->index[0] \n" + "@ r3 = width \n" + "@ r12= row \n" + "ldr r14,[r13,#4*9] @ r14= n \n" + "ldr r12,[r13,#4*10] @ r12= row \n" + "add r2, r2, #24 @ r2 = weights->index \n" + "mul r3, r14, r3 @ r3 = width *= n \n" + "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" + "add r2, r2, #4 @ r2 = &index[1] \n" + "subs r6, r3, #4 @ r6 = x = width-4 \n" + "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n" + " @ r14= len = *contrib \n" + "blt 4f @ while (x >= 0) { \n" +#ifndef ARCH_UNALIGNED_OK + "tst r3, #3 @ if ((r3 & 3) \n" + "tsteq r1, #3 @ || (r1 & 3)) \n" + "bne 4f @ can't do fast code \n" +#endif + "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n" + "1: \n" + "ldr r7, =0x00800080 @ r5 = val0 = round \n" + "stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n" + " @ r1 = min = src \n" + " @ r2 = contrib2-4 \n" + "movs r8, r14 @ r8 = len2 = len \n" + "mov r5, r7 @ r7 = val1 = round \n" + "ble 3f @ while (len2-- > 0) { \n" + "2: \n" + "ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n" + "ldr r10,[r2, #4]! 
@ r10 = *contrib2++ \n" + "subs r8, r8, #1 @ len2-- \n" + "and r11,r9, r12 @ r11= __22__00 \n" + "and r12,r9, r12,LSR #8 @ r12= __33__11 \n" + "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n" + "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n" + "bgt 2b @ } \n" + "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n" + "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n" + "orr r5, r5, r7 @ r5 = 33221100 \n" + "3: \n" + "ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n" + "subs r6, r6, #4 @ x-- \n" + "add r1, r1, #4 @ src++ \n" + "str r5, [r0], #4 @ *dst++ = val \n" + "bge 1b @ \n" + "4: @ } (Less than 4 to go) \n" + "adds r6, r6, #4 @ r6 = x += 4 \n" + "beq 8f @ if (x == 0) done \n" + "5: \n" + "mov r5, r1 @ r5 = min = src \n" + "mov r7, #128 @ r7 = val = 128 \n" + "movs r8, r14 @ r8 = len2 = len \n" + "add r9, r2, #4 @ r9 = contrib2 \n" + "ble 7f @ while (len2-- > 0) { \n" + "6: \n" + "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" + "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" + "subs r8, r8, #1 @ len2-- \n" + "@ stall r12 \n" + "mla r7, r10,r12,r7 @ val += r12 * r10 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" + "subs r6, r6, #1 @ x-- \n" + "add r1, r1, #1 @ src++ \n" + "strb r7, [r0], #1 @ *dst++ = val \n" + "bgt 5b @ \n" + "8: \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ".ltorg \n" + ENTER_THUMB + ); +} + +static void +scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "mov r11,#255 @ r11= 255 \n" + "ldr r12,[r13,#4*10] @ r12= row \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = &weights->index[0] \n" + "@ r3 = width \n" + "@ r11= 255 \n" + "@ r12= row \n" + "add r2, r2, #24 @ r2 = weights->index \n" + "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" + "add r2, r2, #4 @ r2 = &index[1] \n" + "mov r6, r3 @ r6 = x = width \n" + "ldr r14,[r2, r4, LSL #2]! 
@ r2 = contrib = index[index[row]+1]\n" + " @ r14= len = *contrib \n" + "5: \n" + "ldr r4,[r13,#4*9] @ r10= nn = n \n" + "1: \n" + "mov r5, r1 @ r5 = min = src \n" + "mov r7, #128 @ r7 = val = 128 \n" + "movs r8, r14 @ r8 = len2 = len \n" + "add r9, r2, #4 @ r9 = contrib2 \n" + "ble 7f @ while (len2-- > 0) { \n" + "6: \n" + "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" + "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" + "subs r8, r8, #1 @ len2-- \n" + "@ stall r12 \n" + "mla r7, r10,r12,r7 @ val += r12 * r10 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" + "subs r4, r4, #1 @ r4 = nn-- \n" + "add r1, r1, #1 @ src++ \n" + "strb r7, [r0], #1 @ *dst++ = val \n" + "bgt 1b @ \n" + "subs r6, r6, #1 @ x-- \n" + "strb r11,[r0], #1 @ *dst++ = 255 \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ".ltorg \n" + ENTER_THUMB + ); +} +#else + +static void +scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + const int *contrib = &weights->index[weights->index[0]]; + int len, i; + const unsigned char *min; + + assert(weights->n == 1); + if (weights->flip) + { + dst += weights->count; + for (i=weights->count; i > 0; i--) + { + int val = 128; + min = &src[*contrib++]; + len = *contrib++; + while (len-- > 0) + { + val += *min++ * *contrib++; + } + *--dst = (unsigned char)(val>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int val = 128; + min = &src[*contrib++]; + len = *contrib++; + while (len-- > 0) + { + val += *min++ * *contrib++; + } + *dst++ = (unsigned char)(val>>8); + } + } +} + +static void +scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + const int *contrib = &weights->index[weights->index[0]]; + int len, i; + const unsigned char *min; + + assert(weights->n == 2); + if (weights->flip) + { + dst += 2*weights->count; + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + min = &src[2 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + c1 += *min++ * *contrib; + c2 += *min++ * *contrib++; + } + *--dst = (unsigned char)(c2>>8); + *--dst = (unsigned char)(c1>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + min = &src[2 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + c1 += *min++ * *contrib; + c2 += *min++ * *contrib++; + } + *dst++ = (unsigned char)(c1>>8); + *dst++ = (unsigned char)(c2>>8); + } + } +} + +static void +scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + const int *contrib = &weights->index[weights->index[0]]; + int len, i; + const unsigned char *min; + + assert(weights->n == 3); + if (weights->flip) + { + dst += 3*weights->count; + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + int c3 = 128; + min = &src[3 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + int c = *contrib++; + c1 += *min++ * c; + c2 += *min++ * c; + c3 += *min++ * c; + } + *--dst = (unsigned char)(c3>>8); + *--dst = (unsigned char)(c2>>8); + *--dst = (unsigned char)(c1>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + int c3 = 128; + min = &src[3 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + int c = *contrib++; + c1 += *min++ * c; + c2 += *min++ * c; + c3 += *min++ * c; + } + *dst++ = (unsigned char)(c1>>8); + 
*dst++ = (unsigned char)(c2>>8); + *dst++ = (unsigned char)(c3>>8); + } + } +} + +static void +scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) +{ + const int *contrib = &weights->index[weights->index[0]]; + int len, i; + const unsigned char *min; + + assert(weights->n == 4); + if (weights->flip) + { + dst += 4*weights->count; + for (i=weights->count; i > 0; i--) + { + int r = 128; + int g = 128; + int b = 128; + int a = 128; + min = &src[4 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + r += *min++ * *contrib; + g += *min++ * *contrib; + b += *min++ * *contrib; + a += *min++ * *contrib++; + } + *--dst = (unsigned char)(a>>8); + *--dst = (unsigned char)(b>>8); + *--dst = (unsigned char)(g>>8); + *--dst = (unsigned char)(r>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int r = 128; + int g = 128; + int b = 128; + int a = 128; + min = &src[4 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + r += *min++ * *contrib; + g += *min++ * *contrib; + b += *min++ * *contrib; + a += *min++ * *contrib++; + } + *dst++ = (unsigned char)(r>>8); + *dst++ = (unsigned char)(g>>8); + *dst++ = (unsigned char)(b>>8); + *dst++ = (unsigned char)(a>>8); + } + } +} + +static void +scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row) +{ + const int *contrib = &weights->index[weights->index[row]]; + int len, x; + int width = w * n; + + contrib++; /* Skip min */ + len = *contrib++; + for (x=width; x > 0; x--) + { + const unsigned char *min = src; + int val = 128; + int len2 = len; + const int *contrib2 = contrib; + + while (len2-- > 0) + { + val += *min * *contrib2++; + min += width; + } + *dst++ = (unsigned char)(val>>8); + src++; + } +} + +static void +scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row) +{ + const int *contrib = &weights->index[weights->index[row]]; + int len, x; + int width = w * n; + + contrib++; /* Skip min */ + len = *contrib++; + for (x=w; x > 0; x--) + { + int nn; + for (nn = n; nn > 0; nn--) + { + const unsigned char *min = src; + int val = 128; + int len2 = len; + const int *contrib2 = contrib; + + while (len2-- > 0) + { + val += *min * *contrib2++; + min += width; + } + *dst++ = (unsigned char)(val>>8); + src++; + } + *dst++ = 255; + } +} +#endif + +#ifdef SINGLE_PIXEL_SPECIALS +static void +duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride) +{ + int i; + + for (i = n; i > 0; i--) + *dst++ = *src++; + if (forcealpha) + *dst++ = 255; + n += forcealpha; + for (i = w-1; i > 0; i--) + { + memcpy(dst, dst-n, n); + dst += n; + } + w *= n; + dst -= w; + h--; + while (h--) + { + memcpy(dst+stride, dst, w); + dst += stride; + } +} + +static void +scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha) +{ + const int *contrib = &weights->index[weights->index[0]]; + int min, len, i, j, n, nf; + int tmp[FZ_MAX_COLORS]; + + n = weights->n; + nf = n + forcealpha; + /* Scale a single row */ + for (j = 0; j < nf; j++) + tmp[j] = 128; + if (weights->flip) + { + dst += (weights->count-1)*nf; + for (i=weights->count; i > 0; i--) + { + min = *contrib++; + len = 
*contrib++; + min *= n; + while (len-- > 0) + { + int c = *contrib++; + for (j = 0; j < n; j++) + tmp[j] += src[min++] * c; + if (forcealpha) + tmp[j] += 255 * c; + } + for (j = 0; j < nf; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + dst -= 2*nf; + } + dst += nf + dstride; + } + else + { + for (i=weights->count; i > 0; i--) + { + min = *contrib++; + len = *contrib++; + min *= n; + while (len-- > 0) + { + int c = *contrib++; + for (j = 0; j < n; j++) + tmp[j] += src[min++] * c; + if (forcealpha) + tmp[j] += 255 * c; + } + for (j = 0; j < nf; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + } + dst += dstride - weights->count * nf; + } + /* And then duplicate it h times */ + nf *= weights->count; + while (--h > 0) + { + memcpy(dst, dst-dstride, nf); + dst += dstride; + } +} + +static void +scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha) +{ + const int *contrib = &weights->index[weights->index[0]]; + int min, len, i, j; + int tmp[FZ_MAX_COLORS]; + int nf = n + forcealpha; + + for (j = 0; j < nf; j++) + tmp[j] = 128; + if (weights->flip) + { + src_w = (src_w-1)*sstride; + for (i=weights->count; i > 0; i--) + { + /* Scale the next pixel in the column */ + min = *contrib++; + len = *contrib++; + min = src_w-min*sstride; + while (len-- > 0) + { + int c = *contrib++; + for (j = 0; j < n; j++) + tmp[j] += src[min+j] * c; + if (forcealpha) + tmp[j] += 255 * c; + min -= sstride; + } + for (j = 0; j < nf; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + /* And then duplicate it across the row */ + for (j = (w-1)*nf; j > 0; j--) + { + *dst = dst[-nf]; + dst++; + } + dst += dstride - w*nf; + } + } + else + { + for (i=weights->count; i > 0; i--) + { + /* Scale the next pixel in the column */ + min = *contrib++; + len = *contrib++; + min *= sstride; + while (len-- > 0) + { + int c = *contrib++; + for (j = 0; j < n; j++) + tmp[j] += src[min+j] * c; + if (forcealpha) + tmp[j] += 255 * c; + min += sstride; + } + for (j = 0; j < nf; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + /* And then duplicate it across the row */ + for (j = (w-1)*nf; j > 0; j--) + { + *dst = dst[-nf]; + dst++; + } + dst += dstride - w*nf; + } + } +} +#endif /* SINGLE_PIXEL_SPECIALS */ + +static void +get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp) +{ + const int *contrib = &rows->index[rows->index[0]]; + int len, i, t, b; + + /* Calculate the edge alpha values */ + contrib++; /* Skip min */ + len = *contrib++; + t = 0; + while (len--) + t += *contrib++; + for (i=rows->count-2; i > 0; i--) + { + contrib++; /* Skip min */ + len = *contrib++; + contrib += len; + } + b = 0; + if (i == 0) + { + contrib++; + len = *contrib++; + while (len--) + b += *contrib++; + } + if (rows->flip && i == 0) + { + *tp = b; + *bp = t; + } + else + { + *tp = t; + *bp = b; + } +} + +static void +adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols) +{ + int t, l, r, b, tl, tr, bl, br, x, y; + unsigned char *dp = pix->samples; + int w = pix->w; + int n = pix->n; + int span = w >= 2 ? 
(w-1)*n : 0; + int stride = pix->stride; + + get_alpha_edge_values(rows, &t, &b); + get_alpha_edge_values(cols, &l, &r); + + l = (255 * l + 128)>>8; + r = (255 * r + 128)>>8; + tl = (l * t + 128)>>8; + tr = (r * t + 128)>>8; + bl = (l * b + 128)>>8; + br = (r * b + 128)>>8; + t = (255 * t + 128)>>8; + b = (255 * b + 128)>>8; + dp += n-1; + *dp = tl; + dp += n; + for (x = w-2; x > 0; x--) + { + *dp = t; + dp += n; + } + if (x == 0) + { + *dp = tr; + dp += n; + } + dp += stride - w*n; + for (y = pix->h-2; y > 0; y--) + { + dp[span] = r; + *dp = l; + dp += stride; + } + if (y == 0) + { + *dp = bl; + dp += n; + for (x = w-2; x > 0; x--) + { + *dp = b; + dp += n; + } + if (x == 0) + { + *dp = br; + } + } +} + +fz_pixmap * +fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip) +{ + return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL); +} + +fz_pixmap * +fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y) +{ + fz_scale_filter *filter = &fz_scale_filter_simple; + fz_weights *contrib_rows = NULL; + fz_weights *contrib_cols = NULL; + fz_pixmap *output = NULL; + unsigned char *temp = NULL; + int max_row, temp_span, temp_rows, row; + int dst_w_int, dst_h_int, dst_x_int, dst_y_int; + int flip_x, flip_y, forcealpha; + fz_rect patch; + + fz_var(contrib_cols); + fz_var(contrib_rows); + + /* Avoid extreme scales where overflows become problematic. */ + if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24)) + return NULL; + if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24)) + return NULL; + + /* Clamp small ranges of w and h */ + if (w <= -1) + { + /* Large negative range. Don't clamp */ + } + else if (w < 0) + { + w = -1; + } + else if (w < 1) + { + w = 1; + } + if (h <= -1) + { + /* Large negative range. Don't clamp */ + } + else if (h < 0) + { + h = -1; + } + else if (h < 1) + { + h = 1; + } + + /* If the src has an alpha, we'll make the dst have an alpha automatically. + * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */ + forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h); + + /* Find the destination bbox, width/height, and sub pixel offset, + * allowing for whether we're flipping or not. */ + /* The (x,y) position given describes where the top left corner + * of the source image should be mapped to (i.e. where (0,0) in image + * space ends up). Also there are differences in the way we scale + * horizontally and vertically. When scaling rows horizontally, we + * always read forwards through the source, and store either forwards + * or in reverse as required. When scaling vertically, we always store + * out forwards, but may feed source rows in in a different order. + * + * Consider the image rectangle 'r' to which the image is mapped, + * and the (possibly) larger rectangle 'R', given by expanding 'r' to + * complete pixels. + * + * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether + * the image is x flipped or not. Whatever happens 0 <= x < 1. + * y is always R.ymax - r.ymax. + */ + /* dst_x_int is calculated to be the left of the scaled image, and + * x (the sub pixel offset) is the distance in from either the left + * or right pixel expanded edge. 
*/ + flip_x = (w < 0); + if (flip_x) + { + float tmp; + w = -w; + dst_x_int = floorf(x-w); + tmp = ceilf(x); + dst_w_int = (int)tmp; + x = tmp - x; + dst_w_int -= dst_x_int; + } + else + { + dst_x_int = floorf(x); + x -= dst_x_int; + dst_w_int = (int)ceilf(x + w); + } + /* dst_y_int is calculated to be the top of the scaled image, and + * y (the sub pixel offset) is the distance in from either the top + * or bottom pixel expanded edge. + */ + flip_y = (h < 0); + if (flip_y) + { + float tmp; + h = -h; + dst_y_int = floorf(y-h); + tmp = ceilf(y); + dst_h_int = (int)tmp; + y = tmp - y; + dst_h_int -= dst_y_int; + } + else + { + dst_y_int = floorf(y); + y -= dst_y_int; + dst_h_int = (int)ceilf(y + h); + } + + fz_valgrind_pixmap(src); + + /* Step 0: Calculate the patch */ + patch.x0 = 0; + patch.y0 = 0; + patch.x1 = dst_w_int; + patch.y1 = dst_h_int; + if (clip) + { + if (flip_x) + { + if (dst_x_int + dst_w_int > clip->x1) + patch.x0 = dst_x_int + dst_w_int - clip->x1; + if (clip->x0 > dst_x_int) + { + patch.x1 = dst_w_int - (clip->x0 - dst_x_int); + dst_x_int = clip->x0; + } + } + else + { + if (dst_x_int + dst_w_int > clip->x1) + patch.x1 = clip->x1 - dst_x_int; + if (clip->x0 > dst_x_int) + { + patch.x0 = clip->x0 - dst_x_int; + dst_x_int += patch.x0; + } + } + + if (flip_y) + { + if (dst_y_int + dst_h_int > clip->y1) + patch.y1 = clip->y1 - dst_y_int; + if (clip->y0 > dst_y_int) + { + patch.y0 = clip->y0 - dst_y_int; + dst_y_int = clip->y0; + } + } + else + { + if (dst_y_int + dst_h_int > clip->y1) + patch.y1 = clip->y1 - dst_y_int; + if (clip->y0 > dst_y_int) + { + patch.y0 = clip->y0 - dst_y_int; + dst_y_int += patch.y0; + } + } + } + if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1) + return NULL; + + fz_try(ctx) + { + /* Step 1: Calculate the weights for columns and rows */ +#ifdef SINGLE_PIXEL_SPECIALS + if (src->w == 1) + contrib_cols = NULL; + else +#endif /* SINGLE_PIXEL_SPECIALS */ + contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols"); +#ifdef SINGLE_PIXEL_SPECIALS + if (src->h == 1) + contrib_rows = NULL; + else +#endif /* SINGLE_PIXEL_SPECIALS */ + contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows"); + + output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha); + } + fz_catch(ctx) + { + if (!cache_x) + fz_free(ctx, contrib_cols); + if (!cache_y) + fz_free(ctx, contrib_rows); + fz_rethrow(ctx); + } + output->x = dst_x_int; + output->y = dst_y_int; + + /* Step 2: Apply the weights */ +#ifdef SINGLE_PIXEL_SPECIALS + if (!contrib_rows) + { + /* Only 1 source pixel high. */ + if (!contrib_cols) + { + /* Only 1 pixel in the entire image! */ + duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride); + fz_valgrind_pixmap(output); + } + else + { + /* Scale the row once, then copy it. */ + scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha); + fz_valgrind_pixmap(output); + } + } + else if (!contrib_cols) + { + /* Only 1 source pixel wide. Scale the col and duplicate. 
*/ + scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha); + fz_valgrind_pixmap(output); + } + else +#endif /* SINGLE_PIXEL_SPECIALS */ + { + void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights); + void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row); + + temp_span = contrib_cols->count * src->n; + temp_rows = contrib_rows->max_len; + if (temp_span <= 0 || temp_rows > INT_MAX / temp_span) + goto cleanup; + fz_try(ctx) + { + temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char)); + } + fz_catch(ctx) + { + fz_drop_pixmap(ctx, output); + if (!cache_x) + fz_free(ctx, contrib_cols); + if (!cache_y) + fz_free(ctx, contrib_rows); + fz_rethrow(ctx); + } + switch (src->n) + { + default: + row_scale_in = scale_row_to_temp; + break; + case 1: /* Image mask case or Greyscale case */ + row_scale_in = scale_row_to_temp1; + break; + case 2: /* Greyscale with alpha case */ + row_scale_in = scale_row_to_temp2; + break; + case 3: /* RGB case */ + row_scale_in = scale_row_to_temp3; + break; + case 4: /* RGBA or CMYK case */ + row_scale_in = scale_row_to_temp4; + break; + } + row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp; + max_row = contrib_rows->index[contrib_rows->index[0]]; + for (row = 0; row < contrib_rows->count; row++) + { + /* + Which source rows do we need to have scaled into the + temporary buffer in order to be able to do the final + scale? + */ + int row_index = contrib_rows->index[row]; + int row_min = contrib_rows->index[row_index++]; + int row_len = contrib_rows->index[row_index]; + while (max_row < row_min+row_len) + { + /* Scale another row */ + assert(max_row < src->h); + (*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols); + max_row++; + } + + (*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row); + } + fz_free(ctx, temp); + + if (forcealpha) + adjust_alpha_edges(output, contrib_rows, contrib_cols); + + fz_valgrind_pixmap(output); + } + +cleanup: + if (!cache_y) + fz_free(ctx, contrib_rows); + if (!cache_x) + fz_free(ctx, contrib_cols); + + return output; +} + +void +fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc) +{ + if (!sc) + return; + fz_free(ctx, sc->weights); + fz_free(ctx, sc); +} + +fz_scale_cache * +fz_new_scale_cache(fz_context *ctx) +{ + return fz_malloc_struct(ctx, fz_scale_cache); +}
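
The weights-table layout described in the file's header comment can be made concrete with a small, self-contained sketch. This is not MuPDF code: `demo_index`, `apply_weights_row` and the hand-built table are illustrative only. It shows the encoding (per destination pixel: min, len, then len weights summing to 256) and the 8.8 fixed-point accumulation seeded with 128 (i.e. 0.5, for rounding) that the real `scale_row_to_temp*` functions use before the final `>>8`.

```c
#include <stdio.h>

/* Hypothetical, hand-built weights table for 2 destination pixels taken
 * from a 4-sample source row. index[0..dst_w-1] are offsets into this
 * same array; each record is { min, len, weight... } and the weights for
 * a fully covered pixel sum to 256 (8.8 fixed point). */
static const int demo_index[] = {
	2, 5,              /* index[0], index[1]: record offsets for dst 0 and 1 */
	0, 1, 256,         /* dst 0: min=0, len=1, one weight */
	1, 3, 64, 128, 64, /* dst 1: min=1, len=3, three weights */
};

static void
apply_weights_row(unsigned char *dst, const unsigned char *src, const int *index, int dst_w)
{
	int i;
	for (i = 0; i < dst_w; i++)
	{
		const int *w = &index[index[i]];
		int min = *w++;
		int len = *w++;
		int val = 128; /* rounding: 0.5 in 8.8 fixed point */
		while (len-- > 0)
			val += src[min++] * *w++;
		dst[i] = (unsigned char)(val >> 8);
	}
}

int main(void)
{
	const unsigned char src[4] = { 10, 20, 30, 40 };
	unsigned char dst[2];
	apply_weights_row(dst, src, demo_index, 2);
	printf("%d %d\n", dst[0], dst[1]); /* prints "10 30" */
	return 0;
}
```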
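`make_weights` itself is harder to follow because of the caching, patch clipping and vertical reordering around it. Below is a hedged, stand-alone approximation of what it computes for a single destination pixel j: the centre of dst[j] in source space, the filter window, and the quantised weights renormalised to sum to 256 (done here unconditionally for simplicity; the real `check_weights` is more careful at partially covered edge pixels). `weights_for_pixel` and `simple_fn` are invented names, and the sketch deliberately omits `window_fix`, the zero-weight fallback and the reordering step.

```c
#include <math.h>
#include <stdio.h>

/* The 'simple' filter from this file: 1 + (2x - 3)x^2 for 0 <= x < 1. */
static float simple_fn(float x)
{
	if (x >= 1)
		return 0;
	return 1 + (2*x - 3)*x*x;
}

/* Illustrative stand-alone version of the per-pixel work in make_weights:
 * scale a src_w-wide row to dst_w with sub-pixel offset x, and produce the
 * 8.8 fixed-point weights for destination pixel j. Returns the number of
 * weights written and the first contributing source index via min_out. */
static int
weights_for_pixel(int j, float x, int src_w, float dst_w, int *min_out, int *weights)
{
	float F = (dst_w < src_w) ? dst_w / src_w : 1; /* stretch when downscaling */
	float G = (dst_w < src_w) ? 1 : src_w / dst_w;
	float window = 1 / F;                          /* filter width is 1 for 'simple' */
	float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
	int l = (int)ceilf(centre - window);
	int r = (int)floorf(centre + window);
	int i, n = 0, sum = 0, maxi = 0;

	for (i = (l < 0 ? 0 : l); i <= r && i < src_w; i++)
	{
		float dist = fabsf(j - x + 0.5f - (i + 0.5f)*dst_w/src_w) * G;
		int w = (int)(256*simple_fn(dist)*F + 0.5f);
		if (n == 0)
			*min_out = i;
		weights[n] = w;
		sum += w;
		if (w > weights[maxi])
			maxi = n;
		n++;
	}
	weights[maxi] += 256 - sum; /* force the row to sum to exactly 256 */
	return n;
}

int main(void)
{
	int w[16], min, i;
	int n = weights_for_pixel(0, 0.0f, 8, 4, &min, w); /* 2:1 downscale */
	printf("min=%d:", min);
	for (i = 0; i < n; i++)
		printf(" %d", w[i]); /* e.g. 128 108 20, summing to 256 */
	printf("\n");
	return 0;
}
```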
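For callers, the public entry points defined at the bottom of the file are `fz_scale_pixmap` and `fz_scale_pixmap_cached`. A minimal usage sketch, assuming a valid `fz_context` and source `fz_pixmap` obtained elsewhere in MuPDF (`halve_pixmap` and `halve_pixmap_cached` are illustrative wrappers, not MuPDF API):

```c
#include "mupdf/fitz.h"

/* Scale src to half size. fz_scale_pixmap returns NULL for degenerate or
 * extreme scales (or a fully clipped result), so callers must check. */
fz_pixmap *
halve_pixmap(fz_context *ctx, fz_pixmap *src)
{
	return fz_scale_pixmap(ctx, src, 0, 0, src->w / 2.0f, src->h / 2.0f, NULL);
}

/* When the same scale is applied repeatedly (e.g. to many tiles), the
 * per-axis caches let fz_scale_pixmap_cached reuse the weight tables. */
fz_pixmap *
halve_pixmap_cached(fz_context *ctx, const fz_pixmap *src, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
	return fz_scale_pixmap_cached(ctx, src, 0, 0, src->w / 2.0f, src->h / 2.0f, NULL, cache_x, cache_y);
}
```

The caches are created with `fz_new_scale_cache` and released with `fz_drop_scale_cache`, one per axis.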
