comparison: mupdf-source/source/fitz/draw-scale-simple.c @ 2:b50eed0cc0ef (branch: upstream)

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: there is no version number in the expanded directory any more.

| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | 1:1d09e1dec1d9 |
| children | |

// Copyright (C) 2004-2025 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

/*
    This code does smooth scaling of a pixmap.

    This function returns a new pixmap representing the area starting at (0,0)
    given by taking the source pixmap src, scaling it to width w, and height h,
    and then positioning it at (frac(x),frac(y)).

    This is a cut-down version of draw_scale.c that only copes with filters
    that return values strictly in the 0..1 range, and uses bytes for
    intermediate results rather than ints.
*/

#include "mupdf/fitz.h"

#include "draw-imp.h"
#include "pixmap-imp.h"

#include <math.h>
#include <string.h>
#include <assert.h>
#include <limits.h>

/* Do we special case handling of single pixel high/wide images? The
 * 'purest' handling is given by not special casing them, but certain
 * files that use such images 'stack' them to give full images. Not
 * special casing them results in them being fainter and giving noticeable
 * rounding errors.
 */
#define SINGLE_PIXEL_SPECIALS

/*
    Consider a row of source samples, src, of width src_w, positioned at x,
    scaled to width dst_w.

    src[i] is centred at: x + (i + 0.5)*dst_w/src_w

    Therefore the distance between the centre of the jth output pixel and
    the centre of the ith source sample is:

    dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)

    When scaling up, therefore:

    dst[j] = SUM(filter(dist[j,i]) * src[i])
        (for all ints i)

    This can be simplified by noticing that filters are only non-zero within
    a given filter width (henceforth called W). So:

    dst[j] = SUM(filter(dist[j,i]) * src[i])
        (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)

    When scaling down, each filtered source sample is stretched to be wider
    to avoid aliasing issues. This effectively reduces the distance between
    centres.

    dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
        (where F = dst_w/src_w)
        (for ints i, s.t. (j-W)/F < i < (j+W)/F)

*/

typedef struct fz_scale_filter
{
    int width;
    float (*fn)(struct fz_scale_filter *, float);
} fz_scale_filter;

/* Image scale filters */

static float
triangle(fz_scale_filter *filter, float f)
{
    if (f >= 1)
        return 0;
    return 1-f;
}

static float
box(fz_scale_filter *filter, float f)
{
    if (f >= 0.5f)
        return 0;
    return 1;
}

static float
simple(fz_scale_filter *filter, float x)
{
    if (x >= 1)
        return 0;
    return 1 + (2*x - 3)*x*x;
}

fz_scale_filter fz_scale_filter_box = { 1, box };
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
fz_scale_filter fz_scale_filter_simple = { 1, simple };

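/* An illustrative sketch, not used by the code below: a direct, unoptimised
 * evaluation of dst[j] = SUM(filter(dist[j,i]) * src[i]) from the comment
 * above, scaling one greyscale row with the triangle filter. F and G mirror
 * the normalisation and distance-scaling factors that make_weights() sets up
 * further down; the production code replaces this per-sample filter
 * evaluation with precalculated 8.8 fixed point weight tables. */
static void
scale_row_reference(unsigned char *dst, const unsigned char *src, int src_w, int dst_w, float x)
{
    fz_scale_filter *filter = &fz_scale_filter_triangle;
    float F = (dst_w < src_w) ? (float)dst_w / src_w : 1.0f;
    float G = (dst_w < src_w) ? 1.0f : (float)src_w / dst_w;
    int i, j;

    for (j = 0; j < dst_w; j++)
    {
        float sum = 0;
        int v;

        for (i = 0; i < src_w; i++)
        {
            /* Distance from the centre of dst[j] to the centre of
             * src[i], measured in destination pixels. */
            float dist = j - x + 0.5f - (i + 0.5f) * (float)dst_w / src_w;
            sum += filter->fn(filter, fabsf(dist * G)) * F * src[i];
        }
        v = (int)(sum + 0.5f);
        dst[j] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
    }
}
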
/*
    We build ourselves a set of tables to contain the precalculated weights
    for a given set of scale settings.

    The first dst_w entries in index are the index into index of the
    sets of weights for each destination pixel.

    Each of the sets of weights is a set of values consisting of:
        the minimum source pixel index used for this destination pixel
        the number of weights used for this destination pixel
        the weights themselves

    So to calculate dst[i] we do the following:

        weights = &index[index[i]];
        min = *weights++;
        len = *weights++;
        dst[i] = 0;
        while (len-- > 0)
            dst[i] += src[min++] * *weights++

    In addition, we guarantee that at the end of this process weights will now
    point to the weights value for dst pixel i+1.

    In the simplest version of this algorithm, we would scale the whole image
    horizontally first into a temporary buffer, then scale that temporary
    buffer again vertically to give us our result. Using such a simple
    algorithm would mean that we could use the same style of weights for both
    horizontal and vertical scaling.

    Unfortunately, this would also require a large temporary buffer,
    particularly in the case where we are scaling up.

    We therefore modify the algorithm as follows; we scale scanlines from the
    source image horizontally into a temporary buffer, until we have all the
    contributors for a given output scanline. We then produce that output
    scanline from the temporary buffer. In this way we restrict the height
    of the temporary buffer to a small fraction of the final size.

    Unfortunately, this means that the pseudo code for recombining a
    scanline of fully scaled pixels is as follows:

        weights = &index[index[y]];
        min = *weights++;
        len = *weights++;
        for (x=0 to dst_w)
            min2 = min
            len2 = len
            weights2 = weights
            dst[x] = 0;
            while (len2-- > 0)
                dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++

    i.e. it requires a % operation for every source pixel - this is typically
    expensive.

    To avoid this, we alter the order in which vertical weights are stored,
    so that they are ordered in the same order as the temporary buffer lines
    would appear. This simplifies the algorithm to:

        weights = &index[index[y]];
        min = *weights++;
        len = *weights++;
        for (x=0 to dst_w)
            min2 = 0
            len2 = len
            weights2 = weights
            dst[x] = 0;
            while (len2-- > 0)
                dst[x] += temp[x][min2++] * *weights2++

    This means that len may be larger than it needs to be (due to the
    possible inclusion of a zero weight row or two), but in practice this
    is only an increase of 1 or 2 at worst.

    We implement this by generating the weights as normal (but ensuring we
    leave enough space) and then reordering afterwards.

*/

/* This structure is accessed from ARM code - bear this in mind before
 * altering it! */
typedef struct
{
    int flip;      /* true if outputting reversed */
    int count;     /* number of output pixels we have records for in this table */
    int max_len;   /* Maximum number of weights for any one output pixel */
    int n;         /* number of components (src->n) */
    int new_line;  /* True if no weights for the current output pixel */
    int patch_l;   /* How many output pixels we skip over */
    int index[FZ_FLEXIBLE_ARRAY];
} fz_weights;

struct fz_scale_cache
{
    int src_w;
    float x;
    float dst_w;
    fz_scale_filter *filter;
    int vertical;
    int dst_w_int;
    int patch_l;
    int patch_r;
    int n;
    int flip;
    fz_weights *weights;
};

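/* A minimal sketch of how a finished table is consumed, following the
 * pseudo code in the comment above for a single component horizontal pass
 * (the real consumers are the scale_row_to_temp* functions further down):
 *
 *     const int *contrib = &weights->index[weights->index[0]];
 *     for (j = 0; j < weights->count; j++)
 *     {
 *         int min = *contrib++;
 *         int len = *contrib++;
 *         int val = 128;        // weights sum to 256, so this rounds
 *         while (len-- > 0)
 *             val += src[min++] * *contrib++;
 *         dst[j] = (unsigned char)(val >> 8);
 *     }
 *
 * As guaranteed by the layout described above, contrib is left pointing at
 * the weight set for destination pixel j+1 after each iteration. */
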
static fz_weights *
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
{
    int max_len;
    fz_weights *weights;

    if (src_w > dst_w)
    {
        /* Scaling down, so there will be a maximum of
         * 2*filterwidth*src_w/dst_w src pixels
         * contributing to each dst pixel. */
        max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
        if (max_len > src_w)
            max_len = src_w;
    }
    else
    {
        /* Scaling up, so there will be a maximum of
         * 2*filterwidth src pixels contributing to each dst pixel.
         */
        max_len = 2 * filter->width;
    }
    /* We need the size of the struct,
     * plus patch_w*sizeof(int) for the index
     * plus (2+max_len)*sizeof(int) for the weights
     * plus room for an extra set of weights for reordering.
     */
    weights = fz_malloc_flexible(ctx, fz_weights, index, (max_len+3) * (patch_w+1));
    if (!weights)
        return NULL;
    weights->count = -1;
    weights->max_len = max_len;
    weights->index[0] = patch_w;
    weights->n = n;
    weights->patch_l = patch_l;
    weights->flip = flip;
    return weights;
}

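/* Worked example of the bound above: scaling src_w = 300 down to dst_w = 100
 * with the triangle filter (width 1) gives max_len = ceil(2*1*300/100) = 6,
 * so at most 6 source pixels contribute to each destination pixel, and for a
 * full-width patch the index array is allocated as (6+3)*(100+1) ints: one
 * index entry, a min/len header and up to 6 weights per destination pixel,
 * plus one spare set for the vertical reordering pass. */
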
/* j is destination pixel in the patch_l..patch_l+patch_w range */
static void
init_weights(fz_weights *weights, int j)
{
    int index;

    j -= weights->patch_l;
    assert(weights->count == j-1);
    weights->count++;
    weights->new_line = 1;
    if (j == 0)
        index = weights->index[0];
    else
    {
        index = weights->index[j-1];
        index += 2 + weights->index[index+1];
    }
    weights->index[j] = index; /* row pointer */
    weights->index[index] = 0; /* min */
    weights->index[index+1] = 0; /* len */
}

static void
insert_weight(fz_weights *weights, int j, int i, int weight)
{
    int min, len, index;

    /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
    j -= weights->patch_l;
    if (weights->new_line)
    {
        /* New line */
        weights->new_line = 0;
        index = weights->index[j]; /* row pointer */
        weights->index[index] = i; /* min */
        weights->index[index+1] = 0; /* len */
    }
    index = weights->index[j];
    min = weights->index[index++];
    len = weights->index[index++];
    while (i < min)
    {
        /* This only happens in rare cases, but we need to insert
         * one earlier. In exceedingly rare cases we may need to
         * insert more than one earlier. */
        int k;

        for (k = len; k > 0; k--)
        {
            weights->index[index+k] = weights->index[index+k-1];
        }
        weights->index[index] = 0;
        min--;
        len++;
        weights->index[index-2] = min;
        weights->index[index-1] = len;
    }
    if (i-min >= len)
    {
        /* The usual case */
        while (i-min >= ++len)
        {
            weights->index[index+len-1] = 0;
        }
        assert(len-1 == i-min);
        weights->index[index+i-min] = weight;
        weights->index[index-1] = len;
        assert(len <= weights->max_len);
    }
    else
    {
        /* Infrequent case */
        weights->index[index+i-min] += weight;
    }
}

static void
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
    float x, float F, float G, int src_w, float dst_w)
{
    float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
    float f;
    int weight;

    dist *= G;
    if (dist < 0)
        dist = -dist;
    f = filter->fn(filter, dist)*F;
    weight = (int)(256*f+0.5f);

    /* Ensure i is in range */
    if (i < 0 || i >= src_w)
        return;
    if (weight != 0)
        insert_weight(weights, j, i, weight);
}

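/* The weights are 8.8 fixed point: a filter value of f = 0.5, say, is stored
 * as (int)(256*0.5 + 0.5) = 128. check_weights() below nudges each fully
 * covered pixel's weights to sum to exactly 256, so the final ">> 8" in the
 * scaling loops is an exact division by the total weight (the loops seed
 * their accumulators with 128 to round rather than truncate). */
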
static void
reorder_weights(fz_weights *weights, int j, int src_w)
{
    int idx = weights->index[j - weights->patch_l];
    int min = weights->index[idx++];
    int len = weights->index[idx++];
    int max = weights->max_len;
    int tmp = idx+max;
    int i, off;

    /* Copy into the temporary area */
    memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);

    /* Pad out if required */
    assert(len <= max);
    assert(min+len <= src_w);
    off = 0;
    if (len < max)
    {
        memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
        len = max;
        if (min + len > src_w)
        {
            off = min + len - src_w;
            min = src_w - len;
            weights->index[idx-2] = min;
        }
        weights->index[idx-1] = len;
    }

    /* Copy back into the proper places */
    for (i = 0; i < len; i++)
    {
        weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
    }
}

/* Due to rounding and edge effects, the sums for the weights sometimes don't
 * add up to 256. This causes visible rendering effects. Therefore, we take
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
 * 256 for all pixels that are completely covered. See bug #691629. */
static void
check_weights(fz_weights *weights, int j, int w, float x, float wf)
{
    int idx, len;
    int sum = 0;
    int max = -256;
    int maxidx = 0;
    int i;

    idx = weights->index[j - weights->patch_l];
    idx++; /* min */
    len = weights->index[idx++];

    for (i=0; i < len; i++)
    {
        int v = weights->index[idx++];
        sum += v;
        if (v > max)
        {
            max = v;
            maxidx = idx;
        }
    }
    /* If we aren't the first or last pixel, OR if the sum is too big
     * then adjust it. */
    if (((j != 0) && (j != w-1)) || (sum > 256))
        weights->index[maxidx-1] += 256-sum;
    /* Otherwise, if we are the first pixel, and it's fully covered, then
     * adjust it. */
    else if ((j == 0) && (x < 0.0001f) && (sum != 256))
        weights->index[maxidx-1] += 256-sum;
    /* Finally, if we are the last pixel, and it's fully covered, then
     * adjust it. */
    else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
        weights->index[maxidx-1] += 256-sum;
}

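/* Worked example: if rounding leaves a fully covered pixel with weights
 * { 60, 130, 70 } (sum 260), the largest weight is adjusted by 256-260 = -4,
 * i.e. 130 becomes 126, restoring the exact 256 total with minimal
 * disturbance to the filter shape. */
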
static int
window_fix(int l, int *rp, float window, float centre)
{
    int r = *rp;
    while (centre - l > window)
        l++;
    while (r - centre > window)
        r--;
    *rp = r;
    return l;
}

static fz_weights *
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
{
    fz_weights *weights;
    float F, G;
    float window;
    int j;

    if (cache)
    {
        if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
            cache->filter == filter && cache->vertical == vertical &&
            cache->dst_w_int == dst_w_int &&
            cache->patch_l == patch_l && cache->patch_r == patch_r &&
            cache->n == n && cache->flip == flip)
        {
            return cache->weights;
        }
        cache->src_w = src_w;
        cache->x = x;
        cache->dst_w = dst_w;
        cache->filter = filter;
        cache->vertical = vertical;
        cache->dst_w_int = dst_w_int;
        cache->patch_l = patch_l;
        cache->patch_r = patch_r;
        cache->n = n;
        cache->flip = flip;
        fz_free(ctx, cache->weights);
        cache->weights = NULL;
    }

    if (dst_w < src_w)
    {
        /* Scaling down */
        F = dst_w / src_w;
        G = 1;
    }
    else
    {
        /* Scaling up */
        F = 1;
        G = src_w / dst_w;
    }
    window = filter->width / F;
    weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
    if (!weights)
        return NULL;
    for (j = patch_l; j < patch_r; j++)
    {
        /* find the position of the centre of dst[j] in src space */
        float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
        int l, r;
        l = ceilf(centre - window);
        r = floorf(centre + window);

        /* Now, due to the vagaries of floating point, if centre is large, l
         * and r can actually end up further than 2*window apart. All we care
         * about in this case is that we don't crash! We want a cheap correction
         * that avoids the assert and doesn't cost too much in the normal case.
         * This should do. */
        if (r - l > 2 * window)
            l = window_fix(l, &r, window, centre);

        init_weights(weights, j);
        for (; l <= r; l++)
        {
            add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
        }
        if (weights->new_line)
        {
            /* In very rare cases (bug 706764) we might not actually
             * have generated any non-zero weights for this destination
             * pixel. Just use the central pixel. */
            int src_x = floorf(centre);
            if (src_x >= src_w)
                src_x = src_w-1;
            if (src_x < 0)
                src_x = 0;
            insert_weight(weights, j, src_x, 1);
        }
        check_weights(weights, j, dst_w_int, x, dst_w);
        if (vertical)
        {
            reorder_weights(weights, j, src_w);
        }
    }
    weights->count++; /* weights->count = dst_w_int now */
    if (cache)
    {
        cache->weights = weights;
    }
    return weights;
}

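/* Worked example: doubling a row (src_w = 4, dst_w = 8, x = 0) with the
 * triangle filter gives F = 1, G = 0.5 and window = 1. For j = 3 the centre
 * is (3 + 0.5)*4/8 - 0.5 = 1.25, so l = 1 and r = 2. add_weight() sees
 * dst-space distances of 0.5 and 1.5, scaled by G to 0.25 and 0.75, giving
 * triangle weights of 0.75 and 0.25, stored as 192 and 64, which already
 * sum to 256, so check_weights() has nothing to fix. */
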
static void
scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    const int *contrib = &weights->index[weights->index[0]];
    int len, i, j, n;
    const unsigned char *min;
    int tmp[FZ_MAX_COLORS];
    int *t = tmp;

    n = weights->n;
    for (j = 0; j < n; j++)
        tmp[j] = 128;
    if (weights->flip)
    {
        dst += (weights->count-1)*n;
        for (i=weights->count; i > 0; i--)
        {
            min = &src[n * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                for (j = n; j > 0; j--)
                    *t++ += *min++ * *contrib;
                t -= n;
                contrib++;
            }
            for (j = n; j > 0; j--)
            {
                *dst++ = (unsigned char)(*t>>8);
                *t++ = 128;
            }
            t -= n;
            dst -= n*2;
        }
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            min = &src[n * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                for (j = n; j > 0; j--)
                    *t++ += *min++ * *contrib;
                t -= n;
                contrib++;
            }
            for (j = n; j > 0; j--)
            {
                *dst++ = (unsigned char)(*t>>8);
                *t++ = 128;
            }
            t -= n;
        }
    }
}

#ifdef ARCH_ARM

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    asm volatile(
    ENTER_ARM
    ".syntax unified\n"
    "stmfd r13!,{r4-r7,r9,r14} \n"
    "@ r0 = dst \n"
    "@ r1 = src \n"
    "@ r2 = weights \n"
    "ldr r12,[r2],#4 @ r12= flip \n"
    "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
    "ldr r4, [r2] @ r4 = index[0] \n"
    "cmp r12,#0 @ if (flip) \n"
    "beq 5f @ { \n"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "add r0, r0, r3 @ dst += count \n"
    "1: \n"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r5, #128 @ r5 = a = 128 \n"
    "add r4, r1, r4 @ r4 = min = &src[r4] \n"
    "subs r9, r9, #1 @ len-- \n"
    "blt 3f @ while (len >= 0) \n"
    "2: @ { \n"
    "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
    "ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
    "ldr r12,[r2], #4 @ r12 = *contrib++ \n"
    "ldrb r14,[r4], #1 @ r14 = *min++ \n"
    "mlagt r5, r6, r7, r5 @ a += r6 * r7 \n"
    "subs r9, r9, #2 @ r9 = len -= 2 \n"
    "mla r5, r12,r14,r5 @ a += r14 * r12 \n"
    "bge 2b @ } \n"
    "3: \n"
    "mov r5, r5, lsr #8 @ a >>= 8 \n"
    "strb r5,[r0, #-1]! @ *--dst=a \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 1b @ \n"
    "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
    "5:"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "6:"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r5, #128 @ r5 = a = 128 \n"
    "add r4, r1, r4 @ r4 = min = &src[r4] \n"
    "subs r9, r9, #1 @ len-- \n"
    "blt 9f @ while (len > 0) \n"
    "7: @ { \n"
    "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
    "ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
    "ldr r12,[r2], #4 @ r12 = *contrib++ \n"
    "ldrb r14,[r4], #1 @ r14 = *min++ \n"
    "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n"
    "subs r9, r9, #2 @ r9 = len -= 2 \n"
    "mla r5, r12,r14,r5 @ a += r14 * r12 \n"
    "bge 7b @ } \n"
    "9: \n"
    "mov r5, r5, LSR #8 @ a >>= 8 \n"
    "strb r5, [r0], #1 @ *dst++=a \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 6b @ \n"
    "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
    ENTER_THUMB
    );
}

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    asm volatile(
    ENTER_ARM
    "stmfd r13!,{r4-r6,r9-r11,r14} \n"
    "@ r0 = dst \n"
    "@ r1 = src \n"
    "@ r2 = weights \n"
    "ldr r12,[r2],#4 @ r12= flip \n"
    "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
    "ldr r4, [r2] @ r4 = index[0] \n"
    "cmp r12,#0 @ if (flip) \n"
    "beq 4f @ { \n"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "add r0, r0, r3, LSL #1 @ dst += 2*count \n"
    "1: \n"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r5, #128 @ r5 = g = 128 \n"
    "mov r6, #128 @ r6 = a = 128 \n"
    "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
    "cmp r9, #0 @ while (len-- > 0) \n"
    "beq 3f @ { \n"
    "2: \n"
    "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
    "ldrb r11,[r4], #1 @ r11 = *min++ \n"
    "ldrb r12,[r4], #1 @ r12 = *min++ \n"
    "subs r9, r9, #1 @ r9 = len-- \n"
    "mla r5, r14,r11,r5 @ g += r11 * r14 \n"
    "mla r6, r14,r12,r6 @ a += r12 * r14 \n"
    "bgt 2b @ } \n"
    "3: \n"
    "mov r5, r5, lsr #8 @ g >>= 8 \n"
    "mov r6, r6, lsr #8 @ a >>= 8 \n"
    "strb r5, [r0, #-2]! @ dst -= 2; dst[0] = g \n"
    "strb r6, [r0, #1] @ dst[1] = a \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 1b @ \n"
    "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
    "4:"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "5:"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r5, #128 @ r5 = g = 128 \n"
    "mov r6, #128 @ r6 = a = 128 \n"
    "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
    "cmp r9, #0 @ while (len-- > 0) \n"
    "beq 7f @ { \n"
    "6: \n"
    "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
    "ldrb r11,[r4], #1 @ r11 = *min++ \n"
    "ldrb r12,[r4], #1 @ r12 = *min++ \n"
    "subs r9, r9, #1 @ r9 = len-- \n"
    "mla r5, r14,r11,r5 @ g += r11 * r14 \n"
    "mla r6, r14,r12,r6 @ a += r12 * r14 \n"
    "bgt 6b @ } \n"
    "7: \n"
    "mov r5, r5, lsr #8 @ g >>= 8 \n"
    "mov r6, r6, lsr #8 @ a >>= 8 \n"
    "strb r5, [r0], #1 @ *dst++=g \n"
    "strb r6, [r0], #1 @ *dst++=a \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 5b @ \n"
    "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
    ENTER_THUMB
    );
}

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    asm volatile(
    ENTER_ARM
    "stmfd r13!,{r4-r11,r14} \n"
    "@ r0 = dst \n"
    "@ r1 = src \n"
    "@ r2 = weights \n"
    "ldr r12,[r2],#4 @ r12= flip \n"
    "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
    "ldr r4, [r2] @ r4 = index[0] \n"
    "cmp r12,#0 @ if (flip) \n"
    "beq 4f @ { \n"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "add r0, r0, r3, LSL #1 @ \n"
    "add r0, r0, r3 @ dst += 3*count \n"
    "1: \n"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r5, #128 @ r5 = r = 128 \n"
    "mov r6, #128 @ r6 = g = 128 \n"
    "add r7, r1, r4, LSL #1 @ \n"
    "add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
    "mov r7, #128 @ r7 = b = 128 \n"
    "cmp r9, #0 @ while (len-- > 0) \n"
    "beq 3f @ { \n"
    "2: \n"
    "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
    "ldrb r8, [r4], #1 @ r8 = *min++ \n"
    "ldrb r11,[r4], #1 @ r11 = *min++ \n"
    "ldrb r12,[r4], #1 @ r12 = *min++ \n"
    "subs r9, r9, #1 @ r9 = len-- \n"
    "mla r5, r14,r8, r5 @ r += r8 * r14 \n"
    "mla r6, r14,r11,r6 @ g += r11 * r14 \n"
    "mla r7, r14,r12,r7 @ b += r12 * r14 \n"
    "bgt 2b @ } \n"
    "3: \n"
    "mov r5, r5, lsr #8 @ r >>= 8 \n"
    "mov r6, r6, lsr #8 @ g >>= 8 \n"
    "mov r7, r7, lsr #8 @ b >>= 8 \n"
    "strb r5, [r0, #-3]! @ dst -= 3; dst[0] = r \n"
    "strb r6, [r0, #1] @ dst[1] = g \n"
    "strb r7, [r0, #2] @ dst[2] = b \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 1b @ \n"
    "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
    "4:"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "5:"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r5, #128 @ r5 = r = 128 \n"
    "mov r6, #128 @ r6 = g = 128 \n"
    "add r7, r1, r4, LSL #1 @ r7 = min = &src[2*r4] \n"
    "add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
    "mov r7, #128 @ r7 = b = 128 \n"
    "cmp r9, #0 @ while (len-- > 0) \n"
    "beq 7f @ { \n"
    "6: \n"
    "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
    "ldrb r8, [r4], #1 @ r8 = *min++ \n"
    "ldrb r11,[r4], #1 @ r11 = *min++ \n"
    "ldrb r12,[r4], #1 @ r12 = *min++ \n"
    "subs r9, r9, #1 @ r9 = len-- \n"
    "mla r5, r14,r8, r5 @ r += r8 * r14 \n"
    "mla r6, r14,r11,r6 @ g += r11 * r14 \n"
    "mla r7, r14,r12,r7 @ b += r12 * r14 \n"
    "bgt 6b @ } \n"
    "7: \n"
    "mov r5, r5, lsr #8 @ r >>= 8 \n"
    "mov r6, r6, lsr #8 @ g >>= 8 \n"
    "mov r7, r7, lsr #8 @ b >>= 8 \n"
    "strb r5, [r0], #1 @ *dst++=r \n"
    "strb r6, [r0], #1 @ *dst++=g \n"
    "strb r7, [r0], #1 @ *dst++=b \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 5b @ \n"
    "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
    ENTER_THUMB
    );
}

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    asm volatile(
    ENTER_ARM
    "stmfd r13!,{r4-r11,r14} \n"
    "@ r0 = dst \n"
    "@ r1 = src \n"
    "@ r2 = weights \n"
    "ldr r12,[r2],#4 @ r12= flip \n"
    "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
    "ldr r4, [r2] @ r4 = index[0] \n"
    "ldr r5,=0x00800080 @ r5 = rounding \n"
    "ldr r6,=0x00FF00FF @ r6 = 0x00FF00FF \n"
    "cmp r12,#0 @ if (flip) \n"
    "beq 4f @ { \n"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "add r0, r0, r3, LSL #2 @ dst += 4*count \n"
    "1: \n"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r7, r5 @ r7 = b = rounding \n"
    "mov r8, r5 @ r8 = a = rounding \n"
    "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
    "cmp r9, #0 @ while (len-- > 0) \n"
    "beq 3f @ { \n"
    "2: \n"
    "ldr r11,[r4], #4 @ r11 = *min++ \n"
    "ldr r10,[r2], #4 @ r10 = *contrib++ \n"
    "subs r9, r9, #1 @ r9 = len-- \n"
    "and r12,r6, r11 @ r12 = __22__00 \n"
    "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
    "mla r7, r10,r12,r7 @ b += r12 * r10 \n"
    "mla r8, r10,r11,r8 @ a += r11 * r10 \n"
    "bgt 2b @ } \n"
    "3: \n"
    "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
    "bic r8, r8, r6 @ r8 = 33__11__ \n"
    "orr r7, r7, r8 @ r7 = 33221100 \n"
    "str r7, [r0, #-4]! @ *--dst=r \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 1b @ \n"
    "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
    "4: \n"
    "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
    "5: \n"
    "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
    "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
    "mov r7, r5 @ r7 = b = rounding \n"
    "mov r8, r5 @ r8 = a = rounding \n"
    "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
    "cmp r9, #0 @ while (len-- > 0) \n"
    "beq 7f @ { \n"
    "6: \n"
    "ldr r11,[r4], #4 @ r11 = *min++ \n"
    "ldr r10,[r2], #4 @ r10 = *contrib++ \n"
    "subs r9, r9, #1 @ r9 = len-- \n"
    "and r12,r6, r11 @ r12 = __22__00 \n"
    "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
    "mla r7, r10,r12,r7 @ b += r12 * r10 \n"
    "mla r8, r10,r11,r8 @ a += r11 * r10 \n"
    "bgt 6b @ } \n"
    "7: \n"
    "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
    "bic r8, r8, r6 @ r8 = 33__11__ \n"
    "orr r7, r7, r8 @ r7 = 33221100 \n"
    "str r7, [r0], #4 @ *dst++=r \n"
    "subs r3, r3, #1 @ i-- \n"
    "bgt 5b @ \n"
    "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
    ENTER_THUMB
    );
}

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
    asm volatile(
    ENTER_ARM
    "stmfd r13!,{r4-r11,r14} \n"
    "@ r0 = dst \n"
    "@ r1 = src \n"
    "@ r2 = &weights->index[0] \n"
    "@ r3 = width \n"
    "@ r12= row \n"
    "ldr r14,[r13,#4*9] @ r14= n \n"
    "ldr r12,[r13,#4*10] @ r12= row \n"
    "add r2, r2, #24 @ r2 = weights->index \n"
    "mul r3, r14, r3 @ r3 = width *= n \n"
    "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
    "add r2, r2, #4 @ r2 = &index[1] \n"
    "subs r6, r3, #4 @ r6 = x = width-4 \n"
    "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
    " @ r14= len = *contrib \n"
    "blt 4f @ while (x >= 0) { \n"
#ifndef ARCH_UNALIGNED_OK
    "tst r3, #3 @ if ((r3 & 3) \n"
    "tsteq r1, #3 @ || (r1 & 3)) \n"
    "bne 4f @ can't do fast code \n"
#endif
    "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
    "1: \n"
    "ldr r7, =0x00800080 @ r7 = val1 = round \n"
    "stmfd r13!,{r1,r2,r7} @ stash r1,r2,r7 \n"
    " @ r1 = min = src \n"
    " @ r2 = contrib2-4 \n"
    "movs r8, r14 @ r8 = len2 = len \n"
    "mov r5, r7 @ r5 = val0 = round \n"
    "ble 3f @ while (len2-- > 0) { \n"
    "2: \n"
    "ldr r12,[r1], r3 @ r12 = *min; r1 = min += width\n"
    "ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
    "subs r8, r8, #1 @ len2-- \n"
    "and r11,r9, r12 @ r11= __22__00 \n"
    "and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
    "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
    "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
    "bgt 2b @ } \n"
    "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
    "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
    "orr r5, r5, r7 @ r5 = 33221100 \n"
    "3: \n"
    "ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
    "subs r6, r6, #4 @ x-- \n"
    "add r1, r1, #4 @ src++ \n"
    "str r5, [r0], #4 @ *dst++ = val \n"
    "bge 1b @ \n"
    "4: @ } (Less than 4 to go) \n"
    "adds r6, r6, #4 @ r6 = x += 4 \n"
    "beq 8f @ if (x == 0) done \n"
    "5: \n"
    "mov r5, r1 @ r5 = min = src \n"
    "mov r7, #128 @ r7 = val = 128 \n"
    "movs r8, r14 @ r8 = len2 = len \n"
    "add r9, r2, #4 @ r9 = contrib2 \n"
    "ble 7f @ while (len2-- > 0) { \n"
    "6: \n"
    "ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
    "ldrb r12,[r5], r3 @ r12 = *min; r5 = min += width\n"
    "subs r8, r8, #1 @ len2-- \n"
    "@ stall r12 \n"
    "mla r7, r10,r12,r7 @ val += r12 * r10 \n"
    "bgt 6b @ } \n"
    "7: \n"
    "mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
    "subs r6, r6, #1 @ x-- \n"
    "add r1, r1, #1 @ src++ \n"
    "strb r7, [r0], #1 @ *dst++ = val \n"
    "bgt 5b @ \n"
    "8: \n"
    "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
    ".ltorg \n"
    ENTER_THUMB
    );
}

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
    asm volatile(
    ENTER_ARM
    "stmfd r13!,{r4-r11,r14} \n"
    "mov r11,#255 @ r11= 255 \n"
    "ldr r12,[r13,#4*10] @ r12= row \n"
    "@ r0 = dst \n"
    "@ r1 = src \n"
    "@ r2 = &weights->index[0] \n"
    "@ r3 = width \n"
    "@ r11= 255 \n"
    "@ r12= row \n"
    "add r2, r2, #24 @ r2 = weights->index \n"
    "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
    "add r2, r2, #4 @ r2 = &index[1] \n"
    "mov r6, r3 @ r6 = x = width \n"
    "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
    " @ r14= len = *contrib \n"
    "5: \n"
    "ldr r4,[r13,#4*9] @ r4 = nn = n \n"
    "1: \n"
    "mov r5, r1 @ r5 = min = src \n"
    "mov r7, #128 @ r7 = val = 128 \n"
    "movs r8, r14 @ r8 = len2 = len \n"
    "add r9, r2, #4 @ r9 = contrib2 \n"
    "ble 7f @ while (len2-- > 0) { \n"
    "6: \n"
    "ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
    "ldrb r12,[r5], r3 @ r12 = *min; r5 = min += width\n"
    "subs r8, r8, #1 @ len2-- \n"
    "@ stall r12 \n"
    "mla r7, r10,r12,r7 @ val += r12 * r10 \n"
    "bgt 6b @ } \n"
    "7: \n"
    "mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
    "subs r4, r4, #1 @ r4 = nn-- \n"
    "add r1, r1, #1 @ src++ \n"
    "strb r7, [r0], #1 @ *dst++ = val \n"
    "bgt 1b @ \n"
    "subs r6, r6, #1 @ x-- \n"
    "strb r11,[r0], #1 @ *dst++ = 255 \n"
    "bgt 5b @ \n"
    "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
    ".ltorg \n"
    ENTER_THUMB
    );
}
#else

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    const int *contrib = &weights->index[weights->index[0]];
    int len, i;
    const unsigned char *min;

    assert(weights->n == 1);
    if (weights->flip)
    {
        dst += weights->count;
        for (i=weights->count; i > 0; i--)
        {
            int val = 128;
            min = &src[*contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                val += *min++ * *contrib++;
            }
            *--dst = (unsigned char)(val>>8);
        }
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            int val = 128;
            min = &src[*contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                val += *min++ * *contrib++;
            }
            *dst++ = (unsigned char)(val>>8);
        }
    }
}

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    const int *contrib = &weights->index[weights->index[0]];
    int len, i;
    const unsigned char *min;

    assert(weights->n == 2);
    if (weights->flip)
    {
        dst += 2*weights->count;
        for (i=weights->count; i > 0; i--)
        {
            int c1 = 128;
            int c2 = 128;
            min = &src[2 * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                c1 += *min++ * *contrib;
                c2 += *min++ * *contrib++;
            }
            *--dst = (unsigned char)(c2>>8);
            *--dst = (unsigned char)(c1>>8);
        }
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            int c1 = 128;
            int c2 = 128;
            min = &src[2 * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                c1 += *min++ * *contrib;
                c2 += *min++ * *contrib++;
            }
            *dst++ = (unsigned char)(c1>>8);
            *dst++ = (unsigned char)(c2>>8);
        }
    }
}

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    const int *contrib = &weights->index[weights->index[0]];
    int len, i;
    const unsigned char *min;

    assert(weights->n == 3);
    if (weights->flip)
    {
        dst += 3*weights->count;
        for (i=weights->count; i > 0; i--)
        {
            int c1 = 128;
            int c2 = 128;
            int c3 = 128;
            min = &src[3 * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                int c = *contrib++;
                c1 += *min++ * c;
                c2 += *min++ * c;
                c3 += *min++ * c;
            }
            *--dst = (unsigned char)(c3>>8);
            *--dst = (unsigned char)(c2>>8);
            *--dst = (unsigned char)(c1>>8);
        }
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            int c1 = 128;
            int c2 = 128;
            int c3 = 128;
            min = &src[3 * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                int c = *contrib++;
                c1 += *min++ * c;
                c2 += *min++ * c;
                c3 += *min++ * c;
            }
            *dst++ = (unsigned char)(c1>>8);
            *dst++ = (unsigned char)(c2>>8);
            *dst++ = (unsigned char)(c3>>8);
        }
    }
}

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
    const int *contrib = &weights->index[weights->index[0]];
    int len, i;
    const unsigned char *min;

    assert(weights->n == 4);
    if (weights->flip)
    {
        dst += 4*weights->count;
        for (i=weights->count; i > 0; i--)
        {
            int r = 128;
            int g = 128;
            int b = 128;
            int a = 128;
            min = &src[4 * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                r += *min++ * *contrib;
                g += *min++ * *contrib;
                b += *min++ * *contrib;
                a += *min++ * *contrib++;
            }
            *--dst = (unsigned char)(a>>8);
            *--dst = (unsigned char)(b>>8);
            *--dst = (unsigned char)(g>>8);
            *--dst = (unsigned char)(r>>8);
        }
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            int r = 128;
            int g = 128;
            int b = 128;
            int a = 128;
            min = &src[4 * *contrib++];
            len = *contrib++;
            while (len-- > 0)
            {
                r += *min++ * *contrib;
                g += *min++ * *contrib;
                b += *min++ * *contrib;
                a += *min++ * *contrib++;
            }
            *dst++ = (unsigned char)(r>>8);
            *dst++ = (unsigned char)(g>>8);
            *dst++ = (unsigned char)(b>>8);
            *dst++ = (unsigned char)(a>>8);
        }
    }
}

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
{
    const int *contrib = &weights->index[weights->index[row]];
    int len, x;
    int width = w * n;

    contrib++; /* Skip min */
    len = *contrib++;
    for (x=width; x > 0; x--)
    {
        const unsigned char *min = src;
        int val = 128;
        int len2 = len;
        const int *contrib2 = contrib;

        while (len2-- > 0)
        {
            val += *min * *contrib2++;
            min += width;
        }
        *dst++ = (unsigned char)(val>>8);
        src++;
    }
}

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
{
    const int *contrib = &weights->index[weights->index[row]];
    int len, x;
    int width = w * n;

    contrib++; /* Skip min */
    len = *contrib++;
    for (x=w; x > 0; x--)
    {
        int nn;
        for (nn = n; nn > 0; nn--)
        {
            const unsigned char *min = src;
            int val = 128;
            int len2 = len;
            const int *contrib2 = contrib;

            while (len2-- > 0)
            {
                val += *min * *contrib2++;
                min += width;
            }
            *dst++ = (unsigned char)(val>>8);
            src++;
        }
        *dst++ = 255;
    }
}
#endif

#ifdef SINGLE_PIXEL_SPECIALS
static void
duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
{
    int i;

    for (i = n; i > 0; i--)
        *dst++ = *src++;
    if (forcealpha)
        *dst++ = 255;
    n += forcealpha;
    for (i = w-1; i > 0; i--)
    {
        memcpy(dst, dst-n, n);
        dst += n;
    }
    w *= n;
    dst -= w;
    h--;
    while (h--)
    {
        memcpy(dst+stride, dst, w);
        dst += stride;
    }
}

static void
scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
{
    const int *contrib = &weights->index[weights->index[0]];
    int min, len, i, j, n, nf;
    int tmp[FZ_MAX_COLORS];

    n = weights->n;
    nf = n + forcealpha;
    /* Scale a single row */
    for (j = 0; j < nf; j++)
        tmp[j] = 128;
    if (weights->flip)
    {
        dst += (weights->count-1)*nf;
        for (i=weights->count; i > 0; i--)
        {
            min = *contrib++;
            len = *contrib++;
            min *= n;
            while (len-- > 0)
            {
                int c = *contrib++;
                for (j = 0; j < n; j++)
                    tmp[j] += src[min++] * c;
                if (forcealpha)
                    tmp[j] += 255 * c;
            }
            for (j = 0; j < nf; j++)
            {
                *dst++ = (unsigned char)(tmp[j]>>8);
                tmp[j] = 128;
            }
            dst -= 2*nf;
        }
        dst += nf + dstride;
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            min = *contrib++;
            len = *contrib++;
            min *= n;
            while (len-- > 0)
            {
                int c = *contrib++;
                for (j = 0; j < n; j++)
                    tmp[j] += src[min++] * c;
                if (forcealpha)
                    tmp[j] += 255 * c;
            }
            for (j = 0; j < nf; j++)
            {
                *dst++ = (unsigned char)(tmp[j]>>8);
                tmp[j] = 128;
            }
        }
        dst += dstride - weights->count * nf;
    }
    /* And then duplicate it h times */
    nf *= weights->count;
    while (--h > 0)
    {
        memcpy(dst, dst-dstride, nf);
        dst += dstride;
    }
}

static void
scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
{
    const int *contrib = &weights->index[weights->index[0]];
    int min, len, i, j;
    int tmp[FZ_MAX_COLORS];
    int nf = n + forcealpha;

    for (j = 0; j < nf; j++)
        tmp[j] = 128;
    if (weights->flip)
    {
        src_w = (src_w-1)*sstride;
        for (i=weights->count; i > 0; i--)
        {
            /* Scale the next pixel in the column */
            min = *contrib++;
            len = *contrib++;
            min = src_w-min*sstride;
            while (len-- > 0)
            {
                int c = *contrib++;
                for (j = 0; j < n; j++)
                    tmp[j] += src[min+j] * c;
                if (forcealpha)
                    tmp[j] += 255 * c;
                min -= sstride;
            }
            for (j = 0; j < nf; j++)
            {
                *dst++ = (unsigned char)(tmp[j]>>8);
                tmp[j] = 128;
            }
            /* And then duplicate it across the row */
            for (j = (w-1)*nf; j > 0; j--)
            {
                *dst = dst[-nf];
                dst++;
            }
            dst += dstride - w*nf;
        }
    }
    else
    {
        for (i=weights->count; i > 0; i--)
        {
            /* Scale the next pixel in the column */
            min = *contrib++;
            len = *contrib++;
            min *= sstride;
            while (len-- > 0)
            {
                int c = *contrib++;
                for (j = 0; j < n; j++)
                    tmp[j] += src[min+j] * c;
                if (forcealpha)
                    tmp[j] += 255 * c;
                min += sstride;
            }
            for (j = 0; j < nf; j++)
            {
                *dst++ = (unsigned char)(tmp[j]>>8);
                tmp[j] = 128;
            }
            /* And then duplicate it across the row */
            for (j = (w-1)*nf; j > 0; j--)
            {
                *dst = dst[-nf];
                dst++;
            }
            dst += dstride - w*nf;
        }
    }
}
#endif /* SINGLE_PIXEL_SPECIALS */

static void
get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
{
    const int *contrib = &rows->index[rows->index[0]];
    int len, i, t, b;

    /* Calculate the edge alpha values */
    contrib++; /* Skip min */
    len = *contrib++;
    t = 0;
    while (len--)
        t += *contrib++;
    for (i=rows->count-2; i > 0; i--)
    {
        contrib++; /* Skip min */
        len = *contrib++;
        contrib += len;
    }
    b = 0;
    if (i == 0)
    {
        contrib++;
        len = *contrib++;
        while (len--)
            b += *contrib++;
    }
    if (rows->flip && i == 0)
    {
        *tp = b;
        *bp = t;
    }
    else
    {
        *tp = t;
        *bp = b;
    }
}

static void
adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
{
    int t, l, r, b, tl, tr, bl, br, x, y;
    unsigned char *dp = pix->samples;
    int w = pix->w;
    int n = pix->n;
    int span = w >= 2 ? (w-1)*n : 0;
    int stride = pix->stride;

    get_alpha_edge_values(rows, &t, &b);
    get_alpha_edge_values(cols, &l, &r);

    l = (255 * l + 128)>>8;
    r = (255 * r + 128)>>8;
    tl = (l * t + 128)>>8;
    tr = (r * t + 128)>>8;
    bl = (l * b + 128)>>8;
    br = (r * b + 128)>>8;
    t = (255 * t + 128)>>8;
    b = (255 * b + 128)>>8;
    dp += n-1;
    *dp = tl;
    dp += n;
    for (x = w-2; x > 0; x--)
    {
        *dp = t;
        dp += n;
    }
    if (x == 0)
    {
        *dp = tr;
        dp += n;
    }
    dp += stride - w*n;
    for (y = pix->h-2; y > 0; y--)
    {
        dp[span] = r;
        *dp = l;
        dp += stride;
    }
    if (y == 0)
    {
        *dp = bl;
        dp += n;
        for (x = w-2; x > 0; x--)
        {
            *dp = b;
            dp += n;
        }
        if (x == 0)
        {
            *dp = br;
        }
    }
}

fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
{
    return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}

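/* A minimal usage sketch, assuming a fz_context *ctx and a source pixmap
 * *src are already to hand: scale src to 100x150 pixels at the origin, with
 * no clipping and no weight caching:
 *
 *     fz_pixmap *scaled = fz_scale_pixmap(ctx, src, 0.0f, 0.0f, 100.0f, 150.0f, NULL);
 *
 * Fractional x/y/w/h position the result at (frac(x),frac(y)) and force an
 * alpha plane onto alpha-free sources (see forcealpha below); a negative
 * w or h flips the image in that axis. */
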
fz_pixmap *
fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
    fz_scale_filter *filter = &fz_scale_filter_simple;
    fz_weights *contrib_rows = NULL;
    fz_weights *contrib_cols = NULL;
    fz_pixmap *output = NULL;
    unsigned char *temp = NULL;
    int max_row, temp_span, temp_rows, row;
    int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
    int flip_x, flip_y, forcealpha;
    fz_rect patch;

    fz_var(contrib_cols);
    fz_var(contrib_rows);

    /* Avoid extreme scales where overflows become problematic. */
    if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
        return NULL;
    if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
        return NULL;

    /* Clamp small ranges of w and h */
    if (w <= -1)
    {
        /* Large negative range. Don't clamp */
    }
    else if (w < 0)
    {
        w = -1;
    }
    else if (w < 1)
    {
        w = 1;
    }
    if (h <= -1)
    {
        /* Large negative range. Don't clamp */
    }
    else if (h < 0)
    {
        h = -1;
    }
    else if (h < 1)
    {
        h = 1;
    }

| 1633 /* If the src has an alpha, we'll make the dst have an alpha automatically. | |
| 1634 * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */ | |
| 1635 forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h); | |
| 1636 | |
| 1637 /* Find the destination bbox, width/height, and sub pixel offset, | |
| 1638 * allowing for whether we're flipping or not. */ | |
| 1639 /* The (x,y) position given describes where the top left corner | |
| 1640 * of the source image should be mapped to (i.e. where (0,0) in image | |
| 1641 * space ends up). Also there are differences in the way we scale | |
| 1642 * horizontally and vertically. When scaling rows horizontally, we | |
| 1643 * always read forwards through the source, and store either forwards | |
| 1644 * or in reverse as required. When scaling vertically, we always store | |
| 1645 * out forwards, but may feed source rows in in a different order. | |
| 1646 * | |
| 1647 * Consider the image rectangle 'r' to which the image is mapped, | |
| 1648 * and the (possibly) larger rectangle 'R', given by expanding 'r' to | |
| 1649 * complete pixels. | |
| 1650 * | |
| 1651 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether | |
| 1652 * the image is x flipped or not. Whatever happens 0 <= x < 1. | |
| 1653 * y is always R.ymax - r.ymax. | |
| 1654 */ | |
| 1655 /* dst_x_int is calculated to be the left of the scaled image, and | |
| 1656 * x (the sub pixel offset) is the distance in from either the left | |
| 1657 * or right pixel expanded edge. */ | |
| 1658 flip_x = (w < 0); | |
| 1659 if (flip_x) | |
| 1660 { | |
| 1661 float tmp; | |
| 1662 w = -w; | |
| 1663 dst_x_int = floorf(x-w); | |
| 1664 tmp = ceilf(x); | |
| 1665 dst_w_int = (int)tmp; | |
| 1666 x = tmp - x; | |
| 1667 dst_w_int -= dst_x_int; | |
| 1668 } | |
| 1669 else | |
| 1670 { | |
| 1671 dst_x_int = floorf(x); | |
| 1672 x -= dst_x_int; | |
| 1673 dst_w_int = (int)ceilf(x + w); | |
| 1674 } | |
| 1675 /* dst_y_int is calculated to be the top of the scaled image, and | |
| 1676 * y (the sub pixel offset) is the distance in from either the top | |
| 1677 * or bottom pixel expanded edge. | |
| 1678 */ | |
| 1679 flip_y = (h < 0); | |
| 1680 if (flip_y) | |
| 1681 { | |
| 1682 float tmp; | |
| 1683 h = -h; | |
| 1684 dst_y_int = floorf(y-h); | |
| 1685 tmp = ceilf(y); | |
| 1686 dst_h_int = (int)tmp; | |
| 1687 y = tmp - y; | |
| 1688 dst_h_int -= dst_y_int; | |
| 1689 } | |
| 1690 else | |
| 1691 { | |
| 1692 dst_y_int = floorf(y); | |
| 1693 y -= dst_y_int; | |
| 1694 dst_h_int = (int)ceilf(y + h); | |
| 1695 } | |
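
	/* Worked example (hypothetical numbers, unflipped): for x = 3.4 and
	 * w = 2.2, dst_x_int = floorf(3.4) = 3, the sub pixel offset becomes
	 * x = 3.4 - 3 = 0.4, and dst_w_int = (int)ceilf(0.4 + 2.2) = 3: a
	 * 2.2 pixel wide image at a fractional offset straddles 3 whole
	 * destination pixels. */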

	fz_valgrind_pixmap(src);

	/* Step 0: Calculate the patch */
	patch.x0 = 0;
	patch.y0 = 0;
	patch.x1 = dst_w_int;
	patch.y1 = dst_h_int;
	if (clip)
	{
		if (flip_x)
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x0 = dst_x_int + dst_w_int - clip->x1;
			if (clip->x0 > dst_x_int)
			{
				patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
				dst_x_int = clip->x0;
			}
		}
		else
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x1 = clip->x1 - dst_x_int;
			if (clip->x0 > dst_x_int)
			{
				patch.x0 = clip->x0 - dst_x_int;
				dst_x_int += patch.x0;
			}
		}

		if (flip_y)
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int = clip->y0;
			}
		}
		else
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int += patch.y0;
			}
		}
	}
	if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
		return NULL;
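
	/* Clip example (hypothetical numbers, unflipped in x): with
	 * dst_x_int = -5, dst_w_int = 20, clip->x0 = 0 and clip->x1 = 10,
	 * the code above first sets patch.x1 = 10 - (-5) = 15, then
	 * patch.x0 = 0 - (-5) = 5 and dst_x_int = 0: only columns 5..14 of
	 * the unclipped result are produced. */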

	fz_try(ctx)
	{
		/* Step 1: Calculate the weights for columns and rows */
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->w == 1)
			contrib_cols = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols");
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->h == 1)
			contrib_rows = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows");

		output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
	}
	fz_catch(ctx)
	{
		if (!cache_x)
			fz_free(ctx, contrib_cols);
		if (!cache_y)
			fz_free(ctx, contrib_rows);
		fz_rethrow(ctx);
	}
	output->x = dst_x_int;
	output->y = dst_y_int;

	/* Step 2: Apply the weights */
#ifdef SINGLE_PIXEL_SPECIALS
	if (!contrib_rows)
	{
		/* Only 1 source pixel high. */
		if (!contrib_cols)
		{
			/* Only 1 pixel in the entire image! */
			duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
			fz_valgrind_pixmap(output);
		}
		else
		{
			/* Scale the row once, then copy it. */
			scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
			fz_valgrind_pixmap(output);
		}
	}
	else if (!contrib_cols)
	{
		/* Only 1 source pixel wide. Scale the col and duplicate. */
		scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
		fz_valgrind_pixmap(output);
	}
	else
#endif /* SINGLE_PIXEL_SPECIALS */
	{
		void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
		void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);

		temp_span = contrib_cols->count * src->n;
		temp_rows = contrib_rows->max_len;
		/* Avoid integer overflow in the allocation size below. */
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
			goto cleanup;
		fz_try(ctx)
		{
			/* temp holds the most recently scaled temp_rows source
			 * rows, used as a ring buffer by the loop below. */
			temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char));
		}
		fz_catch(ctx)
		{
			fz_drop_pixmap(ctx, output);
			if (!cache_x)
				fz_free(ctx, contrib_cols);
			if (!cache_y)
				fz_free(ctx, contrib_rows);
			fz_rethrow(ctx);
		}
		switch (src->n)
		{
		default:
			row_scale_in = scale_row_to_temp;
			break;
		case 1: /* Image mask case or Greyscale case */
			row_scale_in = scale_row_to_temp1;
			break;
		case 2: /* Greyscale with alpha case */
			row_scale_in = scale_row_to_temp2;
			break;
		case 3: /* RGB case */
			row_scale_in = scale_row_to_temp3;
			break;
		case 4: /* RGBA or CMYK case */
			row_scale_in = scale_row_to_temp4;
			break;
		}
		row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
		max_row = contrib_rows->index[contrib_rows->index[0]];
		for (row = 0; row < contrib_rows->count; row++)
		{
			/*
			Which source rows do we need to have scaled into the
			temporary buffer in order to be able to do the final
			scale?
			*/
			int row_index = contrib_rows->index[row];
			int row_min = contrib_rows->index[row_index++];
			int row_len = contrib_rows->index[row_index];
			while (max_row < row_min+row_len)
			{
				/* Scale another row */
				assert(max_row < src->h);
				(*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row) : max_row)*src->stride], contrib_cols);
				max_row++;
			}

			(*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
		}
		fz_free(ctx, temp);

		if (forcealpha)
			adjust_alpha_edges(output, contrib_rows, contrib_cols);

		fz_valgrind_pixmap(output);
	}

cleanup:
	if (!cache_y)
		fz_free(ctx, contrib_rows);
	if (!cache_x)
		fz_free(ctx, contrib_cols);

	return output;
}
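
/*
 * Note on the temporary buffer in fz_scale_pixmap_cached above: the
 * vertical filter for any output row only reads the most recently
 * scaled contrib_rows->max_len source rows, so temp is used as a ring
 * buffer, with source row i living at temp row (i % temp_rows). A
 * minimal sketch of the same pattern (hypothetical names and sizes):
 *
 *	#define RING_ROWS 4	// ring capacity = max vertical filter support
 *	for (i = 0; i < n_rows; i++)
 *	{
 *		scale_into(&ring[(i % RING_ROWS) * span], i);	// overwrite oldest slot
 *		// consumers may only look back RING_ROWS-1 rows from here
 *	}
 */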

/* Discard a scale cache, freeing any weights it holds. sc may be NULL. */
void
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
{
	if (!sc)
		return;
	fz_free(ctx, sc->weights);
	fz_free(ctx, sc);
}

/* Create a new, empty scale cache, for use with fz_scale_pixmap_cached. */
fz_scale_cache *
fz_new_scale_cache(fz_context *ctx)
{
	return fz_malloc_struct(ctx, fz_scale_cache);
}
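
/*
 * Example (illustrative sketch; ctx and src assumed valid): when many
 * pixmaps are scaled with the same geometry, a pair of caches lets the
 * weight tables be computed once and reused:
 *
 *	fz_scale_cache *cache_x = fz_new_scale_cache(ctx);
 *	fz_scale_cache *cache_y = fz_new_scale_cache(ctx);
 *	fz_pixmap *dst = fz_scale_pixmap_cached(ctx, src, 0, 0, 128, 128, NULL, cache_x, cache_y);
 *	// ... scale further pixmaps at the same size ...
 *	fz_drop_pixmap(ctx, dst);
 *	fz_drop_scale_cache(ctx, cache_x);
 *	fz_drop_scale_cache(ctx, cache_y);
 *
 * When caches are supplied, the weights are owned by the caches and
 * freed by fz_drop_scale_cache, not by fz_scale_pixmap_cached.
 */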
