Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/strtof.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2021 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 #include <assert.h> | |
| 26 #include <errno.h> | |
| 27 #include <float.h> | |
| 28 | |
| 29 #ifndef INFINITY | |
| 30 #define INFINITY (DBL_MAX+DBL_MAX) | |
| 31 #endif | |
| 32 #ifndef NAN | |
| 33 #define NAN (INFINITY-INFINITY) | |
| 34 #endif | |
| 35 | |
| 36 /* | |
| 37 We use "Algorithm D" from "Contributions to a Proposed Standard for Binary | |
| 38 Floating-Point Arithmetic" by Jerome Coonen (1984). | |
| 39 | |
| 40 The implementation uses a self-made floating point type, 'strtof_fp_t', with | |
| 41 a 32-bit significand. The steps of the algorithm are | |
| 42 | |
| 43 INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp. | |
| 44 OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp. | |
| 45 | |
| 46 1) Convert the integer d1 ... d9 to an strtof_fp_t x. | |
| 47 2) Lookup the strtof_fp_t power = 10 ^ |dexp|. | |
| 48 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'. | |
| 49 4) Round x to a float using rounding mode 'to even'. | |
| 50 | |
| 51 Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer. | |
| 52 In the case |dexp| <= 13 the cached power is exact and the algorithm returns | |
| 53 the exactly rounded result (with rounding mode 'to even'). | |
| 54 There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'. | |
| 55 | |
| 56 For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp. | |
| 57 This is small enough to ensure that binary to decimal to binary conversion | |
| 58 is the identity if the decimal format uses 9 correctly rounded significant digits. | |
| 59 */ | |
| 60 typedef struct strtof_fp_t | |
| 61 { | |
| 62 uint32_t f; | |
| 63 int e; | |
| 64 } strtof_fp_t; | |
| 65 | |
| 66 /* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */ | |
| 67 | |
| 68 static strtof_fp_t | |
| 69 strtof_multiply(strtof_fp_t x, strtof_fp_t y) | |
| 70 { | |
| 71 uint64_t tmp; | |
| 72 strtof_fp_t res; | |
| 73 | |
| 74 assert(x.f & y.f & 0x80000000); | |
| 75 | |
| 76 res.e = x.e + y.e + 32; | |
| 77 tmp = (uint64_t) x.f * y.f; | |
| 78 /* Normalize. */ | |
| 79 if ((tmp < ((uint64_t) 1 << 63))) | |
| 80 { | |
| 81 tmp <<= 1; | |
| 82 --res.e; | |
| 83 } | |
| 84 | |
| 85 res.f = tmp >> 32; | |
| 86 | |
| 87 /* Set the last bit of the significand to 1 if the result is | |
| 88 inexact. */ | |
| 89 if (tmp & 0xffffffff) | |
| 90 res.f |= 1; | |
| 91 return res; | |
| 92 } | |
| 93 | |
| 94 static strtof_fp_t | |
| 95 divide(strtof_fp_t x, strtof_fp_t y) | |
| 96 { | |
| 97 uint64_t product, quotient; | |
| 98 uint32_t remainder; | |
| 99 strtof_fp_t res; | |
| 100 | |
| 101 res.e = x.e - y.e - 32; | |
| 102 product = (uint64_t) x.f << 32; | |
| 103 quotient = product / y.f; | |
| 104 remainder = product % y.f; | |
| 105 /* 2^31 <= quotient <= 2^33 - 2. */ | |
| 106 if (quotient <= 0xffffffff) | |
| 107 res.f = quotient; | |
| 108 else | |
| 109 { | |
| 110 ++res.e; | |
| 111 /* If quotient % 2 != 0 we have remainder != 0. */ | |
| 112 res.f = quotient >> 1; | |
| 113 } | |
| 114 if (remainder) | |
| 115 res.f |= 1; | |
| 116 return res; | |
| 117 } | |
| 118 | |
| 119 /* From 10^0 to 10^54. Generated with GNU MPFR. */ | |
| 120 static const uint32_t strtof_powers_ten[55] = { | |
| 121 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000, | |
| 122 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740, | |
| 123 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f, | |
| 124 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f, | |
| 125 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7, | |
| 126 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96, | |
| 127 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9, | |
| 128 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e, | |
| 129 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367, | |
| 130 0xa70c3c41 | |
| 131 }; | |
| 132 static const int strtof_powers_ten_e[55] = { | |
| 133 -31, -28, -25, -22, -18, -15, -12, -8, -5, -2, | |
| 134 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65, | |
| 135 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121, | |
| 136 125, 128, 131, 135, 138, 141, 145, 148 | |
| 137 }; | |
| 138 | |
| 139 static strtof_fp_t | |
| 140 strtof_cached_power(int i) | |
| 141 { | |
| 142 strtof_fp_t result; | |
| 143 assert (i >= 0 && i <= 54); | |
| 144 result.f = strtof_powers_ten[i]; | |
| 145 result.e = strtof_powers_ten_e[i]; | |
| 146 return result; | |
| 147 } | |
| 148 | |
| 149 /* Find number of leading zero bits in an uint32_t. Derived from the | |
| 150 "Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html. */ | |
| 151 static unsigned char clz_table[256] = { | |
| 152 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 153 # define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | |
| 154 sixteen_times (3) sixteen_times (2) sixteen_times (2) | |
| 155 sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1) | |
| 156 /* Zero for the rest. */ | |
| 157 }; | |
| 158 static unsigned | |
| 159 leading_zeros (uint32_t x) | |
| 160 { | |
| 161 unsigned tmp1, tmp2; | |
| 162 | |
| 163 tmp1 = x >> 16; | |
| 164 if (tmp1) | |
| 165 { | |
| 166 tmp2 = tmp1 >> 8; | |
| 167 if (tmp2) | |
| 168 return clz_table[tmp2]; | |
| 169 else | |
| 170 return 8 + clz_table[tmp1]; | |
| 171 } | |
| 172 else | |
| 173 { | |
| 174 tmp1 = x >> 8; | |
| 175 if (tmp1) | |
| 176 return 16 + clz_table[tmp1]; | |
| 177 else | |
| 178 return 24 + clz_table[x]; | |
| 179 } | |
| 180 } | |
| 181 | |
| 182 static strtof_fp_t | |
| 183 uint32_to_diy (uint32_t x) | |
| 184 { | |
| 185 strtof_fp_t result = {x, 0}; | |
| 186 unsigned shift = leading_zeros(x); | |
| 187 | |
| 188 result.f <<= shift; | |
| 189 result.e -= shift; | |
| 190 return result; | |
| 191 } | |
| 192 | |
| 193 #define SP_SIGNIFICAND_SIZE 23 | |
| 194 #define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE) | |
| 195 #define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS) | |
| 196 #define SP_EXPONENT_MASK 0x7f800000 | |
| 197 #define SP_SIGNIFICAND_MASK 0x7fffff | |
| 198 #define SP_HIDDEN_BIT 0x800000 /* 2^23 */ | |
| 199 | |
| 200 /* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'. | |
| 201 See "Implementing IEEE 754-2008 Rounding" in the | |
| 202 "Handbook of Floating-Point Arithmetik". | |
| 203 */ | |
| 204 static float | |
| 205 diy_to_float(strtof_fp_t x, int negative) | |
| 206 { | |
| 207 uint32_t result; | |
| 208 union | |
| 209 { | |
| 210 float f; | |
| 211 uint32_t n; | |
| 212 } tmp; | |
| 213 | |
| 214 assert(x.f & 0x80000000); | |
| 215 | |
| 216 /* We have 2^32 - 2^7 = 0xffffff80. */ | |
| 217 if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80)) | |
| 218 { | |
| 219 /* Overflow. Set result to infinity. */ | |
| 220 errno = ERANGE; | |
| 221 result = 0xff << SP_SIGNIFICAND_SIZE; | |
| 222 } | |
| 223 /* We have 2^32 - 2^8 = 0xffffff00. */ | |
| 224 else if (x.e > -158) | |
| 225 { | |
| 226 /* x is greater or equal to FLT_MAX. So we get a normalized number. */ | |
| 227 result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE; | |
| 228 result |= (x.f >> 8) & SP_SIGNIFICAND_MASK; | |
| 229 | |
| 230 if (x.f & 0x80) | |
| 231 { | |
| 232 /* Round-bit is set. */ | |
| 233 if (x.f & 0x7f) | |
| 234 /* Sticky-bit is set. */ | |
| 235 ++result; | |
| 236 else if (x.f & 0x100) | |
| 237 /* Significand is odd. */ | |
| 238 ++result; | |
| 239 } | |
| 240 } | |
| 241 else if (x.e == -158 && x.f >= 0xffffff00) | |
| 242 { | |
| 243 /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than | |
| 244 FLT_MIN but still rounds to it. */ | |
| 245 result = 1U << SP_SIGNIFICAND_SIZE; | |
| 246 } | |
| 247 else if (x.e > -181) | |
| 248 { | |
| 249 /* Non-zero Denormal. */ | |
| 250 int shift = -149 - x.e; /* 9 <= shift <= 31. */ | |
| 251 | |
| 252 result = x.f >> shift; | |
| 253 | |
| 254 if (x.f & (1U << (shift - 1))) | |
| 255 /* Round-bit is set. */ | |
| 256 { | |
| 257 if (x.f & ((1U << (shift - 1)) - 1)) | |
| 258 /* Sticky-bit is set. */ | |
| 259 ++result; | |
| 260 else if (x.f & 1U << shift) | |
| 261 /* Significand is odd. */ | |
| 262 ++result; | |
| 263 } | |
| 264 } | |
| 265 else if (x.e == -181 && x.f > 0x80000000) | |
| 266 { | |
| 267 /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest | |
| 268 denormal. Can't handle this in the previous case as shifting a | |
| 269 uint32_t 32 bits to the right is undefined behaviour. */ | |
| 270 result = 1; | |
| 271 } | |
| 272 else | |
| 273 { | |
| 274 /* Underflow. */ | |
| 275 errno = ERANGE; | |
| 276 result = 0; | |
| 277 } | |
| 278 | |
| 279 if (negative) | |
| 280 result |= 0x80000000; | |
| 281 | |
| 282 tmp.n = result; | |
| 283 return tmp.f; | |
| 284 } | |
| 285 | |
| 286 static float | |
| 287 scale_integer_to_float(uint32_t M, int N, int negative) | |
| 288 { | |
| 289 strtof_fp_t result, x, power; | |
| 290 | |
| 291 if (M == 0) | |
| 292 return negative ? -0.f : 0.f; | |
| 293 if (N > 38) | |
| 294 { | |
| 295 /* Overflow. */ | |
| 296 errno = ERANGE; | |
| 297 return negative ? -INFINITY : INFINITY; | |
| 298 } | |
| 299 if (N < -54) | |
| 300 { | |
| 301 /* Underflow. */ | |
| 302 errno = ERANGE; | |
| 303 return negative ? -0.f : 0.f; | |
| 304 } | |
| 305 /* If N is in the range {-13, ..., 13} the conversion is exact. | |
| 306 Try to scale N into this region. */ | |
| 307 while (N > 13 && M <= 0xffffffff / 10) | |
| 308 { | |
| 309 M *= 10; | |
| 310 --N; | |
| 311 } | |
| 312 | |
| 313 while (N < -13 && M % 10 == 0) | |
| 314 { | |
| 315 M /= 10; | |
| 316 ++N; | |
| 317 } | |
| 318 | |
| 319 x = uint32_to_diy (M); | |
| 320 if (N >= 0) | |
| 321 { | |
| 322 power = strtof_cached_power(N); | |
| 323 result = strtof_multiply(x, power); | |
| 324 } | |
| 325 else | |
| 326 { | |
| 327 power = strtof_cached_power(-N); | |
| 328 result = divide(x, power); | |
| 329 } | |
| 330 | |
| 331 return diy_to_float(result, negative); | |
| 332 } | |
| 333 | |
| 334 /* Return non-zero if *s starts with string (must be uppercase), ignoring case, | |
| 335 and increment *s by its length. */ | |
| 336 static int | |
| 337 starts_with(const char **s, const char *string) | |
| 338 { | |
| 339 const char *x = *s, *y = string; | |
| 340 while (*x && *y && (*x == *y || *x == *y + 32)) | |
| 341 ++x, ++y; | |
| 342 if (*y == 0) | |
| 343 { | |
| 344 /* Match. */ | |
| 345 *s = x; | |
| 346 return 1; | |
| 347 } | |
| 348 else | |
| 349 return 0; | |
| 350 } | |
| 351 #define SET_TAILPTR(tailptr, s) \ | |
| 352 do \ | |
| 353 if (tailptr) \ | |
| 354 *tailptr = (char *) s; \ | |
| 355 while (0) | |
| 356 | |
| 357 float | |
| 358 fz_strtof(const char *string, char **tailptr) | |
| 359 { | |
| 360 /* FIXME: error (1/2 + 1/256) ulp */ | |
| 361 const char *s; | |
| 362 uint32_t M = 0; | |
| 363 int N = 0; | |
| 364 /* If decimal_digits gets 9 we truncate all following digits. */ | |
| 365 int decimal_digits = 0; | |
| 366 int negative = 0; | |
| 367 const char *number_start = 0; | |
| 368 | |
| 369 /* Skip leading whitespace (isspace in "C" locale). */ | |
| 370 s = string; | |
| 371 while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v') | |
| 372 ++s; | |
| 373 | |
| 374 /* Parse sign. */ | |
| 375 if (*s == '+') | |
| 376 ++s; | |
| 377 if (*s == '-') | |
| 378 { | |
| 379 negative = 1; | |
| 380 ++s; | |
| 381 } | |
| 382 number_start = s; | |
| 383 /* Parse digits before decimal point. */ | |
| 384 while (*s >= '0' && *s <= '9') | |
| 385 { | |
| 386 if (decimal_digits) | |
| 387 { | |
| 388 if (decimal_digits < 9) | |
| 389 { | |
| 390 ++decimal_digits; | |
| 391 M = M * 10 + *s - '0'; | |
| 392 } | |
| 393 /* Really arcane strings might overflow N. */ | |
| 394 else if (N < 1000) | |
| 395 ++N; | |
| 396 } | |
| 397 else if (*s > '0') | |
| 398 { | |
| 399 M = *s - '0'; | |
| 400 ++decimal_digits; | |
| 401 } | |
| 402 ++s; | |
| 403 } | |
| 404 | |
| 405 /* Parse decimal point. */ | |
| 406 if (*s == '.') | |
| 407 ++s; | |
| 408 | |
| 409 /* Parse digits after decimal point. */ | |
| 410 while (*s >= '0' && *s <= '9') | |
| 411 { | |
| 412 if (decimal_digits < 9) | |
| 413 { | |
| 414 if (decimal_digits || *s > '0') | |
| 415 { | |
| 416 ++decimal_digits; | |
| 417 M = M * 10 + *s - '0'; | |
| 418 } | |
| 419 --N; | |
| 420 } | |
| 421 ++s; | |
| 422 } | |
| 423 if ((s == number_start + 1 && *number_start == '.') || number_start == s) | |
| 424 { | |
| 425 /* No Number. Check for INF and NAN strings. */ | |
| 426 s = number_start; | |
| 427 if (starts_with(&s, "INFINITY") || starts_with(&s, "INF")) | |
| 428 { | |
| 429 errno = ERANGE; | |
| 430 SET_TAILPTR(tailptr, s); | |
| 431 return negative ? -INFINITY : +INFINITY; | |
| 432 } | |
| 433 else if (starts_with(&s, "NAN")) | |
| 434 { | |
| 435 SET_TAILPTR(tailptr, s); | |
| 436 return (float)NAN; | |
| 437 } | |
| 438 else | |
| 439 { | |
| 440 SET_TAILPTR(tailptr, string); | |
| 441 return 0.f; | |
| 442 } | |
| 443 } | |
| 444 | |
| 445 /* Parse exponent. */ | |
| 446 if (*s == 'e' || *s == 'E') | |
| 447 { | |
| 448 int exp_negative = 0; | |
| 449 int exp = 0; | |
| 450 const char *int_start; | |
| 451 const char *exp_start = s; | |
| 452 | |
| 453 ++s; | |
| 454 if (*s == '+') | |
| 455 ++s; | |
| 456 else if (*s == '-') | |
| 457 { | |
| 458 ++s; | |
| 459 exp_negative = 1; | |
| 460 } | |
| 461 int_start = s; | |
| 462 /* Parse integer. */ | |
| 463 while (*s >= '0' && *s <= '9') | |
| 464 { | |
| 465 /* Make sure exp does not get overflowed. */ | |
| 466 if (exp < 100) | |
| 467 exp = exp * 10 + *s - '0'; | |
| 468 ++s; | |
| 469 } | |
| 470 if (exp_negative) | |
| 471 exp = -exp; | |
| 472 if (s == int_start) | |
| 473 /* No Number. */ | |
| 474 s = exp_start; | |
| 475 else | |
| 476 N += exp; | |
| 477 } | |
| 478 | |
| 479 SET_TAILPTR(tailptr, s); | |
| 480 return scale_integer_to_float(M, N, negative); | |
| 481 } |
