comparison mupdf-source/source/fitz/strtof.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <assert.h>
26 #include <errno.h>
27 #include <float.h>
28
29 #ifndef INFINITY
30 #define INFINITY (DBL_MAX+DBL_MAX)
31 #endif
32 #ifndef NAN
33 #define NAN (INFINITY-INFINITY)
34 #endif
35
36 /*
37 We use "Algorithm D" from "Contributions to a Proposed Standard for Binary
38 Floating-Point Arithmetic" by Jerome Coonen (1984).
39
40 The implementation uses a self-made floating point type, 'strtof_fp_t', with
41 a 32-bit significand. The steps of the algorithm are
42
43 INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp.
44 OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp.
45
46 1) Convert the integer d1 ... d9 to an strtof_fp_t x.
47 2) Lookup the strtof_fp_t power = 10 ^ |dexp|.
48 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'.
49 4) Round x to a float using rounding mode 'to even'.
50
51 Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer.
52 In the case |dexp| <= 13 the cached power is exact and the algorithm returns
53 the exactly rounded result (with rounding mode 'to even').
54 There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'.
55
56 For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp.
57 This is small enough to ensure that binary to decimal to binary conversion
58 is the identity if the decimal format uses 9 correctly rounded significant digits.
59 */
60 typedef struct strtof_fp_t
61 {
62 uint32_t f;
63 int e;
64 } strtof_fp_t;
65
66 /* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */
67
68 static strtof_fp_t
69 strtof_multiply(strtof_fp_t x, strtof_fp_t y)
70 {
71 uint64_t tmp;
72 strtof_fp_t res;
73
74 assert(x.f & y.f & 0x80000000);
75
76 res.e = x.e + y.e + 32;
77 tmp = (uint64_t) x.f * y.f;
78 /* Normalize. */
79 if ((tmp < ((uint64_t) 1 << 63)))
80 {
81 tmp <<= 1;
82 --res.e;
83 }
84
85 res.f = tmp >> 32;
86
87 /* Set the last bit of the significand to 1 if the result is
88 inexact. */
89 if (tmp & 0xffffffff)
90 res.f |= 1;
91 return res;
92 }
93
94 static strtof_fp_t
95 divide(strtof_fp_t x, strtof_fp_t y)
96 {
97 uint64_t product, quotient;
98 uint32_t remainder;
99 strtof_fp_t res;
100
101 res.e = x.e - y.e - 32;
102 product = (uint64_t) x.f << 32;
103 quotient = product / y.f;
104 remainder = product % y.f;
105 /* 2^31 <= quotient <= 2^33 - 2. */
106 if (quotient <= 0xffffffff)
107 res.f = quotient;
108 else
109 {
110 ++res.e;
111 /* If quotient % 2 != 0 we have remainder != 0. */
112 res.f = quotient >> 1;
113 }
114 if (remainder)
115 res.f |= 1;
116 return res;
117 }
118
119 /* From 10^0 to 10^54. Generated with GNU MPFR. */
120 static const uint32_t strtof_powers_ten[55] = {
121 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000,
122 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740,
123 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f,
124 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f,
125 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7,
126 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96,
127 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9,
128 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e,
129 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367,
130 0xa70c3c41
131 };
132 static const int strtof_powers_ten_e[55] = {
133 -31, -28, -25, -22, -18, -15, -12, -8, -5, -2,
134 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65,
135 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121,
136 125, 128, 131, 135, 138, 141, 145, 148
137 };
138
139 static strtof_fp_t
140 strtof_cached_power(int i)
141 {
142 strtof_fp_t result;
143 assert (i >= 0 && i <= 54);
144 result.f = strtof_powers_ten[i];
145 result.e = strtof_powers_ten_e[i];
146 return result;
147 }
148
149 /* Find number of leading zero bits in an uint32_t. Derived from the
150 "Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html. */
151 static unsigned char clz_table[256] = {
152 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
153 # define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
154 sixteen_times (3) sixteen_times (2) sixteen_times (2)
155 sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
156 /* Zero for the rest. */
157 };
158 static unsigned
159 leading_zeros (uint32_t x)
160 {
161 unsigned tmp1, tmp2;
162
163 tmp1 = x >> 16;
164 if (tmp1)
165 {
166 tmp2 = tmp1 >> 8;
167 if (tmp2)
168 return clz_table[tmp2];
169 else
170 return 8 + clz_table[tmp1];
171 }
172 else
173 {
174 tmp1 = x >> 8;
175 if (tmp1)
176 return 16 + clz_table[tmp1];
177 else
178 return 24 + clz_table[x];
179 }
180 }
181
182 static strtof_fp_t
183 uint32_to_diy (uint32_t x)
184 {
185 strtof_fp_t result = {x, 0};
186 unsigned shift = leading_zeros(x);
187
188 result.f <<= shift;
189 result.e -= shift;
190 return result;
191 }
192
193 #define SP_SIGNIFICAND_SIZE 23
194 #define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE)
195 #define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS)
196 #define SP_EXPONENT_MASK 0x7f800000
197 #define SP_SIGNIFICAND_MASK 0x7fffff
198 #define SP_HIDDEN_BIT 0x800000 /* 2^23 */
199
200 /* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'.
201 See "Implementing IEEE 754-2008 Rounding" in the
202 "Handbook of Floating-Point Arithmetik".
203 */
204 static float
205 diy_to_float(strtof_fp_t x, int negative)
206 {
207 uint32_t result;
208 union
209 {
210 float f;
211 uint32_t n;
212 } tmp;
213
214 assert(x.f & 0x80000000);
215
216 /* We have 2^32 - 2^7 = 0xffffff80. */
217 if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80))
218 {
219 /* Overflow. Set result to infinity. */
220 errno = ERANGE;
221 result = 0xff << SP_SIGNIFICAND_SIZE;
222 }
223 /* We have 2^32 - 2^8 = 0xffffff00. */
224 else if (x.e > -158)
225 {
226 /* x is greater or equal to FLT_MAX. So we get a normalized number. */
227 result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE;
228 result |= (x.f >> 8) & SP_SIGNIFICAND_MASK;
229
230 if (x.f & 0x80)
231 {
232 /* Round-bit is set. */
233 if (x.f & 0x7f)
234 /* Sticky-bit is set. */
235 ++result;
236 else if (x.f & 0x100)
237 /* Significand is odd. */
238 ++result;
239 }
240 }
241 else if (x.e == -158 && x.f >= 0xffffff00)
242 {
243 /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than
244 FLT_MIN but still rounds to it. */
245 result = 1U << SP_SIGNIFICAND_SIZE;
246 }
247 else if (x.e > -181)
248 {
249 /* Non-zero Denormal. */
250 int shift = -149 - x.e; /* 9 <= shift <= 31. */
251
252 result = x.f >> shift;
253
254 if (x.f & (1U << (shift - 1)))
255 /* Round-bit is set. */
256 {
257 if (x.f & ((1U << (shift - 1)) - 1))
258 /* Sticky-bit is set. */
259 ++result;
260 else if (x.f & 1U << shift)
261 /* Significand is odd. */
262 ++result;
263 }
264 }
265 else if (x.e == -181 && x.f > 0x80000000)
266 {
267 /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest
268 denormal. Can't handle this in the previous case as shifting a
269 uint32_t 32 bits to the right is undefined behaviour. */
270 result = 1;
271 }
272 else
273 {
274 /* Underflow. */
275 errno = ERANGE;
276 result = 0;
277 }
278
279 if (negative)
280 result |= 0x80000000;
281
282 tmp.n = result;
283 return tmp.f;
284 }
285
286 static float
287 scale_integer_to_float(uint32_t M, int N, int negative)
288 {
289 strtof_fp_t result, x, power;
290
291 if (M == 0)
292 return negative ? -0.f : 0.f;
293 if (N > 38)
294 {
295 /* Overflow. */
296 errno = ERANGE;
297 return negative ? -INFINITY : INFINITY;
298 }
299 if (N < -54)
300 {
301 /* Underflow. */
302 errno = ERANGE;
303 return negative ? -0.f : 0.f;
304 }
305 /* If N is in the range {-13, ..., 13} the conversion is exact.
306 Try to scale N into this region. */
307 while (N > 13 && M <= 0xffffffff / 10)
308 {
309 M *= 10;
310 --N;
311 }
312
313 while (N < -13 && M % 10 == 0)
314 {
315 M /= 10;
316 ++N;
317 }
318
319 x = uint32_to_diy (M);
320 if (N >= 0)
321 {
322 power = strtof_cached_power(N);
323 result = strtof_multiply(x, power);
324 }
325 else
326 {
327 power = strtof_cached_power(-N);
328 result = divide(x, power);
329 }
330
331 return diy_to_float(result, negative);
332 }
333
334 /* Return non-zero if *s starts with string (must be uppercase), ignoring case,
335 and increment *s by its length. */
336 static int
337 starts_with(const char **s, const char *string)
338 {
339 const char *x = *s, *y = string;
340 while (*x && *y && (*x == *y || *x == *y + 32))
341 ++x, ++y;
342 if (*y == 0)
343 {
344 /* Match. */
345 *s = x;
346 return 1;
347 }
348 else
349 return 0;
350 }
351 #define SET_TAILPTR(tailptr, s) \
352 do \
353 if (tailptr) \
354 *tailptr = (char *) s; \
355 while (0)
356
357 float
358 fz_strtof(const char *string, char **tailptr)
359 {
360 /* FIXME: error (1/2 + 1/256) ulp */
361 const char *s;
362 uint32_t M = 0;
363 int N = 0;
364 /* If decimal_digits gets 9 we truncate all following digits. */
365 int decimal_digits = 0;
366 int negative = 0;
367 const char *number_start = 0;
368
369 /* Skip leading whitespace (isspace in "C" locale). */
370 s = string;
371 while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v')
372 ++s;
373
374 /* Parse sign. */
375 if (*s == '+')
376 ++s;
377 if (*s == '-')
378 {
379 negative = 1;
380 ++s;
381 }
382 number_start = s;
383 /* Parse digits before decimal point. */
384 while (*s >= '0' && *s <= '9')
385 {
386 if (decimal_digits)
387 {
388 if (decimal_digits < 9)
389 {
390 ++decimal_digits;
391 M = M * 10 + *s - '0';
392 }
393 /* Really arcane strings might overflow N. */
394 else if (N < 1000)
395 ++N;
396 }
397 else if (*s > '0')
398 {
399 M = *s - '0';
400 ++decimal_digits;
401 }
402 ++s;
403 }
404
405 /* Parse decimal point. */
406 if (*s == '.')
407 ++s;
408
409 /* Parse digits after decimal point. */
410 while (*s >= '0' && *s <= '9')
411 {
412 if (decimal_digits < 9)
413 {
414 if (decimal_digits || *s > '0')
415 {
416 ++decimal_digits;
417 M = M * 10 + *s - '0';
418 }
419 --N;
420 }
421 ++s;
422 }
423 if ((s == number_start + 1 && *number_start == '.') || number_start == s)
424 {
425 /* No Number. Check for INF and NAN strings. */
426 s = number_start;
427 if (starts_with(&s, "INFINITY") || starts_with(&s, "INF"))
428 {
429 errno = ERANGE;
430 SET_TAILPTR(tailptr, s);
431 return negative ? -INFINITY : +INFINITY;
432 }
433 else if (starts_with(&s, "NAN"))
434 {
435 SET_TAILPTR(tailptr, s);
436 return (float)NAN;
437 }
438 else
439 {
440 SET_TAILPTR(tailptr, string);
441 return 0.f;
442 }
443 }
444
445 /* Parse exponent. */
446 if (*s == 'e' || *s == 'E')
447 {
448 int exp_negative = 0;
449 int exp = 0;
450 const char *int_start;
451 const char *exp_start = s;
452
453 ++s;
454 if (*s == '+')
455 ++s;
456 else if (*s == '-')
457 {
458 ++s;
459 exp_negative = 1;
460 }
461 int_start = s;
462 /* Parse integer. */
463 while (*s >= '0' && *s <= '9')
464 {
465 /* Make sure exp does not get overflowed. */
466 if (exp < 100)
467 exp = exp * 10 + *s - '0';
468 ++s;
469 }
470 if (exp_negative)
471 exp = -exp;
472 if (s == int_start)
473 /* No Number. */
474 s = exp_start;
475 else
476 N += exp;
477 }
478
479 SET_TAILPTR(tailptr, s);
480 return scale_integer_to_float(M, N, negative);
481 }