comparison mupdf-source/source/fitz/string.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26 #include <errno.h>
27 #include <math.h>
28 #include <float.h>
29 #include <stdlib.h>
30
31 #ifdef _WIN32
32 #include <windows.h> /* for MultiByteToWideChar etc. */
33 #endif
34
35 #include "utfdata.h"
36
/*
	Binary search over a sorted table of fixed-width integer records.

	t points at n records of ne ints each; the first int of each
	record is its key and keys are expected in ascending order.

	Returns the last record whose key is <= c, or NULL when the table
	is empty or c is smaller than every key.
*/
static const int *
fz_ucd_bsearch(int c, const int *t, int n, int ne)
{
	/* Halve the candidate window until a single record is left. */
	while (n > 1)
	{
		int half = n / 2;
		const int *mid = t + half * ne;

		if (c < mid[0])
		{
			/* Answer lies strictly below mid. */
			n = half;
		}
		else
		{
			/* mid itself is still a candidate. */
			t = mid;
			n -= half;
		}
	}

	return (n && c >= t[0]) ? t : NULL;
}
60
61 int
62 fz_tolower(int c)
63 {
64 const int *p;
65
66 /* Make ASCII fast. */
67 if (c < 128)
68 {
69 if (c >= 'A' && c <= 'Z')
70 c += 'a' - 'A';
71 return c;
72 }
73
74 p = fz_ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2) / 3, 3);
75 if (p && c >= p[0] && c <= p[1])
76 return c + p[2];
77 p = fz_ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1) / 2, 2);
78 if (p && c == p[0])
79 return c + p[1];
80 return c;
81 }
82
83 int
84 fz_toupper(int c)
85 {
86 const int *p;
87 p = fz_ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2) / 3, 3);
88 if (p && c >= p[0] && c <= p[1])
89 return c + p[2];
90 p = fz_ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1) / 2, 2);
91 if (p && c == p[0])
92 return c + p[1];
93 return c;
94 }
95
/*
	Length of the string s, looking at no more than n bytes.

	Returns the index of the first NUL byte, or n if none of the
	first n bytes is NUL.
*/
size_t
fz_strnlen(const char *s, size_t n)
{
	size_t len = 0;

	while (len < n && s[len] != '\0')
		len++;
	return len;
}
102
/*
	Case-insensitive comparison of two UTF-8 strings, looking at no
	more than n bytes of either string.

	Characters are decoded with fz_chartorunen and folded with
	fz_tolower. Returns zero if the strings compare equal within the
	limit, otherwise the difference between the first pair of
	differing (folded) code points. A NUL in either string ends the
	comparison.

	Two characters that fold to the same value are not guaranteed to
	occupy the same number of UTF-8 bytes (U+212A KELVIN SIGN is three
	bytes and folds to the one byte 'k'), so each string carries its
	own byte budget. Comparison stops as soon as either budget is used
	up, which keeps both reads inside their first n bytes.
*/
int
fz_strncasecmp(const char *a, const char *b, size_t n)
{
	size_t left_a = n;
	size_t left_b = n;

	while (left_a > 0 && left_b > 0)
	{
		int ucs_a, ucs_b, n_a, n_b;

		/* With a non-zero limit the decoder never reports more
		 * bytes than the limit, so the subtractions below cannot
		 * wrap. */
		n_a = fz_chartorunen(&ucs_a, a, left_a);
		n_b = fz_chartorunen(&ucs_b, b, left_b);

		// one or both of the strings are short
		if (ucs_a == 0 || ucs_b == 0)
			return ucs_a - ucs_b;

		if (ucs_a != ucs_b)
		{
			ucs_a = fz_tolower(ucs_a);
			ucs_b = fz_tolower(ucs_b);
			if (ucs_a != ucs_b)
				return ucs_a - ucs_b;
		}

		a += n_a;
		b += n_b;
		left_a -= n_a;
		left_b -= n_b;
	}
	return 0;
}
135
/*
	Case-insensitive comparison of two NUL-terminated UTF-8 strings.

	Each string is decoded one character at a time with fz_chartorune
	and folded with fz_tolower. Returns zero if the strings are equal,
	otherwise the difference between the first pair of differing
	folded code points.
*/
int
fz_strcasecmp(const char *a, const char *b)
{
	for (;;)
	{
		int rune_a, rune_b;

		a += fz_chartorune(&rune_a, a);
		b += fz_chartorune(&rune_b, b);
		rune_a = fz_tolower(rune_a);
		rune_b = fz_tolower(rune_b);

		if (rune_a != rune_b)
			return rune_a - rune_b;
		/* Equal and both at the terminator: strings match. */
		if (rune_a == 0)
			return 0;
	}
}
155
/*
	Split off the next token from *stringp.

	The token ends at the first byte found in delim; that byte is
	overwritten with NUL and *stringp is moved just past it. If no
	delimiter is found, *stringp becomes NULL. Returns the start of
	the token, or NULL if *stringp was already NULL.
*/
char *
fz_strsep(char **stringp, const char *delim)
{
	char *token = *stringp;
	char *cut;

	if (token == NULL)
		return NULL;

	cut = strpbrk(token, delim);
	if (cut == NULL)
	{
		*stringp = NULL;
	}
	else
	{
		*cut = '\0';
		*stringp = cut + 1;
	}
	return token;
}
165
/*
	Copy src into dst, writing at most siz bytes including the
	terminating NUL.

	If siz is non-zero the result is always NUL-terminated; if siz is
	zero nothing is written. Returns the length of src, so a return
	value >= siz means the copy was truncated.
*/
size_t
fz_strlcpy(char *dst, const char *src, size_t siz)
{
	size_t i = 0;

	if (siz != 0)
	{
		/* Leave one byte of dst for the terminator. */
		size_t room = siz - 1;

		while (i < room && src[i] != '\0')
		{
			dst[i] = src[i];
			i++;
		}
		dst[i] = '\0';
	}

	/* Walk whatever is left of src so the full length can be reported. */
	while (src[i] != '\0')
		i++;

	return i; /* count does not include NUL */
}
191
/*
	Append src to the string in dst, where siz is the full size of the
	dst buffer (not the space left).

	At most siz-1 bytes end up in dst and the result is NUL-terminated
	whenever there is room for at least one more byte. If no NUL is
	found in the first siz bytes of dst, dst is left untouched.

	Returns the length the combined string would have had without
	truncation: min(siz, strlen(dst)) + strlen(src).
*/
size_t
fz_strlcat(char *dst, const char *src, size_t siz)
{
	char *d = dst;
	const char *s = src;
	size_t n = siz;
	size_t dlen;

	/* Find the end of dst and adjust bytes left but don't go past end.
	 * The remaining count is tested before the byte is read so that
	 * dst[siz] is never touched. */
	while (n != 0 && *d != '\0')
	{
		d++;
		n--;
	}
	dlen = d - dst;
	n = siz - dlen;

	if (n == 0)
		return dlen + strlen(s);

	while (*s != '\0')
	{
		/* Keep the last free byte for the terminator. */
		if (n != 1)
		{
			*d++ = *s;
			n--;
		}
		s++;
	}
	*d = '\0';

	return dlen + (s - src); /* count does not include NUL */
}
219
/*
	Store the directory part of path in dir, a buffer of n bytes.

	A NULL or empty path, or one without any '/', gives ".".
	A path whose only slashes are leading ones gives "/".
	Otherwise the result is path cut before its last run of slashes.

	Note that a single trailing slash counts as the last separator,
	so "a/b/" gives "a/b" rather than "a".

	The path is copied with fz_strlcpy before being examined, so it is
	truncated to n-1 bytes first. Nothing is written when n is zero.
*/
void
fz_dirname(char *dir, const char *path, size_t n)
{
	char *slash;

	/* No room to store anything; also avoids scanning a buffer that
	 * was never written. */
	if (n == 0)
		return;

	if (!path || !path[0])
	{
		fz_strlcpy(dir, ".", n);
		return;
	}

	fz_strlcpy(dir, path, n);

	slash = strrchr(dir, '/');
	if (!slash)
	{
		fz_strlcpy(dir, ".", n);
		return;
	}

	/* Step back over the whole run of separators. */
	while (*slash == '/')
	{
		if (slash == dir)
		{
			fz_strlcpy(dir, "/", n);
			return;
		}
		--slash;
	}
	slash[1] = 0;
}
239
/*
	Return a pointer to the final component of path.

	The component starts after the last '/'. Only when path contains
	no '/' at all is the last '\\' used instead. If neither separator
	is present, path itself is returned.
*/
const char *
fz_basename(const char *path)
{
	const char *last_slash = NULL;
	const char *last_backslash = NULL;
	const char *p;

	for (p = path; *p != '\0'; ++p)
	{
		if (*p == '/')
			last_slash = p;
		else if (*p == '\\')
			last_backslash = p;
	}

	if (last_slash != NULL)
		return last_slash + 1;
	if (last_backslash != NULL)
		return last_backslash + 1;
	return path;
}
250
#ifdef _WIN32

/*
	Resolve path to a full path name and store it, UTF-8 encoded and
	with '/' as separator, in buf.

	buf is written as a buffer of PATH_MAX bytes. Returns buf on
	success and NULL if any conversion fails or the resolved name
	does not fit in PATH_MAX wide characters.
*/
char *fz_realpath(const char *path, char *buf)
{
	wchar_t wpath[PATH_MAX];
	wchar_t wbuf[PATH_MAX];
	DWORD len;
	int i;

	if (!MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, PATH_MAX))
		return NULL;

	/* Zero means failure. A value of PATH_MAX or more means the buffer
	 * was too small: the call then returns the size it needs and
	 * leaves wbuf unspecified. */
	len = GetFullPathNameW(wpath, PATH_MAX, wbuf, NULL);
	if (len == 0 || len >= PATH_MAX)
		return NULL;

	if (!WideCharToMultiByte(CP_UTF8, 0, wbuf, -1, buf, PATH_MAX, NULL, NULL))
		return NULL;

	for (i = 0; buf[i]; ++i)
		if (buf[i] == '\\')
			buf[i] = '/';
	return buf;
}

#else

/*
	Resolve path with the system realpath(); buf and the return value
	are passed straight through.
*/
char *fz_realpath(const char *path, char *buf)
{
	return realpath(path, buf);
}

#endif
278
/* Return 1 if a is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static inline int ishex(int a)
{
	if (a >= '0' && a <= '9')
		return 1;
	if (a >= 'a' && a <= 'f')
		return 1;
	return a >= 'A' && a <= 'F';
}
285
/*
	Value (0..15) of the ASCII hexadecimal digit c.
	Anything that is not a hexadecimal digit yields 0.
*/
static inline int tohex(int c)
{
	if (c >= 'A' && c <= 'F')
		return 0xA + (c - 'A');
	if (c >= 'a' && c <= 'f')
		return 0xA + (c - 'a');
	if (c >= '0' && c <= '9')
		return c - '0';
	return 0;
}
293
/* Character sets used by the URI encoding and decoding functions below. */

/* Letters, digits and punctuation that are copied through unescaped. */
#define URIALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define URIDIGIT "0123456789"
#define URIMARK "-_.!~*'()"
#define URIUNESCAPED URIALPHA URIDIGIT URIMARK

/* Characters that act as delimiters inside a URI. */
#define URIRESERVED ";/?:@&=+$,"

/* Upper-case hexadecimal digits, indexed by nibble value. */
#define HEX "0123456789ABCDEF"
300
/*
	Same as fz_decode_uri_component but in-place.

	Every "%XX" with two hexadecimal digits is replaced by the byte it
	encodes; everything else, including a '%' that is not followed by
	two hexadecimal digits, is kept as it is. The output is never
	longer than the input. Returns url.
*/
char *
fz_urldecode(char *url)
{
	const char *in = url;
	char *out = url;

	while (*in != '\0')
	{
		if (in[0] == '%' && ishex(in[1]) && ishex(in[2]))
		{
			*out++ = tohex(in[1]) << 4 | tohex(in[2]);
			in += 3;
		}
		else
		{
			*out++ = *in++;
		}
	}
	*out = 0;
	return url;
}
324
325 char *
326 fz_decode_uri_component(fz_context *ctx, const char *s)
327 {
328 char *uri = fz_malloc(ctx, strlen(s) + 1);
329 char *p = uri;
330 while (*s)
331 {
332 int c = (unsigned char) *s++;
333 if (c == '%' && ishex(s[0]) && ishex(s[1]))
334 {
335 int a = tohex(*s++);
336 int b = tohex(*s++);
337 *p++ = a << 4 | b;
338 }
339 else
340 {
341 *p++ = c;
342 }
343 }
344 *p = 0;
345 return uri;
346 }
347
348 char *
349 fz_decode_uri(fz_context *ctx, const char *s)
350 {
351 char *uri = fz_malloc(ctx, strlen(s) + 1);
352 char *p = uri;
353 while (*s)
354 {
355 int c = (unsigned char) *s++;
356 if (c == '%' && ishex(s[0]) && ishex(s[1]))
357 {
358 int a = tohex(*s++);
359 int b = tohex(*s++);
360 c = a << 4 | b;
361 if (strchr(URIRESERVED "#", c)) {
362 *p++ = '%';
363 *p++ = HEX[a];
364 *p++ = HEX[b];
365 } else {
366 *p++ = c;
367 }
368 }
369 else
370 {
371 *p++ = c;
372 }
373 }
374 *p = 0;
375 return uri;
376 }
377
378 static char *
379 fz_encode_uri_imp(fz_context *ctx, const char *s, const char *unescaped)
380 {
381 char *uri = fz_malloc(ctx, strlen(s) * 3 + 1); /* allocate enough for worst case */
382 char *p = uri;
383 while (*s)
384 {
385 int c = (unsigned char) *s++;
386 if (strchr(unescaped, c))
387 {
388 *p++ = c;
389 }
390 else
391 {
392 *p++ = '%';
393 *p++ = HEX[(c >> 4) & 15];
394 *p++ = HEX[(c) & 15];
395 }
396 }
397 *p = 0;
398 return uri;
399 }
400
401 char *
402 fz_encode_uri_component(fz_context *ctx, const char *s)
403 {
404 return fz_encode_uri_imp(ctx, s, URIUNESCAPED);
405 }
406
407 char *
408 fz_encode_uri_pathname(fz_context *ctx, const char *s)
409 {
410 return fz_encode_uri_imp(ctx, s, URIUNESCAPED "/");
411 }
412
413 char *
414 fz_encode_uri(fz_context *ctx, const char *s)
415 {
416 return fz_encode_uri_imp(ctx, s, URIUNESCAPED URIRESERVED "#");
417 }
418
419 void
420 fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page)
421 {
422 const char *s, *p;
423 char num[40];
424 int i, n;
425 int z = 0;
426
427 for (i = 0; page; page /= 10)
428 num[i++] = '0' + page % 10;
429 num[i] = 0;
430
431 s = p = strchr(fmt, '%');
432 if (p)
433 {
434 ++p;
435 while (*p >= '0' && *p <= '9')
436 z = z * 10 + (*p++ - '0');
437 }
438 if (p && *p == 'd')
439 {
440 ++p;
441 }
442 else
443 {
444 s = p = strrchr(fmt, '.');
445 if (!p)
446 s = p = fmt + strlen(fmt);
447 }
448
449 if (z < 1)
450 z = 1;
451 while (i < z && i < (int)sizeof num)
452 num[i++] = '0';
453 n = s - fmt;
454 if (n + i + strlen(p) >= size)
455 fz_throw(ctx, FZ_ERROR_ARGUMENT, "path name buffer overflow");
456 memcpy(path, fmt, n);
457 while (i > 0)
458 path[n++] = num[--i];
459 fz_strlcpy(path + n, p, size - n);
460 }
461
/* True if x ends a path element: a slash or the terminating NUL. */
#define SEP(x) ((x)=='/' || (x) == 0)

/*
	Rewrite the path in name, in place, to its shortest equivalent
	form by purely lexical processing:

	- runs of slashes collapse to a single slash;
	- "." elements are dropped;
	- ".." cancels the element before it; at the start of a rooted
	  path it is dropped, at the start of a relative path it is kept;
	- trailing slashes are removed;
	- an empty result becomes ".".

	The result is never longer than the input, except that an empty
	string becomes ".", so the buffer must hold at least two bytes.
	Returns name.
*/
char *
fz_cleanname(char *name)
{
	const int rooted = (name[0] == '/');
	char *src;	/* start of the path element being looked at */
	char *dst;	/* just past the last element written (no slash) */
	char *floor;	/* ".." cannot backtrack below this point */

	src = dst = floor = name + rooted;
	while (*src != '\0')
	{
		if (src[0] == '/')
		{
			/* null element */
			src++;
			continue;
		}

		if (src[0] == '.' && SEP(src[1]))
		{
			/* don't count the separator in case it is nul */
			src += 1;
			continue;
		}

		if (src[0] == '.' && src[1] == '.' && SEP(src[2]))
		{
			src += 2;
			if (dst > floor)
			{
				/* can backtrack: drop the previous element */
				do
					--dst;
				while (dst > floor && *dst != '/');
			}
			else if (!rooted)
			{
				/* /.. is / but ./../ is .. */
				if (dst != name)
					*dst++ = '/';
				*dst++ = '.';
				*dst++ = '.';
				floor = dst;
			}
			continue;
		}

		/* real path element */
		if (dst != name + rooted)
			*dst++ = '/';
		for (;;)
		{
			*dst = *src;
			if (*dst == '/' || *dst == 0)
				break;
			src++;
			dst++;
		}
	}

	if (dst == name)	/* empty string is really "." */
		*dst++ = '.';
	*dst = '\0';
	return name;
}
517
518 char *
519 fz_cleanname_strdup(fz_context *ctx, const char *name)
520 {
521 size_t len = strlen(name);
522 char *newname = fz_malloc(ctx, fz_maxz(2, len + 1));
523 memcpy(newname, name, len + 1);
524 newname[len] = '\0';
525 return fz_cleanname(newname);
526 }
527
/* Limits of the UTF-8 / rune conversion routines. */
enum
{
	UTFmax = 4, /* maximum bytes per rune */
	Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
	Runeself = 0x80, /* rune and UTF sequences are the same (<) */
	Runeerror = 0xFFFD, /* decoding error in UTF */
	Runemax = 0x10FFFF, /* maximum rune value */
};

/*
	Bit layout of UTF-8 sequences.

	BitN is the number of payload bits carried by the lead byte of an
	N-byte sequence and Bitx the number carried by a continuation
	byte. TN is the tag in the high bits of such a lead byte, Tx the
	tag of a continuation byte. RuneN is the largest value an N-byte
	sequence can hold.
*/
enum
{
	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = 0x00, /* 0000 0000 */
	Tx = 0x80, /* 1000 0000 */
	T2 = 0xC0, /* 1100 0000 */
	T3 = 0xE0, /* 1110 0000 */
	T4 = 0xF0, /* 1111 0000 */
	T5 = 0xF8, /* 1111 1000 */

	Rune1 = 0x7F, /* 0000 0000 0111 1111 */
	Rune2 = 0x7FF, /* 0000 0111 1111 1111 */
	Rune3 = 0xFFFF, /* 1111 1111 1111 1111 */
	Rune4 = 0x1FFFFF, /* 0001 1111 1111 1111 1111 1111 */

	Maskx = 0x3F, /* 0011 1111 */
	Testx = 0xC0, /* 1100 0000 */

	Bad = Runeerror,
};
563
564 int
565 fz_chartorune(int *rune, const char *str)
566 {
567 int c, c1, c2, c3;
568 int l;
569
570 /* overlong null character */
571 if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
572 *rune = 0;
573 return 2;
574 }
575
576 /*
577 * one character sequence
578 * 00000-0007F => T1
579 */
580 c = *(const unsigned char*)str;
581 if(c < Tx) {
582 *rune = c;
583 return 1;
584 }
585
586 /*
587 * two character sequence
588 * 0080-07FF => T2 Tx
589 */
590 c1 = *(const unsigned char*)(str+1) ^ Tx;
591 if(c1 & Testx)
592 goto bad;
593 if(c < T3) {
594 if(c < T2)
595 goto bad;
596 l = ((c << Bitx) | c1) & Rune2;
597 if(l <= Rune1)
598 goto bad;
599 *rune = l;
600 return 2;
601 }
602
603 /*
604 * three character sequence
605 * 0800-FFFF => T3 Tx Tx
606 */
607 c2 = *(const unsigned char*)(str+2) ^ Tx;
608 if(c2 & Testx)
609 goto bad;
610 if(c < T4) {
611 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
612 if(l <= Rune2)
613 goto bad;
614 *rune = l;
615 return 3;
616 }
617
618 /*
619 * four character sequence (21-bit value)
620 * 10000-1FFFFF => T4 Tx Tx Tx
621 */
622 c3 = *(const unsigned char*)(str+3) ^ Tx;
623 if (c3 & Testx)
624 goto bad;
625 if (c < T5) {
626 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
627 if (l <= Rune3)
628 goto bad;
629 *rune = l;
630 return 4;
631 }
632 /*
633 * Support for 5-byte or longer UTF-8 would go here, but
634 * since we don't have that, we'll just fall through to bad.
635 */
636
637 /*
638 * bad decoding
639 */
640 bad:
641 *rune = Bad;
642 return 1;
643 }
644
645 int
646 fz_chartorunen(int *rune, const char *str, size_t n)
647 {
648 int c, c1, c2, c3;
649 int l;
650
651 if (n < 1)
652 goto bad;
653
654 /*
655 * one character sequence
656 * 00000-0007F => T1
657 */
658 c = *(const unsigned char*)str;
659 if(c < Tx) {
660 *rune = c;
661 return 1;
662 }
663
664 if (n < 2)
665 goto bad;
666
667 /* overlong null character */
668 if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
669 *rune = 0;
670 return 2;
671 }
672
673 /*
674 * two character sequence
675 * 0080-07FF => T2 Tx
676 */
677 c1 = *(const unsigned char*)(str+1) ^ Tx;
678 if(c1 & Testx)
679 goto bad;
680 if(c < T3) {
681 if(c < T2)
682 goto bad;
683 l = ((c << Bitx) | c1) & Rune2;
684 if(l <= Rune1)
685 goto bad;
686 *rune = l;
687 return 2;
688 }
689
690 if (n < 3)
691 goto bad;
692
693 /*
694 * three character sequence
695 * 0800-FFFF => T3 Tx Tx
696 */
697 c2 = *(const unsigned char*)(str+2) ^ Tx;
698 if(c2 & Testx)
699 goto bad;
700 if(c < T4) {
701 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
702 if(l <= Rune2)
703 goto bad;
704 *rune = l;
705 return 3;
706 }
707
708 if (n < 4)
709 goto bad;
710
711 /*
712 * four character sequence (21-bit value)
713 * 10000-1FFFFF => T4 Tx Tx Tx
714 */
715 c3 = *(const unsigned char*)(str+3) ^ Tx;
716 if (c3 & Testx)
717 goto bad;
718 if (c < T5) {
719 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
720 if (l <= Rune3)
721 goto bad;
722 *rune = l;
723 return 4;
724 }
725 /*
726 * Support for 5-byte or longer UTF-8 would go here, but
727 * since we don't have that, we'll just fall through to bad.
728 */
729
730 /*
731 * bad decoding
732 */
733 bad:
734 *rune = Bad;
735 return 1;
736 }
737
738 int
739 fz_runetochar(char *str, int rune)
740 {
741 /* Runes are signed, so convert to unsigned for range check. */
742 unsigned int c = (unsigned int)rune;
743
744 /* overlong null character */
745 if (c == 0) {
746 ((unsigned char *)str)[0] = 0xc0;
747 ((unsigned char *)str)[1] = 0x80;
748 return 2;
749 }
750
751 /*
752 * one character sequence
753 * 00000-0007F => 00-7F
754 */
755 if(c <= Rune1) {
756 str[0] = c;
757 return 1;
758 }
759
760 /*
761 * two character sequence
762 * 0080-07FF => T2 Tx
763 */
764 if(c <= Rune2) {
765 str[0] = T2 | (c >> 1*Bitx);
766 str[1] = Tx | (c & Maskx);
767 return 2;
768 }
769
770 /*
771 * If the Rune is out of range, convert it to the error rune.
772 * Do this test here because the error rune encodes to three bytes.
773 * Doing it earlier would duplicate work, since an out of range
774 * Rune wouldn't have fit in one or two bytes.
775 */
776 if (c > Runemax)
777 c = Runeerror;
778
779 /*
780 * three character sequence
781 * 0800-FFFF => T3 Tx Tx
782 */
783 if (c <= Rune3) {
784 str[0] = T3 | (c >> 2*Bitx);
785 str[1] = Tx | ((c >> 1*Bitx) & Maskx);
786 str[2] = Tx | (c & Maskx);
787 return 3;
788 }
789
790 /*
791 * four character sequence (21-bit value)
792 * 10000-1FFFFF => T4 Tx Tx Tx
793 */
794 str[0] = T4 | (c >> 3*Bitx);
795 str[1] = Tx | ((c >> 2*Bitx) & Maskx);
796 str[2] = Tx | ((c >> 1*Bitx) & Maskx);
797 str[3] = Tx | (c & Maskx);
798 return 4;
799 }
800
/*
	Number of bytes fz_runetochar writes for the code point c.
*/
int
fz_runelen(int c)
{
	char scratch[10];
	int nbytes = fz_runetochar(scratch, c);

	return nbytes;
}
807
808 int
809 fz_runeidx(const char *s, const char *p)
810 {
811 int rune;
812 int i = 0;
813 while (s < p) {
814 if (*(unsigned char *)s < Runeself)
815 ++s;
816 else
817 s += fz_chartorune(&rune, s);
818 ++i;
819 }
820 return i;
821 }
822
823 const char *
824 fz_runeptr(const char *s, int i)
825 {
826 int rune;
827 while (i-- > 0) {
828 rune = *(unsigned char*)s;
829 if (rune < Runeself) {
830 if (rune == 0)
831 return NULL;
832 ++s;
833 } else
834 s += fz_chartorune(&rune, s);
835 }
836 return s;
837 }
838
839 int
840 fz_utflen(const char *s)
841 {
842 int c, n, rune;
843 n = 0;
844 for(;;) {
845 c = *(const unsigned char*)s;
846 if(c < Runeself) {
847 if(c == 0)
848 return n;
849 s++;
850 } else
851 s += fz_chartorune(&rune, s);
852 n++;
853 }
854 }
855
/*
	Parse s as a floating point number using fz_strtof.

	Returns 0 for a NULL string, and 1 if the conversion underflowed
	or produced a NaN. Any other result is clamped to the range
	-FLT_MAX .. FLT_MAX. errno is reset before the conversion.
*/
float fz_atof(const char *s)
{
	float value;

	if (!s)
		return 0;

	errno = 0;
	value = fz_strtof(s, NULL);

	/* Return 1.0 on underflow, as it's a small known value that won't
	 * cause a divide by 0. NaN gets the same treatment. */
	if (isnan(value))
		return 1;
	if (errno == ERANGE && value == 0)
		return 1;

	return fz_clamp(value, -FLT_MAX, FLT_MAX);
}
871
/*
	Parse s as a decimal integer with atoi().
	A NULL string gives 0.
*/
int fz_atoi(const char *s)
{
	return s ? atoi(s) : 0;
}
878
/*
	Parse s as a decimal integer with atoll().
	A NULL string gives 0.
*/
int64_t fz_atoi64(const char *s)
{
	return s ? atoll(s) : 0;
}
885
/*
	Parse s as a decimal size with atoll().

	Returns 0 for a NULL string, for a negative value, and for a value
	that does not survive conversion to size_t.
*/
size_t fz_atoz(const char *s)
{
	int64_t value;

	if (!s)
		return 0;

	value = atoll(s);
	if (value < 0)
		return 0;
	/* Reject values that size_t cannot represent. */
	if ((int64_t)(size_t)value != value)
		return 0;
	return (size_t)value;
}
897
898 int fz_is_page_range(fz_context *ctx, const char *s)
899 {
900 /* TODO: check the actual syntax... */
901 while (*s)
902 {
903 if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',')
904 return 0;
905 s++;
906 }
907 return 1;
908 }
909
910 const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n)
911 {
912 const char *orig = s;
913
914 if (!s || !s[0])
915 return NULL;
916
917 if (s[0] == ',')
918 s += 1;
919
920 if (s[0] == 'N')
921 {
922 *a = n;
923 s += 1;
924 }
925 else
926 *a = strtol(s, (char**)&s, 10);
927
928 if (s[0] == '-')
929 {
930 if (s[1] == 'N')
931 {
932 *b = n;
933 s += 2;
934 }
935 else
936 *b = strtol(s+1, (char**)&s, 10);
937 }
938 else
939 *b = *a;
940
941 if (*a < 0) *a = n + 1 + *a;
942 if (*b < 0) *b = n + 1 + *b;
943
944 *a = fz_clampi(*a, 1, n);
945 *b = fz_clampi(*b, 1, n);
946
947 if (s == orig)
948 {
949 fz_warn(ctx, "skipping invalid page range");
950 return NULL;
951 }
952
953 return s;
954 }
955
/* memmem from musl */

/* Larger of a and b. Each argument may be evaluated twice. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))

/*
	Apply op (such as |= or &) between the word of the bit array a
	that holds bit number b and a mask with only that bit set.
*/
#define BITOP(a,b,op) \
	((a)[(size_t)(b) / (8 * sizeof *(a))] op (size_t)1 << ((size_t)(b) % (8 * sizeof *(a))))
962
/*
	Find the two-byte needle n in the k bytes at h.
	The caller guarantees k >= 2. Returns the match or NULL.

	A rolling 16-bit window is compared against the needle. The window
	is advanced only while unread bytes remain, so nothing past h[k-1]
	is read; the last window is checked after the loop.
*/
static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
	uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
	for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
		if (hw == nw) return (char *)h-2;
	return hw == nw ? (char *)h-2 : 0;
}
970
/*
	Find the three-byte needle n in the k bytes at h.
	The caller guarantees k >= 3. Returns the match or NULL.

	The window lives in the top three bytes of a 32-bit word. The
	first byte is widened before shifting so that a value >= 0x80 is
	not shifted into the sign bit of an int. The window is advanced
	only while unread bytes remain, so nothing past h[k-1] is read.
*/
static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8;
	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
	for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
		if (hw == nw) return (char *)h-3;
	return hw == nw ? (char *)h-3 : 0;
}
979
/*
	Find the four-byte needle n in the k bytes at h.
	The caller guarantees k >= 4. Returns the match or NULL.

	A rolling 32-bit window is compared against the needle. The first
	byte is widened before shifting so that a value >= 0x80 is not
	shifted into the sign bit of an int. The window is advanced only
	while unread bytes remain, so nothing past h[k-1] is read.
*/
static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
	for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
		if (hw == nw) return (char *)h-4;
	return hw == nw ? (char *)h-4 : 0;
}
988
989 static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
990 {
991 size_t i, ip, jp, k, p, ms, p0, mem, mem0;
992 size_t byteset[32 / sizeof(size_t)] = { 0 };
993 size_t shift[256];
994
995 /* Computing length of needle and fill shift table */
996 for (i=0; i<l; i++)
997 BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
998
999 /* Compute maximal suffix */
1000 ip = (size_t)-1; jp = 0; k = p = 1;
1001 while (jp+k<l) {
1002 if (n[ip+k] == n[jp+k]) {
1003 if (k == p) {
1004 jp += p;
1005 k = 1;
1006 } else k++;
1007 } else if (n[ip+k] > n[jp+k]) {
1008 jp += k;
1009 k = 1;
1010 p = jp - ip;
1011 } else {
1012 ip = jp++;
1013 k = p = 1;
1014 }
1015 }
1016 ms = ip;
1017 p0 = p;
1018
1019 /* And with the opposite comparison */
1020 ip = (size_t)-1; jp = 0; k = p = 1;
1021 while (jp+k<l) {
1022 if (n[ip+k] == n[jp+k]) {
1023 if (k == p) {
1024 jp += p;
1025 k = 1;
1026 } else k++;
1027 } else if (n[ip+k] < n[jp+k]) {
1028 jp += k;
1029 k = 1;
1030 p = jp - ip;
1031 } else {
1032 ip = jp++;
1033 k = p = 1;
1034 }
1035 }
1036 if (ip+1 > ms+1) ms = ip;
1037 else p = p0;
1038
1039 /* Periodic needle? */
1040 if (memcmp(n, n+p, ms+1)) {
1041 mem0 = 0;
1042 p = MAX(ms, l-ms-1) + 1;
1043 } else mem0 = l-p;
1044 mem = 0;
1045
1046 /* Search loop */
1047 for (;;) {
1048 /* If remainder of haystack is shorter than needle, done */
1049 if ((size_t)(z-h) < l) return 0;
1050
1051 /* Check last byte first; advance by shift on mismatch */
1052 if (BITOP(byteset, h[l-1], &)) {
1053 k = l-shift[h[l-1]];
1054 if (k) {
1055 if (mem0 && mem && k < p) k = l-p;
1056 h += k;
1057 mem = 0;
1058 continue;
1059 }
1060 } else {
1061 h += l;
1062 mem = 0;
1063 continue;
1064 }
1065
1066 /* Compare right half */
1067 for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
1068 if (k < l) {
1069 h += k-ms;
1070 mem = 0;
1071 continue;
1072 }
1073 /* Compare left half */
1074 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
1075 if (k <= mem) return (char *)h;
1076 h += p;
1077 mem = mem0;
1078 }
1079 }
1080
/*
	Find the first occurrence of the l bytes at n0 within the k bytes
	at h0.

	Returns a pointer to the match, or NULL if there is none. An empty
	needle matches at the start of the haystack.
*/
void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
{
	const unsigned char *hay = h0;
	const unsigned char *needle = n0;
	const unsigned char *first;

	/* Return immediately on empty needle */
	if (l == 0)
		return (void *)hay;

	/* Return immediately when needle is longer than haystack */
	if (k < l)
		return 0;

	/* Skip ahead to the first place the needle could start. */
	first = memchr(hay, needle[0], k);
	if (first == NULL || l == 1)
		return (void *)first;

	k -= first - hay;
	if (k < l)
		return 0;

	/* Use faster algorithms for short needles */
	switch (l)
	{
	case 2:
		return twobyte_memmem(first, k, needle);
	case 3:
		return threebyte_memmem(first, k, needle);
	case 4:
		return fourbyte_memmem(first, k, needle);
	default:
		return twoway_memmem(first, first + k, needle, l);
	}
}
1102
1103 char *
1104 fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s)
1105 {
1106 const wchar_t *src = s;
1107 char *d;
1108 char *dst;
1109 int len = 1;
1110
1111 while (*src)
1112 {
1113 len += fz_runelen(*src++);
1114 }
1115
1116 d = Memento_label(fz_malloc(ctx, len), "utf8_from_wchar");
1117 dst = d;
1118 src = s;
1119 while (*src)
1120 {
1121 dst += fz_runetochar(dst, *src++);
1122 }
1123 *dst = 0;
1124
1125 return d;
1126 }
1127
1128 wchar_t *
1129 fz_wchar_from_utf8(fz_context *ctx, const char *path)
1130 {
1131 size_t z = 0;
1132 const char *p = path;
1133 wchar_t *wpath, *w;
1134
1135 if (!path)
1136 return NULL;
1137
1138 while (*p)
1139 {
1140 int c;
1141 p += fz_chartorune(&c, p);
1142 z++;
1143 if (c >= 0x10000)
1144 z++;
1145 }
1146
1147 w = wpath = fz_malloc(ctx, 2*(z+1));
1148 while (*path)
1149 {
1150 int c;
1151 path += fz_chartorune(&c, path);
1152 if (c >= 0x10000)
1153 {
1154 c -= 0x10000;
1155 *w++ = 0xd800 + (c>>10);
1156 *w++ = 0xdc00 + (c&1023);
1157 }
1158 else
1159 *w++ = c;
1160 }
1161 *w = 0;
1162
1163 return wpath;
1164 }
1165
/*
	Find the first occurrence of needle in haystack, comparing bytes
	exactly.

	Either argument may be NULL, in which case NULL is returned.
	An empty needle matches at the start of haystack. Returns a
	pointer to the match, or NULL if there is none.
*/
const char *
fz_strstr(const char *haystack, const char *needle)
{
	if (haystack == NULL || needle == NULL)
		return NULL;

	return strstr(haystack, needle);
}
1191
/*
	Find the first occurrence of needle in haystack, ignoring case.

	Both strings are decoded as UTF-8 with fz_chartorune and compared
	one character at a time; two characters match if they are equal or
	become equal under fz_tolower. Either argument may be NULL, in
	which case NULL is returned. An empty needle matches at the start
	of haystack. Returns a pointer to the match, or NULL if there is
	none.

	Each attempt is tracked by its start pointer, not by byte counts,
	because characters that differ only in case need not have the same
	encoded length.
*/
const char *
fz_strstrcase(const char *haystack, const char *needle)
{
	const char *start;

	if (haystack == NULL || needle == NULL)
		return NULL;

	start = haystack;
	for (;;)
	{
		const char *h = start;
		const char *n = needle;
		int firstlen = 0;

		/* Try to match the whole needle at start. */
		for (;;)
		{
			int c, d;
			int nc, nd;

			nd = fz_chartorune(&d, n);
			if (d == 0)
				return start;
			nc = fz_chartorune(&c, h);
			/* The haystack ran out before the needle did, so no
			 * later start can match either. */
			if (c == 0)
				return NULL;
			if (h == start)
				firstlen = nc;
			if (c != d && fz_tolower(c) != fz_tolower(d))
				break;
			h += nc;
			n += nd;
		}

		/* Retry from the next haystack character. */
		start += firstlen;
	}
}
1222
/* Return 1 if c is an ASCII decimal digit, else 0. */
static inline int my_isdigit(int c) {
	/* A single unsigned comparison covers both ends of the range. */
	return (unsigned int)c - '0' < 10u;
}
1226
/*
	Compare two strings so that embedded runs of decimal digits are
	ordered by numeric value rather than byte by byte.

	Returns a negative value, zero or a positive value if l0 sorts
	before, equal to or after r0.
*/
int
fz_strverscmp(const char *l0, const char *r0)
{
	// This strverscmp implementation is borrowed from musl.
	// Copyright © 2005-2020 Rich Felker, et al.
	// Standard MIT license.
	const unsigned char *lhs = (const void *)l0;
	const unsigned char *rhs = (const void *)r0;
	size_t pos = 0;			/* first position where the strings differ */
	size_t digits_from = 0;		/* start of the digit run that ends the common prefix */
	int all_zeros = 1;		/* that digit run consists only of '0' so far */

	/* Find maximal matching prefix and track its maximal digit
	 * suffix and whether those digits are all zeros. */
	while (lhs[pos] == rhs[pos])
	{
		int ch = lhs[pos];

		if (ch == 0)
			return 0;
		if (my_isdigit(ch))
		{
			if (ch != '0')
				all_zeros = 0;
		}
		else
		{
			digits_from = pos + 1;
			all_zeros = 1;
		}
		pos++;
	}

	if (lhs[digits_from] != '0' && rhs[digits_from] != '0')
	{
		/* If we're not looking at a digit sequence that began
		 * with a zero, longest digit string is greater. */
		size_t scan = pos;

		while (my_isdigit(lhs[scan]))
		{
			if (!my_isdigit(rhs[scan]))
				return 1;
			scan++;
		}
		if (my_isdigit(rhs[scan]))
			return -1;
	}
	else if (all_zeros && digits_from < pos && (my_isdigit(lhs[pos]) || my_isdigit(rhs[pos])))
	{
		/* Otherwise, if common prefix of digit sequence is
		 * all zeros, digits order less than non-digits. */
		return (unsigned char)(lhs[pos] - '0') - (unsigned char)(rhs[pos] - '0');
	}

	return lhs[pos] - rhs[pos];
}