Python2/PyMuPDF: mupdf-source/source/fitz/string.c comparison

comparison mupdf-source/source/fitz/string.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children

comparison

equal deleted inserted replaced

-:1d09e1dec1d9
+:b50eed0cc0ef
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+#include "mupdf/fitz.h"
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+#include <float.h>
+#include <stdlib.h>
+#ifdef _WIN32
+#include <windows.h> /* for MultiByteToWideChar etc. */
+#endif
+#include "utfdata.h"
+static const int *
+fz_ucd_bsearch(int c, const int *t, int n, int ne)
+{
+	const int *p;
+	int m;
+	while (n > 1)
+	{
+		m = n/2;
+		p = t + m*ne;
+		if (c >= p[0])
+		{
+			t = p;
+			n = n - m;
+		}
+		else
+		{
+			n = m;
+		}
+	}
+	if (n && c >= t[0])
+		return t;
+	return 0;
+}
+int
+fz_tolower(int c)
+{
+	const int *p;
+	/* Make ASCII fast. */
+	if (c < 128)
+	{
+		if (c >= 'A' && c <= 'Z')
+			c += 'a' - 'A';
+		return c;
+	}
+	p = fz_ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2) / 3, 3);
+	if (p && c >= p[0] && c <= p[1])
+		return c + p[2];
+	p = fz_ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1) / 2, 2);
+	if (p && c == p[0])
+		return c + p[1];
+	return c;
+}
+int
+fz_toupper(int c)
+{
+	const int *p;
+	p = fz_ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2) / 3, 3);
+	if (p && c >= p[0] && c <= p[1])
+		return c + p[2];
+	p = fz_ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1) / 2, 2);
+	if (p && c == p[0])
+		return c + p[1];
+	return c;
+}
+size_t
+fz_strnlen(const char *s, size_t n)
+{
+	const char *p = memchr(s, 0, n);
+	return p ? (size_t) (p - s) : n;
+}
+int
+fz_strncasecmp(const char *a, const char *b, size_t n)
+{
+	while (n > 0)
+	{
+		int ucs_a, ucs_b, n_a, n_b;
+		n_a = fz_chartorunen(&ucs_a, a, n);
+		n_b = fz_chartorunen(&ucs_b, b, n);
+		/* We believe that for all unicode characters X and Y, s.t.
+		 * fz_tolower(X) == fz_tolower(Y), X and Y must utf8 encode to
+		 * the same number of bytes. */
+		assert(n_a == n_b);
+		assert((size_t)n_a <= n);
+		// one or both of the strings are short
+		if (ucs_a == 0 || ucs_b == 0)
+			return ucs_a - ucs_b;
+		if (ucs_a != ucs_b)
+		{
+			ucs_a = fz_tolower(ucs_a);
+			ucs_b = fz_tolower(ucs_b);
+		}
+		if (ucs_a != ucs_b)
+			return ucs_a - ucs_b;
+		a += n_a;
+		b += n_b;
+		n -= n_a;
+	}
+	return 0;
+}
+int
+fz_strcasecmp(const char *a, const char *b)
+{
+	while (1)
+	{
+		int ucs_a, ucs_b;
+		a += fz_chartorune(&ucs_a, a);
+		b += fz_chartorune(&ucs_b, b);
+		ucs_a = fz_tolower(ucs_a);
+		ucs_b = fz_tolower(ucs_b);
+		if (ucs_a == ucs_b)
+		{
+			if (ucs_a == 0)
+				return 0;
+		}
+		else
+			return ucs_a - ucs_b;
+	}
+}
+char *
+fz_strsep(char **stringp, const char *delim)
+{
+	char *ret = *stringp;
+	if (!ret) return NULL;
+	if ((*stringp = strpbrk(*stringp, delim)) != NULL)
+		*((*stringp)++) = '\0';
+	return ret;
+}
+size_t
+fz_strlcpy(char *dst, const char *src, size_t siz)
+{
+	register char *d = dst;
+	register const char *s = src;
+	register size_t n = siz;
+	/* Copy as many bytes as will fit */
+	if (n != 0 && --n != 0) {
+		do {
+			if ((*d++ = *s++) == 0)
+				break;
+		} while (--n != 0);
+	}
+	/* Not enough room in dst, add NUL and traverse rest of src */
+	if (n == 0) {
+		if (siz != 0)
+			*d = '\0';		/* NUL-terminate dst */
+		while (*s++)
+			;
+	}
+	return(s - src - 1);	/* count does not include NUL */
+}
+size_t
+fz_strlcat(char *dst, const char *src, size_t siz)
+{
+	register char *d = dst;
+	register const char *s = src;
+	register size_t n = siz;
+	size_t dlen;
+	/* Find the end of dst and adjust bytes left but don't go past end */
+	while (*d != '\0' && n-- != 0)
+		d++;
+	dlen = d - dst;
+	n = siz - dlen;
+	if (n == 0)
+		return dlen + strlen(s);
+	while (*s != '\0') {
+		if (n != 1) {
+			*d++ = *s;
+			n--;
+		}
+		s++;
+	}
+	*d = '\0';
+	return dlen + (s - src);	/* count does not include NUL */
+}
+void
+fz_dirname(char *dir, const char *path, size_t n)
+{
+	size_t i;
+	if (!path || !path[0])
+	{
+		fz_strlcpy(dir, ".", n);
+		return;
+	}
+	fz_strlcpy(dir, path, n);
+	i = strlen(dir);
+	for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
+	for(; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, ".", n); return; }
+	for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
+	dir[i+1] = 0;
+}
+const char *
+fz_basename(const char *path)
+{
+	const char *name = strrchr(path, '/');
+	if (!name)
+		name = strrchr(path, '\\');
+	if (!name)
+		return path;
+	return name + 1;
+}
+#ifdef _WIN32
+char *fz_realpath(const char *path, char *buf)
+{
+	wchar_t wpath[PATH_MAX];
+	wchar_t wbuf[PATH_MAX];
+	int i;
+	if (!MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, PATH_MAX))
+		return NULL;
+	if (!GetFullPathNameW(wpath, PATH_MAX, wbuf, NULL))
+		return NULL;
+	if (!WideCharToMultiByte(CP_UTF8, 0, wbuf, -1, buf, PATH_MAX, NULL, NULL))
+		return NULL;
+	for (i=0; buf[i]; ++i)
+		if (buf[i] == '\\')
+			buf[i] = '/';
+	return buf;
+}
+#else
+char *fz_realpath(const char *path, char *buf)
+{
+	return realpath(path, buf);
+}
+#endif
+static inline int ishex(int a)
+{
+	return (a >= 'A' && a <= 'F') ||
+		(a >= 'a' && a <= 'f') ||
+		(a >= '0' && a <= '9');
+}
+static inline int tohex(int c)
+{
+	if (c >= '0' && c <= '9') return c - '0';
+	if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
+	if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
+	return 0;
+}
+#define URIRESERVED ";/?:@&=+$,"
+#define URIALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define URIDIGIT "0123456789"
+#define URIMARK "-_.!~*'()"
+#define URIUNESCAPED URIALPHA URIDIGIT URIMARK
+#define HEX "0123456789ABCDEF"
+/* Same as fz_decode_uri_component but in-place */
+char *
+fz_urldecode(char *url)
+{
+	char *s = url;
+	char *p = url;
+	while (*s)
+	{
+		int c = (unsigned char) *s++;
+		if (c == '%' && ishex(s[0]) && ishex(s[1]))
+		{
+			int a = tohex(*s++);
+			int b = tohex(*s++);
+			*p++ = a << 4 | b;
+		}
+		else
+		{
+			*p++ = c;
+		}
+	}
+	*p = 0;
+	return url;
+}
+char *
+fz_decode_uri_component(fz_context *ctx, const char *s)
+{
+	char *uri = fz_malloc(ctx, strlen(s) + 1);
+	char *p = uri;
+	while (*s)
+	{
+		int c = (unsigned char) *s++;
+		if (c == '%' && ishex(s[0]) && ishex(s[1]))
+		{
+			int a = tohex(*s++);
+			int b = tohex(*s++);
+			*p++ = a << 4 | b;
+		}
+		else
+		{
+			*p++ = c;
+		}
+	}
+	*p = 0;
+	return uri;
+}
+char *
+fz_decode_uri(fz_context *ctx, const char *s)
+{
+	char *uri = fz_malloc(ctx, strlen(s) + 1);
+	char *p = uri;
+	while (*s)
+	{
+		int c = (unsigned char) *s++;
+		if (c == '%' && ishex(s[0]) && ishex(s[1]))
+		{
+			int a = tohex(*s++);
+			int b = tohex(*s++);
+			c = a << 4 | b;
+			if (strchr(URIRESERVED "#", c)) {
+				*p++ = '%';
+				*p++ = HEX[a];
+				*p++ = HEX[b];
+			} else {
+				*p++ = c;
+			}
+		}
+		else
+		{
+			*p++ = c;
+		}
+	}
+	*p = 0;
+	return uri;
+}
+static char *
+fz_encode_uri_imp(fz_context *ctx, const char *s, const char *unescaped)
+{
+	char *uri = fz_malloc(ctx, strlen(s) * 3 + 1); /* allocate enough for worst case */
+	char *p = uri;
+	while (*s)
+	{
+		int c = (unsigned char) *s++;
+		if (strchr(unescaped, c))
+		{
+			*p++ = c;
+		}
+		else
+		{
+			*p++ = '%';
+			*p++ = HEX[(c >> 4) & 15];
+			*p++ = HEX[(c) & 15];
+		}
+	}
+	*p = 0;
+	return uri;
+}
+char *
+fz_encode_uri_component(fz_context *ctx, const char *s)
+{
+	return fz_encode_uri_imp(ctx, s, URIUNESCAPED);
+}
+char *
+fz_encode_uri_pathname(fz_context *ctx, const char *s)
+{
+	return fz_encode_uri_imp(ctx, s, URIUNESCAPED "/");
+}
+char *
+fz_encode_uri(fz_context *ctx, const char *s)
+{
+	return fz_encode_uri_imp(ctx, s, URIUNESCAPED URIRESERVED "#");
+}
+void
+fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page)
+{
+	const char *s, *p;
+	char num[40];
+	int i, n;
+	int z = 0;
+	for (i = 0; page; page /= 10)
+		num[i++] = '0' + page % 10;
+	num[i] = 0;
+	s = p = strchr(fmt, '%');
+	if (p)
+	{
+		++p;
+		while (*p >= '0' && *p <= '9')
+			z = z * 10 + (*p++ - '0');
+	}
+	if (p && *p == 'd')
+	{
+		++p;
+	}
+	else
+	{
+		s = p = strrchr(fmt, '.');
+		if (!p)
+			s = p = fmt + strlen(fmt);
+	}
+	if (z < 1)
+		z = 1;
+	while (i < z && i < (int)sizeof num)
+		num[i++] = '0';
+	n = s - fmt;
+	if (n + i + strlen(p) >= size)
+		fz_throw(ctx, FZ_ERROR_ARGUMENT, "path name buffer overflow");
+	memcpy(path, fmt, n);
+	while (i > 0)
+		path[n++] = num[--i];
+	fz_strlcpy(path + n, p, size - n);
+}
+#define SEP(x) ((x)=='/' || (x) == 0)
+char *
+fz_cleanname(char *name)
+{
+	char *p, *q, *dotdot;
+	int rooted;
+	rooted = name[0] == '/';
+	/*
+	 * invariants:
+	 *		p points at beginning of path element we're considering.
+	 *		q points just past the last path element we wrote (no slash).
+	 *		dotdot points just past the point where .. cannot backtrack
+	 *				any further (no slash).
+	 */
+	p = q = dotdot = name + rooted;
+	while (*p)
+	{
+		if(p[0] == '/') /* null element */
+			p++;
+		else if (p[0] == '.' && SEP(p[1]))
+			p += 1; /* don't count the separator in case it is nul */
+		else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
+		{
+			p += 2;
+			if (q > dotdot) /* can backtrack */
+			{
+				while(--q > dotdot && *q != '/')
+					;
+			}
+			else if (!rooted) /* /.. is / but ./../ is .. */
+			{
+				if (q != name)
+					*q++ = '/';
+				*q++ = '.';
+				*q++ = '.';
+				dotdot = q;
+			}
+		}
+		else /* real path element */
+		{
+			if (q != name+rooted)
+				*q++ = '/';
+			while ((*q = *p) != '/' && *q != 0)
+				p++, q++;
+		}
+	}
+	if (q == name) /* empty string is really "." */
+		*q++ = '.';
+	*q = '\0';
+	return name;
+}
+char *
+fz_cleanname_strdup(fz_context *ctx, const char *name)
+{
+	size_t len = strlen(name);
+	char *newname = fz_malloc(ctx, fz_maxz(2, len + 1));
+	memcpy(newname, name, len + 1);
+	newname[len] = '\0';
+	return fz_cleanname(newname);
+}
+enum
+{
+	UTFmax = 4, /* maximum bytes per rune */
+	Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
+	Runeself = 0x80, /* rune and UTF sequences are the same (<) */
+	Runeerror = 0xFFFD, /* decoding error in UTF */
+	Runemax = 0x10FFFF, /* maximum rune value */
+};
+enum
+{
+	Bit1 = 7,
+	Bitx = 6,
+	Bit2 = 5,
+	Bit3 = 4,
+	Bit4 = 3,
+	Bit5 = 2,
+	T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+	Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+	T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+	T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+	T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+	T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
+	Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
+	Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
+	Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+	Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
+	Maskx = (1<<Bitx)-1,	/* 0011 1111 */
+	Testx = Maskx ^ 0xFF,	/* 1100 0000 */
+	Bad = Runeerror,
+};
+int
+fz_chartorune(int *rune, const char *str)
+{
+	int c, c1, c2, c3;
+	int l;
+	/* overlong null character */
+	if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
+		*rune = 0;
+		return 2;
+	}
+	/*
+	 * one character sequence
+	 *	00000-0007F => T1
+	 */
+	c = *(const unsigned char*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	c1 = *(const unsigned char*)(str+1) ^ Tx;
+	if(c1 & Testx)
+		goto bad;
+	if(c < T3) {
+		if(c < T2)
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1)
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	c2 = *(const unsigned char*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2)
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+	/*
+	 * four character sequence (21-bit value)
+	 *	10000-1FFFFF => T4 Tx Tx Tx
+	 */
+	c3 = *(const unsigned char*)(str+3) ^ Tx;
+	if (c3 & Testx)
+		goto bad;
+	if (c < T5) {
+		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+		if (l <= Rune3)
+			goto bad;
+		*rune = l;
+		return 4;
+	}
+	/*
+	 * Support for 5-byte or longer UTF-8 would go here, but
+	 * since we don't have that, we'll just fall through to bad.
+	 */
+	/*
+	 * bad decoding
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+}
+int
+fz_chartorunen(int *rune, const char *str, size_t n)
+{
+	int c, c1, c2, c3;
+	int l;
+	if (n < 1)
+		goto bad;
+	/*
+	 * one character sequence
+	 *	00000-0007F => T1
+	 */
+	c = *(const unsigned char*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+	if (n < 2)
+		goto bad;
+	/* overlong null character */
+	if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
+		*rune = 0;
+		return 2;
+	}
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	c1 = *(const unsigned char*)(str+1) ^ Tx;
+	if(c1 & Testx)
+		goto bad;
+	if(c < T3) {
+		if(c < T2)
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1)
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+	if (n < 3)
+		goto bad;
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	c2 = *(const unsigned char*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2)
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+	if (n < 4)
+		goto bad;
+	/*
+	 * four character sequence (21-bit value)
+	 *	10000-1FFFFF => T4 Tx Tx Tx
+	 */
+	c3 = *(const unsigned char*)(str+3) ^ Tx;
+	if (c3 & Testx)
+		goto bad;
+	if (c < T5) {
+		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+		if (l <= Rune3)
+			goto bad;
+		*rune = l;
+		return 4;
+	}
+	/*
+	 * Support for 5-byte or longer UTF-8 would go here, but
+	 * since we don't have that, we'll just fall through to bad.
+	 */
+	/*
+	 * bad decoding
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+}
+int
+fz_runetochar(char *str, int rune)
+{
+	/* Runes are signed, so convert to unsigned for range check. */
+	unsigned int c = (unsigned int)rune;
+	/* overlong null character */
+	if (c == 0) {
+		((unsigned char *)str)[0] = 0xc0;
+		((unsigned char *)str)[1] = 0x80;
+		return 2;
+	}
+	/*
+	 * one character sequence
+	 *	00000-0007F => 00-7F
+	 */
+	if(c <= Rune1) {
+		str[0] = c;
+		return 1;
+	}
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	if(c <= Rune2) {
+		str[0] = T2 | (c >> 1*Bitx);
+		str[1] = Tx | (c & Maskx);
+		return 2;
+	}
+	/*
+	 * If the Rune is out of range, convert it to the error rune.
+	 * Do this test here because the error rune encodes to three bytes.
+	 * Doing it earlier would duplicate work, since an out of range
+	 * Rune wouldn't have fit in one or two bytes.
+	 */
+	if (c > Runemax)
+		c = Runeerror;
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	if (c <= Rune3) {
+		str[0] = T3 | (c >> 2*Bitx);
+		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+		str[2] = Tx | (c & Maskx);
+		return 3;
+	}
+	/*
+	 * four character sequence (21-bit value)
+	 *	10000-1FFFFF => T4 Tx Tx Tx
+	 */
+	str[0] = T4 | (c >> 3*Bitx);
+	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+	str[3] = Tx | (c & Maskx);
+	return 4;
+}
+int
+fz_runelen(int c)
+{
+	char str[10];
+	return fz_runetochar(str, c);
+}
+int
+fz_runeidx(const char *s, const char *p)
+{
+	int rune;
+	int i = 0;
+	while (s < p) {
+		if (*(unsigned char *)s < Runeself)
+			++s;
+		else
+			s += fz_chartorune(&rune, s);
+		++i;
+	}
+	return i;
+}
+const char *
+fz_runeptr(const char *s, int i)
+{
+	int rune;
+	while (i-- > 0) {
+		rune = *(unsigned char*)s;
+		if (rune < Runeself) {
+			if (rune == 0)
+				return NULL;
+			++s;
+		} else
+			s += fz_chartorune(&rune, s);
+	}
+	return s;
+}
+int
+fz_utflen(const char *s)
+{
+	int c, n, rune;
+	n = 0;
+	for(;;) {
+		c = *(const unsigned char*)s;
+		if(c < Runeself) {
+			if(c == 0)
+				return n;
+			s++;
+		} else
+			s += fz_chartorune(&rune, s);
+		n++;
+	}
+}
+float fz_atof(const char *s)
+{
+	float result;
+	if (s == NULL)
+		return 0;
+	errno = 0;
+	result = fz_strtof(s, NULL);
+	if ((errno == ERANGE && result == 0) || isnan(result))
+		/* Return 1.0 on  underflow, as it's a small known value that won't cause a divide by 0.  */
+		return 1;
+	result = fz_clamp(result, -FLT_MAX, FLT_MAX);
+	return result;
+}
+int fz_atoi(const char *s)
+{
+	if (s == NULL)
+		return 0;
+	return atoi(s);
+}
+int64_t fz_atoi64(const char *s)
+{
+	if (s == NULL)
+		return 0;
+	return atoll(s);
+}
+size_t fz_atoz(const char *s)
+{
+	int64_t i;
+	if (s == NULL)
+		return 0;
+	i = atoll(s);
+	if (i < 0 || (int64_t)(size_t)i != i)
+		return 0;
+	return (size_t)i;
+}
+int fz_is_page_range(fz_context *ctx, const char *s)
+{
+	/* TODO: check the actual syntax... */
+	while (*s)
+	{
+		if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',')
+			return 0;
+		s++;
+	}
+	return 1;
+}
+const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n)
+{
+	const char *orig = s;
+	if (!s || !s[0])
+		return NULL;
+	if (s[0] == ',')
+		s += 1;
+	if (s[0] == 'N')
+	{
+		*a = n;
+		s += 1;
+	}
+	else
+		*a = strtol(s, (char**)&s, 10);
+	if (s[0] == '-')
+	{
+		if (s[1] == 'N')
+		{
+			*b = n;
+			s += 2;
+		}
+		else
+			*b = strtol(s+1, (char**)&s, 10);
+	}
+	else
+		*b = *a;
+	if (*a < 0) *a = n + 1 + *a;
+	if (*b < 0) *b = n + 1 + *b;
+	*a = fz_clampi(*a, 1, n);
+	*b = fz_clampi(*b, 1, n);
+	if (s == orig)
+	{
+		fz_warn(ctx, "skipping invalid page range");
+		return NULL;
+	}
+	return s;
+}
+/* memmem from musl */
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define BITOP(a,b,op) \
+((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
+static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+	uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
+	for (h++, k--; k; k--, hw = hw<<8 | *++h)
+		if (hw == nw) return (char *)h-1;
+	return 0;
+}
+static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
+	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
+	for (h+=2, k-=2; k; k--, hw = (hw|*++h)<<8)
+		if (hw == nw) return (char *)h-2;
+	return 0;
+}
+static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
+	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
+	for (h+=3, k-=3; k; k--, hw = hw<<8 | *++h)
+		if (hw == nw) return (char *)h-3;
+	return 0;
+}
+static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
+{
+	size_t i, ip, jp, k, p, ms, p0, mem, mem0;
+	size_t byteset[32 / sizeof(size_t)] = { 0 };
+	size_t shift[256];
+	/* Computing length of needle and fill shift table */
+	for (i=0; i<l; i++)
+		BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
+	/* Compute maximal suffix */
+	ip = (size_t)-1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] > n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	ms = ip;
+	p0 = p;
+	/* And with the opposite comparison */
+	ip = (size_t)-1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] < n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	if (ip+1 > ms+1) ms = ip;
+	else p = p0;
+	/* Periodic needle? */
+	if (memcmp(n, n+p, ms+1)) {
+		mem0 = 0;
+		p = MAX(ms, l-ms-1) + 1;
+	} else mem0 = l-p;
+	mem = 0;
+	/* Search loop */
+	for (;;) {
+		/* If remainder of haystack is shorter than needle, done */
+		if ((size_t)(z-h) < l) return 0;
+		/* Check last byte first; advance by shift on mismatch */
+		if (BITOP(byteset, h[l-1], &)) {
+			k = l-shift[h[l-1]];
+			if (k) {
+				if (mem0 && mem && k < p) k = l-p;
+				h += k;
+				mem = 0;
+				continue;
+			}
+		} else {
+			h += l;
+			mem = 0;
+			continue;
+		}
+		/* Compare right half */
+		for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
+		if (k < l) {
+			h += k-ms;
+			mem = 0;
+			continue;
+		}
+		/* Compare left half */
+		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
+		if (k <= mem) return (char *)h;
+		h += p;
+		mem = mem0;
+	}
+}
+void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
+{
+	const unsigned char *h = h0, *n = n0;
+	/* Return immediately on empty needle */
+	if (!l) return (void *)h;
+	/* Return immediately when needle is longer than haystack */
+	if (k<l) return 0;
+	/* Use faster algorithms for short needles */
+	h = memchr(h0, *n, k);
+	if (!h || l==1) return (void *)h;
+	k -= h - (const unsigned char *)h0;
+	if (k<l) return 0;
+	if (l==2) return twobyte_memmem(h, k, n);
+	if (l==3) return threebyte_memmem(h, k, n);
+	if (l==4) return fourbyte_memmem(h, k, n);
+	return twoway_memmem(h, h+k, n, l);
+}
+char *
+fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s)
+{
+	const wchar_t *src = s;
+	char *d;
+	char *dst;
+	int len = 1;
+	while (*src)
+	{
+		len += fz_runelen(*src++);
+	}
+	d = Memento_label(fz_malloc(ctx, len), "utf8_from_wchar");
+	dst = d;
+	src = s;
+	while (*src)
+	{
+		dst += fz_runetochar(dst, *src++);
+	}
+	*dst = 0;
+	return d;
+}
+wchar_t *
+fz_wchar_from_utf8(fz_context *ctx, const char *path)
+{
+	size_t z = 0;
+	const char *p = path;
+	wchar_t *wpath, *w;
+	if (!path)
+		return NULL;
+	while (*p)
+	{
+		int c;
+		p += fz_chartorune(&c, p);
+		z++;
+		if (c >= 0x10000)
+			z++;
+	}
+	w = wpath = fz_malloc(ctx, 2*(z+1));
+	while (*path)
+	{
+		int c;
+		path += fz_chartorune(&c, path);
+		if (c >= 0x10000)
+		{
+			c -= 0x10000;
+			*w++ = 0xd800 + (c>>10);
+			*w++ = 0xdc00 + (c&1023);
+		}
+		else
+			*w++ = c;
+	}
+	*w = 0;
+	return wpath;
+}
+const char *
+fz_strstr(const char *haystack, const char *needle)
+{
+	size_t matchlen = 0;
+	char d;
+	if (haystack == NULL || needle == NULL)
+		return NULL;
+	while ((d = needle[matchlen]) != 0)
+	{
+		char c = *haystack++;
+		if (c == 0)
+			return NULL;
+		if (c == d)
+			matchlen++;
+		else
+		{
+			haystack -= matchlen;
+			matchlen = 0;
+		}
+	}
+	return haystack - matchlen;
+}
+const char *
+fz_strstrcase(const char *haystack, const char *needle)
+{
+	size_t matchlen = 0;
+	size_t firstlen;
+	if (haystack == NULL || needle == NULL)
+		return NULL;
+	while (1)
+	{
+		int c, d;
+		int nc, nd;
+		nd = fz_chartorune(&d, &needle[matchlen]);
+		if (d == 0)
+			break;
+		nc = fz_chartorune(&c, haystack);
+		if (matchlen == 0)
+			firstlen = nc;
+		haystack += nc;
+		matchlen += nd;
+		if (c == 0)
+			return NULL;
+		if (c != d)
+			haystack -= matchlen - firstlen, matchlen = 0;
+	}
+	return haystack - matchlen;
+}
+static inline int my_isdigit(int c) {
+	return c >= '0' && c <= '9';
+}
+int
+fz_strverscmp(const char *l0, const char *r0)
+{
+	// This strverscmp implementation is borrowed from musl.
+	// Copyright © 2005-2020 Rich Felker, et al.
+	// Standard MIT license.
+	const unsigned char *l = (const void *)l0;
+	const unsigned char *r = (const void *)r0;
+	size_t i, dp, j;
+	int z = 1;
+	/* Find maximal matching prefix and track its maximal digit
+	 * suffix and whether those digits are all zeros. */
+	for (dp=i=0; l[i]==r[i]; i++) {
+		int c = l[i];
+		if (!c) return 0;
+		if (!my_isdigit(c)) dp=i+1, z=1;
+		else if (c!='0') z=0;
+	}
+	if (l[dp]!='0' && r[dp]!='0') {
+		/* If we're not looking at a digit sequence that began
+		 * with a zero, longest digit string is greater. */
+		for (j=i; my_isdigit(l[j]); j++)
+			if (!my_isdigit(r[j])) return 1;
+		if (my_isdigit(r[j])) return -1;
+	} else if (z && dp<i && (my_isdigit(l[i]) || my_isdigit(r[i]))) {
+		/* Otherwise, if common prefix of digit sequence is
+		 * all zeros, digits order less than non-digits. */
+		return (unsigned char)(l[i]-'0') - (unsigned char)(r[i]-'0');
+	}
+	return l[i] - r[i];
+}

Mercurial > hgrepos > Python2 > PyMuPDF

comparison mupdf-source/source/fitz/string.c @ 2:b50eed0cc0ef upstream