diff mupdf-source/thirdparty/mujs/jsstring.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/mujs/jsstring.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,798 @@
+#include "jsi.h"
+#include "utf.h"
+#include "regexp.h"
+
+/*
+ * Thin wrapper around js_regexec(): a negative result is reported through
+ * js_error(); any other result is handed back to the caller unchanged.
+ */
+static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
+{
+	int status;
+
+	status = js_regexec(prog, string, sub, eflags);
+	if (status >= 0)
+		return status;
+
+	js_error(J, "regexec failed");
+	return status;
+}
+
+/*
+ * Fetch the value at stack slot idx as a string, raising a TypeError first
+ * when js_iscoercible() rejects it (null or undefined, per the message).
+ */
+static const char *checkstring(js_State *J, int idx)
+{
+	int coercible = js_iscoercible(J, idx);
+
+	if (coercible == 0)
+		js_typeerror(J, "string function called on null or undefined");
+
+	return js_tostring(J, idx);
+}
+
+/*
+ * Return the UTF-16 code unit at index i of the UTF-8 string s.
+ *
+ * Runes >= 0x10000 occupy two index positions: the first yields the high
+ * surrogate, the second the low surrogate. Returns EOF when i is negative
+ * or a NUL byte is reached before index i. J is not used.
+ */
+int js_runeat(js_State *J, const char *s, int i)
+{
+	Rune rune = EOF;
+
+	while (i >= 0) {
+		rune = *(unsigned char*)s;
+		if (rune >= Runeself) {
+			/* multi-byte sequence: astral runes consume two indices */
+			s += chartorune(&rune, s);
+			i -= (rune >= 0x10000) ? 2 : 1;
+			continue;
+		}
+		if (rune == 0)
+			return EOF;
+		++s;
+		--i;
+	}
+
+	if (rune < 0x10000)
+		return rune;
+
+	/* i == -2: the index named the first half of the pair; else the second */
+	if (i == -2)
+		return 0xd800 + ((rune - 0x10000) >> 10);
+	return 0xdc00 + ((rune - 0x10000) & 0x3ff);
+}
+
+/*
+ * Length of the NUL-terminated UTF-8 string s in UTF-16 code units:
+ * runes >= 0x10000 count as two, everything else as one.
+ */
+int js_utflen(const char *s)
+{
+	Rune rune;
+	int units = 0;
+	int lead;
+
+	while ((lead = *(unsigned char *)s) != 0) {
+		if (lead < Runeself) {
+			/* single-byte character */
+			++s;
+			++units;
+			continue;
+		}
+		s += chartorune(&rune, s);
+		units += (rune >= 0x10000) ? 2 : 1;
+	}
+	return units;
+}
+
+/*
+ * Convert the byte pointer p (inside the UTF-8 string starting at s) into
+ * an index counted in UTF-16 code units: runes >= 0x10000 count as two.
+ *
+ * The width of each step is decided inside the branch that decoded it, so
+ * a single-byte character is never measured against a rune value left
+ * over from an earlier multi-byte character (or against an uninitialised
+ * variable when the string starts with ASCII).
+ */
+int js_utfptrtoidx(const char *s, const char *p)
+{
+	Rune rune;
+	int i = 0;
+	while (s < p) {
+		if (*(unsigned char *)s < Runeself) {
+			++s;
+			i += 1;
+		} else {
+			s += chartorune(&rune, s);
+			i += (rune >= 0x10000) ? 2 : 1;
+		}
+	}
+	return i;
+}
+
+/* `new String(v)`: build a String object from argument 1, or "" if absent. */
+static void jsB_new_String(js_State *J)
+{
+	const char *value = "";
+
+	if (js_gettop(J) > 1)
+		value = js_tostring(J, 1);
+	js_newstring(J, value);
+}
+
+/* `String(v)`: push argument 1 converted to a string, or "" if absent. */
+static void jsB_String(js_State *J)
+{
+	const char *value = "";
+
+	if (js_gettop(J) > 1)
+		value = js_tostring(J, 1);
+	js_pushstring(J, value);
+}
+
+/*
+ * String.prototype.toString: push the string held by the receiver object.
+ * Raises a TypeError when the receiver is not a JS_CSTRING object.
+ */
+static void Sp_toString(js_State *J)
+{
+	js_Object *receiver = js_toobject(J, 0);
+
+	if (JS_CSTRING != receiver->type)
+		js_typeerror(J, "not a string");
+
+	js_pushstring(J, receiver->u.s.string);
+}
+
+/*
+ * String.prototype.valueOf: same receiver check and result as
+ * String.prototype.toString, so the work is delegated to it.
+ */
+static void Sp_valueOf(js_State *J)
+{
+	Sp_toString(J);
+}
+
+/*
+ * String.prototype.charAt: push the code unit at the index given in
+ * argument 1 as a one-character string, or "" when js_runeat() reports
+ * that the index is out of range.
+ */
+static void Sp_charAt(js_State *J)
+{
+	char utf8[UTFmax + 1];
+	const char *str = checkstring(J, 0);
+	int index = js_tointeger(J, 1);
+	Rune unit = js_runeat(J, str, index);
+	int width;
+
+	if (unit < 0) {
+		js_pushliteral(J, "");
+		return;
+	}
+
+	width = runetochar(utf8, &unit);
+	utf8[width] = 0;
+	js_pushstring(J, utf8);
+}
+
+/*
+ * String.prototype.charCodeAt: push the numeric code unit at the index
+ * given in argument 1, or NaN when js_runeat() reports out of range.
+ */
+static void Sp_charCodeAt(js_State *J)
+{
+	const char *str = checkstring(J, 0);
+	int index = js_tointeger(J, 1);
+	Rune unit = js_runeat(J, str, index);
+
+	if (unit < 0) {
+		js_pushnumber(J, NAN);
+		return;
+	}
+	js_pushnumber(J, unit);
+}
+
+/*
+ * String.prototype.concat: append the string form of every argument to
+ * the receiver and push the result.
+ *
+ * With no arguments (stack top == 1) nothing is pushed and the function
+ * returns immediately. The growing buffer is released both on success and
+ * when an error is thrown inside the guarded region; exceeding
+ * JS_STRLIMIT raises a RangeError.
+ */
+static void Sp_concat(js_State *J)
+{
+	int argc = js_gettop(J);
+	int idx, total;
+	char * volatile out = NULL;
+	const char *piece;
+
+	if (argc == 1)
+		return;
+
+	piece = checkstring(J, 0);
+	total = 1 + strlen(piece); /* includes the terminating NUL */
+
+	if (js_try(J)) {
+		js_free(J, out);
+		js_throw(J);
+	}
+
+	if (total > JS_STRLIMIT)
+		js_rangeerror(J, "invalid string length");
+	out = js_malloc(J, total);
+	strcpy(out, piece);
+
+	idx = 1;
+	while (idx < argc) {
+		piece = js_tostring(J, idx);
+		total += strlen(piece);
+		if (total > JS_STRLIMIT)
+			js_rangeerror(J, "invalid string length");
+		out = js_realloc(J, out, total);
+		strcat(out, piece);
+		++idx;
+	}
+
+	js_pushstring(J, out);
+	js_endtry(J);
+	js_free(J, out);
+}
+
+/*
+ * String.prototype.indexOf: push the index of the first occurrence of
+ * argument 1 at or after the position in argument 2, or -1 if none.
+ *
+ * Indices are counted in UTF-16 code units (runes >= 0x10000 count as
+ * two), matching js_utflen() and js_runeat(). An empty needle also
+ * matches at the very end of the string, so searching for "" at or past
+ * the end yields the string length rather than -1.
+ */
+static void Sp_indexOf(js_State *J)
+{
+	const char *haystack = checkstring(J, 0);
+	const char *needle = js_tostring(J, 1);
+	int pos = js_tointeger(J, 2);
+	int len = strlen(needle);
+	int k = 0;
+	Rune rune;
+	while (*haystack) {
+		if (k >= pos && !strncmp(haystack, needle, len)) {
+			js_pushnumber(J, k);
+			return;
+		}
+		haystack += chartorune(&rune, haystack);
+		k += (rune >= 0x10000) ? 2 : 1;
+	}
+	/* end of string reached: only the empty needle can still match here */
+	if (len == 0)
+		js_pushnumber(J, k);
+	else
+		js_pushnumber(J, -1);
+}
+
+/*
+ * String.prototype.lastIndexOf: push the index of the last occurrence of
+ * argument 1 that starts at or before the position in argument 2
+ * (defaulting to the end of the string), or -1 if none.
+ *
+ * Indices are counted in UTF-16 code units (runes >= 0x10000 count as
+ * two). A negative position is treated as 0, and an empty needle may
+ * match at the very end of the string.
+ */
+static void Sp_lastIndexOf(js_State *J)
+{
+	const char *haystack = checkstring(J, 0);
+	const char *needle = js_tostring(J, 1);
+	/* the byte length is an upper bound on the code-unit length */
+	int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
+	int len = strlen(needle);
+	int k = 0, last = -1;
+	Rune rune;
+	if (pos < 0)
+		pos = 0;
+	while (*haystack && k <= pos) {
+		if (!strncmp(haystack, needle, len))
+			last = k;
+		haystack += chartorune(&rune, haystack);
+		k += (rune >= 0x10000) ? 2 : 1;
+	}
+	/* the empty needle also matches at the end of the string */
+	if (len == 0 && !*haystack && k <= pos)
+		last = k;
+	js_pushnumber(J, last);
+}
+
+/*
+ * String.prototype.localeCompare: push the strcmp() ordering of the
+ * receiver against argument 1 (a plain byte-wise comparison).
+ */
+static void Sp_localeCompare(js_State *J)
+{
+	const char *lhs = checkstring(J, 0);
+	const char *rhs = js_tostring(J, 1);
+	int order = strcmp(lhs, rhs);
+
+	js_pushnumber(J, order);
+}
+
+/*
+ * Push the substring of s that starts at UTF-16 index a and spans n code
+ * units. Callers are expected to have clamped a and a + n to the string
+ * length (Sp_slice and Sp_substring do).
+ *
+ * Runes >= 0x10000 occupy two index positions. When the requested range
+ * begins or ends in the middle of such a rune, the missing half is
+ * emitted as a lone surrogate encoded with runetochar(). Both ends may be
+ * split at the same time, and an empty range always yields "".
+ */
+static void Sp_substring_imp(js_State *J, const char *s, int a, int n)
+{
+	Rune head_rune = 0, tail_rune = 0, half;
+	const char *head, *tail;
+	char * volatile p = NULL; /* volatile + NULL: safe to free in the handler */
+	char *buf;
+	int i, k, len, split_head, split_tail;
+
+	/* find start of substring */
+	head = s;
+	for (i = 0; i < a; ++i) {
+		head += chartorune(&head_rune, head);
+		if (head_rune >= 0x10000)
+			++i;
+	}
+
+	/* find end of substring */
+	tail = head;
+	for (k = i - a; k < n; ++k) {
+		tail += chartorune(&tail_rune, tail);
+		if (tail_rune >= 0x10000)
+			++k;
+	}
+
+	/* range starts on a low surrogate (head is just after the character);
+	 * only relevant when the range is non-empty */
+	split_head = (i > a) && (n > 0);
+	/* range ends on a high surrogate (tail is just after the character);
+	 * tail > head guarantees tail_rune was actually decoded */
+	split_tail = (k > n) && (tail > head);
+
+	/* no surrogate pair splits! */
+	if (!split_head && !split_tail) {
+		js_pushlstring(J, head, tail - head);
+		return;
+	}
+
+	if (js_try(J)) {
+		js_free(J, p);
+		js_throw(J);
+	}
+
+	/* room for the copied bytes plus one encoded surrogate at each end */
+	p = buf = js_malloc(J, 2 * UTFmax + (tail - head));
+	len = 0;
+
+	if (split_head) {
+		half = 0xdc00 + ((head_rune - 0x10000) & 0x3ff);
+		len += runetochar(buf + len, &half);
+	}
+
+	/* drop the full encoding of the final rune; its high half is re-added */
+	if (split_tail)
+		tail -= runelen(tail_rune);
+	memcpy(buf + len, head, tail - head);
+	len += tail - head;
+
+	if (split_tail) {
+		half = 0xd800 + ((tail_rune - 0x10000) >> 10);
+		len += runetochar(buf + len, &half);
+	}
+
+	js_pushlstring(J, buf, len);
+	js_endtry(J);
+	js_free(J, buf);
+}
+
+/*
+ * String.prototype.slice(start, end): push the code units in
+ * [start, end). Negative indices count from the end of the string; both
+ * bounds are clamped to [0, length]. Unlike substring(), the bounds are
+ * never swapped: when start >= end the result is the empty string.
+ */
+static void Sp_slice(js_State *J)
+{
+	const char *str = checkstring(J, 0);
+	int len = js_utflen(str);
+	int s = js_tointeger(J, 1);
+	int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
+
+	/* negative indices are relative to the end */
+	s = s < 0 ? s + len : s;
+	e = e < 0 ? e + len : e;
+
+	s = s < 0 ? 0 : s > len ? len : s;
+	e = e < 0 ? 0 : e > len ? len : e;
+
+	if (s < e)
+		Sp_substring_imp(J, str, s, e - s);
+	else
+		js_pushliteral(J, "");
+}
+
+/*
+ * String.prototype.substring(start, end): clamp both bounds to
+ * [0, length], order them so the smaller comes first, and push the code
+ * units between them.
+ */
+static void Sp_substring(js_State *J)
+{
+	const char *str = checkstring(J, 0);
+	int len = js_utflen(str);
+	int from = js_tointeger(J, 1);
+	int to = len;
+	int lo, hi;
+
+	if (js_isdefined(J, 2))
+		to = js_tointeger(J, 2);
+
+	if (from < 0)
+		from = 0;
+	else if (from > len)
+		from = len;
+
+	if (to < 0)
+		to = 0;
+	else if (to > len)
+		to = len;
+
+	/* substring() swaps its bounds when they arrive out of order */
+	if (from < to) {
+		lo = from;
+		hi = to;
+	} else {
+		lo = to;
+		hi = from;
+	}
+	Sp_substring_imp(J, str, lo, hi - lo);
+}
+
+/*
+ * String.prototype.toLowerCase: push a copy of the receiver with every
+ * rune mapped through tolowerrune(). The scratch buffer is freed on both
+ * the normal and the error path.
+ */
+static void Sp_toLowerCase(js_State *J)
+{
+	const char *src = checkstring(J, 0);
+	char * volatile dst = NULL;
+	char *out;
+	Rune rune;
+
+	if (js_try(J)) {
+		js_free(J, dst);
+		js_throw(J);
+	}
+
+	/* sized for UTFmax output bytes per input byte, plus the NUL */
+	out = js_malloc(J, UTFmax * strlen(src) + 1);
+	dst = out;
+	for (; *src; out += runetochar(out, &rune)) {
+		src += chartorune(&rune, src);
+		rune = tolowerrune(rune);
+	}
+	*out = 0;
+
+	js_pushstring(J, dst);
+	js_endtry(J);
+	js_free(J, dst);
+}
+
+/*
+ * String.prototype.toUpperCase: push a copy of the receiver with every
+ * rune mapped through toupperrune(). The scratch buffer is freed on both
+ * the normal and the error path.
+ */
+static void Sp_toUpperCase(js_State *J)
+{
+	const char *src = checkstring(J, 0);
+	char * volatile dst = NULL;
+	char *out;
+	Rune rune;
+
+	if (js_try(J)) {
+		js_free(J, dst);
+		js_throw(J);
+	}
+
+	/* sized for UTFmax output bytes per input byte, plus the NUL */
+	out = js_malloc(J, UTFmax * strlen(src) + 1);
+	dst = out;
+	for (; *src; out += runetochar(out, &rune)) {
+		src += chartorune(&rune, src);
+		rune = toupperrune(rune);
+	}
+	*out = 0;
+
+	js_pushstring(J, dst);
+	js_endtry(J);
+	js_free(J, dst);
+}
+
+/*
+ * Return 1 when code point c is one of the characters removed by trim():
+ * TAB, VT, FF, SPACE, NBSP, U+FEFF, LF, CR, U+2028 or U+2029; else 0.
+ */
+static int istrim(int c)
+{
+	switch (c) {
+	case 0x9: case 0xA: case 0xB: case 0xC: case 0xD:
+	case 0x20:
+	case 0xA0:
+	case 0x2028: case 0x2029:
+	case 0xFEFF:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * String.prototype.trim: push the receiver with leading and trailing
+ * whitespace (as defined by istrim()) removed.
+ *
+ * The string is UTF-8, so whole runes are decoded before being tested.
+ * Testing raw bytes would never recognise multi-byte whitespace such as
+ * U+00A0 or U+2028, and where plain char is unsigned it would treat a
+ * trailing 0xA0 continuation byte as NBSP and cut a character in half.
+ */
+static void Sp_trim(js_State *J)
+{
+	const char *s, *e, *q;
+	Rune rune;
+	int n;
+
+	s = checkstring(J, 0);
+
+	/* skip leading whitespace runes */
+	while (*s) {
+		n = chartorune(&rune, s);
+		if (!istrim(rune))
+			break;
+		s += n;
+	}
+
+	/* drop trailing whitespace runes */
+	e = s + strlen(s);
+	while (e > s) {
+		/* back up over continuation bytes to the start of the last rune */
+		q = e - 1;
+		while (q > s && (*(unsigned char *)q & 0xC0) == 0x80)
+			--q;
+		n = chartorune(&rune, q);
+		/* stop on malformed input (rune does not end at e) or non-space */
+		if (q + n != e || !istrim(rune))
+			break;
+		e = q;
+	}
+
+	js_pushlstring(J, s, e - s);
+}
+
+/*
+ * String.fromCharCode: encode each argument (converted with
+ * js_touint32) as a rune and push the concatenation. The scratch buffer
+ * is freed on both the normal and the error path.
+ */
+static void S_fromCharCode(js_State *J)
+{
+	int argc = js_gettop(J);
+	int idx;
+	char * volatile s = NULL;
+	char *out;
+	Rune code;
+
+	if (js_try(J)) {
+		js_free(J, s);
+		js_throw(J);
+	}
+
+	/* at most UTFmax bytes per argument, plus the terminating NUL */
+	out = js_malloc(J, (argc-1) * UTFmax + 1);
+	s = out;
+
+	idx = 1;
+	while (idx < argc) {
+		code = js_touint32(J, idx);
+		out += runetochar(out, &code);
+		++idx;
+	}
+	*out = 0;
+
+	js_pushstring(J, s);
+	js_endtry(J);
+	js_free(J, s);
+}
+
+/*
+ * String.prototype.match: match the receiver against argument 1.
+ *
+ * Argument 1 is used as-is when it is a regexp; otherwise a new regexp is
+ * built from its string form ("" when undefined). Without the global flag
+ * the work is delegated to js_RegExp_prototype_exec(). With the global
+ * flag an array of every matched substring is pushed, or null when
+ * nothing matched.
+ */
+static void Sp_match(js_State *J)
+{
+	js_Regexp *re;
+	const char *text;
+	int len;
+	const char *a, *b, *c, *e;
+	Resub m;
+
+	text = checkstring(J, 0);
+
+	/* leave a regexp object on top of the stack */
+	if (js_isregexp(J, 1))
+		js_copy(J, 1);
+	else if (js_isundefined(J, 1))
+		js_newregexp(J, "", 0);
+	else
+		js_newregexp(J, js_tostring(J, 1), 0);
+
+	re = js_toregexp(J, -1);
+	if (!(re->flags & JS_REGEXP_G)) {
+		js_RegExp_prototype_exec(J, re, text);
+		return;
+	}
+
+	re->last = 0;
+
+	js_newarray(J);
+
+	len = 0; /* number of matches stored so far */
+	a = text; /* where the next search starts */
+	e = text + strlen(text);
+	while (a <= e) {
+		/* non-zero result: no further match */
+		if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
+			break;
+
+		b = m.sub[0].sp;
+		c = m.sub[0].ep;
+
+		js_pushlstring(J, b, c - b);
+		js_setindex(J, -2, len++);
+
+		/* resume after the match; step one byte past an empty match
+		 * so the loop always makes progress */
+		a = c;
+		if (c - b == 0)
+			++a;
+	}
+
+	/* no matches: replace the empty array with null */
+	if (len == 0) {
+		js_pop(J, 1);
+		js_pushnull(J);
+	}
+}
+
+/*
+ * String.prototype.search: push the UTF-16 index of the first match of
+ * argument 1 in the receiver, or -1 when there is none. A non-regexp
+ * argument is turned into a regexp from its string form ("" when
+ * undefined).
+ */
+static void Sp_search(js_State *J)
+{
+	js_Regexp *re;
+	const char *text;
+	Resub m;
+	int nomatch;
+
+	text = checkstring(J, 0);
+
+	/* leave a regexp object on top of the stack */
+	if (js_isregexp(J, 1)) {
+		js_copy(J, 1);
+	} else if (js_isundefined(J, 1)) {
+		js_newregexp(J, "", 0);
+	} else {
+		js_newregexp(J, js_tostring(J, 1), 0);
+	}
+
+	re = js_toregexp(J, -1);
+
+	nomatch = js_doregexec(J, re->prog, text, &m, 0);
+	if (nomatch) {
+		js_pushnumber(J, -1);
+		return;
+	}
+	js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
+}
+
+/*
+ * String.prototype.replace with a regexp pattern (argument 1).
+ *
+ * Argument 2 is either a callable, invoked with the matched text, each
+ * captured group, the match offset and the search string, or a
+ * replacement template in which $$, $&, $`, $' and $1..$99 are expanded.
+ * With the global flag every match is replaced; otherwise only the first.
+ * When nothing matches, the receiver itself is pushed.
+ *
+ * NOTE(review): sb is only released on the final path below; if one of
+ * the earlier calls throws, the buffer does not appear to be freed here.
+ */
+static void Sp_replace_regexp(js_State *J)
+{
+	js_Regexp *re;
+	const char *source, *s, *r;
+	js_Buffer *sb = NULL;
+	int n, x;
+	Resub m;
+
+	source = checkstring(J, 0);
+	re = js_toregexp(J, 1);
+
+	/* non-zero result: no match, so return the input unchanged */
+	if (js_doregexec(J, re->prog, source, &m, 0)) {
+		js_copy(J, 0);
+		return;
+	}
+
+	re->last = 0;
+
+loop:
+	s = m.sub[0].sp; /* start of the current match */
+	n = m.sub[0].ep - m.sub[0].sp; /* its length in bytes */
+
+	if (js_iscallable(J, 2)) {
+		js_copy(J, 2);
+		js_pushundefined(J);
+		for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
+			js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
+		js_pushnumber(J, s - source); /* arg x+2: offset within search string */
+		js_copy(J, 0); /* arg x+3: search string */
+		js_call(J, 2 + x);
+		r = js_tostring(J, -1);
+		js_putm(J, &sb, source, s);
+		js_puts(J, &sb, r);
+		js_pop(J, 1);
+	} else {
+		r = js_tostring(J, 2);
+		js_putm(J, &sb, source, s);
+		while (*r) {
+			if (*r == '$') {
+				switch (*(++r)) {
+				case 0: --r; /* end of string; back up */
+				/* fallthrough */
+				case '$': js_putc(J, &sb, '$'); break;
+				case '`': js_putm(J, &sb, source, s); break;
+				case '\'': js_puts(J, &sb, s + n); break;
+				case '&':
+					js_putm(J, &sb, s, s + n);
+					break;
+				case '0': case '1': case '2': case '3': case '4':
+				case '5': case '6': case '7': case '8': case '9':
+					x = *r - '0';
+					if (r[1] >= '0' && r[1] <= '9')
+						x = x * 10 + *(++r) - '0';
+					if (x > 0 && x < m.nsub) {
+						js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
+					} else {
+						/* not a valid group: emit the reference literally;
+						 * 10 and above need two digits */
+						js_putc(J, &sb, '$');
+						if (x >= 10) {
+							js_putc(J, &sb, '0' + x / 10);
+							js_putc(J, &sb, '0' + x % 10);
+						} else {
+							js_putc(J, &sb, '0' + x);
+						}
+					}
+					break;
+				default:
+					js_putc(J, &sb, '$');
+					js_putc(J, &sb, *r);
+					break;
+				}
+				++r;
+			} else {
+				js_putc(J, &sb, *r++);
+			}
+		}
+	}
+
+	if (re->flags & JS_REGEXP_G) {
+		source = m.sub[0].ep;
+		/* after an empty match copy one byte through so the search advances */
+		if (n == 0) {
+			if (*source)
+				js_putc(J, &sb, *source++);
+			else
+				goto end;
+		}
+		if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
+			goto loop;
+	}
+
+end:
+	/* append whatever follows the last match, then terminate the buffer */
+	js_puts(J, &sb, s + n);
+	js_putc(J, &sb, 0);
+
+	if (js_try(J)) {
+		js_free(J, sb);
+		js_throw(J);
+	}
+	js_pushstring(J, sb ? sb->s : "");
+	js_endtry(J);
+	js_free(J, sb);
+}
+
+/*
+ * String.prototype.replace with a plain-string pattern (argument 1).
+ *
+ * Only the first occurrence found by strstr() is replaced. Argument 2 is
+ * either a callable, invoked with the matched text, its offset and the
+ * search string, or a replacement template in which $$, $&, $` and $'
+ * are expanded. When the pattern does not occur, the receiver itself is
+ * pushed.
+ */
+static void Sp_replace_string(js_State *J)
+{
+	const char *source, *needle, *s, *r;
+	js_Buffer *sb = NULL;
+	int n;
+
+	source = checkstring(J, 0);
+	needle = js_tostring(J, 1);
+
+	s = strstr(source, needle);
+	if (!s) {
+		/* pattern not found: return the input unchanged */
+		js_copy(J, 0);
+		return;
+	}
+	n = strlen(needle);
+
+	if (js_iscallable(J, 2)) {
+		js_copy(J, 2);
+		js_pushundefined(J);
+		js_pushlstring(J, s, n); /* arg 1: substring that matched */
+		js_pushnumber(J, s - source); /* arg 2: offset within search string */
+		js_copy(J, 0); /* arg 3: search string */
+		js_call(J, 3);
+		r = js_tostring(J, -1);
+		/* prefix + callback result + suffix */
+		js_putm(J, &sb, source, s);
+		js_puts(J, &sb, r);
+		js_puts(J, &sb, s + n);
+		js_putc(J, &sb, 0);
+		js_pop(J, 1);
+	} else {
+		r = js_tostring(J, 2);
+		js_putm(J, &sb, source, s);
+		/* expand the replacement template character by character */
+		while (*r) {
+			if (*r == '$') {
+				switch (*(++r)) {
+				case 0: --r; /* end of string; back up */
+				/* fallthrough */
+				case '$': js_putc(J, &sb, '$'); break;
+				case '&': js_putm(J, &sb, s, s + n); break;
+				case '`': js_putm(J, &sb, source, s); break;
+				case '\'': js_puts(J, &sb, s + n); break;
+				default: js_putc(J, &sb, '$'); js_putc(J, &sb, *r); break;
+				}
+				++r;
+			} else {
+				js_putc(J, &sb, *r++);
+			}
+		}
+		js_puts(J, &sb, s + n);
+		js_putc(J, &sb, 0);
+	}
+
+	/* push the result; the buffer is freed whether or not the push throws */
+	if (js_try(J)) {
+		js_free(J, sb);
+		js_throw(J);
+	}
+	js_pushstring(J, sb ? sb->s : "");
+	js_endtry(J);
+	js_free(J, sb);
+}
+
+/*
+ * String.prototype.replace: dispatch on the type of argument 1, using
+ * the regexp implementation for regexps and the plain-string one for
+ * everything else.
+ */
+static void Sp_replace(js_State *J)
+{
+	if (!js_isregexp(J, 1)) {
+		Sp_replace_string(J);
+		return;
+	}
+	Sp_replace_regexp(J);
+}
+
+/*
+ * String.prototype.split with a regexp separator (argument 1).
+ *
+ * Pushes an array holding the pieces of the receiver between successive
+ * matches, with the captured groups of each match inserted after the
+ * piece that precedes it. Argument 2, when defined, limits the number of
+ * array elements; otherwise the limit is 1 << 30.
+ */
+static void Sp_split_regexp(js_State *J)
+{
+	js_Regexp *re;
+	const char *text;
+	int limit, len, k;
+	const char *p, *a, *b, *c, *e;
+	Resub m;
+
+	text = checkstring(J, 0);
+	re = js_toregexp(J, 1);
+	limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
+
+	js_newarray(J);
+	len = 0; /* number of elements stored so far */
+
+	if (limit == 0)
+		return;
+
+	e = text + strlen(text);
+
+	/* splitting the empty string */
+	if (e == text) {
+		/* non-zero result means no match: the result is [""];
+		 * on a match the array stays empty */
+		if (js_doregexec(J, re->prog, text, &m, 0)) {
+			js_pushliteral(J, "");
+			js_setindex(J, -2, 0);
+		}
+		return;
+	}
+
+	/* p: start of the pending piece; a: where the next search begins */
+	p = a = text;
+	while (a < e) {
+		if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
+			break; /* no match */
+
+		b = m.sub[0].sp;
+		c = m.sub[0].ep;
+
+		/* empty string at end of last match */
+		if (b == c && b == p) {
+			++a;
+			continue;
+		}
+
+		if (len == limit) return;
+		js_pushlstring(J, p, b - p);
+		js_setindex(J, -2, len++);
+
+		/* captured groups follow the piece they were split from */
+		for (k = 1; k < m.nsub; ++k) {
+			if (len == limit) return;
+			js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
+			js_setindex(J, -2, len++);
+		}
+
+		a = p = c;
+	}
+
+	/* remainder after the last separator */
+	if (len == limit) return;
+	js_pushstring(J, p);
+	js_setindex(J, -2, len);
+}
+
+/*
+ * String.prototype.split with a plain-string separator (argument 1).
+ *
+ * Pushes an array of the pieces of the receiver between occurrences of
+ * the separator; an empty separator splits into individual runes.
+ * Argument 2, when defined, limits the number of elements; otherwise the
+ * limit is 1 << 30.
+ */
+static void Sp_split_string(js_State *J)
+{
+	const char *str = checkstring(J, 0);
+	const char *sep = js_tostring(J, 1);
+	int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
+	int count, seplen, width;
+	const char *hit;
+	Rune rune;
+
+	js_newarray(J);
+
+	if (limit == 0)
+		return;
+
+	seplen = strlen(sep);
+
+	/* empty separator: one element per rune */
+	if (seplen == 0) {
+		count = 0;
+		while (*str && count < limit) {
+			width = chartorune(&rune, str);
+			js_pushlstring(J, str, width);
+			js_setindex(J, -2, count);
+			str += width;
+			++count;
+		}
+		return;
+	}
+
+	/* str becomes NULL once the final piece has been stored */
+	count = 0;
+	while (str != NULL && count < limit) {
+		hit = strstr(str, sep);
+		if (hit == NULL) {
+			js_pushstring(J, str);
+			js_setindex(J, -2, count);
+			str = NULL;
+		} else {
+			js_pushlstring(J, str, hit-str);
+			js_setindex(J, -2, count);
+			str = hit + seplen;
+		}
+		++count;
+	}
+}
+
+/*
+ * String.prototype.split: with an undefined separator push a one-element
+ * array holding the receiver's string form; otherwise dispatch to the
+ * regexp or plain-string implementation depending on argument 1.
+ */
+static void Sp_split(js_State *J)
+{
+	if (js_isundefined(J, 1)) {
+		js_newarray(J);
+		js_pushstring(J, js_tostring(J, 0));
+		js_setindex(J, -2, 0);
+		return;
+	}
+
+	if (js_isregexp(J, 1)) {
+		Sp_split_regexp(J);
+		return;
+	}
+
+	Sp_split_string(J);
+}
+
+/*
+ * Set up the String built-in: give String.prototype an empty string
+ * value, register the prototype methods and String.fromCharCode, and
+ * define the global "String" constructor with the JS_DONTENUM attribute.
+ */
+void jsB_initstring(js_State *J)
+{
+	/* the prototype object itself holds the empty string */
+	J->String_prototype->u.s.shrstr[0] = 0;
+	J->String_prototype->u.s.string = J->String_prototype->u.s.shrstr;
+	J->String_prototype->u.s.length = 0;
+
+	js_pushobject(J, J->String_prototype);
+	{
+		/* NOTE(review): the last jsB_propf argument is presumably the
+		 * function's declared argument count; confirm against jsB_propf */
+		jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
+		jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
+		jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
+		jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
+		jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
+		jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
+		jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
+		jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
+		jsB_propf(J, "String.prototype.match", Sp_match, 1);
+		jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
+		jsB_propf(J, "String.prototype.search", Sp_search, 1);
+		jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
+		jsB_propf(J, "String.prototype.split", Sp_split, 2);
+		jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
+		/* the locale-specific variants share the plain implementations */
+		jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
+		jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
+		jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
+		jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);
+
+		/* ES5 */
+		jsB_propf(J, "String.prototype.trim", Sp_trim, 0);
+	}
+	/* jsB_String handles plain calls, jsB_new_String handles `new` */
+	js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
+	{
+		jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */
+	}
+	js_defglobal(J, "String", JS_DONTENUM);
+}