Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/mujs/jsstring.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/mujs/jsstring.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,798 @@ +#include "jsi.h" +#include "utf.h" +#include "regexp.h" + +static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags) +{ + int result = js_regexec(prog, string, sub, eflags); + if (result < 0) + js_error(J, "regexec failed"); + return result; +} + +static const char *checkstring(js_State *J, int idx) +{ + if (!js_iscoercible(J, idx)) + js_typeerror(J, "string function called on null or undefined"); + return js_tostring(J, idx); +} + +int js_runeat(js_State *J, const char *s, int i) +{ + Rune rune = EOF; + while (i >= 0) { + rune = *(unsigned char*)s; + if (rune < Runeself) { + if (rune == 0) + return EOF; + ++s; + --i; + } else { + s += chartorune(&rune, s); + if (rune >= 0x10000) + i -= 2; + else + --i; + } + } + if (rune >= 0x10000) { + /* high surrogate */ + if (i == -2) + return 0xd800 + ((rune - 0x10000) >> 10); + /* low surrogate */ + else + return 0xdc00 + ((rune - 0x10000) & 0x3ff); + } + return rune; +} + +int js_utflen(const char *s) +{ + int c; + int n; + Rune rune; + + n = 0; + for(;;) { + c = *(unsigned char *)s; + if (c < Runeself) { + if (c == 0) + return n; + s++; + n++; + } else { + s += chartorune(&rune, s); + if (rune >= 0x10000) + n += 2; + else + n++; + } + } +} + +int js_utfptrtoidx(const char *s, const char *p) +{ + Rune rune; + int i = 0; + while (s < p) { + if (*(unsigned char *)s < Runeself) + ++s; + else + s += chartorune(&rune, s); + if (rune >= 0x10000) + i += 2; + else + i += 1; + } + return i; +} + +static void jsB_new_String(js_State *J) +{ + js_newstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : ""); +} + +static void jsB_String(js_State *J) +{ + js_pushstring(J, js_gettop(J) > 1 ? 
js_tostring(J, 1) : ""); +} + +static void Sp_toString(js_State *J) +{ + js_Object *self = js_toobject(J, 0); + if (self->type != JS_CSTRING) js_typeerror(J, "not a string"); + js_pushstring(J, self->u.s.string); +} + +static void Sp_valueOf(js_State *J) +{ + js_Object *self = js_toobject(J, 0); + if (self->type != JS_CSTRING) js_typeerror(J, "not a string"); + js_pushstring(J, self->u.s.string); +} + +static void Sp_charAt(js_State *J) +{ + char buf[UTFmax + 1]; + const char *s = checkstring(J, 0); + int pos = js_tointeger(J, 1); + Rune rune = js_runeat(J, s, pos); + if (rune >= 0) { + buf[runetochar(buf, &rune)] = 0; + js_pushstring(J, buf); + } else { + js_pushliteral(J, ""); + } +} + +static void Sp_charCodeAt(js_State *J) +{ + const char *s = checkstring(J, 0); + int pos = js_tointeger(J, 1); + Rune rune = js_runeat(J, s, pos); + if (rune >= 0) + js_pushnumber(J, rune); + else + js_pushnumber(J, NAN); +} + +static void Sp_concat(js_State *J) +{ + int i, top = js_gettop(J); + int n; + char * volatile out = NULL; + const char *s; + + if (top == 1) + return; + + s = checkstring(J, 0); + n = 1 + strlen(s); + + if (js_try(J)) { + js_free(J, out); + js_throw(J); + } + + if (n > JS_STRLIMIT) + js_rangeerror(J, "invalid string length"); + out = js_malloc(J, n); + strcpy(out, s); + + for (i = 1; i < top; ++i) { + s = js_tostring(J, i); + n += strlen(s); + if (n > JS_STRLIMIT) + js_rangeerror(J, "invalid string length"); + out = js_realloc(J, out, n); + strcat(out, s); + } + + js_pushstring(J, out); + js_endtry(J); + js_free(J, out); +} + +static void Sp_indexOf(js_State *J) +{ + const char *haystack = checkstring(J, 0); + const char *needle = js_tostring(J, 1); + int pos = js_tointeger(J, 2); + int len = strlen(needle); + int k = 0; + Rune rune; + while (*haystack) { + if (k >= pos && !strncmp(haystack, needle, len)) { + js_pushnumber(J, k); + return; + } + haystack += chartorune(&rune, haystack); + ++k; + } + js_pushnumber(J, -1); +} + +static void 
Sp_lastIndexOf(js_State *J) +{ + const char *haystack = checkstring(J, 0); + const char *needle = js_tostring(J, 1); + int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack); + int len = strlen(needle); + int k = 0, last = -1; + Rune rune; + while (*haystack && k <= pos) { + if (!strncmp(haystack, needle, len)) + last = k; + haystack += chartorune(&rune, haystack); + ++k; + } + js_pushnumber(J, last); +} + +static void Sp_localeCompare(js_State *J) +{ + const char *a = checkstring(J, 0); + const char *b = js_tostring(J, 1); + js_pushnumber(J, strcmp(a, b)); +} + +static void Sp_substring_imp(js_State *J, const char *s, int a, int n) +{ + Rune head_rune = 0, tail_rune = 0; + const char *head, *tail; + char *p; + int i, k, head_len, tail_len; + + /* find start of substring */ + head = s; + for (i = 0; i < a; ++i) { + head += chartorune(&head_rune, head); + if (head_rune >= 0x10000) + ++i; + } + + /* find end of substring */ + tail = head; + for (k = i - a; k < n; ++k) { + tail += chartorune(&tail_rune, tail); + if (tail_rune >= 0x10000) + ++k; + } + + /* no surrogate pair splits! 
*/ + if (i == a && k == n) { + js_pushlstring(J, head, tail - head); + return; + } + + if (js_try(J)) { + js_free(J, p); + js_throw(J); + } + + p = js_malloc(J, UTFmax + (tail - head)); + + /* substring starts with low surrogate (head is just after character) */ + if (i > a) { + head_rune = 0xdc00 + ((head_rune - 0x10000) & 0x3ff); + head_len = runetochar(p, &head_rune); + memcpy(p + head_len, head, tail - head); + js_pushlstring(J, p, head_len + (tail - head)); + } + + /* substring ends with high surrogate (tail is just after character) */ + if (k > n) { + tail -= runelen(tail_rune); + memcpy(p, head, tail - head); + tail_rune = 0xd800 + ((tail_rune - 0x10000) >> 10); + tail_len = runetochar(p + (tail - head), &tail_rune); + js_pushlstring(J, p, (tail - head) + tail_len); + } + + js_endtry(J); + js_free(J, p); +} + +static void Sp_slice(js_State *J) +{ + const char *str = checkstring(J, 0); + int len = js_utflen(str); + int s = js_tointeger(J, 1); + int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len; + + s = s < 0 ? s + len : s; + e = e < 0 ? e + len : e; + + s = s < 0 ? 0 : s > len ? len : s; + e = e < 0 ? 0 : e > len ? len : e; + + if (s < e) + Sp_substring_imp(J, str, s, e - s); + else + Sp_substring_imp(J, str, e, s - e); +} + +static void Sp_substring(js_State *J) +{ + const char *str = checkstring(J, 0); + int len = js_utflen(str); + int s = js_tointeger(J, 1); + int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len; + + s = s < 0 ? 0 : s > len ? len : s; + e = e < 0 ? 0 : e > len ? 
len : e; + + if (s < e) + Sp_substring_imp(J, str, s, e - s); + else + Sp_substring_imp(J, str, e, s - e); +} + +static void Sp_toLowerCase(js_State *J) +{ + const char *s = checkstring(J, 0); + char * volatile dst = NULL; + char *d; + Rune rune; + + if (js_try(J)) { + js_free(J, dst); + js_throw(J); + } + + d = dst = js_malloc(J, UTFmax * strlen(s) + 1); + while (*s) { + s += chartorune(&rune, s); + rune = tolowerrune(rune); + d += runetochar(d, &rune); + } + *d = 0; + + js_pushstring(J, dst); + js_endtry(J); + js_free(J, dst); +} + +static void Sp_toUpperCase(js_State *J) +{ + const char *s = checkstring(J, 0); + char * volatile dst = NULL; + char *d; + Rune rune; + + if (js_try(J)) { + js_free(J, dst); + js_throw(J); + } + + d = dst = js_malloc(J, UTFmax * strlen(s) + 1); + while (*s) { + s += chartorune(&rune, s); + rune = toupperrune(rune); + d += runetochar(d, &rune); + } + *d = 0; + + js_pushstring(J, dst); + js_endtry(J); + js_free(J, dst); +} + +static int istrim(int c) +{ + return c == 0x9 || c == 0xB || c == 0xC || c == 0x20 || c == 0xA0 || c == 0xFEFF || + c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029; +} + +static void Sp_trim(js_State *J) +{ + const char *s, *e; + s = checkstring(J, 0); + while (istrim(*s)) + ++s; + e = s + strlen(s); + while (e > s && istrim(e[-1])) + --e; + js_pushlstring(J, s, e - s); +} + +static void S_fromCharCode(js_State *J) +{ + int i, top = js_gettop(J); + char * volatile s = NULL; + char *p; + Rune c; + + if (js_try(J)) { + js_free(J, s); + js_throw(J); + } + + s = p = js_malloc(J, (top-1) * UTFmax + 1); + + for (i = 1; i < top; ++i) { + c = js_touint32(J, i); + p += runetochar(p, &c); + } + *p = 0; + + js_pushstring(J, s); + js_endtry(J); + js_free(J, s); +} + +static void Sp_match(js_State *J) +{ + js_Regexp *re; + const char *text; + int len; + const char *a, *b, *c, *e; + Resub m; + + text = checkstring(J, 0); + + if (js_isregexp(J, 1)) + js_copy(J, 1); + else if (js_isundefined(J, 1)) + js_newregexp(J, "", 0); + 
else + js_newregexp(J, js_tostring(J, 1), 0); + + re = js_toregexp(J, -1); + if (!(re->flags & JS_REGEXP_G)) { + js_RegExp_prototype_exec(J, re, text); + return; + } + + re->last = 0; + + js_newarray(J); + + len = 0; + a = text; + e = text + strlen(text); + while (a <= e) { + if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0)) + break; + + b = m.sub[0].sp; + c = m.sub[0].ep; + + js_pushlstring(J, b, c - b); + js_setindex(J, -2, len++); + + a = c; + if (c - b == 0) + ++a; + } + + if (len == 0) { + js_pop(J, 1); + js_pushnull(J); + } +} + +static void Sp_search(js_State *J) +{ + js_Regexp *re; + const char *text; + Resub m; + + text = checkstring(J, 0); + + if (js_isregexp(J, 1)) + js_copy(J, 1); + else if (js_isundefined(J, 1)) + js_newregexp(J, "", 0); + else + js_newregexp(J, js_tostring(J, 1), 0); + + re = js_toregexp(J, -1); + + if (!js_doregexec(J, re->prog, text, &m, 0)) + js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp)); + else + js_pushnumber(J, -1); +} + +static void Sp_replace_regexp(js_State *J) +{ + js_Regexp *re; + const char *source, *s, *r; + js_Buffer *sb = NULL; + int n, x; + Resub m; + + source = checkstring(J, 0); + re = js_toregexp(J, 1); + + if (js_doregexec(J, re->prog, source, &m, 0)) { + js_copy(J, 0); + return; + } + + re->last = 0; + +loop: + s = m.sub[0].sp; + n = m.sub[0].ep - m.sub[0].sp; + + if (js_iscallable(J, 2)) { + js_copy(J, 2); + js_pushundefined(J); + for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */ + js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp); + js_pushnumber(J, s - source); /* arg x+2: offset within search string */ + js_copy(J, 0); /* arg x+3: search string */ + js_call(J, 2 + x); + r = js_tostring(J, -1); + js_putm(J, &sb, source, s); + js_puts(J, &sb, r); + js_pop(J, 1); + } else { + r = js_tostring(J, 2); + js_putm(J, &sb, source, s); + while (*r) { + if (*r == '$') { + switch (*(++r)) { + case 0: --r; /* end of string; back up */ + /* fallthrough */ + case 
'$': js_putc(J, &sb, '$'); break; + case '`': js_putm(J, &sb, source, s); break; + case '\'': js_puts(J, &sb, s + n); break; + case '&': + js_putm(J, &sb, s, s + n); + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + x = *r - '0'; + if (r[1] >= '0' && r[1] <= '9') + x = x * 10 + *(++r) - '0'; + if (x > 0 && x < m.nsub) { + js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep); + } else { + js_putc(J, &sb, '$'); + if (x > 10) { + js_putc(J, &sb, '0' + x / 10); + js_putc(J, &sb, '0' + x % 10); + } else { + js_putc(J, &sb, '0' + x); + } + } + break; + default: + js_putc(J, &sb, '$'); + js_putc(J, &sb, *r); + break; + } + ++r; + } else { + js_putc(J, &sb, *r++); + } + } + } + + if (re->flags & JS_REGEXP_G) { + source = m.sub[0].ep; + if (n == 0) { + if (*source) + js_putc(J, &sb, *source++); + else + goto end; + } + if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL)) + goto loop; + } + +end: + js_puts(J, &sb, s + n); + js_putc(J, &sb, 0); + + if (js_try(J)) { + js_free(J, sb); + js_throw(J); + } + js_pushstring(J, sb ? 
sb->s : ""); + js_endtry(J); + js_free(J, sb); +} + +static void Sp_replace_string(js_State *J) +{ + const char *source, *needle, *s, *r; + js_Buffer *sb = NULL; + int n; + + source = checkstring(J, 0); + needle = js_tostring(J, 1); + + s = strstr(source, needle); + if (!s) { + js_copy(J, 0); + return; + } + n = strlen(needle); + + if (js_iscallable(J, 2)) { + js_copy(J, 2); + js_pushundefined(J); + js_pushlstring(J, s, n); /* arg 1: substring that matched */ + js_pushnumber(J, s - source); /* arg 2: offset within search string */ + js_copy(J, 0); /* arg 3: search string */ + js_call(J, 3); + r = js_tostring(J, -1); + js_putm(J, &sb, source, s); + js_puts(J, &sb, r); + js_puts(J, &sb, s + n); + js_putc(J, &sb, 0); + js_pop(J, 1); + } else { + r = js_tostring(J, 2); + js_putm(J, &sb, source, s); + while (*r) { + if (*r == '$') { + switch (*(++r)) { + case 0: --r; /* end of string; back up */ + /* fallthrough */ + case '$': js_putc(J, &sb, '$'); break; + case '&': js_putm(J, &sb, s, s + n); break; + case '`': js_putm(J, &sb, source, s); break; + case '\'': js_puts(J, &sb, s + n); break; + default: js_putc(J, &sb, '$'); js_putc(J, &sb, *r); break; + } + ++r; + } else { + js_putc(J, &sb, *r++); + } + } + js_puts(J, &sb, s + n); + js_putc(J, &sb, 0); + } + + if (js_try(J)) { + js_free(J, sb); + js_throw(J); + } + js_pushstring(J, sb ? sb->s : ""); + js_endtry(J); + js_free(J, sb); +} + +static void Sp_replace(js_State *J) +{ + if (js_isregexp(J, 1)) + Sp_replace_regexp(J); + else + Sp_replace_string(J); +} + +static void Sp_split_regexp(js_State *J) +{ + js_Regexp *re; + const char *text; + int limit, len, k; + const char *p, *a, *b, *c, *e; + Resub m; + + text = checkstring(J, 0); + re = js_toregexp(J, 1); + limit = js_isdefined(J, 2) ? 
js_tointeger(J, 2) : 1 << 30; + + js_newarray(J); + len = 0; + + if (limit == 0) + return; + + e = text + strlen(text); + + /* splitting the empty string */ + if (e == text) { + if (js_doregexec(J, re->prog, text, &m, 0)) { + js_pushliteral(J, ""); + js_setindex(J, -2, 0); + } + return; + } + + p = a = text; + while (a < e) { + if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0)) + break; /* no match */ + + b = m.sub[0].sp; + c = m.sub[0].ep; + + /* empty string at end of last match */ + if (b == c && b == p) { + ++a; + continue; + } + + if (len == limit) return; + js_pushlstring(J, p, b - p); + js_setindex(J, -2, len++); + + for (k = 1; k < m.nsub; ++k) { + if (len == limit) return; + js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp); + js_setindex(J, -2, len++); + } + + a = p = c; + } + + if (len == limit) return; + js_pushstring(J, p); + js_setindex(J, -2, len); +} + +static void Sp_split_string(js_State *J) +{ + const char *str = checkstring(J, 0); + const char *sep = js_tostring(J, 1); + int limit = js_isdefined(J, 2) ? 
js_tointeger(J, 2) : 1 << 30; + int i, n; + + js_newarray(J); + + if (limit == 0) + return; + + n = strlen(sep); + + /* empty string */ + if (n == 0) { + Rune rune; + for (i = 0; *str && i < limit; ++i) { + n = chartorune(&rune, str); + js_pushlstring(J, str, n); + js_setindex(J, -2, i); + str += n; + } + return; + } + + for (i = 0; str && i < limit; ++i) { + const char *s = strstr(str, sep); + if (s) { + js_pushlstring(J, str, s-str); + js_setindex(J, -2, i); + str = s + n; + } else { + js_pushstring(J, str); + js_setindex(J, -2, i); + str = NULL; + } + } +} + +static void Sp_split(js_State *J) +{ + if (js_isundefined(J, 1)) { + js_newarray(J); + js_pushstring(J, js_tostring(J, 0)); + js_setindex(J, -2, 0); + } else if (js_isregexp(J, 1)) { + Sp_split_regexp(J); + } else { + Sp_split_string(J); + } +} + +void jsB_initstring(js_State *J) +{ + J->String_prototype->u.s.shrstr[0] = 0; + J->String_prototype->u.s.string = J->String_prototype->u.s.shrstr; + J->String_prototype->u.s.length = 0; + + js_pushobject(J, J->String_prototype); + { + jsB_propf(J, "String.prototype.toString", Sp_toString, 0); + jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0); + jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1); + jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1); + jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */ + jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1); + jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1); + jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1); + jsB_propf(J, "String.prototype.match", Sp_match, 1); + jsB_propf(J, "String.prototype.replace", Sp_replace, 2); + jsB_propf(J, "String.prototype.search", Sp_search, 1); + jsB_propf(J, "String.prototype.slice", Sp_slice, 2); + jsB_propf(J, "String.prototype.split", Sp_split, 2); + jsB_propf(J, "String.prototype.substring", Sp_substring, 2); + jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0); + jsB_propf(J, 
"String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0); + jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0); + jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0); + + /* ES5 */ + jsB_propf(J, "String.prototype.trim", Sp_trim, 0); + } + js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */ + { + jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */ + } + js_defglobal(J, "String", JS_DONTENUM); +}
