comparison mupdf-source/thirdparty/mujs/jsstring.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #include "jsi.h"
2 #include "utf.h"
3 #include "regexp.h"
4
5 static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
6 {
7 int result = js_regexec(prog, string, sub, eflags);
8 if (result < 0)
9 js_error(J, "regexec failed");
10 return result;
11 }
12
13 static const char *checkstring(js_State *J, int idx)
14 {
15 if (!js_iscoercible(J, idx))
16 js_typeerror(J, "string function called on null or undefined");
17 return js_tostring(J, idx);
18 }
19
20 int js_runeat(js_State *J, const char *s, int i)
21 {
22 Rune rune = EOF;
23 while (i >= 0) {
24 rune = *(unsigned char*)s;
25 if (rune < Runeself) {
26 if (rune == 0)
27 return EOF;
28 ++s;
29 --i;
30 } else {
31 s += chartorune(&rune, s);
32 if (rune >= 0x10000)
33 i -= 2;
34 else
35 --i;
36 }
37 }
38 if (rune >= 0x10000) {
39 /* high surrogate */
40 if (i == -2)
41 return 0xd800 + ((rune - 0x10000) >> 10);
42 /* low surrogate */
43 else
44 return 0xdc00 + ((rune - 0x10000) & 0x3ff);
45 }
46 return rune;
47 }
48
49 int js_utflen(const char *s)
50 {
51 int c;
52 int n;
53 Rune rune;
54
55 n = 0;
56 for(;;) {
57 c = *(unsigned char *)s;
58 if (c < Runeself) {
59 if (c == 0)
60 return n;
61 s++;
62 n++;
63 } else {
64 s += chartorune(&rune, s);
65 if (rune >= 0x10000)
66 n += 2;
67 else
68 n++;
69 }
70 }
71 }
72
73 int js_utfptrtoidx(const char *s, const char *p)
74 {
75 Rune rune;
76 int i = 0;
77 while (s < p) {
78 if (*(unsigned char *)s < Runeself)
79 ++s;
80 else
81 s += chartorune(&rune, s);
82 if (rune >= 0x10000)
83 i += 2;
84 else
85 i += 1;
86 }
87 return i;
88 }
89
90 static void jsB_new_String(js_State *J)
91 {
92 js_newstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
93 }
94
95 static void jsB_String(js_State *J)
96 {
97 js_pushstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
98 }
99
100 static void Sp_toString(js_State *J)
101 {
102 js_Object *self = js_toobject(J, 0);
103 if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
104 js_pushstring(J, self->u.s.string);
105 }
106
107 static void Sp_valueOf(js_State *J)
108 {
109 js_Object *self = js_toobject(J, 0);
110 if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
111 js_pushstring(J, self->u.s.string);
112 }
113
114 static void Sp_charAt(js_State *J)
115 {
116 char buf[UTFmax + 1];
117 const char *s = checkstring(J, 0);
118 int pos = js_tointeger(J, 1);
119 Rune rune = js_runeat(J, s, pos);
120 if (rune >= 0) {
121 buf[runetochar(buf, &rune)] = 0;
122 js_pushstring(J, buf);
123 } else {
124 js_pushliteral(J, "");
125 }
126 }
127
128 static void Sp_charCodeAt(js_State *J)
129 {
130 const char *s = checkstring(J, 0);
131 int pos = js_tointeger(J, 1);
132 Rune rune = js_runeat(J, s, pos);
133 if (rune >= 0)
134 js_pushnumber(J, rune);
135 else
136 js_pushnumber(J, NAN);
137 }
138
139 static void Sp_concat(js_State *J)
140 {
141 int i, top = js_gettop(J);
142 int n;
143 char * volatile out = NULL;
144 const char *s;
145
146 if (top == 1)
147 return;
148
149 s = checkstring(J, 0);
150 n = 1 + strlen(s);
151
152 if (js_try(J)) {
153 js_free(J, out);
154 js_throw(J);
155 }
156
157 if (n > JS_STRLIMIT)
158 js_rangeerror(J, "invalid string length");
159 out = js_malloc(J, n);
160 strcpy(out, s);
161
162 for (i = 1; i < top; ++i) {
163 s = js_tostring(J, i);
164 n += strlen(s);
165 if (n > JS_STRLIMIT)
166 js_rangeerror(J, "invalid string length");
167 out = js_realloc(J, out, n);
168 strcat(out, s);
169 }
170
171 js_pushstring(J, out);
172 js_endtry(J);
173 js_free(J, out);
174 }
175
176 static void Sp_indexOf(js_State *J)
177 {
178 const char *haystack = checkstring(J, 0);
179 const char *needle = js_tostring(J, 1);
180 int pos = js_tointeger(J, 2);
181 int len = strlen(needle);
182 int k = 0;
183 Rune rune;
184 while (*haystack) {
185 if (k >= pos && !strncmp(haystack, needle, len)) {
186 js_pushnumber(J, k);
187 return;
188 }
189 haystack += chartorune(&rune, haystack);
190 ++k;
191 }
192 js_pushnumber(J, -1);
193 }
194
195 static void Sp_lastIndexOf(js_State *J)
196 {
197 const char *haystack = checkstring(J, 0);
198 const char *needle = js_tostring(J, 1);
199 int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
200 int len = strlen(needle);
201 int k = 0, last = -1;
202 Rune rune;
203 while (*haystack && k <= pos) {
204 if (!strncmp(haystack, needle, len))
205 last = k;
206 haystack += chartorune(&rune, haystack);
207 ++k;
208 }
209 js_pushnumber(J, last);
210 }
211
212 static void Sp_localeCompare(js_State *J)
213 {
214 const char *a = checkstring(J, 0);
215 const char *b = js_tostring(J, 1);
216 js_pushnumber(J, strcmp(a, b));
217 }
218
219 static void Sp_substring_imp(js_State *J, const char *s, int a, int n)
220 {
221 Rune head_rune = 0, tail_rune = 0;
222 const char *head, *tail;
223 char *p;
224 int i, k, head_len, tail_len;
225
226 /* find start of substring */
227 head = s;
228 for (i = 0; i < a; ++i) {
229 head += chartorune(&head_rune, head);
230 if (head_rune >= 0x10000)
231 ++i;
232 }
233
234 /* find end of substring */
235 tail = head;
236 for (k = i - a; k < n; ++k) {
237 tail += chartorune(&tail_rune, tail);
238 if (tail_rune >= 0x10000)
239 ++k;
240 }
241
242 /* no surrogate pair splits! */
243 if (i == a && k == n) {
244 js_pushlstring(J, head, tail - head);
245 return;
246 }
247
248 if (js_try(J)) {
249 js_free(J, p);
250 js_throw(J);
251 }
252
253 p = js_malloc(J, UTFmax + (tail - head));
254
255 /* substring starts with low surrogate (head is just after character) */
256 if (i > a) {
257 head_rune = 0xdc00 + ((head_rune - 0x10000) & 0x3ff);
258 head_len = runetochar(p, &head_rune);
259 memcpy(p + head_len, head, tail - head);
260 js_pushlstring(J, p, head_len + (tail - head));
261 }
262
263 /* substring ends with high surrogate (tail is just after character) */
264 if (k > n) {
265 tail -= runelen(tail_rune);
266 memcpy(p, head, tail - head);
267 tail_rune = 0xd800 + ((tail_rune - 0x10000) >> 10);
268 tail_len = runetochar(p + (tail - head), &tail_rune);
269 js_pushlstring(J, p, (tail - head) + tail_len);
270 }
271
272 js_endtry(J);
273 js_free(J, p);
274 }
275
276 static void Sp_slice(js_State *J)
277 {
278 const char *str = checkstring(J, 0);
279 int len = js_utflen(str);
280 int s = js_tointeger(J, 1);
281 int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
282
283 s = s < 0 ? s + len : s;
284 e = e < 0 ? e + len : e;
285
286 s = s < 0 ? 0 : s > len ? len : s;
287 e = e < 0 ? 0 : e > len ? len : e;
288
289 if (s < e)
290 Sp_substring_imp(J, str, s, e - s);
291 else
292 Sp_substring_imp(J, str, e, s - e);
293 }
294
295 static void Sp_substring(js_State *J)
296 {
297 const char *str = checkstring(J, 0);
298 int len = js_utflen(str);
299 int s = js_tointeger(J, 1);
300 int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
301
302 s = s < 0 ? 0 : s > len ? len : s;
303 e = e < 0 ? 0 : e > len ? len : e;
304
305 if (s < e)
306 Sp_substring_imp(J, str, s, e - s);
307 else
308 Sp_substring_imp(J, str, e, s - e);
309 }
310
311 static void Sp_toLowerCase(js_State *J)
312 {
313 const char *s = checkstring(J, 0);
314 char * volatile dst = NULL;
315 char *d;
316 Rune rune;
317
318 if (js_try(J)) {
319 js_free(J, dst);
320 js_throw(J);
321 }
322
323 d = dst = js_malloc(J, UTFmax * strlen(s) + 1);
324 while (*s) {
325 s += chartorune(&rune, s);
326 rune = tolowerrune(rune);
327 d += runetochar(d, &rune);
328 }
329 *d = 0;
330
331 js_pushstring(J, dst);
332 js_endtry(J);
333 js_free(J, dst);
334 }
335
336 static void Sp_toUpperCase(js_State *J)
337 {
338 const char *s = checkstring(J, 0);
339 char * volatile dst = NULL;
340 char *d;
341 Rune rune;
342
343 if (js_try(J)) {
344 js_free(J, dst);
345 js_throw(J);
346 }
347
348 d = dst = js_malloc(J, UTFmax * strlen(s) + 1);
349 while (*s) {
350 s += chartorune(&rune, s);
351 rune = toupperrune(rune);
352 d += runetochar(d, &rune);
353 }
354 *d = 0;
355
356 js_pushstring(J, dst);
357 js_endtry(J);
358 js_free(J, dst);
359 }
360
/*
 * Return 1 when `c` is one of the code points removed by trim:
 * TAB, LF, VT, FF, CR, SPACE, NBSP, LS (U+2028), PS (U+2029) or BOM (U+FEFF);
 * otherwise 0.
 */
static int istrim(int c)
{
	switch (c) {
	case 0x9:
	case 0xA:
	case 0xB:
	case 0xC:
	case 0xD:
	case 0x20:
	case 0xA0:
	case 0x2028:
	case 0x2029:
	case 0xFEFF:
		return 1;
	default:
		return 0;
	}
}
366
367 static void Sp_trim(js_State *J)
368 {
369 const char *s, *e;
370 s = checkstring(J, 0);
371 while (istrim(*s))
372 ++s;
373 e = s + strlen(s);
374 while (e > s && istrim(e[-1]))
375 --e;
376 js_pushlstring(J, s, e - s);
377 }
378
379 static void S_fromCharCode(js_State *J)
380 {
381 int i, top = js_gettop(J);
382 char * volatile s = NULL;
383 char *p;
384 Rune c;
385
386 if (js_try(J)) {
387 js_free(J, s);
388 js_throw(J);
389 }
390
391 s = p = js_malloc(J, (top-1) * UTFmax + 1);
392
393 for (i = 1; i < top; ++i) {
394 c = js_touint32(J, i);
395 p += runetochar(p, &c);
396 }
397 *p = 0;
398
399 js_pushstring(J, s);
400 js_endtry(J);
401 js_free(J, s);
402 }
403
404 static void Sp_match(js_State *J)
405 {
406 js_Regexp *re;
407 const char *text;
408 int len;
409 const char *a, *b, *c, *e;
410 Resub m;
411
412 text = checkstring(J, 0);
413
414 if (js_isregexp(J, 1))
415 js_copy(J, 1);
416 else if (js_isundefined(J, 1))
417 js_newregexp(J, "", 0);
418 else
419 js_newregexp(J, js_tostring(J, 1), 0);
420
421 re = js_toregexp(J, -1);
422 if (!(re->flags & JS_REGEXP_G)) {
423 js_RegExp_prototype_exec(J, re, text);
424 return;
425 }
426
427 re->last = 0;
428
429 js_newarray(J);
430
431 len = 0;
432 a = text;
433 e = text + strlen(text);
434 while (a <= e) {
435 if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
436 break;
437
438 b = m.sub[0].sp;
439 c = m.sub[0].ep;
440
441 js_pushlstring(J, b, c - b);
442 js_setindex(J, -2, len++);
443
444 a = c;
445 if (c - b == 0)
446 ++a;
447 }
448
449 if (len == 0) {
450 js_pop(J, 1);
451 js_pushnull(J);
452 }
453 }
454
455 static void Sp_search(js_State *J)
456 {
457 js_Regexp *re;
458 const char *text;
459 Resub m;
460
461 text = checkstring(J, 0);
462
463 if (js_isregexp(J, 1))
464 js_copy(J, 1);
465 else if (js_isundefined(J, 1))
466 js_newregexp(J, "", 0);
467 else
468 js_newregexp(J, js_tostring(J, 1), 0);
469
470 re = js_toregexp(J, -1);
471
472 if (!js_doregexec(J, re->prog, text, &m, 0))
473 js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
474 else
475 js_pushnumber(J, -1);
476 }
477
478 static void Sp_replace_regexp(js_State *J)
479 {
480 js_Regexp *re;
481 const char *source, *s, *r;
482 js_Buffer *sb = NULL;
483 int n, x;
484 Resub m;
485
486 source = checkstring(J, 0);
487 re = js_toregexp(J, 1);
488
489 if (js_doregexec(J, re->prog, source, &m, 0)) {
490 js_copy(J, 0);
491 return;
492 }
493
494 re->last = 0;
495
496 loop:
497 s = m.sub[0].sp;
498 n = m.sub[0].ep - m.sub[0].sp;
499
500 if (js_iscallable(J, 2)) {
501 js_copy(J, 2);
502 js_pushundefined(J);
503 for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
504 js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
505 js_pushnumber(J, s - source); /* arg x+2: offset within search string */
506 js_copy(J, 0); /* arg x+3: search string */
507 js_call(J, 2 + x);
508 r = js_tostring(J, -1);
509 js_putm(J, &sb, source, s);
510 js_puts(J, &sb, r);
511 js_pop(J, 1);
512 } else {
513 r = js_tostring(J, 2);
514 js_putm(J, &sb, source, s);
515 while (*r) {
516 if (*r == '$') {
517 switch (*(++r)) {
518 case 0: --r; /* end of string; back up */
519 /* fallthrough */
520 case '$': js_putc(J, &sb, '$'); break;
521 case '`': js_putm(J, &sb, source, s); break;
522 case '\'': js_puts(J, &sb, s + n); break;
523 case '&':
524 js_putm(J, &sb, s, s + n);
525 break;
526 case '0': case '1': case '2': case '3': case '4':
527 case '5': case '6': case '7': case '8': case '9':
528 x = *r - '0';
529 if (r[1] >= '0' && r[1] <= '9')
530 x = x * 10 + *(++r) - '0';
531 if (x > 0 && x < m.nsub) {
532 js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
533 } else {
534 js_putc(J, &sb, '$');
535 if (x > 10) {
536 js_putc(J, &sb, '0' + x / 10);
537 js_putc(J, &sb, '0' + x % 10);
538 } else {
539 js_putc(J, &sb, '0' + x);
540 }
541 }
542 break;
543 default:
544 js_putc(J, &sb, '$');
545 js_putc(J, &sb, *r);
546 break;
547 }
548 ++r;
549 } else {
550 js_putc(J, &sb, *r++);
551 }
552 }
553 }
554
555 if (re->flags & JS_REGEXP_G) {
556 source = m.sub[0].ep;
557 if (n == 0) {
558 if (*source)
559 js_putc(J, &sb, *source++);
560 else
561 goto end;
562 }
563 if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
564 goto loop;
565 }
566
567 end:
568 js_puts(J, &sb, s + n);
569 js_putc(J, &sb, 0);
570
571 if (js_try(J)) {
572 js_free(J, sb);
573 js_throw(J);
574 }
575 js_pushstring(J, sb ? sb->s : "");
576 js_endtry(J);
577 js_free(J, sb);
578 }
579
580 static void Sp_replace_string(js_State *J)
581 {
582 const char *source, *needle, *s, *r;
583 js_Buffer *sb = NULL;
584 int n;
585
586 source = checkstring(J, 0);
587 needle = js_tostring(J, 1);
588
589 s = strstr(source, needle);
590 if (!s) {
591 js_copy(J, 0);
592 return;
593 }
594 n = strlen(needle);
595
596 if (js_iscallable(J, 2)) {
597 js_copy(J, 2);
598 js_pushundefined(J);
599 js_pushlstring(J, s, n); /* arg 1: substring that matched */
600 js_pushnumber(J, s - source); /* arg 2: offset within search string */
601 js_copy(J, 0); /* arg 3: search string */
602 js_call(J, 3);
603 r = js_tostring(J, -1);
604 js_putm(J, &sb, source, s);
605 js_puts(J, &sb, r);
606 js_puts(J, &sb, s + n);
607 js_putc(J, &sb, 0);
608 js_pop(J, 1);
609 } else {
610 r = js_tostring(J, 2);
611 js_putm(J, &sb, source, s);
612 while (*r) {
613 if (*r == '$') {
614 switch (*(++r)) {
615 case 0: --r; /* end of string; back up */
616 /* fallthrough */
617 case '$': js_putc(J, &sb, '$'); break;
618 case '&': js_putm(J, &sb, s, s + n); break;
619 case '`': js_putm(J, &sb, source, s); break;
620 case '\'': js_puts(J, &sb, s + n); break;
621 default: js_putc(J, &sb, '$'); js_putc(J, &sb, *r); break;
622 }
623 ++r;
624 } else {
625 js_putc(J, &sb, *r++);
626 }
627 }
628 js_puts(J, &sb, s + n);
629 js_putc(J, &sb, 0);
630 }
631
632 if (js_try(J)) {
633 js_free(J, sb);
634 js_throw(J);
635 }
636 js_pushstring(J, sb ? sb->s : "");
637 js_endtry(J);
638 js_free(J, sb);
639 }
640
641 static void Sp_replace(js_State *J)
642 {
643 if (js_isregexp(J, 1))
644 Sp_replace_regexp(J);
645 else
646 Sp_replace_string(J);
647 }
648
649 static void Sp_split_regexp(js_State *J)
650 {
651 js_Regexp *re;
652 const char *text;
653 int limit, len, k;
654 const char *p, *a, *b, *c, *e;
655 Resub m;
656
657 text = checkstring(J, 0);
658 re = js_toregexp(J, 1);
659 limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
660
661 js_newarray(J);
662 len = 0;
663
664 if (limit == 0)
665 return;
666
667 e = text + strlen(text);
668
669 /* splitting the empty string */
670 if (e == text) {
671 if (js_doregexec(J, re->prog, text, &m, 0)) {
672 js_pushliteral(J, "");
673 js_setindex(J, -2, 0);
674 }
675 return;
676 }
677
678 p = a = text;
679 while (a < e) {
680 if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
681 break; /* no match */
682
683 b = m.sub[0].sp;
684 c = m.sub[0].ep;
685
686 /* empty string at end of last match */
687 if (b == c && b == p) {
688 ++a;
689 continue;
690 }
691
692 if (len == limit) return;
693 js_pushlstring(J, p, b - p);
694 js_setindex(J, -2, len++);
695
696 for (k = 1; k < m.nsub; ++k) {
697 if (len == limit) return;
698 js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
699 js_setindex(J, -2, len++);
700 }
701
702 a = p = c;
703 }
704
705 if (len == limit) return;
706 js_pushstring(J, p);
707 js_setindex(J, -2, len);
708 }
709
710 static void Sp_split_string(js_State *J)
711 {
712 const char *str = checkstring(J, 0);
713 const char *sep = js_tostring(J, 1);
714 int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
715 int i, n;
716
717 js_newarray(J);
718
719 if (limit == 0)
720 return;
721
722 n = strlen(sep);
723
724 /* empty string */
725 if (n == 0) {
726 Rune rune;
727 for (i = 0; *str && i < limit; ++i) {
728 n = chartorune(&rune, str);
729 js_pushlstring(J, str, n);
730 js_setindex(J, -2, i);
731 str += n;
732 }
733 return;
734 }
735
736 for (i = 0; str && i < limit; ++i) {
737 const char *s = strstr(str, sep);
738 if (s) {
739 js_pushlstring(J, str, s-str);
740 js_setindex(J, -2, i);
741 str = s + n;
742 } else {
743 js_pushstring(J, str);
744 js_setindex(J, -2, i);
745 str = NULL;
746 }
747 }
748 }
749
750 static void Sp_split(js_State *J)
751 {
752 if (js_isundefined(J, 1)) {
753 js_newarray(J);
754 js_pushstring(J, js_tostring(J, 0));
755 js_setindex(J, -2, 0);
756 } else if (js_isregexp(J, 1)) {
757 Sp_split_regexp(J);
758 } else {
759 Sp_split_string(J);
760 }
761 }
762
763 void jsB_initstring(js_State *J)
764 {
765 J->String_prototype->u.s.shrstr[0] = 0;
766 J->String_prototype->u.s.string = J->String_prototype->u.s.shrstr;
767 J->String_prototype->u.s.length = 0;
768
769 js_pushobject(J, J->String_prototype);
770 {
771 jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
772 jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
773 jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
774 jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
775 jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
776 jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
777 jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
778 jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
779 jsB_propf(J, "String.prototype.match", Sp_match, 1);
780 jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
781 jsB_propf(J, "String.prototype.search", Sp_search, 1);
782 jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
783 jsB_propf(J, "String.prototype.split", Sp_split, 2);
784 jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
785 jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
786 jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
787 jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
788 jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);
789
790 /* ES5 */
791 jsB_propf(J, "String.prototype.trim", Sp_trim, 0);
792 }
793 js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
794 {
795 jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */
796 }
797 js_defglobal(J, "String", JS_DONTENUM);
798 }