comparison mupdf-source/thirdparty/mujs/jslex.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #include "jsi.h"
2 #include "utf.h"
3
4 JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3);
5
6 static void jsY_error(js_State *J, const char *fmt, ...)
7 {
8 va_list ap;
9 char buf[512];
10 char msgbuf[256];
11
12 va_start(ap, fmt);
13 vsnprintf(msgbuf, 256, fmt, ap);
14 va_end(ap);
15
16 snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline);
17 strcat(buf, msgbuf);
18
19 js_newsyntaxerror(J, buf);
20 js_throw(J);
21 }
22
/*
 * Printable names of tokens, indexed by token value.
 *
 * Entries 0..127 name the end-of-file marker and the single-character
 * tokens, entries 128..255 are unused (null), and the entries from 256 on
 * name the multi-character tokens.
 * NOTE(review): the order of the entries from 256 on presumably mirrors the
 * TK_* token enumeration declared elsewhere -- keep both in sync.
 */
static const char *tokenstring[] = {
	"(end-of-file)",
	"'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
	"'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
	"'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
	"'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
	"' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
	/* the backslash entry is spelled escaped, like the single quote entry */
	"'X'", "'Y'", "'Z'", "'['", "'\\\\'", "']'", "'^'", "'_'",
	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",

	/* 128..255: no tokens */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

	"(identifier)", "(number)", "(string)", "(regexp)",

	"'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
	"'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
	"'+='", "'-='", "'*='", "'/='", "'%='",
	"'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
	"'++'", "'--'",

	"'break'", "'case'", "'catch'", "'continue'", "'debugger'",
	"'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
	"'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
	"'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
	"'void'", "'while'", "'with'",
};
65
66 const char *jsY_tokenstring(int token)
67 {
68 if (token >= 0 && token < (int)nelem(tokenstring))
69 if (tokenstring[token])
70 return tokenstring[token];
71 return "<unknown>";
72 }
73
/*
 * Reserved words recognised by the lexer.
 *
 * The list is searched with a strcmp-based binary search, so it must stay
 * in strcmp order. The index of a word is added to TK_BREAK to obtain its
 * token, so the order must also match the keyword tokens.
 */
static const char *keywords[] = {
	"break", "case", "catch", "continue",
	"debugger", "default", "delete", "do",
	"else", "false", "finally", "for", "function",
	"if", "in", "instanceof",
	"new", "null", "return", "switch",
	"this", "throw", "true", "try", "typeof",
	"var", "void", "while", "with",
};
80
/*
 * Binary search for the string s in list, which holds num strings in
 * strcmp order.
 * Returns the index of the matching entry, or -1 if s is not present.
 */
int jsY_findword(const char *s, const char **list, int num)
{
	int lo = 0;
	int hi = num - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		int order = strcmp(s, list[mid]);

		if (order == 0)
			return mid;
		if (order < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}

	return -1;
}
97
98 static int jsY_findkeyword(js_State *J, const char *s)
99 {
100 int i = jsY_findword(s, keywords, nelem(keywords));
101 if (i >= 0) {
102 J->text = keywords[i];
103 return TK_BREAK + i; /* first keyword + i */
104 }
105 J->text = js_intern(J, s);
106 return TK_IDENTIFIER;
107 }
108
/*
 * Return 1 if c is one of the code points the lexer treats as white space
 * (tab, vertical tab, form feed, space, U+00A0, U+FEFF), else 0.
 */
int jsY_iswhite(int c)
{
	switch (c) {
	case 0x9:
	case 0xB:
	case 0xC:
	case 0x20:
	case 0xA0:
	case 0xFEFF:
		return 1;
	default:
		return 0;
	}
}
113
/*
 * Return 1 if c is a line terminator (LF, CR, U+2028 or U+2029), else 0.
 */
int jsY_isnewline(int c)
{
	switch (c) {
	case 0xA:
	case 0xD:
	case 0x2028:
	case 0x2029:
		return 1;
	default:
		return 0;
	}
}
118
/*
 * ASCII-only character class tests, defined only when no macro of the same
 * name already exists. The argument is evaluated more than once, so it must
 * not have side effects. Every use of the argument is parenthesized so that
 * an arbitrary expression (for example a conditional expression) is
 * classified as a whole.
 *
 * ishex() matches only the letters a-f / A-F; decimal digits are tested
 * separately with isdigit().
 */
#ifndef isalpha
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#endif
#ifndef isdigit
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
#ifndef ishex
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#endif
128
/*
 * Return 1 if c may begin an identifier: an ASCII letter, '$', '_', or a
 * rune accepted by isalpharune(); otherwise 0.
 */
static int jsY_isidentifierstart(int c)
{
	if (isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) ? 1 : 0;
}
133
/*
 * Return 1 if c may continue an identifier: an ASCII digit or letter, '$',
 * '_', or a rune accepted by isalpharune(); otherwise 0.
 */
static int jsY_isidentifierpart(int c)
{
	if (isdigit(c))
		return 1;
	if (isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) ? 1 : 0;
}
138
/* Return nonzero if c is a decimal digit, as decided by isdigit(). */
static int jsY_isdec(int c)
{
	int digit = isdigit(c);

	return digit;
}
143
/*
 * Return 1 if c is a hexadecimal digit (a decimal digit or a letter
 * accepted by ishex()), else 0.
 */
int jsY_ishex(int c)
{
	if (isdigit(c))
		return 1;
	if (ishex(c))
		return 1;
	return 0;
}
148
/*
 * Convert a hexadecimal digit character to its value (0..15).
 * Any character that is not a hexadecimal digit yields 0.
 */
int jsY_tohex(int c)
{
	int value = 0;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 0xA + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 0xA + (c - 'A');

	return value;
}
156
157 static void jsY_next(js_State *J)
158 {
159 Rune c;
160 if (*J->source == 0) {
161 J->lexchar = EOF;
162 return;
163 }
164 J->source += chartorune(&c, J->source);
165 /* consume CR LF as one unit */
166 if (c == '\r' && *J->source == '\n')
167 ++J->source;
168 if (jsY_isnewline(c)) {
169 J->line++;
170 c = '\n';
171 }
172 J->lexchar = c;
173 }
174
/*
 * jsY_accept(J, x): if the lookahead character equals x, consume it and
 * evaluate to 1; otherwise leave the lexer untouched and evaluate to 0.
 * The arguments are parenthesized so arbitrary expressions are safe.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)

/*
 * jsY_expect(J, x): consume the lookahead character x, or raise a syntax
 * error "expected '<x>'" if the lookahead is something else.
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and cannot capture a following `else`. x is evaluated more than once.
 */
#define jsY_expect(J, x) do { if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", (x)); } while (0)
178
179 static void jsY_unescape(js_State *J)
180 {
181 if (jsY_accept(J, '\\')) {
182 if (jsY_accept(J, 'u')) {
183 int x = 0;
184 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J);
185 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J);
186 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J);
187 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar);
188 J->lexchar = x;
189 return;
190 }
191 error:
192 jsY_error(J, "unexpected escape sequence");
193 }
194 }
195
196 static void textinit(js_State *J)
197 {
198 if (!J->lexbuf.text) {
199 J->lexbuf.cap = 4096;
200 J->lexbuf.text = js_malloc(J, J->lexbuf.cap);
201 }
202 J->lexbuf.len = 0;
203 }
204
205 static void textpush(js_State *J, Rune c)
206 {
207 int n;
208 if (c == EOF)
209 n = 1;
210 else
211 n = runelen(c);
212 if (J->lexbuf.len + n > J->lexbuf.cap) {
213 J->lexbuf.cap = J->lexbuf.cap * 2;
214 J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap);
215 }
216 if (c == EOF)
217 J->lexbuf.text[J->lexbuf.len++] = 0;
218 else
219 J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c);
220 }
221
222 static char *textend(js_State *J)
223 {
224 textpush(J, EOF);
225 return J->lexbuf.text;
226 }
227
228 static void lexlinecomment(js_State *J)
229 {
230 while (J->lexchar != EOF && J->lexchar != '\n')
231 jsY_next(J);
232 }
233
234 static int lexcomment(js_State *J)
235 {
236 /* already consumed initial '/' '*' sequence */
237 while (J->lexchar != EOF) {
238 if (jsY_accept(J, '*')) {
239 while (J->lexchar == '*')
240 jsY_next(J);
241 if (jsY_accept(J, '/'))
242 return 0;
243 }
244 else
245 jsY_next(J);
246 }
247 return -1;
248 }
249
250 static double lexhex(js_State *J)
251 {
252 double n = 0;
253 if (!jsY_ishex(J->lexchar))
254 jsY_error(J, "malformed hexadecimal number");
255 while (jsY_ishex(J->lexchar)) {
256 n = n * 16 + jsY_tohex(J->lexchar);
257 jsY_next(J);
258 }
259 return n;
260 }
261
262 #if 0
263
264 static double lexinteger(js_State *J)
265 {
266 double n = 0;
267 if (!jsY_isdec(J->lexchar))
268 jsY_error(J, "malformed number");
269 while (jsY_isdec(J->lexchar)) {
270 n = n * 10 + (J->lexchar - '0');
271 jsY_next(J);
272 }
273 return n;
274 }
275
276 static double lexfraction(js_State *J)
277 {
278 double n = 0;
279 double d = 1;
280 while (jsY_isdec(J->lexchar)) {
281 n = n * 10 + (J->lexchar - '0');
282 d = d * 10;
283 jsY_next(J);
284 }
285 return n / d;
286 }
287
288 static double lexexponent(js_State *J)
289 {
290 double sign;
291 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
292 if (jsY_accept(J, '-')) sign = -1;
293 else if (jsY_accept(J, '+')) sign = 1;
294 else sign = 1;
295 return sign * lexinteger(J);
296 }
297 return 0;
298 }
299
300 static int lexnumber(js_State *J)
301 {
302 double n;
303 double e;
304
305 if (jsY_accept(J, '0')) {
306 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
307 J->number = lexhex(J);
308 return TK_NUMBER;
309 }
310 if (jsY_isdec(J->lexchar))
311 jsY_error(J, "number with leading zero");
312 n = 0;
313 if (jsY_accept(J, '.'))
314 n += lexfraction(J);
315 } else if (jsY_accept(J, '.')) {
316 if (!jsY_isdec(J->lexchar))
317 return '.';
318 n = lexfraction(J);
319 } else {
320 n = lexinteger(J);
321 if (jsY_accept(J, '.'))
322 n += lexfraction(J);
323 }
324
325 e = lexexponent(J);
326 if (e < 0)
327 n /= pow(10, -e);
328 else if (e > 0)
329 n *= pow(10, e);
330
331 if (jsY_isidentifierstart(J->lexchar))
332 jsY_error(J, "number with letter suffix");
333
334 J->number = n;
335 return TK_NUMBER;
336 }
337
338 #else
339
340 static int lexnumber(js_State *J)
341 {
342 const char *s = J->source - 1;
343
344 if (jsY_accept(J, '0')) {
345 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
346 J->number = lexhex(J);
347 return TK_NUMBER;
348 }
349 if (jsY_isdec(J->lexchar))
350 jsY_error(J, "number with leading zero");
351 if (jsY_accept(J, '.')) {
352 while (jsY_isdec(J->lexchar))
353 jsY_next(J);
354 }
355 } else if (jsY_accept(J, '.')) {
356 if (!jsY_isdec(J->lexchar))
357 return '.';
358 while (jsY_isdec(J->lexchar))
359 jsY_next(J);
360 } else {
361 while (jsY_isdec(J->lexchar))
362 jsY_next(J);
363 if (jsY_accept(J, '.')) {
364 while (jsY_isdec(J->lexchar))
365 jsY_next(J);
366 }
367 }
368
369 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
370 if (J->lexchar == '-' || J->lexchar == '+')
371 jsY_next(J);
372 if (jsY_isdec(J->lexchar))
373 while (jsY_isdec(J->lexchar))
374 jsY_next(J);
375 else
376 jsY_error(J, "missing exponent");
377 }
378
379 if (jsY_isidentifierstart(J->lexchar))
380 jsY_error(J, "number with letter suffix");
381
382 J->number = js_strtod(s, NULL);
383 return TK_NUMBER;
384 }
385
386 #endif
387
388 static int lexescape(js_State *J)
389 {
390 int x = 0;
391
392 /* already consumed '\' */
393
394 if (jsY_accept(J, '\n'))
395 return 0;
396
397 switch (J->lexchar) {
398 case EOF: jsY_error(J, "unterminated escape sequence");
399 case 'u':
400 jsY_next(J);
401 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
402 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
403 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
404 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
405 textpush(J, x);
406 break;
407 case 'x':
408 jsY_next(J);
409 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
410 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
411 textpush(J, x);
412 break;
413 case '0': textpush(J, 0); jsY_next(J); break;
414 case '\\': textpush(J, '\\'); jsY_next(J); break;
415 case '\'': textpush(J, '\''); jsY_next(J); break;
416 case '"': textpush(J, '"'); jsY_next(J); break;
417 case 'b': textpush(J, '\b'); jsY_next(J); break;
418 case 'f': textpush(J, '\f'); jsY_next(J); break;
419 case 'n': textpush(J, '\n'); jsY_next(J); break;
420 case 'r': textpush(J, '\r'); jsY_next(J); break;
421 case 't': textpush(J, '\t'); jsY_next(J); break;
422 case 'v': textpush(J, '\v'); jsY_next(J); break;
423 default: textpush(J, J->lexchar); jsY_next(J); break;
424 }
425 return 0;
426 }
427
428 static int lexstring(js_State *J)
429 {
430 const char *s;
431
432 int q = J->lexchar;
433 jsY_next(J);
434
435 textinit(J);
436
437 while (J->lexchar != q) {
438 if (J->lexchar == EOF || J->lexchar == '\n')
439 jsY_error(J, "string not terminated");
440 if (jsY_accept(J, '\\')) {
441 if (lexescape(J))
442 jsY_error(J, "malformed escape sequence");
443 } else {
444 textpush(J, J->lexchar);
445 jsY_next(J);
446 }
447 }
448 jsY_expect(J, q);
449
450 s = textend(J);
451
452 J->text = js_intern(J, s);
453 return TK_STRING;
454 }
455
456 /* the ugliest language wart ever... */
457 static int isregexpcontext(int last)
458 {
459 switch (last) {
460 case ']':
461 case ')':
462 case '}':
463 case TK_IDENTIFIER:
464 case TK_NUMBER:
465 case TK_STRING:
466 case TK_FALSE:
467 case TK_NULL:
468 case TK_THIS:
469 case TK_TRUE:
470 return 0;
471 default:
472 return 1;
473 }
474 }
475
476 static int lexregexp(js_State *J)
477 {
478 const char *s;
479 int g, m, i;
480 int inclass = 0;
481
482 /* already consumed initial '/' */
483
484 textinit(J);
485
486 /* regexp body */
487 while (J->lexchar != '/' || inclass) {
488 if (J->lexchar == EOF || J->lexchar == '\n') {
489 jsY_error(J, "regular expression not terminated");
490 } else if (jsY_accept(J, '\\')) {
491 if (jsY_accept(J, '/')) {
492 textpush(J, '/');
493 } else {
494 textpush(J, '\\');
495 if (J->lexchar == EOF || J->lexchar == '\n')
496 jsY_error(J, "regular expression not terminated");
497 textpush(J, J->lexchar);
498 jsY_next(J);
499 }
500 } else {
501 if (J->lexchar == '[' && !inclass)
502 inclass = 1;
503 if (J->lexchar == ']' && inclass)
504 inclass = 0;
505 textpush(J, J->lexchar);
506 jsY_next(J);
507 }
508 }
509 jsY_expect(J, '/');
510
511 s = textend(J);
512
513 /* regexp flags */
514 g = i = m = 0;
515
516 while (jsY_isidentifierpart(J->lexchar)) {
517 if (jsY_accept(J, 'g')) ++g;
518 else if (jsY_accept(J, 'i')) ++i;
519 else if (jsY_accept(J, 'm')) ++m;
520 else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar);
521 }
522
523 if (g > 1 || i > 1 || m > 1)
524 jsY_error(J, "duplicated flag in regular expression");
525
526 J->text = js_intern(J, s);
527 J->number = 0;
528 if (g) J->number += JS_REGEXP_G;
529 if (i) J->number += JS_REGEXP_I;
530 if (m) J->number += JS_REGEXP_M;
531 return TK_REGEXP;
532 }
533
534 /* simple "return [no Line Terminator here] ..." contexts */
535 static int isnlthcontext(int last)
536 {
537 switch (last) {
538 case TK_BREAK:
539 case TK_CONTINUE:
540 case TK_RETURN:
541 case TK_THROW:
542 return 1;
543 default:
544 return 0;
545 }
546 }
547
548 static int jsY_lexx(js_State *J)
549 {
550 J->newline = 0;
551
552 while (1) {
553 J->lexline = J->line; /* save location of beginning of token */
554
555 while (jsY_iswhite(J->lexchar))
556 jsY_next(J);
557
558 if (jsY_accept(J, '\n')) {
559 J->newline = 1;
560 if (isnlthcontext(J->lasttoken))
561 return ';';
562 continue;
563 }
564
565 if (jsY_accept(J, '/')) {
566 if (jsY_accept(J, '/')) {
567 lexlinecomment(J);
568 continue;
569 } else if (jsY_accept(J, '*')) {
570 if (lexcomment(J))
571 jsY_error(J, "multi-line comment not terminated");
572 continue;
573 } else if (isregexpcontext(J->lasttoken)) {
574 return lexregexp(J);
575 } else if (jsY_accept(J, '=')) {
576 return TK_DIV_ASS;
577 } else {
578 return '/';
579 }
580 }
581
582 if (J->lexchar >= '0' && J->lexchar <= '9') {
583 return lexnumber(J);
584 }
585
586 switch (J->lexchar) {
587 case '(': jsY_next(J); return '(';
588 case ')': jsY_next(J); return ')';
589 case ',': jsY_next(J); return ',';
590 case ':': jsY_next(J); return ':';
591 case ';': jsY_next(J); return ';';
592 case '?': jsY_next(J); return '?';
593 case '[': jsY_next(J); return '[';
594 case ']': jsY_next(J); return ']';
595 case '{': jsY_next(J); return '{';
596 case '}': jsY_next(J); return '}';
597 case '~': jsY_next(J); return '~';
598
599 case '\'':
600 case '"':
601 return lexstring(J);
602
603 case '.':
604 return lexnumber(J);
605
606 case '<':
607 jsY_next(J);
608 if (jsY_accept(J, '<')) {
609 if (jsY_accept(J, '='))
610 return TK_SHL_ASS;
611 return TK_SHL;
612 }
613 if (jsY_accept(J, '='))
614 return TK_LE;
615 return '<';
616
617 case '>':
618 jsY_next(J);
619 if (jsY_accept(J, '>')) {
620 if (jsY_accept(J, '>')) {
621 if (jsY_accept(J, '='))
622 return TK_USHR_ASS;
623 return TK_USHR;
624 }
625 if (jsY_accept(J, '='))
626 return TK_SHR_ASS;
627 return TK_SHR;
628 }
629 if (jsY_accept(J, '='))
630 return TK_GE;
631 return '>';
632
633 case '=':
634 jsY_next(J);
635 if (jsY_accept(J, '=')) {
636 if (jsY_accept(J, '='))
637 return TK_STRICTEQ;
638 return TK_EQ;
639 }
640 return '=';
641
642 case '!':
643 jsY_next(J);
644 if (jsY_accept(J, '=')) {
645 if (jsY_accept(J, '='))
646 return TK_STRICTNE;
647 return TK_NE;
648 }
649 return '!';
650
651 case '+':
652 jsY_next(J);
653 if (jsY_accept(J, '+'))
654 return TK_INC;
655 if (jsY_accept(J, '='))
656 return TK_ADD_ASS;
657 return '+';
658
659 case '-':
660 jsY_next(J);
661 if (jsY_accept(J, '-'))
662 return TK_DEC;
663 if (jsY_accept(J, '='))
664 return TK_SUB_ASS;
665 return '-';
666
667 case '*':
668 jsY_next(J);
669 if (jsY_accept(J, '='))
670 return TK_MUL_ASS;
671 return '*';
672
673 case '%':
674 jsY_next(J);
675 if (jsY_accept(J, '='))
676 return TK_MOD_ASS;
677 return '%';
678
679 case '&':
680 jsY_next(J);
681 if (jsY_accept(J, '&'))
682 return TK_AND;
683 if (jsY_accept(J, '='))
684 return TK_AND_ASS;
685 return '&';
686
687 case '|':
688 jsY_next(J);
689 if (jsY_accept(J, '|'))
690 return TK_OR;
691 if (jsY_accept(J, '='))
692 return TK_OR_ASS;
693 return '|';
694
695 case '^':
696 jsY_next(J);
697 if (jsY_accept(J, '='))
698 return TK_XOR_ASS;
699 return '^';
700
701 case EOF:
702 return 0; /* EOF */
703 }
704
705 /* Handle \uXXXX escapes in identifiers */
706 jsY_unescape(J);
707 if (jsY_isidentifierstart(J->lexchar)) {
708 textinit(J);
709 textpush(J, J->lexchar);
710
711 jsY_next(J);
712 jsY_unescape(J);
713 while (jsY_isidentifierpart(J->lexchar)) {
714 textpush(J, J->lexchar);
715 jsY_next(J);
716 jsY_unescape(J);
717 }
718
719 textend(J);
720
721 return jsY_findkeyword(J, J->lexbuf.text);
722 }
723
724 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
725 jsY_error(J, "unexpected character: '%c'", J->lexchar);
726 jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
727 }
728 }
729
730 void jsY_initlex(js_State *J, const char *filename, const char *source)
731 {
732 J->filename = filename;
733 J->source = source;
734 J->line = 1;
735 J->lasttoken = 0;
736 jsY_next(J); /* load first lookahead character */
737 }
738
739 int jsY_lex(js_State *J)
740 {
741 return J->lasttoken = jsY_lexx(J);
742 }
743
744 static int lexjsonnumber(js_State *J)
745 {
746 const char *s = J->source - 1;
747
748 if (J->lexchar == '-')
749 jsY_next(J);
750
751 if (J->lexchar == '0')
752 jsY_next(J);
753 else if (J->lexchar >= '1' && J->lexchar <= '9')
754 while (isdigit(J->lexchar))
755 jsY_next(J);
756 else
757 jsY_error(J, "unexpected non-digit");
758
759 if (jsY_accept(J, '.')) {
760 if (isdigit(J->lexchar))
761 while (isdigit(J->lexchar))
762 jsY_next(J);
763 else
764 jsY_error(J, "missing digits after decimal point");
765 }
766
767 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
768 if (J->lexchar == '-' || J->lexchar == '+')
769 jsY_next(J);
770 if (isdigit(J->lexchar))
771 while (isdigit(J->lexchar))
772 jsY_next(J);
773 else
774 jsY_error(J, "missing digits after exponent indicator");
775 }
776
777 J->number = js_strtod(s, NULL);
778 return TK_NUMBER;
779 }
780
781 static int lexjsonescape(js_State *J)
782 {
783 int x = 0;
784
785 /* already consumed '\' */
786
787 switch (J->lexchar) {
788 default: jsY_error(J, "invalid escape sequence");
789 case 'u':
790 jsY_next(J);
791 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
792 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
793 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
794 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
795 textpush(J, x);
796 break;
797 case '"': textpush(J, '"'); jsY_next(J); break;
798 case '\\': textpush(J, '\\'); jsY_next(J); break;
799 case '/': textpush(J, '/'); jsY_next(J); break;
800 case 'b': textpush(J, '\b'); jsY_next(J); break;
801 case 'f': textpush(J, '\f'); jsY_next(J); break;
802 case 'n': textpush(J, '\n'); jsY_next(J); break;
803 case 'r': textpush(J, '\r'); jsY_next(J); break;
804 case 't': textpush(J, '\t'); jsY_next(J); break;
805 }
806 return 0;
807 }
808
809 static int lexjsonstring(js_State *J)
810 {
811 const char *s;
812
813 textinit(J);
814
815 while (J->lexchar != '"') {
816 if (J->lexchar == EOF)
817 jsY_error(J, "unterminated string");
818 else if (J->lexchar < 32)
819 jsY_error(J, "invalid control character in string");
820 else if (jsY_accept(J, '\\'))
821 lexjsonescape(J);
822 else {
823 textpush(J, J->lexchar);
824 jsY_next(J);
825 }
826 }
827 jsY_expect(J, '"');
828
829 s = textend(J);
830
831 J->text = js_intern(J, s);
832 return TK_STRING;
833 }
834
835 int jsY_lexjson(js_State *J)
836 {
837 while (1) {
838 J->lexline = J->line; /* save location of beginning of token */
839
840 while (jsY_iswhite(J->lexchar) || J->lexchar == '\n')
841 jsY_next(J);
842
843 if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-')
844 return lexjsonnumber(J);
845
846 switch (J->lexchar) {
847 case ',': jsY_next(J); return ',';
848 case ':': jsY_next(J); return ':';
849 case '[': jsY_next(J); return '[';
850 case ']': jsY_next(J); return ']';
851 case '{': jsY_next(J); return '{';
852 case '}': jsY_next(J); return '}';
853
854 case '"':
855 jsY_next(J);
856 return lexjsonstring(J);
857
858 case 'f':
859 jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e');
860 return TK_FALSE;
861
862 case 'n':
863 jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l');
864 return TK_NULL;
865
866 case 't':
867 jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e');
868 return TK_TRUE;
869
870 case EOF:
871 return 0; /* EOF */
872 }
873
874 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
875 jsY_error(J, "unexpected character: '%c'", J->lexchar);
876 jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
877 }
878 }