Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/mujs/jslex.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 #include "jsi.h" | |
| 2 #include "utf.h" | |
| 3 | |
| 4 JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3); | |
| 5 | |
| 6 static void jsY_error(js_State *J, const char *fmt, ...) | |
| 7 { | |
| 8 va_list ap; | |
| 9 char buf[512]; | |
| 10 char msgbuf[256]; | |
| 11 | |
| 12 va_start(ap, fmt); | |
| 13 vsnprintf(msgbuf, 256, fmt, ap); | |
| 14 va_end(ap); | |
| 15 | |
| 16 snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline); | |
| 17 strcat(buf, msgbuf); | |
| 18 | |
| 19 js_newsyntaxerror(J, buf); | |
| 20 js_throw(J); | |
| 21 } | |
| 22 | |
/*
 * Printable names of tokens, indexed by token value.
 *
 * Index 0 is end-of-file and indices 1..127 are the single ASCII
 * characters. Indices 128..255 are unused (null). The named tokens,
 * multi-character operators and keywords follow from index 256 on.
 */
static const char *tokenstring[] = {
	"(end-of-file)",
	"'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
	"'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
	"'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
	"'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
	"' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
	/* the backslash entry must be "'\\'"; "'\'" would be the string '' */
	"'X'", "'Y'", "'Z'", "'['", "'\\'", "']'", "'^'", "'_'",
	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",

	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

	"(identifier)", "(number)", "(string)", "(regexp)",

	"'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
	"'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
	"'+='", "'-='", "'*='", "'/='", "'%='",
	"'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
	"'++'", "'--'",

	"'break'", "'case'", "'catch'", "'continue'", "'debugger'",
	"'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
	"'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
	"'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
	"'void'", "'while'", "'with'",
};
| 65 | |
| 66 const char *jsY_tokenstring(int token) | |
| 67 { | |
| 68 if (token >= 0 && token < (int)nelem(tokenstring)) | |
| 69 if (tokenstring[token]) | |
| 70 return tokenstring[token]; | |
| 71 return "<unknown>"; | |
| 72 } | |
| 73 | |
/*
 * Reserved words recognised by the lexer.
 *
 * The list is searched with jsY_findword(), a binary search using
 * strcmp(), so it must stay sorted. jsY_findkeyword() turns an index
 * into a token as TK_BREAK + index, so the order must also match the
 * keyword token values.
 */
static const char *keywords[] = {
	"break",
	"case",
	"catch",
	"continue",
	"debugger",
	"default",
	"delete",
	"do",
	"else",
	"false",
	"finally",
	"for",
	"function",
	"if",
	"in",
	"instanceof",
	"new",
	"null",
	"return",
	"switch",
	"this",
	"throw",
	"true",
	"try",
	"typeof",
	"var",
	"void",
	"while",
	"with",
};
| 80 | |
/*
 * Binary-search a sorted array of strings.
 *
 * s    - the string to look for
 * list - array of num strings, ordered according to strcmp()
 * num  - number of entries in list
 *
 * Returns the index of the matching entry, or -1 if s is not present.
 */
int jsY_findword(const char *s, const char **list, int num)
{
	int lo = 0;
	int hi = num - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		int order = strcmp(s, list[mid]);

		if (order == 0)
			return mid;
		if (order < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}

	return -1;
}
| 97 | |
| 98 static int jsY_findkeyword(js_State *J, const char *s) | |
| 99 { | |
| 100 int i = jsY_findword(s, keywords, nelem(keywords)); | |
| 101 if (i >= 0) { | |
| 102 J->text = keywords[i]; | |
| 103 return TK_BREAK + i; /* first keyword + i */ | |
| 104 } | |
| 105 J->text = js_intern(J, s); | |
| 106 return TK_IDENTIFIER; | |
| 107 } | |
| 108 | |
/*
 * Return 1 if c is one of the white space code points the lexer skips
 * (0x9, 0xB, 0xC, 0x20, 0xA0, 0xFEFF), otherwise 0. Line terminators
 * are not included; see jsY_isnewline().
 */
int jsY_iswhite(int c)
{
	switch (c) {
	case 0x9:
	case 0xB:
	case 0xC:
	case 0x20:
	case 0xA0:
	case 0xFEFF:
		return 1;
	default:
		return 0;
	}
}
| 113 | |
/*
 * Return 1 if c is a line terminator code point (0xA, 0xD, 0x2028 or
 * 0x2029), otherwise 0.
 */
int jsY_isnewline(int c)
{
	switch (c) {
	case 0xA:
	case 0xD:
	case 0x2028:
	case 0x2029:
		return 1;
	default:
		return 0;
	}
}
| 118 | |
/*
 * ASCII-only character class tests, defined only when no macro of the
 * same name already exists.
 *
 * Every use of the argument is parenthesized so that an expression
 * argument containing a low-precedence operator is classified as a
 * whole. The argument is evaluated more than once, so it must not have
 * side effects.
 *
 * Note that ishex() matches only the letters a-f / A-F; decimal digits
 * are handled separately by isdigit().
 */
#ifndef isalpha
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#endif
#ifndef isdigit
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
#ifndef ishex
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#endif
| 128 | |
/*
 * Return non-zero if c may begin an identifier: an ASCII letter, '$',
 * '_', or any code point accepted by isalpharune().
 */
static int jsY_isidentifierstart(int c)
{
	if (isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) ? 1 : 0;
}
| 133 | |
/*
 * Return non-zero if c may appear after the first character of an
 * identifier: an ASCII digit or letter, '$', '_', or any code point
 * accepted by isalpharune().
 */
static int jsY_isidentifierpart(int c)
{
	if (isdigit(c) || isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) ? 1 : 0;
}
| 138 | |
/* Return non-zero if c is an ASCII decimal digit ('0'..'9'). */
static int jsY_isdec(int c)
{
	return '0' <= c && c <= '9';
}
| 143 | |
/*
 * Return non-zero if c is an ASCII hexadecimal digit: '0'..'9',
 * 'a'..'f' or 'A'..'F'.
 */
int jsY_ishex(int c)
{
	if ('0' <= c && c <= '9')
		return 1;
	if ('a' <= c && c <= 'f')
		return 1;
	return 'A' <= c && c <= 'F';
}
| 148 | |
/*
 * Convert an ASCII hexadecimal digit to its value (0..15).
 * Any other character yields 0.
 */
int jsY_tohex(int c)
{
	int value = 0;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 0xA + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 0xA + (c - 'A');

	return value;
}
| 156 | |
| 157 static void jsY_next(js_State *J) | |
| 158 { | |
| 159 Rune c; | |
| 160 if (*J->source == 0) { | |
| 161 J->lexchar = EOF; | |
| 162 return; | |
| 163 } | |
| 164 J->source += chartorune(&c, J->source); | |
| 165 /* consume CR LF as one unit */ | |
| 166 if (c == '\r' && *J->source == '\n') | |
| 167 ++J->source; | |
| 168 if (jsY_isnewline(c)) { | |
| 169 J->line++; | |
| 170 c = '\n'; | |
| 171 } | |
| 172 J->lexchar = c; | |
| 173 } | |
| 174 | |
/*
 * jsY_accept(J, x): if the lookahead character equals x, consume it and
 * evaluate to 1; otherwise leave the input untouched and evaluate to 0.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)

/*
 * jsY_expect(J, x): consume x, or report "expected 'x'" via jsY_error().
 * Wrapped in do/while(0) so that it behaves as a single statement and
 * cannot capture the else of an enclosing if. Note that x is evaluated
 * more than once.
 */
#define jsY_expect(J, x) do { if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", (x)); } while (0)
| 178 | |
| 179 static void jsY_unescape(js_State *J) | |
| 180 { | |
| 181 if (jsY_accept(J, '\\')) { | |
| 182 if (jsY_accept(J, 'u')) { | |
| 183 int x = 0; | |
| 184 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); | |
| 185 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); | |
| 186 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); | |
| 187 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar); | |
| 188 J->lexchar = x; | |
| 189 return; | |
| 190 } | |
| 191 error: | |
| 192 jsY_error(J, "unexpected escape sequence"); | |
| 193 } | |
| 194 } | |
| 195 | |
| 196 static void textinit(js_State *J) | |
| 197 { | |
| 198 if (!J->lexbuf.text) { | |
| 199 J->lexbuf.cap = 4096; | |
| 200 J->lexbuf.text = js_malloc(J, J->lexbuf.cap); | |
| 201 } | |
| 202 J->lexbuf.len = 0; | |
| 203 } | |
| 204 | |
| 205 static void textpush(js_State *J, Rune c) | |
| 206 { | |
| 207 int n; | |
| 208 if (c == EOF) | |
| 209 n = 1; | |
| 210 else | |
| 211 n = runelen(c); | |
| 212 if (J->lexbuf.len + n > J->lexbuf.cap) { | |
| 213 J->lexbuf.cap = J->lexbuf.cap * 2; | |
| 214 J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap); | |
| 215 } | |
| 216 if (c == EOF) | |
| 217 J->lexbuf.text[J->lexbuf.len++] = 0; | |
| 218 else | |
| 219 J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c); | |
| 220 } | |
| 221 | |
| 222 static char *textend(js_State *J) | |
| 223 { | |
| 224 textpush(J, EOF); | |
| 225 return J->lexbuf.text; | |
| 226 } | |
| 227 | |
| 228 static void lexlinecomment(js_State *J) | |
| 229 { | |
| 230 while (J->lexchar != EOF && J->lexchar != '\n') | |
| 231 jsY_next(J); | |
| 232 } | |
| 233 | |
| 234 static int lexcomment(js_State *J) | |
| 235 { | |
| 236 /* already consumed initial '/' '*' sequence */ | |
| 237 while (J->lexchar != EOF) { | |
| 238 if (jsY_accept(J, '*')) { | |
| 239 while (J->lexchar == '*') | |
| 240 jsY_next(J); | |
| 241 if (jsY_accept(J, '/')) | |
| 242 return 0; | |
| 243 } | |
| 244 else | |
| 245 jsY_next(J); | |
| 246 } | |
| 247 return -1; | |
| 248 } | |
| 249 | |
| 250 static double lexhex(js_State *J) | |
| 251 { | |
| 252 double n = 0; | |
| 253 if (!jsY_ishex(J->lexchar)) | |
| 254 jsY_error(J, "malformed hexadecimal number"); | |
| 255 while (jsY_ishex(J->lexchar)) { | |
| 256 n = n * 16 + jsY_tohex(J->lexchar); | |
| 257 jsY_next(J); | |
| 258 } | |
| 259 return n; | |
| 260 } | |
| 261 | |
| 262 #if 0 | |
| 263 | |
| 264 static double lexinteger(js_State *J) | |
| 265 { | |
| 266 double n = 0; | |
| 267 if (!jsY_isdec(J->lexchar)) | |
| 268 jsY_error(J, "malformed number"); | |
| 269 while (jsY_isdec(J->lexchar)) { | |
| 270 n = n * 10 + (J->lexchar - '0'); | |
| 271 jsY_next(J); | |
| 272 } | |
| 273 return n; | |
| 274 } | |
| 275 | |
| 276 static double lexfraction(js_State *J) | |
| 277 { | |
| 278 double n = 0; | |
| 279 double d = 1; | |
| 280 while (jsY_isdec(J->lexchar)) { | |
| 281 n = n * 10 + (J->lexchar - '0'); | |
| 282 d = d * 10; | |
| 283 jsY_next(J); | |
| 284 } | |
| 285 return n / d; | |
| 286 } | |
| 287 | |
| 288 static double lexexponent(js_State *J) | |
| 289 { | |
| 290 double sign; | |
| 291 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { | |
| 292 if (jsY_accept(J, '-')) sign = -1; | |
| 293 else if (jsY_accept(J, '+')) sign = 1; | |
| 294 else sign = 1; | |
| 295 return sign * lexinteger(J); | |
| 296 } | |
| 297 return 0; | |
| 298 } | |
| 299 | |
| 300 static int lexnumber(js_State *J) | |
| 301 { | |
| 302 double n; | |
| 303 double e; | |
| 304 | |
| 305 if (jsY_accept(J, '0')) { | |
| 306 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { | |
| 307 J->number = lexhex(J); | |
| 308 return TK_NUMBER; | |
| 309 } | |
| 310 if (jsY_isdec(J->lexchar)) | |
| 311 jsY_error(J, "number with leading zero"); | |
| 312 n = 0; | |
| 313 if (jsY_accept(J, '.')) | |
| 314 n += lexfraction(J); | |
| 315 } else if (jsY_accept(J, '.')) { | |
| 316 if (!jsY_isdec(J->lexchar)) | |
| 317 return '.'; | |
| 318 n = lexfraction(J); | |
| 319 } else { | |
| 320 n = lexinteger(J); | |
| 321 if (jsY_accept(J, '.')) | |
| 322 n += lexfraction(J); | |
| 323 } | |
| 324 | |
| 325 e = lexexponent(J); | |
| 326 if (e < 0) | |
| 327 n /= pow(10, -e); | |
| 328 else if (e > 0) | |
| 329 n *= pow(10, e); | |
| 330 | |
| 331 if (jsY_isidentifierstart(J->lexchar)) | |
| 332 jsY_error(J, "number with letter suffix"); | |
| 333 | |
| 334 J->number = n; | |
| 335 return TK_NUMBER; | |
| 336 } | |
| 337 | |
| 338 #else | |
| 339 | |
| 340 static int lexnumber(js_State *J) | |
| 341 { | |
| 342 const char *s = J->source - 1; | |
| 343 | |
| 344 if (jsY_accept(J, '0')) { | |
| 345 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { | |
| 346 J->number = lexhex(J); | |
| 347 return TK_NUMBER; | |
| 348 } | |
| 349 if (jsY_isdec(J->lexchar)) | |
| 350 jsY_error(J, "number with leading zero"); | |
| 351 if (jsY_accept(J, '.')) { | |
| 352 while (jsY_isdec(J->lexchar)) | |
| 353 jsY_next(J); | |
| 354 } | |
| 355 } else if (jsY_accept(J, '.')) { | |
| 356 if (!jsY_isdec(J->lexchar)) | |
| 357 return '.'; | |
| 358 while (jsY_isdec(J->lexchar)) | |
| 359 jsY_next(J); | |
| 360 } else { | |
| 361 while (jsY_isdec(J->lexchar)) | |
| 362 jsY_next(J); | |
| 363 if (jsY_accept(J, '.')) { | |
| 364 while (jsY_isdec(J->lexchar)) | |
| 365 jsY_next(J); | |
| 366 } | |
| 367 } | |
| 368 | |
| 369 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { | |
| 370 if (J->lexchar == '-' || J->lexchar == '+') | |
| 371 jsY_next(J); | |
| 372 if (jsY_isdec(J->lexchar)) | |
| 373 while (jsY_isdec(J->lexchar)) | |
| 374 jsY_next(J); | |
| 375 else | |
| 376 jsY_error(J, "missing exponent"); | |
| 377 } | |
| 378 | |
| 379 if (jsY_isidentifierstart(J->lexchar)) | |
| 380 jsY_error(J, "number with letter suffix"); | |
| 381 | |
| 382 J->number = js_strtod(s, NULL); | |
| 383 return TK_NUMBER; | |
| 384 } | |
| 385 | |
| 386 #endif | |
| 387 | |
| 388 static int lexescape(js_State *J) | |
| 389 { | |
| 390 int x = 0; | |
| 391 | |
| 392 /* already consumed '\' */ | |
| 393 | |
| 394 if (jsY_accept(J, '\n')) | |
| 395 return 0; | |
| 396 | |
| 397 switch (J->lexchar) { | |
| 398 case EOF: jsY_error(J, "unterminated escape sequence"); | |
| 399 case 'u': | |
| 400 jsY_next(J); | |
| 401 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } | |
| 402 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } | |
| 403 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } | |
| 404 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } | |
| 405 textpush(J, x); | |
| 406 break; | |
| 407 case 'x': | |
| 408 jsY_next(J); | |
| 409 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } | |
| 410 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } | |
| 411 textpush(J, x); | |
| 412 break; | |
| 413 case '0': textpush(J, 0); jsY_next(J); break; | |
| 414 case '\\': textpush(J, '\\'); jsY_next(J); break; | |
| 415 case '\'': textpush(J, '\''); jsY_next(J); break; | |
| 416 case '"': textpush(J, '"'); jsY_next(J); break; | |
| 417 case 'b': textpush(J, '\b'); jsY_next(J); break; | |
| 418 case 'f': textpush(J, '\f'); jsY_next(J); break; | |
| 419 case 'n': textpush(J, '\n'); jsY_next(J); break; | |
| 420 case 'r': textpush(J, '\r'); jsY_next(J); break; | |
| 421 case 't': textpush(J, '\t'); jsY_next(J); break; | |
| 422 case 'v': textpush(J, '\v'); jsY_next(J); break; | |
| 423 default: textpush(J, J->lexchar); jsY_next(J); break; | |
| 424 } | |
| 425 return 0; | |
| 426 } | |
| 427 | |
| 428 static int lexstring(js_State *J) | |
| 429 { | |
| 430 const char *s; | |
| 431 | |
| 432 int q = J->lexchar; | |
| 433 jsY_next(J); | |
| 434 | |
| 435 textinit(J); | |
| 436 | |
| 437 while (J->lexchar != q) { | |
| 438 if (J->lexchar == EOF || J->lexchar == '\n') | |
| 439 jsY_error(J, "string not terminated"); | |
| 440 if (jsY_accept(J, '\\')) { | |
| 441 if (lexescape(J)) | |
| 442 jsY_error(J, "malformed escape sequence"); | |
| 443 } else { | |
| 444 textpush(J, J->lexchar); | |
| 445 jsY_next(J); | |
| 446 } | |
| 447 } | |
| 448 jsY_expect(J, q); | |
| 449 | |
| 450 s = textend(J); | |
| 451 | |
| 452 J->text = js_intern(J, s); | |
| 453 return TK_STRING; | |
| 454 } | |
| 455 | |
| 456 /* the ugliest language wart ever... */ | |
| 457 static int isregexpcontext(int last) | |
| 458 { | |
| 459 switch (last) { | |
| 460 case ']': | |
| 461 case ')': | |
| 462 case '}': | |
| 463 case TK_IDENTIFIER: | |
| 464 case TK_NUMBER: | |
| 465 case TK_STRING: | |
| 466 case TK_FALSE: | |
| 467 case TK_NULL: | |
| 468 case TK_THIS: | |
| 469 case TK_TRUE: | |
| 470 return 0; | |
| 471 default: | |
| 472 return 1; | |
| 473 } | |
| 474 } | |
| 475 | |
| 476 static int lexregexp(js_State *J) | |
| 477 { | |
| 478 const char *s; | |
| 479 int g, m, i; | |
| 480 int inclass = 0; | |
| 481 | |
| 482 /* already consumed initial '/' */ | |
| 483 | |
| 484 textinit(J); | |
| 485 | |
| 486 /* regexp body */ | |
| 487 while (J->lexchar != '/' || inclass) { | |
| 488 if (J->lexchar == EOF || J->lexchar == '\n') { | |
| 489 jsY_error(J, "regular expression not terminated"); | |
| 490 } else if (jsY_accept(J, '\\')) { | |
| 491 if (jsY_accept(J, '/')) { | |
| 492 textpush(J, '/'); | |
| 493 } else { | |
| 494 textpush(J, '\\'); | |
| 495 if (J->lexchar == EOF || J->lexchar == '\n') | |
| 496 jsY_error(J, "regular expression not terminated"); | |
| 497 textpush(J, J->lexchar); | |
| 498 jsY_next(J); | |
| 499 } | |
| 500 } else { | |
| 501 if (J->lexchar == '[' && !inclass) | |
| 502 inclass = 1; | |
| 503 if (J->lexchar == ']' && inclass) | |
| 504 inclass = 0; | |
| 505 textpush(J, J->lexchar); | |
| 506 jsY_next(J); | |
| 507 } | |
| 508 } | |
| 509 jsY_expect(J, '/'); | |
| 510 | |
| 511 s = textend(J); | |
| 512 | |
| 513 /* regexp flags */ | |
| 514 g = i = m = 0; | |
| 515 | |
| 516 while (jsY_isidentifierpart(J->lexchar)) { | |
| 517 if (jsY_accept(J, 'g')) ++g; | |
| 518 else if (jsY_accept(J, 'i')) ++i; | |
| 519 else if (jsY_accept(J, 'm')) ++m; | |
| 520 else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar); | |
| 521 } | |
| 522 | |
| 523 if (g > 1 || i > 1 || m > 1) | |
| 524 jsY_error(J, "duplicated flag in regular expression"); | |
| 525 | |
| 526 J->text = js_intern(J, s); | |
| 527 J->number = 0; | |
| 528 if (g) J->number += JS_REGEXP_G; | |
| 529 if (i) J->number += JS_REGEXP_I; | |
| 530 if (m) J->number += JS_REGEXP_M; | |
| 531 return TK_REGEXP; | |
| 532 } | |
| 533 | |
| 534 /* simple "return [no Line Terminator here] ..." contexts */ | |
| 535 static int isnlthcontext(int last) | |
| 536 { | |
| 537 switch (last) { | |
| 538 case TK_BREAK: | |
| 539 case TK_CONTINUE: | |
| 540 case TK_RETURN: | |
| 541 case TK_THROW: | |
| 542 return 1; | |
| 543 default: | |
| 544 return 0; | |
| 545 } | |
| 546 } | |
| 547 | |
| 548 static int jsY_lexx(js_State *J) | |
| 549 { | |
| 550 J->newline = 0; | |
| 551 | |
| 552 while (1) { | |
| 553 J->lexline = J->line; /* save location of beginning of token */ | |
| 554 | |
| 555 while (jsY_iswhite(J->lexchar)) | |
| 556 jsY_next(J); | |
| 557 | |
| 558 if (jsY_accept(J, '\n')) { | |
| 559 J->newline = 1; | |
| 560 if (isnlthcontext(J->lasttoken)) | |
| 561 return ';'; | |
| 562 continue; | |
| 563 } | |
| 564 | |
| 565 if (jsY_accept(J, '/')) { | |
| 566 if (jsY_accept(J, '/')) { | |
| 567 lexlinecomment(J); | |
| 568 continue; | |
| 569 } else if (jsY_accept(J, '*')) { | |
| 570 if (lexcomment(J)) | |
| 571 jsY_error(J, "multi-line comment not terminated"); | |
| 572 continue; | |
| 573 } else if (isregexpcontext(J->lasttoken)) { | |
| 574 return lexregexp(J); | |
| 575 } else if (jsY_accept(J, '=')) { | |
| 576 return TK_DIV_ASS; | |
| 577 } else { | |
| 578 return '/'; | |
| 579 } | |
| 580 } | |
| 581 | |
| 582 if (J->lexchar >= '0' && J->lexchar <= '9') { | |
| 583 return lexnumber(J); | |
| 584 } | |
| 585 | |
| 586 switch (J->lexchar) { | |
| 587 case '(': jsY_next(J); return '('; | |
| 588 case ')': jsY_next(J); return ')'; | |
| 589 case ',': jsY_next(J); return ','; | |
| 590 case ':': jsY_next(J); return ':'; | |
| 591 case ';': jsY_next(J); return ';'; | |
| 592 case '?': jsY_next(J); return '?'; | |
| 593 case '[': jsY_next(J); return '['; | |
| 594 case ']': jsY_next(J); return ']'; | |
| 595 case '{': jsY_next(J); return '{'; | |
| 596 case '}': jsY_next(J); return '}'; | |
| 597 case '~': jsY_next(J); return '~'; | |
| 598 | |
| 599 case '\'': | |
| 600 case '"': | |
| 601 return lexstring(J); | |
| 602 | |
| 603 case '.': | |
| 604 return lexnumber(J); | |
| 605 | |
| 606 case '<': | |
| 607 jsY_next(J); | |
| 608 if (jsY_accept(J, '<')) { | |
| 609 if (jsY_accept(J, '=')) | |
| 610 return TK_SHL_ASS; | |
| 611 return TK_SHL; | |
| 612 } | |
| 613 if (jsY_accept(J, '=')) | |
| 614 return TK_LE; | |
| 615 return '<'; | |
| 616 | |
| 617 case '>': | |
| 618 jsY_next(J); | |
| 619 if (jsY_accept(J, '>')) { | |
| 620 if (jsY_accept(J, '>')) { | |
| 621 if (jsY_accept(J, '=')) | |
| 622 return TK_USHR_ASS; | |
| 623 return TK_USHR; | |
| 624 } | |
| 625 if (jsY_accept(J, '=')) | |
| 626 return TK_SHR_ASS; | |
| 627 return TK_SHR; | |
| 628 } | |
| 629 if (jsY_accept(J, '=')) | |
| 630 return TK_GE; | |
| 631 return '>'; | |
| 632 | |
| 633 case '=': | |
| 634 jsY_next(J); | |
| 635 if (jsY_accept(J, '=')) { | |
| 636 if (jsY_accept(J, '=')) | |
| 637 return TK_STRICTEQ; | |
| 638 return TK_EQ; | |
| 639 } | |
| 640 return '='; | |
| 641 | |
| 642 case '!': | |
| 643 jsY_next(J); | |
| 644 if (jsY_accept(J, '=')) { | |
| 645 if (jsY_accept(J, '=')) | |
| 646 return TK_STRICTNE; | |
| 647 return TK_NE; | |
| 648 } | |
| 649 return '!'; | |
| 650 | |
| 651 case '+': | |
| 652 jsY_next(J); | |
| 653 if (jsY_accept(J, '+')) | |
| 654 return TK_INC; | |
| 655 if (jsY_accept(J, '=')) | |
| 656 return TK_ADD_ASS; | |
| 657 return '+'; | |
| 658 | |
| 659 case '-': | |
| 660 jsY_next(J); | |
| 661 if (jsY_accept(J, '-')) | |
| 662 return TK_DEC; | |
| 663 if (jsY_accept(J, '=')) | |
| 664 return TK_SUB_ASS; | |
| 665 return '-'; | |
| 666 | |
| 667 case '*': | |
| 668 jsY_next(J); | |
| 669 if (jsY_accept(J, '=')) | |
| 670 return TK_MUL_ASS; | |
| 671 return '*'; | |
| 672 | |
| 673 case '%': | |
| 674 jsY_next(J); | |
| 675 if (jsY_accept(J, '=')) | |
| 676 return TK_MOD_ASS; | |
| 677 return '%'; | |
| 678 | |
| 679 case '&': | |
| 680 jsY_next(J); | |
| 681 if (jsY_accept(J, '&')) | |
| 682 return TK_AND; | |
| 683 if (jsY_accept(J, '=')) | |
| 684 return TK_AND_ASS; | |
| 685 return '&'; | |
| 686 | |
| 687 case '|': | |
| 688 jsY_next(J); | |
| 689 if (jsY_accept(J, '|')) | |
| 690 return TK_OR; | |
| 691 if (jsY_accept(J, '=')) | |
| 692 return TK_OR_ASS; | |
| 693 return '|'; | |
| 694 | |
| 695 case '^': | |
| 696 jsY_next(J); | |
| 697 if (jsY_accept(J, '=')) | |
| 698 return TK_XOR_ASS; | |
| 699 return '^'; | |
| 700 | |
| 701 case EOF: | |
| 702 return 0; /* EOF */ | |
| 703 } | |
| 704 | |
| 705 /* Handle \uXXXX escapes in identifiers */ | |
| 706 jsY_unescape(J); | |
| 707 if (jsY_isidentifierstart(J->lexchar)) { | |
| 708 textinit(J); | |
| 709 textpush(J, J->lexchar); | |
| 710 | |
| 711 jsY_next(J); | |
| 712 jsY_unescape(J); | |
| 713 while (jsY_isidentifierpart(J->lexchar)) { | |
| 714 textpush(J, J->lexchar); | |
| 715 jsY_next(J); | |
| 716 jsY_unescape(J); | |
| 717 } | |
| 718 | |
| 719 textend(J); | |
| 720 | |
| 721 return jsY_findkeyword(J, J->lexbuf.text); | |
| 722 } | |
| 723 | |
| 724 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) | |
| 725 jsY_error(J, "unexpected character: '%c'", J->lexchar); | |
| 726 jsY_error(J, "unexpected character: \\u%04X", J->lexchar); | |
| 727 } | |
| 728 } | |
| 729 | |
| 730 void jsY_initlex(js_State *J, const char *filename, const char *source) | |
| 731 { | |
| 732 J->filename = filename; | |
| 733 J->source = source; | |
| 734 J->line = 1; | |
| 735 J->lasttoken = 0; | |
| 736 jsY_next(J); /* load first lookahead character */ | |
| 737 } | |
| 738 | |
| 739 int jsY_lex(js_State *J) | |
| 740 { | |
| 741 return J->lasttoken = jsY_lexx(J); | |
| 742 } | |
| 743 | |
| 744 static int lexjsonnumber(js_State *J) | |
| 745 { | |
| 746 const char *s = J->source - 1; | |
| 747 | |
| 748 if (J->lexchar == '-') | |
| 749 jsY_next(J); | |
| 750 | |
| 751 if (J->lexchar == '0') | |
| 752 jsY_next(J); | |
| 753 else if (J->lexchar >= '1' && J->lexchar <= '9') | |
| 754 while (isdigit(J->lexchar)) | |
| 755 jsY_next(J); | |
| 756 else | |
| 757 jsY_error(J, "unexpected non-digit"); | |
| 758 | |
| 759 if (jsY_accept(J, '.')) { | |
| 760 if (isdigit(J->lexchar)) | |
| 761 while (isdigit(J->lexchar)) | |
| 762 jsY_next(J); | |
| 763 else | |
| 764 jsY_error(J, "missing digits after decimal point"); | |
| 765 } | |
| 766 | |
| 767 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { | |
| 768 if (J->lexchar == '-' || J->lexchar == '+') | |
| 769 jsY_next(J); | |
| 770 if (isdigit(J->lexchar)) | |
| 771 while (isdigit(J->lexchar)) | |
| 772 jsY_next(J); | |
| 773 else | |
| 774 jsY_error(J, "missing digits after exponent indicator"); | |
| 775 } | |
| 776 | |
| 777 J->number = js_strtod(s, NULL); | |
| 778 return TK_NUMBER; | |
| 779 } | |
| 780 | |
| 781 static int lexjsonescape(js_State *J) | |
| 782 { | |
| 783 int x = 0; | |
| 784 | |
| 785 /* already consumed '\' */ | |
| 786 | |
| 787 switch (J->lexchar) { | |
| 788 default: jsY_error(J, "invalid escape sequence"); | |
| 789 case 'u': | |
| 790 jsY_next(J); | |
| 791 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } | |
| 792 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } | |
| 793 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } | |
| 794 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } | |
| 795 textpush(J, x); | |
| 796 break; | |
| 797 case '"': textpush(J, '"'); jsY_next(J); break; | |
| 798 case '\\': textpush(J, '\\'); jsY_next(J); break; | |
| 799 case '/': textpush(J, '/'); jsY_next(J); break; | |
| 800 case 'b': textpush(J, '\b'); jsY_next(J); break; | |
| 801 case 'f': textpush(J, '\f'); jsY_next(J); break; | |
| 802 case 'n': textpush(J, '\n'); jsY_next(J); break; | |
| 803 case 'r': textpush(J, '\r'); jsY_next(J); break; | |
| 804 case 't': textpush(J, '\t'); jsY_next(J); break; | |
| 805 } | |
| 806 return 0; | |
| 807 } | |
| 808 | |
| 809 static int lexjsonstring(js_State *J) | |
| 810 { | |
| 811 const char *s; | |
| 812 | |
| 813 textinit(J); | |
| 814 | |
| 815 while (J->lexchar != '"') { | |
| 816 if (J->lexchar == EOF) | |
| 817 jsY_error(J, "unterminated string"); | |
| 818 else if (J->lexchar < 32) | |
| 819 jsY_error(J, "invalid control character in string"); | |
| 820 else if (jsY_accept(J, '\\')) | |
| 821 lexjsonescape(J); | |
| 822 else { | |
| 823 textpush(J, J->lexchar); | |
| 824 jsY_next(J); | |
| 825 } | |
| 826 } | |
| 827 jsY_expect(J, '"'); | |
| 828 | |
| 829 s = textend(J); | |
| 830 | |
| 831 J->text = js_intern(J, s); | |
| 832 return TK_STRING; | |
| 833 } | |
| 834 | |
| 835 int jsY_lexjson(js_State *J) | |
| 836 { | |
| 837 while (1) { | |
| 838 J->lexline = J->line; /* save location of beginning of token */ | |
| 839 | |
| 840 while (jsY_iswhite(J->lexchar) || J->lexchar == '\n') | |
| 841 jsY_next(J); | |
| 842 | |
| 843 if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-') | |
| 844 return lexjsonnumber(J); | |
| 845 | |
| 846 switch (J->lexchar) { | |
| 847 case ',': jsY_next(J); return ','; | |
| 848 case ':': jsY_next(J); return ':'; | |
| 849 case '[': jsY_next(J); return '['; | |
| 850 case ']': jsY_next(J); return ']'; | |
| 851 case '{': jsY_next(J); return '{'; | |
| 852 case '}': jsY_next(J); return '}'; | |
| 853 | |
| 854 case '"': | |
| 855 jsY_next(J); | |
| 856 return lexjsonstring(J); | |
| 857 | |
| 858 case 'f': | |
| 859 jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e'); | |
| 860 return TK_FALSE; | |
| 861 | |
| 862 case 'n': | |
| 863 jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l'); | |
| 864 return TK_NULL; | |
| 865 | |
| 866 case 't': | |
| 867 jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e'); | |
| 868 return TK_TRUE; | |
| 869 | |
| 870 case EOF: | |
| 871 return 0; /* EOF */ | |
| 872 } | |
| 873 | |
| 874 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) | |
| 875 jsY_error(J, "unexpected character: '%c'", J->lexchar); | |
| 876 jsY_error(J, "unexpected character: \\u%04X", J->lexchar); | |
| 877 } | |
| 878 } |
