Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/html/css-parse.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "html-imp.h" | |
| 25 | |
| 26 #include <string.h> | |
| 27 | |
| 28 #include "css-properties.h" | |
| 29 | |
/* State shared by the CSS lexer and the recursive-descent parser. */
struct lexbuf
{
	fz_context *ctx;		/* context used for allocation and for throwing errors */
	fz_pool *pool;			/* pool that all parsed nodes and strings are allocated from */
	const unsigned char *start;	/* first byte of the source text (for error excerpts) */
	const unsigned char *s;		/* read pointer: next byte to be decoded */
	const char *file;		/* file name quoted in error messages */
	int line;			/* current line number, starting at 1 */
	int lookahead;			/* current token as returned by css_lex(), or EOF */
	int c;				/* current decoded character; 0 once the end of input is reached */
	int string_len;			/* number of bytes currently stored in string[] */
	char string[1024];		/* text of the current token (UTF-8, zero terminated by the lexer) */
};
| 43 | |
| 44 static fz_css_value *parse_expr(struct lexbuf *buf); | |
| 45 static fz_css_selector *parse_selector(struct lexbuf *buf); | |
| 46 | |
| 47 FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg) | |
| 48 { | |
| 49 #define PRE_POST_SIZE 30 | |
| 50 unsigned char text[PRE_POST_SIZE * 2 + 4]; | |
| 51 unsigned char *d = text; | |
| 52 const unsigned char *s = buf->start; | |
| 53 int n; | |
| 54 | |
| 55 /* We want to make a helpful fragment for the error message. | |
| 56 * We want err_pos to be the point at which we just tripped | |
| 57 * the error. err_pos needs to be at least 1 byte behind | |
| 58 * our read pointer, as we've read that char. */ | |
| 59 const unsigned char *err_pos = buf->s; | |
| 60 n = 1; | |
| 61 | |
| 62 /* And if we're using lookahead, it's further behind. */ | |
| 63 if (buf->lookahead >= CSS_KEYWORD) | |
| 64 n += buf->string_len; | |
| 65 else if (buf->lookahead != EOF) | |
| 66 n += 1; | |
| 67 | |
| 68 /* But it can't be before the start of the buffer */ | |
| 69 n = fz_mini(n, err_pos - buf->start); | |
| 70 err_pos -= n; | |
| 71 | |
| 72 /* We're going to try to output: | |
| 73 * <section prior to the error> ">" <the char that tripped> "<" <section after the error> | |
| 74 */ | |
| 75 /* Is the section prior to the error too long? If so, truncate it with an ellipsis. */ | |
| 76 n = sizeof(text)-1; | |
| 77 if (err_pos - s > n-PRE_POST_SIZE - 3) | |
| 78 { | |
| 79 *d++ = '.'; | |
| 80 *d++ = '.'; | |
| 81 *d++ = '.'; | |
| 82 n -= 3; | |
| 83 s = err_pos - (n-PRE_POST_SIZE - 3); | |
| 84 } | |
| 85 | |
| 86 /* Copy the prefix (if there is one) */ | |
| 87 if (err_pos > s) | |
| 88 { | |
| 89 n = err_pos - s; | |
| 90 while (n) | |
| 91 { | |
| 92 unsigned char c = *s++; | |
| 93 *d++ = (c < 32 || c > 127) ? ' ' : c; | |
| 94 n--; | |
| 95 } | |
| 96 } | |
| 97 | |
| 98 /* Marker, char, end marker */ | |
| 99 *d++ = '>', n--; | |
| 100 if (*err_pos) | |
| 101 *d++ = *err_pos++, n--; | |
| 102 *d++ = '<', n--; | |
| 103 | |
| 104 /* Postfix */ | |
| 105 n = (int)strlen((const char *)err_pos); | |
| 106 if (n <= PRE_POST_SIZE) | |
| 107 { | |
| 108 while (n > 0) | |
| 109 { | |
| 110 unsigned char c = *err_pos++; | |
| 111 *d++ = (c < 32 || c > 127) ? ' ' : c; | |
| 112 n--; | |
| 113 } | |
| 114 } | |
| 115 else | |
| 116 { | |
| 117 for (n = PRE_POST_SIZE-3; n > 0; n--) | |
| 118 { | |
| 119 unsigned char c = *err_pos++; | |
| 120 *d++ = (c < 32 || c > 127) ? ' ' : c; | |
| 121 } | |
| 122 | |
| 123 *d++ = '.'; | |
| 124 *d++ = '.'; | |
| 125 *d++ = '.'; | |
| 126 } | |
| 127 *d = 0; | |
| 128 | |
| 129 fz_throw(buf->ctx, FZ_ERROR_SYNTAX, "css syntax error: %s (%s:%d) (%s)", msg, buf->file, buf->line, text); | |
| 130 } | |
| 131 | |
| 132 fz_css *fz_new_css(fz_context *ctx) | |
| 133 { | |
| 134 fz_pool *pool = fz_new_pool(ctx); | |
| 135 fz_css *css = NULL; | |
| 136 | |
| 137 fz_try(ctx) | |
| 138 { | |
| 139 css = fz_pool_alloc(ctx, pool, sizeof *css); | |
| 140 css->pool = pool; | |
| 141 css->rule = NULL; | |
| 142 } | |
| 143 fz_catch(ctx) | |
| 144 { | |
| 145 fz_drop_pool(ctx, pool); | |
| 146 fz_rethrow(ctx); | |
| 147 } | |
| 148 | |
| 149 return css; | |
| 150 } | |
| 151 | |
| 152 void fz_drop_css(fz_context *ctx, fz_css *css) | |
| 153 { | |
| 154 if (css) | |
| 155 fz_drop_pool(ctx, css->pool); | |
| 156 } | |
| 157 | |
| 158 static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_pool *pool, fz_css_selector *selector, fz_css_property *declaration) | |
| 159 { | |
| 160 fz_css_rule *rule = fz_pool_alloc(ctx, pool, sizeof *rule); | |
| 161 rule->selector = selector; | |
| 162 rule->declaration = declaration; | |
| 163 rule->next = NULL; | |
| 164 return rule; | |
| 165 } | |
| 166 | |
| 167 static fz_css_selector *fz_new_css_selector(fz_context *ctx, fz_pool *pool, const char *name) | |
| 168 { | |
| 169 fz_css_selector *sel = fz_pool_alloc(ctx, pool, sizeof *sel); | |
| 170 sel->name = name ? fz_pool_strdup(ctx, pool, name) : NULL; | |
| 171 sel->combine = 0; | |
| 172 sel->cond = NULL; | |
| 173 sel->left = NULL; | |
| 174 sel->right = NULL; | |
| 175 sel->next = NULL; | |
| 176 return sel; | |
| 177 } | |
| 178 | |
| 179 static fz_css_condition *fz_new_css_condition(fz_context *ctx, fz_pool *pool, int type, const char *key, const char *val) | |
| 180 { | |
| 181 fz_css_condition *cond = fz_pool_alloc(ctx, pool, sizeof *cond); | |
| 182 cond->type = type; | |
| 183 cond->key = key ? fz_pool_strdup(ctx, pool, key) : NULL; | |
| 184 cond->val = val ? fz_pool_strdup(ctx, pool, val) : NULL; | |
| 185 cond->next = NULL; | |
| 186 return cond; | |
| 187 } | |
| 188 | |
| 189 static fz_css_property *fz_new_css_property(fz_context *ctx, fz_pool *pool, const char *name, fz_css_value *value, int spec) | |
| 190 { | |
| 191 struct css_property_info *info = css_property_lookup(name, strlen(name)); | |
| 192 if (info) | |
| 193 { | |
| 194 fz_css_property *prop = fz_pool_alloc(ctx, pool, sizeof *prop); | |
| 195 prop->name = info->key; | |
| 196 prop->value = value; | |
| 197 prop->spec = spec; | |
| 198 prop->important = 0; | |
| 199 prop->next = NULL; | |
| 200 return prop; | |
| 201 } | |
| 202 return NULL; | |
| 203 } | |
| 204 | |
| 205 static fz_css_value *fz_new_css_value_x(fz_context *ctx, fz_pool *pool, int type) | |
| 206 { | |
| 207 fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val); | |
| 208 val->type = type; | |
| 209 val->data = NULL; | |
| 210 val->args = NULL; | |
| 211 val->next = NULL; | |
| 212 return val; | |
| 213 } | |
| 214 | |
| 215 static fz_css_value *fz_new_css_value(fz_context *ctx, fz_pool *pool, int type, const char *data) | |
| 216 { | |
| 217 fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val); | |
| 218 val->type = type; | |
| 219 val->data = fz_pool_strdup(ctx, pool, data); | |
| 220 val->args = NULL; | |
| 221 val->next = NULL; | |
| 222 return val; | |
| 223 } | |
| 224 | |
| 225 static void css_lex_next(struct lexbuf *buf) | |
| 226 { | |
| 227 if (buf->c == 0) | |
| 228 return; | |
| 229 buf->s += fz_chartorune(&buf->c, (const char *)buf->s); | |
| 230 if (buf->c == '\n') | |
| 231 ++buf->line; | |
| 232 buf->lookahead = EOF; | |
| 233 } | |
| 234 | |
| 235 static void css_lex_init(fz_context *ctx, struct lexbuf *buf, fz_pool *pool, const char *s, const char *file) | |
| 236 { | |
| 237 buf->ctx = ctx; | |
| 238 buf->pool = pool; | |
| 239 buf->s = (const unsigned char *)s; | |
| 240 buf->lookahead = EOF; | |
| 241 buf->start = buf->s; | |
| 242 buf->c = -1; | |
| 243 buf->file = file; | |
| 244 buf->line = 1; | |
| 245 css_lex_next(buf); | |
| 246 | |
| 247 buf->string_len = 0; | |
| 248 } | |
| 249 | |
/* Return 1 for space, tab, carriage return, newline and form feed; 0 otherwise. */
static inline int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
	case '\f':
		return 1;
	default:
		return 0;
	}
}
| 254 | |
| 255 static int isnmstart(int c) | |
| 256 { | |
| 257 return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || | |
| 258 (c >= 128 && c <= UCS_MAX); | |
| 259 } | |
| 260 | |
| 261 static int isnmchar(int c) | |
| 262 { | |
| 263 return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || | |
| 264 (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= UCS_MAX); | |
| 265 } | |
| 266 | |
| 267 static void css_push_char(struct lexbuf *buf, int c) | |
| 268 { | |
| 269 char out[4]; | |
| 270 int n = fz_runetochar(out, c); | |
| 271 if (buf->string_len + n >= (int)nelem(buf->string)) | |
| 272 fz_css_error(buf, "token too long"); | |
| 273 memcpy(buf->string + buf->string_len, out, n); | |
| 274 buf->string_len += n; | |
| 275 } | |
| 276 | |
| 277 static void css_push_zero(struct lexbuf *buf) | |
| 278 { | |
| 279 if (buf->string_len + 1 >= (int)nelem(buf->string)) | |
| 280 fz_css_error(buf, "token too long"); | |
| 281 buf->string[buf->string_len] = 0; | |
| 282 buf->string_len += 1; | |
| 283 } | |
| 284 | |
| 285 static int css_lex_accept(struct lexbuf *buf, int t) | |
| 286 { | |
| 287 if (buf->c == t) | |
| 288 { | |
| 289 css_lex_next(buf); | |
| 290 return 1; | |
| 291 } | |
| 292 return 0; | |
| 293 } | |
| 294 | |
/* Consume the character t, or raise an "unexpected character" syntax error if the current character differs. */
static void css_lex_expect(struct lexbuf *buf, int t)
{
	if (css_lex_accept(buf, t))
		return;

	fz_css_error(buf, "unexpected character");
}
| 300 | |
| 301 static int css_lex_number(struct lexbuf *buf) | |
| 302 { | |
| 303 while (buf->c >= '0' && buf->c <= '9') | |
| 304 { | |
| 305 css_push_char(buf, buf->c); | |
| 306 css_lex_next(buf); | |
| 307 } | |
| 308 | |
| 309 if (css_lex_accept(buf, '.')) | |
| 310 { | |
| 311 css_push_char(buf, '.'); | |
| 312 while (buf->c >= '0' && buf->c <= '9') | |
| 313 { | |
| 314 css_push_char(buf, buf->c); | |
| 315 css_lex_next(buf); | |
| 316 } | |
| 317 } | |
| 318 | |
| 319 if (css_lex_accept(buf, '%')) | |
| 320 { | |
| 321 css_push_char(buf, '%'); | |
| 322 css_push_zero(buf); | |
| 323 return CSS_PERCENT; | |
| 324 } | |
| 325 | |
| 326 if (isnmstart(buf->c)) | |
| 327 { | |
| 328 css_push_char(buf, buf->c); | |
| 329 css_lex_next(buf); | |
| 330 while (isnmchar(buf->c)) | |
| 331 { | |
| 332 css_push_char(buf, buf->c); | |
| 333 css_lex_next(buf); | |
| 334 } | |
| 335 css_push_zero(buf); | |
| 336 return CSS_LENGTH; | |
| 337 } | |
| 338 | |
| 339 css_push_zero(buf); | |
| 340 return CSS_NUMBER; | |
| 341 } | |
| 342 | |
| 343 static int css_lex_keyword(struct lexbuf *buf) | |
| 344 { | |
| 345 while (isnmchar(buf->c)) | |
| 346 { | |
| 347 css_push_char(buf, buf->c); | |
| 348 css_lex_next(buf); | |
| 349 } | |
| 350 css_push_zero(buf); | |
| 351 return CSS_KEYWORD; | |
| 352 } | |
| 353 | |
| 354 static int css_lex_hash(struct lexbuf *buf) | |
| 355 { | |
| 356 while (isnmchar(buf->c)) | |
| 357 { | |
| 358 css_push_char(buf, buf->c); | |
| 359 css_lex_next(buf); | |
| 360 } | |
| 361 css_push_zero(buf); | |
| 362 return CSS_HASH; | |
| 363 } | |
| 364 | |
| 365 static int css_lex_string(struct lexbuf *buf, int q) | |
| 366 { | |
| 367 while (buf->c && buf->c != q) | |
| 368 { | |
| 369 if (css_lex_accept(buf, '\\')) | |
| 370 { | |
| 371 if (css_lex_accept(buf, 'n')) | |
| 372 css_push_char(buf, '\n'); | |
| 373 else if (css_lex_accept(buf, 'r')) | |
| 374 css_push_char(buf, '\r'); | |
| 375 else if (css_lex_accept(buf, 'f')) | |
| 376 css_push_char(buf, '\f'); | |
| 377 else if (css_lex_accept(buf, '\f')) | |
| 378 /* line continuation */ ; | |
| 379 else if (css_lex_accept(buf, '\n')) | |
| 380 /* line continuation */ ; | |
| 381 else if (css_lex_accept(buf, '\r')) | |
| 382 css_lex_accept(buf, '\n'); | |
| 383 else | |
| 384 { | |
| 385 css_push_char(buf, buf->c); | |
| 386 css_lex_next(buf); | |
| 387 } | |
| 388 } | |
| 389 else | |
| 390 { | |
| 391 css_push_char(buf, buf->c); | |
| 392 css_lex_next(buf); | |
| 393 } | |
| 394 } | |
| 395 css_lex_expect(buf, q); | |
| 396 css_push_zero(buf); | |
| 397 return CSS_STRING; | |
| 398 } | |
| 399 | |
| 400 static void css_lex_uri(struct lexbuf *buf) | |
| 401 { | |
| 402 while (buf->c && buf->c != ')' && !iswhite(buf->c)) | |
| 403 { | |
| 404 if (css_lex_accept(buf, '\\')) | |
| 405 { | |
| 406 if (css_lex_accept(buf, 'n')) | |
| 407 css_push_char(buf, '\n'); | |
| 408 else if (css_lex_accept(buf, 'r')) | |
| 409 css_push_char(buf, '\r'); | |
| 410 else if (css_lex_accept(buf, 'f')) | |
| 411 css_push_char(buf, '\f'); | |
| 412 else | |
| 413 { | |
| 414 css_push_char(buf, buf->c); | |
| 415 css_lex_next(buf); | |
| 416 } | |
| 417 } | |
| 418 else if (buf->c == '!' || buf->c == '#' || buf->c == '$' || buf->c == '%' || buf->c == '&' || | |
| 419 (buf->c >= '*' && buf->c <= '[') || | |
| 420 (buf->c >= ']' && buf->c <= '~') || | |
| 421 buf->c > 159) | |
| 422 { | |
| 423 css_push_char(buf, buf->c); | |
| 424 css_lex_next(buf); | |
| 425 } | |
| 426 else | |
| 427 fz_css_error(buf, "unexpected character in url"); | |
| 428 } | |
| 429 css_push_zero(buf); | |
| 430 } | |
| 431 | |
| 432 static int css_lex(struct lexbuf *buf) | |
| 433 { | |
| 434 int t; | |
| 435 | |
| 436 // TODO: keyword escape sequences | |
| 437 | |
| 438 buf->string_len = 0; | |
| 439 | |
| 440 restart: | |
| 441 if (buf->c == 0) | |
| 442 return EOF; | |
| 443 | |
| 444 if (iswhite(buf->c)) | |
| 445 { | |
| 446 while (iswhite(buf->c)) | |
| 447 css_lex_next(buf); | |
| 448 return ' '; | |
| 449 } | |
| 450 | |
| 451 if (css_lex_accept(buf, '/')) | |
| 452 { | |
| 453 if (css_lex_accept(buf, '*')) | |
| 454 { | |
| 455 while (buf->c) | |
| 456 { | |
| 457 if (css_lex_accept(buf, '*')) | |
| 458 { | |
| 459 while (buf->c == '*') | |
| 460 css_lex_next(buf); | |
| 461 if (css_lex_accept(buf, '/')) | |
| 462 goto restart; | |
| 463 } | |
| 464 css_lex_next(buf); | |
| 465 } | |
| 466 fz_css_error(buf, "unterminated comment"); | |
| 467 } | |
| 468 return '/'; | |
| 469 } | |
| 470 | |
| 471 if (css_lex_accept(buf, '<')) | |
| 472 { | |
| 473 if (css_lex_accept(buf, '!')) | |
| 474 { | |
| 475 css_lex_expect(buf, '-'); | |
| 476 css_lex_expect(buf, '-'); | |
| 477 goto restart; /* ignore CDO */ | |
| 478 } | |
| 479 return '<'; | |
| 480 } | |
| 481 | |
| 482 if (css_lex_accept(buf, '-')) | |
| 483 { | |
| 484 if (css_lex_accept(buf, '-')) | |
| 485 { | |
| 486 if (css_lex_accept(buf, '>')) | |
| 487 goto restart; /* ignore CDC */ | |
| 488 } | |
| 489 if (isnmstart(buf->c)) | |
| 490 { | |
| 491 css_push_char(buf, '-'); | |
| 492 return css_lex_keyword(buf); | |
| 493 } | |
| 494 return '-'; | |
| 495 } | |
| 496 | |
| 497 if (css_lex_accept(buf, '.')) | |
| 498 { | |
| 499 if (buf->c >= '0' && buf->c <= '9') | |
| 500 { | |
| 501 css_push_char(buf, '.'); | |
| 502 return css_lex_number(buf); | |
| 503 } | |
| 504 return '.'; | |
| 505 } | |
| 506 | |
| 507 if (css_lex_accept(buf, '#')) | |
| 508 { | |
| 509 if (isnmchar(buf->c)) | |
| 510 return css_lex_hash(buf); | |
| 511 return '#'; | |
| 512 } | |
| 513 | |
| 514 if (css_lex_accept(buf, '"')) | |
| 515 return css_lex_string(buf, '"'); | |
| 516 if (css_lex_accept(buf, '\'')) | |
| 517 return css_lex_string(buf, '\''); | |
| 518 | |
| 519 if (buf->c >= '0' && buf->c <= '9') | |
| 520 return css_lex_number(buf); | |
| 521 | |
| 522 if (css_lex_accept(buf, 'u')) | |
| 523 { | |
| 524 if (css_lex_accept(buf, 'r')) | |
| 525 { | |
| 526 if (css_lex_accept(buf, 'l')) | |
| 527 { | |
| 528 if (css_lex_accept(buf, '(')) | |
| 529 { | |
| 530 while (iswhite(buf->c)) | |
| 531 css_lex_next(buf); | |
| 532 if (css_lex_accept(buf, '"')) | |
| 533 css_lex_string(buf, '"'); | |
| 534 else if (css_lex_accept(buf, '\'')) | |
| 535 css_lex_string(buf, '\''); | |
| 536 else | |
| 537 css_lex_uri(buf); | |
| 538 while (iswhite(buf->c)) | |
| 539 css_lex_next(buf); | |
| 540 css_lex_expect(buf, ')'); | |
| 541 return CSS_URI; | |
| 542 } | |
| 543 css_push_char(buf, 'u'); | |
| 544 css_push_char(buf, 'r'); | |
| 545 css_push_char(buf, 'l'); | |
| 546 return css_lex_keyword(buf); | |
| 547 } | |
| 548 css_push_char(buf, 'u'); | |
| 549 css_push_char(buf, 'r'); | |
| 550 return css_lex_keyword(buf); | |
| 551 } | |
| 552 css_push_char(buf, 'u'); | |
| 553 return css_lex_keyword(buf); | |
| 554 } | |
| 555 | |
| 556 if (isnmstart(buf->c)) | |
| 557 { | |
| 558 css_push_char(buf, buf->c); | |
| 559 css_lex_next(buf); | |
| 560 return css_lex_keyword(buf); | |
| 561 } | |
| 562 | |
| 563 t = buf->c; | |
| 564 css_lex_next(buf); | |
| 565 return t; | |
| 566 } | |
| 567 | |
| 568 static void next(struct lexbuf *buf) | |
| 569 { | |
| 570 buf->lookahead = css_lex(buf); | |
| 571 } | |
| 572 | |
| 573 static int accept(struct lexbuf *buf, int t) | |
| 574 { | |
| 575 if (buf->lookahead == t) | |
| 576 { | |
| 577 next(buf); | |
| 578 return 1; | |
| 579 } | |
| 580 return 0; | |
| 581 } | |
| 582 | |
/* Consume the token t, or raise an "unexpected token" syntax error if the lookahead differs. */
static void expect(struct lexbuf *buf, int t)
{
	if (!accept(buf, t))
		fz_css_error(buf, "unexpected token");
}
| 589 | |
| 590 static void white(struct lexbuf *buf) | |
| 591 { | |
| 592 while (buf->lookahead == ' ') | |
| 593 next(buf); | |
| 594 } | |
| 595 | |
| 596 static int iscond(int t) | |
| 597 { | |
| 598 return t == ':' || t == '.' || t == '[' || t == CSS_HASH; | |
| 599 } | |
| 600 | |
| 601 static fz_css_value *parse_term(struct lexbuf *buf) | |
| 602 { | |
| 603 fz_css_value *v; | |
| 604 | |
| 605 if (buf->lookahead == '+' || buf->lookahead == '-') | |
| 606 { | |
| 607 float sign = buf->lookahead == '-' ? -1 : 1; | |
| 608 next(buf); | |
| 609 if (buf->lookahead != CSS_NUMBER && buf->lookahead != CSS_LENGTH && buf->lookahead != CSS_PERCENT) | |
| 610 fz_css_error(buf, "expected number"); | |
| 611 if (sign < 0) | |
| 612 { | |
| 613 v = fz_new_css_value_x(buf->ctx, buf->pool, buf->lookahead); | |
| 614 v->data = fz_pool_alloc(buf->ctx, buf->pool, strlen(buf->string) + 2); | |
| 615 v->data[0] = '-'; | |
| 616 strcpy(v->data + 1, buf->string); | |
| 617 } | |
| 618 else | |
| 619 { | |
| 620 v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string); | |
| 621 } | |
| 622 next(buf); | |
| 623 white(buf); | |
| 624 return v; | |
| 625 } | |
| 626 | |
| 627 if (buf->lookahead == CSS_KEYWORD) | |
| 628 { | |
| 629 v = fz_new_css_value(buf->ctx, buf->pool, CSS_KEYWORD, buf->string); | |
| 630 next(buf); | |
| 631 if (accept(buf, '(')) | |
| 632 { | |
| 633 white(buf); | |
| 634 v->type = '('; | |
| 635 v->args = parse_expr(buf); | |
| 636 expect(buf, ')'); | |
| 637 } | |
| 638 white(buf); | |
| 639 return v; | |
| 640 } | |
| 641 | |
| 642 switch (buf->lookahead) | |
| 643 { | |
| 644 case CSS_HASH: | |
| 645 case CSS_STRING: | |
| 646 case CSS_URI: | |
| 647 case CSS_NUMBER: | |
| 648 case CSS_LENGTH: | |
| 649 case CSS_PERCENT: | |
| 650 v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string); | |
| 651 next(buf); | |
| 652 white(buf); | |
| 653 return v; | |
| 654 } | |
| 655 | |
| 656 fz_css_error(buf, "expected value"); | |
| 657 } | |
| 658 | |
| 659 static fz_css_value *parse_expr(struct lexbuf *buf) | |
| 660 { | |
| 661 fz_css_value *head, *tail; | |
| 662 | |
| 663 head = tail = parse_term(buf); | |
| 664 | |
| 665 while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' && | |
| 666 buf->lookahead != ')' && buf->lookahead != EOF) | |
| 667 { | |
| 668 if (accept(buf, ',')) | |
| 669 { | |
| 670 white(buf); | |
| 671 if (buf->lookahead != ';') | |
| 672 { | |
| 673 tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, ',', ","); | |
| 674 tail = tail->next = parse_term(buf); | |
| 675 } | |
| 676 } | |
| 677 else if (accept(buf, '/')) | |
| 678 { | |
| 679 white(buf); | |
| 680 tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, '/', "/"); | |
| 681 tail = tail->next = parse_term(buf); | |
| 682 } | |
| 683 else | |
| 684 { | |
| 685 tail = tail->next = parse_term(buf); | |
| 686 } | |
| 687 } | |
| 688 | |
| 689 return head; | |
| 690 } | |
| 691 | |
| 692 static fz_css_property *parse_declaration(struct lexbuf *buf) | |
| 693 { | |
| 694 fz_css_property *p; | |
| 695 | |
| 696 if (buf->lookahead != CSS_KEYWORD) | |
| 697 fz_css_error(buf, "expected keyword in property"); | |
| 698 p = fz_new_css_property(buf->ctx, buf->pool, buf->string, NULL, 0); | |
| 699 next(buf); | |
| 700 | |
| 701 white(buf); | |
| 702 expect(buf, ':'); | |
| 703 white(buf); | |
| 704 | |
| 705 if (p) | |
| 706 p->value = parse_expr(buf); | |
| 707 else | |
| 708 (void) parse_expr(buf); | |
| 709 | |
| 710 /* !important */ | |
| 711 if (accept(buf, '!')) | |
| 712 { | |
| 713 white(buf); | |
| 714 if (buf->lookahead != CSS_KEYWORD || strcmp(buf->string, "important")) | |
| 715 fz_css_error(buf, "expected keyword 'important' after '!'"); | |
| 716 if (p) | |
| 717 p->important = 1; | |
| 718 next(buf); | |
| 719 white(buf); | |
| 720 } | |
| 721 | |
| 722 return p; | |
| 723 } | |
| 724 | |
| 725 static fz_css_property *parse_declaration_list(struct lexbuf *buf) | |
| 726 { | |
| 727 fz_css_property *head, *tail = NULL, *p; | |
| 728 | |
| 729 white(buf); | |
| 730 | |
| 731 if (buf->lookahead == '}' || buf->lookahead == EOF) | |
| 732 return NULL; | |
| 733 | |
| 734 p = parse_declaration(buf); | |
| 735 if (p) | |
| 736 tail = p; | |
| 737 head = tail; | |
| 738 | |
| 739 while (accept(buf, ';')) | |
| 740 { | |
| 741 white(buf); | |
| 742 | |
| 743 if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF) | |
| 744 { | |
| 745 p = parse_declaration(buf); | |
| 746 if (p) | |
| 747 { | |
| 748 if (!head) | |
| 749 head = tail = p; | |
| 750 else | |
| 751 tail = tail->next = p; | |
| 752 } | |
| 753 } | |
| 754 } | |
| 755 | |
| 756 return head; | |
| 757 } | |
| 758 | |
| 759 static char *parse_attrib_value(struct lexbuf *buf) | |
| 760 { | |
| 761 char *s; | |
| 762 | |
| 763 if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING) | |
| 764 { | |
| 765 s = fz_pool_strdup(buf->ctx, buf->pool, buf->string); | |
| 766 next(buf); | |
| 767 white(buf); | |
| 768 return s; | |
| 769 } | |
| 770 | |
| 771 fz_css_error(buf, "expected attribute value"); | |
| 772 } | |
| 773 | |
| 774 static fz_css_condition *parse_condition(struct lexbuf *buf) | |
| 775 { | |
| 776 fz_css_condition *c; | |
| 777 | |
| 778 if (accept(buf, ':')) | |
| 779 { | |
| 780 (void)accept(buf, ':'); /* swallow css3 :: syntax and pretend it's a normal pseudo-class */ | |
| 781 if (buf->lookahead != CSS_KEYWORD) | |
| 782 fz_css_error(buf, "expected keyword after ':'"); | |
| 783 c = fz_new_css_condition(buf->ctx, buf->pool, ':', "pseudo", buf->string); | |
| 784 next(buf); | |
| 785 if (accept(buf, '(')) | |
| 786 { | |
| 787 white(buf); | |
| 788 if (accept(buf, CSS_KEYWORD)) | |
| 789 white(buf); | |
| 790 expect(buf, ')'); | |
| 791 } | |
| 792 return c; | |
| 793 } | |
| 794 | |
| 795 if (accept(buf, '.')) | |
| 796 { | |
| 797 if (buf->lookahead != CSS_KEYWORD) | |
| 798 fz_css_error(buf, "expected keyword after '.'"); | |
| 799 c = fz_new_css_condition(buf->ctx, buf->pool, '.', "class", buf->string); | |
| 800 next(buf); | |
| 801 return c; | |
| 802 } | |
| 803 | |
| 804 if (accept(buf, '[')) | |
| 805 { | |
| 806 white(buf); | |
| 807 | |
| 808 if (buf->lookahead != CSS_KEYWORD) | |
| 809 fz_css_error(buf, "expected keyword after '['"); | |
| 810 c = fz_new_css_condition(buf->ctx, buf->pool, '[', buf->string, NULL); | |
| 811 next(buf); | |
| 812 | |
| 813 white(buf); | |
| 814 | |
| 815 if (accept(buf, '=')) | |
| 816 { | |
| 817 c->type = '='; | |
| 818 c->val = parse_attrib_value(buf); | |
| 819 } | |
| 820 else if (accept(buf, '|')) | |
| 821 { | |
| 822 expect(buf, '='); | |
| 823 c->type = '|'; | |
| 824 c->val = parse_attrib_value(buf); | |
| 825 } | |
| 826 else if (accept(buf, '~')) | |
| 827 { | |
| 828 expect(buf, '='); | |
| 829 c->type = '~'; | |
| 830 c->val = parse_attrib_value(buf); | |
| 831 } | |
| 832 | |
| 833 expect(buf, ']'); | |
| 834 | |
| 835 return c; | |
| 836 } | |
| 837 | |
| 838 if (buf->lookahead == CSS_HASH) | |
| 839 { | |
| 840 c = fz_new_css_condition(buf->ctx, buf->pool, '#', "id", buf->string); | |
| 841 next(buf); | |
| 842 return c; | |
| 843 } | |
| 844 | |
| 845 fz_css_error(buf, "expected condition"); | |
| 846 } | |
| 847 | |
| 848 static fz_css_condition *parse_condition_list(struct lexbuf *buf) | |
| 849 { | |
| 850 fz_css_condition *head, *tail; | |
| 851 | |
| 852 head = tail = parse_condition(buf); | |
| 853 while (iscond(buf->lookahead)) | |
| 854 { | |
| 855 tail = tail->next = parse_condition(buf); | |
| 856 } | |
| 857 return head; | |
| 858 } | |
| 859 | |
| 860 static fz_css_selector *parse_simple_selector(struct lexbuf *buf) | |
| 861 { | |
| 862 fz_css_selector *s; | |
| 863 | |
| 864 if (accept(buf, '*')) | |
| 865 { | |
| 866 s = fz_new_css_selector(buf->ctx, buf->pool, NULL); | |
| 867 if (iscond(buf->lookahead)) | |
| 868 s->cond = parse_condition_list(buf); | |
| 869 return s; | |
| 870 } | |
| 871 else if (buf->lookahead == CSS_KEYWORD) | |
| 872 { | |
| 873 s = fz_new_css_selector(buf->ctx, buf->pool, buf->string); | |
| 874 next(buf); | |
| 875 if (iscond(buf->lookahead)) | |
| 876 s->cond = parse_condition_list(buf); | |
| 877 return s; | |
| 878 } | |
| 879 else if (iscond(buf->lookahead)) | |
| 880 { | |
| 881 s = fz_new_css_selector(buf->ctx, buf->pool, NULL); | |
| 882 s->cond = parse_condition_list(buf); | |
| 883 return s; | |
| 884 } | |
| 885 | |
| 886 fz_css_error(buf, "expected selector"); | |
| 887 } | |
| 888 | |
| 889 static fz_css_selector *parse_combinator(struct lexbuf *buf, int c, fz_css_selector *a) | |
| 890 { | |
| 891 fz_css_selector *sel, *b; | |
| 892 white(buf); | |
| 893 b = parse_simple_selector(buf); | |
| 894 sel = fz_new_css_selector(buf->ctx, buf->pool, NULL); | |
| 895 sel->combine = c; | |
| 896 sel->left = a; | |
| 897 sel->right = b; | |
| 898 return sel; | |
| 899 } | |
| 900 | |
| 901 static fz_css_selector *parse_selector(struct lexbuf *buf) | |
| 902 { | |
| 903 fz_css_selector *sel = parse_simple_selector(buf); | |
| 904 for (;;) | |
| 905 { | |
| 906 if (accept(buf, ' ')) | |
| 907 { | |
| 908 white(buf); | |
| 909 if (accept(buf, '+')) | |
| 910 sel = parse_combinator(buf, '+', sel); | |
| 911 else if (accept(buf, '>')) | |
| 912 sel = parse_combinator(buf, '>', sel); | |
| 913 else if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF) | |
| 914 sel = parse_combinator(buf, ' ', sel); | |
| 915 else | |
| 916 break; | |
| 917 } | |
| 918 else if (accept(buf, '+')) | |
| 919 sel = parse_combinator(buf, '+', sel); | |
| 920 else if (accept(buf, '>')) | |
| 921 sel = parse_combinator(buf, '>', sel); | |
| 922 else | |
| 923 break; | |
| 924 } | |
| 925 return sel; | |
| 926 } | |
| 927 | |
| 928 static fz_css_selector *parse_selector_list(struct lexbuf *buf) | |
| 929 { | |
| 930 fz_css_selector *head, *tail; | |
| 931 | |
| 932 head = tail = parse_selector(buf); | |
| 933 while (accept(buf, ',')) | |
| 934 { | |
| 935 white(buf); | |
| 936 tail = tail->next = parse_selector(buf); | |
| 937 } | |
| 938 return head; | |
| 939 } | |
| 940 | |
| 941 static fz_css_rule *parse_ruleset(struct lexbuf *buf) | |
| 942 { | |
| 943 fz_css_selector *s = NULL; | |
| 944 fz_css_property *p = NULL; | |
| 945 | |
| 946 fz_try(buf->ctx) | |
| 947 { | |
| 948 s = parse_selector_list(buf); | |
| 949 expect(buf, '{'); | |
| 950 p = parse_declaration_list(buf); | |
| 951 expect(buf, '}'); | |
| 952 white(buf); | |
| 953 } | |
| 954 fz_catch(buf->ctx) | |
| 955 { | |
| 956 fz_rethrow_unless(buf->ctx, FZ_ERROR_SYNTAX); | |
| 957 fz_report_error(buf->ctx); | |
| 958 | |
| 959 while (buf->lookahead != EOF) | |
| 960 { | |
| 961 if (accept(buf, '}')) | |
| 962 { | |
| 963 white(buf); | |
| 964 break; | |
| 965 } | |
| 966 next(buf); | |
| 967 } | |
| 968 return NULL; | |
| 969 } | |
| 970 | |
| 971 return fz_new_css_rule(buf->ctx, buf->pool, s, p); | |
| 972 } | |
| 973 | |
| 974 static fz_css_rule *parse_at_page(struct lexbuf *buf) | |
| 975 { | |
| 976 fz_css_selector *s = NULL; | |
| 977 fz_css_property *p = NULL; | |
| 978 | |
| 979 white(buf); | |
| 980 if (accept(buf, ':')) | |
| 981 { | |
| 982 expect(buf, CSS_KEYWORD); | |
| 983 white(buf); | |
| 984 } | |
| 985 expect(buf, '{'); | |
| 986 p = parse_declaration_list(buf); | |
| 987 expect(buf, '}'); | |
| 988 white(buf); | |
| 989 | |
| 990 s = fz_new_css_selector(buf->ctx, buf->pool, "@page"); | |
| 991 return fz_new_css_rule(buf->ctx, buf->pool, s, p); | |
| 992 } | |
| 993 | |
| 994 static fz_css_rule *parse_at_font_face(struct lexbuf *buf) | |
| 995 { | |
| 996 fz_css_selector *s = NULL; | |
| 997 fz_css_property *p = NULL; | |
| 998 | |
| 999 white(buf); | |
| 1000 expect(buf, '{'); | |
| 1001 p = parse_declaration_list(buf); | |
| 1002 expect(buf, '}'); | |
| 1003 white(buf); | |
| 1004 | |
| 1005 s = fz_new_css_selector(buf->ctx, buf->pool, "@font-face"); | |
| 1006 return fz_new_css_rule(buf->ctx, buf->pool, s, p); | |
| 1007 } | |
| 1008 | |
| 1009 static void parse_at_rule(struct lexbuf *buf) | |
| 1010 { | |
| 1011 expect(buf, CSS_KEYWORD); | |
| 1012 | |
| 1013 /* skip until '{' or ';' */ | |
| 1014 while (buf->lookahead != EOF) | |
| 1015 { | |
| 1016 if (accept(buf, ';')) | |
| 1017 { | |
| 1018 white(buf); | |
| 1019 return; | |
| 1020 } | |
| 1021 if (accept(buf, '{')) | |
| 1022 { | |
| 1023 int depth = 1; | |
| 1024 while (buf->lookahead != EOF && depth > 0) | |
| 1025 { | |
| 1026 if (accept(buf, '{')) | |
| 1027 ++depth; | |
| 1028 else if (accept(buf, '}')) | |
| 1029 --depth; | |
| 1030 else | |
| 1031 next(buf); | |
| 1032 } | |
| 1033 white(buf); | |
| 1034 return; | |
| 1035 } | |
| 1036 next(buf); | |
| 1037 } | |
| 1038 } | |
| 1039 | |
| 1040 static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain) | |
| 1041 { | |
| 1042 fz_css_rule *rule, **nextp, *tail; | |
| 1043 | |
| 1044 tail = chain; | |
| 1045 if (tail) | |
| 1046 { | |
| 1047 while (tail->next) | |
| 1048 tail = tail->next; | |
| 1049 nextp = &tail->next; | |
| 1050 } | |
| 1051 else | |
| 1052 { | |
| 1053 nextp = &tail; | |
| 1054 } | |
| 1055 | |
| 1056 white(buf); | |
| 1057 | |
| 1058 while (buf->lookahead != EOF) | |
| 1059 { | |
| 1060 if (accept(buf, '@')) | |
| 1061 { | |
| 1062 if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "page")) | |
| 1063 { | |
| 1064 next(buf); | |
| 1065 rule = *nextp = parse_at_page(buf); | |
| 1066 nextp = &rule->next; | |
| 1067 } | |
| 1068 else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "font-face")) | |
| 1069 { | |
| 1070 next(buf); | |
| 1071 rule = *nextp = parse_at_font_face(buf); | |
| 1072 nextp = &rule->next; | |
| 1073 } | |
| 1074 else | |
| 1075 { | |
| 1076 parse_at_rule(buf); | |
| 1077 } | |
| 1078 } | |
| 1079 else | |
| 1080 { | |
| 1081 fz_css_rule *x = parse_ruleset(buf); | |
| 1082 if (x) | |
| 1083 { | |
| 1084 rule = *nextp = x; | |
| 1085 nextp = &rule->next; | |
| 1086 } | |
| 1087 } | |
| 1088 white(buf); | |
| 1089 } | |
| 1090 | |
| 1091 return chain ? chain : tail; | |
| 1092 } | |
| 1093 | |
| 1094 const char *fz_css_property_name(int key) | |
| 1095 { | |
| 1096 const char *name = "unknown"; | |
| 1097 size_t i; | |
| 1098 for (i = 0; i < nelem(css_property_list); ++i) | |
| 1099 if (*css_property_list[i].name && css_property_list[i].key == key) | |
| 1100 name = css_property_list[i].name; | |
| 1101 return name; | |
| 1102 } | |
| 1103 | |
| 1104 fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source) | |
| 1105 { | |
| 1106 struct lexbuf buf; | |
| 1107 css_lex_init(ctx, &buf, pool, source, "<inline>"); | |
| 1108 next(&buf); | |
| 1109 return parse_declaration_list(&buf); | |
| 1110 } | |
| 1111 | |
| 1112 void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file) | |
| 1113 { | |
| 1114 struct lexbuf buf; | |
| 1115 css_lex_init(ctx, &buf, css->pool, source, file); | |
| 1116 next(&buf); | |
| 1117 css->rule = parse_stylesheet(&buf, css->rule); | |
| 1118 } |
