comparison mupdf-source/source/html/css-parse.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "html-imp.h"
25
26 #include <string.h>
27
28 #include "css-properties.h"
29
30 struct lexbuf
31 {
32 fz_context *ctx;
33 fz_pool *pool;
34 const unsigned char *start;
35 const unsigned char *s;
36 const char *file;
37 int line;
38 int lookahead;
39 int c;
40 int string_len;
41 char string[1024];
42 };
43
44 static fz_css_value *parse_expr(struct lexbuf *buf);
45 static fz_css_selector *parse_selector(struct lexbuf *buf);
46
47 FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg)
48 {
49 #define PRE_POST_SIZE 30
50 unsigned char text[PRE_POST_SIZE * 2 + 4];
51 unsigned char *d = text;
52 const unsigned char *s = buf->start;
53 int n;
54
55 /* We want to make a helpful fragment for the error message.
56 * We want err_pos to be the point at which we just tripped
57 * the error. err_pos needs to be at least 1 byte behind
58 * our read pointer, as we've read that char. */
59 const unsigned char *err_pos = buf->s;
60 n = 1;
61
62 /* And if we're using lookahead, it's further behind. */
63 if (buf->lookahead >= CSS_KEYWORD)
64 n += buf->string_len;
65 else if (buf->lookahead != EOF)
66 n += 1;
67
68 /* But it can't be before the start of the buffer */
69 n = fz_mini(n, err_pos - buf->start);
70 err_pos -= n;
71
72 /* We're going to try to output:
73 * <section prior to the error> ">" <the char that tripped> "<" <section after the error>
74 */
75 /* Is the section prior to the error too long? If so, truncate it with an ellipsis. */
76 n = sizeof(text)-1;
77 if (err_pos - s > n-PRE_POST_SIZE - 3)
78 {
79 *d++ = '.';
80 *d++ = '.';
81 *d++ = '.';
82 n -= 3;
83 s = err_pos - (n-PRE_POST_SIZE - 3);
84 }
85
86 /* Copy the prefix (if there is one) */
87 if (err_pos > s)
88 {
89 n = err_pos - s;
90 while (n)
91 {
92 unsigned char c = *s++;
93 *d++ = (c < 32 || c > 127) ? ' ' : c;
94 n--;
95 }
96 }
97
98 /* Marker, char, end marker */
99 *d++ = '>', n--;
100 if (*err_pos)
101 *d++ = *err_pos++, n--;
102 *d++ = '<', n--;
103
104 /* Postfix */
105 n = (int)strlen((const char *)err_pos);
106 if (n <= PRE_POST_SIZE)
107 {
108 while (n > 0)
109 {
110 unsigned char c = *err_pos++;
111 *d++ = (c < 32 || c > 127) ? ' ' : c;
112 n--;
113 }
114 }
115 else
116 {
117 for (n = PRE_POST_SIZE-3; n > 0; n--)
118 {
119 unsigned char c = *err_pos++;
120 *d++ = (c < 32 || c > 127) ? ' ' : c;
121 }
122
123 *d++ = '.';
124 *d++ = '.';
125 *d++ = '.';
126 }
127 *d = 0;
128
129 fz_throw(buf->ctx, FZ_ERROR_SYNTAX, "css syntax error: %s (%s:%d) (%s)", msg, buf->file, buf->line, text);
130 }
131
132 fz_css *fz_new_css(fz_context *ctx)
133 {
134 fz_pool *pool = fz_new_pool(ctx);
135 fz_css *css = NULL;
136
137 fz_try(ctx)
138 {
139 css = fz_pool_alloc(ctx, pool, sizeof *css);
140 css->pool = pool;
141 css->rule = NULL;
142 }
143 fz_catch(ctx)
144 {
145 fz_drop_pool(ctx, pool);
146 fz_rethrow(ctx);
147 }
148
149 return css;
150 }
151
152 void fz_drop_css(fz_context *ctx, fz_css *css)
153 {
154 if (css)
155 fz_drop_pool(ctx, css->pool);
156 }
157
158 static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_pool *pool, fz_css_selector *selector, fz_css_property *declaration)
159 {
160 fz_css_rule *rule = fz_pool_alloc(ctx, pool, sizeof *rule);
161 rule->selector = selector;
162 rule->declaration = declaration;
163 rule->next = NULL;
164 return rule;
165 }
166
167 static fz_css_selector *fz_new_css_selector(fz_context *ctx, fz_pool *pool, const char *name)
168 {
169 fz_css_selector *sel = fz_pool_alloc(ctx, pool, sizeof *sel);
170 sel->name = name ? fz_pool_strdup(ctx, pool, name) : NULL;
171 sel->combine = 0;
172 sel->cond = NULL;
173 sel->left = NULL;
174 sel->right = NULL;
175 sel->next = NULL;
176 return sel;
177 }
178
179 static fz_css_condition *fz_new_css_condition(fz_context *ctx, fz_pool *pool, int type, const char *key, const char *val)
180 {
181 fz_css_condition *cond = fz_pool_alloc(ctx, pool, sizeof *cond);
182 cond->type = type;
183 cond->key = key ? fz_pool_strdup(ctx, pool, key) : NULL;
184 cond->val = val ? fz_pool_strdup(ctx, pool, val) : NULL;
185 cond->next = NULL;
186 return cond;
187 }
188
189 static fz_css_property *fz_new_css_property(fz_context *ctx, fz_pool *pool, const char *name, fz_css_value *value, int spec)
190 {
191 struct css_property_info *info = css_property_lookup(name, strlen(name));
192 if (info)
193 {
194 fz_css_property *prop = fz_pool_alloc(ctx, pool, sizeof *prop);
195 prop->name = info->key;
196 prop->value = value;
197 prop->spec = spec;
198 prop->important = 0;
199 prop->next = NULL;
200 return prop;
201 }
202 return NULL;
203 }
204
205 static fz_css_value *fz_new_css_value_x(fz_context *ctx, fz_pool *pool, int type)
206 {
207 fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val);
208 val->type = type;
209 val->data = NULL;
210 val->args = NULL;
211 val->next = NULL;
212 return val;
213 }
214
215 static fz_css_value *fz_new_css_value(fz_context *ctx, fz_pool *pool, int type, const char *data)
216 {
217 fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val);
218 val->type = type;
219 val->data = fz_pool_strdup(ctx, pool, data);
220 val->args = NULL;
221 val->next = NULL;
222 return val;
223 }
224
225 static void css_lex_next(struct lexbuf *buf)
226 {
227 if (buf->c == 0)
228 return;
229 buf->s += fz_chartorune(&buf->c, (const char *)buf->s);
230 if (buf->c == '\n')
231 ++buf->line;
232 buf->lookahead = EOF;
233 }
234
235 static void css_lex_init(fz_context *ctx, struct lexbuf *buf, fz_pool *pool, const char *s, const char *file)
236 {
237 buf->ctx = ctx;
238 buf->pool = pool;
239 buf->s = (const unsigned char *)s;
240 buf->lookahead = EOF;
241 buf->start = buf->s;
242 buf->c = -1;
243 buf->file = file;
244 buf->line = 1;
245 css_lex_next(buf);
246
247 buf->string_len = 0;
248 }
249
/* Return 1 for space, tab, carriage return, newline and form feed; 0 otherwise. */
static inline int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
	case '\f':
		return 1;
	default:
		return 0;
	}
}
254
255 static int isnmstart(int c)
256 {
257 return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
258 (c >= 128 && c <= UCS_MAX);
259 }
260
/* Can 'c' continue a name? Everything isnmstart() accepts, plus digits and '-'. */
static int isnmchar(int c)
{
	if (isnmstart(c))
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	return c == '-';
}
266
267 static void css_push_char(struct lexbuf *buf, int c)
268 {
269 char out[4];
270 int n = fz_runetochar(out, c);
271 if (buf->string_len + n >= (int)nelem(buf->string))
272 fz_css_error(buf, "token too long");
273 memcpy(buf->string + buf->string_len, out, n);
274 buf->string_len += n;
275 }
276
277 static void css_push_zero(struct lexbuf *buf)
278 {
279 if (buf->string_len + 1 >= (int)nelem(buf->string))
280 fz_css_error(buf, "token too long");
281 buf->string[buf->string_len] = 0;
282 buf->string_len += 1;
283 }
284
285 static int css_lex_accept(struct lexbuf *buf, int t)
286 {
287 if (buf->c == t)
288 {
289 css_lex_next(buf);
290 return 1;
291 }
292 return 0;
293 }
294
/* Consume the character 't', or throw "unexpected character" if it is not next. */
static void css_lex_expect(struct lexbuf *buf, int t)
{
	if (css_lex_accept(buf, t))
		return;
	fz_css_error(buf, "unexpected character");
}
300
301 static int css_lex_number(struct lexbuf *buf)
302 {
303 while (buf->c >= '0' && buf->c <= '9')
304 {
305 css_push_char(buf, buf->c);
306 css_lex_next(buf);
307 }
308
309 if (css_lex_accept(buf, '.'))
310 {
311 css_push_char(buf, '.');
312 while (buf->c >= '0' && buf->c <= '9')
313 {
314 css_push_char(buf, buf->c);
315 css_lex_next(buf);
316 }
317 }
318
319 if (css_lex_accept(buf, '%'))
320 {
321 css_push_char(buf, '%');
322 css_push_zero(buf);
323 return CSS_PERCENT;
324 }
325
326 if (isnmstart(buf->c))
327 {
328 css_push_char(buf, buf->c);
329 css_lex_next(buf);
330 while (isnmchar(buf->c))
331 {
332 css_push_char(buf, buf->c);
333 css_lex_next(buf);
334 }
335 css_push_zero(buf);
336 return CSS_LENGTH;
337 }
338
339 css_push_zero(buf);
340 return CSS_NUMBER;
341 }
342
343 static int css_lex_keyword(struct lexbuf *buf)
344 {
345 while (isnmchar(buf->c))
346 {
347 css_push_char(buf, buf->c);
348 css_lex_next(buf);
349 }
350 css_push_zero(buf);
351 return CSS_KEYWORD;
352 }
353
354 static int css_lex_hash(struct lexbuf *buf)
355 {
356 while (isnmchar(buf->c))
357 {
358 css_push_char(buf, buf->c);
359 css_lex_next(buf);
360 }
361 css_push_zero(buf);
362 return CSS_HASH;
363 }
364
365 static int css_lex_string(struct lexbuf *buf, int q)
366 {
367 while (buf->c && buf->c != q)
368 {
369 if (css_lex_accept(buf, '\\'))
370 {
371 if (css_lex_accept(buf, 'n'))
372 css_push_char(buf, '\n');
373 else if (css_lex_accept(buf, 'r'))
374 css_push_char(buf, '\r');
375 else if (css_lex_accept(buf, 'f'))
376 css_push_char(buf, '\f');
377 else if (css_lex_accept(buf, '\f'))
378 /* line continuation */ ;
379 else if (css_lex_accept(buf, '\n'))
380 /* line continuation */ ;
381 else if (css_lex_accept(buf, '\r'))
382 css_lex_accept(buf, '\n');
383 else
384 {
385 css_push_char(buf, buf->c);
386 css_lex_next(buf);
387 }
388 }
389 else
390 {
391 css_push_char(buf, buf->c);
392 css_lex_next(buf);
393 }
394 }
395 css_lex_expect(buf, q);
396 css_push_zero(buf);
397 return CSS_STRING;
398 }
399
400 static void css_lex_uri(struct lexbuf *buf)
401 {
402 while (buf->c && buf->c != ')' && !iswhite(buf->c))
403 {
404 if (css_lex_accept(buf, '\\'))
405 {
406 if (css_lex_accept(buf, 'n'))
407 css_push_char(buf, '\n');
408 else if (css_lex_accept(buf, 'r'))
409 css_push_char(buf, '\r');
410 else if (css_lex_accept(buf, 'f'))
411 css_push_char(buf, '\f');
412 else
413 {
414 css_push_char(buf, buf->c);
415 css_lex_next(buf);
416 }
417 }
418 else if (buf->c == '!' || buf->c == '#' || buf->c == '$' || buf->c == '%' || buf->c == '&' ||
419 (buf->c >= '*' && buf->c <= '[') ||
420 (buf->c >= ']' && buf->c <= '~') ||
421 buf->c > 159)
422 {
423 css_push_char(buf, buf->c);
424 css_lex_next(buf);
425 }
426 else
427 fz_css_error(buf, "unexpected character in url");
428 }
429 css_push_zero(buf);
430 }
431
432 static int css_lex(struct lexbuf *buf)
433 {
434 int t;
435
436 // TODO: keyword escape sequences
437
438 buf->string_len = 0;
439
440 restart:
441 if (buf->c == 0)
442 return EOF;
443
444 if (iswhite(buf->c))
445 {
446 while (iswhite(buf->c))
447 css_lex_next(buf);
448 return ' ';
449 }
450
451 if (css_lex_accept(buf, '/'))
452 {
453 if (css_lex_accept(buf, '*'))
454 {
455 while (buf->c)
456 {
457 if (css_lex_accept(buf, '*'))
458 {
459 while (buf->c == '*')
460 css_lex_next(buf);
461 if (css_lex_accept(buf, '/'))
462 goto restart;
463 }
464 css_lex_next(buf);
465 }
466 fz_css_error(buf, "unterminated comment");
467 }
468 return '/';
469 }
470
471 if (css_lex_accept(buf, '<'))
472 {
473 if (css_lex_accept(buf, '!'))
474 {
475 css_lex_expect(buf, '-');
476 css_lex_expect(buf, '-');
477 goto restart; /* ignore CDO */
478 }
479 return '<';
480 }
481
482 if (css_lex_accept(buf, '-'))
483 {
484 if (css_lex_accept(buf, '-'))
485 {
486 if (css_lex_accept(buf, '>'))
487 goto restart; /* ignore CDC */
488 }
489 if (isnmstart(buf->c))
490 {
491 css_push_char(buf, '-');
492 return css_lex_keyword(buf);
493 }
494 return '-';
495 }
496
497 if (css_lex_accept(buf, '.'))
498 {
499 if (buf->c >= '0' && buf->c <= '9')
500 {
501 css_push_char(buf, '.');
502 return css_lex_number(buf);
503 }
504 return '.';
505 }
506
507 if (css_lex_accept(buf, '#'))
508 {
509 if (isnmchar(buf->c))
510 return css_lex_hash(buf);
511 return '#';
512 }
513
514 if (css_lex_accept(buf, '"'))
515 return css_lex_string(buf, '"');
516 if (css_lex_accept(buf, '\''))
517 return css_lex_string(buf, '\'');
518
519 if (buf->c >= '0' && buf->c <= '9')
520 return css_lex_number(buf);
521
522 if (css_lex_accept(buf, 'u'))
523 {
524 if (css_lex_accept(buf, 'r'))
525 {
526 if (css_lex_accept(buf, 'l'))
527 {
528 if (css_lex_accept(buf, '('))
529 {
530 while (iswhite(buf->c))
531 css_lex_next(buf);
532 if (css_lex_accept(buf, '"'))
533 css_lex_string(buf, '"');
534 else if (css_lex_accept(buf, '\''))
535 css_lex_string(buf, '\'');
536 else
537 css_lex_uri(buf);
538 while (iswhite(buf->c))
539 css_lex_next(buf);
540 css_lex_expect(buf, ')');
541 return CSS_URI;
542 }
543 css_push_char(buf, 'u');
544 css_push_char(buf, 'r');
545 css_push_char(buf, 'l');
546 return css_lex_keyword(buf);
547 }
548 css_push_char(buf, 'u');
549 css_push_char(buf, 'r');
550 return css_lex_keyword(buf);
551 }
552 css_push_char(buf, 'u');
553 return css_lex_keyword(buf);
554 }
555
556 if (isnmstart(buf->c))
557 {
558 css_push_char(buf, buf->c);
559 css_lex_next(buf);
560 return css_lex_keyword(buf);
561 }
562
563 t = buf->c;
564 css_lex_next(buf);
565 return t;
566 }
567
568 static void next(struct lexbuf *buf)
569 {
570 buf->lookahead = css_lex(buf);
571 }
572
573 static int accept(struct lexbuf *buf, int t)
574 {
575 if (buf->lookahead == t)
576 {
577 next(buf);
578 return 1;
579 }
580 return 0;
581 }
582
/* Consume the token 't', or throw "unexpected token" if it is not the lookahead. */
static void expect(struct lexbuf *buf, int t)
{
	if (!accept(buf, t))
		fz_css_error(buf, "unexpected token");
}
589
590 static void white(struct lexbuf *buf)
591 {
592 while (buf->lookahead == ' ')
593 next(buf);
594 }
595
596 static int iscond(int t)
597 {
598 return t == ':' || t == '.' || t == '[' || t == CSS_HASH;
599 }
600
601 static fz_css_value *parse_term(struct lexbuf *buf)
602 {
603 fz_css_value *v;
604
605 if (buf->lookahead == '+' || buf->lookahead == '-')
606 {
607 float sign = buf->lookahead == '-' ? -1 : 1;
608 next(buf);
609 if (buf->lookahead != CSS_NUMBER && buf->lookahead != CSS_LENGTH && buf->lookahead != CSS_PERCENT)
610 fz_css_error(buf, "expected number");
611 if (sign < 0)
612 {
613 v = fz_new_css_value_x(buf->ctx, buf->pool, buf->lookahead);
614 v->data = fz_pool_alloc(buf->ctx, buf->pool, strlen(buf->string) + 2);
615 v->data[0] = '-';
616 strcpy(v->data + 1, buf->string);
617 }
618 else
619 {
620 v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
621 }
622 next(buf);
623 white(buf);
624 return v;
625 }
626
627 if (buf->lookahead == CSS_KEYWORD)
628 {
629 v = fz_new_css_value(buf->ctx, buf->pool, CSS_KEYWORD, buf->string);
630 next(buf);
631 if (accept(buf, '('))
632 {
633 white(buf);
634 v->type = '(';
635 v->args = parse_expr(buf);
636 expect(buf, ')');
637 }
638 white(buf);
639 return v;
640 }
641
642 switch (buf->lookahead)
643 {
644 case CSS_HASH:
645 case CSS_STRING:
646 case CSS_URI:
647 case CSS_NUMBER:
648 case CSS_LENGTH:
649 case CSS_PERCENT:
650 v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
651 next(buf);
652 white(buf);
653 return v;
654 }
655
656 fz_css_error(buf, "expected value");
657 }
658
659 static fz_css_value *parse_expr(struct lexbuf *buf)
660 {
661 fz_css_value *head, *tail;
662
663 head = tail = parse_term(buf);
664
665 while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' &&
666 buf->lookahead != ')' && buf->lookahead != EOF)
667 {
668 if (accept(buf, ','))
669 {
670 white(buf);
671 if (buf->lookahead != ';')
672 {
673 tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, ',', ",");
674 tail = tail->next = parse_term(buf);
675 }
676 }
677 else if (accept(buf, '/'))
678 {
679 white(buf);
680 tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, '/', "/");
681 tail = tail->next = parse_term(buf);
682 }
683 else
684 {
685 tail = tail->next = parse_term(buf);
686 }
687 }
688
689 return head;
690 }
691
692 static fz_css_property *parse_declaration(struct lexbuf *buf)
693 {
694 fz_css_property *p;
695
696 if (buf->lookahead != CSS_KEYWORD)
697 fz_css_error(buf, "expected keyword in property");
698 p = fz_new_css_property(buf->ctx, buf->pool, buf->string, NULL, 0);
699 next(buf);
700
701 white(buf);
702 expect(buf, ':');
703 white(buf);
704
705 if (p)
706 p->value = parse_expr(buf);
707 else
708 (void) parse_expr(buf);
709
710 /* !important */
711 if (accept(buf, '!'))
712 {
713 white(buf);
714 if (buf->lookahead != CSS_KEYWORD || strcmp(buf->string, "important"))
715 fz_css_error(buf, "expected keyword 'important' after '!'");
716 if (p)
717 p->important = 1;
718 next(buf);
719 white(buf);
720 }
721
722 return p;
723 }
724
725 static fz_css_property *parse_declaration_list(struct lexbuf *buf)
726 {
727 fz_css_property *head, *tail = NULL, *p;
728
729 white(buf);
730
731 if (buf->lookahead == '}' || buf->lookahead == EOF)
732 return NULL;
733
734 p = parse_declaration(buf);
735 if (p)
736 tail = p;
737 head = tail;
738
739 while (accept(buf, ';'))
740 {
741 white(buf);
742
743 if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF)
744 {
745 p = parse_declaration(buf);
746 if (p)
747 {
748 if (!head)
749 head = tail = p;
750 else
751 tail = tail->next = p;
752 }
753 }
754 }
755
756 return head;
757 }
758
759 static char *parse_attrib_value(struct lexbuf *buf)
760 {
761 char *s;
762
763 if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING)
764 {
765 s = fz_pool_strdup(buf->ctx, buf->pool, buf->string);
766 next(buf);
767 white(buf);
768 return s;
769 }
770
771 fz_css_error(buf, "expected attribute value");
772 }
773
774 static fz_css_condition *parse_condition(struct lexbuf *buf)
775 {
776 fz_css_condition *c;
777
778 if (accept(buf, ':'))
779 {
780 (void)accept(buf, ':'); /* swallow css3 :: syntax and pretend it's a normal pseudo-class */
781 if (buf->lookahead != CSS_KEYWORD)
782 fz_css_error(buf, "expected keyword after ':'");
783 c = fz_new_css_condition(buf->ctx, buf->pool, ':', "pseudo", buf->string);
784 next(buf);
785 if (accept(buf, '('))
786 {
787 white(buf);
788 if (accept(buf, CSS_KEYWORD))
789 white(buf);
790 expect(buf, ')');
791 }
792 return c;
793 }
794
795 if (accept(buf, '.'))
796 {
797 if (buf->lookahead != CSS_KEYWORD)
798 fz_css_error(buf, "expected keyword after '.'");
799 c = fz_new_css_condition(buf->ctx, buf->pool, '.', "class", buf->string);
800 next(buf);
801 return c;
802 }
803
804 if (accept(buf, '['))
805 {
806 white(buf);
807
808 if (buf->lookahead != CSS_KEYWORD)
809 fz_css_error(buf, "expected keyword after '['");
810 c = fz_new_css_condition(buf->ctx, buf->pool, '[', buf->string, NULL);
811 next(buf);
812
813 white(buf);
814
815 if (accept(buf, '='))
816 {
817 c->type = '=';
818 c->val = parse_attrib_value(buf);
819 }
820 else if (accept(buf, '|'))
821 {
822 expect(buf, '=');
823 c->type = '|';
824 c->val = parse_attrib_value(buf);
825 }
826 else if (accept(buf, '~'))
827 {
828 expect(buf, '=');
829 c->type = '~';
830 c->val = parse_attrib_value(buf);
831 }
832
833 expect(buf, ']');
834
835 return c;
836 }
837
838 if (buf->lookahead == CSS_HASH)
839 {
840 c = fz_new_css_condition(buf->ctx, buf->pool, '#', "id", buf->string);
841 next(buf);
842 return c;
843 }
844
845 fz_css_error(buf, "expected condition");
846 }
847
848 static fz_css_condition *parse_condition_list(struct lexbuf *buf)
849 {
850 fz_css_condition *head, *tail;
851
852 head = tail = parse_condition(buf);
853 while (iscond(buf->lookahead))
854 {
855 tail = tail->next = parse_condition(buf);
856 }
857 return head;
858 }
859
860 static fz_css_selector *parse_simple_selector(struct lexbuf *buf)
861 {
862 fz_css_selector *s;
863
864 if (accept(buf, '*'))
865 {
866 s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
867 if (iscond(buf->lookahead))
868 s->cond = parse_condition_list(buf);
869 return s;
870 }
871 else if (buf->lookahead == CSS_KEYWORD)
872 {
873 s = fz_new_css_selector(buf->ctx, buf->pool, buf->string);
874 next(buf);
875 if (iscond(buf->lookahead))
876 s->cond = parse_condition_list(buf);
877 return s;
878 }
879 else if (iscond(buf->lookahead))
880 {
881 s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
882 s->cond = parse_condition_list(buf);
883 return s;
884 }
885
886 fz_css_error(buf, "expected selector");
887 }
888
889 static fz_css_selector *parse_combinator(struct lexbuf *buf, int c, fz_css_selector *a)
890 {
891 fz_css_selector *sel, *b;
892 white(buf);
893 b = parse_simple_selector(buf);
894 sel = fz_new_css_selector(buf->ctx, buf->pool, NULL);
895 sel->combine = c;
896 sel->left = a;
897 sel->right = b;
898 return sel;
899 }
900
901 static fz_css_selector *parse_selector(struct lexbuf *buf)
902 {
903 fz_css_selector *sel = parse_simple_selector(buf);
904 for (;;)
905 {
906 if (accept(buf, ' '))
907 {
908 white(buf);
909 if (accept(buf, '+'))
910 sel = parse_combinator(buf, '+', sel);
911 else if (accept(buf, '>'))
912 sel = parse_combinator(buf, '>', sel);
913 else if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF)
914 sel = parse_combinator(buf, ' ', sel);
915 else
916 break;
917 }
918 else if (accept(buf, '+'))
919 sel = parse_combinator(buf, '+', sel);
920 else if (accept(buf, '>'))
921 sel = parse_combinator(buf, '>', sel);
922 else
923 break;
924 }
925 return sel;
926 }
927
928 static fz_css_selector *parse_selector_list(struct lexbuf *buf)
929 {
930 fz_css_selector *head, *tail;
931
932 head = tail = parse_selector(buf);
933 while (accept(buf, ','))
934 {
935 white(buf);
936 tail = tail->next = parse_selector(buf);
937 }
938 return head;
939 }
940
941 static fz_css_rule *parse_ruleset(struct lexbuf *buf)
942 {
943 fz_css_selector *s = NULL;
944 fz_css_property *p = NULL;
945
946 fz_try(buf->ctx)
947 {
948 s = parse_selector_list(buf);
949 expect(buf, '{');
950 p = parse_declaration_list(buf);
951 expect(buf, '}');
952 white(buf);
953 }
954 fz_catch(buf->ctx)
955 {
956 fz_rethrow_unless(buf->ctx, FZ_ERROR_SYNTAX);
957 fz_report_error(buf->ctx);
958
959 while (buf->lookahead != EOF)
960 {
961 if (accept(buf, '}'))
962 {
963 white(buf);
964 break;
965 }
966 next(buf);
967 }
968 return NULL;
969 }
970
971 return fz_new_css_rule(buf->ctx, buf->pool, s, p);
972 }
973
974 static fz_css_rule *parse_at_page(struct lexbuf *buf)
975 {
976 fz_css_selector *s = NULL;
977 fz_css_property *p = NULL;
978
979 white(buf);
980 if (accept(buf, ':'))
981 {
982 expect(buf, CSS_KEYWORD);
983 white(buf);
984 }
985 expect(buf, '{');
986 p = parse_declaration_list(buf);
987 expect(buf, '}');
988 white(buf);
989
990 s = fz_new_css_selector(buf->ctx, buf->pool, "@page");
991 return fz_new_css_rule(buf->ctx, buf->pool, s, p);
992 }
993
994 static fz_css_rule *parse_at_font_face(struct lexbuf *buf)
995 {
996 fz_css_selector *s = NULL;
997 fz_css_property *p = NULL;
998
999 white(buf);
1000 expect(buf, '{');
1001 p = parse_declaration_list(buf);
1002 expect(buf, '}');
1003 white(buf);
1004
1005 s = fz_new_css_selector(buf->ctx, buf->pool, "@font-face");
1006 return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1007 }
1008
1009 static void parse_at_rule(struct lexbuf *buf)
1010 {
1011 expect(buf, CSS_KEYWORD);
1012
1013 /* skip until '{' or ';' */
1014 while (buf->lookahead != EOF)
1015 {
1016 if (accept(buf, ';'))
1017 {
1018 white(buf);
1019 return;
1020 }
1021 if (accept(buf, '{'))
1022 {
1023 int depth = 1;
1024 while (buf->lookahead != EOF && depth > 0)
1025 {
1026 if (accept(buf, '{'))
1027 ++depth;
1028 else if (accept(buf, '}'))
1029 --depth;
1030 else
1031 next(buf);
1032 }
1033 white(buf);
1034 return;
1035 }
1036 next(buf);
1037 }
1038 }
1039
1040 static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain)
1041 {
1042 fz_css_rule *rule, **nextp, *tail;
1043
1044 tail = chain;
1045 if (tail)
1046 {
1047 while (tail->next)
1048 tail = tail->next;
1049 nextp = &tail->next;
1050 }
1051 else
1052 {
1053 nextp = &tail;
1054 }
1055
1056 white(buf);
1057
1058 while (buf->lookahead != EOF)
1059 {
1060 if (accept(buf, '@'))
1061 {
1062 if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "page"))
1063 {
1064 next(buf);
1065 rule = *nextp = parse_at_page(buf);
1066 nextp = &rule->next;
1067 }
1068 else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "font-face"))
1069 {
1070 next(buf);
1071 rule = *nextp = parse_at_font_face(buf);
1072 nextp = &rule->next;
1073 }
1074 else
1075 {
1076 parse_at_rule(buf);
1077 }
1078 }
1079 else
1080 {
1081 fz_css_rule *x = parse_ruleset(buf);
1082 if (x)
1083 {
1084 rule = *nextp = x;
1085 nextp = &rule->next;
1086 }
1087 }
1088 white(buf);
1089 }
1090
1091 return chain ? chain : tail;
1092 }
1093
1094 const char *fz_css_property_name(int key)
1095 {
1096 const char *name = "unknown";
1097 size_t i;
1098 for (i = 0; i < nelem(css_property_list); ++i)
1099 if (*css_property_list[i].name && css_property_list[i].key == key)
1100 name = css_property_list[i].name;
1101 return name;
1102 }
1103
1104 fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source)
1105 {
1106 struct lexbuf buf;
1107 css_lex_init(ctx, &buf, pool, source, "<inline>");
1108 next(&buf);
1109 return parse_declaration_list(&buf);
1110 }
1111
1112 void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file)
1113 {
1114 struct lexbuf buf;
1115 css_lex_init(ctx, &buf, css->pool, source, file);
1116 next(&buf);
1117 css->rule = parse_stylesheet(&buf, css->rule);
1118 }