comparison mupdf-source/source/fitz/json.c @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26 #include <stdlib.h>
27 #include <limits.h>
28
29 /* JSON parse */
30
31 struct json_parser {
32 fz_context *ctx;
33 fz_pool *pool;
34 const char *s;
35 };
36
37 static fz_json *json_parse_element(struct json_parser *pp);
38
39 static void json_error(struct json_parser *pp, const char *error)
40 {
41 fz_throw(pp->ctx, FZ_ERROR_SYNTAX, "%s in JSON", error);
42 }
43
44 static fz_json *json_new_value(fz_context *ctx, fz_pool *pool, int type)
45 {
46 fz_json *val = fz_pool_alloc(ctx, pool, sizeof(fz_json));
47 val->type = type;
48 return val;
49 }
50
51 static fz_json_array *json_new_array(fz_context *ctx, fz_pool *pool, fz_json *value)
52 {
53 fz_json_array *array = fz_pool_alloc(ctx, pool, sizeof(fz_json_array));
54 array->value = value;
55 array->next = NULL;
56 return array;
57 }
58
59 static fz_json_object *json_new_object(fz_context *ctx, fz_pool *pool, const char *key, fz_json *value)
60 {
61 fz_json_object *object = fz_pool_alloc(ctx, pool, sizeof(fz_json_object));
62 object->key = key;
63 object->value = value;
64 object->next = NULL;
65 return object;
66 }
67
68 static int json_accept(struct json_parser *pp, int c)
69 {
70 if (*pp->s == c)
71 {
72 pp->s++;
73 return 1;
74 }
75 return 0;
76 }
77
78 static int json_accept_range(struct json_parser *pp, int a, int b)
79 {
80 if (*pp->s >= a && *pp->s <= b)
81 {
82 pp->s++;
83 return 1;
84 }
85 return 0;
86 }
87
88 static void json_expect_range(struct json_parser *pp, int a, int b)
89 {
90 if (!json_accept_range(pp, a, b))
91 fz_throw(pp->ctx, FZ_ERROR_SYNTAX, "expected '%c'-'%c' in JSON", a, b);
92 }
93
94 static void json_expect(struct json_parser *pp, int c)
95 {
96 if (!json_accept(pp, c))
97 fz_throw(pp->ctx, FZ_ERROR_SYNTAX, "expected '%c' in JSON", c);
98 }
99
100 static void json_parse_ws(struct json_parser *pp)
101 {
102 int c = *pp->s;
103 while (c == 0x0a || c == 0x0d || c == 0x09 || c == 0x20)
104 c = *(++pp->s);
105 }
106
/* Map a hexadecimal digit character (either case) to its value 0..15.
 * Any other character yields 0. */
static int unhex(int c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return 10 + (c - 'a');
	if ('A' <= c && c <= 'F')
		return 10 + (c - 'A');
	return 0;
}
114
115 static int json_expect_hex(struct json_parser *pp)
116 {
117 int c = *pp->s++;
118 if (c >= '0' && c <= '9') return c - '0';
119 if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
120 if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
121 json_error(pp, "invalid unicode escape sequence");
122 return 0;
123 }
124
125 static const char *
126 json_unescape_string(struct json_parser *pp, const char *s, const char *end, int n)
127 {
128 char *str, *p;
129 int x;
130 str = p = fz_pool_alloc(pp->ctx, pp->pool, n + 1);
131 while (s < end)
132 {
133 if (*s == '\\')
134 {
135 s++;
136 switch (*s++)
137 {
138 case 'u':
139 x = unhex(*s++) << 12;
140 x |= unhex(*s++) << 8;
141 x |= unhex(*s++) << 4;
142 x |= unhex(*s++);
143 p += fz_runetochar(p, x);
144 break;
145 case '\\': *p++ = '\\'; break;
146 case '/': *p++ = '/'; break;
147 case 'b': *p++ = '\b'; break;
148 case 'f': *p++ = '\f'; break;
149 case 'n': *p++ = '\n'; break;
150 case 'r': *p++ = '\r'; break;
151 case 't': *p++ = '\t'; break;
152 }
153 }
154 else
155 {
156 *p++ = *s++;
157 }
158 }
159 *p = 0;
160 return str;
161 }
162
163 static const char *
164 json_parse_string(struct json_parser *pp)
165 {
166 const char *mark;
167 int n, c, x;
168
169 json_expect(pp, '"');
170 mark = pp->s;
171 n = 0;
172
173 for (;;)
174 {
175 c = (unsigned char) *pp->s++;
176 if (c < 0x20)
177 json_error(pp, "bad control character in string literal");
178 if (c == '"')
179 break;
180 if (c == '\\')
181 {
182 c = *pp->s++;
183 if (c == 'u')
184 {
185 x = json_expect_hex(pp) << 12;
186 x |= json_expect_hex(pp) << 8;
187 x |= json_expect_hex(pp) << 4;
188 x |= json_expect_hex(pp);
189 n += fz_runelen(x);
190 }
191 else if (c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't')
192 n += 1;
193 else
194 json_error(pp, "bad escaped character");
195 }
196 else
197 {
198 n += 1;
199 }
200 }
201
202 return json_unescape_string(pp, mark, pp->s - 1, n);
203 }
204
205 static fz_json *
206 json_parse_number(struct json_parser *pp)
207 {
208 fz_json *val;
209 const char *mark = pp->s;
210
211 json_accept(pp, '-');
212 if (json_accept(pp, '0'))
213 {
214 }
215 else
216 {
217 json_expect_range(pp, '1', '9');
218 while (json_accept_range(pp, '0', '9'))
219 ;
220 }
221
222 // fraction
223 if (json_accept(pp, '.'))
224 {
225 json_expect_range(pp, '0', '9');
226 while (json_accept_range(pp, '0', '9'))
227 ;
228 }
229
230 // exponent
231 if (json_accept(pp, 'e') || json_accept(pp, 'E'))
232 {
233 if (json_accept(pp, '-') || json_accept(pp, '+'))
234 ;
235 json_expect_range(pp, '0', '9');
236 while (json_accept_range(pp, '0', '9'))
237 ;
238 }
239
240 val = json_new_value(pp->ctx, pp->pool, FZ_JSON_NUMBER);
241 val->u.number = fz_atof(mark);
242
243 return val;
244 }
245
246 static fz_json *
247 json_parse_object(struct json_parser *pp)
248 {
249 fz_json *obj;
250 fz_json_object **tail;
251 const char *key;
252 fz_json *val;
253
254 json_expect(pp, '{');
255
256 obj = json_new_value(pp->ctx, pp->pool, FZ_JSON_OBJECT);
257 tail = &obj->u.object;
258
259 json_parse_ws(pp);
260 if (json_accept(pp, '}'))
261 return obj;
262
263 for (;;)
264 {
265 json_parse_ws(pp);
266 key = json_parse_string(pp);
267 json_parse_ws(pp);
268 json_expect(pp, ':');
269 val = json_parse_element(pp);
270 *tail = json_new_object(pp->ctx, pp->pool, key, val);
271 tail = &(*tail)->next;
272 if (json_accept(pp, '}'))
273 break;
274 json_expect(pp, ',');
275 }
276
277 return obj;
278 }
279
280 static fz_json *
281 json_parse_array(struct json_parser *pp)
282 {
283 fz_json *arr;
284 fz_json_array **tail;
285 fz_json *val;
286
287 json_expect(pp, '[');
288
289 arr = json_new_value(pp->ctx, pp->pool, FZ_JSON_ARRAY);
290 tail = &arr->u.array;
291
292 json_parse_ws(pp);
293 if (json_accept(pp, ']'))
294 return arr;
295
296 for (;;)
297 {
298 val = json_parse_element(pp);
299 *tail = json_new_array(pp->ctx, pp->pool, val);
300 tail = &(*tail)->next;
301 if (json_accept(pp, ']'))
302 break;
303 json_expect(pp, ',');
304 }
305
306 return arr;
307 }
308
309 static fz_json *
310 json_parse_value(struct json_parser *pp)
311 {
312 fz_json *val;
313 int lookahead = *pp->s;
314 if (lookahead == '{')
315 return json_parse_object(pp);
316 if (lookahead == '[')
317 return json_parse_array(pp);
318 if (lookahead == '"')
319 {
320 val = json_new_value(pp->ctx, pp->pool, FZ_JSON_STRING);
321 val->u.string = json_parse_string(pp);
322 return val;
323 }
324 if (lookahead == '-' || (lookahead >= '0' && lookahead <= '9'))
325 return json_parse_number(pp);
326 if (json_accept(pp, 'n'))
327 {
328 json_expect(pp, 'u');
329 json_expect(pp, 'l');
330 json_expect(pp, 'l');
331 return json_new_value(pp->ctx, pp->pool, FZ_JSON_NULL);
332 }
333 if (json_accept(pp, 't'))
334 {
335 json_expect(pp, 'r');
336 json_expect(pp, 'u');
337 json_expect(pp, 'e');
338 return json_new_value(pp->ctx, pp->pool, FZ_JSON_TRUE);
339 }
340 if (json_accept(pp, 'f'))
341 {
342 json_expect(pp, 'a');
343 json_expect(pp, 'l');
344 json_expect(pp, 's');
345 json_expect(pp, 'e');
346 return json_new_value(pp->ctx, pp->pool, FZ_JSON_FALSE);
347 }
348 json_error(pp, "unexpected token");
349 return NULL;
350 }
351
352 static fz_json *
353 json_parse_element(struct json_parser *pp)
354 {
355 fz_json *result;
356 json_parse_ws(pp);
357 result = json_parse_value(pp);
358 json_parse_ws(pp);
359 return result;
360 }
361
362 fz_json *
363 fz_parse_json(fz_context *ctx, fz_pool *pool, const char *s)
364 {
365 struct json_parser p = { ctx, pool, s };
366 fz_json *result = json_parse_element(&p);
367 json_expect(&p, 0);
368 return result;
369 }
370
371 /* JSON stringify */
372
373 static void
374 append_json_string(fz_context *ctx, fz_buffer *out, const char *s)
375 {
376 int c;
377 fz_append_byte(ctx, out, '"');
378 while (*s)
379 {
380 s += fz_chartorune(&c, s);
381 if (c < 20 || c == '"' || c == '\\' || c >= 127)
382 {
383 fz_append_byte(ctx, out, '\\');
384 switch (c)
385 {
386 case '"': fz_append_byte(ctx, out, '"'); break;
387 case '\\': fz_append_byte(ctx, out, '\\'); break;
388 case '\n': fz_append_byte(ctx, out, 'n'); break;
389 case '\r': fz_append_byte(ctx, out, 'r'); break;
390 case '\t': fz_append_byte(ctx, out, 't'); break;
391 default: fz_append_printf(ctx, out, "u%04x", c); break;
392 }
393 }
394 else
395 {
396 fz_append_byte(ctx, out, c);
397 }
398 }
399 fz_append_byte(ctx, out, '"');
400 }
401
402 static void
403 write_json_string(fz_context *ctx, fz_output *out, const char *s)
404 {
405 int c;
406 fz_write_byte(ctx, out, '"');
407 while (*s)
408 {
409 s += fz_chartorune(&c, s);
410 if (c < 20 || c == '"' || c == '\\' || c >= 127)
411 {
412 fz_write_byte(ctx, out, '\\');
413 switch (c)
414 {
415 case '"': fz_write_byte(ctx, out, '"'); break;
416 case '\\': fz_write_byte(ctx, out, '\\'); break;
417 case '\n': fz_write_byte(ctx, out, 'n'); break;
418 case '\r': fz_write_byte(ctx, out, 'r'); break;
419 case '\t': fz_write_byte(ctx, out, 't'); break;
420 default: fz_write_printf(ctx, out, "u%04x", c); break;
421 }
422 }
423 else
424 {
425 fz_write_byte(ctx, out, c);
426 }
427 }
428 fz_write_byte(ctx, out, '"');
429 }
430
/* Write the decimal representation of `v`, NUL-terminated, into `out`.
 *
 * `out` must have room for the sign, all digits of an int and the terminator.
 * Returns `out`.
 */
static const char *format_json_integer(char *out, int v)
{
	char buf[32], *s = out;
	unsigned int a;
	int i = 0;

	if (v < 0) {
		/* Negate in unsigned arithmetic: -v overflows when v == INT_MIN. */
		a = 0u - (unsigned int)v;
		*s++ = '-';
	} else {
		a = (unsigned int)v;
	}

	/* Collect digits least-significant first... */
	while (a) {
		buf[i++] = (char)((a % 10) + '0');
		a /= 10;
	}
	if (i == 0)
		buf[i++] = '0';

	/* ...then emit them in reading order. */
	while (i > 0)
		*s++ = buf[--i];
	*s = 0;
	return out;
}
453
/* Write an exponent suffix at `p`: 'e', an explicit sign, then the magnitude
 * of `e` in decimal, NUL-terminated.
 * Returns a pointer to the first digit, i.e. just past the sign. */
static const char *format_json_exponent(char *p, int e)
{
	int negative = (e < 0);

	p[0] = 'e';
	p[1] = negative ? '-' : '+';
	return format_json_integer(p + 2, negative ? -e : e);
}
468
/* Format `f` as a JSON number.
 *
 * Returns either `buf` (filled and NUL-terminated) or a string literal:
 * "0" for zero, "null" for NaN and infinities. Values that are exactly
 * representable as an int are printed as plain integers. Everything else uses
 * the digits and decimal exponent produced by fz_grisu, in one of three layouts:
 *   - scientific (d.ddde+xx) when the decimal point falls outside [-5, 21],
 *   - "0.000ddd" when the value is below one,
 *   - digits with an embedded point or trailing zeros otherwise.
 * NOTE(review): the original carried a TODO to use a double precision grisu;
 * presumably fz_grisu works at float precision -- confirm.
 */
static const char *format_json_number(char buf[32], double f)
{
	char mantissa[32];
	char *dst = buf;
	const char *src = mantissa;
	int exponent, count, point, k;

	if (f == 0)
		return "0";
	if (isnan(f) || isinf(f))
		return "null";

	/* Integer fast path; the round trip through int must be exact. */
	if (f >= INT_MIN && f <= INT_MAX)
	{
		int whole = (int)f;
		if ((double)whole == f)
			return format_json_integer(buf, whole);
	}

	// TODO: use double precision grisu algorithm!
	count = fz_grisu(f, mantissa, &exponent);
	/* Position of the decimal point relative to the first digit. */
	point = count + exponent;

	if (signbit(f))
		*dst++ = '-';

	if (point < -5 || point > 21)
	{
		/* Scientific notation: one leading digit, optional fraction, exponent. */
		*dst++ = *src++;
		if (count > 1)
		{
			*dst++ = '.';
			for (k = 1; k < count; ++k)
				*dst++ = *src++;
		}
		/* Also writes the terminating NUL. */
		format_json_exponent(dst, point - 1);
		return buf;
	}

	if (point <= 0)
	{
		/* Pure fraction: "0." followed by -point zeros, then the digits. */
		*dst++ = '0';
		*dst++ = '.';
		for (k = point; k < 0; ++k)
			*dst++ = '0';
		for (k = 0; k < count; ++k)
			*dst++ = *src++;
		*dst = 0;
		return buf;
	}

	/* Point lies inside or after the digits. */
	for (k = 0; k < count; ++k)
	{
		*dst++ = *src++;
		if (k + 1 == point && k + 1 < count)
			*dst++ = '.';
	}
	/* Pad with zeros when the point lies beyond the last digit. */
	for (k = count; k < point; ++k)
		*dst++ = '0';
	*dst = 0;
	return buf;
}
528
529 void
530 fz_append_json(fz_context *ctx, fz_buffer *out, fz_json *value)
531 {
532 fz_json_array *arr;
533 fz_json_object *obj;
534 char buf[40];
535 switch (value->type)
536 {
537 case FZ_JSON_NULL:
538 fz_append_string(ctx, out, "null");
539 break;
540 case FZ_JSON_TRUE:
541 fz_append_string(ctx, out, "true");
542 break;
543 case FZ_JSON_FALSE:
544 fz_append_string(ctx, out, "false");
545 break;
546 case FZ_JSON_NUMBER:
547 fz_append_string(ctx, out, format_json_number(buf, value->u.number));
548 break;
549 case FZ_JSON_STRING:
550 append_json_string(ctx, out, value->u.string);
551 break;
552 case FZ_JSON_ARRAY:
553 fz_append_byte(ctx, out, '[');
554 for (arr = value->u.array; arr; arr = arr->next)
555 {
556 if (arr != value->u.array)
557 fz_append_byte(ctx, out, ',');
558 fz_append_json(ctx, out, arr->value);
559 }
560 fz_append_byte(ctx, out, ']');
561 break;
562 case FZ_JSON_OBJECT:
563 fz_append_byte(ctx, out, '{');
564 for (obj = value->u.object; obj; obj = obj->next)
565 {
566 if (obj != value->u.object)
567 fz_append_byte(ctx, out, ',');
568 append_json_string(ctx, out, obj->key);
569 fz_append_byte(ctx, out, ':');
570 fz_append_json(ctx, out, obj->value);
571 }
572 fz_append_byte(ctx, out, '}');
573 break;
574 }
575 }
576
577 void
578 fz_write_json(fz_context *ctx, fz_output *out, fz_json *value)
579 {
580 fz_json_array *arr;
581 fz_json_object *obj;
582 char buf[40];
583 switch (value->type)
584 {
585 case FZ_JSON_NULL:
586 fz_write_string(ctx, out, "null");
587 break;
588 case FZ_JSON_TRUE:
589 fz_write_string(ctx, out, "true");
590 break;
591 case FZ_JSON_FALSE:
592 fz_write_string(ctx, out, "false");
593 break;
594 case FZ_JSON_NUMBER:
595 fz_write_string(ctx, out, format_json_number(buf, value->u.number));
596 break;
597 case FZ_JSON_STRING:
598 write_json_string(ctx, out, value->u.string);
599 break;
600 case FZ_JSON_ARRAY:
601 fz_write_byte(ctx, out, '[');
602 for (arr = value->u.array; arr; arr = arr->next)
603 {
604 if (arr != value->u.array)
605 fz_write_byte(ctx, out, ',');
606 fz_write_json(ctx, out, arr->value);
607 }
608 fz_write_byte(ctx, out, ']');
609 break;
610 case FZ_JSON_OBJECT:
611 fz_write_byte(ctx, out, '{');
612 for (obj = value->u.object; obj; obj = obj->next)
613 {
614 if (obj != value->u.object)
615 fz_write_byte(ctx, out, ',');
616 write_json_string(ctx, out, obj->key);
617 fz_write_byte(ctx, out, ':');
618 fz_write_json(ctx, out, obj->value);
619 }
620 fz_write_byte(ctx, out, '}');
621 break;
622 }
623 }
624
625 /* JSON accessors */
626
627 int fz_json_is_null(fz_context *ctx, fz_json *json)
628 {
629 return json && json->type == FZ_JSON_NULL;
630 }
631
632 int fz_json_is_boolean(fz_context *ctx, fz_json *json)
633 {
634 return json && (json->type == FZ_JSON_TRUE || json->type == FZ_JSON_FALSE);
635 }
636
637 int fz_json_is_number(fz_context *ctx, fz_json *json)
638 {
639 return json && json->type == FZ_JSON_NUMBER;
640 }
641
642 int fz_json_is_string(fz_context *ctx, fz_json *json)
643 {
644 return json && json->type == FZ_JSON_STRING;
645 }
646
647 int fz_json_is_array(fz_context *ctx, fz_json *json)
648 {
649 return json && json->type == FZ_JSON_ARRAY;
650 }
651
652 int fz_json_is_object(fz_context *ctx, fz_json *json)
653 {
654 return json && json->type == FZ_JSON_OBJECT;
655 }
656
657 int fz_json_to_boolean(fz_context *ctx, fz_json *json)
658 {
659 return json && json->type == FZ_JSON_TRUE;
660 }
661
662 double fz_json_to_number(fz_context *ctx, fz_json *json)
663 {
664 if (json && json->type == FZ_JSON_NUMBER)
665 return json->u.number;
666 return 0;
667 }
668
669 const char *fz_json_to_string(fz_context *ctx, fz_json *json)
670 {
671 if (json && json->type == FZ_JSON_STRING)
672 return json->u.string;
673 return "";
674 }
675
676 int fz_json_array_length(fz_context *ctx, fz_json *array)
677 {
678 fz_json_array *entry;
679 int n = 0;
680 if (array->type != FZ_JSON_ARRAY)
681 fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an array");
682 for (entry = array->u.array; entry; entry = entry->next)
683 ++n;
684 return n;
685 }
686
687 fz_json *fz_json_array_get(fz_context *ctx, fz_json *array, int ix)
688 {
689 fz_json_array *entry;
690 int n = 0;
691 if (array->type != FZ_JSON_ARRAY)
692 fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an array");
693 for (entry = array->u.array; entry; entry = entry->next)
694 {
695 if (n == ix)
696 return entry->value;
697 ++n;
698 }
699 return NULL;
700 }
701
702 fz_json *fz_json_object_get(fz_context *ctx, fz_json *object, const char *key)
703 {
704 fz_json_object *entry;
705 if (object->type != FZ_JSON_OBJECT)
706 fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an object");
707 for (entry = object->u.object; entry; entry = entry->next)
708 if (!strcmp(entry->key, key))
709 return entry->value;
710 return NULL;
711 }
712
713 /* JSON build objects */
714
715 fz_json *fz_json_new_object(fz_context *ctx, fz_pool *pool)
716 {
717 return json_new_value(ctx, pool, FZ_JSON_OBJECT);
718 }
719
720 fz_json *fz_json_new_array(fz_context *ctx, fz_pool *pool)
721 {
722 return json_new_value(ctx, pool, FZ_JSON_ARRAY);
723 }
724
725 void fz_json_array_push(fz_context *ctx, fz_pool *pool, fz_json *array, fz_json *item)
726 {
727 fz_json_array **tail;
728 if (array->type != FZ_JSON_ARRAY)
729 fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an array");
730 tail = &array->u.array;
731 while (*tail != NULL)
732 tail = &(*tail)->next;
733 *tail = json_new_array(ctx, pool, item);
734 }
735
736 void fz_json_object_set(fz_context *ctx, fz_pool *pool, fz_json *object, const char *key, fz_json *item)
737 {
738 fz_json_object **tail;
739 if (object->type != FZ_JSON_OBJECT)
740 fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an object");
741 tail = &object->u.object;
742 while (*tail != NULL)
743 {
744 if (!strcmp((*tail)->key, key))
745 {
746 // replace old value!
747 (*tail)->value = item;
748 return;
749 }
750 tail = &(*tail)->next;
751 }
752 *tail = json_new_object(ctx, pool, fz_pool_strdup(ctx, pool, key), item);
753 }
754
755 fz_json *fz_json_new_number(fz_context *ctx, fz_pool *pool, double number)
756 {
757 fz_json *val = json_new_value(ctx, pool, FZ_JSON_NUMBER);
758 val->u.number = number;
759 return val;
760 }
761
762 fz_json *fz_json_new_string(fz_context *ctx, fz_pool *pool, const char *string)
763 {
764 fz_json *val = json_new_value(ctx, pool, FZ_JSON_STRING);
765 val->u.string = fz_pool_strdup(ctx, pool, string);
766 return val;
767 }
768
769 fz_json *fz_json_new_boolean(fz_context *ctx, fz_pool *pool, int x)
770 {
771 if (x)
772 return json_new_value(ctx, pool, FZ_JSON_TRUE);
773 return json_new_value(ctx, pool, FZ_JSON_FALSE);
774 }
775
776 fz_json *fz_json_new_null(fz_context *ctx, fz_pool *pool)
777 {
778 return json_new_value(ctx, pool, FZ_JSON_NULL);
779 }