Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/json.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/json.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,779 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +#include <string.h> +#include <stdlib.h> +#include <limits.h> + +/* JSON parse */ + +struct json_parser { + fz_context *ctx; + fz_pool *pool; + const char *s; +}; + +static fz_json *json_parse_element(struct json_parser *pp); + +static void json_error(struct json_parser *pp, const char *error) +{ + fz_throw(pp->ctx, FZ_ERROR_SYNTAX, "%s in JSON", error); +} + +static fz_json *json_new_value(fz_context *ctx, fz_pool *pool, int type) +{ + fz_json *val = fz_pool_alloc(ctx, pool, sizeof(fz_json)); + val->type = type; + return val; +} + +static fz_json_array *json_new_array(fz_context *ctx, fz_pool *pool, fz_json *value) +{ + fz_json_array *array = fz_pool_alloc(ctx, pool, sizeof(fz_json_array)); + array->value = value; + array->next = NULL; + return array; +} + +static fz_json_object *json_new_object(fz_context *ctx, fz_pool *pool, const char *key, fz_json *value) +{ + fz_json_object *object = fz_pool_alloc(ctx, pool, sizeof(fz_json_object)); + object->key = key; + object->value = value; + object->next = NULL; + return object; +} + +static int json_accept(struct json_parser *pp, int c) +{ + if (*pp->s == c) + { + pp->s++; + return 1; + } + return 0; +} + +static int json_accept_range(struct json_parser *pp, int a, int b) +{ + if (*pp->s >= a && *pp->s <= b) + { + pp->s++; + return 1; + } + return 0; +} + +static void json_expect_range(struct json_parser *pp, int a, int b) +{ + if (!json_accept_range(pp, a, b)) + fz_throw(pp->ctx, FZ_ERROR_SYNTAX, "expected '%c'-'%c' in JSON", a, b); +} + +static void json_expect(struct json_parser *pp, int c) +{ + if (!json_accept(pp, c)) + fz_throw(pp->ctx, FZ_ERROR_SYNTAX, "expected '%c' in JSON", c); +} + +static void json_parse_ws(struct json_parser *pp) +{ + int c = *pp->s; + while (c == 0x0a || c == 0x0d || c == 0x09 || c == 0x20) + c = *(++pp->s); +} + +static int unhex(int c) +{ + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 0xA; + if (c >= 'A' && c <= 'F') return c - 'A' + 0xA; + return 0; +} + +static int json_expect_hex(struct json_parser *pp) +{ + int c = *pp->s++; + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 0xA; + if (c >= 'A' && c <= 'F') return c - 'A' + 0xA; + json_error(pp, "invalid unicode escape sequence"); + return 0; +} + +static const char * +json_unescape_string(struct json_parser *pp, const char *s, const char *end, int n) +{ + char *str, *p; + int x; + str = p = fz_pool_alloc(pp->ctx, pp->pool, n + 1); + while (s < end) + { + if (*s == '\\') + { + s++; + switch (*s++) + { + case 'u': + x = unhex(*s++) << 12; + x |= unhex(*s++) << 8; + x |= unhex(*s++) << 4; + x |= unhex(*s++); + p += fz_runetochar(p, x); + break; + case '\\': *p++ = '\\'; break; + case '/': *p++ = '/'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\f'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 't': *p++ = '\t'; break; + } + } + else + { + *p++ = *s++; + } + } + *p = 0; + return str; +} + +static const char * +json_parse_string(struct json_parser *pp) +{ + const char *mark; + int n, c, x; + + json_expect(pp, '"'); + mark = pp->s; + n = 0; + + for (;;) + { + c = (unsigned char) *pp->s++; + if (c < 0x20) + json_error(pp, "bad control character in string literal"); + if (c == '"') + break; + if (c == '\\') + { + c = *pp->s++; + if (c == 'u') + { + x = json_expect_hex(pp) << 12; + x |= json_expect_hex(pp) << 8; + x |= json_expect_hex(pp) << 4; + x |= json_expect_hex(pp); + n += fz_runelen(x); + } + else if (c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't') + n += 1; + else + json_error(pp, "bad escaped character"); + } + else + { + n += 1; + } + } + + return json_unescape_string(pp, mark, pp->s - 1, n); +} + +static fz_json * +json_parse_number(struct json_parser *pp) +{ + fz_json *val; + const char *mark = pp->s; + + json_accept(pp, '-'); + if (json_accept(pp, '0')) + { + } + else + { + json_expect_range(pp, '1', '9'); + while (json_accept_range(pp, '0', '9')) + ; + } + + // fraction + if (json_accept(pp, '.')) + { + json_expect_range(pp, '0', '9'); + while (json_accept_range(pp, '0', '9')) + ; + } + + // exponent + if (json_accept(pp, 'e') || json_accept(pp, 'E')) + { + if (json_accept(pp, '-') || json_accept(pp, '+')) + ; + json_expect_range(pp, '0', '9'); + while (json_accept_range(pp, '0', '9')) + ; + } + + val = json_new_value(pp->ctx, pp->pool, FZ_JSON_NUMBER); + val->u.number = fz_atof(mark); + + return val; +} + +static fz_json * +json_parse_object(struct json_parser *pp) +{ + fz_json *obj; + fz_json_object **tail; + const char *key; + fz_json *val; + + json_expect(pp, '{'); + + obj = json_new_value(pp->ctx, pp->pool, FZ_JSON_OBJECT); + tail = &obj->u.object; + + json_parse_ws(pp); + if (json_accept(pp, '}')) + return obj; + + for (;;) + { + json_parse_ws(pp); + key = json_parse_string(pp); + json_parse_ws(pp); + json_expect(pp, ':'); + val = json_parse_element(pp); + *tail = json_new_object(pp->ctx, pp->pool, key, val); + tail = &(*tail)->next; + if (json_accept(pp, '}')) + break; + json_expect(pp, ','); + } + + return obj; +} + +static fz_json * +json_parse_array(struct json_parser *pp) +{ + fz_json *arr; + fz_json_array **tail; + fz_json *val; + + json_expect(pp, '['); + + arr = json_new_value(pp->ctx, pp->pool, FZ_JSON_ARRAY); + tail = &arr->u.array; + + json_parse_ws(pp); + if (json_accept(pp, ']')) + return arr; + + for (;;) + { + val = json_parse_element(pp); + *tail = json_new_array(pp->ctx, pp->pool, val); + tail = &(*tail)->next; + if (json_accept(pp, ']')) + break; + json_expect(pp, ','); + } + + return arr; +} + +static fz_json * +json_parse_value(struct json_parser *pp) +{ + fz_json *val; + int lookahead = *pp->s; + if (lookahead == '{') + return json_parse_object(pp); + if (lookahead == '[') + return json_parse_array(pp); + if (lookahead == '"') + { + val = json_new_value(pp->ctx, pp->pool, FZ_JSON_STRING); + val->u.string = json_parse_string(pp); + return val; + } + if (lookahead == '-' || (lookahead >= '0' && lookahead <= '9')) + return json_parse_number(pp); + if (json_accept(pp, 'n')) + { + json_expect(pp, 'u'); + json_expect(pp, 'l'); + json_expect(pp, 'l'); + return json_new_value(pp->ctx, pp->pool, FZ_JSON_NULL); + } + if (json_accept(pp, 't')) + { + json_expect(pp, 'r'); + json_expect(pp, 'u'); + json_expect(pp, 'e'); + return json_new_value(pp->ctx, pp->pool, FZ_JSON_TRUE); + } + if (json_accept(pp, 'f')) + { + json_expect(pp, 'a'); + json_expect(pp, 'l'); + json_expect(pp, 's'); + json_expect(pp, 'e'); + return json_new_value(pp->ctx, pp->pool, FZ_JSON_FALSE); + } + json_error(pp, "unexpected token"); + return NULL; +} + +static fz_json * +json_parse_element(struct json_parser *pp) +{ + fz_json *result; + json_parse_ws(pp); + result = json_parse_value(pp); + json_parse_ws(pp); + return result; +} + +fz_json * +fz_parse_json(fz_context *ctx, fz_pool *pool, const char *s) +{ + struct json_parser p = { ctx, pool, s }; + fz_json *result = json_parse_element(&p); + json_expect(&p, 0); + return result; +} + +/* JSON stringify */ + +static void +append_json_string(fz_context *ctx, fz_buffer *out, const char *s) +{ + int c; + fz_append_byte(ctx, out, '"'); + while (*s) + { + s += fz_chartorune(&c, s); + if (c < 20 || c == '"' || c == '\\' || c >= 127) + { + fz_append_byte(ctx, out, '\\'); + switch (c) + { + case '"': fz_append_byte(ctx, out, '"'); break; + case '\\': fz_append_byte(ctx, out, '\\'); break; + case '\n': fz_append_byte(ctx, out, 'n'); break; + case '\r': fz_append_byte(ctx, out, 'r'); break; + case '\t': fz_append_byte(ctx, out, 't'); break; + default: fz_append_printf(ctx, out, "u%04x", c); break; + } + } + else + { + fz_append_byte(ctx, out, c); + } + } + fz_append_byte(ctx, out, '"'); +} + +static void +write_json_string(fz_context *ctx, fz_output *out, const char *s) +{ + int c; + fz_write_byte(ctx, out, '"'); + while (*s) + { + s += fz_chartorune(&c, s); + if (c < 20 || c == '"' || c == '\\' || c >= 127) + { + fz_write_byte(ctx, out, '\\'); + switch (c) + { + case '"': fz_write_byte(ctx, out, '"'); break; + case '\\': fz_write_byte(ctx, out, '\\'); break; + case '\n': fz_write_byte(ctx, out, 'n'); break; + case '\r': fz_write_byte(ctx, out, 'r'); break; + case '\t': fz_write_byte(ctx, out, 't'); break; + default: fz_write_printf(ctx, out, "u%04x", c); break; + } + } + else + { + fz_write_byte(ctx, out, c); + } + } + fz_write_byte(ctx, out, '"'); +} + +static const char *format_json_integer(char *out, int v) +{ + char buf[32], *s = out; + unsigned int a; + int i = 0; + if (v < 0) { + a = -v; + *s++ = '-'; + } else { + a = v; + } + while (a) { + buf[i++] = (a % 10) + '0'; + a /= 10; + } + if (i == 0) + buf[i++] = '0'; + while (i > 0) + *s++ = buf[--i]; + *s = 0; + return out; +} + +static const char *format_json_exponent(char *p, int e) +{ + *p++ = 'e'; + if (e < 0) + { + *p++ = '-'; + return format_json_integer(p, -e); + } + else + { + *p++ = '+'; + return format_json_integer(p, e); + } +} + +static const char *format_json_number(char buf[32], double f) +{ + char digits[32], *p = buf, *s = digits; + int exp, ndigits, point; + + if (f == 0) return "0"; + if (isnan(f)) return "null"; + if (isinf(f)) return "null"; + + /* Fast case for integers. This only works assuming all integers can be + * exactly represented by a float. This is true for 32-bit integers and + * 64-bit floats. */ + if (f >= INT_MIN && f <= INT_MAX) { + int i = (int)f; + if ((double)i == f) + return format_json_integer(buf, i); + } + + // TODO: use double precision grisu algorithm! + ndigits = fz_grisu(f, digits, &exp); + point = ndigits + exp; + + if (signbit(f)) + *p++ = '-'; + + if (point < -5 || point > 21) { + *p++ = *s++; + if (ndigits > 1) { + int n = ndigits - 1; + *p++ = '.'; + while (n--) + *p++ = *s++; + } + format_json_exponent(p, point - 1); + } + + else if (point <= 0) { + *p++ = '0'; + *p++ = '.'; + while (point++ < 0) + *p++ = '0'; + while (ndigits-- > 0) + *p++ = *s++; + *p = 0; + } + + else { + while (ndigits-- > 0) { + *p++ = *s++; + if (--point == 0 && ndigits > 0) + *p++ = '.'; + } + while (point-- > 0) + *p++ = '0'; + *p = 0; + } + + return buf; +} + +void +fz_append_json(fz_context *ctx, fz_buffer *out, fz_json *value) +{ + fz_json_array *arr; + fz_json_object *obj; + char buf[40]; + switch (value->type) + { + case FZ_JSON_NULL: + fz_append_string(ctx, out, "null"); + break; + case FZ_JSON_TRUE: + fz_append_string(ctx, out, "true"); + break; + case FZ_JSON_FALSE: + fz_append_string(ctx, out, "false"); + break; + case FZ_JSON_NUMBER: + fz_append_string(ctx, out, format_json_number(buf, value->u.number)); + break; + case FZ_JSON_STRING: + append_json_string(ctx, out, value->u.string); + break; + case FZ_JSON_ARRAY: + fz_append_byte(ctx, out, '['); + for (arr = value->u.array; arr; arr = arr->next) + { + if (arr != value->u.array) + fz_append_byte(ctx, out, ','); + fz_append_json(ctx, out, arr->value); + } + fz_append_byte(ctx, out, ']'); + break; + case FZ_JSON_OBJECT: + fz_append_byte(ctx, out, '{'); + for (obj = value->u.object; obj; obj = obj->next) + { + if (obj != value->u.object) + fz_append_byte(ctx, out, ','); + append_json_string(ctx, out, obj->key); + fz_append_byte(ctx, out, ':'); + fz_append_json(ctx, out, obj->value); + } + fz_append_byte(ctx, out, '}'); + break; + } +} + +void +fz_write_json(fz_context *ctx, fz_output *out, fz_json *value) +{ + fz_json_array *arr; + fz_json_object *obj; + char buf[40]; + switch (value->type) + { + case FZ_JSON_NULL: + fz_write_string(ctx, out, "null"); + break; + case FZ_JSON_TRUE: + fz_write_string(ctx, out, "true"); + break; + case FZ_JSON_FALSE: + fz_write_string(ctx, out, "false"); + break; + case FZ_JSON_NUMBER: + fz_write_string(ctx, out, format_json_number(buf, value->u.number)); + break; + case FZ_JSON_STRING: + write_json_string(ctx, out, value->u.string); + break; + case FZ_JSON_ARRAY: + fz_write_byte(ctx, out, '['); + for (arr = value->u.array; arr; arr = arr->next) + { + if (arr != value->u.array) + fz_write_byte(ctx, out, ','); + fz_write_json(ctx, out, arr->value); + } + fz_write_byte(ctx, out, ']'); + break; + case FZ_JSON_OBJECT: + fz_write_byte(ctx, out, '{'); + for (obj = value->u.object; obj; obj = obj->next) + { + if (obj != value->u.object) + fz_write_byte(ctx, out, ','); + write_json_string(ctx, out, obj->key); + fz_write_byte(ctx, out, ':'); + fz_write_json(ctx, out, obj->value); + } + fz_write_byte(ctx, out, '}'); + break; + } +} + +/* JSON accessors */ + +int fz_json_is_null(fz_context *ctx, fz_json *json) +{ + return json && json->type == FZ_JSON_NULL; +} + +int fz_json_is_boolean(fz_context *ctx, fz_json *json) +{ + return json && (json->type == FZ_JSON_TRUE || json->type == FZ_JSON_FALSE); +} + +int fz_json_is_number(fz_context *ctx, fz_json *json) +{ + return json && json->type == FZ_JSON_NUMBER; +} + +int fz_json_is_string(fz_context *ctx, fz_json *json) +{ + return json && json->type == FZ_JSON_STRING; +} + +int fz_json_is_array(fz_context *ctx, fz_json *json) +{ + return json && json->type == FZ_JSON_ARRAY; +} + +int fz_json_is_object(fz_context *ctx, fz_json *json) +{ + return json && json->type == FZ_JSON_OBJECT; +} + +int fz_json_to_boolean(fz_context *ctx, fz_json *json) +{ + return json && json->type == FZ_JSON_TRUE; +} + +double fz_json_to_number(fz_context *ctx, fz_json *json) +{ + if (json && json->type == FZ_JSON_NUMBER) + return json->u.number; + return 0; +} + +const char *fz_json_to_string(fz_context *ctx, fz_json *json) +{ + if (json && json->type == FZ_JSON_STRING) + return json->u.string; + return ""; +} + +int fz_json_array_length(fz_context *ctx, fz_json *array) +{ + fz_json_array *entry; + int n = 0; + if (array->type != FZ_JSON_ARRAY) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an array"); + for (entry = array->u.array; entry; entry = entry->next) + ++n; + return n; +} + +fz_json *fz_json_array_get(fz_context *ctx, fz_json *array, int ix) +{ + fz_json_array *entry; + int n = 0; + if (array->type != FZ_JSON_ARRAY) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an array"); + for (entry = array->u.array; entry; entry = entry->next) + { + if (n == ix) + return entry->value; + ++n; + } + return NULL; +} + +fz_json *fz_json_object_get(fz_context *ctx, fz_json *object, const char *key) +{ + fz_json_object *entry; + if (object->type != FZ_JSON_OBJECT) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an object"); + for (entry = object->u.object; entry; entry = entry->next) + if (!strcmp(entry->key, key)) + return entry->value; + return NULL; +} + +/* JSON build objects */ + +fz_json *fz_json_new_object(fz_context *ctx, fz_pool *pool) +{ + return json_new_value(ctx, pool, FZ_JSON_OBJECT); +} + +fz_json *fz_json_new_array(fz_context *ctx, fz_pool *pool) +{ + return json_new_value(ctx, pool, FZ_JSON_ARRAY); +} + +void fz_json_array_push(fz_context *ctx, fz_pool *pool, fz_json *array, fz_json *item) +{ + fz_json_array **tail; + if (array->type != FZ_JSON_ARRAY) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an array"); + tail = &array->u.array; + while (*tail != NULL) + tail = &(*tail)->next; + *tail = json_new_array(ctx, pool, item); +} + +void fz_json_object_set(fz_context *ctx, fz_pool *pool, fz_json *object, const char *key, fz_json *item) +{ + fz_json_object **tail; + if (object->type != FZ_JSON_OBJECT) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "not an object"); + tail = &object->u.object; + while (*tail != NULL) + { + if (!strcmp((*tail)->key, key)) + { + // replace old value! + (*tail)->value = item; + return; + } + tail = &(*tail)->next; + } + *tail = json_new_object(ctx, pool, fz_pool_strdup(ctx, pool, key), item); +} + +fz_json *fz_json_new_number(fz_context *ctx, fz_pool *pool, double number) +{ + fz_json *val = json_new_value(ctx, pool, FZ_JSON_NUMBER); + val->u.number = number; + return val; +} + +fz_json *fz_json_new_string(fz_context *ctx, fz_pool *pool, const char *string) +{ + fz_json *val = json_new_value(ctx, pool, FZ_JSON_STRING); + val->u.string = fz_pool_strdup(ctx, pool, string); + return val; +} + +fz_json *fz_json_new_boolean(fz_context *ctx, fz_pool *pool, int x) +{ + if (x) + return json_new_value(ctx, pool, FZ_JSON_TRUE); + return json_new_value(ctx, pool, FZ_JSON_FALSE); +} + +fz_json *fz_json_new_null(fz_context *ctx, fz_pool *pool) +{ + return json_new_value(ctx, pool, FZ_JSON_NULL); +}
