Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/stream-read.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/stream-read.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,591 @@ +// Copyright (C) 2004-2021 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +#include <string.h> + +#define MIN_BOMB (100 << 20) + +size_t +fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len) +{ + size_t count, n; + + count = 0; + do + { + n = fz_available(ctx, stm, len); + if (n > len) + n = len; + if (n == 0) + break; + + memcpy(buf, stm->rp, n); + stm->rp += n; + buf += n; + count += n; + len -= n; + } + while (len > 0); + + return count; +} + +static unsigned char skip_buf[4096]; + +size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len) +{ + size_t count, l, total = 0; + + while (len) + { + l = len; + if (l > sizeof(skip_buf)) + l = sizeof(skip_buf); + count = fz_read(ctx, stm, skip_buf, l); + total += count; + if (count < l) + break; + len -= count; + } + return total; +} + +fz_buffer * +fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial) +{ + return fz_read_best(ctx, stm, initial, NULL, 0); +} + +fz_buffer * +fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case) +{ + fz_buffer *buf = NULL; + int check_bomb = (initial > 0); + size_t n; + + fz_var(buf); + + if (truncated) + *truncated = 0; + + if (worst_case == 0) + worst_case = initial * 200; + if (worst_case < MIN_BOMB) + worst_case = MIN_BOMB; + + fz_try(ctx) + { + if (initial < 1024) + initial = 1024; + + buf = fz_new_buffer(ctx, initial+1); + + while (1) + { + if (buf->len == buf->cap) + fz_grow_buffer(ctx, buf); + + if (check_bomb && buf->len > worst_case) + fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected"); + + n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len); + if (n == 0) + break; + + buf->len += n; + } + } + fz_catch(ctx) + { + if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM) + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } + if (truncated) + { + *truncated = 1; + fz_report_error(ctx); + } + else + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } + } + + return buf; +} + +char * +fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n) +{ + char *s = mem; + int c = EOF; + while (n > 1) + { + c = fz_read_byte(ctx, stm); + if (c == EOF) + break; + if (c == '\r') { + c = fz_peek_byte(ctx, stm); + if (c == '\n') + fz_read_byte(ctx, stm); + break; + } + if (c == '\n') + break; + *s++ = c; + n--; + } + if (n) + *s = '\0'; + return (s == mem && c == EOF) ? NULL : mem; +} + +int64_t +fz_tell(fz_context *ctx, fz_stream *stm) +{ + return stm->pos - (stm->wp - stm->rp); +} + +void +fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence) +{ + stm->avail = 0; /* Reset bit reading */ + if (stm->seek) + { + if (whence == 1) + { + offset += fz_tell(ctx, stm); + whence = 0; + } + stm->seek(ctx, stm, offset, whence); + stm->eof = 0; + } + else if (whence != 2) + { + if (whence == 0) + offset -= fz_tell(ctx, stm); + if (offset < 0) + fz_warn(ctx, "cannot seek backwards"); + /* dog slow, but rare enough */ + while (offset-- > 0) + { + if (fz_read_byte(ctx, stm) == EOF) + { + fz_warn(ctx, "seek failed"); + break; + } + } + } + else + fz_warn(ctx, "cannot seek"); +} + +fz_buffer * +fz_read_file(fz_context *ctx, const char *filename) +{ + fz_stream *stm; + fz_buffer *buf = NULL; + + fz_var(buf); + + stm = fz_open_file(ctx, filename); + fz_try(ctx) + { + buf = fz_read_all(ctx, stm, 0); + } + fz_always(ctx) + { + fz_drop_stream(ctx, stm); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return buf; +} + +fz_buffer * +fz_try_read_file(fz_context *ctx, const char *filename) +{ + fz_stream *stm; + fz_buffer *buf = NULL; + + fz_var(buf); + + stm = fz_try_open_file(ctx, filename); + if (stm == NULL) + return NULL; + fz_try(ctx) + { + buf = fz_read_all(ctx, stm, 0); + } + fz_always(ctx) + { + fz_drop_stream(ctx, stm); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return buf; +} + +uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16"); + return ((uint16_t)a<<8) | ((uint16_t)b); +} + +uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + int c = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF || c == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24"); + return ((uint32_t)a<<16) | ((uint32_t)b<<8) | ((uint32_t)c); +} + +uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + int c = fz_read_byte(ctx, stm); + int d = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF || c == EOF || d == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32"); + return ((uint32_t)a<<24) | ((uint32_t)b<<16) | ((uint32_t)c<<8) | ((uint32_t)d); +} + +uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + int c = fz_read_byte(ctx, stm); + int d = fz_read_byte(ctx, stm); + int e = fz_read_byte(ctx, stm); + int f = fz_read_byte(ctx, stm); + int g = fz_read_byte(ctx, stm); + int h = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64"); + return ((uint64_t)a<<56) | ((uint64_t)b<<48) | ((uint64_t)c<<40) | ((uint64_t)d<<32) + | ((uint64_t)e<<24) | ((uint64_t)f<<16) | ((uint64_t)g<<8) | ((uint64_t)h); +} + +uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16"); + return ((uint16_t)a) | ((uint16_t)b<<8); +} + +uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + int c = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF || c == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24"); + return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16); +} + +uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + int c = fz_read_byte(ctx, stm); + int d = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF || c == EOF || d == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32"); + return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16) | ((uint32_t)d<<24); +} + +uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm) +{ + int a = fz_read_byte(ctx, stm); + int b = fz_read_byte(ctx, stm); + int c = fz_read_byte(ctx, stm); + int d = fz_read_byte(ctx, stm); + int e = fz_read_byte(ctx, stm); + int f = fz_read_byte(ctx, stm); + int g = fz_read_byte(ctx, stm); + int h = fz_read_byte(ctx, stm); + if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64"); + return ((uint64_t)a) | ((uint64_t)b<<8) | ((uint64_t)c<<16) | ((uint64_t)d<<24) + | ((uint64_t)e<<32) | ((uint64_t)f<<40) | ((uint64_t)g<<48) | ((uint64_t)h<<56); +} + +int16_t fz_read_int16(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16(ctx, stm); } +int32_t fz_read_int32(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32(ctx, stm); } +int64_t fz_read_int64(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64(ctx, stm); } + +int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16_le(ctx, stm); } +int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32_le(ctx, stm); } +int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64_le(ctx, stm); } + +float +fz_read_float_le(fz_context *ctx, fz_stream *stm) +{ + union {float f;int32_t i;} u; + + u.i = fz_read_int32_le(ctx, stm); + return u.f; +} + +float +fz_read_float(fz_context *ctx, fz_stream *stm) +{ + union {float f;int32_t i;} u; + + u.i = fz_read_int32(ctx, stm); + return u.f; +} + +void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len) +{ + int c; + do + { + if (len <= 0) + fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string"); + + c = fz_read_byte(ctx, stm); + if (c == EOF) + fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string"); + *buffer++ = c; + len--; + } + while (c != 0); +} + +int fz_read_rune(fz_context *ctx, fz_stream *in) +{ + uint8_t d, e, f; + int x; + int c = fz_read_byte(ctx, in); + if (c == EOF) + return EOF; + + if ((c & 0xF8) == 0xF0) + { + x = fz_read_byte(ctx, in); + if (x == EOF) + return 0xFFFD; + d = (uint8_t)x; + c = (c & 7)<<18; + if ((d & 0xC0) == 0x80) + { + x = fz_read_byte(ctx, in); + if (x == EOF) + return 0xFFFD; + e = (uint8_t)x; + c += (d & 0x3f)<<12; + if ((e & 0xC0) == 0x80) + { + x = fz_read_byte(ctx, in); + if (x == EOF) + return 0xFFFD; + f = (uint8_t)x; + c += (e & 0x3f)<<6; + if ((f & 0xC0) == 0x80) + { + c += f & 0x3f; + } + else + goto bad_byte; + } + else + goto bad_byte; + } + else + goto bad_byte; + } + else if ((c & 0xF0) == 0xE0) + { + x = fz_read_byte(ctx, in); + if (x == EOF) + return 0xFFFD; + d = (uint8_t)x; + c = (c & 15)<<12; + if ((d & 0xC0) == 0x80) + { + x = fz_read_byte(ctx, in); + if (x == EOF) + return 0xFFFD; + e = (uint8_t)x; + c += (d & 0x3f)<<6; + if ((e & 0xC0) == 0x80) + { + c += e & 0x3f; + } + else + goto bad_byte; + } + else + goto bad_byte; + } + else if ((c & 0xE0) == 0xC0) + { + x = fz_read_byte(ctx, in); + if (x == EOF) + return 0xFFFD; + d = (uint8_t)x; + c = (c & 31)<<6; + if ((d & 0xC0) == 0x80) + { + c += d & 0x3f; + } + else + fz_unread_byte(ctx, in); + } + else if ((c & 0xc0) == 0x80) + { +bad_byte: + fz_unread_byte(ctx, in); + return 0xFFFD; + } + + return c; + +} + +int fz_read_utf16_le(fz_context *ctx, fz_stream *stm) +{ + int c = fz_read_byte(ctx, stm); + int d, e; + + if (c == EOF) + return EOF; + + d = fz_read_byte(ctx, stm); + if (d == EOF) + return c; /* Might be wrong, but the best we can do. */ + + c |= d<<8; + + /* If it's not a surrogate, we're done. */ + if (c < 0xd800 || c >= 0xe000) + return c; + + /* It *ought* to be a leading (high) surrogate. If it's not, + * then we're in trouble. */ + if (c >= 0xdc00) + return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */ + + /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need + * 2 to tell if it was a low surrogate. Just assume it is. */ + d = fz_read_byte(ctx, stm); + if (d == EOF) + { + /* Failure! Imagine the trailing surrogate was 0. */ + return 0x10000 + ((c - 0xd800)<<10); + } + e = fz_read_byte(ctx, stm); + if (e == EOF) + { + e = 0xDC; /* Fudge a low surrogate */ + } + + d |= e<<8; + + if (d < 0xdc00 || d >= 0xe000) + { + /* Bad encoding! This is nasty, because we've eaten 2 bytes from the + * stream which ideally we would not have. Serves you right for + * having a broken stream. */ + return 0x10000 + ((c - 0xd800)<<10); /* Imagine the high surrogate was 0. */ + } + + c -= 0xd800; + d -= 0xdc00; + + return 0x10000 + (c<<10) + d; +} + +int fz_read_utf16_be(fz_context *ctx, fz_stream *stm) +{ + int c = fz_read_byte(ctx, stm); + int d, e; + + if (c == EOF) + return EOF; + + d = fz_read_byte(ctx, stm); + if (d == EOF) + return c; /* Might be wrong, but the best we can do. */ + + c = (c<<8) | d; + + /* If it's not a surrogate, we're done. */ + if (c < 0xd800 || c >= 0xe000) + return c; + + /* It *ought* to be a leading (high) surrogate. If it's not, + * then we're in trouble. */ + if (c >= 0xdc00) + return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */ + + /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need + * 2 to tell if it was a low surrogate. Just assume it is. */ + d = fz_read_byte(ctx, stm); + if (d == EOF) + { + /* Failure! Imagine the trailing surrogate was 0. */ + return 0x10000 + ((c - 0xd800)<<10); + } + + /* The next byte ought to be the start of a trailing (low) surrogate. */ + if (d < 0xdc || d >= 0xe0) + { + /* It wasn't. Put the byte back. */ + fz_unread_byte(ctx, stm); + d = 0xdc00; /* Pretend it was a 0 surrogate. */ + } + else + { + e = fz_read_byte(ctx, stm); + if (e == EOF) + { + e = 0; + } + d = (d<<8) | e; + } + + c -= 0xd800; + d -= 0xdc00; + + return 0x10000 + (c<<10) + d; +}
