comparison mupdf-source/source/fitz/stream-read.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26
27 #define MIN_BOMB (100 << 20)
28
29 size_t
30 fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len)
31 {
32 size_t count, n;
33
34 count = 0;
35 do
36 {
37 n = fz_available(ctx, stm, len);
38 if (n > len)
39 n = len;
40 if (n == 0)
41 break;
42
43 memcpy(buf, stm->rp, n);
44 stm->rp += n;
45 buf += n;
46 count += n;
47 len -= n;
48 }
49 while (len > 0);
50
51 return count;
52 }
53
54 static unsigned char skip_buf[4096];
55
56 size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len)
57 {
58 size_t count, l, total = 0;
59
60 while (len)
61 {
62 l = len;
63 if (l > sizeof(skip_buf))
64 l = sizeof(skip_buf);
65 count = fz_read(ctx, stm, skip_buf, l);
66 total += count;
67 if (count < l)
68 break;
69 len -= count;
70 }
71 return total;
72 }
73
74 fz_buffer *
75 fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial)
76 {
77 return fz_read_best(ctx, stm, initial, NULL, 0);
78 }
79
80 fz_buffer *
81 fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case)
82 {
83 fz_buffer *buf = NULL;
84 int check_bomb = (initial > 0);
85 size_t n;
86
87 fz_var(buf);
88
89 if (truncated)
90 *truncated = 0;
91
92 if (worst_case == 0)
93 worst_case = initial * 200;
94 if (worst_case < MIN_BOMB)
95 worst_case = MIN_BOMB;
96
97 fz_try(ctx)
98 {
99 if (initial < 1024)
100 initial = 1024;
101
102 buf = fz_new_buffer(ctx, initial+1);
103
104 while (1)
105 {
106 if (buf->len == buf->cap)
107 fz_grow_buffer(ctx, buf);
108
109 if (check_bomb && buf->len > worst_case)
110 fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected");
111
112 n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len);
113 if (n == 0)
114 break;
115
116 buf->len += n;
117 }
118 }
119 fz_catch(ctx)
120 {
121 if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
122 {
123 fz_drop_buffer(ctx, buf);
124 fz_rethrow(ctx);
125 }
126 if (truncated)
127 {
128 *truncated = 1;
129 fz_report_error(ctx);
130 }
131 else
132 {
133 fz_drop_buffer(ctx, buf);
134 fz_rethrow(ctx);
135 }
136 }
137
138 return buf;
139 }
140
141 char *
142 fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n)
143 {
144 char *s = mem;
145 int c = EOF;
146 while (n > 1)
147 {
148 c = fz_read_byte(ctx, stm);
149 if (c == EOF)
150 break;
151 if (c == '\r') {
152 c = fz_peek_byte(ctx, stm);
153 if (c == '\n')
154 fz_read_byte(ctx, stm);
155 break;
156 }
157 if (c == '\n')
158 break;
159 *s++ = c;
160 n--;
161 }
162 if (n)
163 *s = '\0';
164 return (s == mem && c == EOF) ? NULL : mem;
165 }
166
167 int64_t
168 fz_tell(fz_context *ctx, fz_stream *stm)
169 {
170 return stm->pos - (stm->wp - stm->rp);
171 }
172
173 void
174 fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
175 {
176 stm->avail = 0; /* Reset bit reading */
177 if (stm->seek)
178 {
179 if (whence == 1)
180 {
181 offset += fz_tell(ctx, stm);
182 whence = 0;
183 }
184 stm->seek(ctx, stm, offset, whence);
185 stm->eof = 0;
186 }
187 else if (whence != 2)
188 {
189 if (whence == 0)
190 offset -= fz_tell(ctx, stm);
191 if (offset < 0)
192 fz_warn(ctx, "cannot seek backwards");
193 /* dog slow, but rare enough */
194 while (offset-- > 0)
195 {
196 if (fz_read_byte(ctx, stm) == EOF)
197 {
198 fz_warn(ctx, "seek failed");
199 break;
200 }
201 }
202 }
203 else
204 fz_warn(ctx, "cannot seek");
205 }
206
207 fz_buffer *
208 fz_read_file(fz_context *ctx, const char *filename)
209 {
210 fz_stream *stm;
211 fz_buffer *buf = NULL;
212
213 fz_var(buf);
214
215 stm = fz_open_file(ctx, filename);
216 fz_try(ctx)
217 {
218 buf = fz_read_all(ctx, stm, 0);
219 }
220 fz_always(ctx)
221 {
222 fz_drop_stream(ctx, stm);
223 }
224 fz_catch(ctx)
225 {
226 fz_rethrow(ctx);
227 }
228
229 return buf;
230 }
231
232 fz_buffer *
233 fz_try_read_file(fz_context *ctx, const char *filename)
234 {
235 fz_stream *stm;
236 fz_buffer *buf = NULL;
237
238 fz_var(buf);
239
240 stm = fz_try_open_file(ctx, filename);
241 if (stm == NULL)
242 return NULL;
243 fz_try(ctx)
244 {
245 buf = fz_read_all(ctx, stm, 0);
246 }
247 fz_always(ctx)
248 {
249 fz_drop_stream(ctx, stm);
250 }
251 fz_catch(ctx)
252 {
253 fz_rethrow(ctx);
254 }
255
256 return buf;
257 }
258
259 uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm)
260 {
261 int a = fz_read_byte(ctx, stm);
262 int b = fz_read_byte(ctx, stm);
263 if (a == EOF || b == EOF)
264 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
265 return ((uint16_t)a<<8) | ((uint16_t)b);
266 }
267
268 uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm)
269 {
270 int a = fz_read_byte(ctx, stm);
271 int b = fz_read_byte(ctx, stm);
272 int c = fz_read_byte(ctx, stm);
273 if (a == EOF || b == EOF || c == EOF)
274 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
275 return ((uint32_t)a<<16) | ((uint32_t)b<<8) | ((uint32_t)c);
276 }
277
278 uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm)
279 {
280 int a = fz_read_byte(ctx, stm);
281 int b = fz_read_byte(ctx, stm);
282 int c = fz_read_byte(ctx, stm);
283 int d = fz_read_byte(ctx, stm);
284 if (a == EOF || b == EOF || c == EOF || d == EOF)
285 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
286 return ((uint32_t)a<<24) | ((uint32_t)b<<16) | ((uint32_t)c<<8) | ((uint32_t)d);
287 }
288
289 uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm)
290 {
291 int a = fz_read_byte(ctx, stm);
292 int b = fz_read_byte(ctx, stm);
293 int c = fz_read_byte(ctx, stm);
294 int d = fz_read_byte(ctx, stm);
295 int e = fz_read_byte(ctx, stm);
296 int f = fz_read_byte(ctx, stm);
297 int g = fz_read_byte(ctx, stm);
298 int h = fz_read_byte(ctx, stm);
299 if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF)
300 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
301 return ((uint64_t)a<<56) | ((uint64_t)b<<48) | ((uint64_t)c<<40) | ((uint64_t)d<<32)
302 | ((uint64_t)e<<24) | ((uint64_t)f<<16) | ((uint64_t)g<<8) | ((uint64_t)h);
303 }
304
305 uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
306 {
307 int a = fz_read_byte(ctx, stm);
308 int b = fz_read_byte(ctx, stm);
309 if (a == EOF || b == EOF)
310 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
311 return ((uint16_t)a) | ((uint16_t)b<<8);
312 }
313
314 uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm)
315 {
316 int a = fz_read_byte(ctx, stm);
317 int b = fz_read_byte(ctx, stm);
318 int c = fz_read_byte(ctx, stm);
319 if (a == EOF || b == EOF || c == EOF)
320 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
321 return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16);
322 }
323
324 uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
325 {
326 int a = fz_read_byte(ctx, stm);
327 int b = fz_read_byte(ctx, stm);
328 int c = fz_read_byte(ctx, stm);
329 int d = fz_read_byte(ctx, stm);
330 if (a == EOF || b == EOF || c == EOF || d == EOF)
331 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
332 return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16) | ((uint32_t)d<<24);
333 }
334
335 uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
336 {
337 int a = fz_read_byte(ctx, stm);
338 int b = fz_read_byte(ctx, stm);
339 int c = fz_read_byte(ctx, stm);
340 int d = fz_read_byte(ctx, stm);
341 int e = fz_read_byte(ctx, stm);
342 int f = fz_read_byte(ctx, stm);
343 int g = fz_read_byte(ctx, stm);
344 int h = fz_read_byte(ctx, stm);
345 if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF)
346 fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
347 return ((uint64_t)a) | ((uint64_t)b<<8) | ((uint64_t)c<<16) | ((uint64_t)d<<24)
348 | ((uint64_t)e<<32) | ((uint64_t)f<<40) | ((uint64_t)g<<48) | ((uint64_t)h<<56);
349 }
350
351 int16_t fz_read_int16(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16(ctx, stm); }
352 int32_t fz_read_int32(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32(ctx, stm); }
353 int64_t fz_read_int64(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64(ctx, stm); }
354
355 int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16_le(ctx, stm); }
356 int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32_le(ctx, stm); }
357 int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64_le(ctx, stm); }
358
359 float
360 fz_read_float_le(fz_context *ctx, fz_stream *stm)
361 {
362 union {float f;int32_t i;} u;
363
364 u.i = fz_read_int32_le(ctx, stm);
365 return u.f;
366 }
367
368 float
369 fz_read_float(fz_context *ctx, fz_stream *stm)
370 {
371 union {float f;int32_t i;} u;
372
373 u.i = fz_read_int32(ctx, stm);
374 return u.f;
375 }
376
377 void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
378 {
379 int c;
380 do
381 {
382 if (len <= 0)
383 fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string");
384
385 c = fz_read_byte(ctx, stm);
386 if (c == EOF)
387 fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string");
388 *buffer++ = c;
389 len--;
390 }
391 while (c != 0);
392 }
393
394 int fz_read_rune(fz_context *ctx, fz_stream *in)
395 {
396 uint8_t d, e, f;
397 int x;
398 int c = fz_read_byte(ctx, in);
399 if (c == EOF)
400 return EOF;
401
402 if ((c & 0xF8) == 0xF0)
403 {
404 x = fz_read_byte(ctx, in);
405 if (x == EOF)
406 return 0xFFFD;
407 d = (uint8_t)x;
408 c = (c & 7)<<18;
409 if ((d & 0xC0) == 0x80)
410 {
411 x = fz_read_byte(ctx, in);
412 if (x == EOF)
413 return 0xFFFD;
414 e = (uint8_t)x;
415 c += (d & 0x3f)<<12;
416 if ((e & 0xC0) == 0x80)
417 {
418 x = fz_read_byte(ctx, in);
419 if (x == EOF)
420 return 0xFFFD;
421 f = (uint8_t)x;
422 c += (e & 0x3f)<<6;
423 if ((f & 0xC0) == 0x80)
424 {
425 c += f & 0x3f;
426 }
427 else
428 goto bad_byte;
429 }
430 else
431 goto bad_byte;
432 }
433 else
434 goto bad_byte;
435 }
436 else if ((c & 0xF0) == 0xE0)
437 {
438 x = fz_read_byte(ctx, in);
439 if (x == EOF)
440 return 0xFFFD;
441 d = (uint8_t)x;
442 c = (c & 15)<<12;
443 if ((d & 0xC0) == 0x80)
444 {
445 x = fz_read_byte(ctx, in);
446 if (x == EOF)
447 return 0xFFFD;
448 e = (uint8_t)x;
449 c += (d & 0x3f)<<6;
450 if ((e & 0xC0) == 0x80)
451 {
452 c += e & 0x3f;
453 }
454 else
455 goto bad_byte;
456 }
457 else
458 goto bad_byte;
459 }
460 else if ((c & 0xE0) == 0xC0)
461 {
462 x = fz_read_byte(ctx, in);
463 if (x == EOF)
464 return 0xFFFD;
465 d = (uint8_t)x;
466 c = (c & 31)<<6;
467 if ((d & 0xC0) == 0x80)
468 {
469 c += d & 0x3f;
470 }
471 else
472 fz_unread_byte(ctx, in);
473 }
474 else if ((c & 0xc0) == 0x80)
475 {
476 bad_byte:
477 fz_unread_byte(ctx, in);
478 return 0xFFFD;
479 }
480
481 return c;
482
483 }
484
485 int fz_read_utf16_le(fz_context *ctx, fz_stream *stm)
486 {
487 int c = fz_read_byte(ctx, stm);
488 int d, e;
489
490 if (c == EOF)
491 return EOF;
492
493 d = fz_read_byte(ctx, stm);
494 if (d == EOF)
495 return c; /* Might be wrong, but the best we can do. */
496
497 c |= d<<8;
498
499 /* If it's not a surrogate, we're done. */
500 if (c < 0xd800 || c >= 0xe000)
501 return c;
502
503 /* It *ought* to be a leading (high) surrogate. If it's not,
504 * then we're in trouble. */
505 if (c >= 0xdc00)
506 return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */
507
508 /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need
509 * 2 to tell if it was a low surrogate. Just assume it is. */
510 d = fz_read_byte(ctx, stm);
511 if (d == EOF)
512 {
513 /* Failure! Imagine the trailing surrogate was 0. */
514 return 0x10000 + ((c - 0xd800)<<10);
515 }
516 e = fz_read_byte(ctx, stm);
517 if (e == EOF)
518 {
519 e = 0xDC; /* Fudge a low surrogate */
520 }
521
522 d |= e<<8;
523
524 if (d < 0xdc00 || d >= 0xe000)
525 {
526 /* Bad encoding! This is nasty, because we've eaten 2 bytes from the
527 * stream which ideally we would not have. Serves you right for
528 * having a broken stream. */
529 return 0x10000 + ((c - 0xd800)<<10); /* Imagine the high surrogate was 0. */
530 }
531
532 c -= 0xd800;
533 d -= 0xdc00;
534
535 return 0x10000 + (c<<10) + d;
536 }
537
538 int fz_read_utf16_be(fz_context *ctx, fz_stream *stm)
539 {
540 int c = fz_read_byte(ctx, stm);
541 int d, e;
542
543 if (c == EOF)
544 return EOF;
545
546 d = fz_read_byte(ctx, stm);
547 if (d == EOF)
548 return c; /* Might be wrong, but the best we can do. */
549
550 c = (c<<8) | d;
551
552 /* If it's not a surrogate, we're done. */
553 if (c < 0xd800 || c >= 0xe000)
554 return c;
555
556 /* It *ought* to be a leading (high) surrogate. If it's not,
557 * then we're in trouble. */
558 if (c >= 0xdc00)
559 return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */
560
561 /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need
562 * 2 to tell if it was a low surrogate. Just assume it is. */
563 d = fz_read_byte(ctx, stm);
564 if (d == EOF)
565 {
566 /* Failure! Imagine the trailing surrogate was 0. */
567 return 0x10000 + ((c - 0xd800)<<10);
568 }
569
570 /* The next byte ought to be the start of a trailing (low) surrogate. */
571 if (d < 0xdc || d >= 0xe0)
572 {
573 /* It wasn't. Put the byte back. */
574 fz_unread_byte(ctx, stm);
575 d = 0xdc00; /* Pretend it was a 0 surrogate. */
576 }
577 else
578 {
579 e = fz_read_byte(ctx, stm);
580 if (e == EOF)
581 {
582 e = 0;
583 }
584 d = (d<<8) | e;
585 }
586
587 c -= 0xd800;
588 d -= 0xdc00;
589
590 return 0x10000 + (c<<10) + d;
591 }