comparison mupdf-source/source/fitz/buffer.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 #include <string.h>
26 #include <stdarg.h>
27
28 fz_buffer *
29 fz_new_buffer(fz_context *ctx, size_t size)
30 {
31 fz_buffer *b;
32
33 size = size > 1 ? size : 16;
34
35 b = fz_malloc_struct(ctx, fz_buffer);
36 b->refs = 1;
37 fz_try(ctx)
38 {
39 b->data = Memento_label(fz_malloc(ctx, size), "fz_buffer_data");
40 }
41 fz_catch(ctx)
42 {
43 fz_free(ctx, b);
44 fz_rethrow(ctx);
45 }
46 b->cap = size;
47 b->len = 0;
48 b->unused_bits = 0;
49
50 return b;
51 }
52
53 fz_buffer *
54 fz_new_buffer_from_data(fz_context *ctx, unsigned char *data, size_t size)
55 {
56 fz_buffer *b = NULL;
57
58 fz_try(ctx)
59 {
60 b = fz_malloc_struct(ctx, fz_buffer);
61 b->refs = 1;
62 b->data = data;
63 b->cap = size;
64 b->len = size;
65 b->unused_bits = 0;
66 }
67 fz_catch(ctx)
68 {
69 fz_free(ctx, data);
70 fz_rethrow(ctx);
71 }
72
73 return b;
74 }
75
76 fz_buffer *
77 fz_new_buffer_from_shared_data(fz_context *ctx, const unsigned char *data, size_t size)
78 {
79 fz_buffer *b;
80
81 b = fz_malloc_struct(ctx, fz_buffer);
82 b->refs = 1;
83 b->data = (unsigned char *)data; /* cast away const */
84 b->cap = size;
85 b->len = size;
86 b->unused_bits = 0;
87 b->shared = 1;
88
89 return b;
90 }
91
92 fz_buffer *
93 fz_new_buffer_from_copied_data(fz_context *ctx, const unsigned char *data, size_t size)
94 {
95 fz_buffer *b;
96 if (size > 0 && data == NULL)
97 fz_throw(ctx, FZ_ERROR_ARGUMENT, "no data provided");
98 b = fz_new_buffer(ctx, size);
99 b->len = size;
100 memcpy(b->data, data, size);
101 return b;
102 }
103
104 fz_buffer *fz_clone_buffer(fz_context *ctx, fz_buffer *buf)
105 {
106 return fz_new_buffer_from_copied_data(ctx, buf ? buf->data : NULL, buf ? buf->len : 0);
107 }
108
/*
 * Return 1 if a is one of the whitespace characters skipped by the
 * base64 decoder (newline, carriage return, tab, space, form feed),
 * otherwise 0.
 */
static inline int iswhite(int a)
{
	return a == ' ' || a == '\t' || a == '\n' || a == '\r' || a == '\f';
}
118
119 fz_buffer *
120 fz_new_buffer_from_base64(fz_context *ctx, const char *data, size_t size)
121 {
122 fz_buffer *out = fz_new_buffer(ctx, size > 0 ? size : strlen(data));
123 const char *end = data + (size > 0 ? size : strlen(data));
124 const char *s = data;
125 uint32_t buf = 0;
126 int bits = 0;
127
128 /* This is https://infra.spec.whatwg.org/#forgiving-base64-decode
129 * but even more relaxed. We allow any number of trailing '=' code
130 * points and instead of returning failure on invalid characters, we
131 * warn and truncate.
132 */
133
134 while (s < end && iswhite(*s))
135 s++;
136 while (s < end && iswhite(end[-1]))
137 end--;
138 while (s < end && end[-1] == '=')
139 end--;
140
141 fz_try(ctx)
142 {
143 while (s < end)
144 {
145 int c = *s++;
146
147 if (c >= 'A' && c <= 'Z')
148 c = c - 'A';
149 else if (c >= 'a' && c <= 'z')
150 c = c - 'a' + 26;
151 else if (c >= '0' && c <= '9')
152 c = c - '0' + 52;
153 else if (c == '+')
154 c = 62;
155 else if (c == '/')
156 c = 63;
157 else if (iswhite(c))
158 continue;
159 else
160 {
161 fz_warn(ctx, "invalid character in base64");
162 break;
163 }
164
165 buf <<= 6;
166 buf |= c & 0x3f;
167 bits += 6;
168
169 if (bits == 24)
170 {
171 fz_append_byte(ctx, out, buf >> 16);
172 fz_append_byte(ctx, out, buf >> 8);
173 fz_append_byte(ctx, out, buf >> 0);
174 bits = 0;
175 }
176 }
177
178 if (bits == 18)
179 {
180 fz_append_byte(ctx, out, buf >> 10);
181 fz_append_byte(ctx, out, buf >> 2);
182 }
183 else if (bits == 12)
184 {
185 fz_append_byte(ctx, out, buf >> 4);
186 }
187 }
188 fz_catch(ctx)
189 {
190 fz_drop_buffer(ctx, out);
191 fz_rethrow(ctx);
192 }
193 return out;
194 }
195
196 fz_buffer *
197 fz_keep_buffer(fz_context *ctx, fz_buffer *buf)
198 {
199 return fz_keep_imp(ctx, buf, &buf->refs);
200 }
201
202 void
203 fz_drop_buffer(fz_context *ctx, fz_buffer *buf)
204 {
205 if (fz_drop_imp(ctx, buf, &buf->refs))
206 {
207 if (!buf->shared)
208 fz_free(ctx, buf->data);
209 fz_free(ctx, buf);
210 }
211 }
212
213 void
214 fz_resize_buffer(fz_context *ctx, fz_buffer *buf, size_t size)
215 {
216 if (buf->shared)
217 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot resize a buffer with shared storage");
218 buf->data = fz_realloc(ctx, buf->data, size);
219 buf->cap = size;
220 if (buf->len > buf->cap)
221 buf->len = buf->cap;
222 }
223
224 void
225 fz_grow_buffer(fz_context *ctx, fz_buffer *buf)
226 {
227 size_t newsize = (buf->cap * 3) / 2;
228 if (newsize == 0)
229 newsize = 256;
230 fz_resize_buffer(ctx, buf, newsize);
231 }
232
233 static void
234 fz_ensure_buffer(fz_context *ctx, fz_buffer *buf, size_t min)
235 {
236 size_t newsize = buf->cap;
237 if (newsize < 16)
238 newsize = 16;
239 while (newsize < min)
240 {
241 newsize = (newsize * 3) / 2;
242 }
243 fz_resize_buffer(ctx, buf, newsize);
244 }
245
246 void
247 fz_trim_buffer(fz_context *ctx, fz_buffer *buf)
248 {
249 if (buf->cap > buf->len+1)
250 fz_resize_buffer(ctx, buf, buf->len);
251 }
252
253 void
254 fz_clear_buffer(fz_context *ctx, fz_buffer *buf)
255 {
256 buf->len = 0;
257 }
258
259 void
260 fz_terminate_buffer(fz_context *ctx, fz_buffer *buf)
261 {
262 /* ensure that there is a zero-byte after the end of the data */
263 if (buf->len + 1 > buf->cap)
264 fz_grow_buffer(ctx, buf);
265 buf->data[buf->len] = 0;
266 }
267
268 size_t
269 fz_buffer_storage(fz_context *ctx, fz_buffer *buf, unsigned char **datap)
270 {
271 if (datap)
272 *datap = (buf ? buf->data : NULL);
273 return (buf ? buf->len : 0);
274 }
275
276 const char *
277 fz_string_from_buffer(fz_context *ctx, fz_buffer *buf)
278 {
279 if (!buf)
280 return "";
281 fz_terminate_buffer(ctx, buf);
282 return (const char *)buf->data;
283 }
284
285 size_t
286 fz_buffer_extract(fz_context *ctx, fz_buffer *buf, unsigned char **datap)
287 {
288 size_t len = buf ? buf->len : 0;
289 *datap = (buf ? buf->data : NULL);
290
291 if (buf)
292 {
293 buf->data = NULL;
294 buf->len = 0;
295 }
296 return len;
297 }
298
299 fz_buffer *
300 fz_slice_buffer(fz_context *ctx, fz_buffer *buf, int64_t start, int64_t end)
301 {
302 unsigned char *src = NULL;
303 size_t size = fz_buffer_storage(ctx, buf, &src);
304 size_t s, e;
305
306 if (start < 0)
307 start += size;
308 if (end < 0)
309 end += size;
310
311 s = fz_clamp64(start, 0, size);
312 e = fz_clamp64(end, 0, size);
313
314 if (s == size || e <= s)
315 return fz_new_buffer(ctx, 0);
316
317 return fz_new_buffer_from_copied_data(ctx, &src[s], e - s);
318 }
319
320 void
321 fz_append_buffer(fz_context *ctx, fz_buffer *buf, fz_buffer *extra)
322 {
323 if (buf->cap - buf->len < extra->len)
324 {
325 buf->data = fz_realloc(ctx, buf->data, buf->len + extra->len);
326 buf->cap = buf->len + extra->len;
327 }
328
329 memcpy(buf->data + buf->len, extra->data, extra->len);
330 buf->len += extra->len;
331 }
332
333 void
334 fz_append_data(fz_context *ctx, fz_buffer *buf, const void *data, size_t len)
335 {
336 if (buf->len + len > buf->cap)
337 fz_ensure_buffer(ctx, buf, buf->len + len);
338 memcpy(buf->data + buf->len, data, len);
339 buf->len += len;
340 buf->unused_bits = 0;
341 }
342
343 void
344 fz_append_string(fz_context *ctx, fz_buffer *buf, const char *data)
345 {
346 size_t len = strlen(data);
347 if (buf->len + len > buf->cap)
348 fz_ensure_buffer(ctx, buf, buf->len + len);
349 memcpy(buf->data + buf->len, data, len);
350 buf->len += len;
351 buf->unused_bits = 0;
352 }
353
354 void
355 fz_append_byte(fz_context *ctx, fz_buffer *buf, int val)
356 {
357 if (buf->len + 1 > buf->cap)
358 fz_grow_buffer(ctx, buf);
359 buf->data[buf->len++] = val;
360 buf->unused_bits = 0;
361 }
362
363 void
364 fz_append_rune(fz_context *ctx, fz_buffer *buf, int c)
365 {
366 char data[10];
367 int len = fz_runetochar(data, c);
368 if (buf->len + len > buf->cap)
369 fz_ensure_buffer(ctx, buf, buf->len + len);
370 memcpy(buf->data + buf->len, data, len);
371 buf->len += len;
372 buf->unused_bits = 0;
373 }
374
375 void
376 fz_append_int32_be(fz_context *ctx, fz_buffer *buf, int x)
377 {
378 fz_append_byte(ctx, buf, (x >> 24) & 0xFF);
379 fz_append_byte(ctx, buf, (x >> 16) & 0xFF);
380 fz_append_byte(ctx, buf, (x >> 8) & 0xFF);
381 fz_append_byte(ctx, buf, (x) & 0xFF);
382 }
383
384 void
385 fz_append_int16_be(fz_context *ctx, fz_buffer *buf, int x)
386 {
387 fz_append_byte(ctx, buf, (x >> 8) & 0xFF);
388 fz_append_byte(ctx, buf, (x) & 0xFF);
389 }
390
391 void
392 fz_append_int32_le(fz_context *ctx, fz_buffer *buf, int x)
393 {
394 fz_append_byte(ctx, buf, (x)&0xFF);
395 fz_append_byte(ctx, buf, (x>>8)&0xFF);
396 fz_append_byte(ctx, buf, (x>>16)&0xFF);
397 fz_append_byte(ctx, buf, (x>>24)&0xFF);
398 }
399
400 void
401 fz_append_int16_le(fz_context *ctx, fz_buffer *buf, int x)
402 {
403 fz_append_byte(ctx, buf, (x)&0xFF);
404 fz_append_byte(ctx, buf, (x>>8)&0xFF);
405 }
406
407 void
408 fz_append_bits(fz_context *ctx, fz_buffer *buf, int val, int bits)
409 {
410 int shift;
411
412 /* Throughout this code, the invariant is that we need to write the
413 * bottom 'bits' bits of 'val' into the stream. On entry we assume
414 * that val & ((1<<bits)-1) == val, but we do not rely on this after
415 * having written the first partial byte. */
416
417 if (bits == 0)
418 return;
419
420 /* buf->len always covers all the bits in the buffer, including
421 * any unused ones in the last byte, which will always be 0.
422 * buf->unused_bits = the number of unused bits in the last byte.
423 */
424
425 /* Find the amount we need to shift val up by so that it will be in
426 * the correct position to be inserted into any existing data byte. */
427 shift = (buf->unused_bits - bits);
428
429 /* Extend the buffer as required before we start; that way we never
430 * fail part way during writing. If shift < 0, then we'll need -shift
431 * more bits. */
432 if (shift < 0)
433 {
434 int extra = (7-shift)>>3; /* Round up to bytes */
435 fz_ensure_buffer(ctx, buf, buf->len + extra);
436 }
437
438 /* Write any bits that will fit into the existing byte */
439 if (buf->unused_bits)
440 {
441 buf->data[buf->len-1] |= (shift >= 0 ? (((unsigned int)val)<<shift) : (((unsigned int)val)>>-shift));
442 if (shift >= 0)
443 {
444 /* If we were shifting up, we're done. */
445 buf->unused_bits -= bits;
446 return;
447 }
448 /* The number of bits left to write is the number that didn't
449 * fit in this first byte. */
450 bits = -shift;
451 }
452
453 /* Write any whole bytes */
454 while (bits >= 8)
455 {
456 bits -= 8;
457 buf->data[buf->len++] = val>>bits;
458 }
459
460 /* Write trailing bits (with 0's in unused bits) */
461 if (bits > 0)
462 {
463 bits = 8-bits;
464 buf->data[buf->len++] = val<<bits;
465 }
466 buf->unused_bits = bits;
467 }
468
469 void
470 fz_append_bits_pad(fz_context *ctx, fz_buffer *buf)
471 {
472 buf->unused_bits = 0;
473 }
474
475 static void fz_append_emit(fz_context *ctx, void *buffer, int c)
476 {
477 fz_append_byte(ctx, buffer, c);
478 }
479
480 void
481 fz_append_printf(fz_context *ctx, fz_buffer *buffer, const char *fmt, ...)
482 {
483 va_list args;
484 va_start(args, fmt);
485 fz_format_string(ctx, buffer, fz_append_emit, fmt, args);
486 va_end(args);
487 }
488
489 void
490 fz_append_vprintf(fz_context *ctx, fz_buffer *buffer, const char *fmt, va_list args)
491 {
492 fz_format_string(ctx, buffer, fz_append_emit, fmt, args);
493 }
494
495 void
496 fz_append_pdf_string(fz_context *ctx, fz_buffer *buffer, const char *text)
497 {
498 size_t len = 2;
499 const char *s = text;
500 char *d;
501 char c;
502
503 while ((c = *s++) != 0)
504 {
505 switch (c)
506 {
507 case '\n':
508 case '\r':
509 case '\t':
510 case '\b':
511 case '\f':
512 case '(':
513 case ')':
514 case '\\':
515 len++;
516 break;
517 }
518 len++;
519 }
520
521 while(buffer->cap - buffer->len < len)
522 fz_grow_buffer(ctx, buffer);
523
524 s = text;
525 d = (char *)buffer->data + buffer->len;
526 *d++ = '(';
527 while ((c = *s++) != 0)
528 {
529 switch (c)
530 {
531 case '\n':
532 *d++ = '\\';
533 *d++ = 'n';
534 break;
535 case '\r':
536 *d++ = '\\';
537 *d++ = 'r';
538 break;
539 case '\t':
540 *d++ = '\\';
541 *d++ = 't';
542 break;
543 case '\b':
544 *d++ = '\\';
545 *d++ = 'b';
546 break;
547 case '\f':
548 *d++ = '\\';
549 *d++ = 'f';
550 break;
551 case '(':
552 *d++ = '\\';
553 *d++ = '(';
554 break;
555 case ')':
556 *d++ = '\\';
557 *d++ = ')';
558 break;
559 case '\\':
560 *d++ = '\\';
561 *d++ = '\\';
562 break;
563 default:
564 *d++ = c;
565 }
566 }
567 *d = ')';
568 buffer->len += len;
569 }
570
571 void
572 fz_md5_buffer(fz_context *ctx, fz_buffer *buffer, unsigned char digest[16])
573 {
574 fz_md5 state;
575 fz_md5_init(&state);
576 if (buffer)
577 fz_md5_update(&state, buffer->data, buffer->len);
578 fz_md5_final(&state, digest);
579 }
580
581 #ifdef TEST_BUFFER_WRITE
582
583 #define TEST_LEN 1024
584
585 void
586 fz_test_buffer_write(fz_context *ctx)
587 {
588 fz_buffer *master = fz_new_buffer(ctx, TEST_LEN);
589 fz_buffer *copy = fz_new_buffer(ctx, TEST_LEN);
590 fz_stream *stm;
591 int i, j, k;
592
593 /* Make us a dummy buffer */
594 for (i = 0; i < TEST_LEN; i++)
595 {
596 master->data[i] = rand();
597 }
598 master->len = TEST_LEN;
599
600 /* Now copy that buffer several times, checking it for validity */
601 stm = fz_open_buffer(ctx, master);
602 for (i = 0; i < 256; i++)
603 {
604 memset(copy->data, i, TEST_LEN);
605 copy->len = 0;
606 j = TEST_LEN * 8;
607 do
608 {
609 k = (rand() & 31)+1;
610 if (k > j)
611 k = j;
612 fz_append_bits(ctx, copy, fz_read_bits(ctx, stm, k), k);
613 j -= k;
614 }
615 while (j);
616
617 if (memcmp(copy->data, master->data, TEST_LEN) != 0)
618 fprintf(stderr, "Copied buffer is different!\n");
619 fz_seek(stm, 0, 0);
620 }
621 fz_drop_stream(stm);
622 fz_drop_buffer(ctx, master);
623 fz_drop_buffer(ctx, copy);
624 }
625 #endif