comparison mupdf-source/thirdparty/extract/src/buffer.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #include "extract/buffer.h"
2 #include "extract/alloc.h"
3
4 #include "outf.h"
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11
12 /* TODO: Check whether the whole complexity of the cache is actually justified. */
13
14 struct extract_buffer_t
15 {
16 /* First member must be extract_buffer_cache_t - required by inline
17 implementations of extract_buffer_read() and extract_buffer_write(). */
18 extract_buffer_cache_t cache;
19 extract_alloc_t *alloc;
20 void *handle;
21 extract_buffer_fn_read *fn_read;
22 extract_buffer_fn_write *fn_write;
23 extract_buffer_fn_cache *fn_cache;
24 extract_buffer_fn_close *fn_close;
25 size_t pos; /* Does not include bytes currently read/written to cache. */
26 };
27
28
29 extract_alloc_t *extract_buffer_alloc(extract_buffer_t* buffer)
30 {
31 return buffer->alloc;
32 }
33
34
35 int extract_buffer_open(extract_alloc_t *alloc,
36 void *handle,
37 extract_buffer_fn_read *fn_read,
38 extract_buffer_fn_write *fn_write,
39 extract_buffer_fn_cache *fn_cache,
40 extract_buffer_fn_close *fn_close,
41 extract_buffer_t **o_buffer)
42 {
43 extract_buffer_t *buffer;
44
45 if (extract_malloc(alloc, &buffer, sizeof(*buffer)))
46 return -1;
47
48 buffer->alloc = alloc;
49 buffer->handle = handle;
50 buffer->fn_read = fn_read;
51 buffer->fn_write = fn_write;
52 buffer->fn_cache = fn_cache;
53 buffer->fn_close = fn_close;
54 buffer->cache.cache = NULL;
55 buffer->cache.numbytes = 0;
56 buffer->cache.pos = 0;
57 buffer->pos = 0;
58
59 *o_buffer = buffer;
60
61 return 0;
62 }
63
64
65 size_t extract_buffer_pos(extract_buffer_t *buffer)
66 {
67 size_t ret = buffer->pos;
68
69 if (buffer->cache.cache)
70 ret += buffer->cache.pos;
71
72 return ret;
73 }
74
75
76 /* Send contents of cache to fn_write() using a loop to cope with short
77 writes. Returns with *o_actual containing the number of bytes successfully
78 sent, and buffer->cache.{cache,numbytes,pos} all set to zero.
79
80 If we return zero but *actual is less than original buffer->cache.numbytes,
81 then fn_write returned EOF. */
82 static int cache_flush(extract_buffer_t *buffer, size_t *o_actual)
83 {
84 int e = -1;
85 size_t p = 0;
86
87 assert(buffer->cache.pos <= buffer->cache.numbytes);
88
89 while (p != buffer->cache.pos)
90 {
91 size_t actual;
92 if (buffer->fn_write(
93 buffer->handle,
94 (char*) buffer->cache.cache + p,
95 buffer->cache.pos - p,
96 &actual
97 )) goto end;
98 buffer->pos += actual;
99 p += actual;
100 if (actual == 0)
101 {
102 /* EOF while flushing cache. We set <pos> to the
103 * number of bytes in data..+numbytes that we know
104 * have been successfully handled by buffer->fn_write().
105 * This can be negative if we failed to flush
106 * earlier data. */
107 outf("*** buffer->fn_write() EOF\n");
108 e = 0;
109 goto end;
110 }
111 }
112 outfx("cache flush, buffer->pos=%i p=buffer->cache.pos=%i\n",
113 buffer->pos, p);
114 buffer->cache.cache = NULL;
115 buffer->cache.numbytes = 0;
116 buffer->cache.pos = 0;
117
118 e = 0;
119 end:
120 *o_actual = p;
121
122 return e;
123 }
124
125 int extract_buffer_close(extract_buffer_t **p_buffer)
126 {
127 extract_buffer_t *buffer = *p_buffer;
128 int e = -1;
129
130 if (buffer == NULL)
131 return 0;
132
133 if (buffer->cache.cache && buffer->fn_write)
134 {
135 /* Flush cache. */
136 size_t cache_bytes = buffer->cache.pos;
137 size_t actual;
138 if (cache_flush(buffer, &actual)) goto end;
139 if (actual != cache_bytes)
140 {
141 e = 1;
142 goto end;
143 }
144 }
145
146 if (buffer->fn_close)
147 buffer->fn_close(buffer->handle);
148
149 e = 0;
150 end:
151 extract_free(buffer->alloc, &buffer);
152 *p_buffer = NULL;
153
154 return e;
155 }
156
/* fn_cache callback used by extract_buffer_open_simple(). It never supplies
 * more data: it reports an empty cache (NULL, 0 bytes), which callers treat
 * as EOF. Always returns 0. <handle> is unused. */
static int simple_cache(void *handle, void **o_cache, size_t *o_numbytes)
{
    (void) handle;

    /* Indicate EOF. */
    *o_numbytes = 0;
    *o_cache = NULL;

    return 0;
}
166
167 int extract_buffer_open_simple(extract_alloc_t *alloc,
168 const void *data,
169 size_t numbytes,
170 void *handle,
171 extract_buffer_fn_close *fn_close,
172 extract_buffer_t **o_buffer)
173 {
174 extract_buffer_t *buffer;
175
176 if (extract_malloc(alloc, &buffer, sizeof(*buffer)))
177 return -1;
178
179 /* We need cast away the const here. data[] will be written-to if caller
180 uses us as a write buffer. */
181 buffer->alloc = alloc;
182 buffer->cache.cache = (void*) data;
183 buffer->cache.numbytes = numbytes;
184 buffer->cache.pos = 0;
185 buffer->handle = handle;
186 buffer->fn_read = NULL;
187 buffer->fn_write = NULL;
188 buffer->fn_cache = simple_cache;
189 buffer->fn_close = fn_close;
190 *o_buffer = buffer;
191
192 return 0;
193 }
194
195
196 /* Implementation of extract_buffer_file*. */
197
/* fn_read callback for FILE-backed buffers: <handle> is the FILE*.
 *
 * Reads up to <numbytes> bytes into <data> with fread() and stores the
 * number of bytes read in *o_actual.
 *
 * Returns 0 on success (including short reads and EOF). Returns -1 with
 * errno set to EIO if nothing was read and the stream's error flag is
 * set. */
static int file_read(void *handle, void *data, size_t numbytes, size_t *o_actual)
{
    FILE *stream = handle;
    size_t got = fread(data, 1, numbytes, stream);

    outfx("file=%p numbytes=%i => n=%zi", stream, numbytes, got);
    assert(o_actual); /* We are called by other extract_buffer fns, not by user code. */

    *o_actual = got;

    /* Anything read, or a clean EOF, counts as success. */
    if (got != 0 || !ferror(stream))
        return 0;

    errno = EIO;
    return -1;
}
215
/* fn_write callback for FILE-backed buffers: <handle> is the FILE*.
 *
 * Writes up to <numbytes> bytes from <data> with fwrite() and stores the
 * number of bytes written in *o_actual.
 *
 * Returns 0 on success (including short writes). Returns -1 with errno set
 * to EIO if nothing was written and the stream's error flag is set. */
static int file_write(void *handle, const void *data, size_t numbytes, size_t *o_actual)
{
    FILE *stream = handle;
    size_t put = fwrite(data, 1 /*size*/, numbytes /*nmemb*/, stream);

    outfx("file=%p numbytes=%i => n=%zi", stream, numbytes, put);
    assert(o_actual); /* We are called by other extract_buffer fns, not by user code. */

    *o_actual = put;

    /* Anything written, or no error flagged, counts as success. */
    if (put != 0 || !ferror(stream))
        return 0;

    errno = EIO;
    return -1;
}
233
/* fn_close callback for FILE-backed buffers: closes the FILE* passed as
 * <handle>. A NULL handle is ignored. The result of fclose() is not
 * reported. */
static void file_close(void *handle)
{
    FILE *stream = handle;

    if (stream == NULL)
        return;

    fclose(stream);
}
241
242 int extract_buffer_open_file(extract_alloc_t *alloc, const char *path, int writable, extract_buffer_t **o_buffer)
243 {
244 int e = -1;
245 FILE *file = fopen(path, (writable) ? "wb" : "rb");
246
247 if (!file)
248 {
249 outf("failed to open '%s': %s", path, strerror(errno));
250 goto end;
251 }
252
253 if (extract_buffer_open(alloc,
254 file /*handle*/,
255 writable ? NULL : file_read,
256 writable ? file_write : NULL,
257 NULL /*fn_cache*/,
258 file_close,
259 o_buffer)) goto end;
260
261 e = 0;
262 end:
263
264 if (e)
265 {
266 if (file)
267 fclose(file);
268 *o_buffer = NULL;
269 }
270
271 return e;
272 }
273
274
275 /* Support for read/write. */
276
277 /* Called by extract_buffer_read() if not enough space in buffer->cache. */
278 int extract_buffer_read_internal(extract_buffer_t *buffer,
279 void *destination,
280 size_t numbytes,
281 size_t *o_actual)
282 {
283 int e = -1;
284 size_t pos = 0; /* Number of bytes read so far. */
285
286 /* In each iteration we either read from cache, or use buffer->fn_read()
287 directly or repopulate the cache. */
288 while (pos != numbytes)
289 {
290 size_t n = buffer->cache.numbytes - buffer->cache.pos;
291 if (n)
292 {
293 /* There is data in cache. */
294 if (n > numbytes - pos) n = numbytes - pos;
295 memcpy((char *)destination + pos, (char *)buffer->cache.cache + buffer->cache.pos, n);
296 pos += n;
297 buffer->cache.pos += n;
298 }
299 /* No data in the cache - do we use fn_read or fn_cache ? */
300 else if (buffer->fn_read &&
301 (buffer->fn_cache == NULL ||
302 (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2)))
303 {
304 /* Either there is no cache, or this read is large
305 * compared to previously-returned cache size, so
306 * let's ignore buffer->fn_cache and use
307 * buffer->fn_read() directly instead. */
308 /* Carry on looping in case of short read. */
309 size_t actual;
310 outfx("using buffer->fn_read() directly for numbytes-pos=%i\n", numbytes-pos);
311 if (buffer->fn_read(buffer->handle, (char*) destination + pos, numbytes - pos, &actual))
312 goto end;
313 if (actual == 0)
314 break; /* EOF. */
315 pos += actual;
316 buffer->pos += actual;
317 }
318 else
319 {
320 /* Repopulate cache. */
321 outfx("using buffer->fn_cache() for buffer->cache.numbytes=%i\n", buffer->cache.numbytes);
322 if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes))
323 goto end;
324 buffer->pos += buffer->cache.pos;
325 buffer->cache.pos = 0;
326 if (buffer->cache.numbytes == 0)
327 break; /* EOF. */
328 }
329 }
330
331 e = 0;
332 end:
333
334 if (o_actual)
335 *o_actual = pos;
336 if (e == 0 && pos != numbytes)
337 return +1; /* EOF. */
338
339 return e;
340 }
341
342
343 int extract_buffer_write_internal(extract_buffer_t *buffer,
344 const void *source,
345 size_t numbytes,
346 size_t *o_actual)
347 {
348 int e = -1;
349 size_t pos = 0; /* Number of bytes written so far. */
350
351 if (buffer->fn_write == NULL)
352 {
353 errno = EINVAL;
354 return -1;
355 }
356
357 /* In each iteration we either write to cache, or use buffer->fn_write()
358 directly or flush the cache. */
359 while (pos != numbytes)
360 {
361 size_t n = buffer->cache.numbytes - buffer->cache.pos;
362 outfx("numbytes=%i pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n",
363 numbytes, pos, buffer->cache.numbytes, buffer->cache.pos);
364 if (n)
365 {
366 /* There is space in cache for writing. */
367 if (n > numbytes - pos)
368 n = numbytes - pos;
369 outfx("writing to cache: numbytes=%i n=%i\n", numbytes, n);
370 memcpy((char*) buffer->cache.cache + buffer->cache.pos, (char*) source + pos, n);
371 pos += n;
372 buffer->cache.pos += n;
373 }
374 else
375 {
376 /* No space left in cache. */
377 outfx("cache empty. pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n",
378 pos, buffer->cache.numbytes, buffer->cache.pos);
379 {
380 /* Flush the cache. */
381 size_t actual;
382 size_t b = buffer->cache.numbytes;
383 ptrdiff_t delta;
384 int ee = cache_flush(buffer, &actual);
385 assert(actual <= b);
386 delta = actual - b;
387 pos += delta;
388 buffer->pos += delta;
389 if (delta)
390 {
391 /* We have only partially flushed the cache. This
392 * is not recoverable. <pos> will be the number of
393 * bytes in source..+numbytes that have been
394 * successfully flushed, and could be negative
395 * if we failed to flush earlier data. */
396 outf("failed to flush. actual=%li delta=%li\n", (long) actual, (long) delta);
397 e = 0;
398 goto end;
399 }
400 if (ee) goto end;
401 }
402
403 if (buffer->fn_cache == NULL ||
404 (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2))
405 {
406 /* Either there is no cache, or this write is large
407 * compared to previously-returned cache size, so let's
408 * ignore the cache and call buffer->fn_write()
409 * directly instead. Carry on looping in case of short
410 * write. */
411 size_t actual;
412 if (buffer->fn_write(buffer->handle, (char*) source + pos, numbytes - pos, &actual))
413 goto end;
414 if (actual == 0)
415 break; /* EOF. */
416 outfx("direct write numbytes-pos=%i actual=%i buffer->pos=%i => %i\n",
417 numbytes-pos, actual, buffer->pos, buffer->pos + actual);
418 pos += actual;
419 buffer->pos += actual;
420 }
421 else
422 {
423 /* Repopulate cache. */
424 outfx("repopulating cache buffer->pos=%i", buffer->pos);
425 if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes))
426 goto end;
427 buffer->cache.pos = 0;
428 if (buffer->cache.numbytes == 0)
429 break; /* EOF. */
430 }
431 }
432 }
433
434 e = 0;
435 end:
436
437 if (o_actual)
438 *o_actual = pos;
439 if (e == 0 && pos != numbytes)
440 e = +1; /* EOF. */
441
442 return e;
443 }
444
445
446 static int expanding_memory_buffer_write(void *handle, const void *source, size_t numbytes, size_t *o_actual)
447 {
448 /* We realloc our memory region as required. For efficiency, we also use
449 * any currently-unused region of our memory buffer as an extract_buffer
450 * cache. So we can be called either to 'flush the cache' (in which case we
451 * don't actually copy any data) or to accept data from somewhere else (in
452 * which case we need to increase the size of our memory region. */
453 extract_buffer_expanding_t *ebe = handle;
454 if ((char *)source >= ebe->data && (char *)source < ebe->data + ebe->alloc_size)
455 {
456 /* Source is inside our memory region so we are being called by
457 * extract_buffer_write_internal() to re-populate the cache. We don't
458 * actually have to copy anything. */
459 assert((size_t) ((char *)source - ebe->data) == ebe->data_size);
460 assert((size_t) ((char *)source - ebe->data + numbytes) <= ebe->alloc_size);
461 ebe->data_size += numbytes;
462 }
463 else
464 {
465 /* Data is external, so copy into our buffer. We will have already been
466 called to flush the cache. */
467 if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, ebe->data_size + numbytes))
468 return -1;
469 ebe->alloc_size = ebe->data_size + numbytes;
470 memcpy(ebe->data + ebe->data_size, source, numbytes);
471 ebe->data_size += numbytes;
472 }
473 *o_actual = numbytes;
474
475 return 0;
476 }
477
478 static int expanding_memory_buffer_cache(void *handle, void **o_cache, size_t *o_numbytes)
479 {
480 extract_buffer_expanding_t *ebe = handle;
481 size_t delta = 4096;
482
483 if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, ebe->data_size + delta))
484 return -1;
485
486 ebe->alloc_size = ebe->data_size + delta;
487 *o_cache = ebe->data + ebe->data_size;
488 *o_numbytes = delta;
489
490 return 0;
491 }
492
493 int extract_buffer_expanding_create(extract_alloc_t *alloc, extract_buffer_expanding_t *ebe)
494 {
495 ebe->data = NULL;
496 ebe->data_size = 0;
497 ebe->alloc_size = 0;
498 if (extract_buffer_open(alloc,
499 ebe,
500 NULL /*fn_read*/,
501 expanding_memory_buffer_write,
502 expanding_memory_buffer_cache,
503 NULL /*fn_close*/,
504 &ebe->buffer))
505 return -1;
506
507 return 0;
508 }