Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/extract/src/buffer.c @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | b50eed0cc0ef |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 #include "extract/buffer.h" | |
| 2 #include "extract/alloc.h" | |
| 3 | |
| 4 #include "outf.h" | |
| 5 | |
| 6 #include <assert.h> | |
| 7 #include <errno.h> | |
| 8 #include <stdio.h> | |
| 9 #include <stdlib.h> | |
| 10 #include <string.h> | |
| 11 | |
| 12 /* TODO: Check whether the whole complexity of the cache is actually justified. */ | |
| 13 | |
| 14 struct extract_buffer_t | |
| 15 { | |
| 16 /* First member must be extract_buffer_cache_t - required by inline | |
| 17 implementations of extract_buffer_read() and extract_buffer_write(). */ | |
| 18 extract_buffer_cache_t cache; | |
| 19 extract_alloc_t *alloc; | |
| 20 void *handle; | |
| 21 extract_buffer_fn_read *fn_read; | |
| 22 extract_buffer_fn_write *fn_write; | |
| 23 extract_buffer_fn_cache *fn_cache; | |
| 24 extract_buffer_fn_close *fn_close; | |
| 25 size_t pos; /* Does not include bytes currently read/written to cache. */ | |
| 26 }; | |
| 27 | |
| 28 | |
| 29 extract_alloc_t *extract_buffer_alloc(extract_buffer_t* buffer) | |
| 30 { | |
| 31 return buffer->alloc; | |
| 32 } | |
| 33 | |
| 34 | |
| 35 int extract_buffer_open(extract_alloc_t *alloc, | |
| 36 void *handle, | |
| 37 extract_buffer_fn_read *fn_read, | |
| 38 extract_buffer_fn_write *fn_write, | |
| 39 extract_buffer_fn_cache *fn_cache, | |
| 40 extract_buffer_fn_close *fn_close, | |
| 41 extract_buffer_t **o_buffer) | |
| 42 { | |
| 43 extract_buffer_t *buffer; | |
| 44 | |
| 45 if (extract_malloc(alloc, &buffer, sizeof(*buffer))) | |
| 46 return -1; | |
| 47 | |
| 48 buffer->alloc = alloc; | |
| 49 buffer->handle = handle; | |
| 50 buffer->fn_read = fn_read; | |
| 51 buffer->fn_write = fn_write; | |
| 52 buffer->fn_cache = fn_cache; | |
| 53 buffer->fn_close = fn_close; | |
| 54 buffer->cache.cache = NULL; | |
| 55 buffer->cache.numbytes = 0; | |
| 56 buffer->cache.pos = 0; | |
| 57 buffer->pos = 0; | |
| 58 | |
| 59 *o_buffer = buffer; | |
| 60 | |
| 61 return 0; | |
| 62 } | |
| 63 | |
| 64 | |
| 65 size_t extract_buffer_pos(extract_buffer_t *buffer) | |
| 66 { | |
| 67 size_t ret = buffer->pos; | |
| 68 | |
| 69 if (buffer->cache.cache) | |
| 70 ret += buffer->cache.pos; | |
| 71 | |
| 72 return ret; | |
| 73 } | |
| 74 | |
| 75 | |
| 76 /* Send contents of cache to fn_write() using a loop to cope with short | |
| 77 writes. Returns with *o_actual containing the number of bytes successfully | |
| 78 sent, and buffer->cache.{cache,numbytes,pos} all set to zero. | |
| 79 | |
| 80 If we return zero but *actual is less than original buffer->cache.numbytes, | |
| 81 then fn_write returned EOF. */ | |
| 82 static int cache_flush(extract_buffer_t *buffer, size_t *o_actual) | |
| 83 { | |
| 84 int e = -1; | |
| 85 size_t p = 0; | |
| 86 | |
| 87 assert(buffer->cache.pos <= buffer->cache.numbytes); | |
| 88 | |
| 89 while (p != buffer->cache.pos) | |
| 90 { | |
| 91 size_t actual; | |
| 92 if (buffer->fn_write( | |
| 93 buffer->handle, | |
| 94 (char*) buffer->cache.cache + p, | |
| 95 buffer->cache.pos - p, | |
| 96 &actual | |
| 97 )) goto end; | |
| 98 buffer->pos += actual; | |
| 99 p += actual; | |
| 100 if (actual == 0) | |
| 101 { | |
| 102 /* EOF while flushing cache. We set <pos> to the | |
| 103 * number of bytes in data..+numbytes that we know | |
| 104 * have been successfully handled by buffer->fn_write(). | |
| 105 * This can be negative if we failed to flush | |
| 106 * earlier data. */ | |
| 107 outf("*** buffer->fn_write() EOF\n"); | |
| 108 e = 0; | |
| 109 goto end; | |
| 110 } | |
| 111 } | |
| 112 outfx("cache flush, buffer->pos=%i p=buffer->cache.pos=%i\n", | |
| 113 buffer->pos, p); | |
| 114 buffer->cache.cache = NULL; | |
| 115 buffer->cache.numbytes = 0; | |
| 116 buffer->cache.pos = 0; | |
| 117 | |
| 118 e = 0; | |
| 119 end: | |
| 120 *o_actual = p; | |
| 121 | |
| 122 return e; | |
| 123 } | |
| 124 | |
| 125 int extract_buffer_close(extract_buffer_t **p_buffer) | |
| 126 { | |
| 127 extract_buffer_t *buffer = *p_buffer; | |
| 128 int e = -1; | |
| 129 | |
| 130 if (buffer == NULL) | |
| 131 return 0; | |
| 132 | |
| 133 if (buffer->cache.cache && buffer->fn_write) | |
| 134 { | |
| 135 /* Flush cache. */ | |
| 136 size_t cache_bytes = buffer->cache.pos; | |
| 137 size_t actual; | |
| 138 if (cache_flush(buffer, &actual)) goto end; | |
| 139 if (actual != cache_bytes) | |
| 140 { | |
| 141 e = 1; | |
| 142 goto end; | |
| 143 } | |
| 144 } | |
| 145 | |
| 146 if (buffer->fn_close) | |
| 147 buffer->fn_close(buffer->handle); | |
| 148 | |
| 149 e = 0; | |
| 150 end: | |
| 151 extract_free(buffer->alloc, &buffer); | |
| 152 *p_buffer = NULL; | |
| 153 | |
| 154 return e; | |
| 155 } | |
| 156 | |
/* fn_cache callback for buffers made by extract_buffer_open_simple(): there
is never a further cache region to hand out, so always report EOF by
returning an empty region. <handle> is unused. Always returns 0. */
static int simple_cache(void *handle, void **o_cache, size_t *o_numbytes)
{
    *o_numbytes = 0;
    *o_cache = NULL;
    (void) handle;

    return 0;
}
| 166 | |
| 167 int extract_buffer_open_simple(extract_alloc_t *alloc, | |
| 168 const void *data, | |
| 169 size_t numbytes, | |
| 170 void *handle, | |
| 171 extract_buffer_fn_close *fn_close, | |
| 172 extract_buffer_t **o_buffer) | |
| 173 { | |
| 174 extract_buffer_t *buffer; | |
| 175 | |
| 176 if (extract_malloc(alloc, &buffer, sizeof(*buffer))) | |
| 177 return -1; | |
| 178 | |
| 179 /* We need cast away the const here. data[] will be written-to if caller | |
| 180 uses us as a write buffer. */ | |
| 181 buffer->alloc = alloc; | |
| 182 buffer->cache.cache = (void*) data; | |
| 183 buffer->cache.numbytes = numbytes; | |
| 184 buffer->cache.pos = 0; | |
| 185 buffer->handle = handle; | |
| 186 buffer->fn_read = NULL; | |
| 187 buffer->fn_write = NULL; | |
| 188 buffer->fn_cache = simple_cache; | |
| 189 buffer->fn_close = fn_close; | |
| 190 *o_buffer = buffer; | |
| 191 | |
| 192 return 0; | |
| 193 } | |
| 194 | |
| 195 | |
| 196 /* Implementation of extract_buffer_file*. */ | |
| 197 | |
/* fn_read callback for file-backed buffers; <handle> is the FILE*.

Stores the number of bytes fread() delivered in *o_actual. Returns 0, except
when nothing was read and the stream's error indicator is set: then errno is
set to EIO and -1 is returned. */
static int file_read(void *handle, void *data, size_t numbytes, size_t *o_actual)
{
    FILE  *stream = handle;
    size_t got = fread(data, 1, numbytes, stream);

    outfx("file=%p numbytes=%i => n=%zi", stream, numbytes, got);
    assert(o_actual); /* Only other extract_buffer functions call us, never user code. */

    *o_actual = got;

    if (got != 0 || !ferror(stream))
        return 0;

    errno = EIO;
    return -1;
}
| 215 | |
/* fn_write callback for file-backed buffers; <handle> is the FILE*.

Stores the number of bytes fwrite() accepted in *o_actual. Returns 0, except
when nothing was written and the stream's error indicator is set: then errno
is set to EIO and -1 is returned. */
static int file_write(void *handle, const void *data, size_t numbytes, size_t *o_actual)
{
    FILE  *stream = handle;
    size_t put = fwrite(data, 1 /*size*/, numbytes /*nmemb*/, stream);

    outfx("file=%p numbytes=%i => n=%zi", stream, numbytes, put);
    assert(o_actual); /* Only other extract_buffer functions call us, never user code. */

    *o_actual = put;

    if (put != 0 || !ferror(stream))
        return 0;

    errno = EIO;
    return -1;
}
| 233 | |
/* fn_close callback for file-backed buffers: closes the FILE* stored in
<handle>. A NULL handle is ignored. */
static void file_close(void *handle)
{
    FILE *stream = handle;

    if (stream == NULL)
        return;

    fclose(stream);
}
| 241 | |
| 242 int extract_buffer_open_file(extract_alloc_t *alloc, const char *path, int writable, extract_buffer_t **o_buffer) | |
| 243 { | |
| 244 int e = -1; | |
| 245 FILE *file = fopen(path, (writable) ? "wb" : "rb"); | |
| 246 | |
| 247 if (!file) | |
| 248 { | |
| 249 outf("failed to open '%s': %s", path, strerror(errno)); | |
| 250 goto end; | |
| 251 } | |
| 252 | |
| 253 if (extract_buffer_open(alloc, | |
| 254 file /*handle*/, | |
| 255 writable ? NULL : file_read, | |
| 256 writable ? file_write : NULL, | |
| 257 NULL /*fn_cache*/, | |
| 258 file_close, | |
| 259 o_buffer)) goto end; | |
| 260 | |
| 261 e = 0; | |
| 262 end: | |
| 263 | |
| 264 if (e) | |
| 265 { | |
| 266 if (file) | |
| 267 fclose(file); | |
| 268 *o_buffer = NULL; | |
| 269 } | |
| 270 | |
| 271 return e; | |
| 272 } | |
| 273 | |
| 274 | |
| 275 /* Support for read/write. */ | |
| 276 | |
| 277 /* Called by extract_buffer_read() if not enough space in buffer->cache. */ | |
| 278 int extract_buffer_read_internal(extract_buffer_t *buffer, | |
| 279 void *destination, | |
| 280 size_t numbytes, | |
| 281 size_t *o_actual) | |
| 282 { | |
| 283 int e = -1; | |
| 284 size_t pos = 0; /* Number of bytes read so far. */ | |
| 285 | |
| 286 /* In each iteration we either read from cache, or use buffer->fn_read() | |
| 287 directly or repopulate the cache. */ | |
| 288 while (pos != numbytes) | |
| 289 { | |
| 290 size_t n = buffer->cache.numbytes - buffer->cache.pos; | |
| 291 if (n) | |
| 292 { | |
| 293 /* There is data in cache. */ | |
| 294 if (n > numbytes - pos) n = numbytes - pos; | |
| 295 memcpy((char *)destination + pos, (char *)buffer->cache.cache + buffer->cache.pos, n); | |
| 296 pos += n; | |
| 297 buffer->cache.pos += n; | |
| 298 } | |
| 299 /* No data in the cache - do we use fn_read or fn_cache ? */ | |
| 300 else if (buffer->fn_read && | |
| 301 (buffer->fn_cache == NULL || | |
| 302 (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2))) | |
| 303 { | |
| 304 /* Either there is no cache, or this read is large | |
| 305 * compared to previously-returned cache size, so | |
| 306 * let's ignore buffer->fn_cache and use | |
| 307 * buffer->fn_read() directly instead. */ | |
| 308 /* Carry on looping in case of short read. */ | |
| 309 size_t actual; | |
| 310 outfx("using buffer->fn_read() directly for numbytes-pos=%i\n", numbytes-pos); | |
| 311 if (buffer->fn_read(buffer->handle, (char*) destination + pos, numbytes - pos, &actual)) | |
| 312 goto end; | |
| 313 if (actual == 0) | |
| 314 break; /* EOF. */ | |
| 315 pos += actual; | |
| 316 buffer->pos += actual; | |
| 317 } | |
| 318 else | |
| 319 { | |
| 320 /* Repopulate cache. */ | |
| 321 outfx("using buffer->fn_cache() for buffer->cache.numbytes=%i\n", buffer->cache.numbytes); | |
| 322 if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes)) | |
| 323 goto end; | |
| 324 buffer->pos += buffer->cache.pos; | |
| 325 buffer->cache.pos = 0; | |
| 326 if (buffer->cache.numbytes == 0) | |
| 327 break; /* EOF. */ | |
| 328 } | |
| 329 } | |
| 330 | |
| 331 e = 0; | |
| 332 end: | |
| 333 | |
| 334 if (o_actual) | |
| 335 *o_actual = pos; | |
| 336 if (e == 0 && pos != numbytes) | |
| 337 return +1; /* EOF. */ | |
| 338 | |
| 339 return e; | |
| 340 } | |
| 341 | |
| 342 | |
| 343 int extract_buffer_write_internal(extract_buffer_t *buffer, | |
| 344 const void *source, | |
| 345 size_t numbytes, | |
| 346 size_t *o_actual) | |
| 347 { | |
| 348 int e = -1; | |
| 349 size_t pos = 0; /* Number of bytes written so far. */ | |
| 350 | |
| 351 if (buffer->fn_write == NULL) | |
| 352 { | |
| 353 errno = EINVAL; | |
| 354 return -1; | |
| 355 } | |
| 356 | |
| 357 /* In each iteration we either write to cache, or use buffer->fn_write() | |
| 358 directly or flush the cache. */ | |
| 359 while (pos != numbytes) | |
| 360 { | |
| 361 size_t n = buffer->cache.numbytes - buffer->cache.pos; | |
| 362 outfx("numbytes=%i pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n", | |
| 363 numbytes, pos, buffer->cache.numbytes, buffer->cache.pos); | |
| 364 if (n) | |
| 365 { | |
| 366 /* There is space in cache for writing. */ | |
| 367 if (n > numbytes - pos) | |
| 368 n = numbytes - pos; | |
| 369 outfx("writing to cache: numbytes=%i n=%i\n", numbytes, n); | |
| 370 memcpy((char*) buffer->cache.cache + buffer->cache.pos, (char*) source + pos, n); | |
| 371 pos += n; | |
| 372 buffer->cache.pos += n; | |
| 373 } | |
| 374 else | |
| 375 { | |
| 376 /* No space left in cache. */ | |
| 377 outfx("cache empty. pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n", | |
| 378 pos, buffer->cache.numbytes, buffer->cache.pos); | |
| 379 { | |
| 380 /* Flush the cache. */ | |
| 381 size_t actual; | |
| 382 size_t b = buffer->cache.numbytes; | |
| 383 ptrdiff_t delta; | |
| 384 int ee = cache_flush(buffer, &actual); | |
| 385 assert(actual <= b); | |
| 386 delta = actual - b; | |
| 387 pos += delta; | |
| 388 buffer->pos += delta; | |
| 389 if (delta) | |
| 390 { | |
| 391 /* We have only partially flushed the cache. This | |
| 392 * is not recoverable. <pos> will be the number of | |
| 393 * bytes in source..+numbytes that have been | |
| 394 * successfully flushed, and could be negative | |
| 395 * if we failed to flush earlier data. */ | |
| 396 outf("failed to flush. actual=%li delta=%li\n", (long) actual, (long) delta); | |
| 397 e = 0; | |
| 398 goto end; | |
| 399 } | |
| 400 if (ee) goto end; | |
| 401 } | |
| 402 | |
| 403 if (buffer->fn_cache == NULL || | |
| 404 (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2)) | |
| 405 { | |
| 406 /* Either there is no cache, or this write is large | |
| 407 * compared to previously-returned cache size, so let's | |
| 408 * ignore the cache and call buffer->fn_write() | |
| 409 * directly instead. Carry on looping in case of short | |
| 410 * write. */ | |
| 411 size_t actual; | |
| 412 if (buffer->fn_write(buffer->handle, (char*) source + pos, numbytes - pos, &actual)) | |
| 413 goto end; | |
| 414 if (actual == 0) | |
| 415 break; /* EOF. */ | |
| 416 outfx("direct write numbytes-pos=%i actual=%i buffer->pos=%i => %i\n", | |
| 417 numbytes-pos, actual, buffer->pos, buffer->pos + actual); | |
| 418 pos += actual; | |
| 419 buffer->pos += actual; | |
| 420 } | |
| 421 else | |
| 422 { | |
| 423 /* Repopulate cache. */ | |
| 424 outfx("repopulating cache buffer->pos=%i", buffer->pos); | |
| 425 if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes)) | |
| 426 goto end; | |
| 427 buffer->cache.pos = 0; | |
| 428 if (buffer->cache.numbytes == 0) | |
| 429 break; /* EOF. */ | |
| 430 } | |
| 431 } | |
| 432 } | |
| 433 | |
| 434 e = 0; | |
| 435 end: | |
| 436 | |
| 437 if (o_actual) | |
| 438 *o_actual = pos; | |
| 439 if (e == 0 && pos != numbytes) | |
| 440 e = +1; /* EOF. */ | |
| 441 | |
| 442 return e; | |
| 443 } | |
| 444 | |
| 445 | |
| 446 static int expanding_memory_buffer_write(void *handle, const void *source, size_t numbytes, size_t *o_actual) | |
| 447 { | |
| 448 /* We realloc our memory region as required. For efficiency, we also use | |
| 449 * any currently-unused region of our memory buffer as an extract_buffer | |
| 450 * cache. So we can be called either to 'flush the cache' (in which case we | |
| 451 * don't actually copy any data) or to accept data from somewhere else (in | |
| 452 * which case we need to increase the size of our memory region. */ | |
| 453 extract_buffer_expanding_t *ebe = handle; | |
| 454 if ((char *)source >= ebe->data && (char *)source < ebe->data + ebe->alloc_size) | |
| 455 { | |
| 456 /* Source is inside our memory region so we are being called by | |
| 457 * extract_buffer_write_internal() to re-populate the cache. We don't | |
| 458 * actually have to copy anything. */ | |
| 459 assert((size_t) ((char *)source - ebe->data) == ebe->data_size); | |
| 460 assert((size_t) ((char *)source - ebe->data + numbytes) <= ebe->alloc_size); | |
| 461 ebe->data_size += numbytes; | |
| 462 } | |
| 463 else | |
| 464 { | |
| 465 /* Data is external, so copy into our buffer. We will have already been | |
| 466 called to flush the cache. */ | |
| 467 if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, ebe->data_size + numbytes)) | |
| 468 return -1; | |
| 469 ebe->alloc_size = ebe->data_size + numbytes; | |
| 470 memcpy(ebe->data + ebe->data_size, source, numbytes); | |
| 471 ebe->data_size += numbytes; | |
| 472 } | |
| 473 *o_actual = numbytes; | |
| 474 | |
| 475 return 0; | |
| 476 } | |
| 477 | |
| 478 static int expanding_memory_buffer_cache(void *handle, void **o_cache, size_t *o_numbytes) | |
| 479 { | |
| 480 extract_buffer_expanding_t *ebe = handle; | |
| 481 size_t delta = 4096; | |
| 482 | |
| 483 if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, ebe->data_size + delta)) | |
| 484 return -1; | |
| 485 | |
| 486 ebe->alloc_size = ebe->data_size + delta; | |
| 487 *o_cache = ebe->data + ebe->data_size; | |
| 488 *o_numbytes = delta; | |
| 489 | |
| 490 return 0; | |
| 491 } | |
| 492 | |
| 493 int extract_buffer_expanding_create(extract_alloc_t *alloc, extract_buffer_expanding_t *ebe) | |
| 494 { | |
| 495 ebe->data = NULL; | |
| 496 ebe->data_size = 0; | |
| 497 ebe->alloc_size = 0; | |
| 498 if (extract_buffer_open(alloc, | |
| 499 ebe, | |
| 500 NULL /*fn_read*/, | |
| 501 expanding_memory_buffer_write, | |
| 502 expanding_memory_buffer_cache, | |
| 503 NULL /*fn_close*/, | |
| 504 &ebe->buffer)) | |
| 505 return -1; | |
| 506 | |
| 507 return 0; | |
| 508 } |
