comparison mupdf-source/platform/x11/curl_stream.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "curl_stream.h"
25
26 #include <assert.h>
27 #include <string.h>
28 #include <ctype.h>
29
30 #include <curl/curl.h>
31
32 #ifdef _WIN32
33 #include <windows.h>
34 #else
35 #include <pthread.h>
36 #endif
37
38 #undef DEBUG_BLOCK_FETCHING
39
40 #ifdef DEBUG_BLOCK_FETCHING
41 #ifdef _WIN32
42 #include <varargs.h>
/* Debug-only logger for Windows builds (compiled only when
 * DEBUG_BLOCK_FETCHING is defined). Formats a printf-style message into
 * a fixed 256 byte buffer and passes it to the debugger output. */
static void
output(const char *fmt, ...)
{
	va_list args;
	char text[256];

	va_start(args, fmt);
	vsnprintf(text, sizeof(text), fmt, args);
	va_end(args);

	/* text is a narrow string, so call the ANSI entry point explicitly:
	 * the OutputDebugString macro selects the wide-character variant
	 * when UNICODE is defined. */
	OutputDebugStringA(text);
}
55 #else
56 #define output printf
57 #endif
58
59 #define DEBUG_MESSAGE(A) do { output A; } while(0)
60 #else
61 #define DEBUG_MESSAGE(A) do { } while(0)
62 #endif
63
64 #define BLOCK_SHIFT 18
65 #define BLOCK_SIZE (1<<BLOCK_SHIFT)
66
67 #define HAVE_BLOCK(map, num) (((map)[(num)>>3] & (1<<((num) & 7))) != 0)
68
/* State shared between the stream callbacks (cs_next, cs_seek, cs_close)
 * and the background fetcher thread that drives libcurl. */
typedef struct curlstate
{
	fz_context *ctx; /* context used for allocations made in the curl data callback */
	CURL *easy; /* the libcurl easy handle used for every request */

	/* START: The following entries are protected by the lock */
	CURLcode curl_error; /* failure code stored by fetch_chunk; reported and cleared by cs_next */
	char error_buffer[CURL_ERROR_SIZE]; /* registered with libcurl as CURLOPT_ERRORBUFFER */
	int data_arrived; /* set once the first body data has been received */
	int complete; /* set when fetching has finished or the fetcher thread is exiting */
	int kill_thread; /* set to ask the fetcher thread to stop */
	int accept_ranges; /* set when an "Accept-Ranges: bytes" header was seen */
	int head; /* non-zero until the initial header-only request has completed */

	/* content buffer */
	size_t content_length; /* 0 => Unknown length */
	unsigned char *buffer; /* received file data, indexed by file offset */
	size_t buffer_fill; /* not referenced by the code visible in this file */
	size_t buffer_max; /* allocated size of buffer in bytes */

	/* map of which blocks we have */
	unsigned char *map; /* one bit per BLOCK_SIZE block; NULL when not fetching by range */
	size_t map_length; /* number of blocks (bits) tracked by map */

	/* outstanding curl request info */
	size_t next_fill_start; /* The next file offset we will fetch to */
	size_t current_fill_start; /* The current file offset we are fetching to */
	size_t current_fill_end; /* last file offset (inclusive) of the range being fetched */
	/* END: The above entries are protected by the lock */

	/* Optional callback invoked by the fetcher thread: with 0 after each
	 * fetch_chunk call, and with 1 once when the thread is finishing. */
	void (*more_data)(void *,int);
	void *more_data_arg; /* opaque first argument passed to more_data */

	/* cs_next copies data here and points the stream's rp/wp into it. */
	unsigned char public_buffer[4096];

	/* We assume either Windows threads or pthreads here. */
#ifdef _WIN32
	void *thread; /* handle returned by CreateThread */
	DWORD thread_id; /* thread id filled in by CreateThread */
	HANDLE mutex; /* the lock protecting the entries marked above */
#else
	pthread_t thread; /* the fetcher thread */
	pthread_mutex_t mutex; /* the lock protecting the entries marked above */
#endif
} curlstate;
114
115 #ifdef _WIN32
116 static int locked;
117
118 static void
119 lock(curlstate *state)
120 {
121 WaitForSingleObject(state->mutex, INFINITE);
122 assert(locked == 0);
123 locked = 1;
124 }
125
126 static void
127 unlock(curlstate *state)
128 {
129 assert(locked == 1);
130 locked = 0;
131 ReleaseMutex(state->mutex);
132 }
133 #else
134 static void
135 lock(curlstate *state)
136 {
137 pthread_mutex_lock(&state->mutex);
138 }
139
140 static void
141 unlock(curlstate *state)
142 {
143 pthread_mutex_unlock(&state->mutex);
144 }
145 #endif
146
147 static size_t on_curl_header(void *ptr, size_t size, size_t nmemb, void *state_)
148 {
149 struct curlstate *state = state_;
150
151 lock(state);
152 if (fz_strncasecmp(ptr, "Accept-Ranges: bytes", 20) == 0)
153 {
154 DEBUG_MESSAGE(("header arrived with Accept-Ranges!\n"));
155 state->accept_ranges = 1;
156 }
157
158 if (fz_strncasecmp(ptr, "Content-Length:", 15) == 0)
159 {
160 char *s = ptr;
161 state->content_length = fz_atoi(s + 15);
162 DEBUG_MESSAGE(("header arrived with Content-Length: %zu\n", state->content_length));
163 }
164 unlock(state);
165
166 return nmemb * size;
167 }
168
169 static size_t on_curl_data(void *ptr, size_t size, size_t nmemb, void *state_)
170 {
171 struct curlstate *state = state_;
172 size_t old_start;
173
174 size *= nmemb;
175
176 lock(state);
177 if (state->data_arrived == 0)
178 {
179 /* This is the first time data has arrived.
180 * If the header has Accept-Ranges then we can do byte requests.
181 * We know the Content-Length from having processed the header already.
182 */
183 if (state->content_length == 0)
184 {
185 /* What a crap server. Won't tell us how big the file
186 * is. We'll have to expand as data as arrives. */
187 DEBUG_MESSAGE(("have no length!\n"));
188 }
189 else if (state->accept_ranges)
190 {
191 /* We got a range header, and the correct http response
192 * code. We can assume that byte fetches are accepted
193 * and we'll run without progressive mode. */
194 size_t len = state->content_length;
195 state->map_length = (len+BLOCK_SIZE-1)>>BLOCK_SHIFT;
196 state->map = fz_malloc_no_throw(state->ctx, (state->map_length+7)>>3);
197 state->buffer = fz_malloc_no_throw(state->ctx, len);
198 state->buffer_max = len;
199 if (state->map == NULL || state->buffer == NULL)
200 {
201 unlock(state);
202 return 0;
203 }
204 memset(state->map, 0, (state->map_length+7)>>3);
205 DEBUG_MESSAGE(("have range header content_length=%zu!\n", state->content_length));
206 }
207 else
208 {
209 /* We know the length, and that we can use ByteRanges -
210 * we can run as a progressive file. */
211 state->buffer = fz_malloc_no_throw(state->ctx, state->content_length);
212 if (state->buffer == NULL)
213 {
214 unlock(state);
215 return 0;
216 }
217 state->buffer_max = state->content_length;
218 }
219
220 state->data_arrived = 1;
221 }
222
223 if (state->content_length == 0)
224 {
225 size_t newsize = (state->current_fill_start + size);
226 if (newsize > state->buffer_max)
227 {
228 /* Expand the buffer */
229 size_t new_max = state->buffer_max * 2;
230 if (new_max == 0)
231 new_max = 4096;
232 fz_try(state->ctx)
233 state->buffer = fz_realloc_array(state->ctx, state->buffer, new_max, unsigned char);
234 fz_catch(state->ctx)
235 {
236 unlock(state);
237 return 0;
238 }
239 state->buffer_max = new_max;
240 }
241 }
242
243 DEBUG_MESSAGE(("data arrived: offset=%ld len=%ld\n", state->current_fill_start, size));
244 /* Although we always trigger fills starting on block boundaries,
245 * code this to allow for curl calling us to copy smaller blocks
246 * as they arrive. */
247 old_start = state->current_fill_start;
248 if (state->current_fill_start + size > state->buffer_max) {
249 unlock(state);
250 return 0;
251 }
252 memcpy(state->buffer + state->current_fill_start, ptr, size);
253 state->current_fill_start += size;
254 /* If we've reached the end, or at least a different block
255 * mark that we've got that block. */
256 if (state->map && (state->current_fill_start == state->content_length ||
257 (((state->current_fill_start ^ old_start) & ~(BLOCK_SIZE-1)) != 0)))
258 {
259 old_start >>= BLOCK_SHIFT;
260 state->map[old_start>>3] |= 1<<(old_start & 7);
261 }
262 unlock(state);
263
264 return size;
265 }
266
267 static void fetch_chunk(struct curlstate *state)
268 {
269 char text[32];
270 size_t block, start, end;
271 CURLcode ret;
272
273 ret = curl_easy_perform(state->easy);
274 if (ret != CURLE_OK) {
275 /* If we get an error, store it, and kill the thread.
276 * The next fetch will return it. */
277 lock(state);
278 state->curl_error = ret;
279 state->kill_thread = 1;
280 unlock(state);
281 return;
282 }
283
284 /* We finished the header, now request the body. */
285 lock(state);
286 if (state->head)
287 {
288 state->head = 0;
289 curl_easy_setopt(state->easy, CURLOPT_NOBODY, 0);
290 curl_easy_setopt(state->easy, CURLOPT_HEADERFUNCTION, NULL);
291 curl_easy_setopt(state->easy, CURLOPT_WRITEHEADER, NULL);
292 if (state->accept_ranges)
293 {
294 fz_snprintf(text, 32, "%d-%d", 0, BLOCK_SIZE-1);
295 curl_easy_setopt(state->easy, CURLOPT_RANGE, text);
296 state->next_fill_start = BLOCK_SIZE;
297 }
298 unlock(state);
299 return;
300 }
301
302 /* We finished the current body. If not accepting ranges, that's the end. */
303 if (!state->accept_ranges)
304 {
305 DEBUG_MESSAGE(("we got it all, in one request.\n"));
306 state->complete = 1;
307 state->kill_thread = 1;
308 unlock(state);
309 return;
310 }
311
312 /* Find the next block to fetch */
313 assert((state->next_fill_start & (BLOCK_SHIFT-1)) == 0);
314 block = state->next_fill_start>>BLOCK_SHIFT;
315 if (state->content_length > 0)
316 {
317 /* Find the next block that we haven't got */
318 size_t map_length = state->map_length;
319 unsigned char *map = state->map;
320 while (block < map_length && HAVE_BLOCK(map, block))
321 ++block;
322 if (block == map_length)
323 {
324 block = 0;
325 while (block < map_length && HAVE_BLOCK(map, block))
326 ++block;
327 if (block == map_length)
328 {
329 /* We've got it all! */
330 DEBUG_MESSAGE(("we got it all block=%zu map_length=%zu!\n", block, map_length));
331 state->complete = 1;
332 state->kill_thread = 1;
333 unlock(state);
334 return;
335 }
336 }
337 }
338 else
339 {
340 state->complete = 1;
341 state->kill_thread = 1;
342 unlock(state);
343 return;
344 }
345
346 DEBUG_MESSAGE(("block requested was %zu, fetching %zu\n", state->next_fill_start>>BLOCK_SHIFT, block));
347
348 /* Set up fetch of that block */
349 start = block<<BLOCK_SHIFT;
350 end = start + BLOCK_SIZE-1;
351 state->current_fill_start = start;
352 if (state->content_length > 0 && end >= state->content_length)
353 end = state->content_length-1;
354 state->current_fill_end = end;
355 fz_snprintf(text, 32, "%d-%d", start, end);
356
357 /* Unless anyone changes this in the meantime, the
358 * next block we fetch will follow on from this one. */
359 state->next_fill_start = state->current_fill_start+BLOCK_SIZE;
360 unlock(state);
361
362 /* Request next range! */
363 DEBUG_MESSAGE(("requesting range %s\n", text));
364 curl_easy_setopt(state->easy, CURLOPT_RANGE, text);
365 }
366
367 static int cs_next(fz_context *ctx, fz_stream *stream, size_t len)
368 {
369 struct curlstate *state = stream->state;
370 size_t len_read = 0;
371 int64_t read_point = stream->pos;
372 int block = read_point>>BLOCK_SHIFT;
373 size_t left_over = (-read_point) & (BLOCK_SIZE-1);
374 unsigned char *buf = state->public_buffer;
375 int err_type;
376
377 assert(len != 0);
378
379 stream->rp = stream->wp = buf;
380 lock(state);
381 err_type = state->complete ? FZ_ERROR_GENERIC : FZ_ERROR_TRYLATER;
382
383 /* If we got an error from the fetching thread,
384 * throw it here (but just once). */
385 if (state->curl_error)
386 {
387 CURLcode err = state->curl_error;
388 char errstr[CURL_ERROR_SIZE];
389 memcpy(errstr, state->error_buffer, CURL_ERROR_SIZE);
390 memset(state->error_buffer, 0, CURL_ERROR_SIZE);
391 state->curl_error = 0;
392 unlock(state);
393 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot fetch data: %s: %s", curl_easy_strerror(err), errstr);
394 }
395
396 if ((size_t) read_point > state->content_length)
397 {
398 unlock(state);
399 if (state->data_arrived == 0)
400 fz_throw(ctx, err_type, "read of a block we don't have (A) (offset=%ld)", read_point);
401 return EOF;
402 }
403
404 if (len > sizeof(state->public_buffer))
405 len = sizeof(state->public_buffer);
406
407 if (state->map == NULL)
408 {
409 /* We are doing a simple linear fetch as we don't know the
410 * content length. */
411 if (read_point + len > state->current_fill_start)
412 {
413 unlock(state);
414 fz_throw(ctx, err_type, "read of a block we don't have (B) (offset=%ld)", read_point);
415 }
416 memcpy(buf, state->buffer + read_point, len);
417 unlock(state);
418 stream->wp = buf + len;
419 stream->pos += len;
420 if (len == 0)
421 return EOF;
422 return *stream->rp++;
423 }
424
425 /* We are reading from a "mapped" file */
426 if (read_point + len > state->content_length)
427 len = state->content_length - read_point;
428 if (left_over > len)
429 left_over = len;
430 if (left_over > 0)
431 {
432 /* We are starting midway through a block */
433 if (!HAVE_BLOCK(state->map, block))
434 {
435 state->next_fill_start = block<<BLOCK_SHIFT;
436 unlock(state);
437 fz_throw(ctx, err_type, "read of a block we don't have (C) (offset=%ld)", read_point);
438 }
439 block++;
440 memcpy(buf, state->buffer + read_point, left_over);
441 buf += left_over;
442 read_point += left_over;
443 len -= left_over;
444 len_read += left_over;
445 }
446
447 /* Copy any complete blocks */
448 while (len > BLOCK_SIZE)
449 {
450 if (!HAVE_BLOCK(state->map, block))
451 {
452 /* We don't have enough data to fulfill the request. */
453 /* Fetch the next block from here. */
454 unlock(state);
455 state->next_fill_start = block<<BLOCK_SHIFT;
456 stream->wp += len_read;
457 stream->pos += len_read;
458 /* If we haven't fetched anything, throw. */
459 if (len_read == 0)
460 fz_throw(ctx, err_type, "read of a block we don't have (D) (offset=%ld)", read_point);
461 /* Otherwise, we got at least one byte, so we can safely return that. */
462 return *stream->rp++;
463 }
464 block++;
465 memcpy(buf, state->buffer + read_point, BLOCK_SIZE);
466 buf += BLOCK_SIZE;
467 read_point += BLOCK_SIZE;
468 len -= BLOCK_SIZE;
469 len_read += BLOCK_SIZE;
470 }
471
472 /* Copy any trailing bytes */
473 if (len > 0)
474 {
475 if (!HAVE_BLOCK(state->map, block))
476 {
477 /* We don't have enough data to fulfill the request. */
478 /* Fetch the next block from here. */
479 unlock(state);
480 state->next_fill_start = block<<BLOCK_SHIFT;
481 stream->wp += len_read;
482 stream->pos += len_read;
483 /* If we haven't fetched anything, throw. */
484 if (len_read == 0)
485 fz_throw(ctx, err_type, "read of a block we don't have (E) (offset=%ld)", read_point);
486 /* Otherwise, we got at least one byte, so we can safely return that. */
487 return *stream->rp++;
488 }
489 memcpy(buf, state->buffer + read_point, len);
490 len_read += len;
491 }
492
493 unlock(state);
494 stream->wp += len_read;
495 stream->pos += len_read;
496 if (len_read == 0)
497 return EOF;
498 return *stream->rp++;
499 }
500
501 static void cs_close(fz_context *ctx, void *state_)
502 {
503 struct curlstate *state = state_;
504
505 lock(state);
506 state->kill_thread = 1;
507 unlock(state);
508
509 #ifdef _WIN32
510 WaitForSingleObject(state->thread, INFINITE);
511 CloseHandle(state->thread);
512 CloseHandle(state->mutex);
513 #else
514 pthread_join(state->thread, NULL);
515 pthread_mutex_destroy(&state->mutex);
516 #endif
517
518 curl_easy_cleanup(state->easy);
519 fz_free(ctx, state->buffer);
520 fz_free(ctx, state->map);
521 fz_free(ctx, state);
522 }
523
524 static void cs_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
525 {
526 struct curlstate *state = stm->state;
527
528 stm->wp = stm->rp;
529 if (whence == SEEK_END)
530 {
531 size_t clen;
532 int data_arrived;
533 int complete;
534 lock(state);
535 data_arrived = state->data_arrived;
536 clen = state->content_length;
537 complete = state->complete;
538 unlock(state);
539 if (!data_arrived && !complete)
540 fz_throw(ctx, FZ_ERROR_TRYLATER, "still awaiting file length");
541 stm->pos = clen + offset;
542 }
543 else if (whence == SEEK_CUR)
544 stm->pos += offset;
545 else
546 stm->pos = offset;
547 if (stm->pos < 0)
548 stm->pos = 0;
549 }
550
551 static void
552 fetcher_thread(curlstate *state)
553 {
554 /* Keep fetching chunks on a background thread until
555 * either we have to kill the thread, or the fetch
556 * is complete. */
557 while (1) {
558 int complete;
559 lock(state);
560 complete = state->complete || state->kill_thread;
561 unlock(state);
562 if (complete)
563 break;
564 fetch_chunk(state);
565 if (state->more_data)
566 state->more_data(state->more_data_arg, 0);
567 }
568 if (state->more_data)
569 state->more_data(state->more_data_arg, 1);
570 lock(state);
571 state->complete = 1;
572 unlock(state);
573 }
574
575 #ifdef _WIN32
576 static DWORD WINAPI
577 win_thread(void *lparam)
578 {
579 fetcher_thread((curlstate *)lparam);
580
581 return 0;
582 }
583 #else
584 static void *
585 pthread_thread(void *arg)
586 {
587 fetcher_thread((curlstate *)arg);
588 return NULL;
589 }
590 #endif
591
592 fz_stream *fz_open_url(fz_context *ctx, const char *url, int kbps, void (*more_data)(void *,int), void *more_data_arg)
593 {
594 struct curlstate *state;
595 fz_stream *stm;
596 CURLcode code;
597
598 state = fz_malloc_struct(ctx, struct curlstate);
599 state->ctx = ctx;
600
601 code = curl_global_init(CURL_GLOBAL_ALL);
602 if (code != CURLE_OK)
603 fz_throw(ctx, FZ_ERROR_GENERIC, "curl_global_init failed");
604
605 state->easy = curl_easy_init();
606 if (!state->easy)
607 fz_throw(ctx, FZ_ERROR_GENERIC, "curl_easy_init failed");
608
609 curl_easy_setopt(state->easy, CURLOPT_URL, url);
610 curl_easy_setopt(state->easy, CURLOPT_FOLLOWLOCATION, 1);
611 curl_easy_setopt(state->easy, CURLOPT_MAXREDIRS, 12);
612 curl_easy_setopt(state->easy, CURLOPT_SSL_VERIFYPEER, 0);
613 curl_easy_setopt(state->easy, CURLOPT_SSL_VERIFYHOST, 0);
614 curl_easy_setopt(state->easy, CURLOPT_MAX_RECV_SPEED_LARGE, kbps * 1024);
615 curl_easy_setopt(state->easy, CURLOPT_HEADERFUNCTION, on_curl_header);
616 curl_easy_setopt(state->easy, CURLOPT_WRITEHEADER, state);
617 curl_easy_setopt(state->easy, CURLOPT_WRITEFUNCTION, on_curl_data);
618 curl_easy_setopt(state->easy, CURLOPT_WRITEDATA, state);
619 curl_easy_setopt(state->easy, CURLOPT_FAILONERROR, 1L);
620 curl_easy_setopt(state->easy, CURLOPT_ERRORBUFFER, &state->error_buffer);
621 #ifdef DEBUG_BLOCK_FETCHING
622 curl_easy_setopt(state->easy, CURLOPT_VERBOSE, 1L);
623 #endif
624
625 /* Get only the HEAD first. */
626 state->head = 1;
627 curl_easy_setopt(state->easy, CURLOPT_NOBODY, 1);
628
629 #ifdef _WIN32
630 state->mutex = CreateMutex(NULL, FALSE, NULL);
631 if (state->mutex == NULL)
632 fz_throw(ctx, FZ_ERROR_GENERIC, "mutex creation failed");
633
634 state->thread = CreateThread(NULL, 0, win_thread, state, 0, &state->thread_id);
635 if (state->thread == NULL)
636 fz_throw(ctx, FZ_ERROR_GENERIC, "thread creation failed");
637 #else
638 if (pthread_mutex_init(&state->mutex, NULL))
639 fz_throw(ctx, FZ_ERROR_GENERIC, "mutex creation failed");
640
641 if (pthread_create(&state->thread, NULL, pthread_thread, state))
642 fz_throw(ctx, FZ_ERROR_GENERIC, "thread creation failed");
643 #endif
644 state->more_data = more_data;
645 state->more_data_arg = more_data_arg;
646
647 stm = fz_new_stream(ctx, state, cs_next, cs_close);
648 stm->progressive = 1;
649 stm->seek = cs_seek;
650 return stm;
651 }