comparison mupdf-source/source/pdf/pdf-stream.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 #include <string.h>
27
28 int
29 pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num)
30 {
31 pdf_xref_entry *entry;
32
33 if (num <= 0 || num >= pdf_xref_len(ctx, doc))
34 return 0;
35
36 fz_try(ctx)
37 entry = pdf_cache_object(ctx, doc, num);
38 fz_catch(ctx)
39 {
40 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
41 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
42 fz_report_error(ctx);
43 return 0;
44 }
45
46 return entry->stm_ofs != 0 || entry->stm_buf;
47 }
48
49 int
50 pdf_is_stream(fz_context *ctx, pdf_obj *ref)
51 {
52 pdf_document *doc = pdf_get_indirect_document(ctx, ref);
53 if (doc)
54 return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref));
55 return 0;
56 }
57
58 /*
59 * Scan stream dictionary for an explicit /Crypt filter
60 */
61 static int
62 pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm)
63 {
64 pdf_obj *filters;
65 pdf_obj *obj;
66 int i;
67
68 filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F));
69 if (filters)
70 {
71 if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt)))
72 return 1;
73 if (pdf_is_array(ctx, filters))
74 {
75 int n = pdf_array_len(ctx, filters);
76 for (i = 0; i < n; i++)
77 {
78 obj = pdf_array_get(ctx, filters, i);
79 if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt)))
80 return 1;
81 }
82 }
83 }
84 return 0;
85 }
86
87 static fz_jbig2_globals *
88 pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict)
89 {
90 fz_jbig2_globals *globals;
91 fz_buffer *buf = NULL;
92
93 fz_var(buf);
94
95 if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL)
96 return globals;
97
98 if (pdf_mark_obj(ctx, dict))
99 fz_throw(ctx, FZ_ERROR_FORMAT, "cyclic reference when loading JBIG2 globals");
100
101 fz_try(ctx)
102 {
103 buf = pdf_load_stream(ctx, dict);
104 globals = fz_load_jbig2_globals(ctx, buf);
105 if (globals)
106 pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL));
107 }
108 fz_always(ctx)
109 {
110 fz_drop_buffer(ctx, buf);
111 pdf_unmark_obj(ctx, dict);
112 }
113 fz_catch(ctx)
114 {
115 fz_rethrow(ctx);
116 }
117
118 return globals;
119 }
120
121 static void
122 build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params)
123 {
124 params->type = FZ_IMAGE_RAW;
125
126 if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF)))
127 {
128 params->type = FZ_IMAGE_FAX;
129 params->u.fax.k = pdf_dict_get_int_default(ctx, p, PDF_NAME(K), 0);
130 params->u.fax.end_of_line = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfLine), 0);
131 params->u.fax.encoded_byte_align = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EncodedByteAlign), 0);
132 params->u.fax.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1728);
133 params->u.fax.rows = pdf_dict_get_int_default(ctx, p, PDF_NAME(Rows), 0);
134 params->u.fax.end_of_block = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfBlock), 1);
135 params->u.fax.black_is_1 = pdf_dict_get_bool_default(ctx, p, PDF_NAME(BlackIs1), 0);
136 }
137 else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT)))
138 {
139 params->type = FZ_IMAGE_JPEG;
140 params->u.jpeg.color_transform = pdf_dict_get_int_default(ctx, p, PDF_NAME(ColorTransform), -1);
141 params->u.jpeg.invert_cmyk = 0;
142 }
143 else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL)))
144 {
145 params->type = FZ_IMAGE_RLD;
146 }
147 else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl)))
148 {
149 params->type = FZ_IMAGE_FLATE;
150 params->u.flate.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
151 params->u.flate.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
152 params->u.flate.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
153 params->u.flate.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
154 }
155 else if (pdf_name_eq(ctx, f, PDF_NAME(BrotliDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Br)))
156 {
157 params->type = FZ_IMAGE_BROTLI;
158 params->u.brotli.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
159 params->u.brotli.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
160 params->u.brotli.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
161 params->u.brotli.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
162 }
163 else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW)))
164 {
165 params->type = FZ_IMAGE_LZW;
166 params->u.lzw.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
167 params->u.lzw.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
168 params->u.lzw.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
169 params->u.lzw.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
170 params->u.lzw.early_change = pdf_dict_get_int_default(ctx, p, PDF_NAME(EarlyChange), 1);
171 }
172 else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)))
173 {
174 pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals));
175
176 params->type = FZ_IMAGE_JBIG2;
177 params->u.jbig2.globals = NULL;
178 params->u.jbig2.embedded = 1; /* jbig2 streams are always embedded without file headers */
179 if (g)
180 {
181 if (!pdf_is_stream(ctx, g))
182 fz_warn(ctx, "jbig2 globals is not a stream, skipping globals");
183 else
184 params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g);
185 }
186 }
187 }
188
189 /*
190 * Create a filter given a name and param dictionary.
191 */
192 static fz_stream *
193 build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image)
194 {
195 fz_compression_params local_params;
196
197 local_params.u.jbig2.globals = NULL;
198 if (params == NULL)
199 params = &local_params;
200
201 if (!might_be_image &&
202 (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) ||
203 pdf_name_eq(ctx, f, PDF_NAME(CCF)) ||
204 pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) ||
205 pdf_name_eq(ctx, f, PDF_NAME(DCT)) ||
206 pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)) ||
207 pdf_name_eq(ctx, f, PDF_NAME(JPXDecode))))
208 {
209 fz_warn(ctx, "Can't open image only stream for non-image purposes");
210 return fz_open_memory(ctx, (unsigned char *)"", 0);
211 }
212
213 build_compression_params(ctx, f, p, params);
214
215 /* If we were using params we were passed in, and we successfully
216 * recognised the image type, we can use the existing filter and
217 * shortstop here. */
218 if (params != &local_params && params->type != FZ_IMAGE_RAW)
219 return fz_keep_stream(ctx, chain); /* nothing to do */
220
221 else if (params->type == FZ_IMAGE_JBIG2)
222 {
223 fz_stream *stm;
224 fz_try(ctx)
225 stm = fz_open_image_decomp_stream(ctx, chain, params, NULL);
226 fz_always(ctx)
227 fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals);
228 fz_catch(ctx)
229 fz_rethrow(ctx);
230 return stm;
231 }
232
233 else if (params->type != FZ_IMAGE_RAW)
234 return fz_open_image_decomp_stream(ctx, chain, params, NULL);
235
236 else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx)))
237 return fz_open_ahxd(ctx, chain);
238
239 else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85)))
240 return fz_open_a85d(ctx, chain);
241
242 else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))
243 return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */
244
245 else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt)))
246 {
247 if (!doc->crypt)
248 fz_warn(ctx, "crypt filter in unencrypted document");
249 else
250 {
251 pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name));
252 if (pdf_is_name(ctx, name))
253 return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen);
254 }
255 }
256
257 else
258 fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f));
259
260 return fz_keep_stream(ctx, chain);
261 }
262
263 /* Build filter, and assume ownership of chain */
264 static fz_stream *
265 build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image)
266 {
267 fz_stream *head;
268 fz_try(ctx)
269 head = build_filter(ctx, tail, doc, f, p, num, gen, params, might_be_image);
270 fz_always(ctx)
271 fz_drop_stream(ctx, tail);
272 fz_catch(ctx)
273 fz_rethrow(ctx);
274 return head;
275 }
276
277 /*
278 * Build a chain of filters given filter names and param dicts.
279 * If chain is given, start filter chain with it.
280 * Assume ownership of chain.
281 */
282 static fz_stream *
283 build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image)
284 {
285 fz_var(chain);
286 fz_try(ctx)
287 {
288 int i, n = pdf_array_len(ctx, fs);
289 for (i = 0; i < n; i++)
290 {
291 pdf_obj *f = pdf_array_get(ctx, fs, i);
292 pdf_obj *p = pdf_array_get(ctx, ps, i);
293 chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL), might_be_image);
294 }
295 }
296 fz_catch(ctx)
297 fz_rethrow(ctx);
298 return chain;
299 }
300
301 static fz_stream *
302 build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image)
303 {
304 return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params, might_be_image);
305 }
306
307 /*
308 * Build a filter for reading raw stream data.
309 * This is a null filter to constrain reading to the stream length (and to
310 * allow for other people accessing the file), followed by a decryption
311 * filter.
312 *
313 * orig_num and orig_gen are used purely to seed the encryption.
314 */
315 static fz_stream *
316 pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset)
317 {
318 pdf_xref_entry *x = NULL;
319 fz_stream *null_stm, *crypt_stm;
320 int hascrypt;
321 int64_t len;
322
323 if (num > 0 && num < pdf_xref_len(ctx, doc))
324 {
325 x = pdf_get_xref_entry(ctx, doc, num);
326 }
327 if (x == NULL)
328 {
329 /* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */
330 /* New style XRef sections must have generation number 0. */
331 *orig_num = num;
332 *orig_gen = 0;
333 }
334 else
335 {
336 *orig_num = x->num;
337 *orig_gen = x->gen;
338 if (x->stm_buf)
339 return fz_open_buffer(ctx, x->stm_buf);
340 }
341
342 hascrypt = pdf_stream_has_crypt(ctx, stmobj);
343 len = pdf_dict_get_int64(ctx, stmobj, PDF_NAME(Length));
344 if (len < 0)
345 len = 0;
346 null_stm = fz_open_endstream_filter(ctx, file_stm, (uint64_t)len, offset);
347 if (doc->crypt && !hascrypt)
348 {
349 fz_try(ctx)
350 crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen);
351 fz_always(ctx)
352 fz_drop_stream(ctx, null_stm);
353 fz_catch(ctx)
354 fz_rethrow(ctx);
355 return crypt_stm;
356 }
357 return null_stm;
358 }
359
360 /*
361 * Construct a filter to decode a stream, constraining
362 * to stream length and decrypting.
363 */
364 static fz_stream *
365 pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams, int might_be_image)
366 {
367 pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
368 pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
369 int orig_num, orig_gen;
370 fz_stream *rstm, *fstm;
371
372 rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset);
373 fz_try(ctx)
374 {
375 if (pdf_is_name(ctx, filters))
376 fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image);
377 else if (pdf_array_len(ctx, filters) > 0)
378 fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image);
379 else
380 {
381 if (imparams)
382 imparams->type = FZ_IMAGE_RAW;
383 fstm = fz_keep_stream(ctx, rstm);
384 }
385 }
386 fz_always(ctx)
387 fz_drop_stream(ctx, rstm);
388 fz_catch(ctx)
389 fz_rethrow(ctx);
390
391 return fstm;
392 }
393
394 fz_stream *
395 pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams)
396 {
397 pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
398 pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
399
400 if (pdf_is_name(ctx, filters))
401 return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1);
402 else if (pdf_array_len(ctx, filters) > 0)
403 return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1);
404
405 if (imparams)
406 imparams->type = FZ_IMAGE_RAW;
407 return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm));
408 }
409
410 void
411 pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image)
412 {
413 fz_stream *istm = NULL, *leech = NULL, *decomp = NULL;
414 fz_pixmap *pixmap = NULL;
415 fz_compressed_buffer *bc;
416 int dummy_l2factor = 0;
417
418 fz_var(istm);
419 fz_var(leech);
420 fz_var(decomp);
421 fz_var(pixmap);
422
423 bc = fz_new_compressed_buffer(ctx);
424 fz_try(ctx)
425 {
426 bc->buffer = fz_new_buffer(ctx, 1024);
427 istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params);
428 leech = fz_open_leecher(ctx, istm, bc->buffer);
429 decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor);
430 pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0, NULL);
431 fz_set_compressed_image_buffer(ctx, image, bc);
432 }
433 fz_always(ctx)
434 {
435 fz_drop_stream(ctx, istm);
436 fz_drop_stream(ctx, leech);
437 fz_drop_stream(ctx, decomp);
438 fz_drop_pixmap(ctx, pixmap);
439 }
440 fz_catch(ctx)
441 {
442 fz_drop_compressed_buffer(ctx, bc);
443 fz_rethrow(ctx);
444 }
445 }
446
447 fz_stream *
448 pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
449 {
450 pdf_xref_entry *x;
451 int orig_num, orig_gen;
452
453 x = pdf_cache_object(ctx, doc, num);
454 if (x->stm_ofs == 0)
455 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
456
457 return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs);
458 }
459
460 static fz_stream *
461 pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int might_be_image)
462 {
463 pdf_xref_entry *x;
464
465 x = pdf_cache_object(ctx, doc, num);
466 if (x->stm_ofs == 0 && x->stm_buf == NULL)
467 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
468
469 return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params, might_be_image);
470 }
471
472 fz_stream *
473 pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num)
474 {
475 return pdf_open_image_stream(ctx, doc, num, NULL, 1);
476 }
477
478 fz_stream *
479 pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs)
480 {
481 if (stm_ofs == 0)
482 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
483 return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL, 1);
484 }
485
486 fz_buffer *
487 pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
488 {
489 fz_stream *stm;
490 pdf_obj *dict;
491 int64_t len;
492 fz_buffer *buf = NULL;
493 pdf_xref_entry *x;
494
495 if (num > 0 && num < pdf_xref_len(ctx, doc))
496 {
497 x = pdf_get_xref_entry_no_null(ctx, doc, num);
498 if (x->stm_buf)
499 return fz_keep_buffer(ctx, x->stm_buf);
500 }
501
502 dict = pdf_load_object(ctx, doc, num);
503
504 fz_try(ctx)
505 len = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length));
506 fz_always(ctx)
507 pdf_drop_obj(ctx, dict);
508 fz_catch(ctx)
509 fz_rethrow(ctx);
510
511 stm = pdf_open_raw_stream_number(ctx, doc, num);
512
513 if (len < 0)
514 len = 1024;
515
516 fz_try(ctx)
517 buf = fz_read_all(ctx, stm, (size_t)len);
518 fz_always(ctx)
519 fz_drop_stream(ctx, stm);
520 fz_catch(ctx)
521 fz_rethrow(ctx);
522
523 return buf;
524 }
525
/*
 * Guess how many bytes 'len' bytes of input will occupy after passing
 * through the decode filter called 'filter'.
 *
 * The result is only a sizing hint. Unknown filter names (and a NULL
 * name) leave the length unchanged. If applying the growth factor would
 * overflow size_t, the unscaled length is returned: we live with a bad
 * estimate, since the buffer grows as it is filled.
 */
static size_t
pdf_guess_filter_length(size_t len, const char *filter)
{
	size_t factor;

	if (filter == NULL)
		return len;

	/* These two shrink the data. */
	if (!strcmp(filter, "ASCIIHexDecode"))
		return len / 2;
	if (!strcmp(filter, "ASCII85Decode"))
	{
		/* floor(len * 4 / 5), computed without forming len * 4,
		 * which can wrap for very large lengths. */
		return len / 5 * 4 + len % 5 * 4 / 5;
	}

	if (!strcmp(filter, "FlateDecode"))
		factor = 3;
	else if (!strcmp(filter, "BrotliDecode"))
		factor = 4;
	else if (!strcmp(filter, "RunLengthDecode"))
		factor = 3;
	else if (!strcmp(filter, "LZWDecode"))
		factor = 2;
	else
		return len;

	/* Check before multiplying: a wrapped product can still be larger
	 * than len, so comparing afterwards does not catch every overflow. */
	if (len > (size_t)-1 / factor)
		return len;

	return len * factor;
}
553
554 /* Check if an entry has a cached stream and return whether it is directly
555 * reusable. A buffer is directly reusable only if the stream is
556 * uncompressed, or if it is compressed purely a compression method we can
557 * return details of in fz_compression_params.
558 *
559 * If the stream is reusable return 1, and set params as required, otherwise
560 * return 0. */
561 static int
562 can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params)
563 {
564 pdf_obj *f;
565 pdf_obj *p;
566
567 if (!entry || !entry->obj || !entry->stm_buf)
568 return 0;
569
570 if (params)
571 params->type = FZ_IMAGE_RAW;
572
573 f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F));
574 /* If there are no filters, it's uncompressed, and we can use it */
575 if (!f)
576 return 1;
577
578 p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP));
579 if (pdf_is_array(ctx, f))
580 {
581 int len = pdf_array_len(ctx, f);
582
583 /* Empty array of filters. Its uncompressed. We can cope. */
584 if (len == 0)
585 return 1;
586 /* 1 filter is the most we can hope to cope with - if more,*/
587 if (len != 1)
588 return 0;
589 p = pdf_array_get(ctx, p, 0);
590 }
591 if (pdf_is_null(ctx, f))
592 return 1; /* Null filter is uncompressed */
593 if (!pdf_is_name(ctx, f))
594 return 0;
595
596 /* There are filters, so unless we have the option of shortstopping,
597 * we can't use the existing buffer. */
598 if (!params)
599 return 0;
600
601 build_compression_params(ctx, f, p, params);
602
603 return (params->type == FZ_IMAGE_RAW) ? 0 : 1;
604 }
605
606 static fz_buffer *
607 pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated, size_t worst_case)
608 {
609 fz_stream *stm = NULL;
610 pdf_obj *dict, *obj;
611 int i, n;
612 size_t len;
613 fz_buffer *buf;
614
615 fz_var(buf);
616
617 if (num > 0 && num < pdf_xref_len(ctx, doc))
618 {
619 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
620 /* Return ref to existing buffer, but only if uncompressed,
621 * or shortstoppable */
622 if (can_reuse_buffer(ctx, entry, params))
623 return fz_keep_buffer(ctx, entry->stm_buf);
624 }
625
626 dict = pdf_load_object(ctx, doc, num);
627 fz_try(ctx)
628 {
629 int64_t ilen = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length));
630 if (ilen < 0)
631 ilen = 0;
632 len = (size_t)ilen;
633 /* In 32 bit builds, we might find a length being too
634 * large for a size_t. */
635 if ((int64_t)len != ilen)
636 fz_throw(ctx, FZ_ERROR_LIMIT, "Stream too large");
637 obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
638 len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj));
639 n = pdf_array_len(ctx, obj);
640 for (i = 0; i < n; i++)
641 len = pdf_guess_filter_length(len, pdf_array_get_name(ctx, obj, i));
642 }
643 fz_always(ctx)
644 {
645 pdf_drop_obj(ctx, dict);
646 }
647 fz_catch(ctx)
648 {
649 fz_rethrow(ctx);
650 }
651
652 stm = pdf_open_image_stream(ctx, doc, num, params, 1);
653
654 fz_try(ctx)
655 {
656 buf = fz_read_best(ctx, stm, len, truncated, worst_case);
657 }
658 fz_always(ctx)
659 {
660 fz_drop_stream(ctx, stm);
661 }
662 fz_catch(ctx)
663 {
664 fz_rethrow(ctx);
665 }
666
667 return buf;
668 }
669
670 fz_buffer *
671 pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num)
672 {
673 return pdf_load_image_stream(ctx, doc, num, NULL, NULL, 0);
674 }
675
676 fz_compressed_buffer *
677 pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case)
678 {
679 fz_compressed_buffer *bc = fz_new_compressed_buffer(ctx);
680
681 fz_try(ctx)
682 {
683 bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL, worst_case);
684 }
685 fz_catch(ctx)
686 {
687 fz_free(ctx, bc);
688 fz_rethrow(ctx);
689 }
690 return bc;
691 }
692
693 static fz_stream *
694 pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list)
695 {
696 fz_stream *stm;
697 int i, n;
698
699 n = pdf_array_len(ctx, list);
700 stm = fz_open_concat(ctx, n, 1);
701
702 for (i = 0; i < n; i++)
703 {
704 pdf_obj *obj = pdf_array_get(ctx, list, i);
705 fz_try(ctx)
706 fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj));
707 fz_catch(ctx)
708 {
709 if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
710 {
711 fz_drop_stream(ctx, stm);
712 fz_rethrow(ctx);
713 }
714 fz_report_error(ctx);
715 fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
716 }
717 }
718
719 return stm;
720 }
721
722 fz_stream *
723 pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
724 {
725 int num;
726
727 if (pdf_is_array(ctx, obj))
728 return pdf_open_object_array(ctx, doc, obj);
729
730 num = pdf_to_num(ctx, obj);
731 if (pdf_is_stream(ctx, obj))
732 return pdf_open_image_stream(ctx, doc, num, NULL, 0);
733
734 fz_warn(ctx, "content stream is not a stream (%d 0 R)", num);
735 return fz_open_memory(ctx, (unsigned char *)"", 0);
736 }
737
738 fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref)
739 {
740 if (pdf_is_stream(ctx, ref))
741 return pdf_load_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
742 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
743 }
744
745 fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref)
746 {
747 if (pdf_is_stream(ctx, ref))
748 return pdf_load_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
749 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
750 }
751
752 fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref)
753 {
754 if (pdf_is_stream(ctx, ref))
755 return pdf_open_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
756 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
757 }
758
759 fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref)
760 {
761 if (pdf_is_stream(ctx, ref))
762 return pdf_open_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
763 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
764 }