Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-stream.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "mupdf/pdf.h" | |
| 25 | |
| 26 #include <string.h> | |
| 27 | |
| 28 int | |
| 29 pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num) | |
| 30 { | |
| 31 pdf_xref_entry *entry; | |
| 32 | |
| 33 if (num <= 0 || num >= pdf_xref_len(ctx, doc)) | |
| 34 return 0; | |
| 35 | |
| 36 fz_try(ctx) | |
| 37 entry = pdf_cache_object(ctx, doc, num); | |
| 38 fz_catch(ctx) | |
| 39 { | |
| 40 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 41 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 42 fz_report_error(ctx); | |
| 43 return 0; | |
| 44 } | |
| 45 | |
| 46 return entry->stm_ofs != 0 || entry->stm_buf; | |
| 47 } | |
| 48 | |
| 49 int | |
| 50 pdf_is_stream(fz_context *ctx, pdf_obj *ref) | |
| 51 { | |
| 52 pdf_document *doc = pdf_get_indirect_document(ctx, ref); | |
| 53 if (doc) | |
| 54 return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref)); | |
| 55 return 0; | |
| 56 } | |
| 57 | |
| 58 /* | |
| 59 * Scan stream dictionary for an explicit /Crypt filter | |
| 60 */ | |
| 61 static int | |
| 62 pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm) | |
| 63 { | |
| 64 pdf_obj *filters; | |
| 65 pdf_obj *obj; | |
| 66 int i; | |
| 67 | |
| 68 filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F)); | |
| 69 if (filters) | |
| 70 { | |
| 71 if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt))) | |
| 72 return 1; | |
| 73 if (pdf_is_array(ctx, filters)) | |
| 74 { | |
| 75 int n = pdf_array_len(ctx, filters); | |
| 76 for (i = 0; i < n; i++) | |
| 77 { | |
| 78 obj = pdf_array_get(ctx, filters, i); | |
| 79 if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt))) | |
| 80 return 1; | |
| 81 } | |
| 82 } | |
| 83 } | |
| 84 return 0; | |
| 85 } | |
| 86 | |
| 87 static fz_jbig2_globals * | |
| 88 pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict) | |
| 89 { | |
| 90 fz_jbig2_globals *globals; | |
| 91 fz_buffer *buf = NULL; | |
| 92 | |
| 93 fz_var(buf); | |
| 94 | |
| 95 if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL) | |
| 96 return globals; | |
| 97 | |
| 98 if (pdf_mark_obj(ctx, dict)) | |
| 99 fz_throw(ctx, FZ_ERROR_FORMAT, "cyclic reference when loading JBIG2 globals"); | |
| 100 | |
| 101 fz_try(ctx) | |
| 102 { | |
| 103 buf = pdf_load_stream(ctx, dict); | |
| 104 globals = fz_load_jbig2_globals(ctx, buf); | |
| 105 if (globals) | |
| 106 pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL)); | |
| 107 } | |
| 108 fz_always(ctx) | |
| 109 { | |
| 110 fz_drop_buffer(ctx, buf); | |
| 111 pdf_unmark_obj(ctx, dict); | |
| 112 } | |
| 113 fz_catch(ctx) | |
| 114 { | |
| 115 fz_rethrow(ctx); | |
| 116 } | |
| 117 | |
| 118 return globals; | |
| 119 } | |
| 120 | |
| 121 static void | |
| 122 build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params) | |
| 123 { | |
| 124 params->type = FZ_IMAGE_RAW; | |
| 125 | |
| 126 if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF))) | |
| 127 { | |
| 128 params->type = FZ_IMAGE_FAX; | |
| 129 params->u.fax.k = pdf_dict_get_int_default(ctx, p, PDF_NAME(K), 0); | |
| 130 params->u.fax.end_of_line = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfLine), 0); | |
| 131 params->u.fax.encoded_byte_align = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EncodedByteAlign), 0); | |
| 132 params->u.fax.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1728); | |
| 133 params->u.fax.rows = pdf_dict_get_int_default(ctx, p, PDF_NAME(Rows), 0); | |
| 134 params->u.fax.end_of_block = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfBlock), 1); | |
| 135 params->u.fax.black_is_1 = pdf_dict_get_bool_default(ctx, p, PDF_NAME(BlackIs1), 0); | |
| 136 } | |
| 137 else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT))) | |
| 138 { | |
| 139 params->type = FZ_IMAGE_JPEG; | |
| 140 params->u.jpeg.color_transform = pdf_dict_get_int_default(ctx, p, PDF_NAME(ColorTransform), -1); | |
| 141 params->u.jpeg.invert_cmyk = 0; | |
| 142 } | |
| 143 else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL))) | |
| 144 { | |
| 145 params->type = FZ_IMAGE_RLD; | |
| 146 } | |
| 147 else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl))) | |
| 148 { | |
| 149 params->type = FZ_IMAGE_FLATE; | |
| 150 params->u.flate.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1); | |
| 151 params->u.flate.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1); | |
| 152 params->u.flate.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1); | |
| 153 params->u.flate.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8); | |
| 154 } | |
| 155 else if (pdf_name_eq(ctx, f, PDF_NAME(BrotliDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Br))) | |
| 156 { | |
| 157 params->type = FZ_IMAGE_BROTLI; | |
| 158 params->u.brotli.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1); | |
| 159 params->u.brotli.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1); | |
| 160 params->u.brotli.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1); | |
| 161 params->u.brotli.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8); | |
| 162 } | |
| 163 else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW))) | |
| 164 { | |
| 165 params->type = FZ_IMAGE_LZW; | |
| 166 params->u.lzw.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1); | |
| 167 params->u.lzw.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1); | |
| 168 params->u.lzw.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1); | |
| 169 params->u.lzw.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8); | |
| 170 params->u.lzw.early_change = pdf_dict_get_int_default(ctx, p, PDF_NAME(EarlyChange), 1); | |
| 171 } | |
| 172 else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode))) | |
| 173 { | |
| 174 pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals)); | |
| 175 | |
| 176 params->type = FZ_IMAGE_JBIG2; | |
| 177 params->u.jbig2.globals = NULL; | |
| 178 params->u.jbig2.embedded = 1; /* jbig2 streams are always embedded without file headers */ | |
| 179 if (g) | |
| 180 { | |
| 181 if (!pdf_is_stream(ctx, g)) | |
| 182 fz_warn(ctx, "jbig2 globals is not a stream, skipping globals"); | |
| 183 else | |
| 184 params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g); | |
| 185 } | |
| 186 } | |
| 187 } | |
| 188 | |
| 189 /* | |
| 190 * Create a filter given a name and param dictionary. | |
| 191 */ | |
| 192 static fz_stream * | |
| 193 build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image) | |
| 194 { | |
| 195 fz_compression_params local_params; | |
| 196 | |
| 197 local_params.u.jbig2.globals = NULL; | |
| 198 if (params == NULL) | |
| 199 params = &local_params; | |
| 200 | |
| 201 if (!might_be_image && | |
| 202 (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || | |
| 203 pdf_name_eq(ctx, f, PDF_NAME(CCF)) || | |
| 204 pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || | |
| 205 pdf_name_eq(ctx, f, PDF_NAME(DCT)) || | |
| 206 pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)) || | |
| 207 pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))) | |
| 208 { | |
| 209 fz_warn(ctx, "Can't open image only stream for non-image purposes"); | |
| 210 return fz_open_memory(ctx, (unsigned char *)"", 0); | |
| 211 } | |
| 212 | |
| 213 build_compression_params(ctx, f, p, params); | |
| 214 | |
| 215 /* If we were using params we were passed in, and we successfully | |
| 216 * recognised the image type, we can use the existing filter and | |
| 217 * shortstop here. */ | |
| 218 if (params != &local_params && params->type != FZ_IMAGE_RAW) | |
| 219 return fz_keep_stream(ctx, chain); /* nothing to do */ | |
| 220 | |
| 221 else if (params->type == FZ_IMAGE_JBIG2) | |
| 222 { | |
| 223 fz_stream *stm; | |
| 224 fz_try(ctx) | |
| 225 stm = fz_open_image_decomp_stream(ctx, chain, params, NULL); | |
| 226 fz_always(ctx) | |
| 227 fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals); | |
| 228 fz_catch(ctx) | |
| 229 fz_rethrow(ctx); | |
| 230 return stm; | |
| 231 } | |
| 232 | |
| 233 else if (params->type != FZ_IMAGE_RAW) | |
| 234 return fz_open_image_decomp_stream(ctx, chain, params, NULL); | |
| 235 | |
| 236 else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx))) | |
| 237 return fz_open_ahxd(ctx, chain); | |
| 238 | |
| 239 else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85))) | |
| 240 return fz_open_a85d(ctx, chain); | |
| 241 | |
| 242 else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode))) | |
| 243 return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */ | |
| 244 | |
| 245 else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt))) | |
| 246 { | |
| 247 if (!doc->crypt) | |
| 248 fz_warn(ctx, "crypt filter in unencrypted document"); | |
| 249 else | |
| 250 { | |
| 251 pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name)); | |
| 252 if (pdf_is_name(ctx, name)) | |
| 253 return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen); | |
| 254 } | |
| 255 } | |
| 256 | |
| 257 else | |
| 258 fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f)); | |
| 259 | |
| 260 return fz_keep_stream(ctx, chain); | |
| 261 } | |
| 262 | |
| 263 /* Build filter, and assume ownership of chain */ | |
| 264 static fz_stream * | |
| 265 build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image) | |
| 266 { | |
| 267 fz_stream *head; | |
| 268 fz_try(ctx) | |
| 269 head = build_filter(ctx, tail, doc, f, p, num, gen, params, might_be_image); | |
| 270 fz_always(ctx) | |
| 271 fz_drop_stream(ctx, tail); | |
| 272 fz_catch(ctx) | |
| 273 fz_rethrow(ctx); | |
| 274 return head; | |
| 275 } | |
| 276 | |
| 277 /* | |
| 278 * Build a chain of filters given filter names and param dicts. | |
| 279 * If chain is given, start filter chain with it. | |
| 280 * Assume ownership of chain. | |
| 281 */ | |
| 282 static fz_stream * | |
| 283 build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image) | |
| 284 { | |
| 285 fz_var(chain); | |
| 286 fz_try(ctx) | |
| 287 { | |
| 288 int i, n = pdf_array_len(ctx, fs); | |
| 289 for (i = 0; i < n; i++) | |
| 290 { | |
| 291 pdf_obj *f = pdf_array_get(ctx, fs, i); | |
| 292 pdf_obj *p = pdf_array_get(ctx, ps, i); | |
| 293 chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL), might_be_image); | |
| 294 } | |
| 295 } | |
| 296 fz_catch(ctx) | |
| 297 fz_rethrow(ctx); | |
| 298 return chain; | |
| 299 } | |
| 300 | |
| 301 static fz_stream * | |
| 302 build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image) | |
| 303 { | |
| 304 return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params, might_be_image); | |
| 305 } | |
| 306 | |
| 307 /* | |
| 308 * Build a filter for reading raw stream data. | |
| 309 * This is a null filter to constrain reading to the stream length (and to | |
| 310 * allow for other people accessing the file), followed by a decryption | |
| 311 * filter. | |
| 312 * | |
| 313 * orig_num and orig_gen are used purely to seed the encryption. | |
| 314 */ | |
| 315 static fz_stream * | |
| 316 pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset) | |
| 317 { | |
| 318 pdf_xref_entry *x = NULL; | |
| 319 fz_stream *null_stm, *crypt_stm; | |
| 320 int hascrypt; | |
| 321 int64_t len; | |
| 322 | |
| 323 if (num > 0 && num < pdf_xref_len(ctx, doc)) | |
| 324 { | |
| 325 x = pdf_get_xref_entry(ctx, doc, num); | |
| 326 } | |
| 327 if (x == NULL) | |
| 328 { | |
| 329 /* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */ | |
| 330 /* New style XRef sections must have generation number 0. */ | |
| 331 *orig_num = num; | |
| 332 *orig_gen = 0; | |
| 333 } | |
| 334 else | |
| 335 { | |
| 336 *orig_num = x->num; | |
| 337 *orig_gen = x->gen; | |
| 338 if (x->stm_buf) | |
| 339 return fz_open_buffer(ctx, x->stm_buf); | |
| 340 } | |
| 341 | |
| 342 hascrypt = pdf_stream_has_crypt(ctx, stmobj); | |
| 343 len = pdf_dict_get_int64(ctx, stmobj, PDF_NAME(Length)); | |
| 344 if (len < 0) | |
| 345 len = 0; | |
| 346 null_stm = fz_open_endstream_filter(ctx, file_stm, (uint64_t)len, offset); | |
| 347 if (doc->crypt && !hascrypt) | |
| 348 { | |
| 349 fz_try(ctx) | |
| 350 crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen); | |
| 351 fz_always(ctx) | |
| 352 fz_drop_stream(ctx, null_stm); | |
| 353 fz_catch(ctx) | |
| 354 fz_rethrow(ctx); | |
| 355 return crypt_stm; | |
| 356 } | |
| 357 return null_stm; | |
| 358 } | |
| 359 | |
| 360 /* | |
| 361 * Construct a filter to decode a stream, constraining | |
| 362 * to stream length and decrypting. | |
| 363 */ | |
| 364 static fz_stream * | |
| 365 pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams, int might_be_image) | |
| 366 { | |
| 367 pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F)); | |
| 368 pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP)); | |
| 369 int orig_num, orig_gen; | |
| 370 fz_stream *rstm, *fstm; | |
| 371 | |
| 372 rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset); | |
| 373 fz_try(ctx) | |
| 374 { | |
| 375 if (pdf_is_name(ctx, filters)) | |
| 376 fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image); | |
| 377 else if (pdf_array_len(ctx, filters) > 0) | |
| 378 fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image); | |
| 379 else | |
| 380 { | |
| 381 if (imparams) | |
| 382 imparams->type = FZ_IMAGE_RAW; | |
| 383 fstm = fz_keep_stream(ctx, rstm); | |
| 384 } | |
| 385 } | |
| 386 fz_always(ctx) | |
| 387 fz_drop_stream(ctx, rstm); | |
| 388 fz_catch(ctx) | |
| 389 fz_rethrow(ctx); | |
| 390 | |
| 391 return fstm; | |
| 392 } | |
| 393 | |
| 394 fz_stream * | |
| 395 pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams) | |
| 396 { | |
| 397 pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F)); | |
| 398 pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP)); | |
| 399 | |
| 400 if (pdf_is_name(ctx, filters)) | |
| 401 return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1); | |
| 402 else if (pdf_array_len(ctx, filters) > 0) | |
| 403 return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1); | |
| 404 | |
| 405 if (imparams) | |
| 406 imparams->type = FZ_IMAGE_RAW; | |
| 407 return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm)); | |
| 408 } | |
| 409 | |
| 410 void | |
| 411 pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image) | |
| 412 { | |
| 413 fz_stream *istm = NULL, *leech = NULL, *decomp = NULL; | |
| 414 fz_pixmap *pixmap = NULL; | |
| 415 fz_compressed_buffer *bc; | |
| 416 int dummy_l2factor = 0; | |
| 417 | |
| 418 fz_var(istm); | |
| 419 fz_var(leech); | |
| 420 fz_var(decomp); | |
| 421 fz_var(pixmap); | |
| 422 | |
| 423 bc = fz_new_compressed_buffer(ctx); | |
| 424 fz_try(ctx) | |
| 425 { | |
| 426 bc->buffer = fz_new_buffer(ctx, 1024); | |
| 427 istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params); | |
| 428 leech = fz_open_leecher(ctx, istm, bc->buffer); | |
| 429 decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor); | |
| 430 pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0, NULL); | |
| 431 fz_set_compressed_image_buffer(ctx, image, bc); | |
| 432 } | |
| 433 fz_always(ctx) | |
| 434 { | |
| 435 fz_drop_stream(ctx, istm); | |
| 436 fz_drop_stream(ctx, leech); | |
| 437 fz_drop_stream(ctx, decomp); | |
| 438 fz_drop_pixmap(ctx, pixmap); | |
| 439 } | |
| 440 fz_catch(ctx) | |
| 441 { | |
| 442 fz_drop_compressed_buffer(ctx, bc); | |
| 443 fz_rethrow(ctx); | |
| 444 } | |
| 445 } | |
| 446 | |
| 447 fz_stream * | |
| 448 pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num) | |
| 449 { | |
| 450 pdf_xref_entry *x; | |
| 451 int orig_num, orig_gen; | |
| 452 | |
| 453 x = pdf_cache_object(ctx, doc, num); | |
| 454 if (x->stm_ofs == 0) | |
| 455 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 456 | |
| 457 return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs); | |
| 458 } | |
| 459 | |
| 460 static fz_stream * | |
| 461 pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int might_be_image) | |
| 462 { | |
| 463 pdf_xref_entry *x; | |
| 464 | |
| 465 x = pdf_cache_object(ctx, doc, num); | |
| 466 if (x->stm_ofs == 0 && x->stm_buf == NULL) | |
| 467 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 468 | |
| 469 return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params, might_be_image); | |
| 470 } | |
| 471 | |
| 472 fz_stream * | |
| 473 pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num) | |
| 474 { | |
| 475 return pdf_open_image_stream(ctx, doc, num, NULL, 1); | |
| 476 } | |
| 477 | |
| 478 fz_stream * | |
| 479 pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs) | |
| 480 { | |
| 481 if (stm_ofs == 0) | |
| 482 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 483 return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL, 1); | |
| 484 } | |
| 485 | |
| 486 fz_buffer * | |
| 487 pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num) | |
| 488 { | |
| 489 fz_stream *stm; | |
| 490 pdf_obj *dict; | |
| 491 int64_t len; | |
| 492 fz_buffer *buf = NULL; | |
| 493 pdf_xref_entry *x; | |
| 494 | |
| 495 if (num > 0 && num < pdf_xref_len(ctx, doc)) | |
| 496 { | |
| 497 x = pdf_get_xref_entry_no_null(ctx, doc, num); | |
| 498 if (x->stm_buf) | |
| 499 return fz_keep_buffer(ctx, x->stm_buf); | |
| 500 } | |
| 501 | |
| 502 dict = pdf_load_object(ctx, doc, num); | |
| 503 | |
| 504 fz_try(ctx) | |
| 505 len = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length)); | |
| 506 fz_always(ctx) | |
| 507 pdf_drop_obj(ctx, dict); | |
| 508 fz_catch(ctx) | |
| 509 fz_rethrow(ctx); | |
| 510 | |
| 511 stm = pdf_open_raw_stream_number(ctx, doc, num); | |
| 512 | |
| 513 if (len < 0) | |
| 514 len = 1024; | |
| 515 | |
| 516 fz_try(ctx) | |
| 517 buf = fz_read_all(ctx, stm, (size_t)len); | |
| 518 fz_always(ctx) | |
| 519 fz_drop_stream(ctx, stm); | |
| 520 fz_catch(ctx) | |
| 521 fz_rethrow(ctx); | |
| 522 | |
| 523 return buf; | |
| 524 } | |
| 525 | |
/*
 * Estimate the decoded size of 'len' bytes of data once the named
 * filter has been applied. The result is only a hint for sizing a
 * buffer.
 *
 * ASCIIHexDecode and ASCII85Decode shrink the data (len/2 and
 * floor(len*4/5)). FlateDecode, BrotliDecode, RunLengthDecode and
 * LZWDecode are assumed to expand it by a fixed factor. Any other
 * name, or a NULL name, leaves the estimate at 'len'.
 *
 * No intermediate computation overflows: when len * factor would not
 * fit in a size_t, 'len' itself is returned.
 */
static size_t
pdf_guess_filter_length(size_t len, const char *filter)
{
	const size_t size_max = (size_t)-1;
	size_t factor = 1;

	if (filter == NULL)
		return len;

	/* These get smaller. */
	if (!strcmp(filter, "ASCIIHexDecode"))
		return len / 2;
	if (!strcmp(filter, "ASCII85Decode"))
	{
		/* floor(len * 4 / 5), computed without forming len * 4,
		 * which could wrap for very large len. */
		return len / 5 * 4 + (len % 5) * 4 / 5;
	}

	if (!strcmp(filter, "FlateDecode"))
		factor = 3;
	else if (!strcmp(filter, "BrotliDecode"))
		factor = 4;
	else if (!strcmp(filter, "RunLengthDecode"))
		factor = 3;
	else if (!strcmp(filter, "LZWDecode"))
		factor = 2;

	/* Live with a bad estimate - we'll malloc up as we go, but
	 * it's probably destined to fail anyway. Checking before the
	 * multiplication catches every overflow; comparing the wrapped
	 * product against len would miss some. */
	if (len > size_max / factor)
		return len;

	return len * factor;
}
| 553 | |
| 554 /* Check if an entry has a cached stream and return whether it is directly | |
| 555 * reusable. A buffer is directly reusable only if the stream is | |
| 556 * uncompressed, or if it is compressed purely a compression method we can | |
| 557 * return details of in fz_compression_params. | |
| 558 * | |
| 559 * If the stream is reusable return 1, and set params as required, otherwise | |
| 560 * return 0. */ | |
| 561 static int | |
| 562 can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params) | |
| 563 { | |
| 564 pdf_obj *f; | |
| 565 pdf_obj *p; | |
| 566 | |
| 567 if (!entry || !entry->obj || !entry->stm_buf) | |
| 568 return 0; | |
| 569 | |
| 570 if (params) | |
| 571 params->type = FZ_IMAGE_RAW; | |
| 572 | |
| 573 f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F)); | |
| 574 /* If there are no filters, it's uncompressed, and we can use it */ | |
| 575 if (!f) | |
| 576 return 1; | |
| 577 | |
| 578 p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP)); | |
| 579 if (pdf_is_array(ctx, f)) | |
| 580 { | |
| 581 int len = pdf_array_len(ctx, f); | |
| 582 | |
| 583 /* Empty array of filters. Its uncompressed. We can cope. */ | |
| 584 if (len == 0) | |
| 585 return 1; | |
| 586 /* 1 filter is the most we can hope to cope with - if more,*/ | |
| 587 if (len != 1) | |
| 588 return 0; | |
| 589 p = pdf_array_get(ctx, p, 0); | |
| 590 } | |
| 591 if (pdf_is_null(ctx, f)) | |
| 592 return 1; /* Null filter is uncompressed */ | |
| 593 if (!pdf_is_name(ctx, f)) | |
| 594 return 0; | |
| 595 | |
| 596 /* There are filters, so unless we have the option of shortstopping, | |
| 597 * we can't use the existing buffer. */ | |
| 598 if (!params) | |
| 599 return 0; | |
| 600 | |
| 601 build_compression_params(ctx, f, p, params); | |
| 602 | |
| 603 return (params->type == FZ_IMAGE_RAW) ? 0 : 1; | |
| 604 } | |
| 605 | |
| 606 static fz_buffer * | |
| 607 pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated, size_t worst_case) | |
| 608 { | |
| 609 fz_stream *stm = NULL; | |
| 610 pdf_obj *dict, *obj; | |
| 611 int i, n; | |
| 612 size_t len; | |
| 613 fz_buffer *buf; | |
| 614 | |
| 615 fz_var(buf); | |
| 616 | |
| 617 if (num > 0 && num < pdf_xref_len(ctx, doc)) | |
| 618 { | |
| 619 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num); | |
| 620 /* Return ref to existing buffer, but only if uncompressed, | |
| 621 * or shortstoppable */ | |
| 622 if (can_reuse_buffer(ctx, entry, params)) | |
| 623 return fz_keep_buffer(ctx, entry->stm_buf); | |
| 624 } | |
| 625 | |
| 626 dict = pdf_load_object(ctx, doc, num); | |
| 627 fz_try(ctx) | |
| 628 { | |
| 629 int64_t ilen = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length)); | |
| 630 if (ilen < 0) | |
| 631 ilen = 0; | |
| 632 len = (size_t)ilen; | |
| 633 /* In 32 bit builds, we might find a length being too | |
| 634 * large for a size_t. */ | |
| 635 if ((int64_t)len != ilen) | |
| 636 fz_throw(ctx, FZ_ERROR_LIMIT, "Stream too large"); | |
| 637 obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter)); | |
| 638 len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj)); | |
| 639 n = pdf_array_len(ctx, obj); | |
| 640 for (i = 0; i < n; i++) | |
| 641 len = pdf_guess_filter_length(len, pdf_array_get_name(ctx, obj, i)); | |
| 642 } | |
| 643 fz_always(ctx) | |
| 644 { | |
| 645 pdf_drop_obj(ctx, dict); | |
| 646 } | |
| 647 fz_catch(ctx) | |
| 648 { | |
| 649 fz_rethrow(ctx); | |
| 650 } | |
| 651 | |
| 652 stm = pdf_open_image_stream(ctx, doc, num, params, 1); | |
| 653 | |
| 654 fz_try(ctx) | |
| 655 { | |
| 656 buf = fz_read_best(ctx, stm, len, truncated, worst_case); | |
| 657 } | |
| 658 fz_always(ctx) | |
| 659 { | |
| 660 fz_drop_stream(ctx, stm); | |
| 661 } | |
| 662 fz_catch(ctx) | |
| 663 { | |
| 664 fz_rethrow(ctx); | |
| 665 } | |
| 666 | |
| 667 return buf; | |
| 668 } | |
| 669 | |
| 670 fz_buffer * | |
| 671 pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num) | |
| 672 { | |
| 673 return pdf_load_image_stream(ctx, doc, num, NULL, NULL, 0); | |
| 674 } | |
| 675 | |
| 676 fz_compressed_buffer * | |
| 677 pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case) | |
| 678 { | |
| 679 fz_compressed_buffer *bc = fz_new_compressed_buffer(ctx); | |
| 680 | |
| 681 fz_try(ctx) | |
| 682 { | |
| 683 bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL, worst_case); | |
| 684 } | |
| 685 fz_catch(ctx) | |
| 686 { | |
| 687 fz_free(ctx, bc); | |
| 688 fz_rethrow(ctx); | |
| 689 } | |
| 690 return bc; | |
| 691 } | |
| 692 | |
| 693 static fz_stream * | |
| 694 pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list) | |
| 695 { | |
| 696 fz_stream *stm; | |
| 697 int i, n; | |
| 698 | |
| 699 n = pdf_array_len(ctx, list); | |
| 700 stm = fz_open_concat(ctx, n, 1); | |
| 701 | |
| 702 for (i = 0; i < n; i++) | |
| 703 { | |
| 704 pdf_obj *obj = pdf_array_get(ctx, list, i); | |
| 705 fz_try(ctx) | |
| 706 fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj)); | |
| 707 fz_catch(ctx) | |
| 708 { | |
| 709 if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM) | |
| 710 { | |
| 711 fz_drop_stream(ctx, stm); | |
| 712 fz_rethrow(ctx); | |
| 713 } | |
| 714 fz_report_error(ctx); | |
| 715 fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); | |
| 716 } | |
| 717 } | |
| 718 | |
| 719 return stm; | |
| 720 } | |
| 721 | |
| 722 fz_stream * | |
| 723 pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj) | |
| 724 { | |
| 725 int num; | |
| 726 | |
| 727 if (pdf_is_array(ctx, obj)) | |
| 728 return pdf_open_object_array(ctx, doc, obj); | |
| 729 | |
| 730 num = pdf_to_num(ctx, obj); | |
| 731 if (pdf_is_stream(ctx, obj)) | |
| 732 return pdf_open_image_stream(ctx, doc, num, NULL, 0); | |
| 733 | |
| 734 fz_warn(ctx, "content stream is not a stream (%d 0 R)", num); | |
| 735 return fz_open_memory(ctx, (unsigned char *)"", 0); | |
| 736 } | |
| 737 | |
| 738 fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref) | |
| 739 { | |
| 740 if (pdf_is_stream(ctx, ref)) | |
| 741 return pdf_load_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); | |
| 742 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 743 } | |
| 744 | |
| 745 fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref) | |
| 746 { | |
| 747 if (pdf_is_stream(ctx, ref)) | |
| 748 return pdf_load_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); | |
| 749 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 750 } | |
| 751 | |
| 752 fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref) | |
| 753 { | |
| 754 if (pdf_is_stream(ctx, ref)) | |
| 755 return pdf_open_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); | |
| 756 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 757 } | |
| 758 | |
| 759 fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref) | |
| 760 { | |
| 761 if (pdf_is_stream(ctx, ref)) | |
| 762 return pdf_open_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); | |
| 763 fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); | |
| 764 } |
