Mercurial > hgrepos > Python2 > PyMuPDF
comparison src_classic/helper-pdfinfo.i @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 %{ | |
| 2 /* | |
| 3 # ------------------------------------------------------------------------ | |
| 4 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com | |
| 5 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html | |
| 6 # | |
| 7 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a | |
| 8 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is | |
| 9 # maintained and developed by Artifex Software, Inc. https://artifex.com. | |
| 10 # ------------------------------------------------------------------------ | |
| 11 */ | |
| 12 //------------------------------------------------------------------------ | |
| 13 // Store ID in PDF trailer | |
| 14 //------------------------------------------------------------------------ | |
| 15 void JM_ensure_identity(fz_context *ctx, pdf_document *pdf) | |
| 16 { | |
| 17 unsigned char rnd[16]; | |
| 18 pdf_obj *id; | |
| 19 id = pdf_dict_get(ctx, pdf_trailer(ctx, pdf), PDF_NAME(ID)); | |
| 20 if (!id) { | |
| 21 fz_memrnd(ctx, rnd, nelem(rnd)); | |
| 22 id = pdf_dict_put_array(ctx, pdf_trailer(ctx, pdf), PDF_NAME(ID), 2); | |
| 23 pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 0, nelem(rnd))); | |
| 24 pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 0, nelem(rnd))); | |
| 25 } | |
| 26 } | |
| 27 | |
| 28 | |
| 29 //------------------------------------------------------------------------ | |
| 30 // Ensure OCProperties, return /OCProperties key | |
| 31 //------------------------------------------------------------------------ | |
| 32 pdf_obj * | |
| 33 JM_ensure_ocproperties(fz_context *ctx, pdf_document *pdf) | |
| 34 { | |
| 35 pdf_obj *D, *ocp; | |
| 36 fz_try(ctx) { | |
| 37 ocp = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, pdf), PDF_NAME(Root)), PDF_NAME(OCProperties)); | |
| 38 if (ocp) goto finished; | |
| 39 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, pdf), PDF_NAME(Root)); | |
| 40 ocp = pdf_dict_put_dict(ctx, root, PDF_NAME(OCProperties), 2); | |
| 41 pdf_dict_put_array(ctx, ocp, PDF_NAME(OCGs), 0); | |
| 42 D = pdf_dict_put_dict(ctx, ocp, PDF_NAME(D), 5); | |
| 43 pdf_dict_put_array(ctx, D, PDF_NAME(ON), 0); | |
| 44 pdf_dict_put_array(ctx, D, PDF_NAME(OFF), 0); | |
| 45 pdf_dict_put_array(ctx, D, PDF_NAME(Order), 0); | |
| 46 pdf_dict_put_array(ctx, D, PDF_NAME(RBGroups), 0); | |
| 47 finished:; | |
| 48 } | |
| 49 fz_catch(ctx) { | |
| 50 fz_rethrow(ctx); | |
| 51 } | |
| 52 return ocp; | |
| 53 } | |
| 54 | |
| 55 | |
| 56 //------------------------------------------------------------------------ | |
| 57 // Add OC configuration to the PDF catalog | |
| 58 //------------------------------------------------------------------------ | |
| 59 void | |
| 60 JM_add_layer_config(fz_context *ctx, pdf_document *pdf, char *name, char *creator, PyObject *ON) | |
| 61 { | |
| 62 pdf_obj *D, *ocp, *configs; | |
| 63 fz_try(ctx) { | |
| 64 ocp = JM_ensure_ocproperties(ctx, pdf); | |
| 65 configs = pdf_dict_get(ctx, ocp, PDF_NAME(Configs)); | |
| 66 if (!pdf_is_array(ctx, configs)) { | |
| 67 configs = pdf_dict_put_array(ctx,ocp, PDF_NAME(Configs), 1); | |
| 68 } | |
| 69 D = pdf_new_dict(ctx, pdf, 5); | |
| 70 pdf_dict_put_text_string(ctx, D, PDF_NAME(Name), name); | |
| 71 if (creator) { | |
| 72 pdf_dict_put_text_string(ctx, D, PDF_NAME(Creator), creator); | |
| 73 } | |
| 74 pdf_dict_put(ctx, D, PDF_NAME(BaseState), PDF_NAME(OFF)); | |
| 75 pdf_obj *onarray = pdf_dict_put_array(ctx, D, PDF_NAME(ON), 5); | |
| 76 if (!EXISTS(ON) || !PySequence_Check(ON) || !PySequence_Size(ON)) { | |
| 77 ; | |
| 78 } else { | |
| 79 pdf_obj *ocgs = pdf_dict_get(ctx, ocp, PDF_NAME(OCGs)); | |
| 80 int i, n = PySequence_Size(ON); | |
| 81 for (i = 0; i < n; i++) { | |
| 82 int xref = 0; | |
| 83 if (JM_INT_ITEM(ON, (Py_ssize_t) i, &xref) == 1) continue; | |
| 84 pdf_obj *ind = pdf_new_indirect(ctx, pdf, xref, 0); | |
| 85 if (pdf_array_contains(ctx, ocgs, ind)) { | |
| 86 pdf_array_push_drop(ctx, onarray, ind); | |
| 87 } else { | |
| 88 pdf_drop_obj(ctx, ind); | |
| 89 } | |
| 90 } | |
| 91 } | |
| 92 pdf_array_push_drop(ctx, configs, D); | |
| 93 } | |
| 94 fz_catch(ctx) { | |
| 95 fz_rethrow(ctx); | |
| 96 } | |
| 97 } | |
| 98 | |
| 99 | |
| 100 //------------------------------------------------------------------------ | |
| 101 // Get OCG arrays from OC configuration | |
| 102 // Returns dict | |
| 103 // {"basestate":name, "on":list, "off":list, "rbgroups":list, "locked":list} | |
| 104 //------------------------------------------------------------------------ | |
| 105 static PyObject * | |
| 106 JM_get_ocg_arrays_imp(fz_context *ctx, pdf_obj *arr) | |
| 107 { | |
| 108 int i, n; | |
| 109 PyObject *list = PyList_New(0), *item = NULL; | |
| 110 pdf_obj *obj = NULL; | |
| 111 if (pdf_is_array(ctx, arr)) { | |
| 112 n = pdf_array_len(ctx, arr); | |
| 113 for (i = 0; i < n; i++) { | |
| 114 obj = pdf_array_get(ctx, arr, i); | |
| 115 item = Py_BuildValue("i", pdf_to_num(ctx, obj)); | |
| 116 if (!PySequence_Contains(list, item)) { | |
| 117 LIST_APPEND_DROP(list, item); | |
| 118 } else { | |
| 119 Py_DECREF(item); | |
| 120 } | |
| 121 } | |
| 122 } | |
| 123 return list; | |
| 124 } | |
| 125 | |
| 126 PyObject * | |
| 127 JM_get_ocg_arrays(fz_context *ctx, pdf_obj *conf) | |
| 128 { | |
| 129 PyObject *rc = PyDict_New(), *list = NULL, *list1 = NULL; | |
| 130 int i, n; | |
| 131 pdf_obj *arr = NULL, *obj = NULL; | |
| 132 fz_try(ctx) { | |
| 133 arr = pdf_dict_get(ctx, conf, PDF_NAME(ON)); | |
| 134 list = JM_get_ocg_arrays_imp(ctx, arr); | |
| 135 if (PySequence_Size(list)) { | |
| 136 PyDict_SetItemString(rc, "on", list); | |
| 137 } | |
| 138 Py_DECREF(list); | |
| 139 arr = pdf_dict_get(ctx, conf, PDF_NAME(OFF)); | |
| 140 list = JM_get_ocg_arrays_imp(ctx, arr); | |
| 141 if (PySequence_Size(list)) { | |
| 142 PyDict_SetItemString(rc, "off", list); | |
| 143 } | |
| 144 Py_DECREF(list); | |
| 145 arr = pdf_dict_get(ctx, conf, PDF_NAME(Locked)); | |
| 146 list = JM_get_ocg_arrays_imp(ctx, arr); | |
| 147 if (PySequence_Size(list)) { | |
| 148 PyDict_SetItemString(rc, "locked", list); | |
| 149 } | |
| 150 Py_DECREF(list); | |
| 151 list = PyList_New(0); | |
| 152 arr = pdf_dict_get(ctx, conf, PDF_NAME(RBGroups)); | |
| 153 if (pdf_is_array(ctx, arr)) { | |
| 154 n = pdf_array_len(ctx, arr); | |
| 155 for (i = 0; i < n; i++) { | |
| 156 obj = pdf_array_get(ctx, arr, i); | |
| 157 list1 = JM_get_ocg_arrays_imp(ctx, obj); | |
| 158 LIST_APPEND_DROP(list, list1); | |
| 159 } | |
| 160 } | |
| 161 if (PySequence_Size(list)) { | |
| 162 PyDict_SetItemString(rc, "rbgroups", list); | |
| 163 } | |
| 164 Py_DECREF(list); | |
| 165 obj = pdf_dict_get(ctx, conf, PDF_NAME(BaseState)); | |
| 166 | |
| 167 if (obj) { | |
| 168 PyObject *state = NULL; | |
| 169 state = Py_BuildValue("s", pdf_to_name(ctx, obj)); | |
| 170 PyDict_SetItemString(rc, "basestate", state); | |
| 171 Py_DECREF(state); | |
| 172 } | |
| 173 } | |
| 174 fz_always(ctx) { | |
| 175 } | |
| 176 fz_catch(ctx) { | |
| 177 Py_CLEAR(rc); | |
| 178 PyErr_Clear(); | |
| 179 fz_rethrow(ctx); | |
| 180 } | |
| 181 return rc; | |
| 182 } | |
| 183 | |
| 184 | |
| 185 //------------------------------------------------------------------------ | |
| 186 // Set OCG arrays from dict of Python lists | |
| 187 // Works with dict like {"basestate":name, "on":list, "off":list, "rbgroups":list, "locked":list} | |
| 188 //------------------------------------------------------------------------ | |
| 189 static void | |
| 190 JM_set_ocg_arrays_imp(fz_context *ctx, pdf_obj *arr, PyObject *list) | |
| 191 { | |
| 192 int i, n = PySequence_Size(list); | |
| 193 pdf_obj *obj = NULL; | |
| 194 pdf_document *pdf = pdf_get_bound_document(ctx, arr); | |
| 195 for (i = 0; i < n; i++) { | |
| 196 int xref = 0; | |
| 197 if (JM_INT_ITEM(list, i, &xref) == 1) continue; | |
| 198 obj = pdf_new_indirect(ctx, pdf, xref, 0); | |
| 199 pdf_array_push_drop(ctx, arr, obj); | |
| 200 } | |
| 201 return; | |
| 202 } | |
| 203 | |
| 204 static void | |
| 205 JM_set_ocg_arrays(fz_context *ctx, pdf_obj *conf, const char *basestate, | |
| 206 PyObject *on, PyObject *off, PyObject *rbgroups, PyObject *locked) | |
| 207 { | |
| 208 int i, n; | |
| 209 pdf_obj *arr = NULL, *obj = NULL; | |
| 210 fz_try(ctx) { | |
| 211 if (basestate) { | |
| 212 pdf_dict_put_name(ctx, conf, PDF_NAME(BaseState), basestate); | |
| 213 } | |
| 214 | |
| 215 if (on != Py_None) { | |
| 216 pdf_dict_del(ctx, conf, PDF_NAME(ON)); | |
| 217 if (PySequence_Size(on)) { | |
| 218 arr = pdf_dict_put_array(ctx, conf, PDF_NAME(ON), 1); | |
| 219 JM_set_ocg_arrays_imp(ctx, arr, on); | |
| 220 } | |
| 221 } | |
| 222 | |
| 223 if (off != Py_None) { | |
| 224 pdf_dict_del(ctx, conf, PDF_NAME(OFF)); | |
| 225 if (PySequence_Size(off)) { | |
| 226 arr = pdf_dict_put_array(ctx, conf, PDF_NAME(OFF), 1); | |
| 227 JM_set_ocg_arrays_imp(ctx, arr, off); | |
| 228 } | |
| 229 } | |
| 230 | |
| 231 if (locked != Py_None) { | |
| 232 pdf_dict_del(ctx, conf, PDF_NAME(Locked)); | |
| 233 if (PySequence_Size(locked)) { | |
| 234 arr = pdf_dict_put_array(ctx, conf, PDF_NAME(Locked), 1); | |
| 235 JM_set_ocg_arrays_imp(ctx, arr, locked); | |
| 236 } | |
| 237 } | |
| 238 | |
| 239 if (rbgroups != Py_None) { | |
| 240 pdf_dict_del(ctx, conf, PDF_NAME(RBGroups)); | |
| 241 if (PySequence_Size(rbgroups)) { | |
| 242 arr = pdf_dict_put_array(ctx, conf, PDF_NAME(RBGroups), 1); | |
| 243 n = PySequence_Size(rbgroups); | |
| 244 for (i = 0; i < n; i++) { | |
| 245 PyObject *item0 = PySequence_ITEM(rbgroups, i); | |
| 246 obj = pdf_array_push_array(ctx, arr, 1); | |
| 247 JM_set_ocg_arrays_imp(ctx, obj, item0); | |
| 248 Py_DECREF(item0); | |
| 249 } | |
| 250 } | |
| 251 } | |
| 252 } | |
| 253 fz_catch(ctx) { | |
| 254 fz_rethrow(ctx); | |
| 255 } | |
| 256 return; | |
| 257 } | |
| 258 | |
| 259 | |
| 260 //------------------------------------------------------------------------ | |
| 261 // Return the items of Resources/Properties (used for Marked Content) | |
| 262 // Argument may be e.g. a page object or a Form XObject | |
| 263 //------------------------------------------------------------------------ | |
| 264 PyObject * | |
| 265 JM_get_resource_properties(fz_context *ctx, pdf_obj *ref) | |
| 266 { | |
| 267 PyObject *rc = NULL; | |
| 268 fz_try(ctx) { | |
| 269 pdf_obj *properties = pdf_dict_getl(ctx, ref, | |
| 270 PDF_NAME(Resources), | |
| 271 PDF_NAME(Properties), NULL); | |
| 272 if (!properties) { | |
| 273 rc = PyTuple_New(0); | |
| 274 } else { | |
| 275 int i, n = pdf_dict_len(ctx, properties); | |
| 276 if (n < 1) { | |
| 277 rc = PyTuple_New(0); | |
| 278 goto finished; | |
| 279 } | |
| 280 rc = PyTuple_New(n); | |
| 281 for (i = 0; i < n; i++) { | |
| 282 pdf_obj *key = pdf_dict_get_key(ctx, properties, i); | |
| 283 pdf_obj *val = pdf_dict_get_val(ctx, properties, i); | |
| 284 const char *c = pdf_to_name(ctx, key); | |
| 285 int xref = pdf_to_num(ctx, val); | |
| 286 PyTuple_SET_ITEM(rc, i, Py_BuildValue("si", c, xref)); | |
| 287 } | |
| 288 } | |
| 289 finished:; | |
| 290 } | |
| 291 fz_catch(ctx) { | |
| 292 Py_CLEAR(rc); | |
| 293 fz_rethrow(ctx); | |
| 294 } | |
| 295 return rc; | |
| 296 } | |
| 297 | |
| 298 | |
| 299 //------------------------------------------------------------------------ | |
| 300 // Insert an item into Resources/Properties (used for Marked Content) | |
| 301 // Arguments: | |
| 302 // (1) e.g. page object, Form XObject | |
| 303 // (2) marked content name | |
| 304 // (3) xref of the referenced object (insert as indirect reference) | |
| 305 //------------------------------------------------------------------------ | |
| 306 void | |
| 307 JM_set_resource_property(fz_context *ctx, pdf_obj *ref, const char *name, int xref) | |
| 308 { | |
| 309 pdf_obj *ind = NULL; | |
| 310 pdf_obj *properties = NULL; | |
| 311 pdf_document *pdf = pdf_get_bound_document(ctx, ref); | |
| 312 pdf_obj *name2 = NULL; | |
| 313 fz_var(ind); | |
| 314 fz_var(name2); | |
| 315 fz_try(ctx) { | |
| 316 ind = pdf_new_indirect(ctx, pdf, xref, 0); | |
| 317 if (!ind) { | |
| 318 RAISEPY(ctx, MSG_BAD_XREF, PyExc_ValueError); | |
| 319 } | |
| 320 pdf_obj *resources = pdf_dict_get(ctx, ref, PDF_NAME(Resources)); | |
| 321 if (!resources) { | |
| 322 resources = pdf_dict_put_dict(ctx, ref, PDF_NAME(Resources), 1); | |
| 323 } | |
| 324 properties = pdf_dict_get(ctx, resources, PDF_NAME(Properties)); | |
| 325 if (!properties) { | |
| 326 properties = pdf_dict_put_dict(ctx, resources, PDF_NAME(Properties), 1); | |
| 327 } | |
| 328 name2 = pdf_new_name(ctx, name); | |
| 329 pdf_dict_put(ctx, properties, name2, ind); | |
| 330 } | |
| 331 fz_always(ctx) { | |
| 332 pdf_drop_obj(ctx, ind); | |
| 333 pdf_drop_obj(ctx, name2); | |
| 334 } | |
| 335 fz_catch(ctx) { | |
| 336 fz_rethrow(ctx); | |
| 337 } | |
| 338 return; | |
| 339 } | |
| 340 | |
| 341 | |
| 342 //------------------------------------------------------------------------ | |
| 343 // Add OC object reference to a dictionary | |
| 344 //------------------------------------------------------------------------ | |
| 345 void | |
| 346 JM_add_oc_object(fz_context *ctx, pdf_document *pdf, pdf_obj *ref, int xref) | |
| 347 { | |
| 348 pdf_obj *indobj = NULL; | |
| 349 fz_try(ctx) { | |
| 350 indobj = pdf_new_indirect(ctx, pdf, xref, 0); | |
| 351 if (!pdf_is_dict(ctx, indobj)) { | |
| 352 RAISEPY(ctx, MSG_BAD_OC_REF, PyExc_ValueError); | |
| 353 } | |
| 354 pdf_obj *type = pdf_dict_get(ctx, indobj, PDF_NAME(Type)); | |
| 355 if (pdf_objcmp(ctx, type, PDF_NAME(OCG)) == 0 || | |
| 356 pdf_objcmp(ctx, type, PDF_NAME(OCMD)) == 0) { | |
| 357 pdf_dict_put(ctx, ref, PDF_NAME(OC), indobj); | |
| 358 } else { | |
| 359 RAISEPY(ctx, MSG_BAD_OC_REF, PyExc_ValueError); | |
| 360 } | |
| 361 } | |
| 362 fz_always(ctx) { | |
| 363 pdf_drop_obj(ctx, indobj); | |
| 364 } | |
| 365 fz_catch(ctx) { | |
| 366 fz_rethrow(ctx); | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 | |
| 371 //------------------------------------------------------------------------- | |
| 372 // Store info of a font in Python list | |
| 373 //------------------------------------------------------------------------- | |
| 374 int JM_gather_fonts(fz_context *ctx, pdf_document *pdf, pdf_obj *dict, | |
| 375 PyObject *fontlist, int stream_xref) | |
| 376 { | |
| 377 int i, n, rc = 1; | |
| 378 n = pdf_dict_len(ctx, dict); | |
| 379 for (i = 0; i < n; i++) { | |
| 380 pdf_obj *fontdict = NULL; | |
| 381 pdf_obj *subtype = NULL; | |
| 382 pdf_obj *basefont = NULL; | |
| 383 pdf_obj *name = NULL; | |
| 384 pdf_obj *refname = NULL; | |
| 385 pdf_obj *encoding = NULL; | |
| 386 | |
| 387 refname = pdf_dict_get_key(ctx, dict, i); | |
| 388 fontdict = pdf_dict_get_val(ctx, dict, i); | |
| 389 if (!pdf_is_dict(ctx, fontdict)) { | |
| 390 fz_warn(ctx, "'%s' is no font dict (%d 0 R)", | |
| 391 pdf_to_name(ctx, refname), pdf_to_num(ctx, fontdict)); | |
| 392 continue; | |
| 393 } | |
| 394 | |
| 395 subtype = pdf_dict_get(ctx, fontdict, PDF_NAME(Subtype)); | |
| 396 basefont = pdf_dict_get(ctx, fontdict, PDF_NAME(BaseFont)); | |
| 397 if (!basefont || pdf_is_null(ctx, basefont)) { | |
| 398 name = pdf_dict_get(ctx, fontdict, PDF_NAME(Name)); | |
| 399 } else { | |
| 400 name = basefont; | |
| 401 } | |
| 402 encoding = pdf_dict_get(ctx, fontdict, PDF_NAME(Encoding)); | |
| 403 if (pdf_is_dict(ctx, encoding)) { | |
| 404 encoding = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding)); | |
| 405 } | |
| 406 int xref = pdf_to_num(ctx, fontdict); | |
| 407 char *ext = "n/a"; | |
| 408 if (xref) { | |
| 409 ext = JM_get_fontextension(ctx, pdf, xref); | |
| 410 } | |
| 411 PyObject *entry = PyTuple_New(7); | |
| 412 PyTuple_SET_ITEM(entry, 0, Py_BuildValue("i", xref)); | |
| 413 PyTuple_SET_ITEM(entry, 1, Py_BuildValue("s", ext)); | |
| 414 PyTuple_SET_ITEM(entry, 2, Py_BuildValue("s", pdf_to_name(ctx, subtype))); | |
| 415 PyTuple_SET_ITEM(entry, 3, JM_EscapeStrFromStr(pdf_to_name(ctx, name))); | |
| 416 PyTuple_SET_ITEM(entry, 4, Py_BuildValue("s", pdf_to_name(ctx, refname))); | |
| 417 PyTuple_SET_ITEM(entry, 5, Py_BuildValue("s", pdf_to_name(ctx, encoding))); | |
| 418 PyTuple_SET_ITEM(entry, 6, Py_BuildValue("i", stream_xref)); | |
| 419 LIST_APPEND_DROP(fontlist, entry); | |
| 420 } | |
| 421 return rc; | |
| 422 } | |
| 423 | |
| 424 //------------------------------------------------------------------------- | |
| 425 // Store info of an image in Python list | |
| 426 //------------------------------------------------------------------------- | |
| 427 int JM_gather_images(fz_context *ctx, pdf_document *doc, pdf_obj *dict, | |
| 428 PyObject *imagelist, int stream_xref) | |
| 429 { | |
| 430 int i, n, rc = 1; | |
| 431 n = pdf_dict_len(ctx, dict); | |
| 432 for (i = 0; i < n; i++) { | |
| 433 pdf_obj *imagedict, *smask; | |
| 434 pdf_obj *refname = NULL; | |
| 435 pdf_obj *type; | |
| 436 pdf_obj *width; | |
| 437 pdf_obj *height; | |
| 438 pdf_obj *bpc = NULL; | |
| 439 pdf_obj *filter = NULL; | |
| 440 pdf_obj *cs = NULL; | |
| 441 pdf_obj *altcs; | |
| 442 | |
| 443 refname = pdf_dict_get_key(ctx, dict, i); | |
| 444 imagedict = pdf_dict_get_val(ctx, dict, i); | |
| 445 if (!pdf_is_dict(ctx, imagedict)) { | |
| 446 fz_warn(ctx, "'%s' is no image dict (%d 0 R)", | |
| 447 pdf_to_name(ctx, refname), pdf_to_num(ctx, imagedict)); | |
| 448 continue; | |
| 449 } | |
| 450 | |
| 451 type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype)); | |
| 452 if (!pdf_name_eq(ctx, type, PDF_NAME(Image))) | |
| 453 continue; | |
| 454 | |
| 455 int xref = pdf_to_num(ctx, imagedict); | |
| 456 int gen = 0; | |
| 457 smask = pdf_dict_geta(ctx, imagedict, PDF_NAME(SMask), PDF_NAME(Mask)); | |
| 458 if (smask) | |
| 459 gen = pdf_to_num(ctx, smask); | |
| 460 | |
| 461 filter = pdf_dict_geta(ctx, imagedict, PDF_NAME(Filter), PDF_NAME(F)); | |
| 462 if (pdf_is_array(ctx, filter)) { | |
| 463 filter = pdf_array_get(ctx, filter, 0); | |
| 464 } | |
| 465 | |
| 466 altcs = NULL; | |
| 467 cs = pdf_dict_geta(ctx, imagedict, PDF_NAME(ColorSpace), PDF_NAME(CS)); | |
| 468 if (pdf_is_array(ctx, cs)) { | |
| 469 pdf_obj *cses = cs; | |
| 470 cs = pdf_array_get(ctx, cses, 0); | |
| 471 if (pdf_name_eq(ctx, cs, PDF_NAME(DeviceN)) || | |
| 472 pdf_name_eq(ctx, cs, PDF_NAME(Separation))) { | |
| 473 altcs = pdf_array_get(ctx, cses, 2); | |
| 474 if (pdf_is_array(ctx, altcs)) { | |
| 475 altcs = pdf_array_get(ctx, altcs, 0); | |
| 476 } | |
| 477 } | |
| 478 } | |
| 479 | |
| 480 width = pdf_dict_geta(ctx, imagedict, PDF_NAME(Width), PDF_NAME(W)); | |
| 481 height = pdf_dict_geta(ctx, imagedict, PDF_NAME(Height), PDF_NAME(H)); | |
| 482 bpc = pdf_dict_geta(ctx, imagedict, PDF_NAME(BitsPerComponent), PDF_NAME(BPC)); | |
| 483 | |
| 484 PyObject *entry = PyTuple_New(10); | |
| 485 PyTuple_SET_ITEM(entry, 0, Py_BuildValue("i", xref)); | |
| 486 PyTuple_SET_ITEM(entry, 1, Py_BuildValue("i", gen)); | |
| 487 PyTuple_SET_ITEM(entry, 2, Py_BuildValue("i", pdf_to_int(ctx, width))); | |
| 488 PyTuple_SET_ITEM(entry, 3, Py_BuildValue("i", pdf_to_int(ctx, height))); | |
| 489 PyTuple_SET_ITEM(entry, 4, Py_BuildValue("i", pdf_to_int(ctx, bpc))); | |
| 490 PyTuple_SET_ITEM(entry, 5, JM_EscapeStrFromStr(pdf_to_name(ctx, cs))); | |
| 491 PyTuple_SET_ITEM(entry, 6, JM_EscapeStrFromStr(pdf_to_name(ctx, altcs))); | |
| 492 PyTuple_SET_ITEM(entry, 7, JM_EscapeStrFromStr(pdf_to_name(ctx, refname))); | |
| 493 PyTuple_SET_ITEM(entry, 8, JM_EscapeStrFromStr(pdf_to_name(ctx, filter))); | |
| 494 PyTuple_SET_ITEM(entry, 9, Py_BuildValue("i", stream_xref)); | |
| 495 LIST_APPEND_DROP(imagelist, entry); | |
| 496 } | |
| 497 return rc; | |
| 498 } | |
| 499 | |
| 500 //------------------------------------------------------------------------- | |
| 501 // Store info of a /Form xobject in Python list | |
| 502 //------------------------------------------------------------------------- | |
| 503 int JM_gather_forms(fz_context *ctx, pdf_document *doc, pdf_obj *dict, | |
| 504 PyObject *imagelist, int stream_xref) | |
| 505 { | |
| 506 int i, rc = 1, n = pdf_dict_len(ctx, dict); | |
| 507 fz_rect bbox; | |
| 508 fz_matrix mat; | |
| 509 pdf_obj *o = NULL, *m = NULL; | |
| 510 for (i = 0; i < n; i++) { | |
| 511 pdf_obj *imagedict; | |
| 512 pdf_obj *refname = NULL; | |
| 513 pdf_obj *type; | |
| 514 | |
| 515 refname = pdf_dict_get_key(ctx, dict, i); | |
| 516 imagedict = pdf_dict_get_val(ctx, dict, i); | |
| 517 if (!pdf_is_dict(ctx, imagedict)) { | |
| 518 fz_warn(ctx, "'%s' is no form dict (%d 0 R)", | |
| 519 pdf_to_name(ctx, refname), pdf_to_num(ctx, imagedict)); | |
| 520 continue; | |
| 521 } | |
| 522 | |
| 523 type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype)); | |
| 524 if (!pdf_name_eq(ctx, type, PDF_NAME(Form))) | |
| 525 continue; | |
| 526 | |
| 527 o = pdf_dict_get(ctx, imagedict, PDF_NAME(BBox)); | |
| 528 m = pdf_dict_get(ctx, imagedict, PDF_NAME(Matrix)); | |
| 529 if (m) { | |
| 530 mat = pdf_to_matrix(ctx, m); | |
| 531 } else { | |
| 532 mat = fz_identity; | |
| 533 } | |
| 534 if (o) { | |
| 535 bbox = fz_transform_rect(pdf_to_rect(ctx, o), mat); | |
| 536 } else { | |
| 537 bbox = fz_infinite_rect; | |
| 538 } | |
| 539 int xref = pdf_to_num(ctx, imagedict); | |
| 540 | |
| 541 PyObject *entry = PyTuple_New(4); | |
| 542 PyTuple_SET_ITEM(entry, 0, Py_BuildValue("i", xref)); | |
| 543 PyTuple_SET_ITEM(entry, 1, Py_BuildValue("s", pdf_to_name(ctx, refname))); | |
| 544 PyTuple_SET_ITEM(entry, 2, Py_BuildValue("i", stream_xref)); | |
| 545 PyTuple_SET_ITEM(entry, 3, JM_py_from_rect(bbox)); | |
| 546 LIST_APPEND_DROP(imagelist, entry); | |
| 547 } | |
| 548 return rc; | |
| 549 } | |
| 550 | |
| 551 //------------------------------------------------------------------------- | |
| 552 // Step through /Resources, looking up image, xobject or font information | |
| 553 //------------------------------------------------------------------------- | |
| 554 void JM_scan_resources(fz_context *ctx, pdf_document *pdf, pdf_obj *rsrc, | |
| 555 PyObject *liste, int what, int stream_xref, | |
| 556 PyObject *tracer) | |
| 557 { | |
| 558 pdf_obj *font, *xobj, *subrsrc; | |
| 559 int i, n, sxref; | |
| 560 if (pdf_mark_obj(ctx, rsrc)) { | |
| 561 fz_warn(ctx, "Circular dependencies! Consider page cleaning."); | |
| 562 return; // Circular dependencies! | |
| 563 } | |
| 564 | |
| 565 fz_try(ctx) { | |
| 566 | |
| 567 xobj = pdf_dict_get(ctx, rsrc, PDF_NAME(XObject)); | |
| 568 | |
| 569 if (what == 1) { // lookup fonts | |
| 570 font = pdf_dict_get(ctx, rsrc, PDF_NAME(Font)); | |
| 571 JM_gather_fonts(ctx, pdf, font, liste, stream_xref); | |
| 572 } else if (what == 2) { // look up images | |
| 573 JM_gather_images(ctx, pdf, xobj, liste, stream_xref); | |
| 574 } else if (what == 3) { // look up form xobjects | |
| 575 JM_gather_forms(ctx, pdf, xobj, liste, stream_xref); | |
| 576 } else { // should never happen | |
| 577 goto finished; | |
| 578 } | |
| 579 | |
| 580 // check if we need to recurse into Form XObjects | |
| 581 n = pdf_dict_len(ctx, xobj); | |
| 582 for (i = 0; i < n; i++) { | |
| 583 pdf_obj *obj = pdf_dict_get_val(ctx, xobj, i); | |
| 584 if (pdf_is_stream(ctx, obj)) { | |
| 585 sxref = pdf_to_num(ctx, obj); | |
| 586 } else { | |
| 587 sxref = 0; | |
| 588 } | |
| 589 subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources)); | |
| 590 if (subrsrc) { | |
| 591 PyObject *sxref_t = Py_BuildValue("i", sxref); | |
| 592 if (PySequence_Contains(tracer, sxref_t) == 0) { | |
| 593 LIST_APPEND_DROP(tracer, sxref_t); | |
| 594 JM_scan_resources(ctx, pdf, subrsrc, liste, what, sxref, tracer); | |
| 595 } else { | |
| 596 Py_DECREF(sxref_t); | |
| 597 PyErr_Clear(); | |
| 598 fz_warn(ctx, "Circular dependencies! Consider page cleaning."); | |
| 599 goto finished; | |
| 600 } | |
| 601 } | |
| 602 } | |
| 603 finished:; | |
| 604 } | |
| 605 fz_always(ctx) { | |
| 606 pdf_unmark_obj(ctx, rsrc); | |
| 607 } | |
| 608 fz_catch(ctx) { | |
| 609 fz_rethrow(ctx); | |
| 610 } | |
| 611 } | |
| 612 %} |
