Mercurial > hgrepos > Python2 > PyMuPDF
comparison src_classic/helper-xobject.i @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 %{ | |
| 2 /* | |
| 3 # ------------------------------------------------------------------------ | |
| 4 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com | |
| 5 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html | |
| 6 # | |
| 7 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a | |
| 8 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is | |
| 9 # maintained and developed by Artifex Software, Inc. https://artifex.com. | |
| 10 # ------------------------------------------------------------------------ | |
| 11 */ | |
| 12 //----------------------------------------------------------------------------- | |
| 13 // Read and concatenate a PDF page's /Conents object(s) in a buffer | |
| 14 //----------------------------------------------------------------------------- | |
| 15 fz_buffer *JM_read_contents(fz_context * ctx, pdf_obj * pageref) | |
| 16 { | |
| 17 fz_buffer *res = NULL, *nres = NULL; | |
| 18 int i; | |
| 19 fz_try(ctx) { | |
| 20 pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents)); | |
| 21 if (pdf_is_array(ctx, contents)) { | |
| 22 res = fz_new_buffer(ctx, 1024); | |
| 23 for (i = 0; i < pdf_array_len(ctx, contents); i++) { | |
| 24 nres = pdf_load_stream(ctx, pdf_array_get(ctx, contents, i)); | |
| 25 fz_append_buffer(ctx, res, nres); | |
| 26 fz_drop_buffer(ctx, nres); | |
| 27 } | |
| 28 } | |
| 29 else if (contents) { | |
| 30 res = pdf_load_stream(ctx, contents); | |
| 31 } | |
| 32 } | |
| 33 fz_catch(ctx) { | |
| 34 fz_rethrow(ctx); | |
| 35 } | |
| 36 return res; | |
| 37 } | |
| 38 | |
| 39 //----------------------------------------------------------------------------- | |
| 40 // Make an XObject from a PDF page | |
| 41 // For a positive xref assume that its object can be used instead | |
| 42 //----------------------------------------------------------------------------- | |
| 43 pdf_obj *JM_xobject_from_page(fz_context * ctx, pdf_document * pdfout, fz_page * fsrcpage, int xref, pdf_graft_map *gmap) | |
| 44 { | |
| 45 pdf_obj *xobj1, *resources = NULL, *o, *spageref; | |
| 46 fz_try(ctx) { | |
| 47 if (xref > 0) { | |
| 48 xobj1 = pdf_new_indirect(ctx, pdfout, xref, 0); | |
| 49 } else { | |
| 50 fz_buffer *res = NULL; | |
| 51 fz_rect mediabox; | |
| 52 pdf_page *srcpage = pdf_page_from_fz_page(ctx, fsrcpage); | |
| 53 spageref = srcpage->obj; | |
| 54 mediabox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(MediaBox))); | |
| 55 // Deep-copy resources object of source page | |
| 56 o = pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(Resources)); | |
| 57 if (gmap) // use graftmap when possible | |
| 58 resources = pdf_graft_mapped_object(ctx, gmap, o); | |
| 59 else | |
| 60 resources = pdf_graft_object(ctx, pdfout, o); | |
| 61 | |
| 62 // get spgage contents source | |
| 63 res = JM_read_contents(ctx, spageref); | |
| 64 | |
| 65 //------------------------------------------------------------- | |
| 66 // create XObject representing the source page | |
| 67 //------------------------------------------------------------- | |
| 68 xobj1 = pdf_new_xobject(ctx, pdfout, mediabox, fz_identity, NULL, res); | |
| 69 // store spage contents | |
| 70 JM_update_stream(ctx, pdfout, xobj1, res, 1); | |
| 71 fz_drop_buffer(ctx, res); | |
| 72 | |
| 73 // store spage resources | |
| 74 pdf_dict_put_drop(ctx, xobj1, PDF_NAME(Resources), resources); | |
| 75 } | |
| 76 } | |
| 77 fz_catch(ctx) { | |
| 78 fz_rethrow(ctx); | |
| 79 } | |
| 80 return xobj1; | |
| 81 } | |
| 82 | |
| 83 //----------------------------------------------------------------------------- | |
| 84 // Insert a buffer as a new separate /Contents object of a page. | |
| 85 // 1. Create a new stream object from buffer 'newcont' | |
| 86 // 2. If /Contents already is an array, then just prepend or append this object | |
| 87 // 3. Else, create new array and put old content obj and this object into it. | |
| 88 // If the page had no /Contents before, just create a 1-item array. | |
| 89 //----------------------------------------------------------------------------- | |
| 90 int JM_insert_contents(fz_context * ctx, pdf_document * pdf, | |
| 91 pdf_obj * pageref, fz_buffer * newcont, int overlay) | |
| 92 { | |
| 93 int xref = 0; | |
| 94 pdf_obj *newconts = NULL; | |
| 95 pdf_obj *carr = NULL; | |
| 96 fz_var(newconts); | |
| 97 fz_var(carr); | |
| 98 fz_try(ctx) { | |
| 99 pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents)); | |
| 100 newconts = pdf_add_stream(ctx, pdf, newcont, NULL, 0); | |
| 101 xref = pdf_to_num(ctx, newconts); | |
| 102 if (pdf_is_array(ctx, contents)) { | |
| 103 if (overlay) // append new object | |
| 104 pdf_array_push(ctx, contents, newconts); | |
| 105 else // prepend new object | |
| 106 pdf_array_insert(ctx, contents, newconts, 0); | |
| 107 } else { | |
| 108 carr = pdf_new_array(ctx, pdf, 5); | |
| 109 if (overlay) { | |
| 110 if (contents) | |
| 111 pdf_array_push(ctx, carr, contents); | |
| 112 pdf_array_push(ctx, carr, newconts); | |
| 113 } else { | |
| 114 pdf_array_push(ctx, carr, newconts); | |
| 115 if (contents) | |
| 116 pdf_array_push(ctx, carr, contents); | |
| 117 } | |
| 118 pdf_dict_put(ctx, pageref, PDF_NAME(Contents), carr); | |
| 119 } | |
| 120 } | |
| 121 fz_always(ctx) { | |
| 122 pdf_drop_obj(ctx, newconts); | |
| 123 pdf_drop_obj(ctx, carr); | |
| 124 } | |
| 125 fz_catch(ctx) { | |
| 126 fz_rethrow(ctx); | |
| 127 } | |
| 128 return xref; | |
| 129 } | |
| 130 | |
| 131 static void show(const char* prefix, PyObject* obj) | |
| 132 { | |
| 133 if (!obj) | |
| 134 { | |
| 135 printf( "%s <null>\n", prefix); | |
| 136 return; | |
| 137 } | |
| 138 PyObject* obj_repr = PyObject_Repr( obj); | |
| 139 PyObject* obj_repr_u = PyUnicode_AsEncodedString( obj_repr, "utf-8", "~E~"); | |
| 140 const char* obj_repr_s = PyString_AsString( obj_repr_u); | |
| 141 printf( "%s%s\n", prefix, obj_repr_s); | |
| 142 fflush(stdout); | |
| 143 } | |
| 144 | |
| 145 static PyObject *g_img_info = NULL; | |
| 146 static fz_matrix g_img_info_matrix = {0}; | |
| 147 | |
| 148 static fz_image * | |
| 149 JM_image_filter(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image) | |
| 150 { | |
| 151 fz_quad q = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm); | |
| 152 #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 | |
| 153 q = fz_transform_quad( q, g_img_info_matrix); | |
| 154 #endif | |
| 155 PyObject *temp = Py_BuildValue("sN", name, JM_py_from_quad(q)); | |
| 156 | |
| 157 LIST_APPEND_DROP(g_img_info, temp); | |
| 158 return image; | |
| 159 } | |
| 160 | |
| 161 #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 | |
| 162 | |
| 163 static PyObject * | |
| 164 JM_image_reporter(fz_context *ctx, pdf_page *page) | |
| 165 { | |
| 166 pdf_document *doc = page->doc; | |
| 167 | |
| 168 pdf_page_transform(ctx, page, NULL, &g_img_info_matrix); | |
| 169 pdf_filter_options filter_options = {0}; | |
| 170 filter_options.recurse = 0; | |
| 171 filter_options.instance_forms = 1; | |
| 172 filter_options.ascii = 1; | |
| 173 filter_options.no_update = 1; | |
| 174 | |
| 175 pdf_sanitize_filter_options sanitize_filter_options = {0}; | |
| 176 sanitize_filter_options.opaque = page; | |
| 177 sanitize_filter_options.image_filter = JM_image_filter; | |
| 178 | |
| 179 pdf_filter_factory filter_factory[2] = {0}; | |
| 180 filter_factory[0].filter = pdf_new_sanitize_filter; | |
| 181 filter_factory[0].options = &sanitize_filter_options; | |
| 182 | |
| 183 filter_options.filters = filter_factory; // was & | |
| 184 | |
| 185 g_img_info = PyList_New(0); | |
| 186 | |
| 187 pdf_filter_page_contents(ctx, doc, page, &filter_options); | |
| 188 | |
| 189 PyObject *rc = PySequence_Tuple(g_img_info); | |
| 190 Py_CLEAR(g_img_info); | |
| 191 | |
| 192 return rc; | |
| 193 } | |
| 194 | |
| 195 #else | |
| 196 | |
| 197 void | |
| 198 JM_filter_content_stream( | |
| 199 fz_context * ctx, | |
| 200 pdf_document * doc, | |
| 201 pdf_obj * in_stm, | |
| 202 pdf_obj * in_res, | |
| 203 fz_matrix transform, | |
| 204 pdf_filter_options * filter, | |
| 205 int struct_parents, | |
| 206 fz_buffer **out_buf, | |
| 207 pdf_obj **out_res) | |
| 208 { | |
| 209 pdf_processor *proc_buffer = NULL; | |
| 210 pdf_processor *proc_filter = NULL; | |
| 211 | |
| 212 fz_var(proc_buffer); | |
| 213 fz_var(proc_filter); | |
| 214 | |
| 215 *out_buf = NULL; | |
| 216 *out_res = NULL; | |
| 217 | |
| 218 fz_try(ctx) { | |
| 219 *out_buf = fz_new_buffer(ctx, 1024); | |
| 220 proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, filter->ascii); | |
| 221 if (filter->sanitize) { | |
| 222 *out_res = pdf_new_dict(ctx, doc, 1); | |
| 223 proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, in_res, *out_res, struct_parents, transform, filter); | |
| 224 pdf_process_contents(ctx, proc_filter, doc, in_res, in_stm, NULL); | |
| 225 pdf_close_processor(ctx, proc_filter); | |
| 226 } else { | |
| 227 *out_res = pdf_keep_obj(ctx, in_res); | |
| 228 pdf_process_contents(ctx, proc_buffer, doc, in_res, in_stm, NULL); | |
| 229 } | |
| 230 pdf_close_processor(ctx, proc_buffer); | |
| 231 } | |
| 232 fz_always(ctx) { | |
| 233 pdf_drop_processor(ctx, proc_filter); | |
| 234 pdf_drop_processor(ctx, proc_buffer); | |
| 235 } | |
| 236 fz_catch(ctx) { | |
| 237 fz_drop_buffer(ctx, *out_buf); | |
| 238 *out_buf = NULL; | |
| 239 pdf_drop_obj(ctx, *out_res); | |
| 240 *out_res = NULL; | |
| 241 fz_rethrow(ctx); | |
| 242 } | |
| 243 } | |
| 244 | |
| 245 PyObject * | |
| 246 JM_image_reporter(fz_context *ctx, pdf_page *page) | |
| 247 { | |
| 248 pdf_document *doc = page->doc; | |
| 249 pdf_filter_options filter; | |
| 250 memset(&filter, 0, sizeof filter); | |
| 251 filter.opaque = page; | |
| 252 filter.text_filter = NULL; | |
| 253 filter.image_filter = JM_image_filter; | |
| 254 filter.end_page = NULL; | |
| 255 filter.recurse = 0; | |
| 256 filter.instance_forms = 1; | |
| 257 filter.sanitize = 1; | |
| 258 filter.ascii = 1; | |
| 259 | |
| 260 pdf_obj *contents, *old_res; | |
| 261 pdf_obj *struct_parents_obj; | |
| 262 pdf_obj *new_res; | |
| 263 fz_buffer *buffer; | |
| 264 int struct_parents; | |
| 265 fz_matrix ctm = fz_identity; | |
| 266 pdf_page_transform(ctx, page, NULL, &ctm); | |
| 267 struct_parents_obj = pdf_dict_get(ctx, page->obj, PDF_NAME(StructParents)); | |
| 268 struct_parents = -1; | |
| 269 if (pdf_is_number(ctx, struct_parents_obj)) | |
| 270 struct_parents = pdf_to_int(ctx, struct_parents_obj); | |
| 271 | |
| 272 contents = pdf_page_contents(ctx, page); | |
| 273 old_res = pdf_page_resources(ctx, page); | |
| 274 g_img_info = PyList_New(0); | |
| 275 JM_filter_content_stream(ctx, doc, contents, old_res, ctm, &filter, struct_parents, &buffer, &new_res); | |
| 276 fz_drop_buffer(ctx, buffer); | |
| 277 pdf_drop_obj(ctx, new_res); | |
| 278 PyObject *rc = PySequence_Tuple(g_img_info); | |
| 279 Py_CLEAR(g_img_info); | |
| 280 return rc; | |
| 281 } | |
| 282 | |
| 283 #endif | |
| 284 | |
| 285 %} |
