comparison src_classic/helper-xobject.i @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 %{
2 /*
3 # ------------------------------------------------------------------------
4 # Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com
5 # License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html
6 #
7 # Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a
8 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is
9 # maintained and developed by Artifex Software, Inc. https://artifex.com.
10 # ------------------------------------------------------------------------
11 */
12 //-----------------------------------------------------------------------------
13 // Read and concatenate a PDF page's /Conents object(s) in a buffer
14 //-----------------------------------------------------------------------------
15 fz_buffer *JM_read_contents(fz_context * ctx, pdf_obj * pageref)
16 {
17 fz_buffer *res = NULL, *nres = NULL;
18 int i;
19 fz_try(ctx) {
20 pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents));
21 if (pdf_is_array(ctx, contents)) {
22 res = fz_new_buffer(ctx, 1024);
23 for (i = 0; i < pdf_array_len(ctx, contents); i++) {
24 nres = pdf_load_stream(ctx, pdf_array_get(ctx, contents, i));
25 fz_append_buffer(ctx, res, nres);
26 fz_drop_buffer(ctx, nres);
27 }
28 }
29 else if (contents) {
30 res = pdf_load_stream(ctx, contents);
31 }
32 }
33 fz_catch(ctx) {
34 fz_rethrow(ctx);
35 }
36 return res;
37 }
38
39 //-----------------------------------------------------------------------------
40 // Make an XObject from a PDF page
41 // For a positive xref assume that its object can be used instead
42 //-----------------------------------------------------------------------------
43 pdf_obj *JM_xobject_from_page(fz_context * ctx, pdf_document * pdfout, fz_page * fsrcpage, int xref, pdf_graft_map *gmap)
44 {
45 pdf_obj *xobj1, *resources = NULL, *o, *spageref;
46 fz_try(ctx) {
47 if (xref > 0) {
48 xobj1 = pdf_new_indirect(ctx, pdfout, xref, 0);
49 } else {
50 fz_buffer *res = NULL;
51 fz_rect mediabox;
52 pdf_page *srcpage = pdf_page_from_fz_page(ctx, fsrcpage);
53 spageref = srcpage->obj;
54 mediabox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(MediaBox)));
55 // Deep-copy resources object of source page
56 o = pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(Resources));
57 if (gmap) // use graftmap when possible
58 resources = pdf_graft_mapped_object(ctx, gmap, o);
59 else
60 resources = pdf_graft_object(ctx, pdfout, o);
61
62 // get spgage contents source
63 res = JM_read_contents(ctx, spageref);
64
65 //-------------------------------------------------------------
66 // create XObject representing the source page
67 //-------------------------------------------------------------
68 xobj1 = pdf_new_xobject(ctx, pdfout, mediabox, fz_identity, NULL, res);
69 // store spage contents
70 JM_update_stream(ctx, pdfout, xobj1, res, 1);
71 fz_drop_buffer(ctx, res);
72
73 // store spage resources
74 pdf_dict_put_drop(ctx, xobj1, PDF_NAME(Resources), resources);
75 }
76 }
77 fz_catch(ctx) {
78 fz_rethrow(ctx);
79 }
80 return xobj1;
81 }
82
83 //-----------------------------------------------------------------------------
84 // Insert a buffer as a new separate /Contents object of a page.
85 // 1. Create a new stream object from buffer 'newcont'
86 // 2. If /Contents already is an array, then just prepend or append this object
87 // 3. Else, create new array and put old content obj and this object into it.
88 // If the page had no /Contents before, just create a 1-item array.
89 //-----------------------------------------------------------------------------
90 int JM_insert_contents(fz_context * ctx, pdf_document * pdf,
91 pdf_obj * pageref, fz_buffer * newcont, int overlay)
92 {
93 int xref = 0;
94 pdf_obj *newconts = NULL;
95 pdf_obj *carr = NULL;
96 fz_var(newconts);
97 fz_var(carr);
98 fz_try(ctx) {
99 pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents));
100 newconts = pdf_add_stream(ctx, pdf, newcont, NULL, 0);
101 xref = pdf_to_num(ctx, newconts);
102 if (pdf_is_array(ctx, contents)) {
103 if (overlay) // append new object
104 pdf_array_push(ctx, contents, newconts);
105 else // prepend new object
106 pdf_array_insert(ctx, contents, newconts, 0);
107 } else {
108 carr = pdf_new_array(ctx, pdf, 5);
109 if (overlay) {
110 if (contents)
111 pdf_array_push(ctx, carr, contents);
112 pdf_array_push(ctx, carr, newconts);
113 } else {
114 pdf_array_push(ctx, carr, newconts);
115 if (contents)
116 pdf_array_push(ctx, carr, contents);
117 }
118 pdf_dict_put(ctx, pageref, PDF_NAME(Contents), carr);
119 }
120 }
121 fz_always(ctx) {
122 pdf_drop_obj(ctx, newconts);
123 pdf_drop_obj(ctx, carr);
124 }
125 fz_catch(ctx) {
126 fz_rethrow(ctx);
127 }
128 return xref;
129 }
130
131 static void show(const char* prefix, PyObject* obj)
132 {
133 if (!obj)
134 {
135 printf( "%s <null>\n", prefix);
136 return;
137 }
138 PyObject* obj_repr = PyObject_Repr( obj);
139 PyObject* obj_repr_u = PyUnicode_AsEncodedString( obj_repr, "utf-8", "~E~");
140 const char* obj_repr_s = PyString_AsString( obj_repr_u);
141 printf( "%s%s\n", prefix, obj_repr_s);
142 fflush(stdout);
143 }
144
145 static PyObject *g_img_info = NULL;
146 static fz_matrix g_img_info_matrix = {0};
147
148 static fz_image *
149 JM_image_filter(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image)
150 {
151 fz_quad q = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm);
152 #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22
153 q = fz_transform_quad( q, g_img_info_matrix);
154 #endif
155 PyObject *temp = Py_BuildValue("sN", name, JM_py_from_quad(q));
156
157 LIST_APPEND_DROP(g_img_info, temp);
158 return image;
159 }
160
161 #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22
162
163 static PyObject *
164 JM_image_reporter(fz_context *ctx, pdf_page *page)
165 {
166 pdf_document *doc = page->doc;
167
168 pdf_page_transform(ctx, page, NULL, &g_img_info_matrix);
169 pdf_filter_options filter_options = {0};
170 filter_options.recurse = 0;
171 filter_options.instance_forms = 1;
172 filter_options.ascii = 1;
173 filter_options.no_update = 1;
174
175 pdf_sanitize_filter_options sanitize_filter_options = {0};
176 sanitize_filter_options.opaque = page;
177 sanitize_filter_options.image_filter = JM_image_filter;
178
179 pdf_filter_factory filter_factory[2] = {0};
180 filter_factory[0].filter = pdf_new_sanitize_filter;
181 filter_factory[0].options = &sanitize_filter_options;
182
183 filter_options.filters = filter_factory; // was &
184
185 g_img_info = PyList_New(0);
186
187 pdf_filter_page_contents(ctx, doc, page, &filter_options);
188
189 PyObject *rc = PySequence_Tuple(g_img_info);
190 Py_CLEAR(g_img_info);
191
192 return rc;
193 }
194
195 #else
196
197 void
198 JM_filter_content_stream(
199 fz_context * ctx,
200 pdf_document * doc,
201 pdf_obj * in_stm,
202 pdf_obj * in_res,
203 fz_matrix transform,
204 pdf_filter_options * filter,
205 int struct_parents,
206 fz_buffer **out_buf,
207 pdf_obj **out_res)
208 {
209 pdf_processor *proc_buffer = NULL;
210 pdf_processor *proc_filter = NULL;
211
212 fz_var(proc_buffer);
213 fz_var(proc_filter);
214
215 *out_buf = NULL;
216 *out_res = NULL;
217
218 fz_try(ctx) {
219 *out_buf = fz_new_buffer(ctx, 1024);
220 proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, filter->ascii);
221 if (filter->sanitize) {
222 *out_res = pdf_new_dict(ctx, doc, 1);
223 proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, in_res, *out_res, struct_parents, transform, filter);
224 pdf_process_contents(ctx, proc_filter, doc, in_res, in_stm, NULL);
225 pdf_close_processor(ctx, proc_filter);
226 } else {
227 *out_res = pdf_keep_obj(ctx, in_res);
228 pdf_process_contents(ctx, proc_buffer, doc, in_res, in_stm, NULL);
229 }
230 pdf_close_processor(ctx, proc_buffer);
231 }
232 fz_always(ctx) {
233 pdf_drop_processor(ctx, proc_filter);
234 pdf_drop_processor(ctx, proc_buffer);
235 }
236 fz_catch(ctx) {
237 fz_drop_buffer(ctx, *out_buf);
238 *out_buf = NULL;
239 pdf_drop_obj(ctx, *out_res);
240 *out_res = NULL;
241 fz_rethrow(ctx);
242 }
243 }
244
245 PyObject *
246 JM_image_reporter(fz_context *ctx, pdf_page *page)
247 {
248 pdf_document *doc = page->doc;
249 pdf_filter_options filter;
250 memset(&filter, 0, sizeof filter);
251 filter.opaque = page;
252 filter.text_filter = NULL;
253 filter.image_filter = JM_image_filter;
254 filter.end_page = NULL;
255 filter.recurse = 0;
256 filter.instance_forms = 1;
257 filter.sanitize = 1;
258 filter.ascii = 1;
259
260 pdf_obj *contents, *old_res;
261 pdf_obj *struct_parents_obj;
262 pdf_obj *new_res;
263 fz_buffer *buffer;
264 int struct_parents;
265 fz_matrix ctm = fz_identity;
266 pdf_page_transform(ctx, page, NULL, &ctm);
267 struct_parents_obj = pdf_dict_get(ctx, page->obj, PDF_NAME(StructParents));
268 struct_parents = -1;
269 if (pdf_is_number(ctx, struct_parents_obj))
270 struct_parents = pdf_to_int(ctx, struct_parents_obj);
271
272 contents = pdf_page_contents(ctx, page);
273 old_res = pdf_page_resources(ctx, page);
274 g_img_info = PyList_New(0);
275 JM_filter_content_stream(ctx, doc, contents, old_res, ctm, &filter, struct_parents, &buffer, &new_res);
276 fz_drop_buffer(ctx, buffer);
277 pdf_drop_obj(ctx, new_res);
278 PyObject *rc = PySequence_Tuple(g_img_info);
279 Py_CLEAR(g_img_info);
280 return rc;
281 }
282
283 #endif
284
285 %}