comparison mupdf-source/include/mupdf/pdf/interpret.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #ifndef PDF_INTERPRET_H
24 #define PDF_INTERPRET_H
25
26 #include "mupdf/pdf/font.h"
27 #include "mupdf/pdf/resource.h"
28 #include "mupdf/pdf/document.h"
29
30 typedef struct pdf_gstate pdf_gstate; /* only forward-declared in this header */
31 typedef struct pdf_processor pdf_processor; /* full definition follows below */
32
33 void *pdf_new_processor(fz_context *ctx, int size); /* NOTE(review): size is presumably the byte size of the (possibly derived) processor struct to allocate -- confirm */
34 pdf_processor *pdf_keep_processor(fz_context *ctx, pdf_processor *proc); /* NOTE(review): presumably takes an extra reference (cf. pdf_processor.refs) -- confirm */
35 void pdf_close_processor(fz_context *ctx, pdf_processor *proc); /* close proc; pdf_reset_processor can reopen processors that support it */
36 void pdf_drop_processor(fz_context *ctx, pdf_processor *proc); /* NOTE(review): presumably releases a reference -- confirm */
37
38 typedef enum /* Values stored in the requirements field of struct pdf_processor. */
39 {
40 PDF_PROCESSOR_REQUIRES_DECODED_IMAGES = 1 /* NOTE(review): by its name, the processor needs images to be decoded before they reach it -- confirm */
41 } pdf_processor_requirements;
42
43 struct pdf_processor /* Callback table that receives a PDF operator stream: lifecycle hooks, resource-stack hooks, and one function pointer per operator. */
44 {
45 int refs; /* NOTE(review): presumably the reference count used by pdf_keep_processor/pdf_drop_processor -- confirm */
46
47 int closed; /* NOTE(review): presumably non-zero once the processor has been closed -- confirm */
48
49 /* close the processor. Also closes any chained processors. */
50 void (*close_processor)(fz_context *ctx, pdf_processor *proc);
51 void (*drop_processor)(fz_context *ctx, pdf_processor *proc); /* NOTE(review): presumably frees type-specific state when the processor is destroyed -- confirm */
52 void (*reset_processor)(fz_context *ctx, pdf_processor *proc); /* NOTE(review): presumably the hook behind pdf_reset_processor -- confirm */
53
54 /* At any stage, we can have one set of resources in place.
55 * This function gives us a set of resources to use. We remember
56 * any previous set on a stack, so we can pop back to it later.
57 * Our responsibility (as well as remembering it for our own use)
58 * is to pass either it, or a filtered version of it onto any
59 * chained processor. */
60 void (*push_resources)(fz_context *ctx, pdf_processor *proc, pdf_obj *res);
61 /* Pop the resources stack. This must be passed on to any chained
62 * processors. This returns a pointer to the resource dict just
63 * popped by the deepest filter. The caller inherits this reference. */
64 pdf_obj *(*pop_resources)(fz_context *ctx, pdf_processor *proc);
65
66 /* general graphics state */
67 void (*op_w)(fz_context *ctx, pdf_processor *proc, float linewidth);
68 void (*op_j)(fz_context *ctx, pdf_processor *proc, int linejoin);
69 void (*op_J)(fz_context *ctx, pdf_processor *proc, int linecap);
70 void (*op_M)(fz_context *ctx, pdf_processor *proc, float miterlimit);
71 void (*op_d)(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase);
72 void (*op_ri)(fz_context *ctx, pdf_processor *proc, const char *intent);
73 void (*op_i)(fz_context *ctx, pdf_processor *proc, float flatness);
74
75 void (*op_gs_begin)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate);
76 void (*op_gs_BM)(fz_context *ctx, pdf_processor *proc, const char *blendmode);
77 void (*op_gs_ca)(fz_context *ctx, pdf_processor *proc, float alpha);
78 void (*op_gs_CA)(fz_context *ctx, pdf_processor *proc, float alpha);
79 void (*op_gs_SMask)(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, fz_colorspace *smask_cs, float *bc, int luminosity, pdf_obj *tr);
80 void (*op_gs_end)(fz_context *ctx, pdf_processor *proc);
81
82 /* special graphics state */
83 void (*op_q)(fz_context *ctx, pdf_processor *proc);
84 void (*op_Q)(fz_context *ctx, pdf_processor *proc);
85 void (*op_cm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f);
86
87 /* path construction */
88 void (*op_m)(fz_context *ctx, pdf_processor *proc, float x, float y);
89 void (*op_l)(fz_context *ctx, pdf_processor *proc, float x, float y);
90 void (*op_c)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3);
91 void (*op_v)(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3);
92 void (*op_y)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3);
93 void (*op_h)(fz_context *ctx, pdf_processor *proc);
94 void (*op_re)(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h);
95
96 /* path painting */
97 void (*op_S)(fz_context *ctx, pdf_processor *proc);
98 void (*op_s)(fz_context *ctx, pdf_processor *proc);
99 void (*op_F)(fz_context *ctx, pdf_processor *proc);
100 void (*op_f)(fz_context *ctx, pdf_processor *proc);
101 void (*op_fstar)(fz_context *ctx, pdf_processor *proc); /* the "f*" operator */
102 void (*op_B)(fz_context *ctx, pdf_processor *proc);
103 void (*op_Bstar)(fz_context *ctx, pdf_processor *proc); /* the "B*" operator */
104 void (*op_b)(fz_context *ctx, pdf_processor *proc);
105 void (*op_bstar)(fz_context *ctx, pdf_processor *proc); /* the "b*" operator */
106 void (*op_n)(fz_context *ctx, pdf_processor *proc);
107
108 /* clipping paths */
109 void (*op_W)(fz_context *ctx, pdf_processor *proc);
110 void (*op_Wstar)(fz_context *ctx, pdf_processor *proc); /* the "W*" operator */
111
112 /* text objects */
113 void (*op_BT)(fz_context *ctx, pdf_processor *proc);
114 void (*op_ET)(fz_context *ctx, pdf_processor *proc);
115
116 /* text state */
117 void (*op_Tc)(fz_context *ctx, pdf_processor *proc, float charspace);
118 void (*op_Tw)(fz_context *ctx, pdf_processor *proc, float wordspace);
119 void (*op_Tz)(fz_context *ctx, pdf_processor *proc, float scale);
120 void (*op_TL)(fz_context *ctx, pdf_processor *proc, float leading);
121 void (*op_Tf)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size);
122 void (*op_Tr)(fz_context *ctx, pdf_processor *proc, int render);
123 void (*op_Ts)(fz_context *ctx, pdf_processor *proc, float rise);
124
125 /* text positioning */
126 void (*op_Td)(fz_context *ctx, pdf_processor *proc, float tx, float ty);
127 void (*op_TD)(fz_context *ctx, pdf_processor *proc, float tx, float ty);
128 void (*op_Tm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f);
129 void (*op_Tstar)(fz_context *ctx, pdf_processor *proc); /* the "T*" operator */
130
131 /* text showing */
132 void (*op_TJ)(fz_context *ctx, pdf_processor *proc, pdf_obj *array);
133 void (*op_Tj)(fz_context *ctx, pdf_processor *proc, char *str, size_t len);
134 void (*op_squote)(fz_context *ctx, pdf_processor *proc, char *str, size_t len); /* the single-quote (') operator */
135 void (*op_dquote)(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len); /* the double-quote (") operator */
136
137 /* type 3 fonts */
138 void (*op_d0)(fz_context *ctx, pdf_processor *proc, float wx, float wy);
139 void (*op_d1)(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury);
140
141 /* color */
142 void (*op_CS)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs);
143 void (*op_cs)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs);
144 void (*op_SC_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color);
145 void (*op_sc_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color);
146 void (*op_SC_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
147 void (*op_sc_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
148 void (*op_SC_color)(fz_context *ctx, pdf_processor *proc, int n, float *color);
149 void (*op_sc_color)(fz_context *ctx, pdf_processor *proc, int n, float *color);
150
151 void (*op_G)(fz_context *ctx, pdf_processor *proc, float g);
152 void (*op_g)(fz_context *ctx, pdf_processor *proc, float g);
153 void (*op_RG)(fz_context *ctx, pdf_processor *proc, float r, float g, float b);
154 void (*op_rg)(fz_context *ctx, pdf_processor *proc, float r, float g, float b);
155 void (*op_K)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k);
156 void (*op_k)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k);
157
158 /* shadings, images, xobjects */
159 void (*op_BI)(fz_context *ctx, pdf_processor *proc, fz_image *image, const char *colorspace_name);
160 void (*op_sh)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
161 void (*op_Do_image)(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image);
162 void (*op_Do_form)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *form);
163
164 /* marked content */
165 void (*op_MP)(fz_context *ctx, pdf_processor *proc, const char *tag);
166 void (*op_DP)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked);
167 void (*op_BMC)(fz_context *ctx, pdf_processor *proc, const char *tag);
168 void (*op_BDC)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked);
169 void (*op_EMC)(fz_context *ctx, pdf_processor *proc);
170
171 /* compatibility */
172 void (*op_BX)(fz_context *ctx, pdf_processor *proc);
173 void (*op_EX)(fz_context *ctx, pdf_processor *proc);
174
175 /* Virtual ops for ExtGState entries */
176 void (*op_gs_OP)(fz_context *ctx, pdf_processor *proc, int b);
177 void (*op_gs_op)(fz_context *ctx, pdf_processor *proc, int b);
178 void (*op_gs_OPM)(fz_context *ctx, pdf_processor *proc, int i);
179 void (*op_gs_UseBlackPtComp)(fz_context *ctx, pdf_processor *proc, pdf_obj *name);
180
181 /* EOD is used to signify end of data (before any finalise/close down/
182 * automatically added gstate pops). */
183 void (*op_EOD)(fz_context *ctx, pdf_processor *proc);
184
185 /* END is used to signify end of stream (finalise and close down) */
186 void (*op_END)(fz_context *ctx, pdf_processor *proc);
187
188 /* interpreter state that persists across content streams */
189 const char *usage;
190 int hidden;
191
192 pdf_processor_requirements requirements; /* see the pdf_processor_requirements enum */
193 };
194
195 typedef struct /* pdf_csi -- NOTE(review): looks like the working state used while interpreting a content stream (inputs, parser state, operand storage); confirm against the interpreter source */
196 {
197 /* input */
198 pdf_document *doc;
199 pdf_obj *rdb; /* NOTE(review): presumably the resource dictionary in effect -- confirm */
200 pdf_lexbuf *buf;
201 fz_cookie *cookie;
202
203 /* state */
204 int gstate;
205 int xbalance; /* NOTE(review): presumably tracks BX/EX nesting -- confirm */
206 int in_text; /* NOTE(review): presumably set while between BT and ET -- confirm */
207 fz_rect d1_rect; /* NOTE(review): presumably the box supplied to the d1 operator -- confirm */
208
209 /* stack */
210 pdf_obj *obj;
211 char name[256];
212 char string[256];
213 size_t string_len; /* NOTE(review): presumably the number of valid bytes in string -- confirm */
214 int top; /* NOTE(review): presumably the number of entries currently used in stack -- confirm */
215 float stack[32]; /* fixed capacity of 32 float entries */
216 } pdf_csi;
217
218 void pdf_count_q_balance(fz_context *ctx, pdf_document *doc, pdf_obj *res, pdf_obj *stm, int *prepend, int *append); /* NOTE(review): prepend/append are output parameters; presumably the numbers of q/Q operators needed before/after stm to balance it -- confirm */
219
220 /* Functions to set up pdf_processor structures */
221
222 pdf_processor *pdf_new_run_processor(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_matrix ctm, int struct_parent, const char *usage, pdf_gstate *gstate, fz_default_colorspaces *default_cs, fz_cookie *cookie, pdf_gstate *fill_gstate, pdf_gstate *stroke_gstate); /* NOTE(review): presumably creates a processor that runs the operators it receives onto dev -- confirm; the remaining parameters are not documented in this header */
223
224 /*
225 Create a buffer processor.
226
227 This collects the incoming PDF operator stream into an fz_buffer.
228
229 buffer: The (possibly empty) buffer to which operators will be
230 appended.
231
232 ahxencode: If 0, then image streams will be sent as binary,
233 otherwise they will be ASCIIHex encoded.
234
235 newlines: If 0, then minimal spacing will be sent. If 1
236 then newlines will be sent after every operator.
237 */
238 pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode, int newlines);
239
240 /*
241 Reopen a closed processor to be used again.
242
243 This brings a processor back to life after a close.
244 Not all processors support this, so this may throw
245 an exception.
246 */
247 void pdf_reset_processor(fz_context *ctx, pdf_processor *proc); /* NOTE(review): presumably dispatches to the reset_processor hook of struct pdf_processor -- confirm */
248
249
250 /*
251 Create an output processor. This
252 sends the incoming PDF operator stream to an fz_output stream.
253
254 out: The output stream to which operators will be sent.
255
256 ahxencode: If 0, then image streams will be sent as binary,
257 otherwise they will be ASCIIHex encoded.
258
259 newlines: If 0, then minimal spacing will be sent. If 1
260 then newlines will be sent after every operator.
261 */
262 pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode, int newlines);
263
264 typedef struct pdf_filter_options pdf_filter_options;
265
266 /*
267 Create a filter processor. This filters the PDF operators
268 it is fed, and passes them down (with some changes) to the
269 child filter.
270
271 chain: The child processor to which the filtered operators
272 will be fed.
273
274 The options field of a pdf_filter_factory points to a structure
275 holding filter-specific options (NOTE(review): presumably passed here as factory_options -- confirm).
276 */
277 typedef pdf_processor *(pdf_filter_factory_fn)(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *factory_options);
278
279 /*
280 A pdf_filter_factory is a pdf_filter_factory_fn, plus the options
281 needed to instantiate it.
282 */
283 typedef struct
284 {
285 pdf_filter_factory_fn *filter; /* factory function; a NULL value terminates the filters array of pdf_filter_options */
286 void *options; /* filter-specific options for this factory */
287 } pdf_filter_factory;
288
289 /*
290 recurse: Filter resources recursively.
291
292 instance_forms: Always recurse on XObject Form resources, but
293 create a new instance of each XObject Form that is used, filtered
294 individually.
295
296 ascii: If true, escape all binary data in the output.
297
298 no_update: If true, do not update the document at the end.
299
300 opaque: Opaque value that is passed to the complete function.
301
302 complete: A function called at the end of processing.
303 This allows the caller to insert some extra content after
304 all other content.
305
306 filters: Pointer to an array of filter factory/options.
307 The array is terminated by an entry whose filter pointer is NULL.
308 Operators will be fed into the filter generated from the first
309 factory function in the list, and from there go to the filter
310 generated from the second factory in the list etc.
311
312 newlines: If 0, then minimal whitespace will be produced. If 1,
313 then a newline will be sent after every operator.
314 */
315 struct pdf_filter_options
316 {
317 int recurse;
318 int instance_forms;
319 int ascii;
320 int no_update;
321
322 void *opaque;
323 void (*complete)(fz_context *ctx, fz_buffer *buffer, void *opaque);
324
325 pdf_filter_factory *filters;
326 int newlines;
327 };
328
329 typedef enum /* Object kinds passed to the culler callback of pdf_sanitize_filter_options. Values 0-7 are combinations of fill (1), stroke (2) and clip path (4). */
330 {
331 FZ_CULL_PATH_DROP = 0,
332 FZ_CULL_PATH_FILL = 1,
333 FZ_CULL_PATH_STROKE = 2,
334 FZ_CULL_PATH_FILL_STROKE = 3,
335 FZ_CULL_CLIP_PATH_DROP = 4,
336 FZ_CULL_CLIP_PATH_FILL = 5,
337 FZ_CULL_CLIP_PATH_STROKE = 6,
338 FZ_CULL_CLIP_PATH_FILL_STROKE = 7,
339 FZ_CULL_GLYPH = 8,
340 FZ_CULL_IMAGE, /* implicitly 9 */
341 FZ_CULL_SHADING /* implicitly 10 */
342 } fz_cull_type;
343
344 /*
345 image_filter: A function called to assess whether a given
346 image should be removed or not.
347
348 text_filter: A function called to assess whether a given
349 character should be removed or not.
350
351 after_text_object: A function called after each text object.
352 This allows the caller to insert some extra content if
353 desired.
354
355 culler: A function called to see whether each object should
356 be culled or not.
357 */
358 typedef struct
359 {
360 void *opaque; /* handed back as the opaque argument of each callback below */
361 fz_image *(*image_filter)(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect scissor);
362 int (*text_filter)(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox);
363 void (*after_text_object)(fz_context *ctx, void *opaque, pdf_document *doc, pdf_processor *chain, fz_matrix ctm);
364 int (*culler)(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type);
365 }
366 pdf_sanitize_filter_options;
367
368 /*
369 A sanitize filter factory.
370
371 sopts = pointer to pdf_sanitize_filter_options.
372
373 The changes made by a filter generated from this are:
374
375 * No operations are allowed to change the top level gstate.
376 Additional q/Q operators are inserted to prevent this.
377
378 * Repeated/unnecessary colour operators are removed (so,
379 for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to
380 "0.5 g")
381
382 The intention of these changes is to provide a simpler,
383 but equivalent stream, repairing problems with mismatched
384 operators, maintaining structure (such as BMC, EMC calls)
385 and leaving the graphics state in a known (default) state
386 so that subsequent operations (such as synthesising new
387 operators to be appended to the stream) are easier.
388
389 The net graphical effect of the filtered operator stream
390 should be identical to the incoming operator stream.
391 */
392 pdf_processor *pdf_new_sanitize_filter(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *sopts);
393
394 pdf_obj *pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix ctm, pdf_filter_options *options, pdf_cycle_list *cycle_up); /* NOTE(review): presumably returns a newly filtered instance of old_xobj (cf. instance_forms in pdf_filter_options) -- confirm, including ownership of the result */
395
396 void pdf_processor_push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res); /* NOTE(review): presumably invokes the push_resources hook of proc -- confirm */
397
398 pdf_obj *pdf_processor_pop_resources(fz_context *ctx, pdf_processor *proc); /* NOTE(review): presumably invokes the pop_resources hook of proc, whose documented contract is that the caller inherits the returned reference -- confirm */
399
400 /*
401 opaque: Opaque value that is passed to all the filter functions.
402
403 color_rewrite: function pointer called to rewrite a color
404 On entry:
405 *cs = reference to a pdf object representing the colorspace.
406
407 *n = number of color components
408
409 color = *n color values.
410
411 On exit:
412 *cs either the same (for no change in colorspace) or
413 updated to be a new one. Reference must be dropped, and
414 a new kept reference returned!
415
416 *n = number of color components (maybe updated)
417
418 color = *n color values (maybe updated)
419
420 image_rewrite: function pointer called to rewrite an image
421 On entry:
422 *image = reference to an fz_image.
423
424 On exit:
425 *image either the same (for no change) or updated
426 to be a new one. Reference must be dropped, and a
427 new kept reference returned.
428
429 shade_rewrite: function pointer called to rewrite a shade
430
431 repeated_image_rewrite: If 0, then each image is rewritten only once.
432 Otherwise, it is called for every instance (useful if gathering
433 information about the ctm).
434 */
435 typedef struct
436 {
437 void *opaque;
438 void (*color_rewrite)(fz_context *ctx, void *opaque, pdf_obj **cs, int *n, float color[FZ_MAX_COLORS]);
439 void (*image_rewrite)(fz_context *ctx, void *opaque, fz_image **image, fz_matrix ctm, pdf_obj *obj);
440 pdf_shade_recolorer *shade_rewrite;
441 int repeated_image_rewrite;
442 } pdf_color_filter_options;
443
444 pdf_processor *
445 pdf_new_color_filter(fz_context *ctx, pdf_document *doc, pdf_processor *chain, int struct_parents, fz_matrix transform, pdf_filter_options *options, void *copts); /* Same parameter list as pdf_filter_factory_fn. NOTE(review): copts is presumably a pointer to pdf_color_filter_options -- confirm */
446
447 /*
448 Functions to actually process annotations, glyphs and general stream objects.
449 */
450 void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *res, pdf_obj *stm, fz_cookie *cookie, pdf_obj **out_res); /* NOTE(review): out_res is an output parameter whose contract is not documented in this header */
451 void pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_annot *annot, fz_cookie *cookie);
452 void pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *resources, fz_buffer *contents);
453
454 /*
455 Function to process a contents stream without handling the resources.
456 The caller is responsible for pushing/popping the resources.
457 */
458 void pdf_process_raw_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie); /* NOTE(review): pushing/popping is presumably done with pdf_processor_push_resources / pdf_processor_pop_resources -- confirm */
459
460 /* Text handling helper functions */
461 typedef struct /* pdf_text_state -- NOTE(review): the fields appear to mirror the arguments of the text state callbacks in struct pdf_processor; confirm */
462 {
463 float char_space; /* cf. op_Tc (charspace) */
464 float word_space; /* cf. op_Tw (wordspace) */
465 float scale; /* cf. op_Tz (scale) */
466 float leading; /* cf. op_TL (leading) */
467 pdf_font_desc *font; /* cf. op_Tf (font) */
468 fz_string *fontname; /* NOTE(review): presumably the resource name given to op_Tf -- confirm */
469 float size; /* cf. op_Tf (size) */
470 int render; /* cf. op_Tr (render) */
471 float rise; /* cf. op_Ts (rise) */
472 } pdf_text_state;
473
474 typedef struct /* pdf_text_object_state: the state operated on by the pdf_tos_* helpers declared below */
475 {
476 fz_text *text; /* cf. pdf_tos_get_text */
477 fz_rect text_bbox;
478 fz_matrix tlm; /* NOTE(review): presumably the text line matrix -- confirm */
479 fz_matrix tm; /* NOTE(review): presumably the text matrix -- confirm */
480 int text_mode;
481
482 int cid; /* NOTE(review): this and the following fields look like per-character values -- confirm */
483 int gid;
484 fz_rect char_bbox;
485 pdf_font_desc *fontdesc;
486 float char_tx;
487 float char_ty;
488 } pdf_text_object_state;
489
490 void pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]); /* NOTE(review): presumably copies two matrices of tos into save for a later pdf_tos_restore -- confirm which ones */
491 void pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]); /* takes the same save[2] array shape as pdf_tos_save */
492 fz_text *pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos); /* NOTE(review): ownership of the returned fz_text is not documented in this header */
493 void pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render);
494 int pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm, float *adv); /* NOTE(review): trm and adv look like output parameters; the meaning of the int result is not documented in this header */
495 void pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos);
496 void pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty); /* unlike the functions above, takes no fz_context */
497 void pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f); /* takes no fz_context */
498 void pdf_tos_newline(pdf_text_object_state *tos, float leading); /* takes no fz_context */
499
500 #endif