comparison mupdf-source/source/fitz/xmltext-device.c @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25
26 static int s_xml_starttag_begin(fz_context *ctx, fz_output *out, const char *id)
27 {
28 fz_write_printf(ctx, out, "<%s", id);
29 return 0;
30 }
31
32 static int s_xml_starttag_end(fz_context *ctx, fz_output *out)
33 {
34 fz_write_printf(ctx, out, ">\n");
35 return 0;
36 }
37
38 static int s_xml_starttag_empty_end(fz_context *ctx, fz_output *out)
39 {
40 fz_write_printf(ctx, out, "/>\n");
41 return 0;
42 }
43
44 static int s_xml_endtag(fz_context *ctx, fz_output *out, const char *id)
45 {
46 fz_write_printf(ctx, out, "</%s>\n", id);
47 return 0;
48 }
49
50 static int s_write_attribute_int(fz_context *ctx, fz_output *out, const char *id, int value)
51 {
52 fz_write_printf(ctx, out, " %s=\"%i\"", id, value);
53 return 0;
54 }
55
56 static int s_write_attribute_size(fz_context *ctx, fz_output *out, const char *id, size_t value)
57 {
58 fz_write_printf(ctx, out, " %s=\"%zi\"", id, value);
59 return 0;
60 }
61
62 static int s_write_attribute_float(fz_context *ctx, fz_output *out, const char *id, float value)
63 {
64 fz_write_printf(ctx, out, " %s=\"%g\"", id, value);
65 return 0;
66 }
67
68 static int s_write_attribute_string(fz_context *ctx, fz_output *out, const char *id, const char *value)
69 {
70 fz_write_printf(ctx, out, " %s=\"%s\"", id, value);
71 return 0;
72 }
73
74 static int s_write_attribute_char(fz_context *ctx, fz_output *out, const char *id, char value)
75 {
76 if (value == '"') fz_write_printf(ctx, out, " %s=\"\\%c\"", id, value);
77 else fz_write_printf(ctx, out, " %s=\"%c\"", id, value);
78 return 0;
79 }
80
81 static int s_write_attribute_matrix(fz_context *ctx, fz_output *out, const char *id, const fz_matrix *matrix)
82 {
83 fz_write_printf(ctx, out,
84 " %s=\"%g %g %g %g %g %g\"",
85 id,
86 matrix->a,
87 matrix->b,
88 matrix->c,
89 matrix->d,
90 matrix->e,
91 matrix->f
92 );
93 return 0;
94 }
95
96
97
98
99 typedef struct
100 {
101 fz_device super;
102 fz_output *out;
103 } fz_xmltext_device;
104
105 static void
106 fz_xmltext_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
107 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
108 {
109 fz_xmltext_device *dev = (fz_xmltext_device*) dev_;
110
111 fz_text_span *span;
112 for (span = text->head; span; span = span->next)
113 {
114 int i;
115
116 s_xml_starttag_begin(ctx, dev->out, "span");
117 s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm);
118 s_write_attribute_string(ctx, dev->out, "font_name", span->font->name);
119 if (span->font->flags.is_mono) s_write_attribute_int(ctx, dev->out, "is_mono", 1);
120 if (span->font->flags.is_serif) s_write_attribute_int(ctx, dev->out, "is_serif", 1);
121 if (span->font->flags.is_italic) s_write_attribute_int(ctx, dev->out, "is_italic", 1);
122 if (span->font->flags.ft_substitute) s_write_attribute_int(ctx, dev->out, "ft_substitute", 1);
123 if (span->font->flags.ft_stretch) s_write_attribute_int(ctx, dev->out, "ft_stretch", 1);
124 if (span->font->flags.fake_bold) s_write_attribute_int(ctx, dev->out, "fake_bold", 1);
125 if (span->font->flags.fake_italic) s_write_attribute_int(ctx, dev->out, "fake_italic", 1);
126 if (span->font->flags.has_opentype) s_write_attribute_int(ctx, dev->out, "has_opentype", 1);
127 if (span->font->flags.invalid_bbox) s_write_attribute_int(ctx, dev->out, "invalid_bbox", 1);
128 s_write_attribute_matrix(ctx, dev->out, "trm", &span->trm);
129 s_write_attribute_int(ctx, dev->out, "len", span->len);
130 s_write_attribute_int(ctx, dev->out, "wmode", span->wmode);
131 s_write_attribute_int(ctx, dev->out, "bidi_level", span->bidi_level);
132 s_write_attribute_int(ctx, dev->out, "markup_dir", span->markup_dir);
133 s_write_attribute_int(ctx, dev->out, "language", span->language);
134 s_write_attribute_int(ctx, dev->out, "cap", span->cap);
135 s_xml_starttag_end(ctx, dev->out);
136
137 for (i=0; i<span->len; ++i)
138 {
139 fz_text_item *item = &span->items[i];
140
141 s_xml_starttag_begin(ctx, dev->out, "char");
142 s_write_attribute_float(ctx, dev->out, "x", item->x);
143 s_write_attribute_float(ctx, dev->out, "y", item->y);
144 s_write_attribute_int(ctx, dev->out, "gid", item->gid);
145 s_write_attribute_int(ctx, dev->out, "ucs", item->ucs);
146
147 /*
148 * Firefox complains if we put special characters here; it's only for debugging
149 * so this isn't really a problem.
150 */
151 s_write_attribute_char(ctx, dev->out, "debug_char",
152 (item->ucs >= 32 && item->ucs < 128 && item->ucs != '"')
153 ? item->ucs : ' '
154 );
155 s_write_attribute_float(ctx, dev->out, "adv", span->items[i].adv);
156 s_xml_starttag_empty_end(ctx, dev->out);
157 }
158
159 s_xml_endtag(ctx, dev->out, "span");
160 }
161 }
162
163 static void
164 fz_xmltext_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
165 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
166 {
167 fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
168 }
169
170 static void
171 fz_xmltext_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
172 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
173 {
174 fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
175 }
176
177 static void
178 fz_xmltext_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
179 {
180 fz_xmltext_text(ctx, dev_, text, ctm, NULL, NULL, 0 /*alpha*/, fz_default_color_params);
181 }
182
183 static void
184 fz_xmltext_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
185 {
186 fz_xmltext_text(ctx, dev_, text, ctm, NULL, 0, 0, fz_default_color_params);
187 }
188
189 static void
190 fz_xmltext_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm)
191 {
192 }
193
194 static void
195 fz_stext_close_device(fz_context *ctx, fz_device *dev_)
196 {
197 }
198
199
200
201 static void fz_xmltext_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
202 {
203 fz_xmltext_device *dev = (fz_xmltext_device*) dev_;
204 fz_pixmap *pixmap = NULL;
205 fz_try(ctx)
206 {
207 const char *type = NULL;
208 fz_compressed_buffer *compressed;
209 s_xml_starttag_begin(ctx, dev->out, "image");
210 /* First try to write compressed data. */
211 compressed = fz_compressed_image_buffer(ctx, img);
212 if (compressed)
213 {
214 if (compressed->params.type == FZ_IMAGE_UNKNOWN)
215 {
216 /* unknown image type. */
217 }
218 else if (compressed->params.type == FZ_IMAGE_RAW)
219 {
220 type = "raw";
221 s_write_attribute_string(ctx, dev->out, "type", type);
222 }
223 else if (compressed->params.type == FZ_IMAGE_FAX)
224 {
225 type = "fax";
226 s_write_attribute_string(ctx, dev->out, "type", type);
227 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.fax.columns);
228 s_write_attribute_int(ctx, dev->out, "rows", compressed->params.u.fax.rows);
229 s_write_attribute_int(ctx, dev->out, "k", compressed->params.u.fax.k);
230 s_write_attribute_int(ctx, dev->out, "end_of_line", compressed->params.u.fax.end_of_line);
231 s_write_attribute_int(ctx, dev->out, "encoded_byte_align", compressed->params.u.fax.encoded_byte_align);
232 s_write_attribute_int(ctx, dev->out, "end_of_block", compressed->params.u.fax.end_of_block);
233 s_write_attribute_int(ctx, dev->out, "black_is_1", compressed->params.u.fax.black_is_1);
234 s_write_attribute_int(ctx, dev->out, "damaged_rows_before_error", compressed->params.u.fax.damaged_rows_before_error);
235 }
236 else if (compressed->params.type == FZ_IMAGE_FLATE)
237 {
238 type = "flate";
239 s_write_attribute_string(ctx, dev->out, "type", type);
240 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.flate.columns);
241 s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.flate.colors);
242 s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.flate.predictor);
243 s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.flate.bpc);
244 }
245 else if (compressed->params.type == FZ_IMAGE_BROTLI)
246 {
247 type = "brotli";
248 s_write_attribute_string(ctx, dev->out, "type", type);
249 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.brotli.columns);
250 s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.brotli.colors);
251 s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.brotli.predictor);
252 s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.brotli.bpc);
253 }
254 else if (compressed->params.type == FZ_IMAGE_LZW)
255 {
256 type = "lzw";
257 s_write_attribute_string(ctx, dev->out, "type", type);
258 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.lzw.columns);
259 s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.lzw.colors);
260 s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.lzw.predictor);
261 s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.lzw.bpc);
262 s_write_attribute_int(ctx, dev->out, "early_change", compressed->params.u.lzw.early_change);
263 }
264 else if (compressed->params.type == FZ_IMAGE_BMP)
265 {
266 type = "bmp";
267 s_write_attribute_string(ctx, dev->out, "type", type);
268 }
269 else if (compressed->params.type == FZ_IMAGE_GIF)
270 {
271 type = "gif";
272 s_write_attribute_string(ctx, dev->out, "type", type);
273 }
274 else if (compressed->params.type == FZ_IMAGE_JBIG2)
275 {
276 type = "jbig2";
277 s_write_attribute_string(ctx, dev->out, "type", type);
278 /* do we need to write out *compressed->params.globals somehow? */
279 }
280 else if (compressed->params.type == FZ_IMAGE_JPEG)
281 {
282 type = "jpeg";
283 s_write_attribute_string(ctx, dev->out, "type", type);
284 s_write_attribute_int(ctx, dev->out, "color_transform", compressed->params.u.jpeg.color_transform);
285 if (compressed->params.u.jpeg.invert_cmyk)
286 s_write_attribute_int(ctx, dev->out, "invert_cmyk", 1);
287 }
288 else if (compressed->params.type == FZ_IMAGE_JPX)
289 {
290 type = "jpx";
291 s_write_attribute_string(ctx, dev->out, "type", type);
292 s_write_attribute_int(ctx, dev->out, "smask_in_data", compressed->params.u.jpx.smask_in_data);
293 }
294 else if (compressed->params.type == FZ_IMAGE_JXR)
295 {
296 type = "jxr";
297 s_write_attribute_string(ctx, dev->out, "type", type);
298 }
299 else if (compressed->params.type == FZ_IMAGE_PNG)
300 {
301 type = "png";
302 s_write_attribute_string(ctx, dev->out, "type", type);
303 }
304 else if (compressed->params.type == FZ_IMAGE_PNM)
305 {
306 type = "pnm";
307 s_write_attribute_string(ctx, dev->out, "type", type);
308 }
309 else if (compressed->params.type == FZ_IMAGE_TIFF)
310 {
311 type = "tiff";
312 s_write_attribute_string(ctx, dev->out, "type", type);
313 }
314 else
315 {
316 /* Unrecognised. */
317 }
318
319 if (type)
320 {
321 /* Write out raw data. */
322 unsigned char *data;
323 size_t datasize = fz_buffer_storage(ctx, compressed->buffer, &data);
324 size_t i;
325 s_write_attribute_size(ctx, dev->out, "datasize", datasize);
326 s_xml_starttag_end(ctx, dev->out);
327 for (i=0; i<datasize; ++i)
328 {
329 if (i % 32 == 0) fz_write_printf(ctx, dev->out, "\n ");
330 if (i % 4 == 0) fz_write_printf(ctx, dev->out, " ");
331 fz_write_printf(ctx, dev->out, "%02x", data[i]);
332 }
333 fz_write_printf(ctx, dev->out, "\n");
334 }
335 }
336
337 if (!type)
338 {
339 /* Compressed data not available, so write out raw pixel values. */
340 int l2factor = 0;
341 int y;
342 s_write_attribute_string(ctx, dev->out, "type", "pixmap");
343 s_xml_starttag_end(ctx, dev->out);
344 pixmap = img->get_pixmap(ctx, img, NULL /*subarea*/, img->w, img->h, &l2factor);
345 s_write_attribute_int(ctx, dev->out, "x", pixmap->x);
346 s_write_attribute_int(ctx, dev->out, "y", pixmap->y);
347 s_write_attribute_int(ctx, dev->out, "w", pixmap->w);
348 s_write_attribute_int(ctx, dev->out, "h", pixmap->h);
349 s_write_attribute_int(ctx, dev->out, "n", pixmap->n);
350 s_write_attribute_int(ctx, dev->out, "s", pixmap->s);
351 s_write_attribute_int(ctx, dev->out, "alpha", pixmap->alpha);
352 s_write_attribute_int(ctx, dev->out, "flags", pixmap->flags);
353 s_write_attribute_int(ctx, dev->out, "xres", pixmap->xres);
354 s_write_attribute_int(ctx, dev->out, "yres", pixmap->yres);
355 s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm);
356 s_xml_starttag_end(ctx, dev->out);
357 for (y=0; y<pixmap->h; ++y)
358 {
359 int x;
360 s_xml_starttag_begin(ctx, dev->out, "line");
361 s_write_attribute_int(ctx, dev->out, "y", y);
362 s_xml_starttag_end(ctx, dev->out);
363 for (x=0; x<pixmap->w; ++x)
364 {
365 int b;
366 fz_write_printf(ctx, dev->out, " ");
367 for (b=0; b<pixmap->n; ++b)
368 {
369 fz_write_printf(ctx, dev->out, "%02x", pixmap->samples[y*(size_t)pixmap->stride + x*(size_t)pixmap->n + b]);
370 }
371 }
372 s_xml_endtag(ctx, dev->out, "line");
373 }
374 }
375 s_xml_endtag(ctx, dev->out, "image");
376 }
377 fz_always(ctx)
378 {
379 fz_drop_pixmap(ctx, pixmap);
380 }
381 fz_catch(ctx)
382 {
383 fz_rethrow(ctx);
384 }
385 }
386
387 fz_device *fz_new_xmltext_device(fz_context *ctx, fz_output *out)
388 {
389 fz_xmltext_device *dev = fz_new_derived_device(ctx, fz_xmltext_device);
390
391 dev->super.close_device = fz_stext_close_device;
392
393 dev->super.fill_text = fz_xmltext_fill_text;
394 dev->super.stroke_text = fz_xmltext_stroke_text;
395 dev->super.clip_text = fz_xmltext_clip_text;
396 dev->super.clip_stroke_text = fz_xmltext_clip_stroke_text;
397 dev->super.ignore_text = fz_xmltext_ignore_text;
398 dev->super.fill_image = fz_xmltext_fill_image;
399
400 dev->out = out;
401
402 return (fz_device*)dev;
403 }