Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/xmltext-device.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 | |
| 26 static int s_xml_starttag_begin(fz_context *ctx, fz_output *out, const char *id) | |
| 27 { | |
| 28 fz_write_printf(ctx, out, "<%s", id); | |
| 29 return 0; | |
| 30 } | |
| 31 | |
| 32 static int s_xml_starttag_end(fz_context *ctx, fz_output *out) | |
| 33 { | |
| 34 fz_write_printf(ctx, out, ">\n"); | |
| 35 return 0; | |
| 36 } | |
| 37 | |
| 38 static int s_xml_starttag_empty_end(fz_context *ctx, fz_output *out) | |
| 39 { | |
| 40 fz_write_printf(ctx, out, "/>\n"); | |
| 41 return 0; | |
| 42 } | |
| 43 | |
| 44 static int s_xml_endtag(fz_context *ctx, fz_output *out, const char *id) | |
| 45 { | |
| 46 fz_write_printf(ctx, out, "</%s>\n", id); | |
| 47 return 0; | |
| 48 } | |
| 49 | |
| 50 static int s_write_attribute_int(fz_context *ctx, fz_output *out, const char *id, int value) | |
| 51 { | |
| 52 fz_write_printf(ctx, out, " %s=\"%i\"", id, value); | |
| 53 return 0; | |
| 54 } | |
| 55 | |
| 56 static int s_write_attribute_size(fz_context *ctx, fz_output *out, const char *id, size_t value) | |
| 57 { | |
| 58 fz_write_printf(ctx, out, " %s=\"%zi\"", id, value); | |
| 59 return 0; | |
| 60 } | |
| 61 | |
| 62 static int s_write_attribute_float(fz_context *ctx, fz_output *out, const char *id, float value) | |
| 63 { | |
| 64 fz_write_printf(ctx, out, " %s=\"%g\"", id, value); | |
| 65 return 0; | |
| 66 } | |
| 67 | |
| 68 static int s_write_attribute_string(fz_context *ctx, fz_output *out, const char *id, const char *value) | |
| 69 { | |
| 70 fz_write_printf(ctx, out, " %s=\"%s\"", id, value); | |
| 71 return 0; | |
| 72 } | |
| 73 | |
| 74 static int s_write_attribute_char(fz_context *ctx, fz_output *out, const char *id, char value) | |
| 75 { | |
| 76 if (value == '"') fz_write_printf(ctx, out, " %s=\"\\%c\"", id, value); | |
| 77 else fz_write_printf(ctx, out, " %s=\"%c\"", id, value); | |
| 78 return 0; | |
| 79 } | |
| 80 | |
| 81 static int s_write_attribute_matrix(fz_context *ctx, fz_output *out, const char *id, const fz_matrix *matrix) | |
| 82 { | |
| 83 fz_write_printf(ctx, out, | |
| 84 " %s=\"%g %g %g %g %g %g\"", | |
| 85 id, | |
| 86 matrix->a, | |
| 87 matrix->b, | |
| 88 matrix->c, | |
| 89 matrix->d, | |
| 90 matrix->e, | |
| 91 matrix->f | |
| 92 ); | |
| 93 return 0; | |
| 94 } | |
| 95 | |
| 96 | |
| 97 | |
| 98 | |
| 99 typedef struct | |
| 100 { | |
| 101 fz_device super; | |
| 102 fz_output *out; | |
| 103 } fz_xmltext_device; | |
| 104 | |
| 105 static void | |
| 106 fz_xmltext_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, | |
| 107 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) | |
| 108 { | |
| 109 fz_xmltext_device *dev = (fz_xmltext_device*) dev_; | |
| 110 | |
| 111 fz_text_span *span; | |
| 112 for (span = text->head; span; span = span->next) | |
| 113 { | |
| 114 int i; | |
| 115 | |
| 116 s_xml_starttag_begin(ctx, dev->out, "span"); | |
| 117 s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm); | |
| 118 s_write_attribute_string(ctx, dev->out, "font_name", span->font->name); | |
| 119 if (span->font->flags.is_mono) s_write_attribute_int(ctx, dev->out, "is_mono", 1); | |
| 120 if (span->font->flags.is_serif) s_write_attribute_int(ctx, dev->out, "is_serif", 1); | |
| 121 if (span->font->flags.is_italic) s_write_attribute_int(ctx, dev->out, "is_italic", 1); | |
| 122 if (span->font->flags.ft_substitute) s_write_attribute_int(ctx, dev->out, "ft_substitute", 1); | |
| 123 if (span->font->flags.ft_stretch) s_write_attribute_int(ctx, dev->out, "ft_stretch", 1); | |
| 124 if (span->font->flags.fake_bold) s_write_attribute_int(ctx, dev->out, "fake_bold", 1); | |
| 125 if (span->font->flags.fake_italic) s_write_attribute_int(ctx, dev->out, "fake_italic", 1); | |
| 126 if (span->font->flags.has_opentype) s_write_attribute_int(ctx, dev->out, "has_opentype", 1); | |
| 127 if (span->font->flags.invalid_bbox) s_write_attribute_int(ctx, dev->out, "invalid_bbox", 1); | |
| 128 s_write_attribute_matrix(ctx, dev->out, "trm", &span->trm); | |
| 129 s_write_attribute_int(ctx, dev->out, "len", span->len); | |
| 130 s_write_attribute_int(ctx, dev->out, "wmode", span->wmode); | |
| 131 s_write_attribute_int(ctx, dev->out, "bidi_level", span->bidi_level); | |
| 132 s_write_attribute_int(ctx, dev->out, "markup_dir", span->markup_dir); | |
| 133 s_write_attribute_int(ctx, dev->out, "language", span->language); | |
| 134 s_write_attribute_int(ctx, dev->out, "cap", span->cap); | |
| 135 s_xml_starttag_end(ctx, dev->out); | |
| 136 | |
| 137 for (i=0; i<span->len; ++i) | |
| 138 { | |
| 139 fz_text_item *item = &span->items[i]; | |
| 140 | |
| 141 s_xml_starttag_begin(ctx, dev->out, "char"); | |
| 142 s_write_attribute_float(ctx, dev->out, "x", item->x); | |
| 143 s_write_attribute_float(ctx, dev->out, "y", item->y); | |
| 144 s_write_attribute_int(ctx, dev->out, "gid", item->gid); | |
| 145 s_write_attribute_int(ctx, dev->out, "ucs", item->ucs); | |
| 146 | |
| 147 /* | |
| 148 * Firefox complains if we put special characters here; it's only for debugging | |
| 149 * so this isn't really a problem. | |
| 150 */ | |
| 151 s_write_attribute_char(ctx, dev->out, "debug_char", | |
| 152 (item->ucs >= 32 && item->ucs < 128 && item->ucs != '"') | |
| 153 ? item->ucs : ' ' | |
| 154 ); | |
| 155 s_write_attribute_float(ctx, dev->out, "adv", span->items[i].adv); | |
| 156 s_xml_starttag_empty_end(ctx, dev->out); | |
| 157 } | |
| 158 | |
| 159 s_xml_endtag(ctx, dev->out, "span"); | |
| 160 } | |
| 161 } | |
| 162 | |
| 163 static void | |
| 164 fz_xmltext_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, | |
| 165 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) | |
| 166 { | |
| 167 fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params); | |
| 168 } | |
| 169 | |
| 170 static void | |
| 171 fz_xmltext_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, | |
| 172 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) | |
| 173 { | |
| 174 fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params); | |
| 175 } | |
| 176 | |
| 177 static void | |
| 178 fz_xmltext_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor) | |
| 179 { | |
| 180 fz_xmltext_text(ctx, dev_, text, ctm, NULL, NULL, 0 /*alpha*/, fz_default_color_params); | |
| 181 } | |
| 182 | |
| 183 static void | |
| 184 fz_xmltext_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) | |
| 185 { | |
| 186 fz_xmltext_text(ctx, dev_, text, ctm, NULL, 0, 0, fz_default_color_params); | |
| 187 } | |
| 188 | |
| 189 static void | |
| 190 fz_xmltext_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm) | |
| 191 { | |
| 192 } | |
| 193 | |
| 194 static void | |
| 195 fz_stext_close_device(fz_context *ctx, fz_device *dev_) | |
| 196 { | |
| 197 } | |
| 198 | |
| 199 | |
| 200 | |
| 201 static void fz_xmltext_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params) | |
| 202 { | |
| 203 fz_xmltext_device *dev = (fz_xmltext_device*) dev_; | |
| 204 fz_pixmap *pixmap = NULL; | |
| 205 fz_try(ctx) | |
| 206 { | |
| 207 const char *type = NULL; | |
| 208 fz_compressed_buffer *compressed; | |
| 209 s_xml_starttag_begin(ctx, dev->out, "image"); | |
| 210 /* First try to write compressed data. */ | |
| 211 compressed = fz_compressed_image_buffer(ctx, img); | |
| 212 if (compressed) | |
| 213 { | |
| 214 if (compressed->params.type == FZ_IMAGE_UNKNOWN) | |
| 215 { | |
| 216 /* unknown image type. */ | |
| 217 } | |
| 218 else if (compressed->params.type == FZ_IMAGE_RAW) | |
| 219 { | |
| 220 type = "raw"; | |
| 221 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 222 } | |
| 223 else if (compressed->params.type == FZ_IMAGE_FAX) | |
| 224 { | |
| 225 type = "fax"; | |
| 226 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 227 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.fax.columns); | |
| 228 s_write_attribute_int(ctx, dev->out, "rows", compressed->params.u.fax.rows); | |
| 229 s_write_attribute_int(ctx, dev->out, "k", compressed->params.u.fax.k); | |
| 230 s_write_attribute_int(ctx, dev->out, "end_of_line", compressed->params.u.fax.end_of_line); | |
| 231 s_write_attribute_int(ctx, dev->out, "encoded_byte_align", compressed->params.u.fax.encoded_byte_align); | |
| 232 s_write_attribute_int(ctx, dev->out, "end_of_block", compressed->params.u.fax.end_of_block); | |
| 233 s_write_attribute_int(ctx, dev->out, "black_is_1", compressed->params.u.fax.black_is_1); | |
| 234 s_write_attribute_int(ctx, dev->out, "damaged_rows_before_error", compressed->params.u.fax.damaged_rows_before_error); | |
| 235 } | |
| 236 else if (compressed->params.type == FZ_IMAGE_FLATE) | |
| 237 { | |
| 238 type = "flate"; | |
| 239 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 240 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.flate.columns); | |
| 241 s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.flate.colors); | |
| 242 s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.flate.predictor); | |
| 243 s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.flate.bpc); | |
| 244 } | |
| 245 else if (compressed->params.type == FZ_IMAGE_BROTLI) | |
| 246 { | |
| 247 type = "brotli"; | |
| 248 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 249 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.brotli.columns); | |
| 250 s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.brotli.colors); | |
| 251 s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.brotli.predictor); | |
| 252 s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.brotli.bpc); | |
| 253 } | |
| 254 else if (compressed->params.type == FZ_IMAGE_LZW) | |
| 255 { | |
| 256 type = "lzw"; | |
| 257 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 258 s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.lzw.columns); | |
| 259 s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.lzw.colors); | |
| 260 s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.lzw.predictor); | |
| 261 s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.lzw.bpc); | |
| 262 s_write_attribute_int(ctx, dev->out, "early_change", compressed->params.u.lzw.early_change); | |
| 263 } | |
| 264 else if (compressed->params.type == FZ_IMAGE_BMP) | |
| 265 { | |
| 266 type = "bmp"; | |
| 267 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 268 } | |
| 269 else if (compressed->params.type == FZ_IMAGE_GIF) | |
| 270 { | |
| 271 type = "gif"; | |
| 272 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 273 } | |
| 274 else if (compressed->params.type == FZ_IMAGE_JBIG2) | |
| 275 { | |
| 276 type = "jbig2"; | |
| 277 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 278 /* do we need to write out *compressed->params.globals somehow? */ | |
| 279 } | |
| 280 else if (compressed->params.type == FZ_IMAGE_JPEG) | |
| 281 { | |
| 282 type = "jpeg"; | |
| 283 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 284 s_write_attribute_int(ctx, dev->out, "color_transform", compressed->params.u.jpeg.color_transform); | |
| 285 if (compressed->params.u.jpeg.invert_cmyk) | |
| 286 s_write_attribute_int(ctx, dev->out, "invert_cmyk", 1); | |
| 287 } | |
| 288 else if (compressed->params.type == FZ_IMAGE_JPX) | |
| 289 { | |
| 290 type = "jpx"; | |
| 291 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 292 s_write_attribute_int(ctx, dev->out, "smask_in_data", compressed->params.u.jpx.smask_in_data); | |
| 293 } | |
| 294 else if (compressed->params.type == FZ_IMAGE_JXR) | |
| 295 { | |
| 296 type = "jxr"; | |
| 297 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 298 } | |
| 299 else if (compressed->params.type == FZ_IMAGE_PNG) | |
| 300 { | |
| 301 type = "png"; | |
| 302 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 303 } | |
| 304 else if (compressed->params.type == FZ_IMAGE_PNM) | |
| 305 { | |
| 306 type = "pnm"; | |
| 307 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 308 } | |
| 309 else if (compressed->params.type == FZ_IMAGE_TIFF) | |
| 310 { | |
| 311 type = "tiff"; | |
| 312 s_write_attribute_string(ctx, dev->out, "type", type); | |
| 313 } | |
| 314 else | |
| 315 { | |
| 316 /* Unrecognised. */ | |
| 317 } | |
| 318 | |
| 319 if (type) | |
| 320 { | |
| 321 /* Write out raw data. */ | |
| 322 unsigned char *data; | |
| 323 size_t datasize = fz_buffer_storage(ctx, compressed->buffer, &data); | |
| 324 size_t i; | |
| 325 s_write_attribute_size(ctx, dev->out, "datasize", datasize); | |
| 326 s_xml_starttag_end(ctx, dev->out); | |
| 327 for (i=0; i<datasize; ++i) | |
| 328 { | |
| 329 if (i % 32 == 0) fz_write_printf(ctx, dev->out, "\n "); | |
| 330 if (i % 4 == 0) fz_write_printf(ctx, dev->out, " "); | |
| 331 fz_write_printf(ctx, dev->out, "%02x", data[i]); | |
| 332 } | |
| 333 fz_write_printf(ctx, dev->out, "\n"); | |
| 334 } | |
| 335 } | |
| 336 | |
| 337 if (!type) | |
| 338 { | |
| 339 /* Compressed data not available, so write out raw pixel values. */ | |
| 340 int l2factor = 0; | |
| 341 int y; | |
| 342 s_write_attribute_string(ctx, dev->out, "type", "pixmap"); | |
| 343 s_xml_starttag_end(ctx, dev->out); | |
| 344 pixmap = img->get_pixmap(ctx, img, NULL /*subarea*/, img->w, img->h, &l2factor); | |
| 345 s_write_attribute_int(ctx, dev->out, "x", pixmap->x); | |
| 346 s_write_attribute_int(ctx, dev->out, "y", pixmap->y); | |
| 347 s_write_attribute_int(ctx, dev->out, "w", pixmap->w); | |
| 348 s_write_attribute_int(ctx, dev->out, "h", pixmap->h); | |
| 349 s_write_attribute_int(ctx, dev->out, "n", pixmap->n); | |
| 350 s_write_attribute_int(ctx, dev->out, "s", pixmap->s); | |
| 351 s_write_attribute_int(ctx, dev->out, "alpha", pixmap->alpha); | |
| 352 s_write_attribute_int(ctx, dev->out, "flags", pixmap->flags); | |
| 353 s_write_attribute_int(ctx, dev->out, "xres", pixmap->xres); | |
| 354 s_write_attribute_int(ctx, dev->out, "yres", pixmap->yres); | |
| 355 s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm); | |
| 356 s_xml_starttag_end(ctx, dev->out); | |
| 357 for (y=0; y<pixmap->h; ++y) | |
| 358 { | |
| 359 int x; | |
| 360 s_xml_starttag_begin(ctx, dev->out, "line"); | |
| 361 s_write_attribute_int(ctx, dev->out, "y", y); | |
| 362 s_xml_starttag_end(ctx, dev->out); | |
| 363 for (x=0; x<pixmap->w; ++x) | |
| 364 { | |
| 365 int b; | |
| 366 fz_write_printf(ctx, dev->out, " "); | |
| 367 for (b=0; b<pixmap->n; ++b) | |
| 368 { | |
| 369 fz_write_printf(ctx, dev->out, "%02x", pixmap->samples[y*(size_t)pixmap->stride + x*(size_t)pixmap->n + b]); | |
| 370 } | |
| 371 } | |
| 372 s_xml_endtag(ctx, dev->out, "line"); | |
| 373 } | |
| 374 } | |
| 375 s_xml_endtag(ctx, dev->out, "image"); | |
| 376 } | |
| 377 fz_always(ctx) | |
| 378 { | |
| 379 fz_drop_pixmap(ctx, pixmap); | |
| 380 } | |
| 381 fz_catch(ctx) | |
| 382 { | |
| 383 fz_rethrow(ctx); | |
| 384 } | |
| 385 } | |
| 386 | |
| 387 fz_device *fz_new_xmltext_device(fz_context *ctx, fz_output *out) | |
| 388 { | |
| 389 fz_xmltext_device *dev = fz_new_derived_device(ctx, fz_xmltext_device); | |
| 390 | |
| 391 dev->super.close_device = fz_stext_close_device; | |
| 392 | |
| 393 dev->super.fill_text = fz_xmltext_fill_text; | |
| 394 dev->super.stroke_text = fz_xmltext_stroke_text; | |
| 395 dev->super.clip_text = fz_xmltext_clip_text; | |
| 396 dev->super.clip_stroke_text = fz_xmltext_clip_stroke_text; | |
| 397 dev->super.ignore_text = fz_xmltext_ignore_text; | |
| 398 dev->super.fill_image = fz_xmltext_fill_image; | |
| 399 | |
| 400 dev->out = out; | |
| 401 | |
| 402 return (fz_device*)dev; | |
| 403 } |
