Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/xmltext-device.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/xmltext-device.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,403 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + + +static int s_xml_starttag_begin(fz_context *ctx, fz_output *out, const char *id) +{ + fz_write_printf(ctx, out, "<%s", id); + return 0; +} + +static int s_xml_starttag_end(fz_context *ctx, fz_output *out) +{ + fz_write_printf(ctx, out, ">\n"); + return 0; +} + +static int s_xml_starttag_empty_end(fz_context *ctx, fz_output *out) +{ + fz_write_printf(ctx, out, "/>\n"); + return 0; +} + +static int s_xml_endtag(fz_context *ctx, fz_output *out, const char *id) +{ + fz_write_printf(ctx, out, "</%s>\n", id); + return 0; +} + +static int s_write_attribute_int(fz_context *ctx, fz_output *out, const char *id, int value) +{ + fz_write_printf(ctx, out, " %s=\"%i\"", id, value); + return 0; +} + +static int s_write_attribute_size(fz_context *ctx, fz_output *out, const char *id, size_t value) +{ + fz_write_printf(ctx, out, " %s=\"%zi\"", id, value); + return 0; +} + +static int s_write_attribute_float(fz_context *ctx, fz_output *out, const char *id, float value) +{ + fz_write_printf(ctx, out, " %s=\"%g\"", id, value); + return 0; +} + +static int s_write_attribute_string(fz_context *ctx, fz_output *out, const char *id, const char *value) +{ + fz_write_printf(ctx, out, " %s=\"%s\"", id, value); + return 0; +} + +static int s_write_attribute_char(fz_context *ctx, fz_output *out, const char *id, char value) +{ + if (value == '"') fz_write_printf(ctx, out, " %s=\"\\%c\"", id, value); + else fz_write_printf(ctx, out, " %s=\"%c\"", id, value); + return 0; +} + +static int s_write_attribute_matrix(fz_context *ctx, fz_output *out, const char *id, const fz_matrix *matrix) +{ + fz_write_printf(ctx, out, + " %s=\"%g %g %g %g %g %g\"", + id, + matrix->a, + matrix->b, + matrix->c, + matrix->d, + matrix->e, + matrix->f + ); + return 0; +} + + + + +typedef struct +{ + fz_device super; + fz_output *out; +} fz_xmltext_device; + +static void +fz_xmltext_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_xmltext_device *dev = (fz_xmltext_device*) dev_; + + fz_text_span *span; + for (span = text->head; span; span = span->next) + { + int i; + + s_xml_starttag_begin(ctx, dev->out, "span"); + s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm); + s_write_attribute_string(ctx, dev->out, "font_name", span->font->name); + if (span->font->flags.is_mono) s_write_attribute_int(ctx, dev->out, "is_mono", 1); + if (span->font->flags.is_serif) s_write_attribute_int(ctx, dev->out, "is_serif", 1); + if (span->font->flags.is_italic) s_write_attribute_int(ctx, dev->out, "is_italic", 1); + if (span->font->flags.ft_substitute) s_write_attribute_int(ctx, dev->out, "ft_substitute", 1); + if (span->font->flags.ft_stretch) s_write_attribute_int(ctx, dev->out, "ft_stretch", 1); + if (span->font->flags.fake_bold) s_write_attribute_int(ctx, dev->out, "fake_bold", 1); + if (span->font->flags.fake_italic) s_write_attribute_int(ctx, dev->out, "fake_italic", 1); + if (span->font->flags.has_opentype) s_write_attribute_int(ctx, dev->out, "has_opentype", 1); + if (span->font->flags.invalid_bbox) s_write_attribute_int(ctx, dev->out, "invalid_bbox", 1); + s_write_attribute_matrix(ctx, dev->out, "trm", &span->trm); + s_write_attribute_int(ctx, dev->out, "len", span->len); + s_write_attribute_int(ctx, dev->out, "wmode", span->wmode); + s_write_attribute_int(ctx, dev->out, "bidi_level", span->bidi_level); + s_write_attribute_int(ctx, dev->out, "markup_dir", span->markup_dir); + s_write_attribute_int(ctx, dev->out, "language", span->language); + s_write_attribute_int(ctx, dev->out, "cap", span->cap); + s_xml_starttag_end(ctx, dev->out); + + for (i=0; i<span->len; ++i) + { + fz_text_item *item = &span->items[i]; + + s_xml_starttag_begin(ctx, dev->out, "char"); + s_write_attribute_float(ctx, dev->out, "x", item->x); + s_write_attribute_float(ctx, dev->out, "y", item->y); + s_write_attribute_int(ctx, dev->out, "gid", item->gid); + s_write_attribute_int(ctx, dev->out, "ucs", item->ucs); + + /* + * Firefox complains if we put special characters here; it's only for debugging + * so this isn't really a problem. + */ + s_write_attribute_char(ctx, dev->out, "debug_char", + (item->ucs >= 32 && item->ucs < 128 && item->ucs != '"') + ? item->ucs : ' ' + ); + s_write_attribute_float(ctx, dev->out, "adv", span->items[i].adv); + s_xml_starttag_empty_end(ctx, dev->out); + } + + s_xml_endtag(ctx, dev->out, "span"); + } +} + +static void +fz_xmltext_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_xmltext_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params) +{ + fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params); +} + +static void +fz_xmltext_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor) +{ + fz_xmltext_text(ctx, dev_, text, ctm, NULL, NULL, 0 /*alpha*/, fz_default_color_params); +} + +static void +fz_xmltext_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + fz_xmltext_text(ctx, dev_, text, ctm, NULL, 0, 0, fz_default_color_params); +} + +static void +fz_xmltext_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm) +{ +} + +static void +fz_stext_close_device(fz_context *ctx, fz_device *dev_) +{ +} + + + +static void fz_xmltext_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + fz_xmltext_device *dev = (fz_xmltext_device*) dev_; + fz_pixmap *pixmap = NULL; + fz_try(ctx) + { + const char *type = NULL; + fz_compressed_buffer *compressed; + s_xml_starttag_begin(ctx, dev->out, "image"); + /* First try to write compressed data. */ + compressed = fz_compressed_image_buffer(ctx, img); + if (compressed) + { + if (compressed->params.type == FZ_IMAGE_UNKNOWN) + { + /* unknown image type. */ + } + else if (compressed->params.type == FZ_IMAGE_RAW) + { + type = "raw"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else if (compressed->params.type == FZ_IMAGE_FAX) + { + type = "fax"; + s_write_attribute_string(ctx, dev->out, "type", type); + s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.fax.columns); + s_write_attribute_int(ctx, dev->out, "rows", compressed->params.u.fax.rows); + s_write_attribute_int(ctx, dev->out, "k", compressed->params.u.fax.k); + s_write_attribute_int(ctx, dev->out, "end_of_line", compressed->params.u.fax.end_of_line); + s_write_attribute_int(ctx, dev->out, "encoded_byte_align", compressed->params.u.fax.encoded_byte_align); + s_write_attribute_int(ctx, dev->out, "end_of_block", compressed->params.u.fax.end_of_block); + s_write_attribute_int(ctx, dev->out, "black_is_1", compressed->params.u.fax.black_is_1); + s_write_attribute_int(ctx, dev->out, "damaged_rows_before_error", compressed->params.u.fax.damaged_rows_before_error); + } + else if (compressed->params.type == FZ_IMAGE_FLATE) + { + type = "flate"; + s_write_attribute_string(ctx, dev->out, "type", type); + s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.flate.columns); + s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.flate.colors); + s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.flate.predictor); + s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.flate.bpc); + } + else if (compressed->params.type == FZ_IMAGE_BROTLI) + { + type = "brotli"; + s_write_attribute_string(ctx, dev->out, "type", type); + s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.brotli.columns); + s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.brotli.colors); + s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.brotli.predictor); + s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.brotli.bpc); + } + else if (compressed->params.type == FZ_IMAGE_LZW) + { + type = "lzw"; + s_write_attribute_string(ctx, dev->out, "type", type); + s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.lzw.columns); + s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.lzw.colors); + s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.lzw.predictor); + s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.lzw.bpc); + s_write_attribute_int(ctx, dev->out, "early_change", compressed->params.u.lzw.early_change); + } + else if (compressed->params.type == FZ_IMAGE_BMP) + { + type = "bmp"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else if (compressed->params.type == FZ_IMAGE_GIF) + { + type = "gif"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else if (compressed->params.type == FZ_IMAGE_JBIG2) + { + type = "jbig2"; + s_write_attribute_string(ctx, dev->out, "type", type); + /* do we need to write out *compressed->params.globals somehow? */ + } + else if (compressed->params.type == FZ_IMAGE_JPEG) + { + type = "jpeg"; + s_write_attribute_string(ctx, dev->out, "type", type); + s_write_attribute_int(ctx, dev->out, "color_transform", compressed->params.u.jpeg.color_transform); + if (compressed->params.u.jpeg.invert_cmyk) + s_write_attribute_int(ctx, dev->out, "invert_cmyk", 1); + } + else if (compressed->params.type == FZ_IMAGE_JPX) + { + type = "jpx"; + s_write_attribute_string(ctx, dev->out, "type", type); + s_write_attribute_int(ctx, dev->out, "smask_in_data", compressed->params.u.jpx.smask_in_data); + } + else if (compressed->params.type == FZ_IMAGE_JXR) + { + type = "jxr"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else if (compressed->params.type == FZ_IMAGE_PNG) + { + type = "png"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else if (compressed->params.type == FZ_IMAGE_PNM) + { + type = "pnm"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else if (compressed->params.type == FZ_IMAGE_TIFF) + { + type = "tiff"; + s_write_attribute_string(ctx, dev->out, "type", type); + } + else + { + /* Unrecognised. */ + } + + if (type) + { + /* Write out raw data. */ + unsigned char *data; + size_t datasize = fz_buffer_storage(ctx, compressed->buffer, &data); + size_t i; + s_write_attribute_size(ctx, dev->out, "datasize", datasize); + s_xml_starttag_end(ctx, dev->out); + for (i=0; i<datasize; ++i) + { + if (i % 32 == 0) fz_write_printf(ctx, dev->out, "\n "); + if (i % 4 == 0) fz_write_printf(ctx, dev->out, " "); + fz_write_printf(ctx, dev->out, "%02x", data[i]); + } + fz_write_printf(ctx, dev->out, "\n"); + } + } + + if (!type) + { + /* Compressed data not available, so write out raw pixel values. */ + int l2factor = 0; + int y; + s_write_attribute_string(ctx, dev->out, "type", "pixmap"); + s_xml_starttag_end(ctx, dev->out); + pixmap = img->get_pixmap(ctx, img, NULL /*subarea*/, img->w, img->h, &l2factor); + s_write_attribute_int(ctx, dev->out, "x", pixmap->x); + s_write_attribute_int(ctx, dev->out, "y", pixmap->y); + s_write_attribute_int(ctx, dev->out, "w", pixmap->w); + s_write_attribute_int(ctx, dev->out, "h", pixmap->h); + s_write_attribute_int(ctx, dev->out, "n", pixmap->n); + s_write_attribute_int(ctx, dev->out, "s", pixmap->s); + s_write_attribute_int(ctx, dev->out, "alpha", pixmap->alpha); + s_write_attribute_int(ctx, dev->out, "flags", pixmap->flags); + s_write_attribute_int(ctx, dev->out, "xres", pixmap->xres); + s_write_attribute_int(ctx, dev->out, "yres", pixmap->yres); + s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm); + s_xml_starttag_end(ctx, dev->out); + for (y=0; y<pixmap->h; ++y) + { + int x; + s_xml_starttag_begin(ctx, dev->out, "line"); + s_write_attribute_int(ctx, dev->out, "y", y); + s_xml_starttag_end(ctx, dev->out); + for (x=0; x<pixmap->w; ++x) + { + int b; + fz_write_printf(ctx, dev->out, " "); + for (b=0; b<pixmap->n; ++b) + { + fz_write_printf(ctx, dev->out, "%02x", pixmap->samples[y*(size_t)pixmap->stride + x*(size_t)pixmap->n + b]); + } + } + s_xml_endtag(ctx, dev->out, "line"); + } + } + s_xml_endtag(ctx, dev->out, "image"); + } + fz_always(ctx) + { + fz_drop_pixmap(ctx, pixmap); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +fz_device *fz_new_xmltext_device(fz_context *ctx, fz_output *out) +{ + fz_xmltext_device *dev = fz_new_derived_device(ctx, fz_xmltext_device); + + dev->super.close_device = fz_stext_close_device; + + dev->super.fill_text = fz_xmltext_fill_text; + dev->super.stroke_text = fz_xmltext_stroke_text; + dev->super.clip_text = fz_xmltext_clip_text; + dev->super.clip_stroke_text = fz_xmltext_clip_stroke_text; + dev->super.ignore_text = fz_xmltext_ignore_text; + dev->super.fill_image = fz_xmltext_fill_image; + + dev->out = out; + + return (fz_device*)dev; +}
