diff mupdf-source/source/fitz/xml-write.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/fitz/xml-write.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,191 @@
+// Copyright (C) 2024 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "xml-imp.h"
+
+/*
+	Serialise the XML tree 'root' into a file created at 'path'.
+
+	The output is opened with fz_new_output_with_path(), filled by
+	fz_write_xml() and then closed. 'indented' is forwarded to
+	fz_write_xml() unchanged.
+
+	The output object is dropped on every path, and any exception raised
+	while writing or closing is rethrown to the caller.
+*/
+void fz_save_xml(fz_context *ctx, fz_xml *root, const char *path, int indented)
+{
+	fz_output *file;
+
+	/* Created outside the try block: if this throws there is nothing to drop. */
+	file = fz_new_output_with_path(ctx, path, 0);
+
+	fz_try(ctx)
+	{
+		fz_write_xml(ctx, root, file, indented);
+		fz_close_output(ctx, file);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_output(ctx, file);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+
+/*
+	Write the NUL-terminated string 's' to 'out', escaped for use as XML
+	markup or character data. Within this file it is used for element
+	names, attribute names and text node content.
+
+	'<', '>' and '&' become the predefined entities. Printable ASCII
+	(32..126) is copied through. Every other character is written as a
+	single hexadecimal character reference for its code point, using
+	lower-case digits and at least two of them (so '\n' is "&#x0a;").
+
+	The reference is built from the code point decoded by fz_chartorune(),
+	not from the individual encoded bytes: escaping each byte on its own
+	would turn U+00E9 (bytes C3 A9) into "&#xc3;&#xa9;", which a parser
+	reads back as two unrelated characters.
+*/
+static void
+xml_escape_tag(fz_context *ctx, fz_output *out, const char *s)
+{
+	static const char hex[] = "0123456789abcdef";
+
+	while (1)
+	{
+		int c;
+		size_t len = fz_chartorune(&c, s);
+		if (c == 0)
+			break;
+		if (c == '<')
+			fz_write_string(ctx, out, "&lt;");
+		else if (c == '>')
+			fz_write_string(ctx, out, "&gt;");
+		else if (c == '&')
+			fz_write_string(ctx, out, "&amp;");
+		else if (c < 32 || c >= 127)
+		{
+			/* Work on an unsigned copy so the shifts are well defined. */
+			unsigned int u = (unsigned int)c;
+			int shift = 4;
+
+			/* Find the highest non-zero nibble; never fewer than two digits. */
+			while (shift < 28 && (u >> (shift + 4)) != 0)
+				shift += 4;
+
+			fz_write_string(ctx, out, "&#x");
+			for (; shift >= 0; shift -= 4)
+				fz_write_byte(ctx, out, hex[(u >> shift) & 15]);
+			fz_write_byte(ctx, out, ';');
+		}
+		else
+			fz_write_byte(ctx, out, c);
+		/* Advance past the whole encoded character that was just decoded. */
+		s += len;
+	}
+}
+
+/*
+	Write the NUL-terminated string 's' to 'out', escaped for use inside a
+	double-quoted XML attribute value.
+
+	'<', '>', '&' and '"' become the predefined entities. Printable ASCII
+	(32..126) is copied through. Every other character is written as a
+	single hexadecimal character reference for its code point, using
+	lower-case digits and at least two of them (so '\t' is "&#x09;").
+
+	The reference is built from the code point decoded by fz_chartorune(),
+	not from the individual encoded bytes: escaping each byte on its own
+	would turn U+00E9 (bytes C3 A9) into "&#xc3;&#xa9;", which a parser
+	reads back as two unrelated characters.
+*/
+static void
+xml_escape_string(fz_context *ctx, fz_output *out, const char *s)
+{
+	static const char hex[] = "0123456789abcdef";
+
+	while (1)
+	{
+		int c;
+		size_t len = fz_chartorune(&c, s);
+		if (c == 0)
+			break;
+		if (c == '<')
+			fz_write_string(ctx, out, "&lt;");
+		else if (c == '>')
+			fz_write_string(ctx, out, "&gt;");
+		else if (c == '&')
+			fz_write_string(ctx, out, "&amp;");
+		else if (c == '\"')
+			fz_write_string(ctx, out, "&quot;");
+		else if (c < 32 || c >= 127)
+		{
+			/* Work on an unsigned copy so the shifts are well defined. */
+			unsigned int u = (unsigned int)c;
+			int shift = 4;
+
+			/* Find the highest non-zero nibble; never fewer than two digits. */
+			while (shift < 28 && (u >> (shift + 4)) != 0)
+				shift += 4;
+
+			fz_write_string(ctx, out, "&#x");
+			for (; shift >= 0; shift -= 4)
+				fz_write_byte(ctx, out, hex[(u >> shift) & 15]);
+			fz_write_byte(ctx, out, ';');
+		}
+		else
+			fz_write_byte(ctx, out, c);
+		/* Advance past the whole encoded character that was just decoded. */
+		s += len;
+	}
+}
+
+/*
+	Start a new output line and pad it with 'depth' space characters.
+	A zero or negative 'depth' produces just the newline.
+*/
+static void
+indent(fz_context *ctx, fz_output *out, int depth)
+{
+	int col;
+
+	fz_write_byte(ctx, out, '\n');
+	for (col = 0; col < depth; col++)
+		fz_write_byte(ctx, out, ' ');
+}
+
+/*
+	Write 'node' and all of its following siblings (with their subtrees)
+	to 'out'.
+
+	depth >= 0 selects indented output, with each element placed on a new
+	line preceded by 'depth' spaces; children are written at depth + 1.
+	A negative depth disables all line breaking, and is passed down to
+	children as -1.
+
+	Returns non-zero if the caller should NOT emit a line break before
+	whatever it writes next: always when indentation is disabled,
+	otherwise only when the last sibling written was a text node.
+	Returns zero when indenting and the last sibling was an element (or
+	there were no siblings at all).
+*/
+static int
+do_write(fz_context *ctx, fz_xml *node, fz_output *out, int depth)
+{
+	int indenting = (depth >= 0);
+	int ended_on_text = 0;
+
+	while (node != NULL)
+	{
+		const char *name = fz_xml_tag(node);
+
+		if (name == NULL)
+		{
+			/* No tag name: this is a text node, written inline. */
+			char *content = fz_xml_text(node);
+			if (content != NULL)
+				xml_escape_tag(ctx, out, content);
+			ended_on_text = 1;
+		}
+		else
+		{
+			struct attribute *attr;
+			fz_xml *child;
+
+			ended_on_text = 0;
+
+			/* Opening tag, on its own line when indenting. */
+			if (indenting)
+				indent(ctx, out, depth);
+			fz_write_byte(ctx, out, '<');
+			xml_escape_tag(ctx, out, name);
+
+			/* Attributes as name="value" pairs, each preceded by a space. */
+			for (attr = node->u.node.u.d.atts; attr != NULL; attr = attr->next)
+			{
+				fz_write_byte(ctx, out, ' ');
+				xml_escape_tag(ctx, out, attr->name);
+				fz_write_string(ctx, out, "=\"");
+				xml_escape_string(ctx, out, attr->value);
+				fz_write_byte(ctx, out, '\"');
+			}
+
+			child = fz_xml_down(node);
+			if (child == NULL)
+			{
+				/* Childless element: use the self-closing form. */
+				fz_write_string(ctx, out, "/>");
+			}
+			else
+			{
+				int child_depth = indenting ? depth + 1 : -1;
+
+				fz_write_byte(ctx, out, '>');
+				/* Break the line before the closing tag only if the
+				 * children did not end on text. */
+				if (!do_write(ctx, child, out, child_depth))
+					indent(ctx, out, depth);
+				fz_write_string(ctx, out, "</");
+				xml_escape_tag(ctx, out, name);
+				fz_write_byte(ctx, out, '>');
+			}
+		}
+
+		node = fz_xml_next(node);
+	}
+
+	if (!indenting)
+		return 1;
+	return ended_on_text;
+}
+
+/*
+	Write the XML tree 'root' to 'out', preceded by an XML declaration.
+
+	Does nothing if 'root' is NULL. If 'root' has no parent it is taken
+	to be the document object, and writing starts from its first child.
+
+	A non-zero 'indented' requests one element per line with nesting
+	shown by leading spaces; zero writes everything without added line
+	breaks. In indented mode a final newline is appended unless the
+	output ended on a text node.
+*/
+void
+fz_write_xml(fz_context *ctx, fz_xml *root, fz_output *out, int indented)
+{
+	fz_xml *first;
+	int depth;
+
+	if (!root)
+		return;
+
+	fz_write_string(ctx, out, "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
+
+	/* A parentless node is the document object; step down past it. */
+	first = (root->up == NULL) ? root->down : root;
+
+	/* Depth 0 enables indentation, -1 switches it off entirely. */
+	depth = indented ? 0 : -1;
+
+	if (do_write(ctx, first, out, depth) == 0)
+		fz_write_byte(ctx, out, '\n');
+}