comparison mupdf-source/source/fitz/xml-write.c @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 // Copyright (C) 2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "xml-imp.h"
24
25 void fz_save_xml(fz_context *ctx, fz_xml *root, const char *path, int indented)
26 {
27 fz_output *out = fz_new_output_with_path(ctx, path, 0);
28
29 fz_try(ctx)
30 {
31 fz_write_xml(ctx, root, out, indented);
32 fz_close_output(ctx, out);
33 }
34 fz_always(ctx)
35 fz_drop_output(ctx, out);
36 fz_catch(ctx)
37 fz_rethrow(ctx);
38 }
39
40 static void
41 xml_escape_tag(fz_context *ctx, fz_output *out, const char *s)
42 {
43 while (1)
44 {
45 int c;
46 size_t len = fz_chartorune(&c, s);
47 size_t i;
48 if (c == 0)
49 break;
50 if (c == '<')
51 fz_write_string(ctx, out, "&lt;");
52 else if (c == '>')
53 fz_write_string(ctx, out, "&gt;");
54 else if (c == '&')
55 fz_write_string(ctx, out, "&amp;");
56 else
57 for (i = 0; i < len; i++)
58 {
59 char d = s[i];
60 if (d < 32 || d >= 127)
61 {
62 fz_write_string(ctx, out, "&#x");
63 fz_write_byte(ctx, out, "0123456789abcdef"[(d>>4)&15]);
64 fz_write_byte(ctx, out, "0123456789abcdef"[d&15]);
65 fz_write_byte(ctx, out, ';');
66 }
67 else
68 fz_write_byte(ctx, out, d);
69 }
70 s += len;
71 }
72 }
73
74 static void
75 xml_escape_string(fz_context *ctx, fz_output *out, const char *s)
76 {
77 while (1)
78 {
79 int c;
80 size_t len = fz_chartorune(&c, s);
81 size_t i;
82 if (c == 0)
83 break;
84 if (c == '<')
85 fz_write_string(ctx, out, "&lt;");
86 else if (c == '>')
87 fz_write_string(ctx, out, "&gt;");
88 else if (c == '&')
89 fz_write_string(ctx, out, "&amp;");
90 else if (c == '\"')
91 {
92 fz_write_string(ctx, out, "&quot;");
93 }
94 else
95 for (i = 0; i < len; i++)
96 {
97 char d = s[i];
98 if (d < 32 || d >= 127)
99 {
100 fz_write_string(ctx, out, "&#x");
101 fz_write_byte(ctx, out, "0123456789abcdef"[(d>>4)&15]);
102 fz_write_byte(ctx, out, "0123456789abcdef"[d&15]);
103 fz_write_byte(ctx, out, ';');
104 }
105 else
106 fz_write_byte(ctx, out, d);
107 }
108 s += len;
109 }
110 }
111
112 static void
113 indent(fz_context *ctx, fz_output *out, int depth)
114 {
115 fz_write_byte(ctx, out, '\n');
116 while (depth-- > 0)
117 {
118 fz_write_byte(ctx, out, ' ');
119 }
120 }
121
122 static int
123 do_write(fz_context *ctx, fz_xml *node, fz_output *out, int depth)
124 {
125 const char *tag;
126 fz_xml *down;
127 int last_was_text = 0;
128
129 for (; node != NULL; node = fz_xml_next(node))
130 {
131 struct attribute *att;
132
133 tag = fz_xml_tag(node);
134 if (!tag)
135 {
136 /* Text node. */
137 char *text = fz_xml_text(node);
138 if (text)
139 xml_escape_tag(ctx, out, text);
140 last_was_text = 1;
141 continue;
142 }
143
144 last_was_text = 0;
145 if (depth >= 0)
146 indent(ctx, out, depth);
147 fz_write_byte(ctx, out, '<');
148 xml_escape_tag(ctx, out, tag);
149
150 for (att = node->u.node.u.d.atts; att; att = att->next)
151 {
152 fz_write_byte(ctx, out, ' ');
153 xml_escape_tag(ctx, out, att->name);
154 fz_write_string(ctx, out, "=\"");
155 xml_escape_string(ctx, out, att->value);
156 fz_write_byte(ctx, out, '\"');
157 }
158
159 down = fz_xml_down(node);
160 if (down)
161 {
162 fz_write_byte(ctx, out, '>');
163 if (!do_write(ctx, down, out, depth >= 0 ? depth+1 : -1))
164 indent(ctx, out, depth);
165 fz_write_string(ctx, out, "</");
166 xml_escape_tag(ctx, out, tag);
167 fz_write_byte(ctx, out, '>');
168 }
169 else
170 {
171 fz_write_string(ctx, out, "/>");
172 }
173 }
174 return depth >= 0 ? last_was_text : 1;
175 }
176
177 void
178 fz_write_xml(fz_context *ctx, fz_xml *root, fz_output *out, int indented)
179 {
180 if (root == NULL)
181 return;
182
183 fz_write_string(ctx, out, "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
184
185 /* Skip over the document object, if we're handed that. */
186 if (root->up == NULL)
187 root = root->down;
188
189 if (!do_write(ctx, root, out, indented ? 0 : -1))
190 fz_write_byte(ctx, out, '\n');
191 }