comparison mupdf-source/include/mupdf/fitz/xml.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #ifndef MUPDF_FITZ_XML_H
24 #define MUPDF_FITZ_XML_H
25
26 #include "mupdf/fitz/system.h"
27 #include "mupdf/fitz/context.h"
28 #include "mupdf/fitz/buffer.h"
29 #include "mupdf/fitz/pool.h"
30 #include "mupdf/fitz/archive.h"
31
32 /**
33 XML document model
34 */
35
36 typedef struct fz_xml fz_xml;
37
38 /* For backwards compatibility */
39 typedef fz_xml fz_xml_doc;
40
41 /**
42 Parse the contents of buffer into a tree of xml nodes.
43
44 preserve_white: whether to keep or delete all-whitespace nodes.
45 */
46 fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
47
48 /**
49 Parse the contents of a stream into a tree of xml nodes.
50
51 preserve_white: whether to keep or delete all-whitespace nodes.
52 */
53 fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white);
54
55 /**
56 Parse the contents of an archive entry into a tree of xml nodes.
57
58 preserve_white: whether to keep or delete all-whitespace nodes.
59 */
60 fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
61
62 /**
63 Try to parse the contents of an archive entry into a tree of xml nodes.
64
65 preserve_white: whether to keep or delete all-whitespace nodes.
66
67 Will return NULL if the archive entry can't be found. Otherwise behaves
68 the same as fz_parse_xml_archive_entry. May throw exceptions.
69 */
70 fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
71
72 /**
73 Parse the contents of a buffer into a tree of XML nodes,
74 using the HTML5 parsing algorithm.
75 */
76 fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf);
77
78 /**
79 Add a reference to the XML.
80 */
81 fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml);
82
83 /**
84 Drop a reference to the XML. When the last reference is
85 dropped, the node and all its children and siblings will
86 be freed.
87 */
88 void fz_drop_xml(fz_context *ctx, fz_xml *xml);
89
90 /**
91 Detach a node from the tree, unlinking it from its parent,
92 and setting the document root to the node.
93 */
94 void fz_detach_xml(fz_context *ctx, fz_xml *node);
95
96 /**
97 Return the topmost XML node of a document.
98 */
99 fz_xml *fz_xml_root(fz_xml_doc *xml);
100
101 /**
102 Return previous sibling of XML node.
103 */
104 fz_xml *fz_xml_prev(fz_xml *item);
105
106 /**
107 Return next sibling of XML node.
108 */
109 fz_xml *fz_xml_next(fz_xml *item);
110
111 /**
112 Return parent of XML node.
113 */
114 fz_xml *fz_xml_up(fz_xml *item);
115
116 /**
117 Return first child of XML node.
118 */
119 fz_xml *fz_xml_down(fz_xml *item);
120
121 /**
122 Return true if the tag name matches.
123 */
124 int fz_xml_is_tag(fz_xml *item, const char *name);
125
126 /**
127 Return tag of XML node. Return NULL for text nodes.
128 */
129 char *fz_xml_tag(fz_xml *item);
130
131 /**
132 Return the value of an attribute of an XML node.
133 NULL if the attribute doesn't exist.
134 */
135 char *fz_xml_att(fz_xml *item, const char *att);
136
137 /**
138 Return the value of an attribute of an XML node.
139 If the first attribute doesn't exist, try the second.
140 NULL if neither attribute exists.
141 */
142 char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two);
143
144 /**
145 Check for a matching attribute on an XML node.
146
147 If the node has the requested attribute (name), and the value
148 matches (match) then return 1. Otherwise, 0.
149 */
150 int fz_xml_att_eq(fz_xml *item, const char *name, const char *match);
151
152 /**
153 Add an attribute to an XML node.
154 */
155 void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val);
156
157 /**
158 Return the text content of an XML node.
159 Return NULL if the node is a tag.
160 */
161 char *fz_xml_text(fz_xml *item);
162
163 /**
164 Pretty-print an XML tree to given output.
165 */
166 void fz_output_xml(fz_context *ctx, fz_output *out, fz_xml *item, int level);
167
168 /**
169 Pretty-print an XML tree to stdout. (Deprecated, use
170 fz_output_xml in preference).
171 */
172 void fz_debug_xml(fz_xml *item, int level);
173
174 /**
175 Search the siblings of XML nodes starting with item looking for
176 the first with the given tag.
177
178 Return NULL if none found.
179 */
180 fz_xml *fz_xml_find(fz_xml *item, const char *tag);
181
182 /**
183 Search the siblings of XML nodes starting with the next sibling
184 of item looking for the first with the given tag.
185
186 Return NULL if none found.
187 */
188 fz_xml *fz_xml_find_next(fz_xml *item, const char *tag);
189
190 /**
191 Search the siblings of XML nodes starting with the first child
192 of item looking for the first with the given tag.
193
194 Return NULL if none found.
195 */
196 fz_xml *fz_xml_find_down(fz_xml *item, const char *tag);
197
198 /**
199 Search the siblings of XML nodes starting with item looking for
200 the first with the given tag (or any tag if tag is NULL), and
201 with a matching attribute.
202
203 Return NULL if none found.
204 */
205 fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const char *match);
206
207 /**
208 Search the siblings of XML nodes starting with the next sibling
209 of item looking for the first with the given tag (or any tag if tag
210 is NULL), and with a matching attribute.
211
212 Return NULL if none found.
213 */
214 fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, const char *match);
215
216 /**
217 Search the siblings of XML nodes starting with the first child
218 of item looking for the first with the given tag (or any tag if
219 tag is NULL), and with a matching attribute.
220
221 Return NULL if none found.
222 */
223 fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match);
224
225 /**
226 Perform a depth first search from item, returning the first
227 child that matches the given tag (or any tag if tag is NULL),
228 with the given attribute (if att is non NULL), that matches
229 match (if match is non NULL).
230 */
231 fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
232
233 /**
234 Perform a depth first search from item, returning the first
235 child that matches the given tag (or any tag if tag is NULL),
236 with the given attribute (if att is non NULL), that matches
237 match (if match is non NULL). The search stops if it ever
238 reaches the top of the tree, or the declared 'top' item.
239 */
240 fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
241
242 /**
243 Perform a depth first search onwards from item, returning the first
244 child that matches the given tag (or any tag if tag is NULL),
245 with the given attribute (if att is non NULL), that matches
246 match (if match is non NULL).
247 */
248 fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
249
250 /**
251 Perform a depth first search onwards from item, returning the first
252 child that matches the given tag (or any tag if tag is NULL),
253 with the given attribute (if att is non NULL), that matches
254 match (if match is non NULL). The search stops if it ever reaches
255 the top of the tree, or the declared 'top' item.
256 */
257 fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
258
259 /**
260 DOM-like functions for html in xml.
261 */
262
263 /**
264 Return a borrowed reference for the 'body' element of
265 the given DOM.
266 */
267 fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom);
268
269 /**
270 Return a borrowed reference for the document (the top
271 level element) of the DOM.
272 */
273 fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom);
274
275 /**
276 Create an element of a given tag type for the given DOM.
277
278 The element is not linked into the DOM yet.
279 */
280 fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag);
281
282 /**
283 Create a text node for the given DOM.
284
285 The text node is not linked into the DOM yet.
286 */
287 fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text);
288
289 /**
290 Find the first element matching the requirements in a depth first traversal from elt.
291
292 The tagname must match tag, unless tag is NULL, when all tag names are considered to match.
293
294 If att is NULL, then no attribute requirement is applied.
295 Otherwise:
296 If match is NULL, then only nodes that have an att attribute are accepted.
297 If match is non-NULL, then only nodes whose att attribute has a value matching match are accepted.
298
299 Returns NULL (if no match found), or a borrowed reference to the first matching element.
300 */
301 fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
302
303 /**
304 Find the next element matching the requirements.
305 */
306 fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
307
308 /**
309 Insert an element as the last child of a parent, unlinking the
310 child from its current position if required.
311 */
312 void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child);
313
314 /**
315 Insert an element (new_elt), before another element (node),
316 unlinking the new_elt from its current position if required.
317 */
318 void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
319
320 /**
321 Insert an element (new_elt), after another element (node),
322 unlinking the new_elt from its current position if required.
323 */
324 void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
325
326 /**
327 Remove an element from the DOM. The element can be added back elsewhere
328 if required.
329
330 No reference counting changes for the element.
331 */
332 void fz_dom_remove(fz_context *ctx, fz_xml *elt);
333
334 /**
335 Clone an element (and its children).
336
337 A borrowed reference to the clone is returned. The clone is not
338 yet linked into the DOM.
339 */
340 fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt);
341
342 /**
343 Return a borrowed reference to the first child of a node,
344 or NULL if there isn't one.
345 */
346 fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt);
347
348 /**
349 Return a borrowed reference to the parent of a node,
350 or NULL if there isn't one.
351 */
352 fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt);
353
354 /**
355 Return a borrowed reference to the next sibling of a node,
356 or NULL if there isn't one.
357 */
358 fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt);
359
360 /**
361 Return a borrowed reference to the previous sibling of a node,
362 or NULL if there isn't one.
363 */
364 fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt);
365
366 /**
367 Add an attribute to an element.
368
369 Ownership of att and value remain with the caller.
370 */
371 void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value);
372
373 /**
374 Remove an attribute from an element.
375 */
376 void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att);
377
378 /**
379 Retrieve the value of a given attribute from a given element.
380
381 Returns a borrowed pointer to the value or NULL if not found.
382 */
383 const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att);
384
385 /**
386 Enumerate through the attributes of an element.
387
388 Call with i=0,1,2,3... to enumerate attributes.
389
390 On return *att and the return value will be NULL if there are not
391 that many attributes to read. Otherwise, *att will be filled in
392 with a borrowed pointer to the attribute name, and the return
393 value will be a borrowed pointer to the value.
394 */
395 const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att);
396
397 /**
398 Make new xml dom root element.
399 */
400 fz_xml *fz_new_dom(fz_context *ctx, const char *tag);
401
402 /**
403 Create a new dom node.
404
405 This will NOT be linked in yet.
406 */
407 fz_xml *fz_new_dom_node(fz_context *ctx, fz_xml *dom, const char *tag);
408
409 /**
410 Create a new dom text node.
411
412 This will NOT be linked in yet.
413 */
414 fz_xml *fz_new_dom_text_node(fz_context *ctx, fz_xml *dom, const char *text);
415
416 /**
417 Write our xml structure out to an xml stream.
418
419 Properly formatted XML is only allowed to have a single top-level node
420 under which everything must sit. Our structures allow for multiple
421 top level nodes. If required, we will output an extra 'ROOT' node
422 at the top so that the xml is well-formed.
423
424 If 'indented' is non-zero then additional whitespace will be added to
425 make the XML easier to read in a text editor. The indented output will
426 NOT be properly compliant.
427 */
428 void fz_write_xml(fz_context *ctx, fz_xml *root, fz_output *out, int indented);
429
430 /**
431 As for fz_write_xml, but direct to a file.
432 */
433 void fz_save_xml(fz_context *ctx, fz_xml *root, const char *path, int indented);
434
435 #endif