Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/include/mupdf/fitz/xml.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2024 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #ifndef MUPDF_FITZ_XML_H | |
| 24 #define MUPDF_FITZ_XML_H | |
| 25 | |
| 26 #include "mupdf/fitz/system.h" | |
| 27 #include "mupdf/fitz/context.h" | |
| 28 #include "mupdf/fitz/buffer.h" | |
| 29 #include "mupdf/fitz/pool.h" | |
| 30 #include "mupdf/fitz/archive.h" | |
| 31 | |
| 32 /** | |
| 33 XML document model | |
| 34 */ | |
| 35 | |
| 36 typedef struct fz_xml fz_xml; | |
| 37 | |
| 38 /* For backwards compatibility */ | |
| 39 typedef fz_xml fz_xml_doc; | |
| 40 | |
| 41 /** | |
| 42 Parse the contents of buffer into a tree of xml nodes. | |
| 43 | |
| 44 preserve_white: whether to keep or delete all-whitespace nodes. | |
| 45 */ | |
| 46 fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white); | |
| 47 | |
| 48 /** | |
| 49 Parse the contents of a stream into a tree of xml nodes. | |
| 50 | |
| 51 preserve_white: whether to keep or delete all-whitespace nodes. | |
| 52 */ | |
| 53 fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white); | |
| 54 | |
| 55 /** | |
| 56 Parse the contents of an archive entry into a tree of xml nodes. | |
| 57 | |
| 58 preserve_white: whether to keep or delete all-whitespace nodes. | |
| 59 */ | |
| 60 fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white); | |
| 61 | |
| 62 /** | |
| 63 Try to parse the contents of an archive entry into a tree of xml nodes. | |
| 64 | |
| 65 preserve_white: whether to keep or delete all-whitespace nodes. | |
| 66 | |
| 67 Will return NULL if the archive entry can't be found. Otherwise behaves | |
| 68 the same as fz_parse_xml_archive_entry. May throw exceptions. | |
| 69 */ | |
| 70 fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white); | |
| 71 | |
| 72 /** | |
| 73 Parse the contents of a buffer into a tree of XML nodes, | |
| 74 using the HTML5 parsing algorithm. | |
| 75 */ | |
| 76 fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf); | |
| 77 | |
| 78 /** | |
| 79 Add a reference to the XML. | |
| 80 */ | |
| 81 fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml); | |
| 82 | |
| 83 /** | |
| 84 Drop a reference to the XML. When the last reference is | |
| 85 dropped, the node and all its children and siblings will | |
| 86 be freed. | |
| 87 */ | |
| 88 void fz_drop_xml(fz_context *ctx, fz_xml *xml); | |
| 89 | |
| 90 /** | |
| 91 Detach a node from the tree, unlinking it from its parent, | |
| 92 and setting the document root to the node. | |
| 93 */ | |
| 94 void fz_detach_xml(fz_context *ctx, fz_xml *node); | |
| 95 | |
| 96 /** | |
| 97 Return the topmost XML node of a document. | |
| 98 */ | |
| 99 fz_xml *fz_xml_root(fz_xml_doc *xml); | |
| 100 | |
| 101 /** | |
| 102 Return previous sibling of XML node. | |
| 103 */ | |
| 104 fz_xml *fz_xml_prev(fz_xml *item); | |
| 105 | |
| 106 /** | |
| 107 Return next sibling of XML node. | |
| 108 */ | |
| 109 fz_xml *fz_xml_next(fz_xml *item); | |
| 110 | |
| 111 /** | |
| 112 Return parent of XML node. | |
| 113 */ | |
| 114 fz_xml *fz_xml_up(fz_xml *item); | |
| 115 | |
| 116 /** | |
| 117 Return first child of XML node. | |
| 118 */ | |
| 119 fz_xml *fz_xml_down(fz_xml *item); | |
| 120 | |
| 121 /** | |
| 122 Return true if the tag name matches. | |
| 123 */ | |
| 124 int fz_xml_is_tag(fz_xml *item, const char *name); | |
| 125 | |
| 126 /** | |
| 127 Return tag of XML node. Return NULL for text nodes. | |
| 128 */ | |
| 129 char *fz_xml_tag(fz_xml *item); | |
| 130 | |
| 131 /** | |
| 132 Return the value of an attribute of an XML node. | |
| 133 NULL if the attribute doesn't exist. | |
| 134 */ | |
| 135 char *fz_xml_att(fz_xml *item, const char *att); | |
| 136 | |
| 137 /** | |
| 138 Return the value of an attribute of an XML node. | |
| 139 If the first attribute doesn't exist, try the second. | |
| 140 NULL if neither attribute exists. | |
| 141 */ | |
| 142 char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two); | |
| 143 | |
| 144 /** | |
| 145 Check for a matching attribute on an XML node. | |
| 146 | |
| 147 If the node has the requested attribute (name), and the value | |
| 148 matches (match) then return 1. Otherwise, 0. | |
| 149 */ | |
| 150 int fz_xml_att_eq(fz_xml *item, const char *name, const char *match); | |
| 151 | |
| 152 /** | |
| 153 Add an attribute to an XML node. | |
| 154 */ | |
| 155 void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val); | |
| 156 | |
| 157 /** | |
| 158 Return the text content of an XML node. | |
| 159 Return NULL if the node is a tag. | |
| 160 */ | |
| 161 char *fz_xml_text(fz_xml *item); | |
| 162 | |
| 163 /** | |
| 164 Pretty-print an XML tree to given output. | |
| 165 */ | |
| 166 void fz_output_xml(fz_context *ctx, fz_output *out, fz_xml *item, int level); | |
| 167 | |
| 168 /** | |
| 169 Pretty-print an XML tree to stdout. (Deprecated, use | |
| 170 fz_output_xml in preference). | |
| 171 */ | |
| 172 void fz_debug_xml(fz_xml *item, int level); | |
| 173 | |
| 174 /** | |
| 175 Search the siblings of XML nodes starting with item looking for | |
| 176 the first with the given tag. | |
| 177 | |
| 178 Return NULL if none found. | |
| 179 */ | |
| 180 fz_xml *fz_xml_find(fz_xml *item, const char *tag); | |
| 181 | |
| 182 /** | |
| 183 Search the siblings of XML nodes starting with the next sibling | |
| 184 of item looking for the first with the given tag. | |
| 185 | |
| 186 Return NULL if none found. | |
| 187 */ | |
| 188 fz_xml *fz_xml_find_next(fz_xml *item, const char *tag); | |
| 189 | |
| 190 /** | |
| 191 Search the siblings of XML nodes starting with the first child | |
| 192 of item looking for the first with the given tag. | |
| 193 | |
| 194 Return NULL if none found. | |
| 195 */ | |
| 196 fz_xml *fz_xml_find_down(fz_xml *item, const char *tag); | |
| 197 | |
| 198 /** | |
| 199 Search the siblings of XML nodes starting with item looking for | |
| 200 the first with the given tag (or any tag if tag is NULL), and | |
| 201 with a matching attribute. | |
| 202 | |
| 203 Return NULL if none found. | |
| 204 */ | |
| 205 fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const char *match); | |
| 206 | |
| 207 /** | |
| 208 Search the siblings of XML nodes starting with the next sibling | |
| 209 of item looking for the first with the given tag (or any tag if tag | |
| 210 is NULL), and with a matching attribute. | |
| 211 | |
| 212 Return NULL if none found. | |
| 213 */ | |
| 214 fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, const char *match); | |
| 215 | |
| 216 /** | |
| 217 Search the siblings of XML nodes starting with the first child | |
| 218 of item looking for the first with the given tag (or any tag if | |
| 219 tag is NULL), and with a matching attribute. | |
| 220 | |
| 221 Return NULL if none found. | |
| 222 */ | |
| 223 fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match); | |
| 224 | |
| 225 /** | |
| 226 Perform a depth first search from item, returning the first | |
| 227 child that matches the given tag (or any tag if tag is NULL), | |
| 228 with the given attribute (if att is non NULL), that matches | |
| 229 match (if match is non NULL). | |
| 230 */ | |
| 231 fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match); | |
| 232 | |
| 233 /** | |
| 234 Perform a depth first search from item, returning the first | |
| 235 child that matches the given tag (or any tag if tag is NULL), | |
| 236 with the given attribute (if att is non NULL), that matches | |
| 237 match (if match is non NULL). The search stops if it ever | |
| 238 reaches the top of the tree, or the declared 'top' item. | |
| 239 */ | |
| 240 fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top); | |
| 241 | |
| 242 /** | |
| 243 Perform a depth first search onwards from item, returning the first | |
| 244 child that matches the given tag (or any tag if tag is NULL), | |
| 245 with the given attribute (if att is non NULL), that matches | |
| 246 match (if match is non NULL). | |
| 247 */ | |
| 248 fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match); | |
| 249 | |
| 250 /** | |
| 251 Perform a depth first search onwards from item, returning the first | |
| 252 child that matches the given tag (or any tag if tag is NULL), | |
| 253 with the given attribute (if att is non NULL), that matches | |
| 254 match (if match is non NULL). The search stops if it ever reaches | |
| 255 the top of the tree, or the declared 'top' item. | |
| 256 */ | |
| 257 fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top); | |
| 258 | |
| 259 /** | |
| 260 DOM-like functions for html in xml. | |
| 261 */ | |
| 262 | |
| 263 /** | |
| 264 Return a borrowed reference for the 'body' element of | |
| 265 the given DOM. | |
| 266 */ | |
| 267 fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom); | |
| 268 | |
| 269 /** | |
| 270 Return a borrowed reference for the document (the top | |
| 271 level element) of the DOM. | |
| 272 */ | |
| 273 fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom); | |
| 274 | |
| 275 /** | |
| 276 Create an element of a given tag type for the given DOM. | |
| 277 | |
| 278 The element is not linked into the DOM yet. | |
| 279 */ | |
| 280 fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag); | |
| 281 | |
| 282 /** | |
| 283 Create a text node for the given DOM. | |
| 284 | |
| 285 The text node is not linked into the DOM yet. | |
| 286 */ | |
| 287 fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text); | |
| 288 | |
| 289 /** | |
| 290 Find the first element matching the requirements in a depth first traversal from elt. | |
| 291 | |
| 292 The tagname must match tag, unless tag is NULL, when all tag names are considered to match. | |
| 293 | |
| 294 If att is NULL, then attributes are not considered when matching. | |
| 295 Otherwise: | |
| 296 If match is NULL, then only nodes that have an att attribute match. | |
| 297 If match is non-NULL, then only nodes that have an att attribute whose value matches 'match' are accepted. | |
| 298 | |
| 299 Returns NULL (if no match found), or a borrowed reference to the first matching element. | |
| 300 */ | |
| 301 fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match); | |
| 302 | |
| 303 /** | |
| 304 Find the next element matching the requirements. | |
| 305 */ | |
| 306 fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match); | |
| 307 | |
| 308 /** | |
| 309 Insert an element as the last child of a parent, unlinking the | |
| 310 child from its current position if required. | |
| 311 */ | |
| 312 void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child); | |
| 313 | |
| 314 /** | |
| 315 Insert an element (new_elt), before another element (node), | |
| 316 unlinking the new_elt from its current position if required. | |
| 317 */ | |
| 318 void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt); | |
| 319 | |
| 320 /** | |
| 321 Insert an element (new_elt), after another element (node), | |
| 322 unlinking the new_elt from its current position if required. | |
| 323 */ | |
| 324 void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt); | |
| 325 | |
| 326 /** | |
| 327 Remove an element from the DOM. The element can be added back elsewhere | |
| 328 if required. | |
| 329 | |
| 330 No reference counting changes for the element. | |
| 331 */ | |
| 332 void fz_dom_remove(fz_context *ctx, fz_xml *elt); | |
| 333 | |
| 334 /** | |
| 335 Clone an element (and its children). | |
| 336 | |
| 337 A borrowed reference to the clone is returned. The clone is not | |
| 338 yet linked into the DOM. | |
| 339 */ | |
| 340 fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt); | |
| 341 | |
| 342 /** | |
| 343 Return a borrowed reference to the first child of a node, | |
| 344 or NULL if there isn't one. | |
| 345 */ | |
| 346 fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt); | |
| 347 | |
| 348 /** | |
| 349 Return a borrowed reference to the parent of a node, | |
| 350 or NULL if there isn't one. | |
| 351 */ | |
| 352 fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt); | |
| 353 | |
| 354 /** | |
| 355 Return a borrowed reference to the next sibling of a node, | |
| 356 or NULL if there isn't one. | |
| 357 */ | |
| 358 fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt); | |
| 359 | |
| 360 /** | |
| 361 Return a borrowed reference to the previous sibling of a node, | |
| 362 or NULL if there isn't one. | |
| 363 */ | |
| 364 fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt); | |
| 365 | |
| 366 /** | |
| 367 Add an attribute to an element. | |
| 368 | |
| 369 Ownership of att and value remain with the caller. | |
| 370 */ | |
| 371 void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value); | |
| 372 | |
| 373 /** | |
| 374 Remove an attribute from an element. | |
| 375 */ | |
| 376 void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att); | |
| 377 | |
| 378 /** | |
| 379 Retrieve the value of a given attribute from a given element. | |
| 380 | |
| 381 Returns a borrowed pointer to the value or NULL if not found. | |
| 382 */ | |
| 383 const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att); | |
| 384 | |
| 385 /** | |
| 386 Enumerate through the attributes of an element. | |
| 387 | |
| 388 Call with i=0,1,2,3... to enumerate attributes. | |
| 389 | |
| 390 On return *att and the return value will be NULL if there are not | |
| 391 that many attributes to read. Otherwise, *att will be filled in | |
| 392 with a borrowed pointer to the attribute name, and the return | |
| 393 value will be a borrowed pointer to the value. | |
| 394 */ | |
| 395 const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att); | |
| 396 | |
| 397 /** | |
| 398 Make new xml dom root element. | |
| 399 */ | |
| 400 fz_xml *fz_new_dom(fz_context *ctx, const char *tag); | |
| 401 | |
| 402 /** | |
| 403 Create a new dom node. | |
| 404 | |
| 405 This will NOT be linked in yet. | |
| 406 */ | |
| 407 fz_xml *fz_new_dom_node(fz_context *ctx, fz_xml *dom, const char *tag); | |
| 408 | |
| 409 /** | |
| 410 Create a new dom text node. | |
| 411 | |
| 412 This will NOT be linked in yet. | |
| 413 */ | |
| 414 fz_xml *fz_new_dom_text_node(fz_context *ctx, fz_xml *dom, const char *text); | |
| 415 | |
| 416 /** | |
| 417 Write our xml structure out to an xml stream. | |
| 418 | |
| 419 Properly formatted XML is only allowed to have a single top-level node | |
| 420 under which everything must sit. Our structures allow for multiple | |
| 421 top level nodes. If required, we will output an extra 'ROOT' node | |
| 422 at the top so that the xml is well-formed. | |
| 423 | |
| 424 If 'indented' is non-zero then additional whitespace will be added to | |
| 425 make the XML easier to read in a text editor. It will NOT be properly | |
| 426 compliant. | |
| 427 */ | |
| 428 void fz_write_xml(fz_context *ctx, fz_xml *root, fz_output *out, int indented); | |
| 429 | |
| 430 /** | |
| 431 As for fz_write_xml, but direct to a file. | |
| 432 */ | |
| 433 void fz_save_xml(fz_context *ctx, fz_xml *root, const char *path, int indented); | |
| 434 | |
| 435 #endif |
