comparison mupdf-source/thirdparty/gumbo-parser/src/gumbo.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Author: jdtang@google.com (Jonathan Tang)
16 //
17 // We use Gumbo as a prefix for types, gumbo_ as a prefix for functions, and
18 // GUMBO_ as a prefix for enum constants (static constants get the Google-style
19 // kGumbo prefix).
20
21 /**
22 * @file
23 * @mainpage Gumbo HTML Parser
24 *
25 * This provides a conformant, no-dependencies implementation of the HTML5
26 * parsing algorithm. It supports only UTF8; if you need to parse a different
27 * encoding, run a preprocessing step to convert to UTF8. It returns a parse
28 * tree made of the structs in this file.
29 *
30 * Example:
31 * @code
32 * GumboOutput* output = gumbo_parse(input);
33 * do_something_with_doctype(output->document);
34 * do_something_with_html_tree(output->root);
35 * gumbo_destroy_output(&kGumboDefaultOptions, output);
36 * @endcode
37 * HTML5 Spec:
38 *
39 * http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
40 */
41
42 #ifndef GUMBO_GUMBO_H_
43 #define GUMBO_GUMBO_H_
44
45 #ifdef _MSC_VER
46 #ifndef _CRT_SECURE_NO_WARNINGS
47 #define _CRT_SECURE_NO_WARNINGS
48 #endif
49 #define fileno _fileno
50 #endif
51
52 #include <stdbool.h>
53 #include <stddef.h>
54
55 #ifdef __cplusplus
56 extern "C" {
57 #endif
58
59 /**
60 * A struct representing a character position within the original text buffer.
61 * Line and column numbers are 1-based and offsets are 0-based, which matches
62 * how most editors and command-line tools work. Also, columns measure
63 * positions in terms of characters while offsets measure by bytes; this is
64 * because the offset field is often used to pull out a particular region of
65 * text (which in most languages that bind to C implies pointer arithmetic on a
66 * buffer of bytes), while the column field is often used to reference a
67 * particular column on a printable display, which nowadays is usually UTF-8.
68 */
69 typedef struct {
70 unsigned int line;
71 unsigned int column;
72 unsigned int offset;
73 } GumboSourcePosition;
74
75 /**
76 * A SourcePosition used for elements that have no source position, i.e.
77 * parser-inserted elements.
78 */
79 extern const GumboSourcePosition kGumboEmptySourcePosition;
80
81 /**
82 * A struct representing a string or part of a string. Strings within the
83 * parser are represented by a char* and a length; the char* points into
84 * an existing data buffer owned by some other code (often the original input).
85 * GumboStringPieces are assumed (by convention) to be immutable, because they
86 * may share data. Use GumboStringBuffer if you need to construct a string.
87 * Clients should assume that it is not NUL-terminated, and should always use
88 * explicit lengths when manipulating them.
89 */
90 typedef struct {
91 /** A pointer to the beginning of the string. NULL iff length == 0. */
92 const char* data;
93
94 /** The length of the string fragment, in bytes. May be zero. */
95 size_t length;
96 } GumboStringPiece;
97
98 /** A constant to represent a 0-length null string. */
99 extern const GumboStringPiece kGumboEmptyString;
100
101 /**
102 * Compares two GumboStringPieces, and returns true if they're equal or false
103 * otherwise.
104 */
105 bool gumbo_string_equals(
106 const GumboStringPiece* str1, const GumboStringPiece* str2);
107
108 /**
109 * Compares two GumboStringPieces ignoring case, and returns true if they're
110 * equal or false otherwise.
111 */
112 bool gumbo_string_equals_ignore_case(
113 const GumboStringPiece* str1, const GumboStringPiece* str2);
114
115 /**
116 * A simple vector implementation. This stores a pointer to a data array and a
117 * length. All elements are stored as void*; client code must cast to the
118 * appropriate type. Overflows upon addition result in reallocation of the data
119 * array, with the size doubling to maintain O(1) amortized cost. There is no
120 * removal function, as this isn't needed for any of the operations within this
121 * library. Iteration can be done through inspecting the structure directly in
122 * a for-loop.
123 */
124 typedef struct {
125 /** Data elements. This points to a dynamically-allocated array of capacity
126 * elements, each a void* to the element itself.
127 */
128 void** data;
129
130 /** Number of elements currently in the vector. */
131 unsigned int length;
132
133 /** Current array capacity. */
134 unsigned int capacity;
135 } GumboVector;
136
137 /** An empty (0-length, 0-capacity) GumboVector. */
138 extern const GumboVector kGumboEmptyVector;
139
140 /**
141 * Returns the first index at which an element appears in this vector (testing
142 * by pointer equality), or -1 if it never does.
143 */
144 int gumbo_vector_index_of(GumboVector* vector, const void* element);
145
146 /**
147 * An enum for all the tags defined in the HTML5 standard. These correspond to
148 * the tag names themselves. Enum constants exist only for tags which appear in
149 * the spec itself (or for tags with special handling in the SVG and MathML
150 * namespaces); any other tags appear as GUMBO_TAG_UNKNOWN and the actual tag
151 * name can be obtained through original_tag.
152 *
153 * This is mostly for API convenience, so that clients of this library don't
154 * need to perform a strcasecmp to find the normalized tag name. It also has
155 * efficiency benefits, by letting the parser work with enums instead of
156 * strings.
157 */
158 typedef enum {
159 // Load all the tags from an external source, generated from tag.in.
160 #include "tag_enum.h"
161 // Used for all tags that don't have special handling in HTML. Add new tags
162 // to the end of tag.in so as to preserve backwards-compatibility.
163 GUMBO_TAG_UNKNOWN,
164 // A marker value to indicate the end of the enum, for iterating over it.
165 // Also used as the terminator for varargs functions that take tags.
166 GUMBO_TAG_LAST,
167 } GumboTag;
168
169 /**
170 * Returns the normalized (usually all-lowercased, except for foreign content)
171 * tag name for a GumboTag enum. Return value is static data owned by the
172 * library.
173 */
174 const char* gumbo_normalized_tagname(GumboTag tag);
175
176 /**
177 * Extracts the tag name from the original_text field of an element or token by
178 * stripping off </> characters and attributes and adjusting the passed-in
179 * GumboStringPiece appropriately. The tag name is in the original case and
180 * shares a buffer with the original text, to simplify memory management.
181 * Behavior is undefined if a string-piece that doesn't represent an HTML tag
182 * (<tagname> or </tagname>) is passed in. If the string piece is completely
183 * empty (NULL data pointer), then this function will exit successfully as a
184 * no-op.
185 */
186 void gumbo_tag_from_original_text(GumboStringPiece* text);
187
188 /**
189 * Fixes the case of SVG elements that are not all lowercase.
190 * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inforeign
191 * This is not done at parse time because there's no place to store a mutated
192 * tag name. tag_name is an enum (which will be TAG_UNKNOWN for most SVG tags
193 * without special handling), while original_tag_name is a pointer into the
194 * original buffer. Instead, we provide this helper function that clients can
195 * use to rename SVG tags as appropriate.
196 * Returns the case-normalized SVG tagname if a replacement is found, or NULL if
197 * no normalization is called for. The return value is static data and owned by
198 * the library.
199 */
200 const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
201
202 /**
203 * Converts a tag name string (which may be in upper or mixed case) to a tag
204 * enum. gumbo_tag_enum expects a NUL-terminated `tagname`; gumbo_tagn_enum takes its length.
205 */
206 GumboTag gumbo_tag_enum(const char* tagname);
207 GumboTag gumbo_tagn_enum(const char* tagname, size_t length);
208
209 /**
210 * Attribute namespaces.
211 * HTML includes special handling for XLink, XML, and XMLNS namespaces on
212 * attributes. Everything else goes in the generic "NONE" namespace.
213 */
214 typedef enum {
215 GUMBO_ATTR_NAMESPACE_NONE,
216 GUMBO_ATTR_NAMESPACE_XLINK,
217 GUMBO_ATTR_NAMESPACE_XML,
218 GUMBO_ATTR_NAMESPACE_XMLNS,
219 } GumboAttributeNamespaceEnum;
220
221 /**
222 * A struct representing a single attribute on an HTML tag. This is a
223 * name-value pair, but also includes information about source locations and
224 * original source text.
225 */
226 typedef struct {
227 /**
228 * The namespace for the attribute. This will usually be
229 * GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
230 * values, per:
231 * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
232 */
233 GumboAttributeNamespaceEnum attr_namespace;
234
235 /**
236 * The name of the attribute. This is in a freshly-allocated buffer to deal
237 * with case-normalization, and is null-terminated.
238 */
239 const char* name;
240
241 /**
242 * The original text of the attribute name, as a pointer into the original
243 * source buffer.
244 */
245 GumboStringPiece original_name;
246
247 /**
248 * The value of the attribute. This is in a freshly-allocated buffer to deal
249 * with unescaping, and is null-terminated. It does not include any quotes
250 * that surround the attribute. If the attribute has no value (for example,
251 * 'selected' on a checkbox), this will be an empty string.
252 */
253 const char* value;
254
255 /**
256 * The original text of the value of the attribute. This points into the
257 * original source buffer. It includes any quotes that surround the
258 * attribute, and you can look at original_value.data[0] and
259 * original_value.data[original_value.length - 1] to determine what the quote
260 * characters were. If the attribute has no value, this will be a 0-length
261 * string.
262 */
263 GumboStringPiece original_value;
264
265 /** The starting position of the attribute name. */
266 GumboSourcePosition name_start;
267
268 /**
269 * The ending position of the attribute name. This is not always derivable
270 * from the starting position of the value because of the possibility of
271 * whitespace around the = sign.
272 */
273 GumboSourcePosition name_end;
274
275 /** The starting position of the attribute value. */
276 GumboSourcePosition value_start;
277
278 /** The ending position of the attribute value. */
279 GumboSourcePosition value_end;
280 } GumboAttribute;
281
282 /**
283 * Given a vector of GumboAttributes, look up the one with the specified name
284 * and return it, or NULL if no such attribute exists. This uses a
285 * case-insensitive match, as HTML is case-insensitive.
286 */
287 GumboAttribute* gumbo_get_attribute(const GumboVector* attrs, const char* name);
288
289 /**
290 * Enum denoting the type of node. This determines the type of the node.v
291 * union.
292 */
293 typedef enum {
294 /** Document node. v will be a GumboDocument. */
295 GUMBO_NODE_DOCUMENT,
296 /** Element node. v will be a GumboElement. */
297 GUMBO_NODE_ELEMENT,
298 /** Text node. v will be a GumboText. */
299 GUMBO_NODE_TEXT,
300 /** CDATA node. v will be a GumboText. */
301 GUMBO_NODE_CDATA,
302 /** Comment node. v will be a GumboText, excluding comment delimiters. */
303 GUMBO_NODE_COMMENT,
304 /** Text node, where all contents is whitespace. v will be a GumboText. */
305 GUMBO_NODE_WHITESPACE,
306 /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
307 * client libraries will want to ignore the contents of template nodes, as
308 * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
309 * here, while clients that want to include template contents should also
310 * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
311 GUMBO_NODE_TEMPLATE
312 } GumboNodeType;
313
314 /**
315 * Forward declaration of GumboNode so it can be used recursively in
316 * GumboNode.parent.
317 */
318 typedef struct GumboInternalNode GumboNode;
319
320 /**
321 * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
322 */
323 typedef enum {
324 GUMBO_DOCTYPE_NO_QUIRKS,
325 GUMBO_DOCTYPE_QUIRKS,
326 GUMBO_DOCTYPE_LIMITED_QUIRKS
327 } GumboQuirksModeEnum;
328
329 /**
330 * Namespaces.
331 * Unlike in X(HT)ML, namespaces in HTML5 are not denoted by a prefix. Rather,
332 * anything inside an <svg> tag is in the SVG namespace, anything inside the
333 * <math> tag is in the MathML namespace, and anything else is inside the HTML
334 * namespace. No other namespaces are supported, so this can be an enum only.
335 */
336 typedef enum {
337 GUMBO_NAMESPACE_HTML,
338 GUMBO_NAMESPACE_SVG,
339 GUMBO_NAMESPACE_MATHML
340 } GumboNamespaceEnum;
341
342 /**
343 * Parse flags.
344 * We track the reasons for parser insertion of nodes and store them in a
345 * bitvector in the node itself. This lets client code optimize out nodes that
346 * are implied by the HTML structure of the document, or flag constructs that
347 * may not be allowed by a style guide, or track the prevalence of incorrect or
348 * tricky HTML code.
349 */
350 typedef enum {
351 /**
352 * A normal node - both start and end tags appear in the source, nothing has
353 * been reparented.
354 */
355 GUMBO_INSERTION_NORMAL = 0,
356
357 /**
358 * A node inserted by the parser to fulfill some implicit insertion rule.
359 * This is usually set in addition to some other flag giving a more specific
360 * insertion reason; it's a generic catch-all term meaning "The start tag for
361 * this node did not appear in the document source".
362 */
363 GUMBO_INSERTION_BY_PARSER = 1 << 0,
364
365 /**
366 * A flag indicating that the end tag for this node did not appear in the
367 * document source. Note that in some cases, you can still have
368 * parser-inserted nodes with an explicit end tag: for example, "Text</html>"
369 * has GUMBO_INSERTION_BY_PARSER set on the <html> node, but
370 * GUMBO_INSERTION_IMPLICIT_END_TAG is unset, as the </html> tag actually
371 * exists. This flag will be set only if the end tag is completely missing;
372 * in some cases, the end tag may be misplaced (eg. a </body> tag with text
373 * afterwards), which will leave this flag unset and require clients to
374 * inspect the parse errors for that case.
375 */
376 GUMBO_INSERTION_IMPLICIT_END_TAG = 1 << 1,
377
378 // Value 1 << 2 was for a flag that has since been removed.
379
380 /**
381 * A flag for nodes that are inserted because their presence is implied by
382 * other tags, eg. <html>, <head>, <body>, <tbody>, etc.
383 */
384 GUMBO_INSERTION_IMPLIED = 1 << 3,
385
386 /**
387 * A flag for nodes that are converted from their end tag equivalents. For
388 * example, </p> when no paragraph is open implies that the parser should
389 * create a <p> tag and immediately close it, while </br> means the same thing
390 * as <br>.
391 */
392 GUMBO_INSERTION_CONVERTED_FROM_END_TAG = 1 << 4,
393
394 /** A flag for nodes that are converted from the parse of an <isindex> tag. */
395 GUMBO_INSERTION_FROM_ISINDEX = 1 << 5,
396
397 /** A flag for <image> tags that are rewritten as <img>. */
398 GUMBO_INSERTION_FROM_IMAGE = 1 << 6,
399
400 /**
401 * A flag for nodes that are cloned as a result of the reconstruction of
402 * active formatting elements. This is set only on the clone; the initial
403 * portion of the formatting run is a NORMAL node with an IMPLICIT_END_TAG.
404 */
405 GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT = 1 << 7,
406
407 /** A flag for nodes that are cloned by the adoption agency algorithm. */
408 GUMBO_INSERTION_ADOPTION_AGENCY_CLONED = 1 << 8,
409
410 /** A flag for nodes that are moved by the adoption agency algorithm. */
411 GUMBO_INSERTION_ADOPTION_AGENCY_MOVED = 1 << 9,
412
413 /**
414 * A flag for nodes that have been foster-parented out of a table (or
415 * should've been foster-parented, if verbatim mode is set).
416 */
417 GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
418 } GumboParseFlags;
419
420 /**
421 * Information specific to document nodes.
422 */
423 typedef struct {
424 /**
425 * An array of GumboNodes, containing the children of this document. This will
426 * normally consist of the <html> element and any comment nodes found.
427 * Pointers are owned.
428 */
429 GumboVector /* GumboNode* */ children;
430
431 /** True if there was an explicit doctype token as opposed to it being omitted. */
432 bool has_doctype;
433
434 /** Fields from the doctype token, copied verbatim. */
435 const char* name;
436 const char* public_identifier;
437 const char* system_identifier;
438
439 /**
440 * Whether or not the document is in QuirksMode, as determined by the values
441 * in the GumboTokenDocType template.
442 */
443 GumboQuirksModeEnum doc_type_quirks_mode;
444 } GumboDocument;
445
446 /**
447 * The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements.
448 * This contains just a block of text and its position.
449 */
450 typedef struct {
451 /**
452 * The text of this node, after entities have been parsed and decoded. For
453 * comment/cdata nodes, this does not include the comment delimiters.
454 */
455 const char* text;
456
457 /**
458 * The original text of this node, as a pointer into the original buffer. For
459 * comment/cdata nodes, this includes the comment delimiters.
460 */
461 GumboStringPiece original_text;
462
463 /**
464 * The starting position of this node. This corresponds to the position of
465 * original_text, before entities are decoded.
466 * */
467 GumboSourcePosition start_pos;
468 } GumboText;
469
470 /**
471 * The struct used to represent all HTML elements. This contains information
472 * about the tag, attributes, and child nodes.
473 */
474 typedef struct {
475 /**
476 * An array of GumboNodes, containing the children of this element. Pointers
477 * are owned.
478 */
479 GumboVector /* GumboNode* */ children;
480
481 /** The GumboTag enum for this element. */
482 GumboTag tag;
483
484 /** The GumboNamespaceEnum for this element. */
485 GumboNamespaceEnum tag_namespace;
486
487 /**
488 * A GumboStringPiece pointing to the original tag text for this element,
489 * pointing directly into the source buffer. If the tag was inserted
490 * algorithmically (for example, <head> or <tbody> insertion), this will be a
491 * zero-length string.
492 */
493 GumboStringPiece original_tag;
494
495 /**
496 * A GumboStringPiece pointing to the original end tag text for this element.
497 * If the end tag was inserted algorithmically, (for example, closing a
498 * self-closing tag), this will be a zero-length string.
499 */
500 GumboStringPiece original_end_tag;
501
502 /** The source position for the start of the start tag. */
503 GumboSourcePosition start_pos;
504
505 /** The source position for the start of the end tag. */
506 GumboSourcePosition end_pos;
507
508 /**
509 * An array of GumboAttributes, containing the attributes for this tag in the
510 * order that they were parsed. Pointers are owned.
511 */
512 GumboVector /* GumboAttribute* */ attributes;
513 } GumboElement;
514
515 /**
516 * A supertype for GumboDocument, GumboElement, and GumboText, so that we can
517 * include one generic type in lists of children and cast as necessary to subtypes.
518 */
519 struct GumboInternalNode {
520 /** The type of node that this is; selects which member of v is valid. */
521 GumboNodeType type;
522
523 /** Pointer back to parent node. Not owned. */
524 GumboNode* parent;
525
526 /** The index within the parent's children vector of this node. */
527 size_t index_within_parent;
528
529 /**
530 * A bitvector of flags containing information about why this element was
531 * inserted into the parse tree, including a variety of special parse
532 * situations.
533 */
534 GumboParseFlags parse_flags;
535
536 /** The actual node data. */
537 union {
538 GumboDocument document; // For GUMBO_NODE_DOCUMENT.
539 GumboElement element; // For GUMBO_NODE_ELEMENT and GUMBO_NODE_TEMPLATE.
540 GumboText text; // For TEXT, CDATA, COMMENT, and WHITESPACE nodes.
541 } v;
542 };
543
544 /**
545 * The type for an allocator function. Takes the 'userdata' member of the
546 * GumboOptions struct as its first argument. Semantics should be the same as
547 * malloc, i.e. return a block of `size` bytes on success or NULL on failure.
548 * Allocating a block of 0 bytes behaves as per malloc.
549 */
550 // TODO(jdtang): Add checks throughout the codebase for out-of-memory condition.
551 typedef void* (*GumboAllocatorFunction)(void* userdata, size_t size);
552
553 /**
554 * The type for a deallocator function. Takes the 'userdata' member of the
555 * GumboOptions struct as its first argument.
556 */
557 typedef void (*GumboDeallocatorFunction)(void* userdata, void* ptr);
558
559 /**
560 * Input struct containing configuration options for the parser.
561 * These let you specify alternate memory managers, provide different error
562 * handling, etc.
563 * Use kGumboDefaultOptions for sensible defaults, and only set what you need.
564 */
565 typedef struct GumboInternalOptions {
566 /** A memory allocator function. Default: malloc. */
567 GumboAllocatorFunction allocator;
568
569 /** A memory deallocator function. Default: free. */
570 GumboDeallocatorFunction deallocator;
571
572 /**
573 * An opaque object that's passed in as the first argument to all callbacks
574 * used by this library. Default: NULL.
575 */
576 void* userdata;
577
578 /**
579 * The tab-stop size, for computing positions in source code that uses tabs.
580 * Default: 8.
581 */
582 int tab_stop;
583
584 /**
585 * Whether or not to stop parsing when the first error is encountered.
586 * Default: false.
587 */
588 bool stop_on_first_error;
589
590 /**
591 * The maximum number of errors before the parser stops recording them. This
592 * is provided so that if the page is totally borked, we don't completely fill
593 * up the errors vector and exhaust memory with useless redundant errors. Set
594 * to -1 to disable the limit.
595 * Default: -1
596 */
597 int max_errors;
598
599 /**
600 * The fragment context for parsing:
601 * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
602 *
603 * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
604 * the regular parsing algorithm. Otherwise, pass the tag enum for the
605 * intended parent of the parsed fragment. We use just the tag enum rather
606 * than a full node because that's enough to set all the parsing context we
607 * need, and it provides some additional flexibility for client code to act as
608 * if parsing a fragment even when a full HTML tree isn't available.
609 *
610 * Default: GUMBO_TAG_LAST
611 */
612 GumboTag fragment_context;
613
614 /**
615 * The namespace for the fragment context. This lets client code
616 * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
617 * HTML.
618 * Default: GUMBO_NAMESPACE_HTML
619 */
620 GumboNamespaceEnum fragment_namespace;
621 } GumboOptions;
622
623 /** Default options struct; use this with gumbo_parse_with_options. */
624 extern const GumboOptions kGumboDefaultOptions;
625
626 /** The output struct containing the results of the parse. */
627 typedef struct GumboInternalOutput {
628 /**
629 * Pointer to the document node. This is a GumboNode of type
630 * GUMBO_NODE_DOCUMENT that contains the entire document as its child.
631 */
632 GumboNode* document;
633
634 /**
635 * Pointer to the root node. This is the <html> tag that forms the root of the
636 * document.
637 */
638 GumboNode* root;
639
640 /**
641 * A list of errors that occurred during the parse.
642 * NOTE: In version 1.0 of this library, the API for errors hasn't been fully
643 * fleshed out and may change in the future. For this reason, the GumboError
644 * header isn't part of the public API. Contact us if you need errors
645 * reported so we can work out something appropriate for your use-case.
646 */
647 GumboVector /* GumboError* */ errors;
648 } GumboOutput;
649
650 /**
651 * Parses a buffer of UTF8 text into a GumboNode parse tree. The buffer must
652 * live at least as long as the parse tree, as some fields (eg. original_text)
653 * point directly into the original buffer.
654 *
655 * This doesn't support buffers longer than 4 gigabytes.
656 */
657 GumboOutput* gumbo_parse(const char* buffer);
658
659 /**
660 * Extended version of gumbo_parse that takes an explicit options structure,
661 * buffer, and length.
662 */
663 GumboOutput* gumbo_parse_with_options(
664 const GumboOptions* options, const char* buffer, size_t buffer_length);
665
666 /** Release the memory used for the parse tree & parse errors. */
667 void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
668
669 #ifdef __cplusplus
670 }
671 #endif
672
673 #endif // GUMBO_GUMBO_H_