comparison mupdf-source/thirdparty/gumbo-parser/src/error.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Author: jdtang@google.com (Jonathan Tang)
16 //
17 // Error types, enums, and handling functions.
18
19 #ifndef GUMBO_ERROR_H_
20 #define GUMBO_ERROR_H_
21 #ifdef _MSC_VER
22 #ifndef _CRT_SECURE_NO_WARNINGS
23 #define _CRT_SECURE_NO_WARNINGS
24 #endif
25 #endif
26 #include <stdint.h>
27
28 #include "gumbo.h"
29 #include "insertion_mode.h"
30 #include "string_buffer.h"
31 #include "token_type.h"
32
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36
37 struct GumboInternalParser; // Forward declaration; this header only uses pointers to it.
38
39 typedef enum { // Kind of error; stored in GumboError.type.
40 GUMBO_ERR_UTF8_INVALID,
41 GUMBO_ERR_UTF8_TRUNCATED,
42 GUMBO_ERR_UTF8_NULL,
43 GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS,
44 GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,
45 GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
46 GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,
47 GUMBO_ERR_NAMED_CHAR_REF_INVALID,
48 GUMBO_ERR_TAG_STARTS_WITH_QUESTION,
49 GUMBO_ERR_TAG_EOF,
50 GUMBO_ERR_TAG_INVALID,
51 GUMBO_ERR_CLOSE_TAG_EMPTY,
52 GUMBO_ERR_CLOSE_TAG_EOF,
53 GUMBO_ERR_CLOSE_TAG_INVALID,
54 GUMBO_ERR_SCRIPT_EOF,
55 GUMBO_ERR_ATTR_NAME_EOF,
56 GUMBO_ERR_ATTR_NAME_INVALID,
57 GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF,
58 GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF,
59 GUMBO_ERR_ATTR_UNQUOTED_EOF,
60 GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET,
61 GUMBO_ERR_ATTR_UNQUOTED_EQUALS,
62 GUMBO_ERR_ATTR_AFTER_EOF,
63 GUMBO_ERR_ATTR_AFTER_INVALID,
64 GUMBO_ERR_DUPLICATE_ATTR,
65 GUMBO_ERR_SOLIDUS_EOF,
66 GUMBO_ERR_SOLIDUS_INVALID,
67 GUMBO_ERR_DASHES_OR_DOCTYPE,
68 GUMBO_ERR_COMMENT_EOF,
69 GUMBO_ERR_COMMENT_INVALID,
70 GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH,
71 GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH,
72 GUMBO_ERR_COMMENT_SPACE_AFTER_DOUBLE_DASH,
73 GUMBO_ERR_COMMENT_END_BANG_EOF,
74 GUMBO_ERR_DOCTYPE_EOF,
75 GUMBO_ERR_DOCTYPE_INVALID,
76 GUMBO_ERR_DOCTYPE_SPACE,
77 GUMBO_ERR_DOCTYPE_RIGHT_BRACKET,
78 GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET,
79 GUMBO_ERR_DOCTYPE_END,
80 GUMBO_ERR_PARSER,
81 GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG,
82 } GumboErrorType;
83
84 // Additional data for duplicated attributes (GUMBO_ERR_DUPLICATE_ATTR).
85 typedef struct GumboInternalDuplicateAttrError {
86 // The name of the attribute. Owned by this struct.
87 const char* name;
88
89 // The (0-based) index within the attributes vector of the original
90 // occurrence.
91 unsigned int original_index;
92
93 // The (0-based) index where the new occurrence would be.
94 unsigned int new_index;
95 } GumboDuplicateAttrError;
96
97 // A simplified representation of the tokenizer state, designed to be more
98 // useful to clients of this library than the internal representation. This
99 // condenses the actual states used in the tokenizer state machine into a few
100 // values that will be familiar to users of HTML.
101 typedef enum { // Recorded in GumboTokenizerError.state.
102 GUMBO_ERR_TOKENIZER_DATA,
103 GUMBO_ERR_TOKENIZER_CHAR_REF,
104 GUMBO_ERR_TOKENIZER_RCDATA,
105 GUMBO_ERR_TOKENIZER_RAWTEXT,
106 GUMBO_ERR_TOKENIZER_PLAINTEXT,
107 GUMBO_ERR_TOKENIZER_SCRIPT,
108 GUMBO_ERR_TOKENIZER_TAG,
109 GUMBO_ERR_TOKENIZER_SELF_CLOSING_TAG,
110 GUMBO_ERR_TOKENIZER_ATTR_NAME,
111 GUMBO_ERR_TOKENIZER_ATTR_VALUE,
112 GUMBO_ERR_TOKENIZER_MARKUP_DECLARATION,
113 GUMBO_ERR_TOKENIZER_COMMENT,
114 GUMBO_ERR_TOKENIZER_DOCTYPE,
115 GUMBO_ERR_TOKENIZER_CDATA,
116 } GumboTokenizerErrorState;
117
118 // Additional data for tokenizer errors (the 'tokenizer' member of GumboError.v).
119 // This records the current state and codepoint encountered - this is usually
120 // enough to reconstruct what went wrong and provide a friendly error message.
121 typedef struct GumboInternalTokenizerError {
122 // The bad codepoint encountered.
123 int codepoint;
124
125 // The (simplified) state that the tokenizer was in at the time.
126 GumboTokenizerErrorState state;
127 } GumboTokenizerError;
128
129 // Additional data for parse errors (the 'parser' member of GumboError.v).
130 typedef struct GumboInternalParserError {
131 // The type of input token that resulted in this error.
132 GumboTokenType input_type;
133
134 // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
135 GumboTag input_tag;
136
137 // The insertion mode that the parser was in at the time.
138 GumboInsertionMode parser_state;
139
140 // The tag stack at the point of the error. Note that this is a GumboVector
141 // of GumboTags *stored by value* - cast the void* to a GumboTag directly to
142 // get at the tag.
143 GumboVector /* GumboTag */ tag_stack;
144 } GumboParserError;
145
146 // The overall error struct representing an error in decoding/tokenizing/parsing
147 // the HTML. This contains an enumerated type flag, a source position, and then
148 // a union of fields containing data specific to the error.
149 typedef struct GumboInternalError {
150 // The type of error.
151 GumboErrorType type;
152
153 // The position within the source file where the error occurred.
154 GumboSourcePosition position;
155
156 // A pointer to the byte within the original source file text where the error
157 // occurred (note that this is not the same as position.offset, as that gives
158 // character-based instead of byte-based offsets).
159 const char* original_text;
160
161 // Type-specific error information.
162 union {
163 // The code point we encountered, for:
164 // * GUMBO_ERR_UTF8_INVALID
165 // * GUMBO_ERR_UTF8_TRUNCATED
166 // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON
167 // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID
168 uint64_t codepoint;
169
170 // Tokenizer errors.
171 GumboTokenizerError tokenizer;
172
173 // Short textual data, for:
174 // * GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON
175 // * GUMBO_ERR_NAMED_CHAR_REF_INVALID
176 GumboStringPiece text;
177
178 // Duplicate attribute data, for GUMBO_ERR_DUPLICATE_ATTR.
179 GumboDuplicateAttrError duplicate_attr;
180
181 // Parser state, for GUMBO_ERR_PARSER and
182 // GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG.
183 struct GumboInternalParserError parser;
184 } v;
185 } GumboError;
186
187 // Adds a new error to the parser's error list and returns a pointer to it so
188 // that callers can fill out the rest of its fields. May return NULL (check
189 // before use) if we're already over max_errors as specified in GumboOptions.
190 GumboError* gumbo_add_error(struct GumboInternalParser* parser);
191
192 // Initializes the errors vector in the parser ('errors' is the parser itself).
193 void gumbo_init_errors(struct GumboInternalParser* errors);
194
195 // Frees all the errors in the 'errors_' field of the parser passed as 'errors'.
196 void gumbo_destroy_errors(struct GumboInternalParser* errors);
197
198 // Frees the memory used for a single GumboError. NOTE(review): presumably via the parser's allocator - confirm.
199 void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
200
201 // Prints an error to a string. This fills an empty GumboStringBuffer
202 // ('output') with a freshly-allocated buffer containing the error message
203 // text. The caller is responsible for deleting the buffer. (Note that the
204 // buffer is allocated with the allocator specified in the GumboParser config
205 // and hence should be freed by gumbo_parser_deallocate().)
206 void gumbo_error_to_string(struct GumboInternalParser* parser,
207 const GumboError* error, GumboStringBuffer* output);
208
209 // Prints a caret diagnostic for 'error' to a string. This fills an empty
210 // GumboStringBuffer ('output') with a freshly-allocated buffer containing the
211 // diagnostic text. The caller is responsible for deleting the buffer. (Note
212 // that the buffer is allocated with the allocator specified in the GumboParser
213 // config and hence should be freed by gumbo_parser_deallocate().)
214 void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
215 const GumboError* error, const char* source_text,
216 GumboStringBuffer* output);
217
218 // Like gumbo_caret_diagnostic_to_string(), but prints the text to stdout instead
219 // of writing to a string; it takes no output buffer, so there is nothing to free.
220 void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
221 const GumboError* error, const char* source_text);
222
223 #ifdef __cplusplus
224 }
225 #endif
226
227 #endif // GUMBO_ERROR_H_