Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/gumbo-parser/src/error.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2010 Google Inc. All Rights Reserved. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 // you may not use this file except in compliance with the License. | |
| 5 // You may obtain a copy of the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 // See the License for the specific language governing permissions and | |
| 13 // limitations under the License. | |
| 14 // | |
| 15 // Author: jdtang@google.com (Jonathan Tang) | |
| 16 // | |
| 17 // Error types, enums, and handling functions. | |
| 18 | |
| 19 #ifndef GUMBO_ERROR_H_ | |
| 20 #define GUMBO_ERROR_H_ | |
| 21 #ifdef _MSC_VER | |
| 22 #ifndef _CRT_SECURE_NO_WARNINGS | |
| 23 #define _CRT_SECURE_NO_WARNINGS | |
| 24 #endif | |
| 25 #endif | |
| 26 #include <stdint.h> | |
| 27 | |
| 28 #include "gumbo.h" | |
| 29 #include "insertion_mode.h" | |
| 30 #include "string_buffer.h" | |
| 31 #include "token_type.h" | |
| 32 | |
| 33 #ifdef __cplusplus | |
| 34 extern "C" { | |
| 35 #endif | |
| 36 | |
| 37 struct GumboInternalParser; | |
| 38 | |
| 39 typedef enum { | |
| 40 GUMBO_ERR_UTF8_INVALID, | |
| 41 GUMBO_ERR_UTF8_TRUNCATED, | |
| 42 GUMBO_ERR_UTF8_NULL, | |
| 43 GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS, | |
| 44 GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON, | |
| 45 GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, | |
| 46 GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON, | |
| 47 GUMBO_ERR_NAMED_CHAR_REF_INVALID, | |
| 48 GUMBO_ERR_TAG_STARTS_WITH_QUESTION, | |
| 49 GUMBO_ERR_TAG_EOF, | |
| 50 GUMBO_ERR_TAG_INVALID, | |
| 51 GUMBO_ERR_CLOSE_TAG_EMPTY, | |
| 52 GUMBO_ERR_CLOSE_TAG_EOF, | |
| 53 GUMBO_ERR_CLOSE_TAG_INVALID, | |
| 54 GUMBO_ERR_SCRIPT_EOF, | |
| 55 GUMBO_ERR_ATTR_NAME_EOF, | |
| 56 GUMBO_ERR_ATTR_NAME_INVALID, | |
| 57 GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF, | |
| 58 GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF, | |
| 59 GUMBO_ERR_ATTR_UNQUOTED_EOF, | |
| 60 GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET, | |
| 61 GUMBO_ERR_ATTR_UNQUOTED_EQUALS, | |
| 62 GUMBO_ERR_ATTR_AFTER_EOF, | |
| 63 GUMBO_ERR_ATTR_AFTER_INVALID, | |
| 64 GUMBO_ERR_DUPLICATE_ATTR, | |
| 65 GUMBO_ERR_SOLIDUS_EOF, | |
| 66 GUMBO_ERR_SOLIDUS_INVALID, | |
| 67 GUMBO_ERR_DASHES_OR_DOCTYPE, | |
| 68 GUMBO_ERR_COMMENT_EOF, | |
| 69 GUMBO_ERR_COMMENT_INVALID, | |
| 70 GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH, | |
| 71 GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH, | |
| 72 GUMBO_ERR_COMMENT_SPACE_AFTER_DOUBLE_DASH, | |
| 73 GUMBO_ERR_COMMENT_END_BANG_EOF, | |
| 74 GUMBO_ERR_DOCTYPE_EOF, | |
| 75 GUMBO_ERR_DOCTYPE_INVALID, | |
| 76 GUMBO_ERR_DOCTYPE_SPACE, | |
| 77 GUMBO_ERR_DOCTYPE_RIGHT_BRACKET, | |
| 78 GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET, | |
| 79 GUMBO_ERR_DOCTYPE_END, | |
| 80 GUMBO_ERR_PARSER, | |
| 81 GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG, | |
| 82 } GumboErrorType; | |
| 83 | |
| 84 // Additional data for duplicated attributes. | |
| 85 typedef struct GumboInternalDuplicateAttrError { | |
| 86 // The name of the attribute. Owned by this struct. | |
| 87 const char* name; | |
| 88 | |
| 89 // The (0-based) index within the attributes vector of the original | |
| 90 // occurrence. | |
| 91 unsigned int original_index; | |
| 92 | |
| 93 // The (0-based) index where the new occurrence would be. | |
| 94 unsigned int new_index; | |
| 95 } GumboDuplicateAttrError; | |
| 96 | |
| 97 // A simplified representation of the tokenizer state, designed to be more | |
| 98 // useful to clients of this library than the internal representation. This | |
| 99 // condenses the actual states used in the tokenizer state machine into a few | |
| 100 // values that will be familiar to users of HTML. | |
| 101 typedef enum { | |
| 102 GUMBO_ERR_TOKENIZER_DATA, | |
| 103 GUMBO_ERR_TOKENIZER_CHAR_REF, | |
| 104 GUMBO_ERR_TOKENIZER_RCDATA, | |
| 105 GUMBO_ERR_TOKENIZER_RAWTEXT, | |
| 106 GUMBO_ERR_TOKENIZER_PLAINTEXT, | |
| 107 GUMBO_ERR_TOKENIZER_SCRIPT, | |
| 108 GUMBO_ERR_TOKENIZER_TAG, | |
| 109 GUMBO_ERR_TOKENIZER_SELF_CLOSING_TAG, | |
| 110 GUMBO_ERR_TOKENIZER_ATTR_NAME, | |
| 111 GUMBO_ERR_TOKENIZER_ATTR_VALUE, | |
| 112 GUMBO_ERR_TOKENIZER_MARKUP_DECLARATION, | |
| 113 GUMBO_ERR_TOKENIZER_COMMENT, | |
| 114 GUMBO_ERR_TOKENIZER_DOCTYPE, | |
| 115 GUMBO_ERR_TOKENIZER_CDATA, | |
| 116 } GumboTokenizerErrorState; | |
| 117 | |
| 118 // Additional data for tokenizer errors. | |
| 119 // This records the current state and codepoint encountered - this is usually | |
| 120 // enough to reconstruct what went wrong and provide a friendly error message. | |
| 121 typedef struct GumboInternalTokenizerError { | |
| 122 // The bad codepoint encountered. | |
| 123 int codepoint; | |
| 124 | |
| 125 // The state that the tokenizer was in at the time. | |
| 126 GumboTokenizerErrorState state; | |
| 127 } GumboTokenizerError; | |
| 128 | |
| 129 // Additional data for parse errors. | |
| 130 typedef struct GumboInternalParserError { | |
| 131 // The type of input token that resulted in this error. | |
| 132 GumboTokenType input_type; | |
| 133 | |
| 134 // The HTML tag of the input token. GUMBO_TAG_UNKNOWN if this was not a tag token. | |
| 135 GumboTag input_tag; | |
| 136 | |
| 137 // The insertion mode that the parser was in at the time. | |
| 138 GumboInsertionMode parser_state; | |
| 139 | |
| 140 // The tag stack at the point of the error. Note that this is a GumboVector | |
| 141 // of GumboTags *stored by value* - cast the void* to a GumboTag directly to | |
| 142 // get at the tag. | |
| 143 GumboVector /* GumboTag */ tag_stack; | |
| 144 } GumboParserError; | |
| 145 | |
| 146 // The overall error struct representing an error in decoding/tokenizing/parsing | |
| 147 // the HTML. This contains an enumerated type flag, a source position, and then | |
| 148 // a union of fields containing data specific to the error. | |
| 149 typedef struct GumboInternalError { | |
| 150 // The type of error. | |
| 151 GumboErrorType type; | |
| 152 | |
| 153 // The position within the source file where the error occurred. | |
| 154 GumboSourcePosition position; | |
| 155 | |
| 156 // A pointer to the byte within the original source file text where the error | |
| 157 // occurred (note that this is not the same as position.offset, as that gives | |
| 158 // character-based instead of byte-based offsets). | |
| 159 const char* original_text; | |
| 160 | |
| 161 // Type-specific error information. | |
| 162 union { | |
| 163 // The code point we encountered, for: | |
| 164 // * GUMBO_ERR_UTF8_INVALID | |
| 165 // * GUMBO_ERR_UTF8_TRUNCATED | |
| 166 // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON | |
| 167 // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID | |
| 168 uint64_t codepoint; | |
| 169 | |
| 170 // Tokenizer errors. | |
| 171 GumboTokenizerError tokenizer; | |
| 172 | |
| 173 // Short textual data, for: | |
| 174 // * GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON | |
| 175 // * GUMBO_ERR_NAMED_CHAR_REF_INVALID | |
| 176 GumboStringPiece text; | |
| 177 | |
| 178 // Duplicate attribute data, for GUMBO_ERR_DUPLICATE_ATTR. | |
| 179 GumboDuplicateAttrError duplicate_attr; | |
| 180 | |
| 181 // Parser state, for GUMBO_ERR_PARSER and | |
| 182 // GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG. | |
| 183 struct GumboInternalParserError parser; | |
| 184 } v; | |
| 185 } GumboError; | |
| 186 | |
| 187 // Adds a new error to the parser's error list, and returns a pointer to it so | |
| 188 // that clients can fill out the rest of its fields. May return NULL if we're | |
| 189 // already over the max_errors field specified in GumboOptions. | |
| 190 GumboError* gumbo_add_error(struct GumboInternalParser* parser); | |
| 191 | |
| 192 // Initializes the errors vector in the parser. | |
| 193 void gumbo_init_errors(struct GumboInternalParser* errors); | |
| 194 | |
| 195 // Frees all the errors in the 'errors_' field of the parser. | |
| 196 void gumbo_destroy_errors(struct GumboInternalParser* errors); | |
| 197 | |
| 198 // Frees the memory used for a single GumboError. | |
| 199 void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error); | |
| 200 | |
| 201 // Prints an error to a string. This fills an empty GumboStringBuffer with a | |
| 202 // freshly-allocated buffer containing the error message text. The caller is | |
| 203 // responsible for deleting the buffer. (Note that the buffer is allocated with | |
| 204 // the allocator specified in the GumboParser config and hence should be freed | |
| 205 // by gumbo_parser_deallocate().) | |
| 206 void gumbo_error_to_string(struct GumboInternalParser* parser, | |
| 207 const GumboError* error, GumboStringBuffer* output); | |
| 208 | |
| 209 // Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer | |
| 210 // with a freshly-allocated buffer containing the error message text. The | |
| 211 // caller is responsible for deleting the buffer. (Note that the buffer is | |
| 212 // allocated with the allocator specified in the GumboParser config and hence | |
| 213 // should be freed by gumbo_parser_deallocate().) | |
| 214 void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser, | |
| 215 const GumboError* error, const char* source_text, | |
| 216 GumboStringBuffer* output); | |
| 217 | |
| 218 // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead | |
| 219 // of writing to a string. | |
| 220 void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser, | |
| 221 const GumboError* error, const char* source_text); | |
| 222 | |
| 223 #ifdef __cplusplus | |
| 224 } | |
| 225 #endif | |
| 226 | |
| 227 #endif // GUMBO_ERROR_H_ |
