Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/gumbo-parser/src/error.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2010 Google Inc. All Rights Reserved. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 // you may not use this file except in compliance with the License. | |
| 5 // You may obtain a copy of the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 // See the License for the specific language governing permissions and | |
| 13 // limitations under the License. | |
| 14 // | |
| 15 // Author: jdtang@google.com (Jonathan Tang) | |
| 16 | |
| 17 #include "error.h" | |
| 18 | |
| 19 #include <assert.h> | |
| 20 #include <stdarg.h> | |
| 21 #include <stdio.h> | |
| 22 #include <string.h> | |
| 23 | |
| 24 #include "gumbo.h" | |
| 25 #include "parser.h" | |
| 26 #include "string_buffer.h" | |
| 27 #include "util.h" | |
| 28 #include "vector.h" | |
| 29 | |
| 30 // Prints a formatted message to a StringBuffer. This automatically resizes the | |
| 31 // StringBuffer as necessary to fit the message. Returns the number of bytes | |
| 32 // written. | |
| 33 static int print_message( | |
| 34 GumboParser* parser, GumboStringBuffer* output, const char* format, ...) { | |
| 35 va_list args; | |
| 36 size_t remaining_capacity = output->capacity - output->length; | |
| 37 va_start(args, format); | |
| 38 int bytes_written = vsnprintf( | |
| 39 output->data + output->length, remaining_capacity, format, args); | |
| 40 va_end(args); | |
| 41 #ifdef _MSC_VER | |
| 42 if (bytes_written == -1) { | |
| 43 // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of | |
| 44 // returning the number of bytes that would've been written had there been | |
| 45 // enough. In this case, we'll double the buffer size and hope it fits when | |
| 46 // we retry (letting it fail and returning 0 if it doesn't), since there's | |
| 47 // no way to smartly resize the buffer. | |
| 48 gumbo_string_buffer_reserve(parser, output->capacity * 2, output); | |
| 49 va_start(args, format); | |
| 50 int result = vsnprintf( | |
| 51 output->data + output->length, remaining_capacity, format, args); | |
| 52 va_end(args); | |
| 53 return result == -1 ? 0 : result; | |
| 54 } | |
| 55 #else | |
| 56 // -1 in standard C99 indicates an encoding error. Return 0 and do nothing. | |
| 57 if (bytes_written == -1) { | |
| 58 return 0; | |
| 59 } | |
| 60 #endif | |
| 61 | |
| 62 if (bytes_written > remaining_capacity) { | |
| 63 gumbo_string_buffer_reserve( | |
| 64 parser, output->capacity + bytes_written, output); | |
| 65 remaining_capacity = output->capacity - output->length; | |
| 66 va_start(args, format); | |
| 67 bytes_written = vsnprintf( | |
| 68 output->data + output->length, remaining_capacity, format, args); | |
| 69 va_end(args); | |
| 70 } | |
| 71 output->length += bytes_written; | |
| 72 return bytes_written; | |
| 73 } | |
| 74 | |
| 75 static void print_tag_stack(GumboParser* parser, const GumboParserError* error, | |
| 76 GumboStringBuffer* output) { | |
| 77 print_message(parser, output, " Currently open tags: "); | |
| 78 for (unsigned int i = 0; i < error->tag_stack.length; ++i) { | |
| 79 if (i) { | |
| 80 print_message(parser, output, ", "); | |
| 81 } | |
| 82 GumboTag tag = (GumboTag) (intptr_t) error->tag_stack.data[i]; | |
| 83 print_message(parser, output, gumbo_normalized_tagname(tag)); | |
| 84 } | |
| 85 gumbo_string_buffer_append_codepoint(parser, '.', output); | |
| 86 } | |
| 87 | |
| 88 static void handle_parser_error(GumboParser* parser, | |
| 89 const GumboParserError* error, GumboStringBuffer* output) { | |
| 90 if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL && | |
| 91 error->input_type != GUMBO_TOKEN_DOCTYPE) { | |
| 92 print_message( | |
| 93 parser, output, "The doctype must be the first token in the document"); | |
| 94 return; | |
| 95 } | |
| 96 | |
| 97 switch (error->input_type) { | |
| 98 case GUMBO_TOKEN_DOCTYPE: | |
| 99 print_message(parser, output, "This is not a legal doctype"); | |
| 100 return; | |
| 101 case GUMBO_TOKEN_COMMENT: | |
| 102 // Should never happen; comments are always legal. | |
| 103 assert(0); | |
| 104 // But just in case... | |
| 105 print_message(parser, output, "Comments aren't legal here"); | |
| 106 return; | |
| 107 case GUMBO_TOKEN_CDATA: | |
| 108 case GUMBO_TOKEN_WHITESPACE: | |
| 109 case GUMBO_TOKEN_CHARACTER: | |
| 110 print_message(parser, output, "Character tokens aren't legal here"); | |
| 111 return; | |
| 112 case GUMBO_TOKEN_NULL: | |
| 113 print_message(parser, output, "Null bytes are not allowed in HTML5"); | |
| 114 return; | |
| 115 case GUMBO_TOKEN_EOF: | |
| 116 if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) { | |
| 117 print_message(parser, output, "You must provide a doctype"); | |
| 118 } else { | |
| 119 print_message(parser, output, "Premature end of file"); | |
| 120 print_tag_stack(parser, error, output); | |
| 121 } | |
| 122 return; | |
| 123 case GUMBO_TOKEN_START_TAG: | |
| 124 case GUMBO_TOKEN_END_TAG: | |
| 125 print_message(parser, output, "That tag isn't allowed here"); | |
| 126 print_tag_stack(parser, error, output); | |
| 127 // TODO(jdtang): Give more specific messaging. | |
| 128 return; | |
| 129 } | |
| 130 } | |
| 131 | |
// Finds the start of the line that contains error_location.
//
// Scans backwards from error_location and returns a pointer to the character
// that follows the closest preceding newline, or original_text itself when the
// location is on the first line. A newline sitting exactly at error_location
// is treated as the end of its own line, so the result is never greater than
// error_location.
//
// error_location must point into (or at the terminating NUL of) the buffer
// that starts at original_text.
static const char* find_last_newline(
    const char* original_text, const char* error_location) {
  assert(error_location >= original_text);
  const char* c = error_location;
  // Inspect the character *before* the cursor: the cursor is at a line start
  // exactly when it is at the buffer start or directly after a '\n'. This
  // never looks at *error_location, which may be a newline or the NUL at EOF.
  while (c != original_text && c[-1] != '\n') {
    --c;
  }
  return c;
}
| 145 | |
// Locates the end of the line that contains error_location: returns a pointer
// to the first '\n' at or after error_location, or to the terminating NUL byte
// when no newline follows. original_text is accepted for symmetry with
// find_last_newline but is not consulted.
static const char* find_next_newline(
    const char* original_text, const char* error_location) {
  (void) original_text;
  // strcspn yields the length of the leading run that contains no '\n'; it
  // stops at the NUL terminator by itself.
  return error_location + strcspn(error_location, "\n");
}
| 156 | |
| 157 GumboError* gumbo_add_error(GumboParser* parser) { | |
| 158 int max_errors = parser->_options->max_errors; | |
| 159 if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) { | |
| 160 return NULL; | |
| 161 } | |
| 162 GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError)); | |
| 163 gumbo_vector_add(parser, error, &parser->_output->errors); | |
| 164 return error; | |
| 165 } | |
| 166 | |
| 167 void gumbo_error_to_string( | |
| 168 GumboParser* parser, const GumboError* error, GumboStringBuffer* output) { | |
| 169 print_message( | |
| 170 parser, output, "@%d:%d: ", error->position.line, error->position.column); | |
| 171 switch (error->type) { | |
| 172 case GUMBO_ERR_UTF8_INVALID: | |
| 173 print_message( | |
| 174 parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint); | |
| 175 break; | |
| 176 case GUMBO_ERR_UTF8_TRUNCATED: | |
| 177 print_message(parser, output, | |
| 178 "Input stream ends with a truncated UTF8 character 0x%x", | |
| 179 error->v.codepoint); | |
| 180 break; | |
| 181 case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS: | |
| 182 print_message( | |
| 183 parser, output, "No digits after &# in numeric character reference"); | |
| 184 break; | |
| 185 case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON: | |
| 186 print_message(parser, output, | |
| 187 "The numeric character reference &#%d should be followed " | |
| 188 "by a semicolon", | |
| 189 error->v.codepoint); | |
| 190 break; | |
| 191 case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID: | |
| 192 print_message(parser, output, | |
| 193 "The numeric character reference &#%d; encodes an invalid " | |
| 194 "unicode codepoint", | |
| 195 error->v.codepoint); | |
| 196 break; | |
| 197 case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON: | |
| 198 // The textual data came from one of the literal strings in the table, and | |
| 199 // so it'll be null-terminated. | |
| 200 print_message(parser, output, | |
| 201 "The named character reference &%.*s should be followed by a " | |
| 202 "semicolon", | |
| 203 (int) error->v.text.length, error->v.text.data); | |
| 204 break; | |
| 205 case GUMBO_ERR_NAMED_CHAR_REF_INVALID: | |
| 206 print_message(parser, output, | |
| 207 "The named character reference &%.*s; is not a valid entity name", | |
| 208 (int) error->v.text.length, error->v.text.data); | |
| 209 break; | |
| 210 case GUMBO_ERR_DUPLICATE_ATTR: | |
| 211 print_message(parser, output, | |
| 212 "Attribute %s occurs multiple times, at positions %d and %d", | |
| 213 error->v.duplicate_attr.name, error->v.duplicate_attr.original_index, | |
| 214 error->v.duplicate_attr.new_index); | |
| 215 break; | |
| 216 case GUMBO_ERR_PARSER: | |
| 217 case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG: | |
| 218 handle_parser_error(parser, &error->v.parser, output); | |
| 219 break; | |
| 220 default: | |
| 221 print_message(parser, output, | |
| 222 "Tokenizer error with an unimplemented error message"); | |
| 223 break; | |
| 224 } | |
| 225 gumbo_string_buffer_append_codepoint(parser, '.', output); | |
| 226 } | |
| 227 | |
| 228 void gumbo_caret_diagnostic_to_string(GumboParser* parser, | |
| 229 const GumboError* error, const char* source_text, | |
| 230 GumboStringBuffer* output) { | |
| 231 gumbo_error_to_string(parser, error, output); | |
| 232 | |
| 233 const char* line_start = find_last_newline(source_text, error->original_text); | |
| 234 const char* line_end = find_next_newline(source_text, error->original_text); | |
| 235 GumboStringPiece original_line; | |
| 236 original_line.data = line_start; | |
| 237 original_line.length = line_end - line_start; | |
| 238 | |
| 239 gumbo_string_buffer_append_codepoint(parser, '\n', output); | |
| 240 gumbo_string_buffer_append_string(parser, &original_line, output); | |
| 241 gumbo_string_buffer_append_codepoint(parser, '\n', output); | |
| 242 gumbo_string_buffer_reserve( | |
| 243 parser, output->length + error->position.column, output); | |
| 244 int num_spaces = error->position.column - 1; | |
| 245 memset(output->data + output->length, ' ', num_spaces); | |
| 246 output->length += num_spaces; | |
| 247 gumbo_string_buffer_append_codepoint(parser, '^', output); | |
| 248 gumbo_string_buffer_append_codepoint(parser, '\n', output); | |
| 249 } | |
| 250 | |
| 251 void gumbo_print_caret_diagnostic( | |
| 252 GumboParser* parser, const GumboError* error, const char* source_text) { | |
| 253 GumboStringBuffer text; | |
| 254 gumbo_string_buffer_init(parser, &text); | |
| 255 gumbo_caret_diagnostic_to_string(parser, error, source_text, &text); | |
| 256 printf("%.*s", (int) text.length, text.data); | |
| 257 gumbo_string_buffer_destroy(parser, &text); | |
| 258 } | |
| 259 | |
| 260 void gumbo_error_destroy(GumboParser* parser, GumboError* error) { | |
| 261 if (error->type == GUMBO_ERR_PARSER || | |
| 262 error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG) { | |
| 263 gumbo_vector_destroy(parser, &error->v.parser.tag_stack); | |
| 264 } else if (error->type == GUMBO_ERR_DUPLICATE_ATTR) { | |
| 265 gumbo_parser_deallocate(parser, (void*) error->v.duplicate_attr.name); | |
| 266 } | |
| 267 gumbo_parser_deallocate(parser, error); | |
| 268 } | |
| 269 | |
| 270 void gumbo_init_errors(GumboParser* parser) { | |
| 271 gumbo_vector_init(parser, 5, &parser->_output->errors); | |
| 272 } | |
| 273 | |
| 274 void gumbo_destroy_errors(GumboParser* parser) { | |
| 275 for (unsigned int i = 0; i < parser->_output->errors.length; ++i) { | |
| 276 gumbo_error_destroy(parser, parser->_output->errors.data[i]); | |
| 277 } | |
| 278 gumbo_vector_destroy(parser, &parser->_output->errors); | |
| 279 } |
