Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/curl/docs/examples/htmltitle.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*************************************************************************** | |
| 2 * _ _ ____ _ | |
| 3 * Project ___| | | | _ \| | | |
| 4 * / __| | | | |_) | | | |
| 5 * | (__| |_| | _ <| |___ | |
| 6 * \___|\___/|_| \_\_____| | |
| 7 * | |
| 8 * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al. | |
| 9 * | |
| 10 * This software is licensed as described in the file COPYING, which | |
| 11 * you should have received as part of this distribution. The terms | |
| 12 * are also available at https://curl.haxx.se/docs/copyright.html. | |
| 13 * | |
| 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell | |
| 15 * copies of the Software, and permit persons to whom the Software is | |
| 16 * furnished to do so, under the terms of the COPYING file. | |
| 17 * | |
| 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | |
| 19 * KIND, either express or implied. | |
| 20 * | |
| 21 ***************************************************************************/ | |
| 22 /* <DESC> | |
| 23 * Get a web page, extract the title with libxml. | |
| 24 * </DESC> | |
| 25 | |
| 26 Written by Lars Nilsson | |
| 27 | |
| 28 GNU C++ compile command line suggestion (edit paths accordingly): | |
| 29 | |
| 30 g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \ | |
| 31 -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2 | |
| 32 */ | |
| 33 #include <stdio.h> | |
| 34 #include <string.h> | |
| 35 #include <stdlib.h> | |
| 36 #include <string> | |
| 37 #include <curl/curl.h> | |
| 38 #include <libxml/HTMLparser.h> | |
| 39 | |
//
// Case-insensitive string comparison
//
// COMPARE(a, b) is non-zero when the two C strings are equal ignoring
// case, and zero otherwise.
//

#ifdef _MSC_VER
#define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
// POSIX declares strcasecmp() in <strings.h>; <string.h> only exposes it
// as an extension on some C libraries.
#include <strings.h>
#define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif
| 49 | |
//
// State handed to every libxml SAX callback through its user-data pointer
//

struct Context
{
  // Set while character data should be collected into `title`
  bool addTitle;

  // Text accumulated for the title element
  std::string title;

  // Start with collection switched off and an empty title
  Context()
    : addTitle(false),
      title()
  {
  }
};
| 61 | |
| 62 // | |
| 63 // libcurl variables for error strings and returned data | |
| 64 | |
| 65 static char errorBuffer[CURL_ERROR_SIZE]; | |
| 66 static std::string buffer; | |
| 67 | |
//
// libcurl write callback function
//
// Appends the size * nmemb bytes at `data` to *writerData and returns the
// number of bytes taken. libcurl treats any return value that differs from
// size * nmemb as a write error, so 0 is returned when no destination
// string was supplied. The return type is size_t, as the callback
// prototype requires; an int would not match it.
//

static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if(writerData == NULL)
    return 0;

  const size_t total = size * nmemb;

  writerData->append(data, total);

  return total;
}
| 82 | |
| 83 // | |
| 84 // libcurl connection initialization | |
| 85 // | |
| 86 | |
| 87 static bool init(CURL *&conn, char *url) | |
| 88 { | |
| 89 CURLcode code; | |
| 90 | |
| 91 conn = curl_easy_init(); | |
| 92 | |
| 93 if(conn == NULL) { | |
| 94 fprintf(stderr, "Failed to create CURL connection\n"); | |
| 95 exit(EXIT_FAILURE); | |
| 96 } | |
| 97 | |
| 98 code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer); | |
| 99 if(code != CURLE_OK) { | |
| 100 fprintf(stderr, "Failed to set error buffer [%d]\n", code); | |
| 101 return false; | |
| 102 } | |
| 103 | |
| 104 code = curl_easy_setopt(conn, CURLOPT_URL, url); | |
| 105 if(code != CURLE_OK) { | |
| 106 fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer); | |
| 107 return false; | |
| 108 } | |
| 109 | |
| 110 code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L); | |
| 111 if(code != CURLE_OK) { | |
| 112 fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer); | |
| 113 return false; | |
| 114 } | |
| 115 | |
| 116 code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer); | |
| 117 if(code != CURLE_OK) { | |
| 118 fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer); | |
| 119 return false; | |
| 120 } | |
| 121 | |
| 122 code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer); | |
| 123 if(code != CURLE_OK) { | |
| 124 fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer); | |
| 125 return false; | |
| 126 } | |
| 127 | |
| 128 return true; | |
| 129 } | |
| 130 | |
| 131 // | |
| 132 // libxml start element callback function | |
| 133 // | |
| 134 | |
| 135 static void StartElement(void *voidContext, | |
| 136 const xmlChar *name, | |
| 137 const xmlChar **attributes) | |
| 138 { | |
| 139 Context *context = static_cast<Context *>(voidContext); | |
| 140 | |
| 141 if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) { | |
| 142 context->title = ""; | |
| 143 context->addTitle = true; | |
| 144 } | |
| 145 (void) attributes; | |
| 146 } | |
| 147 | |
| 148 // | |
| 149 // libxml end element callback function | |
| 150 // | |
| 151 | |
| 152 static void EndElement(void *voidContext, | |
| 153 const xmlChar *name) | |
| 154 { | |
| 155 Context *context = static_cast<Context *>(voidContext); | |
| 156 | |
| 157 if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) | |
| 158 context->addTitle = false; | |
| 159 } | |
| 160 | |
| 161 // | |
| 162 // Text handling helper function | |
| 163 // | |
| 164 | |
| 165 static void handleCharacters(Context *context, | |
| 166 const xmlChar *chars, | |
| 167 int length) | |
| 168 { | |
| 169 if(context->addTitle) | |
| 170 context->title.append(reinterpret_cast<char *>(chars), length); | |
| 171 } | |
| 172 | |
| 173 // | |
| 174 // libxml PCDATA callback function | |
| 175 // | |
| 176 | |
| 177 static void Characters(void *voidContext, | |
| 178 const xmlChar *chars, | |
| 179 int length) | |
| 180 { | |
| 181 Context *context = static_cast<Context *>(voidContext); | |
| 182 | |
| 183 handleCharacters(context, chars, length); | |
| 184 } | |
| 185 | |
| 186 // | |
| 187 // libxml CDATA callback function | |
| 188 // | |
| 189 | |
| 190 static void cdata(void *voidContext, | |
| 191 const xmlChar *chars, | |
| 192 int length) | |
| 193 { | |
| 194 Context *context = static_cast<Context *>(voidContext); | |
| 195 | |
| 196 handleCharacters(context, chars, length); | |
| 197 } | |
| 198 | |
| 199 // | |
| 200 // libxml SAX callback structure | |
| 201 // | |
| 202 | |
| 203 static htmlSAXHandler saxHandler = | |
| 204 { | |
| 205 NULL, | |
| 206 NULL, | |
| 207 NULL, | |
| 208 NULL, | |
| 209 NULL, | |
| 210 NULL, | |
| 211 NULL, | |
| 212 NULL, | |
| 213 NULL, | |
| 214 NULL, | |
| 215 NULL, | |
| 216 NULL, | |
| 217 NULL, | |
| 218 NULL, | |
| 219 StartElement, | |
| 220 EndElement, | |
| 221 NULL, | |
| 222 Characters, | |
| 223 NULL, | |
| 224 NULL, | |
| 225 NULL, | |
| 226 NULL, | |
| 227 NULL, | |
| 228 NULL, | |
| 229 NULL, | |
| 230 cdata, | |
| 231 NULL | |
| 232 }; | |
| 233 | |
| 234 // | |
| 235 // Parse given (assumed to be) HTML text and return the title | |
| 236 // | |
| 237 | |
| 238 static void parseHtml(const std::string &html, | |
| 239 std::string &title) | |
| 240 { | |
| 241 htmlParserCtxtPtr ctxt; | |
| 242 Context context; | |
| 243 | |
| 244 ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "", | |
| 245 XML_CHAR_ENCODING_NONE); | |
| 246 | |
| 247 htmlParseChunk(ctxt, html.c_str(), html.size(), 0); | |
| 248 htmlParseChunk(ctxt, "", 0, 1); | |
| 249 | |
| 250 htmlFreeParserCtxt(ctxt); | |
| 251 | |
| 252 title = context.title; | |
| 253 } | |
| 254 | |
| 255 int main(int argc, char *argv[]) | |
| 256 { | |
| 257 CURL *conn = NULL; | |
| 258 CURLcode code; | |
| 259 std::string title; | |
| 260 | |
| 261 // Ensure one argument is given | |
| 262 | |
| 263 if(argc != 2) { | |
| 264 fprintf(stderr, "Usage: %s <url>\n", argv[0]); | |
| 265 exit(EXIT_FAILURE); | |
| 266 } | |
| 267 | |
| 268 curl_global_init(CURL_GLOBAL_DEFAULT); | |
| 269 | |
| 270 // Initialize CURL connection | |
| 271 | |
| 272 if(!init(conn, argv[1])) { | |
| 273 fprintf(stderr, "Connection initializion failed\n"); | |
| 274 exit(EXIT_FAILURE); | |
| 275 } | |
| 276 | |
| 277 // Retrieve content for the URL | |
| 278 | |
| 279 code = curl_easy_perform(conn); | |
| 280 curl_easy_cleanup(conn); | |
| 281 | |
| 282 if(code != CURLE_OK) { | |
| 283 fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer); | |
| 284 exit(EXIT_FAILURE); | |
| 285 } | |
| 286 | |
| 287 // Parse the (assumed) HTML code | |
| 288 parseHtml(buffer, title); | |
| 289 | |
| 290 // Display the extracted title | |
| 291 printf("Title: %s\n", title.c_str()); | |
| 292 | |
| 293 return EXIT_SUCCESS; | |
| 294 } |
