Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/curl/docs/examples/htmltitle.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:43:07 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/curl/docs/examples/htmltitle.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,294 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+/* <DESC>
+ * Get a web page, extract the title with libxml.
+ * </DESC>
+
+ Written by Lars Nilsson
+
+ GNU C++ compile command line suggestion (edit paths accordingly):
+
+ g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
+ -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <string>
+#include <curl/curl.h>
+#include <libxml/HTMLparser.h>
+
+//
+//  Case-insensitive string comparison
+//
+//  COMPARE(a, b) is true when the two C strings are equal once letter case
+//  is ignored. Builds that define _MSC_VER use _stricmp; all other builds
+//  use strcasecmp.
+//
+
+#if defined(_MSC_VER)
+#  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
+#else
+#  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
+#endif
+
+//
+//  libxml callback context structure
+//
+//  One instance is handed to the parser as user data and comes back to the
+//  SAX callbacks as their void * argument.
+//
+
+struct Context
+{
+  Context()
+    : addTitle(false),
+      title()
+  {
+  }
+
+  bool addTitle;      // true while text should be appended to title
+  std::string title;  // title text collected so far
+};
+
+//
+//  libcurl variables for error strings and returned data
+//
+
+// Receives everything the write callback is given (set as CURLOPT_WRITEDATA)
+static std::string buffer;
+
+// Storage registered with libcurl as CURLOPT_ERRORBUFFER
+static char errorBuffer[CURL_ERROR_SIZE];
+
+//
+//  libcurl write callback function
+//
+//  Appends the size * nmemb bytes starting at data to the std::string that
+//  writerData points to and returns the number of bytes taken. When
+//  writerData is NULL nothing is stored and 0 is returned.
+//
+//  The return type is size_t, as libcurl's write callback prototype
+//  requires; returning int would truncate large byte counts and would not
+//  match the type libcurl calls the function through.
+//
+
+static size_t writer(char *data, size_t size, size_t nmemb,
+                     std::string *writerData)
+{
+  if(writerData == NULL)
+    return 0;
+
+  const size_t total = size * nmemb;
+
+  writerData->append(data, total);
+
+  return total;
+}
+
+//
+//  libcurl connection initialization
+//
+//  Creates an easy handle, stores it in conn and configures it with the
+//  error buffer, the URL, redirect following, the write callback and the
+//  write target. Returns true when every option was accepted.
+//
+//  When an option is rejected a message is printed to stderr, the handle is
+//  released, conn is reset to NULL and false is returned, so a failed call
+//  leaves nothing behind for the caller to clean up. If the handle cannot
+//  be created at all the process exits with EXIT_FAILURE.
+//
+
+static bool init(CURL *&conn, char *url)
+{
+  CURLcode code;
+
+  conn = curl_easy_init();
+
+  if(conn == NULL) {
+    fprintf(stderr, "Failed to create CURL connection\n");
+    exit(EXIT_FAILURE);
+  }
+
+  // Single-pass block: any failing step breaks out to the shared cleanup
+  // below instead of returning with the handle still allocated.
+  do {
+    code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
+    if(code != CURLE_OK) {
+      // the error buffer is not registered yet, so report the numeric code
+      fprintf(stderr, "Failed to set error buffer [%d]\n",
+              static_cast<int>(code));
+      break;
+    }
+
+    code = curl_easy_setopt(conn, CURLOPT_URL, url);
+    if(code != CURLE_OK) {
+      fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
+      break;
+    }
+
+    code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
+    if(code != CURLE_OK) {
+      fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
+      break;
+    }
+
+    code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
+    if(code != CURLE_OK) {
+      fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
+      break;
+    }
+
+    code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
+    if(code != CURLE_OK) {
+      fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
+      break;
+    }
+
+    return true;
+  } while(0);
+
+  // Failure path: do not leak the handle created above
+  curl_easy_cleanup(conn);
+  conn = NULL;
+
+  return false;
+}
+
+//
+//  libxml start element callback function
+//
+//  voidContext is the Context passed to the parser as user data. When the
+//  element name equals "TITLE" (ignoring case) the collected title is reset
+//  and text collection is switched on. The attributes are not used.
+//
+
+static void StartElement(void *voidContext,
+                         const xmlChar *name,
+                         const xmlChar **attributes)
+{
+  Context *context = static_cast<Context *>(voidContext);
+
+  // name points to const data, so the cast has to keep the qualifier:
+  // reinterpret_cast is not permitted to cast constness away.
+  if(COMPARE(reinterpret_cast<const char *>(name), "TITLE")) {
+    context->title = "";
+    context->addTitle = true;
+  }
+  (void) attributes;
+}
+
+//
+//  libxml end element callback function
+//
+//  voidContext is the Context passed to the parser as user data. When the
+//  element name equals "TITLE" (ignoring case) text collection is switched
+//  off again.
+//
+
+static void EndElement(void *voidContext,
+                       const xmlChar *name)
+{
+  Context *context = static_cast<Context *>(voidContext);
+
+  // keep the const qualifier: reinterpret_cast cannot cast constness away
+  if(COMPARE(reinterpret_cast<const char *>(name), "TITLE"))
+    context->addTitle = false;
+}
+
+//
+//  Text handling helper function
+//
+//  Appends length bytes starting at chars to context->title, but only while
+//  context->addTitle is set. Calls with a non-positive length add nothing.
+//
+
+static void handleCharacters(Context *context,
+                             const xmlChar *chars,
+                             int length)
+{
+  // A negative int would turn into a huge size_t inside append(), so only
+  // positive lengths are forwarded. The cast keeps the const qualifier
+  // because reinterpret_cast cannot cast constness away.
+  if(context->addTitle && length > 0)
+    context->title.append(reinterpret_cast<const char *>(chars), length);
+}
+
+//
+//  libxml PCDATA callback function
+//
+//  Recovers the Context from the parser's user data pointer and passes the
+//  text on to handleCharacters().
+//
+
+static void Characters(void *voidContext,
+                       const xmlChar *chars,
+                       int length)
+{
+  handleCharacters(static_cast<Context *>(voidContext), chars, length);
+}
+
+//
+//  libxml CDATA callback function
+//
+//  Recovers the Context from the parser's user data pointer and passes the
+//  CDATA text on to handleCharacters().
+//
+
+static void cdata(void *voidContext,
+                  const xmlChar *chars,
+                  int length)
+{
+  handleCharacters(static_cast<Context *>(voidContext), chars, length);
+}
+
+//
+//  libxml SAX callback structure
+//
+//  The initializer is positional: the position of an entry decides which
+//  handler slot it fills. Only four slots are populated -- the start
+//  element, end element, PCDATA and CDATA callbacks defined above; every
+//  other slot is NULL.
+//  NOTE(review): the entry order has to match the htmlSAXHandler declaration
+//  in the libxml2 headers being compiled against -- confirm against that
+//  header before inserting, removing or reordering entries.
+//
+
+static htmlSAXHandler saxHandler =
+{
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  StartElement,  // start element callback
+  EndElement,    // end element callback
+  NULL,
+  Characters,    // PCDATA callback
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  cdata,         // CDATA callback
+  NULL
+};
+
+//
+//  Parse given (assumed to be) HTML text and return the title
+//
+//  html is fed to a libxml push parser wired to saxHandler; the text the
+//  callbacks collected is stored in title. If the parser context cannot be
+//  created a message is printed to stderr and title is set to the empty
+//  string.
+//
+
+static void parseHtml(const std::string &html,
+                      std::string &title)
+{
+  htmlParserCtxtPtr ctxt;
+  Context context;
+
+  ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
+                                  XML_CHAR_ENCODING_NONE);
+
+  // Do not hand a NULL context to the parsing calls below
+  if(ctxt == NULL) {
+    fprintf(stderr, "Failed to create HTML parser context\n");
+    title = context.title;
+    return;
+  }
+
+  // htmlParseChunk takes the chunk size as an int
+  htmlParseChunk(ctxt, html.c_str(), static_cast<int>(html.size()), 0);
+
+  // Empty final chunk with the terminate flag set ends the document
+  htmlParseChunk(ctxt, "", 0, 1);
+
+  htmlFreeParserCtxt(ctxt);
+
+  title = context.title;
+}
+
+//
+//  Program entry point
+//
+//  Expects exactly one argument, the URL. Downloads it into buffer, parses
+//  the result as HTML and prints the extracted title. Returns EXIT_SUCCESS
+//  on success and EXIT_FAILURE on any error. libcurl's global state and the
+//  easy handle are released on every path that acquired them.
+//
+
+int main(int argc, char *argv[])
+{
+  CURL *conn = NULL;
+  CURLcode code;
+  std::string title;
+
+  // Ensure one argument is given
+
+  if(argc != 2) {
+    fprintf(stderr, "Usage: %s <url>\n", argv[0]);
+    return EXIT_FAILURE;
+  }
+
+  code = curl_global_init(CURL_GLOBAL_DEFAULT);
+  if(code != CURLE_OK) {
+    fprintf(stderr, "Failed to initialize libcurl [%d]\n",
+            static_cast<int>(code));
+    return EXIT_FAILURE;
+  }
+
+  // Initialize CURL connection
+
+  if(!init(conn, argv[1])) {
+    fprintf(stderr, "Connection initializion failed\n");
+    // release the handle if init() left one behind
+    if(conn != NULL)
+      curl_easy_cleanup(conn);
+    curl_global_cleanup();
+    return EXIT_FAILURE;
+  }
+
+  // Retrieve content for the URL
+
+  code = curl_easy_perform(conn);
+  curl_easy_cleanup(conn);
+
+  // Nothing below uses libcurl, so its global state can go now
+  curl_global_cleanup();
+
+  if(code != CURLE_OK) {
+    fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
+    return EXIT_FAILURE;
+  }
+
+  // Parse the (assumed) HTML code
+  parseHtml(buffer, title);
+
+  // Display the extracted title
+  printf("Title: %s\n", title.c_str());
+
+  return EXIT_SUCCESS;
+}
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children