diff mupdf-source/thirdparty/gumbo-parser/python/gumbo/__init__.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/gumbo-parser/python/gumbo/__init__.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,45 @@
+"""Gumbo HTML parser.
+
+These are the Python bindings for Gumbo.  All public API classes and functions
+are exported from this module.  They include:
+
+- CTypes representations of all structs and enums defined in gumbo.h.  The
+  naming convention is to take the C name and strip off the "Gumbo" prefix.
+
+- A low-level wrapper around the gumbo_parse function, returning the classes
+  exposed above.  Usage:
+
+  import gumbo
+  with gumbo.parse(text, **options) as output:
+    do_stuff_with_doctype(output.document)
+    do_stuff_with_parse_tree(output.root)
+
+- Higher-level bindings that mimic the API provided by html5lib.  Usage:
+
+  from gumbo import html5lib
+
+  This requires that html5lib be installed (it uses their treebuilders), and is
+  intended as a drop-in replacement.
+
+- Similarly, higher-level bindings that mimic BeautifulSoup and return
+  BeautifulSoup objects.  For this, use:
+
+  import gumbo
+  soup = gumbo.soup_parse(text, **options)
+
+  It will give you back a soup object like BeautifulSoup.BeautifulSoup(text).
+"""
+
+from gumbo.gumboc import *
+
+try:
+  from gumbo import html5lib_adapter as html5lib
+except ImportError:
+  # html5lib not installed
+  pass
+
+try:
+  from gumbo.soup_adapter import parse as soup_parse
+except ImportError:
+  # BeautifulSoup not installed
+  pass