changeset 39:a6bc019ac0b2 upstream

ADD: PyMuPDF v1.26.5: the original sdist.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:19:58 +0200
parents b50eed0cc0ef
children aa33339d6b8a
files PKG-INFO changes.txt pipcl.py scripts/test.py setup.py src/__init__.py src/extra.i src/utils.py tests/conftest.py tests/gentle_compare.py tests/resources/test_1645_expected-after-1.27.0.pdf tests/resources/test_4613.png tests/resources/test_4699.pdf tests/resources/test_4699.png tests/resources/test_4712_a.pdf tests/resources/test_4712_b.pdf tests/resources/test_4716.pdf tests/test_4716.py tests/test_annots.py tests/test_codespell.py tests/test_flake8.py tests/test_font.py tests/test_general.py tests/test_import.py tests/test_memory.py tests/test_pixmap.py tests/test_pylint.py tests/test_release.py tests/test_tables.py tests/test_tesseract.py tests/test_textbox.py tests/test_textextract.py
diffstat 32 files changed, 6066 insertions(+), 5056 deletions(-) [+]
line wrap: on
line diff
--- a/PKG-INFO	Mon Sep 15 11:43:07 2025 +0200
+++ b/PKG-INFO	Sat Oct 11 11:19:58 2025 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: PyMuPDF
-Version: 1.26.4
+Version: 1.26.5
 Summary: A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents.
 Description-Content-Type: text/markdown
 Author: Artifex
--- a/changes.txt	Mon Sep 15 11:43:07 2025 +0200
+++ b/changes.txt	Sat Oct 11 11:19:58 2025 +0200
@@ -2,7 +2,31 @@
 ==========
 
 
-**Changes in version 1.26.4**
+**Changes in version 1.26.5**
+
+* Use MuPDF-1.26.10.
+
+* Fixed issues:
+
+  * **Fixed** `2883 <https://github.com/pymupdf/PyMuPDF/issues/2883>`_: Improve the Python type annotations for fitz_new
+  * **Fixed** `4507 <https://github.com/pymupdf/PyMuPDF/issues/4507>`_: Bugs in pyodide
+  * **Fixed** `4613 <https://github.com/pymupdf/PyMuPDF/issues/4613>`_: Thai and number blocks are not auto-scaled and get wrong hyphen when using in insert_htmlbox
+  * **Fixed** `4700 <https://github.com/pymupdf/PyMuPDF/issues/4700>`_: pymupdf.open() processes .zip file without raising
+  * **Fixed** `4716 <https://github.com/pymupdf/PyMuPDF/issues/4716>`_: Problems with unreadable characters
+
+* Other:
+
+  * Supported Python versions are now 3.9-3.14.
+  * We now define all class methods explicitly instead of with dynamic assignment; this improves type hints.
+  * Removed `pymupdf.utils.Shape` class, was duplicate of `pymupdf.Shape`.
+  * Allow use of cibuildwheel to build and test on Pyodide.
+  * Fixed various Pyodide bugs.
+  * In documentation, added section about Linux wheels and glibc compatibility.
+  * Improved documentation of pymupdf.open()'s <filetype> arg.
+  * Retrospectively mark `4544 <https://github.com/pymupdf/PyMuPDF/issues/4544>`_ as fixed in 1.26.4.
+
+
+**Changes in version 1.26.4 (2025-08-25)**
 
 * Use MuPDF-1.26.7.
 
@@ -13,6 +37,7 @@
   * **Fixed** `4457 <https://github.com/pymupdf/PyMuPDF/issues/4457>`_: Wrong characters displayed after font subsetting (w/ native method)
   * **Fixed** `4462 <https://github.com/pymupdf/PyMuPDF/issues/4462>`_: delete_pages() does not accept a single int
   * **Fixed** `4533 <https://github.com/pymupdf/PyMuPDF/issues/4533>`_: Open PDF error segmentation fault
+  * **Fixed** `4544 <https://github.com/pymupdf/PyMuPDF/issues/4544>`_: About pdf_clip_page
   * **Fixed** `4565 <https://github.com/pymupdf/PyMuPDF/issues/4565>`_: MacOS uses Tesseract and not Tesseract-OCR
   * **Fixed** `4571 <https://github.com/pymupdf/PyMuPDF/issues/4571>`_: Broken merged pdfs.
   * **Fixed** `4590 <https://github.com/pymupdf/PyMuPDF/issues/4590>`_: TypeError in utils.py scrub(): annot.update_file(buffer=...) is invalid
--- a/pipcl.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/pipcl.py	Sat Oct 11 11:19:58 2025 +0200
@@ -2,23 +2,37 @@
 Python packaging operations, including PEP-517 support, for use by a `setup.py`
 script.
 
-The intention is to take care of as many packaging details as possible so that
-setup.py contains only project-specific information, while also giving as much
-flexibility as possible.
-
-For example we provide a function `build_extension()` that can be used to build
-a SWIG extension, but we also give access to the located compiler/linker so
-that a `setup.py` script can take over the details itself.
-
-Run doctests with: `python -m doctest pipcl.py`
-
-For Graal we require that PIPCL_GRAAL_PYTHON is set to non-graal Python (we
-build for non-graal except with Graal Python's include paths and library
-directory).
+Overview:
+
+    The intention is to take care of as many packaging details as possible so
+    that setup.py contains only project-specific information, while also giving
+    as much flexibility as possible.
+
+    For example we provide a function `build_extension()` that can be used
+    to build a SWIG extension, but we also give access to the located
+    compiler/linker so that a `setup.py` script can take over the details
+    itself.
+
+Doctests:
+    Doctest strings are provided in some comments.
+
+    Test in the usual way with:
+        python -m doctest pipcl.py
+
+    Test specific functions/classes with:
+        python pipcl.py --doctest run_if ...
+
+        If no functions or classes are specified, this tests everything.
+
+Graal:
+    For Graal we require that PIPCL_GRAAL_PYTHON is set to non-graal Python (we
+    build for non-graal except with Graal Python's include paths and library
+    directory).
 '''
 
 import base64
 import codecs
+import difflib
 import glob
 import hashlib
 import inspect
@@ -55,6 +69,9 @@
     by legacy distutils/setuptools and described in:
     https://pip.pypa.io/en/stable/reference/build-system/setup-py/
 
+    The file pyproject.toml must exist; this is checked if/when fn_build() is
+    called.
+
     Here is a `doctest` example of using pipcl to create a SWIG extension
     module. Requires `swig`.
 
@@ -321,63 +338,86 @@
             wheel_compresslevel = None,
             ):
         '''
-        The initial args before `root` define the package
-        metadata and closely follow the definitions in:
+        The initial args before `entry_points` define the
+        package metadata and closely follow the definitions in:
         https://packaging.python.org/specifications/core-metadata/
 
         Args:
 
             name:
+                Used for metadata `Name`.
                 A string, the name of the Python package.
             version:
+                Used for metadata `Version`.
                 A string, the version of the Python package. Also see PEP-440
                 `Version Identification and Dependency Specification`.
             platform:
+                Used for metadata `Platform`.
                 A string or list of strings.
             supported_platform:
+                Used for metadata `Supported-Platform`.
                 A string or list of strings.
             summary:
+                Used for metadata `Summary`.
                 A string, short description of the package.
             description:
+                Used for metadata `Description`.
                 A string. If contains newlines, a detailed description of the
                 package. Otherwise the path of a file containing the detailed
                 description of the package.
             description_content_type:
+                Used for metadata `Description-Content-Type`.
                 A string describing markup of `description` arg. For example
                 `text/markdown; variant=GFM`.
             keywords:
+                Used for metadata `Keywords`.
                 A string containing comma-separated keywords.
             home_page:
+                Used for metadata `Home-page`.
                 URL of home page.
             download_url:
+                Used for metadata `Download-URL`.
                 Where this version can be downloaded from.
             author:
+                Used for metadata `Author`.
                 Author.
             author_email:
+                Used for metadata `Author-email`.
                 Author email.
             maintainer:
+                Used for metadata `Maintainer`.
                 Maintainer.
             maintainer_email:
+                Used for metadata `Maintainer-email`.
                 Maintainer email.
             license:
+                Used for metadata `License`.
                 A string containing the license text. Written into metadata
                 file `COPYING`. Is also written into metadata itself if not
                 multi-line.
             classifier:
+                Used for metadata `Classifier`.
                 A string or list of strings. Also see:
 
                 * https://pypi.org/pypi?%3Aaction=list_classifiers
                 * https://pypi.org/classifiers/
 
             requires_dist:
-                A string or list of strings. None items are ignored. Also see PEP-508.
+                Used for metadata `Requires-Dist`.
+                A string or list of strings, Python packages required
+                at runtime. None items are ignored.
             requires_python:
+                Used for metadata `Requires-Python`.
                 A string or list of strings.
             requires_external:
+                Used for metadata `Requires-External`.
                 A string or list of strings.
             project_url:
-                A string or list of strings, each of the form: `{name}, {url}`.
+                Used for metadata `Project-URL`.
+                A string or list of strings, each of the form: `{name},
+                {url}`.
             provides_extra:
+                Used for metadata `Provides-Extra`.
                 A string or list of strings.
 
             entry_points:
@@ -415,8 +455,11 @@
                 added.
 
                 `to_` identifies what the file should be called within a wheel
-                or when installing. If `to_` ends with `/`, the leaf of `from_`
-                is appended to it (and `from_` must not be a `bytes`).
+                or when installing. If `to_` is empty or `/` we set it to the
+                leaf of `from_` (`from_` must not be a `bytes`) - i.e. we place
+                the file in the root directory of the wheel; otherwise if
+                `to_` ends with `/` the leaf of `from_` is appended to it (and
+                `from_` must not be a `bytes`).
 
                 Initial `$dist-info/` in `_to` is replaced by
                 `{name}-{version}.dist-info/`; this is useful for license files
@@ -439,6 +482,11 @@
                 default being `sysconfig.get_path('platlib')` e.g.
                 `myvenv/lib/python3.9/site-packages/`.
 
+                When calling this function, we assert that the file
+                pyproject.toml exists in the current directory. (We do this
+                here rather than in pipcl.Package's constructor, as otherwise
+                importing setup.py from non-package-related code could fail.)
+
             fn_clean:
                 A function taking a single arg `all_` that cleans generated
                 files. `all_` is true iff `--all` is in argv.
@@ -457,8 +505,7 @@
                 It can be convenient to use `pipcl.git_items()`.
 
                 The specification for sdists requires that the list contains
-                `pyproject.toml`; we enforce this with a diagnostic rather than
-                raising an exception, to allow legacy command-line usage.
+                `pyproject.toml`; we enforce this with a Python assert.
 
             tag_python:
                 First element of wheel tag defined in PEP-425. If None we use
@@ -528,6 +575,12 @@
         assert_str_or_multi( requires_external)
         assert_str_or_multi( project_url)
         assert_str_or_multi( provides_extra)
+        
+        assert re.match('^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\\Z', name, re.IGNORECASE), (
+                f'Invalid package name'
+                f' (https://packaging.python.org/en/latest/specifications/name-normalization/)'
+                f': {name!r}'
+                )
 
         # https://packaging.python.org/en/latest/specifications/core-metadata/.
         assert re.match('([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$', name, re.IGNORECASE), \
@@ -602,7 +655,10 @@
                 f' metadata_directory={metadata_directory!r}'
                 )
 
-        if sys.implementation.name == 'graalpy':
+        if os.environ.get('CIBUILDWHEEL') == '1':
+            # Don't special-case graal builds when running under cibuildwheel.
+            pass
+        elif sys.implementation.name == 'graalpy':
             # We build for Graal by building a native Python wheel with Graal
             # Python's include paths and library directory. We then rename the
             # wheel to contain graal's tag etc.
@@ -754,7 +810,7 @@
             else:
                 items = self.fn_sdist()
 
-        prefix = f'{_normalise(self.name)}-{self.version}'
+        prefix = f'{_normalise2(self.name)}-{self.version}'
         os.makedirs(sdist_directory, exist_ok=True)
         tarpath = f'{sdist_directory}/{prefix}.tar.gz'
         log2(f'Creating sdist: {tarpath}')
@@ -796,12 +852,11 @@
                         assert 0, f'Path is inside sdist_directory={sdist_directory}: {from_!r}'
                     assert os.path.exists(from_), f'Path does not exist: {from_!r}'
                     assert os.path.isfile(from_), f'Path is not a file: {from_!r}'
-                    if to_rel == 'pyproject.toml':
-                        found_pyproject_toml = True
                     add(from_, to_rel)
-
-            if not found_pyproject_toml:
-                log0(f'Warning: no pyproject.toml specified.')
+                if to_rel == 'pyproject.toml':
+                    found_pyproject_toml = True
+
+            assert found_pyproject_toml, f'Cannot create sdist because file not specified: pyproject.toml'
 
             # Always add a PKG-INFO file.
             add_string(self._metainfo(), 'PKG-INFO')
@@ -826,9 +881,11 @@
         Get two-digit python version, e.g. 'cp3.8' for python-3.8.6.
         '''
         if self.tag_python_:
-            return self.tag_python_
+            ret = self.tag_python_
         else:
-            return 'cp' + ''.join(platform.python_version().split('.')[:2])
+            ret = 'cp' + ''.join(platform.python_version().split('.')[:2])
+        assert '-' not in ret
+        return ret
 
     def tag_abi(self):
         '''
@@ -884,10 +941,13 @@
                 ret = ret2
 
         log0( f'tag_platform(): returning {ret=}.')
+        assert '-' not in ret
         return ret
 
     def wheel_name(self):
-        return f'{_normalise(self.name)}-{self.version}-{self.tag_python()}-{self.tag_abi()}-{self.tag_platform()}.whl'
+        ret = f'{_normalise2(self.name)}-{self.version}-{self.tag_python()}-{self.tag_abi()}-{self.tag_platform()}.whl'
+        assert ret.count('-') == 4, f'Expected 4 dash characters in {ret=}.'
+        return ret
 
     def wheel_name_match(self, wheel):
         '''
@@ -916,7 +976,7 @@
                 log2(f'py_limited_api; {tag_python=} compatible with {self.tag_python()=}.')
                 py_limited_api_compatible = True
             
-        log2(f'{_normalise(self.name) == name=}')
+        log2(f'{_normalise2(self.name) == name=}')
         log2(f'{self.version == version=}')
         log2(f'{self.tag_python() == tag_python=} {self.tag_python()=} {tag_python=}')
         log2(f'{py_limited_api_compatible=}')
@@ -925,7 +985,7 @@
         log2(f'{self.tag_platform()=}')
         log2(f'{tag_platform.split(".")=}')
         ret = (1
-                and _normalise(self.name) == name
+                and _normalise2(self.name) == name
                 and self.version == version
                 and (self.tag_python() == tag_python or py_limited_api_compatible)
                 and self.tag_abi() == tag_abi
@@ -947,6 +1007,9 @@
 
     def _call_fn_build( self, config_settings=None):
         assert self.fn_build
+        assert os.path.isfile('pyproject.toml'), (
+                'Cannot create package because file does not exist: pyproject.toml'
+                )
         log2(f'calling self.fn_build={self.fn_build}')
         if inspect.signature(self.fn_build).parameters:
             ret = self.fn_build(config_settings)
@@ -954,6 +1017,28 @@
             ret = self.fn_build()
         assert isinstance( ret, (list, tuple)), \
                 f'Expected list/tuple from {self.fn_build} but got: {ret!r}'
+
+        # Check that any extensions that we have built, have same
+        # py_limited_api value. If package is marked with py_limited_api=True
+        # then non-py_limited_api extensions seem to fail at runtime on
+        # Windows.
+        #
+        # (We could possibly allow package py_limited_api=False and extensions
+        # py_limited_api=True, but haven't tested this, and it seems simpler to
+        # be strict.)
+        for item in ret:
+            from_, (to_abs, to_rel) = self._fromto(item)
+            from_abs = os.path.abspath(from_)
+            is_py_limited_api = _extensions_to_py_limited_api.get(from_abs)
+            if is_py_limited_api is not None:
+                assert bool(self.py_limited_api) == bool(is_py_limited_api), (
+                        f'Extension was built with'
+                        f' py_limited_api={is_py_limited_api} but pipcl.Package'
+                        f' name={self.name!r} has'
+                        f' py_limited_api={self.py_limited_api}:'
+                        f' {from_abs!r}'
+                        )
+        
         return ret
 
 
@@ -1052,7 +1137,7 @@
         it writes to a slightly different directory.
         '''
         if root is None:
-            root = f'{self.name}-{self.version}.dist-info'
+            root = f'{normalise2(self.name)}-{self.version}.dist-info'
         self._write_info(f'{root}/METADATA')
         if self.license:
             with open( f'{root}/COPYING', 'w') as f:
@@ -1340,7 +1425,7 @@
             )
 
     def _dist_info_dir( self):
-        return f'{_normalise(self.name)}-{self.version}.dist-info'
+        return f'{_normalise2(self.name)}-{self.version}.dist-info'
 
     def _metainfo(self):
         '''
@@ -1446,8 +1531,12 @@
         `p` is a tuple `(from_, to_)` where `from_` is str/bytes and `to_` is
         str. If `from_` is a bytes it is contents of file to add, otherwise the
         path of an existing file; non-absolute paths are assumed to be relative
-        to `self.root`. If `to_` is empty or ends with `/`, we append the leaf
-        of `from_` (which must be a str).
+        to `self.root`.
+
+        If `to_` is empty or `/` we set it to the leaf of `from_` (which must
+        be a str) - i.e. we place the file in the root directory of the wheel;
+        otherwise if `to_` ends with `/` we append the leaf of `from_` (which
+        must be a str).
 
         If `to_` starts with `$dist-info/`, we replace this with
         `self._dist_info_dir()`.
@@ -1467,14 +1556,16 @@
         from_, to_ = p
         assert isinstance(from_, (str, bytes))
         assert isinstance(to_, str)
-        if to_.endswith('/') or to_=='':
+        if to_ == '/' or to_ == '':
+            to_ = os.path.basename(from_)
+        elif to_.endswith('/'):
             to_ += os.path.basename(from_)
         prefix = '$dist-info/'
         if to_.startswith( prefix):
             to_ = f'{self._dist_info_dir()}/{to_[ len(prefix):]}'
         prefix = '$data/'
         if to_.startswith( prefix):
-            to_ = f'{self.name}-{self.version}.data/{to_[ len(prefix):]}'
+            to_ = f'{_normalise2(self.name)}-{self.version}.data/{to_[ len(prefix):]}'
         if isinstance(from_, str):
             from_, _ = self._path_relative_to_root( from_, assert_within_root=False)
         to_ = self._path_relative_to_root(to_)
@@ -1482,11 +1573,13 @@
         log2(f'returning {from_=} {to_=}')
         return from_, to_
 
+_extensions_to_py_limited_api = dict()
 
 def build_extension(
         name,
         path_i,
         outdir,
+        *,
         builddir=None,
         includes=None,
         defines=None,
@@ -1498,6 +1591,7 @@
         linker_extra='',
         swig=None,
         cpp=True,
+        source_extra=None,
         prerequisites_swig=None,
         prerequisites_compile=None,
         prerequisites_link=None,
@@ -1539,7 +1633,7 @@
             A string, or a sequence of library names. Each item is prefixed
             with `-l` on non-Windows.
         optimise:
-            Whether to use compiler optimisations.
+            Whether to use compiler optimisations and define NDEBUG.
         debug:
             Whether to build with debug symbols.
         compiler_extra:
@@ -1550,6 +1644,8 @@
             Swig command; if false we use 'swig'.
         cpp:
             If true we tell SWIG to generate C++ code instead of C.
+        source_extra:
+            Extra source files to build into the shared library,
         prerequisites_swig:
         prerequisites_compile:
         prerequisites_link:
@@ -1584,10 +1680,15 @@
             `compile_extra` (also `/I` on windows) and use them with swig so
             that it can see the same header files as C/C++. This is useful
             when using enviromment variables such as `CC` and `CXX` to set
-            `compile_extra.
+            `compile_extra`.
         py_limited_api:
             If true we build for current Python's limited API / stable ABI.
 
+            Note that we will assert false if this extension is added to a
+            pipcl.Package that has a different <py_limited_api>, because
+            on Windows importing a non-py_limited_api extension inside a
+            py_limited=True package fails.
+
     Returns the leafname of the generated library file within `outdir`, e.g.
     `_{name}.so` on Unix or `_{name}.cp311-win_amd64.pyd` on Windows.
     '''
@@ -1599,6 +1700,12 @@
         builddir = outdir
     if not swig:
         swig = 'swig'
+        
+    if source_extra is None:
+        source_extra = list()
+    if isinstance(source_extra, str):
+        source_extra = [source_extra]
+    
     includes_text = _flags( includes, '-I')
     defines_text = _flags( defines, '-D')
     libpaths_text = _flags( libpaths, '/LIBPATH:', '"') if windows() else _flags( libpaths, '-L')
@@ -1608,11 +1715,11 @@
     os.makedirs( outdir, exist_ok=True)
 
     # Run SWIG.
-
+    #
     if infer_swig_includes:
         # Extract include flags from `compiler_extra`.
         swig_includes_extra = ''
-        compiler_extra_items = compiler_extra.split()
+        compiler_extra_items = shlex.split(compiler_extra)
         i = 0
         while i < len(compiler_extra_items):
             item = compiler_extra_items[i]
@@ -1647,75 +1754,130 @@
             prerequisites_swig2,
             )
 
-    so_suffix = _so_suffix(use_so_versioning = not py_limited_api)
+    if pyodide():
+        so_suffix = '.so'
+        log0(f'pyodide: PEP-3149 suffix untested, so omitting. {_so_suffix()=}.')
+    else:
+        so_suffix = _so_suffix(use_so_versioning = not py_limited_api)
     path_so_leaf = f'_{name}{so_suffix}'
     path_so = f'{outdir}/{path_so_leaf}'
 
     py_limited_api2 = current_py_limited_api() if py_limited_api else None
 
+    compiler_command, pythonflags = base_compiler(cpp=cpp)
+    linker_command, _ = base_linker(cpp=cpp)
+    # setuptools on Linux seems to use slightly different compile flags:
+    #
+    # -fwrapv -O3 -Wall -O2 -g0 -DPY_CALL_TRAMPOLINE
+    #
+
+    general_flags = ''
     if windows():
-        path_obj = f'{path_so}.obj'
-
         permissive = '/permissive-'
         EHsc = '/EHsc'
         T = '/Tp' if cpp else '/Tc'
         optimise2 = '/DNDEBUG /O2' if optimise else '/D_DEBUG'
-        debug2 = ''
-        if debug:
-            debug2 = '/Zi'  # Generate .pdb.
-            # debug2 = '/Z7'    # Embed debug info in .obj files.
-        
+        debug2 = '/Zi' if debug else ''
         py_limited_api3 = f'/DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else ''
 
-        # As of 2023-08-23, it looks like VS tools create slightly
-        # .dll's each time, even with identical inputs.
-        #
-        # Some info about this is at:
-        # https://nikhilism.com/post/2020/windows-deterministic-builds/.
-        # E.g. an undocumented linker flag `/Brepro`.
+    else:
+        if debug:
+            general_flags += '/Zi' if windows() else ' -g'
+        if optimise:
+            general_flags += ' /DNDEBUG /O2' if windows() else ' -O2 -DNDEBUG'
+
+        py_limited_api3 = f'-DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else ''
+
+    if windows():
+        pass
+    elif darwin():
+        # MacOS's linker does not like `-z origin`.
+        rpath_flag = "-Wl,-rpath,@loader_path/"
+        # Avoid `Undefined symbols for ... "_PyArg_UnpackTuple" ...'.
+        general_flags += ' -undefined dynamic_lookup'
+    elif pyodide():
+        # Setting `-Wl,-rpath,'$ORIGIN',-z,origin` gives:
+        #   emcc: warning: ignoring unsupported linker flag: `-rpath` [-Wlinkflags]
+        #   wasm-ld: error: unknown -z value: origin
         #
-
-        command, pythonflags = base_compiler(cpp=cpp)
-        command = f'''
-                {command}
-                    # General:
-                    /c                          # Compiles without linking.
-                    {EHsc}                      # Enable "Standard C++ exception handling".
-
-                    #/MD                         # Creates a multithreaded DLL using MSVCRT.lib.
-                    {'/MDd' if debug else '/MD'}
-
-                    # Input/output files:
-                    {T}{path_cpp}               # /Tp specifies C++ source file.
-                    /Fo{path_obj}               # Output file. codespell:ignore
-
-                    # Include paths:
-                    {includes_text}
-                    {pythonflags.includes}      # Include path for Python headers.
-
-                    # Code generation:
-                    {optimise2}
-                    {debug2}
-                    {permissive}                # Set standard-conformance mode.
-
-                    # Diagnostics:
-                    #/FC                         # Display full path of source code files passed to cl.exe in diagnostic text.
-                    /W3                         # Sets which warning level to output. /W3 is IDE default.
-                    /diagnostics:caret          # Controls the format of diagnostic messages.
-                    /nologo                     #
-
-                    {defines_text}
-                    {compiler_extra}
-
-                    {py_limited_api3}
-                '''
-        run_if( command, path_obj, path_cpp, prerequisites_compile)
-
-        command, pythonflags = base_linker(cpp=cpp)
+        rpath_flag = "-Wl,-rpath,'$ORIGIN'"
+    else:
+        rpath_flag = "-Wl,-rpath,'$ORIGIN',-z,origin"
+    
+    # Fun fact - on Linux, if the -L and -l options are before '{path_cpp}'
+    # they seem to be ignored...
+    #
+    path_os = list()
+
+    for path_source in [path_cpp] + source_extra:
+        path_o = f'{path_source}.obj' if windows() else f'{path_source}.o'
+        path_os.append(f' {path_o}')
+
+        prerequisites_path = f'{path_o}.d'
+
+        if windows():
+            compiler_command2 = f'''
+                    {compiler_command}
+                        # General:
+                        /c                          # Compiles without linking.
+                        {EHsc}                      # Enable "Standard C++ exception handling".
+
+                        #/MD                         # Creates a multithreaded DLL using MSVCRT.lib.
+                        {'/MDd' if debug else '/MD'}
+
+                        # Input/output files:
+                        {T}{path_source}            # /Tp specifies C++ source file.
+                        /Fo{path_o}                 # Output file. codespell:ignore
+
+                        # Include paths:
+                        {includes_text}
+                        {pythonflags.includes}      # Include path for Python headers.
+
+                        # Code generation:
+                        {optimise2}
+                        {debug2}
+                        {permissive}                # Set standard-conformance mode.
+
+                        # Diagnostics:
+                        #/FC                         # Display full path of source code files passed to cl.exe in diagnostic text.
+                        /W3                         # Sets which warning level to output. /W3 is IDE default.
+                        /diagnostics:caret          # Controls the format of diagnostic messages.
+                        /nologo                     #
+
+                        {defines_text}
+                        {compiler_extra}
+
+                        {py_limited_api3}
+                    '''
+
+        else:
+            compiler_command2 = f'''
+                    {compiler_command}
+                        -fPIC
+                        {general_flags.strip()}
+                        {pythonflags.includes}
+                        {includes_text}
+                        {defines_text}
+                        -MD -MF {prerequisites_path}
+                        -c {path_source}
+                        -o {path_o}
+                        {compiler_extra}
+                        {py_limited_api3}
+                    '''
+        run_if(
+                compiler_command2,
+                path_o,
+                path_source,
+                [path_source] + _get_prerequisites(prerequisites_path),
+                )
+
+    # Link
+    prerequisites_path = f'{path_so}.d'
+    if windows():
         debug2 = '/DEBUG' if debug else ''
         base, _ = os.path.splitext(path_so_leaf)
-        command = f'''
-                {command}
+        command2 = f'''
+                {linker_command}
                     /DLL                    # Builds a DLL.
                     /EXPORT:PyInit__{name}  # Exports a function.
                     /IMPLIB:{base}.lib      # Overrides the default import library name.
@@ -1725,139 +1887,67 @@
                     {debug2}
                     /nologo
                     {libs_text}
-                    {path_obj}
+                    {' '.join(path_os)}
                     {linker_extra}
                 '''
-        run_if( command, path_so, path_obj, prerequisites_link)
-
+    elif pyodide():
+        command2 = f'''
+                {linker_command}
+                    -MD -MF {prerequisites_path}
+                    -o {path_so}
+                    {' '.join(path_os)}
+                    {libpaths_text}
+                    {libs_text}
+                    {linker_extra}
+                    {pythonflags.ldflags}
+                    {rpath_flag}
+                '''
     else:
-
-        # Not Windows.
-        #
-        command, pythonflags = base_compiler(cpp=cpp)
-
-        # setuptools on Linux seems to use slightly different compile flags:
-        #
-        # -fwrapv -O3 -Wall -O2 -g0 -DPY_CALL_TRAMPOLINE
-        #
-
-        general_flags = ''
-        if debug:
-            general_flags += ' -g'
-        if optimise:
-            general_flags += ' -O2 -DNDEBUG'
-
-        py_limited_api3 = f'-DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else ''
-
-        if darwin():
-            # MacOS's linker does not like `-z origin`.
-            rpath_flag = "-Wl,-rpath,@loader_path/"
-
-            # Avoid `Undefined symbols for ... "_PyArg_UnpackTuple" ...'.
-            general_flags += ' -undefined dynamic_lookup'
-        elif pyodide():
-            # Setting `-Wl,-rpath,'$ORIGIN',-z,origin` gives:
-            #   emcc: warning: ignoring unsupported linker flag: `-rpath` [-Wlinkflags]
-            #   wasm-ld: error: unknown -z value: origin
-            #
-            log0(f'pyodide: PEP-3149 suffix untested, so omitting. {_so_suffix()=}.')
-            path_so_leaf = f'_{name}.so'
-            path_so = f'{outdir}/{path_so_leaf}'
-
-            rpath_flag = ''
-        else:
-            rpath_flag = "-Wl,-rpath,'$ORIGIN',-z,origin"
-        path_so = f'{outdir}/{path_so_leaf}'
-        # Fun fact - on Linux, if the -L and -l options are before '{path_cpp}'
-        # they seem to be ignored...
-        #
-        prerequisites = list()
-
-        if pyodide():
-            # Looks like pyodide's `cc` can't compile and link in one invocation.
-            prerequisites_compile_path = f'{path_cpp}.o.d'
-            prerequisites += _get_prerequisites( prerequisites_compile_path)
-            command = f'''
-                    {command}
-                        -fPIC
-                        {general_flags.strip()}
-                        {pythonflags.includes}
-                        {includes_text}
-                        {defines_text}
-                        -MD -MF {prerequisites_compile_path}
-                        -c {path_cpp}
-                        -o {path_cpp}.o
-                        {compiler_extra}
-                        {py_limited_api3}
-                    '''
-            prerequisites_link_path = f'{path_cpp}.o.d'
-            prerequisites += _get_prerequisites( prerequisites_link_path)
-            ld, _ = base_linker(cpp=cpp)
-            command += f'''
-                    && {ld}
-                        {path_cpp}.o
-                        -o {path_so}
-                        -MD -MF {prerequisites_link_path}
-                        {rpath_flag}
-                        {libpaths_text}
-                        {libs_text}
-                        {linker_extra}
-                        {pythonflags.ldflags}
-                    '''
-        else:
-            # We use compiler to compile and link in one command.
-            prerequisites_path = f'{path_so}.d'
-            prerequisites = _get_prerequisites(prerequisites_path)
-
-            command = f'''
-                    {command}
-                        -fPIC
-                        -shared
-                        {general_flags.strip()}
-                        {pythonflags.includes}
-                        {includes_text}
-                        {defines_text}
-                        {path_cpp}
-                        -MD -MF {prerequisites_path}
-                        -o {path_so}
-                        {compiler_extra}
-                        {libpaths_text}
-                        {linker_extra}
-                        {pythonflags.ldflags}
-                        {libs_text}
-                        {rpath_flag}
-                        {py_limited_api3}
-                    '''
-        command_was_run = run_if(
-                command,
-                path_so,
-                path_cpp,
-                prerequisites_compile,
-                prerequisites_link,
-                prerequisites,
-                )
-
-        if command_was_run and darwin():
-            # We need to patch up references to shared libraries in `libs`.
-            sublibraries = list()
-            for lib in () if libs is None else libs:
-                for libpath in libpaths:
-                    found = list()
-                    for suffix in '.so', '.dylib':
-                        path = f'{libpath}/lib{os.path.basename(lib)}{suffix}'
-                        if os.path.exists( path):
-                            found.append( path)
-                    if found:
-                        assert len(found) == 1, f'More than one file matches lib={lib!r}: {found}'
-                        sublibraries.append( found[0])
-                        break
-                else:
-                    log2(f'Warning: can not find path of lib={lib!r} in libpaths={libpaths}')
-            macos_patch( path_so, *sublibraries)
+        command2 = f'''
+                {linker_command}
+                    -shared
+                    {general_flags.strip()}
+                    -MD -MF {prerequisites_path}
+                    -o {path_so}
+                    {' '.join(path_os)}
+                    {libpaths_text}
+                    {libs_text}
+                    {linker_extra}
+                    {pythonflags.ldflags}
+                    {rpath_flag}
+                    {py_limited_api3}
+                '''
+    link_was_run = run_if(
+            command2,
+            path_so,
+            path_cpp,
+            *path_os,
+            *_get_prerequisites(f'{path_so}.d'),
+            )
+
+    if link_was_run and darwin():
+        # We need to patch up references to shared libraries in `libs`.
+        sublibraries = list()
+        for lib in () if libs is None else libs:
+            for libpath in libpaths:
+                found = list()
+                for suffix in '.so', '.dylib':
+                    path = f'{libpath}/lib{os.path.basename(lib)}{suffix}'
+                    if os.path.exists( path):
+                        found.append( path)
+                if found:
+                    assert len(found) == 1, f'More than one file matches lib={lib!r}: {found}'
+                    sublibraries.append( found[0])
+                    break
+            else:
+                log2(f'Warning: can not find path of lib={lib!r} in libpaths={libpaths}')
+        macos_patch( path_so, *sublibraries)
 
         #run(f'ls -l {path_so}', check=0)
         #run(f'file {path_so}', check=0)
 
+    _extensions_to_py_limited_api[os.path.abspath(path_so)] = py_limited_api
+    
     return path_so_leaf
 
 
@@ -1983,7 +2073,7 @@
             )
     if not e:
         branch = out.strip()
-    log(f'git_info(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}')
+    log1(f'git_info(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}')
     return sha, comment, diff, branch
 
 
@@ -2027,88 +2117,96 @@
 
 
 def git_get(
-        remote,
         local,
         *,
+        remote=None,
         branch=None,
+        tag=None,
+        text=None,
         depth=1,
         env_extra=None,
-        tag=None,
         update=True,
         submodules=True,
-        default_remote=None,
         ):
     '''
-    Ensures that <local> is a git checkout (at either <tag>, or <branch> HEAD)
-    of a remote repository.
-    
-    Exactly one of <branch> and <tag> must be specified, or <remote> must start
-    with 'git:' and match the syntax described below.
+    Creates/updates local checkout <local> of remote repository and returns
+    absolute path of <local>.
+
+    If <text> is set but does not start with 'git:', it is assumed to be an up
+    to date local checkout, and we return absolute path of <text> without doing
+    any git operations.
     
     Args:
+        local:
+            Local directory. Created and/or updated using `git clone` and `git
+            fetch` etc.
         remote:
             Remote git repostitory, for example
-            'https://github.com/ArtifexSoftware/mupdf.git'.
+            'https://github.com/ArtifexSoftware/mupdf.git'. Can be overridden
+            by <text>.
+        branch:
+            Branch to use; can be overridden by <text>.
+        tag:
+            Tag to use; can be overridden by <text>.
+        text:
+            If None or empty:
+                Ignored.
             
-            If starts with 'git:', the remaining text should be a command-line
-            style string containing some or all of these args:
-                --branch <branch>
-                --tag <tag>
-                <remote>
-            These overrides <branch>, <tag> and <default_remote>.
+            If starts with 'git:':
+                The remaining text should be a command-line
+                style string containing some or all of these args:
+                    --branch <branch>
+                    --tag <tag>
+                    <remote>
+                These overrides <branch>, <tag> and <remote>.
+            Otherwise:
+                <text> is assumed to be a local directory, and we simply return
+                it as an absolute path without doing any git operations.
             
             For example these all clone/update/branch master of https://foo.bar/qwerty.git to local
             checkout 'foo-local':
             
-                git_get('https://foo.bar/qwerty.git', 'foo-local', branch='master')
-                git_get('git:--branch master https://foo.bar/qwerty.git', 'foo-local')
-                git_get('git:--branch master', 'foo-local', default_remote='https://foo.bar/qwerty.git')
-                git_get('git:', 'foo-local', branch='master', default_remote='https://foo.bar/qwerty.git')
-            
-        local:
-            Local directory. If <local>/.git exists, we attempt to run `git
-            update` in it.
-        branch:
-            Branch to use. Is used as default if remote starts with 'git:'.
+                git_get('foo-local', remote='https://foo.bar/qwerty.git', branch='master')
+                git_get('foo-local', text='git:--branch master https://foo.bar/qwerty.git')
+                git_get('foo-local', text='git:--branch master', remote='https://foo.bar/qwerty.git')
+                git_get('foo-local', text='git:', branch='master', remote='https://foo.bar/qwerty.git')
         depth:
             Depth of local checkout when cloning and fetching, or None.
         env_extra:
             Dict of extra name=value environment variables to use whenever we
             run git.
-        tag:
-            Tag to use. Is used as default if remote starts with 'git:'.
         update:
             If false we do not update existing repository. Might be useful if
             testing without network access.
         submodules:
             If true, we clone with `--recursive --shallow-submodules` and run
             `git submodule update --init --recursive` before returning.
-        default_remote:
-            The remote URL if <remote> starts with 'git:' but does not specify
-            the remote URL.
     '''
     log0(f'{remote=} {local=} {branch=} {tag=}')
-    if remote.startswith('git:'):
-        remote0 = remote
-        args = iter(shlex.split(remote0[len('git:'):]))
-        remote = default_remote
-        while 1:
-            try:
-                arg = next(args)
-            except StopIteration:
-                break
-            if arg == '--branch':
-                branch = next(args)
-                tag = None
-            elif arg == '--tag':
-                tag == next(args)
-                branch = None
-            else:
-                remote = arg
-        assert remote, f'{default_remote=} and no remote specified in remote={remote0!r}.'
-        assert branch or tag, f'{branch=} {tag=} and no branch/tag specified in remote={remote0!r}.'
+    
+    if text:
+        if text.startswith('git:'):
+            args = iter(shlex.split(text[len('git:'):]))
+            while 1:
+                try:
+                    arg = next(args)
+                except StopIteration:
+                    break
+                if arg == '--branch':
+                    branch = next(args)
+                    tag = None
+                elif arg == '--tag':
+                    tag = next(args)
+                    branch = None
+                else:
+                    remote = arg
+            assert remote, f'<remote> unset and no remote specified in {text=}.'
+            assert branch or tag, f'<branch> and <tag> unset and no branch/tag specified in {text=}.'
+        else:
+            log0(f'Using local directory {text!r}.')
+            return os.path.abspath(text)
         
-    assert (branch and not tag) or (not branch and tag), f'Must specify exactly one of <branch> and <tag>.'
+    assert (branch and not tag) or (not branch and tag), f'Must specify exactly one of <branch> and <tag>; {branch=} {tag=}.'
     
     depth_arg = f' --depth {depth}' if depth else ''
     
@@ -2116,7 +2214,7 @@
         # This seems to pull in the entire repository.
         log0(f'do_update(): attempting to update {local=}.')
         # Remove any local changes.
-        run(f'cd {local} && git checkout .', env_extra=env_extra)
+        run(f'cd {local} && git reset --hard', env_extra=env_extra)
         if tag:
             # `-u` avoids `fatal: Refusing to fetch into current branch`.
             # Using '+' and `revs/tags/` prefix seems to avoid errors like:
@@ -2164,6 +2262,7 @@
 
     # Show sha of checkout.
     run( f'cd {local} && git show --pretty=oneline|head -n 1', check=False)
+    return os.path.abspath(local)
     
 
 def run(
@@ -2452,10 +2551,11 @@
                     log2(f'### Have removed `-lcrypt` from ldflags: {self.ldflags!r} -> {ldflags2!r}')
                     self.ldflags = ldflags2
 
-        log1(f'{self.includes=}')
-        log1(f'    {includes_=}')
-        log1(f'{self.ldflags=}')
-        log1(f'    {ldflags_=}')
+        if 0:
+            log1(f'{self.includes=}')
+            log1(f'    {includes_=}')
+            log1(f'{self.ldflags=}')
+            log1(f'    {ldflags_=}')
 
 
 def macos_add_cross_flags(command):
@@ -2555,7 +2655,7 @@
     return f'x{32 if sys.maxsize == 2**31 - 1 else 64}'
 
 
-def run_if( command, out, *prerequisites):
+def run_if( command, out, *prerequisites, caller=1):
     '''
     Runs a command only if the output file is not up to date.
 
@@ -2585,21 +2685,26 @@
         ...     os.remove( out)
         >>> if os.path.exists( f'{out}.cmd'):
         ...     os.remove( f'{out}.cmd')
-        >>> run_if( f'touch {out}', out)
+        >>> run_if( f'touch {out}', out, caller=0)
         pipcl.py:run_if(): Running command because: File does not exist: 'run_if_test_out'
         pipcl.py:run_if(): Running: touch run_if_test_out
         True
 
     If we repeat, the output file will be up to date so the command is not run:
 
-        >>> run_if( f'touch {out}', out)
+        >>> run_if( f'touch {out}', out, caller=0)
         pipcl.py:run_if(): Not running command because up to date: 'run_if_test_out'
 
     If we change the command, the command is run:
 
-        >>> run_if( f'touch  {out}', out)
-        pipcl.py:run_if(): Running command because: Command has changed
-        pipcl.py:run_if(): Running: touch  run_if_test_out
+        >>> run_if( f'touch {out};', out, caller=0)
+        pipcl.py:run_if(): Running command because: Command has changed:
+        pipcl.py:run_if():     @@ -1,2 +1,2 @@
+        pipcl.py:run_if():      touch
+        pipcl.py:run_if():     -run_if_test_out
+        pipcl.py:run_if():     +run_if_test_out;
+        pipcl.py:run_if(): 
+        pipcl.py:run_if(): Running: touch run_if_test_out;
         True
 
     If we add a prerequisite that is newer than the output, the command is run:
@@ -2608,15 +2713,20 @@
         >>> prerequisite = 'run_if_test_prerequisite'
         >>> run( f'touch {prerequisite}', caller=0)
         pipcl.py:run(): Running: touch run_if_test_prerequisite
-        >>> run_if( f'touch  {out}', out, prerequisite)
-        pipcl.py:run_if(): Running command because: Prerequisite is new: 'run_if_test_prerequisite'
+        >>> run_if( f'touch  {out}', out, prerequisite, caller=0)
+        pipcl.py:run_if(): Running command because: Command has changed:
+        pipcl.py:run_if():     @@ -1,2 +1,2 @@
+        pipcl.py:run_if():      touch
+        pipcl.py:run_if():     -run_if_test_out;
+        pipcl.py:run_if():     +run_if_test_out
+        pipcl.py:run_if(): 
         pipcl.py:run_if(): Running: touch  run_if_test_out
         True
 
     If we repeat, the output will be newer than the prerequisite, so the
     command is not run:
 
-        >>> run_if( f'touch  {out}', out, prerequisite)
+        >>> run_if( f'touch  {out}', out, prerequisite, caller=0)
         pipcl.py:run_if(): Not running command because up to date: 'run_if_test_out'
     '''
     doit = False
@@ -2633,13 +2743,34 @@
                 cmd = f.read()
         else:
             cmd = None
-        if command != cmd:
+        cmd_args = shlex.split(cmd or '')
+        command_args = shlex.split(command or '')
+        if command_args != cmd_args:
             if cmd is None:
                 doit = 'No previous command stored'
             else:
                 doit = f'Command has changed'
                 if 0:
-                    doit += f': {cmd!r} => {command!r}'
+                    doit += f':\n    {cmd!r}\n    {command!r}'
+                if 0:
+                    doit += f'\nbefore:\n'
+                    doit += textwrap.indent(cmd, '    ')
+                    doit += f'\nafter:\n'
+                    doit += textwrap.indent(command, '    ')
+                if 1:
+                    # Show diff based on commands split into pseudo lines by
+                    # shlex.split().
+                    doit += ':\n'
+                    lines = difflib.unified_diff(
+                            cmd.split(),
+                            command.split(),
+                            lineterm='',
+                            )
+                    # Skip initial lines.
+                    assert next(lines) == '--- '
+                    assert next(lines) == '+++ '
+                    for line in lines:
+                        doit += f'    {line}\n'
 
     if not doit:
         # See whether any prerequisites are newer than target.
@@ -2652,9 +2783,9 @@
         for p in prerequisites:
             prerequisites_all += _make_prerequisites( p)
         if 0:
-            log2( 'prerequisites_all:')
+            log2( 'prerequisites_all:', caller=caller+1)
             for i in  prerequisites_all:
-                log2( f'    {i!r}')
+                log2( f'    {i!r}', caller=caller+1)
         pre_mtime = 0
         pre_path = None
         for prerequisite in prerequisites_all:
@@ -2670,7 +2801,7 @@
                 break
         if not doit:
             if pre_mtime > out_mtime:
-                doit = f'Prerequisite is new: {pre_path!r}'
+                doit = f'Prerequisite is new: {os.path.abspath(pre_path)!r}'
 
     if doit:
         # Remove `cmd_path` before we run the command, so any failure
@@ -2680,16 +2811,16 @@
             os.remove( cmd_path)
         except Exception:
             pass
-        log1( f'Running command because: {doit}')
-
-        run( command)
+        log1( f'Running command because: {doit}', caller=caller+1)
+
+        run( command, caller=caller+1)
 
         # Write the command we ran, into `cmd_path`.
         with open( cmd_path, 'w') as f:
             f.write( command)
         return True
     else:
-        log1( f'Not running command because up to date: {out!r}')
+        log1( f'Not running command because up to date: {out!r}', caller=caller+1)
 
     if 0:
         log2( f'out_mtime={time.ctime(out_mtime)} pre_mtime={time.ctime(pre_mtime)}.'
@@ -2761,6 +2892,11 @@
     return re.sub(r"[-_.]+", "-", name).lower()
 
 
+def _normalise2(name):
+    # https://packaging.python.org/en/latest/specifications/binary-distribution-format/
+    return _normalise(name).replace('-', '_')
+
+
 def _assert_version_pep_440(version):
     assert re.match(
                 r'^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$',
@@ -2790,6 +2926,9 @@
     global g_log_line_numbers
     g_log_line_numbers = bool(yes)
 
+def log(text='', caller=1):
+    _log(text, 0, caller+1)
+
 def log0(text='', caller=1):
     _log(text, 0, caller+1)
 
@@ -2813,19 +2952,30 @@
                 print(f'{filename}:{fr.function}(): {line}', file=sys.stdout, flush=1)
 
 
-def relpath(path, start=None):
+def relpath(path, start=None, allow_up=True):
     '''
     A safe alternative to os.path.relpath(), avoiding an exception on Windows
     if the drive needs to change - in this case we use os.path.abspath().
+    
+    Args:
+        path:
+            Path to be processed.
+        start:
+            Start directory or current directory if None.
+        allow_up:
+            If false we return absolute path is <path> is not within <start>.
     '''
     if windows():
         try:
-            return os.path.relpath(path, start)
+            ret = os.path.relpath(path, start)
         except ValueError:
             # os.path.relpath() fails if trying to change drives.
-            return os.path.abspath(path)
+            ret = os.path.abspath(path)
     else:
-        return os.path.relpath(path, start)
+        ret = os.path.relpath(path, start)
+    if not allow_up and ret.startswith('../') or ret.startswith('..\\'):
+        ret = os.path.abspath(path)
+    return ret
 
 
 def _so_suffix(use_so_versioning=True):
@@ -2981,21 +3131,22 @@
         for path, id_ in items.items():
             id0 = self.items0.get(path)
             if id0 != id_:
-                #mtime0, hash0 = id0
-                #mtime1, hash1 = id_
-                #log0(f'New/modified file {path=}.')
-                #log0(f'    {mtime0=} {"==" if mtime0==mtime1 else "!="} {mtime1=}.')
-                #log0(f'    {hash0=} {"==" if hash0==hash1 else "!="} {hash1=}.')
                 ret.append(path)
         return ret
+    def get_n(self, n):
+        '''
+        Returns new files matching <glob_pattern>, asserting that there are
+        exactly <n>.
+        '''
+        ret = self.get()
+        assert len(ret) == n, f'{len(ret)=}: {ret}'
+        return ret
     def get_one(self):
         '''
         Returns new match of <glob_pattern>, asserting that there is exactly
         one.
         '''
-        ret = self.get()
-        assert len(ret) == 1, f'{len(ret)=}'
-        return ret[0]
+        return self.get_n(1)[0]
     def _file_id(self, path):
         mtime = os.stat(path).st_mtime
         with open(path, 'rb') as f:
@@ -3025,7 +3176,7 @@
     
     Args:
         swig:
-            If starts with 'git:', passed as <remote> arg to git_remote().
+            If starts with 'git:', passed as <text> arg to git_get().
         quick:
             If true, we do not update/build local checkout if the binary is
             already present.
@@ -3033,9 +3184,8 @@
             path to use for checkout.
     '''
     if swig and swig.startswith('git:'):
-        assert platform.system() != 'Windows'
-        swig_local = os.path.abspath(swig_local)
-        # Note that {swig_local}/install/bin/swig doesn't work on MacoS because
+        assert platform.system() != 'Windows', f'Cannot build swig on Windows.'
+        # Note that {swig_local}/install/bin/swig doesn't work on MacOS because
         # {swig_local}/INSTALL is a file and the fs is case-insensitive.
         swig_binary = f'{swig_local}/install-dir/bin/swig'
         if quick and os.path.isfile(swig_binary):
@@ -3043,10 +3193,10 @@
         else:
             # Clone swig.
             swig_env_extra = None
-            git_get(
-                    swig,
+            swig_local = git_get(
                     swig_local,
-                    default_remote='https://github.com/swig/swig.git',
+                    text=swig,
+                    remote='https://github.com/swig/swig.git',
                     branch='master',
                     )
             if darwin():
@@ -3061,10 +3211,10 @@
                 # > If you need to have bison first in your PATH, run:
                 # >   echo 'export PATH="/opt/homebrew/opt/bison/bin:$PATH"' >> ~/.zshrc
                 #
-                run(f'brew install bison')
-                PATH = os.environ['PATH']
-                PATH = f'/opt/homebrew/opt/bison/bin:{PATH}'
-                swig_env_extra = dict(PATH=PATH)
+                swig_env_extra = dict()
+                macos_add_brew_path('bison', swig_env_extra)
+                run(f'which bison')
+                run(f'which bison', env_extra=swig_env_extra)
             # Build swig.
             run(f'cd {swig_local} && ./autogen.sh', env_extra=swig_env_extra)
             run(f'cd {swig_local} && ./configure --prefix={swig_local}/install-dir', env_extra=swig_env_extra)
@@ -3076,6 +3226,38 @@
         return swig
 
 
+def macos_add_brew_path(package, env=None, gnubin=True):
+    '''
+    Adds path(s) for Brew <package>'s binaries to env['PATH'].
+    
+    Args:
+        package:
+            Name of package. We get <package_root> of installed package by
+            running `brew --prefix <package>`.
+        env:
+            The environment dict to modify. If None we use os.environ. If PATH
+            is not in <env>, we first copy os.environ['PATH'] into <env>.
+        gnubin:
+            If true, we also add path to gnu binaries if it exists,
+            <package_root>/libexe/gnubin.
+    '''
+    if not darwin():
+        return
+    if env is None:
+        env = os.environ
+    if 'PATH' not in env:
+        env['PATH'] = os.environ['PATH']
+    package_root = run(f'brew --prefix {package}', capture=1).strip()
+    def add(path):
+        if os.path.isdir(path):
+            log1(f'Adding to $PATH: {path}')
+            PATH = env['PATH']
+            env['PATH'] = f'{path}:{PATH}'
+    add(f'{package_root}/bin')
+    if gnubin:
+        add(f'{package_root}/libexec/gnubin')
+
+
 def _show_dict(d):
     ret = ''
     for n in sorted(d.keys()):
@@ -3119,12 +3301,76 @@
     return includes_, ldflags_
 
 
+def venv_in(path=None):
+    '''
+    If path is None, returns true if we are in a venv. Otherwise returns true
+    only if we are in venv <path>.
+    '''
+    if path:
+        return os.path.abspath(sys.prefix) == os.path.abspath(path)
+    else:
+        return sys.prefix != sys.base_prefix
+
+
+def venv_run(args, path, recreate=True, clean=False):
+    '''
+    Runs Python command inside venv and returns termination code.
+    
+    Args:
+        args:
+            List of args or string command.
+        path:
+            Path of venv directory.
+        recreate:
+            If false we do not run `<sys.executable> -m venv <path>` if <path>
+            already exists. This avoids a delay in the common case where <path>
+            is already set up, but fails if <path> exists but does not contain
+            a valid venv.
+        clean:
+            If true we first delete <path>.
+    '''
+    if clean:
+        log(f'Removing any existing venv {path}.')
+        assert path.startswith('venv-')
+        shutil.rmtree(path, ignore_errors=1)
+    if recreate or not os.path.isdir(path):
+        run(f'{sys.executable} -m venv {path}')
+    
+    if isinstance(args, str):
+        args_string = args
+    elif platform.system() == 'Windows':
+        # shlex not reliable on Windows so we use Use crude quoting with "...".
+        args_string = ''
+        for i, arg in enumerate(args):
+            assert '"' not in arg
+            if i:
+                args_string += ' '
+            args_string += f'"{arg}"'
+    else:
+        args_string = shlex.join(args)
+    
+    if platform.system() == 'Windows':
+        command = f'{path}\\Scripts\\activate && python {args_string}'
+    else:
+        command = f'. {path}/bin/activate && python {args_string}'
+    e = run(command, check=0)
+    return e
+
+
 if __name__ == '__main__':
     # Internal-only limited command line support, used if
     # graal_legacy_python_config is true.
     #
     includes, ldflags = sysconfig_python_flags()
-    if sys.argv[1:] == ['--graal-legacy-python-config', '--includes']:
+    if sys.argv[1] == '--doctest':
+        import doctest
+        if sys.argv[2:]:
+            for f in sys.argv[2:]:
+                ff = globals()[f]
+                doctest.run_docstring_examples(ff, globals())
+        else:
+            doctest.testmod(None)
+    elif sys.argv[1:] == ['--graal-legacy-python-config', '--includes']:
         print(includes)
     elif sys.argv[1:] == ['--graal-legacy-python-config', '--ldflags']:
         print(ldflags)
--- a/scripts/test.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/scripts/test.py	Sat Oct 11 11:19:58 2025 +0200
@@ -4,7 +4,7 @@
 
 Examples:
 
-    ./PyMuPDF/scripts/test.py --m mupdf build test
+    ./PyMuPDF/scripts/test.py -m mupdf build test
         Build and test with pre-existing local mupdf/ checkout.
 
     ./PyMuPDF/scripts/test.py build test
@@ -13,10 +13,13 @@
     ./PyMuPDF/scripts/test.py -m 'git:https://git.ghostscript.com/mupdf.git' build test
         Build and test with internal checkout of MuPDF master.
 
-    ./PyMuPDF/scripts/test.py -m 'git:--branch 1.26.x https://github.com/ArtifexSoftware/mupdf.git' build test
+    ./PyMuPDF/scripts/test.py -m ':1.26.x' build test
         Build and test using internal checkout of mupdf 1.26.x branch from
         Github.
 
+    ./PyMuPDF/scripts/test.py install test -i 1.26.3 -k test_2596
+        Install pymupdf-1.26.3 from pupi.org and test only test_2596.
+
 Usage:
 
 * Command line arguments are called parameters if they start with `-`,
@@ -31,7 +34,7 @@
 Other:
 
 * If we are not already running inside a Python venv, we automatically create a
-  venv and re-run ourselves inside it.
+  venv and re-run ourselves inside it (also see the -v option).
 * Build/wheel/install commands always install into the venv.
 * Tests use whatever PyMuPDF/MuPDF is currently installed in the venv.
 * We run tests with pytest.
@@ -55,6 +58,7 @@
         `setup.py`.]
     
     --build-flavour <build_flavour>
+        [Obsolete.]
         Combination of 'p', 'b', 'd'. See ../setup.py's description of
         PYMUPDF_SETUP_FLAVOUR. Default is 'pbd', i.e. self-contained PyMuPDF
         wheels including MuPDF build-time files.
@@ -71,10 +75,11 @@
     --cibw-name <cibw_name>
         Name to use when installing cibuildwheel, e.g.:
             --cibw-name cibuildwheel==3.0.0b1
+            --cibw-name git+https://github.com/pypa/cibuildwheel
         Default is `cibuildwheel`, i.e. the current release.
     
     --cibw-pyodide 0|1
-         Experimental, make `cibuild` command build a pyodide wheel.
+         Experimental, make `cibw` command build a pyodide wheel.
          2025-05-27: this fails when building mupdf C API - `ld -r -b binary
          ...` fails with:
             emcc: error: binary: No such file or directory ("binary" was expected to be an input file, based on the commandline arguments provided)
@@ -90,6 +95,56 @@
     --cibw-release-2
         Set up so that `cibw` builds only linux-aarch64 wheel.
     
+    --cibw-skip-add-defaults 0|1
+        If 1 (the default) we add defaults to CIBW_SKIP such as `pp*` (to
+        exclude pypy) and `cp3??t-*` (to exclude free-threading).
+    
+    --cibw-test-project 0|1
+         If 1, command `cibw` will use a minimal test project instead of the
+         PyMuPDF directory itself.
+         
+         The test project uses setjmp/longjmp and C++ throw/catch.
+         
+         The test checks for current behaviour, so with `--cibw-pyodide 1` it
+         succeeds if the cibw command fails with the expected error message.
+         
+         2025-08-22:
+            Builds ok on Linux.
+            
+            Fails at runtime with --cibw-pyodide 1:
+            
+                With compile/link flags ``:
+                    (+45.0s): remote.py:233:main: jules-devuan: Pyodide has suffered a fatal error. Please report this to the Pyodide maintainers.
+                    (+45.1s): remote.py:233:main: jules-devuan: Stack (most recent call first):
+                    (+45.1s): remote.py:233:main: jules-devuan:   File "/tmp/cibw-run-h_pfo0wf/cp312-pyodide_wasm32/venv-test/lib/python3.12/site-packages/foo/__init__.py", line 63 in bar
+                    (+45.1s): remote.py:233:main: jules-devuan:   File "<string>The cause of the fatal error was:
+                    (+45.1s): remote.py:233:main: jules-devuan: CppException std::runtime_error: deliberate exception
+                    (+45.1s): remote.py:233:main: jules-devuan:     at convertCppException (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:48959)
+                    (+45.1s): remote.py:233:main: jules-devuan:     at API.fatal_error (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:49253)
+                    (+45.1s): remote.py:233:main: jules-devuan:     at main (file:///home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/python_cli_entry.mjs:149:13) {
+                    (+45.1s): remote.py:233:main: jules-devuan:   ty: 'std::runtime_error',
+                    (+45.1s): remote.py:233:main: jules-devuan:   pyodide_fatal_error: true
+                    (+45.1s): remote.py:233:main: jules-devuan: }
+                    (+45.1s): remote.py:233:main: jules-devuan: ", line 1 in <module>
+                    (+45.1s): remote.py:233:main: jules-devuan: CppException std::runtime_error: deliberate exception
+                    (+45.1s): remote.py:233:main: jules-devuan:     at convertCppException (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:48959)
+                    (+45.1s): remote.py:233:main: jules-devuan:     at API.fatal_error (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:49253)
+                    (+45.1s): remote.py:233:main: jules-devuan:     at main (file:///home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/python_cli_entry.mjs:149:13) {
+                    (+45.1s): remote.py:233:main: jules-devuan:   ty: 'std::runtime_error',
+                    (+45.1s): remote.py:233:main: jules-devuan:   pyodide_fatal_error: true
+                    (+45.1s): remote.py:233:main: jules-devuan: }
+
+                With compile/link flags `-fwasm-exceptions`:
+                    [LinkError: WebAssembly.instantiate(): Import #60 module="env" function="__c_longjmp": tag import requires a WebAssembly.Tag]
+
+                With compile/link flags `-fwasm-exceptions -sSUPPORT_LONGJMP=wasm`:
+                    [LinkError: WebAssembly.instantiate(): Import #60 module="env" function="__c_longjmp": tag import requires a WebAssembly.Tag]
+
+    --cibw-test-project-setjmp 0|1
+        If 1, --cibw-test-project builds a project that uses
+        setjmp/longjmp. Default is 0 (Windows builds fail when attempting to
+        compile the output from swig).
+    
     -d
         Equivalent to `-b debug`.
     
@@ -104,9 +159,6 @@
     -f 0|1
         If 1 we also test alias `fitz` as well as `pymupdf`. Default is '0'.
     
-    --gdb 0|1
-        Run tests under gdb. Requires user interaction.
-    
     --graal
         Use graal - run inside a Graal VM instead of a Python venv.
         
@@ -130,15 +182,21 @@
             Default is 'r'. Also see `PyMuPDF:tests/run_compound.py`.
     
     -i <install_version>
-        Set version installed by the 'install' command.
+        Controls behaviour of `install` command:
+        
+        * If <install_version> ends with `.whl` we use `pip install
+          <install_version>`.
+        * If <install_version> starts with == or >= or >, we use `pip install
+          pymupdf<install_version>`.
+        * Otherwise we use `pip install pymupdf==<install_version>`.
     
     -k <expression>
         Specify which test(s) to run; passed straight through to pytest's `-k`.
         For example `-k test_3354`.
     
     -m <location> | --mupdf <location>
-        Location of local mupdf/ directory or 'git:...' to be used
-        when building PyMuPDF.
+        Location of mupdf as local directory or remote git, to be used when
+        building PyMuPDF.
         
         This sets environment variable PYMUPDF_SETUP_MUPDF_BUILD, which is used
         by PyMuPDF/setup.py. If not specified PyMuPDF will download its default
@@ -176,7 +234,7 @@
     
     -P 0|1
         If 1, automatically install required system packages such as
-        Valgrind. Default is 0.
+        Valgrind. Default is 1 if running as Github action, otherwise 0.
     
     --pybind 0|1
         Experimental, for investigating
@@ -197,9 +255,9 @@
     --show-args:
         Show sys.argv and exit. For debugging.
     
-    --sync-paths
+    --sync-paths <path>
         Do not run anything, instead write required files/directories/checkouts
-        to stdout, one per line. This is to help with automated running on
+        to <path>, one per line. This is to help with automated running on
         remote machines.
     
     --system-site-packages 0|1
@@ -241,7 +299,7 @@
         Use specified prefix when running pytest, must be one of:
             gdb
             helgrind
-            vagrind
+            valgrind
     
     -v <venv>
         venv is:
@@ -332,6 +390,19 @@
 run = pipcl.run
 
 
+# We build and test Python 3.x for x in this range.
+python_versions_minor = range(9, 14+1)
+
+def cibw_cp(*version_minors):
+    '''
+    Returns <version_tuples> in 'cp39*' format, e.g. suitable for CIBW_BUILD.
+    '''
+    ret = list()
+    for version_minor in version_minors:
+        ret.append(f'cp3{version_minor}*')
+    return ' '.join(ret)
+
+
 def main(argv):
 
     if github_workflow_unimportant():
@@ -341,6 +412,9 @@
     cibw_name = None
     cibw_pyodide = None
     cibw_pyodide_version = None
+    cibw_skip_add_defaults = True
+    cibw_test_project = None
+    cibw_test_project_setjmp = False
     commands = list()
     env_extra = dict()
     graal = False
@@ -348,7 +422,7 @@
     install_version = None
     mupdf_sync = None
     os_names = list()
-    system_packages = False
+    system_packages = True if os.environ.get('GITHUB_ACTIONS') == 'true' else False
     pybind = False
     pyodide_build_version = None
     pytest_options = ''
@@ -408,13 +482,16 @@
             env_extra['CIBW_ARCHS_LINUX'] = 'auto64'
             env_extra['CIBW_ARCHS_MACOS'] = 'auto64'
             env_extra['CIBW_ARCHS_WINDOWS'] = 'auto'    # win32 and win64.
-            env_extra['CIBW_SKIP'] = 'pp* *i686 cp36* cp37* *musllinux*aarch64*'
+            env_extra['CIBW_SKIP'] = '*i686 *musllinux*aarch64* cp3??t-*'
+            cibw_skip_add_defaults = 0
         
         elif arg == '--cibw-release-2':
-            env_extra['CIBW_ARCHS_LINUX'] = 'aarch64'
             # Testing only first and last python versions because otherwise
             # Github times out after 6h.
-            env_extra['CIBW_BUILD'] = 'cp39* cp313*'
+            env_extra['CIBW_BUILD'] = cibw_cp(python_versions_minor[0], python_versions_minor[-1])
+            env_extra['CIBW_ARCHS_LINUX'] = 'aarch64'
+            env_extra['CIBW_SKIP'] = '*i686 *musllinux*aarch64* cp3??t-*'
+            cibw_skip_add_defaults = 0
             os_names = ['linux']
         
         elif arg == '--cibw-archs-linux':
@@ -424,7 +501,16 @@
             cibw_name = next(args)
         
         elif arg == '--cibw-pyodide':
-            cibw_pyodide = next(args)
+            cibw_pyodide = int(next(args))
+        
+        elif arg == '--cibw-skip-add-defaults':
+            cibw_skip_add_defaults = int(next(args))
+        
+        elif arg == '--cibw-test-project':
+            cibw_test_project = int(next(args))
+        
+        elif arg == '--cibw-test-project-setjmp':
+            cibw_test_project_setjmp = int(next(args))
         
         elif arg == '-d':
             env_extra['PYMUPDF_SETUP_MUPDF_BUILD_TYPE'] = 'debug'
@@ -463,13 +549,13 @@
                 _mupdf = None
             elif _mupdf.startswith(':'):
                 _branch = _mupdf[1:]
-                _mupdf = 'git:--branch {_branch} https://github.com/ArtifexSoftware/mupdf.git'
-                os.environ['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf
+                _mupdf = f'git:--branch {_branch} https://github.com/ArtifexSoftware/mupdf.git'
+                env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf
             elif _mupdf.startswith('git:') or '://' in _mupdf:
-                os.environ['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf
+                env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf
             else:
                 assert os.path.isdir(_mupdf), f'Not a directory: {_mupdf=}'
-                os.environ['PYMUPDF_SETUP_MUPDF_BUILD'] = os.path.abspath(_mupdf)
+                env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = os.path.abspath(_mupdf)
                 mupdf_sync = _mupdf
         
         elif arg == '--mupdf-clean':
@@ -501,7 +587,7 @@
         elif arg == '--show-args':
             show_args = 1
         elif arg == '--sync-paths':
-            sync_paths = True
+            sync_paths = next(args)
         
         elif arg == '--system-site-packages':
             system_site_packages = int(next(args))
@@ -539,10 +625,11 @@
     # Handle special args --sync-paths, -h, -v, -o first.
     #
     if sync_paths:
-        # Just print required files, directories and checkouts.
-        print(pymupdf_dir)
-        if mupdf_sync:
-            print(mupdf_sync)
+        # Print required files, directories and checkouts.
+        with open(sync_paths, 'w') as f:
+            print(pymupdf_dir, file=f)
+            if mupdf_sync:
+                print(mupdf_sync, file=f)
         return
 
     if show_help:
@@ -578,7 +665,7 @@
                     if venv == 1 and os.path.exists(pyenv_dir) and os.path.exists(venv_name):
                         log(f'{venv=} and {venv_name=} already exists so not building pyenv or creating venv.')
                     else:
-                        pipcl.git_get('https://github.com/pyenv/pyenv.git', pyenv_dir, branch='master')
+                        pipcl.git_get(pyenv_dir, remote='https://github.com/pyenv/pyenv.git', branch='master')
                         run(f'cd {pyenv_dir} && src/configure && make -C src')
                         run(f'which pyenv')
                         run(f'pyenv install -v -s {graalpy}')
@@ -622,27 +709,33 @@
         
         elif command == 'cibw':
             # Build wheel(s) with cibuildwheel.
-            if cibw_pyodide and env_extra.get('CIBW_BUILD') is None:
-                assert 0, f'Need a Python version for Pyodide.'
-                CIBW_BUILD = 'cp312*'
-                env_extra['CIBW_BUILD'] = CIBW_BUILD
-                log(f'Defaulting to {CIBW_BUILD=} for Pyodide.')
-            #if cibw_pyodide_version == None:
-            #    cibw_pyodide_version = '0.28.0'
+            
+            if platform.system() == 'Linux':
+                PYMUPDF_SETUP_MUPDF_BUILD = env_extra.get('PYMUPDF_SETUP_MUPDF_BUILD')
+                if PYMUPDF_SETUP_MUPDF_BUILD and not PYMUPDF_SETUP_MUPDF_BUILD.startswith('git:'):
+                    assert PYMUPDF_SETUP_MUPDF_BUILD.startswith('/')
+                    env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = f'/host/{PYMUPDF_SETUP_MUPDF_BUILD}'
+            
             cibuildwheel(
                     env_extra,
                     cibw_name or 'cibuildwheel',
                     cibw_pyodide,
                     cibw_pyodide_version,
                     cibw_sdist,
+                    cibw_test_project,
+                    cibw_test_project_setjmp,
+                    cibw_skip_add_defaults,
                     )
         
         elif command == 'install':
             p = 'pymupdf'
             if install_version:
-                if not install_version.startswith(('==', '>=', '>')):
-                    p = f'{p}=='
-                p = f'{p}{install_version}'
+                if install_version.endswith('.whl'):
+                    p = install_version
+                elif install_version.startswith(('==', '>=', '>')):
+                    p = f'{p}{install_version}'
+                else:
+                    p = f'{p}=={install_version}'
             run(f'pip install --force-reinstall {p}')
             have_installed = True
         
@@ -739,7 +832,7 @@
         venv,
         wheel,
         ):
-    print(f'{build_isolation=}')
+    log(f'{build_isolation=}')
     
     if build_isolation is None:
         # On OpenBSD libclang is not available on pypi.org, so we need to force
@@ -775,7 +868,16 @@
         run(f'pip install{build_isolation_text} -v --force-reinstall {pymupdf_dir_abs}', env_extra=env_extra)
 
 
-def cibuildwheel(env_extra, cibw_name, cibw_pyodide, cibw_pyodide_version, cibw_sdist):
+def cibuildwheel(
+        env_extra,
+        cibw_name,
+        cibw_pyodide,
+        cibw_pyodide_version,
+        cibw_sdist,
+        cibw_test_project,
+        cibw_test_project_setjmp,
+        cibw_skip_add_defaults,
+        ):
     
     if cibw_sdist and platform.system() == 'Linux':
         log(f'Building sdist.')
@@ -789,9 +891,19 @@
     # Some general flags.
     if 'CIBW_BUILD_VERBOSITY' not in env_extra:
         env_extra['CIBW_BUILD_VERBOSITY'] = '1'
-    if 'CIBW_SKIP' not in env_extra:
-        env_extra['CIBW_SKIP'] = 'pp* *i686 cp36* cp37* *musllinux* *-win32 *-aarch64'
-
+    
+    # Add default flags to CIBW_SKIP.
+    # 2025-10-07: `cp3??t-*` excludes free-threading, which currently breaks
+    # some tests.
+    
+    if cibw_skip_add_defaults:
+        CIBW_SKIP = env_extra.get('CIBW_SKIP', '')
+        CIBW_SKIP += ' *i686 *musllinux* *-win32 *-aarch64 cp3??t-*'
+        CIBW_SKIP = CIBW_SKIP.split()
+        CIBW_SKIP = sorted(list(set(CIBW_SKIP)))
+        CIBW_SKIP = ' '.join(CIBW_SKIP)
+        env_extra['CIBW_SKIP'] = CIBW_SKIP
+    
     # Set what wheels to build, if not already specified.
     if 'CIBW_ARCHS' not in env_extra:
         if 'CIBW_ARCHS_WINDOWS' not in env_extra:
@@ -823,14 +935,18 @@
     CIBW_BUILD = env_extra.get('CIBW_BUILD')
     log(f'{CIBW_BUILD=}')
     if CIBW_BUILD is None:
-        if os.environ.get('GITHUB_ACTIONS') == 'true':
+        if cibw_pyodide:
+            # Using python-3.13 fixes problems with MuPDF's setjmp/longjmp.
+            CIBW_BUILD = 'cp313*'
+        elif os.environ.get('GITHUB_ACTIONS') == 'true':
             # Build/test all supported Python versions.
-            CIBW_BUILD = 'cp39* cp310* cp311* cp312* cp313*'
+            CIBW_BUILD = cibw_cp(*python_versions_minor)
         else:
             # Build/test current Python only.
             v = platform.python_version_tuple()[:2]
             log(f'{v=}')
             CIBW_BUILD = f'cp{"".join(v)}*'
+        log(f'Defaulting to {CIBW_BUILD=}.')
     
     cibw_pyodide_args = ''
     if cibw_pyodide:
@@ -843,11 +959,25 @@
         env_extra['CIBW_PYODIDE_VERSION'] = cibw_pyodide_version
         env_extra['CIBW_ENABLE'] = 'pyodide-prerelease'
 
-    # Pass all the environment variables we have set, to Linux
-    # docker. Note that this will miss any settings in the original
-    # environment.
-    env_extra['CIBW_ENVIRONMENT_PASS_LINUX'] = ' '.join(sorted(env_extra.keys()))
-
+    # Pass all the environment variables we have set, to Linux docker. Note
+    # that this will miss any settings in the original environment. We have to
+    # add CIBW_BUILD explicitly because we haven't set it yet.
+    CIBW_ENVIRONMENT_PASS_LINUX = set(env_extra.keys())
+    CIBW_ENVIRONMENT_PASS_LINUX.add('CIBW_BUILD')
+    CIBW_ENVIRONMENT_PASS_LINUX = sorted(list(CIBW_ENVIRONMENT_PASS_LINUX))
+    CIBW_ENVIRONMENT_PASS_LINUX = ' '.join(CIBW_ENVIRONMENT_PASS_LINUX)
+    env_extra['CIBW_ENVIRONMENT_PASS_LINUX'] = CIBW_ENVIRONMENT_PASS_LINUX
+    
+    if cibw_test_project:
+        cibw_do_test_project(
+                env_extra,
+                CIBW_BUILD,
+                cibw_pyodide,
+                cibw_pyodide_args,
+                cibw_test_project_setjmp,
+                )
+        return
+    
     # Build for lowest (assumed first) Python version.
     #
     CIBW_BUILD_0 = CIBW_BUILD.split()[0]
@@ -859,9 +989,175 @@
     # will notice that the wheel we built above supports all versions of
     # Python, so will not actually do any builds here.
     #
+    # We only do this if there are more than one Python versions. This still
+    # duplicates the testing of the first python version.
+    if len(CIBW_BUILD.split()) > 1:
+        env_extra['CIBW_BUILD'] = CIBW_BUILD
+        run(f'cd {pymupdf_dir} && cibuildwheel{cibw_pyodide_args}', env_extra=env_extra)
+        run(f'ls -ld {pymupdf_dir}/wheelhouse/*')
+
+
+def cibw_do_test_project(
+        env_extra,
+        CIBW_BUILD,
+        cibw_pyodide,
+        cibw_pyodide_args,
+        cibw_test_project_setjmp,
+        ):
+    testdir = f'{pymupdf_dir_abs}/cibw_test'
+    shutil.rmtree(testdir, ignore_errors=1)
+    os.mkdir(testdir)
+    with open(f'{testdir}/setup.py', 'w') as f:
+        f.write(textwrap.dedent(f'''
+                import shutil
+                import sys
+                import os
+                import pipcl
+
+                def build():
+                    so_leaf = pipcl.build_extension(
+                            name = 'foo',
+                            path_i = 'foo.i',
+                            outdir = 'build',
+                            source_extra = 'qwerty.cpp',
+                            py_limited_api = True,
+                            )
+                    
+                    return [
+                            ('build/foo.py', 'foo/__init__.py'),
+                            (f'build/{{so_leaf}}', f'foo/'),
+                            ]
+
+                p = pipcl.Package(
+                        name = 'pymupdf-test',
+                        version = '1.2.3',
+                        fn_build = build,
+                        py_limited_api=True,
+                        )
+
+                def get_requires_for_build_wheel(config_settings=None):
+                    return ['swig']
+                
+                build_wheel = p.build_wheel
+                build_sdist = p.build_sdist
+                
+                # Handle old-style setup.py command-line usage:
+                if __name__ == '__main__':
+                    p.handle_argv(sys.argv)
+                '''))
+    with open(f'{testdir}/foo.i', 'w') as f:
+        if cibw_test_project_setjmp:
+            f.write(textwrap.dedent('''
+                    %{
+                    #include <stdexcept>
+
+                    #include <assert.h>
+                    #include <setjmp.h>
+                    #include <stdio.h>
+                    #include <string.h>
+
+                    int qwerty(void);
+
+                    static sigjmp_buf jmpbuf;
+                    static int bar0(const char* text)
+                    {
+                        printf("bar0(): text: %s\\n", text);
+
+                        int q = qwerty();
+                        printf("bar0(): q=%i\\n", q);
+
+                        int len = (int) strlen(text);
+                        printf("bar0(): len=%i\\n", len);
+                        printf("bar0(): calling longjmp().\\n");
+                        fflush(stdout);
+                        longjmp(jmpbuf, 1);
+                        assert(0);
+                    }
+                    int bar1(const char* text)
+                    {
+                        int ret = 0;
+                        if (setjmp(jmpbuf) == 0)
+                        {
+                            ret = bar0(text);
+                        }
+                        else
+                        {
+                            printf("bar1(): setjmp() returned non-zero.\\n");
+                            throw std::runtime_error("deliberate exception");
+                        }
+                        assert(0);
+                    }
+                    int bar(const char* text)
+                    {
+                        int ret = 0;
+                        try
+                        {
+                            ret = bar1(text);
+                        }
+                        catch(std::exception& e)
+                        {
+                            printf("bar1(): received exception: %s\\n", e.what());
+                        }
+                        return ret;
+                    }
+                    %}
+                    int bar(const char* text);
+                    '''))
+        else:
+            f.write(textwrap.dedent('''
+                    %{
+                    #include <stdexcept>
+
+                    #include <assert.h>
+                    #include <stdio.h>
+                    #include <string.h>
+
+                    int qwerty(void);
+
+                    int bar(const char* text)
+                    {
+                        qwerty();
+                        return strlen(text);
+                    }
+                    %}
+                    int bar(const char* text);
+                    '''))
+    
+    with open(f'{testdir}/qwerty.cpp', 'w') as f:
+        f.write(textwrap.dedent('''
+                #include <stdio.h>
+                int qwerty(void)
+                {
+                    printf("qwerty()\\n");
+                    return 3;
+                }
+                '''))
+
+    with open(f'{testdir}/pyproject.toml', 'w') as f:
+        f.write(textwrap.dedent('''
+                [build-system]
+                # We define required packages in setup.py:get_requires_for_build_wheel().
+                requires = []
+
+                # See pep-517.
+                #
+                build-backend = "setup"
+                backend-path = ["."]
+                '''))
+        
+    shutil.copy2(f'{pymupdf_dir_abs}/pipcl.py', f'{testdir}/pipcl.py')
+    shutil.copy2(f'{pymupdf_dir_abs}/wdev.py', f'{testdir}/wdev.py')
+
     env_extra['CIBW_BUILD'] = CIBW_BUILD
-    run(f'cd {pymupdf_dir} && cibuildwheel{cibw_pyodide_args}', env_extra=env_extra)
-    run(f'ls -ld {pymupdf_dir}/wheelhouse/*')
+    CIBW_TEST_COMMAND = ''
+    if cibw_pyodide:
+        CIBW_TEST_COMMAND += 'pyodide xbuildenv search --all; '
+    CIBW_TEST_COMMAND += 'python -c "import foo; foo.bar(\\"some text\\")"'
+    env_extra['CIBW_TEST_COMMAND'] = CIBW_TEST_COMMAND
+    #env_extra['CIBW_TEST_COMMAND'] = ''
+    
+    run(f'cd {testdir} && cibuildwheel --output-dir ../wheelhouse{cibw_pyodide_args}', env_extra=env_extra)
+    run(f'ls -ldt {pymupdf_dir_abs}/wheelhouse/*')
         
 
 def build_pyodide_wheel(pyodide_build_version=None):
@@ -1088,16 +1384,21 @@
     
     PYODIDE_ROOT = os.environ.get('PYODIDE_ROOT')
     if PYODIDE_ROOT is not None:
+        # We can't install packages with `pip install`; setup.py will have
+        # specified pytest in the wheels's <requires_dist>, so it will be
+        # already installed.
+        #
         log(f'Not installing test packages because {PYODIDE_ROOT=}.')
-        command = f'{pytest_options} {pytest_arg} -s'
+        command = f'{pytest_options} {pytest_arg}'
         args = shlex.split(command)
-        print(f'{PYODIDE_ROOT=} so calling pytest.main(args).')
-        print(f'{command=}')
-        print(f'args are ({len(args)}):')
+        log(f'{PYODIDE_ROOT=} so calling pytest.main(args).')
+        log(f'{command=}')
+        log(f'args are ({len(args)}):')
         for arg in args:
-            print(f'    {arg!r}')
+            log(f'    {arg!r}')
         import pytest
-        pytest.main(args)
+        e = pytest.main(args)
+        assert e == 0, f'pytest.main() failed: {e=}'
         return
     
     if venv >= 2:
@@ -1163,7 +1464,7 @@
 
     # Always start by removing any test_*_fitz.py files.
     for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*_fitz.py'):
-        print(f'Removing {p=}')
+        log(f'Removing {p=}')
         os.remove(p)
     if test_fitz:
         # Create copies of each test file, modified to use `pymupdf`
@@ -1175,7 +1476,7 @@
                 continue
             branch, leaf = os.path.split(p)
             p2 = f'{branch}/{leaf[:5]}fitz_{leaf[5:]}'
-            print(f'Converting {p=} to {p2=}.')
+            log(f'Converting {p=} to {p2=}.')
             with open(p, encoding='utf8') as f:
                 text = f.read()
             text2 = re.sub("([^\'])\\bpymupdf\\b", '\\1fitz', text)
--- a/setup.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/setup.py	Sat Oct 11 11:19:58 2025 +0200
@@ -88,11 +88,12 @@
             Empty string:
                 Build PyMuPDF with the system MuPDF.
             A string starting with 'git:':
-                Use `git clone` to get a MuPDF checkout. We use the
-                string in the git clone command; it must contain the git
-                URL from which to clone, and can also contain other `git
-                clone` args, for example:
-                    PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master https://github.com/ArtifexSoftware/mupdf.git"
+                We use `git` commands to clone/update a local MuPDF checkout.
+                Should match `git:[--branch <branch>][--tag <tag>][<remote>]`.
+                If <remote> is omitted we use a default.
+                For example:
+                    PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master"
+                Passed as <text> arg to pipcl.git_get().
             Otherwise:
                 Location of mupdf directory.
     
@@ -425,7 +426,7 @@
 
 mupdf_tgz = os.path.abspath( f'{__file__}/../mupdf.tgz')
 
-def get_mupdf_internal(out, location=None, sha=None, local_tgz=None):
+def get_mupdf_internal(out, location=None, local_tgz=None):
     '''
     Gets MuPDF as either a .tgz or a local directory.
     
@@ -438,8 +439,6 @@
             If starts with 'git:', should be remote git location.
             Otherwise if containing '://' should be URL for .tgz.
             Otherwise should path of local mupdf checkout.
-        sha:
-            If not None and we use git clone, we checkout this sha.
         local_tgz:
             If not None, must be local .tgz file.
     Returns:
@@ -451,7 +450,7 @@
             default location.
                 
     '''
-    log(f'get_mupdf_internal(): {out=} {location=} {sha=}')
+    log(f'get_mupdf_internal(): {out=} {location=}')
     assert out in ('dir', 'tgz')
     if location is None:
         location = f'https://mupdf.com/downloads/archive/mupdf-{version_mupdf}-source.tar.gz'
@@ -465,21 +464,15 @@
     if local_tgz:
         assert os.path.isfile(local_tgz)
     elif location.startswith( 'git:'):
-        location_git = location[4:]
         local_dir = 'mupdf-git'
+        pipcl.git_get(local_dir, text=location, remote='https://github.com/ArtifexSoftware/mupdf.git')
         
-        # Try to update existing checkout.
-        e = run(f'cd {local_dir} && git pull && git submodule update --init', check=False)
-        if e:
-            # No existing git checkout, so do a fresh clone.
-            _fs_remove(local_dir)
-            gitargs = location[4:]
-            run(f'git clone --recursive --depth 1 --shallow-submodules {gitargs} {local_dir}')
-
         # Show sha of checkout.
-        run( f'cd {local_dir} && git show --pretty=oneline|head -n 1', check=False)
-        if sha:
-            run( f'cd {local_dir} && git checkout {sha}')
+        run(
+                f'cd {local_dir} && git show --pretty=oneline|head -n 1',
+                check = False,
+                prefix = 'mupdf git id: ',
+                )
     elif '://' in location:
         # Download .tgz.
         local_tgz = os.path.basename( location)
@@ -574,14 +567,10 @@
 windows = platform.system() == 'Windows' or platform.system().startswith('CYGWIN')
 msys2 = platform.system().startswith('MSYS_NT-')
 
-pyodide_flags = '-fwasm-exceptions'
-
 if os.environ.get('PYODIDE') == '1':
     if os.environ.get('OS') != 'pyodide':
         log('PYODIDE=1, setting OS=pyodide.')
         os.environ['OS'] = 'pyodide'
-        os.environ['XCFLAGS'] = pyodide_flags
-        os.environ['XCXXFLAGS'] = pyodide_flags
 
 pyodide = os.environ.get('OS') == 'pyodide'
 
@@ -704,8 +693,8 @@
             add('d', f'{mupdf_build_dir}/libmupdf-threads.a', f'{to_dir_d}/lib/')
         elif pyodide:
             add('p', f'{mupdf_build_dir}/_mupdf.so', to_dir)
-            add('b', f'{mupdf_build_dir}/libmupdfcpp.so', 'PyMuPDF.libs/')
-            add('b', f'{mupdf_build_dir}/libmupdf.so', 'PyMuPDF.libs/')
+            add('b', f'{mupdf_build_dir}/libmupdfcpp.so', to_dir)
+            add('b', f'{mupdf_build_dir}/libmupdf.so', to_dir)
         else:
             add('p', f'{mupdf_build_dir}/_mupdf.so', to_dir)
             add('b', pipcl.get_soname(f'{mupdf_build_dir}/libmupdfcpp.so'), to_dir)
@@ -748,10 +737,12 @@
         except Exception:
             return 0
     swig_version_tuple = tuple(int_or_0(i) for i in swig_version.split('.'))
+    version_p_tuple = tuple(int_or_0(i) for i in version_p.split('.'))
     log(f'{swig_version=}')
     text = ''
     text += f'mupdf_location = {mupdf_location!r}\n'
     text += f'pymupdf_version = {version_p!r}\n'
+    text += f'pymupdf_version_tuple = {version_p_tuple!r}\n'
     text += f'pymupdf_git_sha = {sha!r}\n'
     text += f'pymupdf_git_diff = {diff!r}\n'
     text += f'pymupdf_git_branch = {branch!r}\n'
@@ -1211,10 +1202,6 @@
         if cxxflags:
             compiler_extra += f' {cxxflags}'
 
-    if pyodide:
-        compiler_extra += f' {pyodide_flags}'
-        linker_extra += f' {pyodide_flags}'
-        
     return compiler_extra, linker_extra, includes, defines, optimise, debug, libpaths, libs, libraries, 
 
 
@@ -1280,9 +1267,9 @@
 #
 
 # PyMuPDF version.
-version_p = '1.26.4'
+version_p = '1.26.5'
 
-version_mupdf = '1.26.7'
+version_mupdf = '1.26.10'
 
 # PyMuPDFb version. This is the PyMuPDF version whose PyMuPDFb wheels we will
 # (re)use if generating separate PyMuPDFb wheels. Though as of PyMuPDF-1.24.11
@@ -1413,9 +1400,6 @@
             ret.append(libclang)
         elif openbsd:
             print(f'OpenBSD: libclang not available via pip; assuming `pkg_add py3-llvm`.')
-        elif darwin and platform.machine() == 'arm64':
-            print(f'MacOS/arm64: forcing use of libclang 16.0.6 because 18.1.1 known to fail with `clang.cindex.TranslationUnitLoadError: Error parsing translation unit.`')
-            ret.append('libclang==16.0.6')
         elif darwin and platform_release_tuple() < (18,):
             # There are still of problems when building on old macos.
             ret.append('libclang==14.0.6')
--- a/src/__init__.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/src/__init__.py	Sat Oct 11 11:19:58 2025 +0200
@@ -383,6 +383,7 @@
 from ._build import pymupdf_git_diff    # noqa F401
 from ._build import pymupdf_git_sha     # noqa F401
 from ._build import pymupdf_version     # noqa F401
+from ._build import pymupdf_version_tuple   # noqa F401
 from ._build import swig_version        # noqa F401
 from ._build import swig_version_tuple  # noqa F401
 
@@ -393,7 +394,6 @@
 
 # Versions as tuples; useful when comparing versions.
 #
-pymupdf_version_tuple = tuple( [_int_rc(i) for i in pymupdf_version.split('.')])
 mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')])
 
 assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
@@ -1035,6 +1035,12 @@
         stream = JM_BinFromBuffer(buf)
         res['stream'] = stream
         return res
+    
+    def get_text(self, *args, **kwargs):
+        return utils.get_text(self, *args, **kwargs)
+
+    def get_textbox(self, *args, **kwargs):
+        return utils.get_textbox(self, *args, **kwargs)
 
     def get_textpage(self, clip=None, flags=0):
         """Make annotation TextPage."""
@@ -3058,6 +3064,14 @@
         v = JM_pdf_obj_from_str( pdf, font)
         mupdf.pdf_dict_put( fonts, k, v)
 
+    def del_toc_item(
+            self,
+            idx: int,
+            ) -> None:
+        """Delete TOC / bookmark item by index."""
+        xref = self.get_outline_xrefs()[idx]
+        self._remove_toc_item(xref)
+
     def _delToC(self):
         """Delete the TOC."""
         if self.is_closed or self.is_encrypted:
@@ -3103,6 +3117,454 @@
             raise ValueError( MSG_BAD_XREF)
         mupdf.pdf_delete_object(pdf, xref)
 
+    def _do_links(
+            doc1: 'Document',
+            doc2: 'Document',
+            from_page: int = -1,
+            to_page: int = -1,
+            start_at: int = -1,
+            ) -> None:
+        """Insert links contained in copied page range into destination PDF.
+
+        Parameter values **must** equal those of method insert_pdf(), which must
+        have been previously executed.
+        """
+        #pymupdf.log( 'utils.do_links()')
+        # --------------------------------------------------------------------------
+        # internal function to create the actual "/Annots" object string
+        # --------------------------------------------------------------------------
+        def cre_annot(lnk, xref_dst, pno_src, ctm):
+            """Create annotation object string for a passed-in link."""
+
+            r = lnk["from"] * ctm  # rect in PDF coordinates
+            rect = _format_g(tuple(r))
+            if lnk["kind"] == LINK_GOTO:
+                txt = annot_skel["goto1"]  # annot_goto
+                idx = pno_src.index(lnk["page"])
+                p = lnk["to"] * ctm  # target point in PDF coordinates
+                annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
+
+            elif lnk["kind"] == LINK_GOTOR:
+                if lnk["page"] >= 0:
+                    txt = annot_skel["gotor1"]  # annot_gotor
+                    pnt = lnk.get("to", Point(0, 0))  # destination point
+                    if type(pnt) is not Point:
+                        pnt = Point(0, 0)
+                    annot = txt(
+                        lnk["page"],
+                        pnt.x,
+                        pnt.y,
+                        lnk["zoom"],
+                        lnk["file"],
+                        lnk["file"],
+                        rect,
+                    )
+                else:
+                    txt = annot_skel["gotor2"]  # annot_gotor_n
+                    to = get_pdf_str(lnk["to"])
+                    to = to[1:-1]
+                    f = lnk["file"]
+                    annot = txt(to, f, rect)
+
+            elif lnk["kind"] == LINK_LAUNCH:
+                txt = annot_skel["launch"]  # annot_launch
+                annot = txt(lnk["file"], lnk["file"], rect)
+
+            elif lnk["kind"] == LINK_URI:
+                txt = annot_skel["uri"]  # annot_uri
+                annot = txt(lnk["uri"], rect)
+
+            else:
+                annot = ""
+
+            return annot
+
+        # --------------------------------------------------------------------------
+
+        # validate & normalize parameters
+        if from_page < 0:
+            fp = 0
+        elif from_page >= doc2.page_count:
+            fp = doc2.page_count - 1
+        else:
+            fp = from_page
+
+        if to_page < 0 or to_page >= doc2.page_count:
+            tp = doc2.page_count - 1
+        else:
+            tp = to_page
+
+        if start_at < 0:
+            raise ValueError("'start_at' must be >= 0")
+        sa = start_at
+
+        incr = 1 if fp <= tp else -1  # page range could be reversed
+
+        # lists of source / destination page numbers
+        pno_src = list(range(fp, tp + incr, incr))
+        pno_dst = [sa + i for i in range(len(pno_src))]
+
+        # lists of source / destination page xrefs
+        xref_src = []
+        xref_dst = []
+        for i in range(len(pno_src)):
+            p_src = pno_src[i]
+            p_dst = pno_dst[i]
+            old_xref = doc2.page_xref(p_src)
+            new_xref = doc1.page_xref(p_dst)
+            xref_src.append(old_xref)
+            xref_dst.append(new_xref)
+
+        # create the links for each copied page in destination PDF
+        for i in range(len(xref_src)):
+            page_src = doc2[pno_src[i]]  # load source page
+            links = page_src.get_links()  # get all its links
+            #log( '{pno_src=}')
+            #log( '{type(page_src)=}')
+            #log( '{page_src=}')
+            #log( '{=i len(links)}')
+            if len(links) == 0:  # no links there
+                page_src = None
+                continue
+            ctm = ~page_src.transformation_matrix  # calc page transformation matrix
+            page_dst = doc1[pno_dst[i]]  # load destination page
+            link_tab = []  # store all link definitions here
+            for l in links:
+                if l["kind"] == LINK_GOTO and (l["page"] not in pno_src):
+                    continue  # GOTO link target not in copied pages
+                annot_text = cre_annot(l, xref_dst, pno_src, ctm)
+                if annot_text:
+                    link_tab.append(annot_text)
+            if link_tab != []:
+                page_dst._addAnnot_FromString( tuple(link_tab))
+        #log( 'utils.do_links() returning.')
+
+    def _do_widgets(
+            tar: 'Document',
+            src: 'Document',
+            graftmap,
+            from_page: int = -1,
+            to_page: int = -1,
+            start_at: int = -1,
+            join_duplicates=0,
+            ) -> None:
+        """Insert widgets of copied page range into target PDF.
+
+        Parameter values **must** equal those of method insert_pdf() which
+        must have been previously executed.
+        """
+        if not src.is_form_pdf:  # nothing to do: source PDF has no fields
+            return
+
+        def clean_kid_parents(acro_fields):
+            """ Make sure all kids have correct "Parent" pointers."""
+            for i in range(acro_fields.pdf_array_len()):
+                parent = acro_fields.pdf_array_get(i)
+                kids = parent.pdf_dict_get(PDF_NAME("Kids"))
+                for j in range(kids.pdf_array_len()):
+                    kid = kids.pdf_array_get(j)
+                    kid.pdf_dict_put(PDF_NAME("Parent"), parent)
+
+        def join_widgets(pdf, acro_fields, xref1, xref2, name):
+            """Called for each pair of widgets having the same name.
+
+            Args:
+                pdf: target MuPDF document
+                acro_fields: object Root/AcroForm/Fields
+                xref1, xref2: widget xrefs having same names
+                name: (str) the name
+
+            Result:
+                Defined or updated widget parent that points to both widgets.
+            """
+
+            def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
+                """Merge widget in xref2 into "Kids" list of widget xref1.
+
+                Args:
+                    xref1, kids1: target widget and its "Kids" array.
+                    xref2, kids2: source wwidget and its "Kids" array (may be empty).
+                """
+                # make indirect objects from widgets
+                w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
+                w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
+                # find source widget in "Fields" array
+                idx = acro_fields.pdf_array_find(w2_ind)
+                acro_fields.pdf_array_delete(idx)
+
+                if not kids2.pdf_is_array():  # source widget has no kids
+                    widget = mupdf.pdf_load_object(pdf, xref2)
+
+                    # delete name from widget and insert target as parent
+                    widget.pdf_dict_del(PDF_NAME("T"))
+                    widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
+
+                    # put in target Kids
+                    kids1.pdf_array_push(w2_ind)
+                else:  # copy source kids to target kids
+                    for i in range(kids2.pdf_array_len()):
+                        kid = kids2.pdf_array_get(i)
+                        kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
+                        kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
+                        kids1.pdf_array_push(kid_ind)
+
+            def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
+                """Make new "Parent" for two widgets with same name.
+
+                Args:
+                    xref1, w1: first widget
+                    xref2, w2: second widget
+                    name: field name
+
+                Result:
+                    Both widgets have no "Kids". We create a new object with the
+                    name and a "Kids" array containing the widgets.
+                    Original widgets must be removed from AcroForm/Fields.
+                """
+                # make new "Parent" object
+                new = mupdf.pdf_new_dict(pdf, 5)
+                new.pdf_dict_put_text_string(PDF_NAME("T"), name)
+                kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2)
+                new_obj = mupdf.pdf_add_object(pdf, new)
+                new_obj_xref = new_obj.pdf_to_num()
+                new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)
+
+                # copy over some required source widget properties
+                ft = w1.pdf_dict_get(PDF_NAME("FT"))
+                w1.pdf_dict_del(PDF_NAME("FT"))
+                new_obj.pdf_dict_put(PDF_NAME("FT"), ft)
+
+                aa = w1.pdf_dict_get(PDF_NAME("AA"))
+                w1.pdf_dict_del(PDF_NAME("AA"))
+                new_obj.pdf_dict_put(PDF_NAME("AA"), aa)
+
+                # remove name field, insert "Parent" field in source widgets
+                w1.pdf_dict_del(PDF_NAME("T"))
+                w1.pdf_dict_put(PDF_NAME("Parent"), new_ind)
+                w2.pdf_dict_del(PDF_NAME("T"))
+                w2.pdf_dict_put(PDF_NAME("Parent"), new_ind)
+
+                # put source widgets in "kids" array
+                ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
+                ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
+                kids.pdf_array_push(ind1)
+                kids.pdf_array_push(ind2)
+
+                # remove source widgets from "AcroForm/Fields"
+                idx = acro_fields.pdf_array_find(ind1)
+                acro_fields.pdf_array_delete(idx)
+                idx = acro_fields.pdf_array_find(ind2)
+                acro_fields.pdf_array_delete(idx)
+
+                acro_fields.pdf_array_push(new_ind)
+
+            w1 = mupdf.pdf_load_object(pdf, xref1)
+            w2 = mupdf.pdf_load_object(pdf, xref2)
+            kids1 = w1.pdf_dict_get(PDF_NAME("Kids"))
+            kids2 = w2.pdf_dict_get(PDF_NAME("Kids"))
+
+            # check which widget has a suitable "Kids" array
+            if kids1.pdf_is_array():
+                re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
+            elif kids2.pdf_is_array():
+                re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
+            else:
+                new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order
+
+        def get_kids(parent, kids_list):
+            """Return xref list of leaf kids for a parent.
+
+            Call with an empty list.
+            """
+            kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids"))
+            if not kids.pdf_is_array():
+                return kids_list
+            for i in range(kids.pdf_array_len()):
+                kid = kids.pdf_array_get(i)
+                if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))):
+                    kids_list = get_kids(kid, kids_list)
+                else:
+                    kids_list.append(kid.pdf_to_num())
+            return kids_list
+
+        def kids_xrefs(widget):
+            """Get the xref of top "Parent" and the list of leaf widgets."""
+            kids_list = []
+            parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent"))
+            parent_xref = parent.pdf_to_num()
+            if parent_xref == 0:
+                return parent_xref, kids_list
+            kids_list = get_kids(parent, kids_list)
+            return parent_xref, kids_list
+
+        def deduplicate_names(pdf, acro_fields, join_duplicates=False):
+            """Handle any widget name duplicates caused by the merge."""
+            names = {}  # key is a widget name, value a list of widgets having it.
+
+            # extract all names and widgets in "AcroForm/Fields"
+            for i in range(mupdf.pdf_array_len(acro_fields)):
+                wobject = mupdf.pdf_array_get(acro_fields, i)
+                xref = wobject.pdf_to_num()
+
+                # extract widget name and collect widget(s) using it
+                T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T"))
+                xrefs = names.get(T, [])
+                xrefs.append(xref)
+                names[T] = xrefs
+
+            for name, xrefs in names.items():
+                if len(xrefs) < 2:
+                    continue
+                xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
+                if join_duplicates:  # combine fields with equal names
+                    join_widgets(pdf, acro_fields, xref0, xref1, name)
+                else:  # make field names unique
+                    newname = name + f" [{xref1}]"  # append this to the name
+                    wobject = mupdf.pdf_load_object(pdf, xref1)
+                    wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname)
+
+            clean_kid_parents(acro_fields)
+
+        def get_acroform(doc):
+            """Retrieve the AcroForm dictionary form a PDF."""
+            pdf = mupdf.pdf_document_from_fz_document(doc)
+            # AcroForm (= central form field info)
+            return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")
+
+        tarpdf = mupdf.pdf_document_from_fz_document(tar)
+        srcpdf = mupdf.pdf_document_from_fz_document(src)
+
+        if tar.is_form_pdf:
+            # target is a Form PDF, so use it to include source fields
+            acro = get_acroform(tar)
+            # Important arrays in AcroForm
+            acro_fields = acro.pdf_dict_get(PDF_NAME("Fields"))
+            tar_co = acro.pdf_dict_get(PDF_NAME("CO"))
+            if not tar_co.pdf_is_array():
+                tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
+        else:
+            # target is no Form PDF, so copy over source AcroForm
+            acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy
+
+            # Clear "Fields" and "CO" arrays: will be populated by page fields.
+            # This is required to avoid copying unneeded objects.
+            acro.pdf_dict_del(PDF_NAME("Fields"))
+            acro.pdf_dict_put_array(PDF_NAME("Fields"), 5)
+            acro.pdf_dict_del(PDF_NAME("CO"))
+            acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
+
+            # Enrich AcroForm for copying to target
+            acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)
+
+            # Insert AcroForm into target PDF
+            acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
+            acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields"))
+            tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO"))
+
+            # get its xref and insert it into target catalog
+            tar_xref = acro_tar.pdf_to_num()
+            acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+            root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root"))
+            root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind)
+
+        if from_page <= to_page:
+            src_range = range(from_page, to_page + 1)
+        else:
+            src_range = range(from_page, to_page - 1, -1)
+
+        parents = {}  # information about widget parents
+
+        # remove "P" owning page reference from all widgets of all source pages
+        for i in src_range:
+            src_page = src[i]
+            for xref in [
+                xref
+                for xref, wtype, _ in src_page.annot_xrefs()
+                if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
+            ]:
+                w_obj = mupdf.pdf_load_object(srcpdf, xref)
+                w_obj.pdf_dict_del(PDF_NAME("P"))
+
+                # get the widget's parent structure
+                parent_xref, old_kids = kids_xrefs(w_obj)
+                if parent_xref:
+                    parents[parent_xref] = {
+                        "new_xref": 0,
+                        "old_kids": old_kids,
+                        "new_kids": [],
+                    }
+        # Copy over Parent widgets first - they are not page-dependent
+        for xref in parents.keys():  # pylint: disable=consider-using-dict-items
+            parent = mupdf.pdf_load_object(srcpdf, xref)
+            parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
+            parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
+            kids_xrefs_new = get_kids(parent_tar, [])
+            parent_xref_new = parent_tar.pdf_to_num()
+            parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
+            acro_fields.pdf_array_push(parent_ind)
+            parents[xref]["new_xref"] = parent_xref_new
+            parents[xref]["new_kids"] = kids_xrefs_new
+
+        for i in range(len(src_range)):
+            # read first copied over page in target
+            tar_page = tar[start_at + i]
+
+            # read the original page in the source PDF
+            src_page = src[src_range[i]]
+
+            # now walk through source page widgets and copy over
+            w_xrefs = [  # widget xrefs of the source page
+                xref
+                for xref, wtype, _ in src_page.annot_xrefs()
+                if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
+            ]
+            if not w_xrefs:  # no widgets on this source page
+                continue
+
+            # convert to formal PDF page
+            tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)
+
+            # extract annotations array
+            tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots"))
+            if not mupdf.pdf_is_array(tar_annots):
+                tar_annots = mupdf.pdf_dict_put_array(
+                    tar_page_pdf.obj(), PDF_NAME("Annots"), 5
+                )
+
+            for xref in w_xrefs:
+                w_obj = mupdf.pdf_load_object(srcpdf, xref)
+
+                # check if field takes part in inter-field validations
+                is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))
+
+                # check if parent of widget already in target
+                parent_xref = mupdf.pdf_to_num(
+                    w_obj.pdf_dict_get(PDF_NAME("Parent"))
+                )
+                if parent_xref == 0:  # parent not in target yet
+                    try:
+                        w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
+                    except Exception as e:
+                        message_warning(f"cannot copy widget at {xref=}: {e}")
+                        continue
+                    w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
+                    tar_xref = w_obj_tar.pdf_to_num()
+                    w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+                    mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
+                    mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
+                else:
+                    parent = parents[parent_xref]
+                    idx = parent["old_kids"].index(xref)  # search for xref in parent
+                    tar_xref = parent["new_kids"][idx]
+                    w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+                    mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
+
+                # Into "AcroForm/CO" if a computation field.
+                if is_aac:
+                    mupdf.pdf_array_push(tar_co, w_obj_tar_ind)
+
+        deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
+
     def _embeddedFileGet(self, idx):
         pdf = _as_pdf_document(self)
         names = mupdf.pdf_dict_getl(
@@ -4266,6 +4728,107 @@
 
         self._reset_page_refs()
 
+    def get_char_widths(
+            doc: 'Document',
+            xref: int,
+            limit: int = 256,
+            idx: int = 0,
+            fontdict: OptDict = None,
+            ) -> list:
+        """Get list of glyph information of a font.
+
+        Notes:
+            Must be provided by its XREF number. If we already dealt with the
+            font, it will be recorded in doc.FontInfos. Otherwise we insert an
+            entry there.
+            Finally we return the glyphs for the font. This is a list of
+            (glyph, width) where glyph is an integer controlling the char
+            appearance, and width is a float controlling the char's spacing:
+            width * fontsize is the actual space.
+            For 'simple' fonts, glyph == ord(char) will usually be true.
+            Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.
+        """
+        fontinfo = CheckFontInfo(doc, xref)
+        if fontinfo is None:  # not recorded yet: create it
+            if fontdict is None:
+                name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref)
+                fontdict = {
+                    "name": name,
+                    "type": stype,
+                    "ext": ext,
+                    "ascender": asc,
+                    "descender": dsc,
+                }
+            else:
+                name = fontdict["name"]
+                ext = fontdict["ext"]
+                stype = fontdict["type"]
+                ordering = fontdict["ordering"]
+                simple = fontdict["simple"]
+
+            if ext == "":
+                raise ValueError("xref is not a font")
+
+            # check for 'simple' fonts
+            if stype in ("Type1", "MMType1", "TrueType"):
+                simple = True
+            else:
+                simple = False
+
+            # check for CJK fonts
+            if name in ("Fangti", "Ming"):
+                ordering = 0
+            elif name in ("Heiti", "Song"):
+                ordering = 1
+            elif name in ("Gothic", "Mincho"):
+                ordering = 2
+            elif name in ("Dotum", "Batang"):
+                ordering = 3
+            else:
+                ordering = -1
+
+            fontdict["simple"] = simple
+
+            if name == "ZapfDingbats":
+                glyphs = zapf_glyphs
+            elif name == "Symbol":
+                glyphs = symbol_glyphs
+            else:
+                glyphs = None
+
+            fontdict["glyphs"] = glyphs
+            fontdict["ordering"] = ordering
+            fontinfo = [xref, fontdict]
+            doc.FontInfos.append(fontinfo)
+        else:
+            fontdict = fontinfo[1]
+            glyphs = fontdict["glyphs"]
+            simple = fontdict["simple"]
+            ordering = fontdict["ordering"]
+
+        if glyphs is None:
+            oldlimit = 0
+        else:
+            oldlimit = len(glyphs)
+
+        mylimit = max(256, limit)
+
+        if mylimit <= oldlimit:
+            return glyphs
+
+        if ordering < 0:  # not a CJK font
+            glyphs = doc._get_char_widths(
+                xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
+            )
+        else:  # CJK fonts use char codes and width = 1
+            glyphs = None
+
+        fontdict["glyphs"] = glyphs
+        fontinfo[1] = fontdict
+        UpdateFontInfo(doc, fontinfo)
+
+        return glyphs
+
     def get_layer(self, config=-1):
         """Content of ON, OFF, RBGroups of an OC layer."""
         pdf = _as_pdf_document(self)
@@ -4323,6 +4886,23 @@
         xref = mupdf.pdf_create_object(pdf)
         return xref
 
+    def get_oc(doc: 'Document', xref: int) -> int:
+        """Return optional content object xref for an image or form xobject.
+
+        Args:
+            xref: (int) xref number of an image or form xobject.
+        """
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document close or encrypted")
+        t, name = doc.xref_get_key(xref, "Subtype")
+        if t != "name" or name not in ("/Image", "/Form"):
+            raise ValueError("bad object type at xref %i" % xref)
+        t, oc = doc.xref_get_key(xref, "OC")
+        if t != "xref":
+            return 0
+        rc = int(oc.replace("0 R", ""))
+        return rc
+    
     def get_ocgs(self):
         """Show existing optional content groups."""
         ci = mupdf.pdf_new_name( "CreatorInfo")
@@ -4355,7 +4935,11 @@
                         o = mupdf.pdf_array_get( intent, j)
                         if mupdf.pdf_is_name( o):
                             intents.append( mupdf.pdf_to_name( o))
-            hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
+            if mupdf_version_tuple >= (1, 27):
+                resource_stack = mupdf.PdfResourceStack()
+                hidden = mupdf.pdf_is_ocg_hidden( pdf, resource_stack, usage, ocg)
+            else:
+                hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
             item = {
                     "name": name,
                     "intent": intents,
@@ -4366,6 +4950,73 @@
             rc[ temp] = item
         return rc
 
+    def get_ocmd(doc: 'Document', xref: int) -> dict:
+        """Return the definition of an OCMD (optional content membership dictionary).
+
+        Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
+        /VE (visibility expression, PDF array). Via string manipulation, this
+        info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
+        and "ve" - ready to recycle as input for 'set_ocmd()'.
+        """
+
+        if xref not in range(doc.xref_length()):
+            raise ValueError("bad xref")
+        text = doc.xref_object(xref, compressed=True)
+        if "/Type/OCMD" not in text:
+            raise ValueError("bad object type")
+        textlen = len(text)
+
+        p0 = text.find("/OCGs[")  # look for /OCGs key
+        p1 = text.find("]", p0)
+        if p0 < 0 or p1 < 0:  # no OCGs found
+            ocgs = None
+        else:
+            ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
+            ocgs = list(map(int, ocgs))
+
+        p0 = text.find("/P/")  # look for /P policy key
+        if p0 < 0:
+            policy = None
+        else:
+            p1 = text.find("ff", p0)
+            if p1 < 0:
+                p1 = text.find("on", p0)
+            if p1 < 0:  # some irregular syntax
+                raise ValueError("bad object at xref")
+            else:
+                policy = text[p0 + 3 : p1 + 2]
+
+        p0 = text.find("/VE[")  # look for /VE visibility expression key
+        if p0 < 0:  # no visibility expression found
+            ve = None
+        else:
+            lp = rp = 0  # find end of /VE by finding last ']'.
+            p1 = p0
+            while lp < 1 or lp != rp:
+                p1 += 1
+                if not p1 < textlen:  # some irregular syntax
+                    raise ValueError("bad object at xref")
+                if text[p1] == "[":
+                    lp += 1
+                if text[p1] == "]":
+                    rp += 1
+            # p1 now positioned at the last "]"
+            ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
+            ve = (
+                ve.replace("/And", '"and",')
+                .replace("/Not", '"not",')
+                .replace("/Or", '"or",')
+            )
+            ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
+            import json
+            try:
+                ve = json.loads(ve)
+            except Exception:
+                exception_info()
+                message(f"bad /VE key: {ve!r}")
+                raise
+        return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
+
     def get_outline_xrefs(self):
         """Get list of outline xref numbers."""
         xrefs = []
@@ -4414,6 +5065,98 @@
             return [v[:-1] for v in val]
         return val
 
+    def get_page_labels(self):
+        """Return page label definitions in PDF document.
+
+        Returns:
+            A list of dictionaries with the following format:
+            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
+        """
+        # Jorj McKie, 2021-01-10
+        return [utils.rule_dict(item) for item in self._get_page_labels()]
+
+    def get_page_numbers(doc, label, only_one=False):
+        """Return a list of page numbers with the given label.
+
+        Args:
+            doc: PDF document object (resp. 'self').
+            label: (str) label.
+            only_one: (bool) stop searching after first hit.
+        Returns:
+            List of page numbers having this label.
+        """
+        # Jorj McKie, 2021-01-06
+
+        numbers = []
+        if not label:
+            return numbers
+        labels = doc._get_page_labels()
+        if labels == []:
+            return numbers
+        for i in range(doc.page_count):
+            plabel = utils.get_label_pno(i, labels)
+            if plabel == label:
+                numbers.append(i)
+                if only_one:
+                    break
+        return numbers
+    
+    def get_page_pixmap(
+            doc: 'Document',
+            pno: int,
+            *,
+            matrix: matrix_like = None,
+            dpi=None,
+            colorspace: Colorspace = None,
+            clip: rect_like = None,
+            alpha: bool = False,
+            annots: bool = True,
+            ) -> 'Pixmap':
+        """Create pixmap of document page by page number.
+
+        Notes:
+            Convenience function calling page.get_pixmap.
+        Args:
+            pno: (int) page number
+            matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
+            colorspace: (str,pymupdf.Colorspace) rgb, rgb, gray - case ignored, default csRGB.
+            clip: (irect-like) restrict rendering to this area.
+            alpha: (bool) include alpha channel
+            annots: (bool) also render annotations
+        """
+        if matrix is None:
+            matrix = Identity
+        if colorspace is None:
+            colorspace = csRGB
+        return doc[pno].get_pixmap(
+                matrix=matrix,
+                dpi=dpi, colorspace=colorspace,
+                clip=clip,
+                alpha=alpha,
+                annots=annots
+                )
+    
+    def get_page_text(
+            doc: 'Document',
+            pno: int,
+            option: str = "text",
+            clip: rect_like = None,
+            flags: OptInt = None,
+            textpage: 'TextPage' = None,
+            sort: bool = False,
+            ) -> typing.Any:
+        """Extract a document page's text by page number.
+
+        Notes:
+            Convenience function calling page.get_text().
+        Args:
+            pno: page number
+            option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
+        Returns:
+            output from page.TextPage().
+        """
+        return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort)
+    
     def get_page_xobjects(self, pno: int) -> list:
         """Retrieve a list of XObjects used on a page.
         """
@@ -4440,6 +5183,60 @@
             sigflag = mupdf.pdf_to_int(sigflags)
         return sigflag
 
+    def get_toc(
+            doc: 'Document',
+            simple: bool = True,
+            ) -> list:
+        """Create a table of contents.
+
+        Args:
+            simple: a bool to control output. Returns a list, where each entry consists of outline level, title, page number and link destination (if simple = False). For details see PyMuPDF's documentation.
+        """
+        def recurse(olItem, liste, lvl):
+            """Recursively follow the outline item chain and record item information in a list."""
+            while olItem and olItem.this.m_internal:
+                if olItem.title:
+                    title = olItem.title
+                else:
+                    title = " "
+
+                if not olItem.is_external:
+                    if olItem.uri:
+                        if olItem.page == -1:
+                            resolve = doc.resolve_link(olItem.uri)
+                            page = resolve[0] + 1
+                        else:
+                            page = olItem.page + 1
+                    else:
+                        page = -1
+                else:
+                    page = -1
+
+                if not simple:
+                    link = utils.getLinkDict(olItem, doc)
+                    liste.append([lvl, title, page, link])
+                else:
+                    liste.append([lvl, title, page])
+
+                if olItem.down:
+                    liste = recurse(olItem.down, liste, lvl + 1)
+                olItem = olItem.next
+            return liste
+
+        # ensure document is open
+        if doc.is_closed:
+            raise ValueError("document closed")
+        doc.init_doc()
+        olItem = doc.outline
+        if not olItem:
+            return []
+        lvl = 1
+        liste = []
+        toc = recurse(olItem, liste, lvl)
+        if doc.is_pdf and not simple:
+            doc._extend_toc_items(toc)
+        return toc
+    
     def get_xml_metadata(self):
         """Get document XML metadata."""
         xml = None
@@ -4457,6 +5254,31 @@
             rc = ''
         return rc
 
+    def has_annots(doc: 'Document') -> bool:
+        """Check whether there are annotations on any page."""
+        if doc.is_closed:
+            raise ValueError("document closed")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        for i in range(doc.page_count):
+            for item in doc.page_annot_xrefs(i):
+                # pylint: disable=no-member
+                if not (item[1] == mupdf.PDF_ANNOT_LINK or item[1] == mupdf.PDF_ANNOT_WIDGET):  # pylint: disable=no-member
+                    return True
+        return False
+    
+    def has_links(doc: 'Document') -> bool:
+        """Check whether there are links on any page."""
+        if doc.is_closed:
+            raise ValueError("document closed")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        for i in range(doc.page_count):
+            for item in doc.page_annot_xrefs(i):
+                if item[1] == mupdf.PDF_ANNOT_LINK:  # pylint: disable=no-member
+                    return True
+        return False
+    
     def init_doc(self):
         if self.is_encrypted:
             raise ValueError("cannot initialize - document still encrypted")
@@ -4522,6 +5344,36 @@
                 final=final,
                 )
 
+    def insert_page(
+            doc: 'Document',
+            pno: int,
+            text: typing.Union[str, list, None] = None,
+            fontsize: float = 11,
+            width: float = 595,
+            height: float = 842,
+            fontname: str = "helv",
+            fontfile: OptStr = None,
+            color: OptSeq = (0,),
+            ) -> int:
+        """Create a new PDF page and insert some text.
+
+        Notes:
+            Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
+            For parameter details see these methods.
+        """
+        page = doc.new_page(pno=pno, width=width, height=height)
+        if not bool(text):
+            return 0
+        rc = page.insert_text(
+            (50, 72),
+            text,
+            fontsize=fontsize,
+            fontname=fontname,
+            fontfile=fontfile,
+            color=color,
+        )
+        return rc
+    
     def insert_pdf(
             self,
             docsrc,
@@ -5022,6 +5874,24 @@
         ret = mupdf.fz_needs_password( document)
         return ret
 
+    def new_page(
+            doc: 'Document',
+            pno: int = -1,
+            width: float = 595,
+            height: float = 842,
+            ) -> Page:
+        """Create and return a new page object.
+
+        Args:
+            pno: (int) insert before this page. Default: after last page.
+            width: (float) page width in points. Default: 595 (ISO A4 width).
+            height: (float) page height in points. Default 842 (ISO A4 height).
+        Returns:
+            A pymupdf.Page object.
+        """
+        doc._newPage(pno, width=width, height=height)
+        return doc[pno]
+    
     def next_location(self, page_id):
         """Get (chapter, page) of next page."""
         if self.is_closed or self.is_encrypted:
@@ -5668,6 +6538,201 @@
         """ Save PDF incrementally"""
         return self.save(self.name, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
 
+    # ------------------------------------------------------------------------------
+    # Remove potentially sensitive data from a PDF. Similar to the Adobe
+    # Acrobat 'sanitize' function
+    # ------------------------------------------------------------------------------
+    def scrub(
+            doc: 'Document',
+            attached_files: bool = True,
+            clean_pages: bool = True,
+            embedded_files: bool = True,
+            hidden_text: bool = True,
+            javascript: bool = True,
+            metadata: bool = True,
+            redactions: bool = True,
+            redact_images: int = 0,
+            remove_links: bool = True,
+            reset_fields: bool = True,
+            reset_responses: bool = True,
+            thumbnails: bool = True,
+            xml_metadata: bool = True,
+            ) -> None:
+        
+        def remove_hidden(cont_lines):
+            """Remove hidden text from a PDF page.
+
+            Args:
+                cont_lines: list of lines with /Contents content. Should have status
+                    from after page.cleanContents().
+
+            Returns:
+                List of /Contents lines from which hidden text has been removed.
+
+            Notes:
+                The input must have been created after the page's /Contents object(s)
+                have been cleaned with page.cleanContents(). This ensures a standard
+                formatting: one command per line, single spaces between operators.
+                This allows for drastic simplification of this code.
+            """
+            out_lines = []  # will return this
+            in_text = False  # indicate if within BT/ET object
+            suppress = False  # indicate text suppression active
+            make_return = False
+            for line in cont_lines:
+                if line == b"BT":  # start of text object
+                    in_text = True  # switch on
+                    out_lines.append(line)  # output it
+                    continue
+                if line == b"ET":  # end of text object
+                    in_text = False  # switch off
+                    out_lines.append(line)  # output it
+                    continue
+                if line == b"3 Tr":  # text suppression operator
+                    suppress = True  # switch on
+                    make_return = True
+                    continue
+                if line[-2:] == b"Tr" and line[0] != b"3":
+                    suppress = False  # text rendering changed
+                    out_lines.append(line)
+                    continue
+                if line == b"Q":  # unstack command also switches off
+                    suppress = False
+                    out_lines.append(line)
+                    continue
+                if suppress and in_text:  # suppress hidden lines
+                    continue
+                out_lines.append(line)
+            if make_return:
+                return out_lines
+            else:
+                return None
+
+        if not doc.is_pdf:  # only works for PDF
+            raise ValueError("is no PDF")
+        if doc.is_encrypted or doc.is_closed:
+            raise ValueError("closed or encrypted doc")
+
+        if not clean_pages:
+            hidden_text = False
+            redactions = False
+
+        if metadata:
+            doc.set_metadata({})  # remove standard metadata
+
+        for page in doc:
+            if reset_fields:
+                # reset form fields (widgets)
+                for widget in page.widgets():
+                    widget.reset()
+
+            if remove_links:
+                links = page.get_links()  # list of all links on page
+                for link in links:  # remove all links
+                    page.delete_link(link)
+
+            found_redacts = False
+            for annot in page.annots():
+                if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
+                    annot.update_file(buffer_=b" ")  # set file content to empty
+                if reset_responses:
+                    annot.delete_responses()
+                if annot.type[0] == mupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
+                    found_redacts = True
+
+            if redactions and found_redacts:
+                page.apply_redactions(images=redact_images)
+
+            if not (clean_pages or hidden_text):
+                continue  # done with the page
+
+            page.clean_contents()
+            if not page.get_contents():
+                continue
+            if hidden_text:
+                xrefs = page.get_contents()
+                assert len(xrefs) == 1  # only one because of cleaning.
+                xref = xrefs[0]
+                cont = doc.xref_stream(xref)
+                cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
+                if cont_lines:  # something was actually removed
+                    cont = b"\n".join(cont_lines)
+                    doc.update_stream(xref, cont)  # rewrite the page /Contents
+
+            if thumbnails:  # remove page thumbnails?
+                if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
+                    doc.xref_set_key(page.xref, "Thumb", "null")
+
+        # pages are scrubbed, now perform document-wide scrubbing
+        # remove embedded files
+        if embedded_files:
+            for name in doc.embfile_names():
+                doc.embfile_del(name)
+
+        if xml_metadata:
+            doc.del_xml_metadata()
+        if not (xml_metadata or javascript):
+            xref_limit = 0
+        else:
+            xref_limit = doc.xref_length()
+        for xref in range(1, xref_limit):
+            if not doc.xref_object(xref):
+                msg = "bad xref %i - clean PDF before scrubbing" % xref
+                raise ValueError(msg)
+            if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
+                # a /JavaScript action object
+                obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
+                doc.update_object(xref, obj)  # update this object
+                continue  # no further handling
+
+            if not xml_metadata:
+                continue
+
+            if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
+                # delete any metadata object directly
+                doc.update_object(xref, "<<>>")
+                doc.update_stream(xref, b"deleted", new=True)
+                continue
+
+            if doc.xref_get_key(xref, "Metadata")[0] != "null":
+                doc.xref_set_key(xref, "Metadata", "null")
+    
+    def search_page_for(
+            doc: 'Document',
+            pno: int,
+            text: str,
+            quads: bool = False,
+            clip: rect_like = None,
+            flags: int = None,
+            textpage: 'TextPage' = None,
+            ) -> list:
+        """Search for a string on a page.
+
+        Args:
+            pno: page number
+            text: string to be searched for
+            clip: restrict search to this rectangle
+            quads: (bool) return quads instead of rectangles
+            flags: bit switches, default: join hyphened words
+            textpage: reuse a prepared textpage
+        Returns:
+            a list of rectangles or quads, each containing an occurrence.
+        """
+        if flags is None:
+            flags = (0
+                    | TEXT_DEHYPHENATE
+                    | TEXT_PRESERVE_LIGATURES
+                    | TEXT_PRESERVE_WHITESPACE
+                    | TEXT_MEDIABOX_CLIP
+                    )
+        return doc[pno].search_for(
+            text,
+            quads=quads,
+            clip=clip,
+            flags=flags,
+            textpage=textpage,
+        )
+    
     def select(self, pyliste):
         """Build sub-pdf with page numbers in the list."""
         if self.is_closed or self.is_encrypted:
@@ -5812,6 +6877,162 @@
         self.xref_set_key(xref, "MarkInfo", pdfdict)
         return True
 
+    def set_metadata(doc: 'Document', m: dict = None) -> None:
+        """Update the PDF /Info object.
+
+        Args:
+            m: a dictionary like doc.metadata.
+        """
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document closed or encrypted")
+        if m is None:
+            m = {}
+        elif type(m) is not dict:
+            raise ValueError("bad metadata")
+        keymap = {
+            "author": "Author",
+            "producer": "Producer",
+            "creator": "Creator",
+            "title": "Title",
+            "format": None,
+            "encryption": None,
+            "creationDate": "CreationDate",
+            "modDate": "ModDate",
+            "subject": "Subject",
+            "keywords": "Keywords",
+            "trapped": "Trapped",
+        }
+        valid_keys = set(keymap.keys())
+        diff_set = set(m.keys()).difference(valid_keys)
+        if diff_set != set():
+            msg = "bad dict key(s): %s" % diff_set
+            raise ValueError(msg)
+
+        t, temp = doc.xref_get_key(-1, "Info")
+        if t != "xref":
+            info_xref = 0
+        else:
+            info_xref = int(temp.replace("0 R", ""))
+
+        if m == {} and info_xref == 0:  # nothing to do
+            return
+
+        if info_xref == 0:  # no prev metadata: get new xref
+            info_xref = doc.get_new_xref()
+            doc.update_object(info_xref, "<<>>")  # fill it with empty object
+            doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
+        elif m == {}:  # remove existing metadata
+            doc.xref_set_key(-1, "Info", "null")
+            doc.init_doc()
+            return
+
+        for key, val in [(k, v) for k, v in m.items() if keymap[k] is not None]:
+            pdf_key = keymap[key]
+            if not bool(val) or val in ("none", "null"):
+                val = "null"
+            else:
+                val = get_pdf_str(val)
+            doc.xref_set_key(info_xref, pdf_key, val)
+        doc.init_doc()
+        return
+
+    def set_oc(doc: 'Document', xref: int, oc: int) -> None:
+        """Attach optional content object to image or form xobject.
+
+        Args:
+            xref: (int) xref number of an image or form xobject
+            oc: (int) xref number of an OCG or OCMD
+        """
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document close or encrypted")
+        t, name = doc.xref_get_key(xref, "Subtype")
+        if t != "name" or name not in ("/Image", "/Form"):
+            raise ValueError("bad object type at xref %i" % xref)
+        if oc > 0:
+            t, name = doc.xref_get_key(oc, "Type")
+            if t != "name" or name not in ("/OCG", "/OCMD"):
+                raise ValueError("bad object type at xref %i" % oc)
+        if oc == 0 and "OC" in doc.xref_get_keys(xref):
+            doc.xref_set_key(xref, "OC", "null")
+            return None
+        doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
+        return None
+
+    def set_ocmd(
+            doc: 'Document',
+            xref: int = 0,
+            ocgs: typing.Union[list, None] = None,
+            policy: OptStr = None,
+            ve: typing.Union[list, None] = None,
+            ) -> int:
+        """Create or update an OCMD object in a PDF document.
+
+        Args:
+            xref: (int) 0 for creating a new object, otherwise update existing one.
+            ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
+            policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
+            ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.
+
+        Returns:
+            Xref of the created or updated OCMD.
+        """
+
+        all_ocgs = set(doc.get_ocgs().keys())
+
+        def ve_maker(ve):
+            if type(ve) not in (list, tuple) or len(ve) < 2:
+                raise ValueError("bad 've' format: %s" % ve)
+            if ve[0].lower() not in ("and", "or", "not"):
+                raise ValueError("bad operand: %s" % ve[0])
+            if ve[0].lower() == "not" and len(ve) != 2:
+                raise ValueError("bad 've' format: %s" % ve)
+            item = "[/%s" % ve[0].title()
+            for x in ve[1:]:
+                if type(x) is int:
+                    if x not in all_ocgs:
+                        raise ValueError("bad OCG %i" % x)
+                    item += " %i 0 R" % x
+                else:
+                    item += " %s" % ve_maker(x)
+            item += "]"
+            return item
+
+        text = "<</Type/OCMD"
+
+        if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
+            s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
+            if s != set():
+                msg = "bad OCGs: %s" % s
+                raise ValueError(msg)
+            text += "/OCGs[" + " ".join(map(lambda x: "%i 0 R" % x, ocgs)) + "]"
+
+        if policy:
+            policy = str(policy).lower()
+            pols = {
+                "anyon": "AnyOn",
+                "allon": "AllOn",
+                "anyoff": "AnyOff",
+                "alloff": "AllOff",
+            }
+            if policy not in ("anyon", "allon", "anyoff", "alloff"):
+                raise ValueError("bad policy: %s" % policy)
+            text += "/P/%s" % pols[policy]
+
+        if ve:
+            text += "/VE%s" % ve_maker(ve)
+
+        text += ">>"
+
+        # make new object or replace old OCMD (check type first)
+        if xref == 0:
+            xref = doc.get_new_xref()
+        elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
+            raise ValueError("bad xref or not an OCMD")
+        doc.update_object(xref, text)
+        return xref
+
     def set_pagelayout(self, pagelayout: str):
         """Set the PDF PageLayout value."""
         valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
@@ -5844,6 +7065,349 @@
                 return True
         raise ValueError("bad PageMode value")
 
+    def set_page_labels(doc, labels):
+        """Add / replace page label definitions in PDF document.
+
+        Args:
+            doc: PDF document (resp. 'self').
+            labels: list of label dictionaries like:
+            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
+            as returned by get_page_labels().
+        """
+        # William Chapman, 2021-01-06
+
+        def create_label_str(label):
+            """Convert Python label dict to corresponding PDF rule string.
+
+            Args:
+                label: (dict) build rule for the label.
+            Returns:
+                PDF label rule string wrapped in "<<", ">>".
+            """
+            s = "%i<<" % label["startpage"]
+            if label.get("prefix", "") != "":
+                s += "/P(%s)" % label["prefix"]
+            if label.get("style", "") != "":
+                s += "/S/%s" % label["style"]
+            if label.get("firstpagenum", 1) > 1:
+                s += "/St %i" % label["firstpagenum"]
+            s += ">>"
+            return s
+
+        def create_nums(labels):
+            """Return concatenated string of all labels rules.
+
+            Args:
+                labels: (list) dictionaries as created by function 'rule_dict'.
+            Returns:
+                PDF compatible string for page label definitions, ready to be
+                enclosed in PDF array 'Nums[...]'.
+            """
+            labels.sort(key=lambda x: x["startpage"])
+            s = "".join([create_label_str(label) for label in labels])
+            return s
+
+        doc._set_page_labels(create_nums(labels))
+
+    def set_toc(
+            doc: 'Document',
+            toc: list,
+            collapse: int = 1,
+            ) -> int:
+        """Create new outline tree (table of contents, TOC).
+
+        Args:
+            toc: (list, tuple) each entry must contain level, title, page and
+                optionally top margin on the page. None or '()' remove the TOC.
+            collapse: (int) collapses entries beyond this level. Zero or None
+                shows all entries unfolded.
+        Returns:
+            the number of inserted items, or the number of removed items respectively.
+        """
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document closed or encrypted")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        if not toc:  # remove all entries
+            return len(doc._delToC())
+
+        # validity checks --------------------------------------------------------
+        if type(toc) not in (list, tuple):
+            raise ValueError("'toc' must be list or tuple")
+        toclen = len(toc)
+        page_count = doc.page_count
+        t0 = toc[0]
+        if type(t0) not in (list, tuple):
+            raise ValueError("items must be sequences of 3 or 4 items")
+        if t0[0] != 1:
+            raise ValueError("hierarchy level of item 0 must be 1")
+        for i in list(range(toclen - 1)):
+            t1 = toc[i]
+            t2 = toc[i + 1]
+            if not -1 <= t1[2] <= page_count:
+                raise ValueError("row %i: page number out of range" % i)
+            if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
+                raise ValueError("bad row %i" % (i + 1))
+            if (type(t2[0]) is not int) or t2[0] < 1:
+                raise ValueError("bad hierarchy level in row %i" % (i + 1))
+            if t2[0] > t1[0] + 1:
+                raise ValueError("bad hierarchy level in row %i" % (i + 1))
+        # no formal errors in toc --------------------------------------------------
+
+        # --------------------------------------------------------------------------
+        # make a list of xref numbers, which we can use for our TOC entries
+        # --------------------------------------------------------------------------
+        old_xrefs = doc._delToC()  # del old outlines, get their xref numbers
+
+        # prepare table of xrefs for new bookmarks
+        old_xrefs = []
+        xref = [0] + old_xrefs
+        xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
+        if toclen > len(old_xrefs):  # too few old xrefs?
+            for i in range((toclen - len(old_xrefs))):
+                xref.append(doc.get_new_xref())  # acquire new ones
+
+        lvltab = {0: 0}  # to store last entry per hierarchy level
+
+        # ------------------------------------------------------------------------------
+        # contains new outline objects as strings - first one is the outline root
+        # ------------------------------------------------------------------------------
+        olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
+        # ------------------------------------------------------------------------------
+        # build olitems as a list of PDF-like connected dictionaries
+        # ------------------------------------------------------------------------------
+        for i in range(toclen):
+            o = toc[i]
+            lvl = o[0]  # level
+            title = get_pdf_str(o[1])  # title
+            pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
+            page_xref = doc.page_xref(pno)
+            page_height = doc.page_cropbox(pno).height
+            top = Point(72, page_height - 36)
+            dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
+            if o[2] < 0:
+                dest_dict["kind"] = LINK_NONE
+            if len(o) > 3:  # some target is specified
+                if type(o[3]) in (int, float):  # convert a number to a point
+                    dest_dict["to"] = Point(72, page_height - o[3])
+                else:  # if something else, make sure we have a dict
+                    # We make a copy of o[3] to avoid modifying our caller's data.
+                    dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
+                    if "to" not in dest_dict:  # target point not in dict?
+                        dest_dict["to"] = top  # put default in
+                    else:  # transform target to PDF coordinates
+                        page = doc[pno]
+                        point = Point(dest_dict["to"])
+                        point.y = page.cropbox.height - point.y
+                        point = point * page.rotation_matrix
+                        dest_dict["to"] = (point.x, point.y)
+            d = {}
+            d["first"] = -1
+            d["count"] = 0
+            d["last"] = -1
+            d["prev"] = -1
+            d["next"] = -1
+            d["dest"] = utils.getDestStr(page_xref, dest_dict)
+            d["top"] = dest_dict["to"]
+            d["title"] = title
+            d["parent"] = lvltab[lvl - 1]
+            d["xref"] = xref[i + 1]
+            d["color"] = dest_dict.get("color")
+            d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
+            lvltab[lvl] = i + 1
+            parent = olitems[lvltab[lvl - 1]]  # the parent entry
+
+            if (
+                dest_dict.get("collapse") or collapse and lvl > collapse
+            ):  # suppress expansion
+                parent["count"] -= 1  # make /Count negative
+            else:
+                parent["count"] += 1  # positive /Count
+
+            if parent["first"] == -1:
+                parent["first"] = i + 1
+                parent["last"] = i + 1
+            else:
+                d["prev"] = parent["last"]
+                prev = olitems[parent["last"]]
+                prev["next"] = i + 1
+                parent["last"] = i + 1
+            olitems.append(d)
+
+        # ------------------------------------------------------------------------------
+        # now create each outline item as a string and insert it in the PDF
+        # ------------------------------------------------------------------------------
+        for i, ol in enumerate(olitems):
+            txt = "<<"
+            if ol["count"] != 0:
+                txt += "/Count %i" % ol["count"]
+            try:
+                txt += ol["dest"]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["first"] > -1:
+                    txt += "/First %i 0 R" % xref[ol["first"]]
+            except Exception:
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["last"] > -1:
+                    txt += "/Last %i 0 R" % xref[ol["last"]]
+            except Exception:
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["next"] > -1:
+                    txt += "/Next %i 0 R" % xref[ol["next"]]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["parent"] > -1:
+                    txt += "/Parent %i 0 R" % xref[ol["parent"]]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["prev"] > -1:
+                    txt += "/Prev %i 0 R" % xref[ol["prev"]]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                txt += "/Title" + ol["title"]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+
+            if ol.get("color") and len(ol["color"]) == 3:
+                txt += f"/C[ {_format_g(tuple(ol['color']))}]"
+            if ol.get("flags", 0) > 0:
+                txt += "/F %i" % ol["flags"]
+
+            if i == 0:  # special: this is the outline root
+                txt += "/Type/Outlines"  # so add the /Type entry
+            txt += ">>"
+            doc.update_object(xref[i], txt)  # insert the PDF object
+
+        doc.init_doc()
+        return toclen
+
+    def set_toc_item(
+            doc: 'Document',
+            idx: int,
+            dest_dict: OptDict = None,
+            kind: OptInt = None,
+            pno: OptInt = None,
+            uri: OptStr = None,
+            title: OptStr = None,
+            to: point_like = None,
+            filename: OptStr = None,
+            zoom: float = 0,
+            ) -> None:
+        """Update TOC item by index.
+
+        It allows changing the item's title and link destination.
+
+        Args:
+            idx:
+                (int) desired index of the TOC list, as created by get_toc.
+            dest_dict:
+                (dict) destination dictionary as created by get_toc(False).
+                Outrules all other parameters. If None, the remaining parameters
+                are used to make a dest dictionary.
+            kind:
+                (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
+                the title will be updated. If pymupdf.LINK_NONE, the TOC item will
+                be deleted.
+            pno:
+                (int) page number (1-based like in get_toc). Required if
+                pymupdf.LINK_GOTO.
+            uri:
+                (str) the URL, required if pymupdf.LINK_URI.
+            title:
+                (str) the new title. No change if None.
+            to:
+                (point-like) destination on the target page. If omitted, (72, 36)
+                will be used as target coordinates.
+            filename:
+                (str) destination filename, required for pymupdf.LINK_GOTOR and
+                pymupdf.LINK_LAUNCH.
+            name:
+                (str) a destination name for pymupdf.LINK_NAMED.
+            zoom:
+                (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
+        """
+        xref = doc.get_outline_xrefs()[idx]
+        page_xref = 0
+        if type(dest_dict) is dict:
+            if dest_dict["kind"] == LINK_GOTO:
+                pno = dest_dict["page"]
+                page_xref = doc.page_xref(pno)
+                page_height = doc.page_cropbox(pno).height
+                to = dest_dict.get('to', Point(72, 36))
+                to.y = page_height - to.y
+                dest_dict["to"] = to
+            action = utils.getDestStr(page_xref, dest_dict)
+            if not action.startswith("/A"):
+                raise ValueError("bad bookmark dest")
+            color = dest_dict.get("color")
+            if color:
+                color = list(map(float, color))
+                if len(color) != 3 or min(color) < 0 or max(color) > 1:
+                    raise ValueError("bad color value")
+            bold = dest_dict.get("bold", False)
+            italic = dest_dict.get("italic", False)
+            flags = italic + 2 * bold
+            collapse = dest_dict.get("collapse")
+            return doc._update_toc_item(
+                xref,
+                action=action[2:],
+                title=title,
+                color=color,
+                flags=flags,
+                collapse=collapse,
+            )
+
+        if kind == LINK_NONE:  # delete bookmark item
+            return doc.del_toc_item(idx)
+        if kind is None and title is None:  # treat as no-op
+            return None
+        if kind is None:  # only update title text
+            return doc._update_toc_item(xref, action=None, title=title)
+
+        if kind == LINK_GOTO:
+            if pno is None or pno not in range(1, doc.page_count + 1):
+                raise ValueError("bad page number")
+            page_xref = doc.page_xref(pno - 1)
+            page_height = doc.page_cropbox(pno - 1).height
+            if to is None:
+                to = Point(72, page_height - 36)
+            else:
+                to = Point(to)
+                to.y = page_height - to.y
+
+        ddict = {
+            "kind": kind,
+            "to": to,
+            "uri": uri,
+            "page": pno,
+            "file": filename,
+            "zoom": zoom,
+        }
+        action = utils.getDestStr(page_xref, ddict)
+        if action == "" or not action.startswith("/A"):
+            raise ValueError("bad bookmark dest")
+
+        return doc._update_toc_item(xref, action=action[2:], title=title)
+
     def set_xml_metadata(self, metadata):
         """Store XML document level metadata."""
         if self.is_closed or self.is_encrypted:
@@ -5862,6 +7426,318 @@
             mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
             mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml)
 
+    def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt:
+        """Build font subsets in a PDF.
+
+        Eligible fonts are potentially replaced by smaller versions. Page text is
+        NOT rewritten and thus should retain properties like being hidden or
+        controlled by optional content.
+
+        This method by default uses MuPDF's own internal feature to create subset
+        fonts. As this is a new function, errors may still occur. In this case,
+        please fall back to using the previous version by using "fallback=True".
+        Fallback mode requires the external package 'fontTools'.
+
+        Args:
+            fallback: use the older deprecated implementation.
+            verbose: only used by fallback mode.
+
+        Returns:
+            The new MuPDF-based code returns None.  The deprecated fallback
+            mode returns 0 if there are no fonts to subset.  Otherwise, it
+            returns the decrease in fontsize (the difference in fontsize),
+            measured in bytes.
+        """
+        # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
+        # An embedded font is uniquely defined by its fontbuffer only. It may have
+        # multiple names and xrefs.
+        # Once the sets of used unicodes and glyphs are known, we compute a
+        # smaller version of the buffer user package fontTools.
+
+        if not fallback:  # by default use MuPDF function
+            pdf = mupdf.pdf_document_from_fz_document(doc)
+            mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
+            return
+
+        font_buffers = {}
+
+        def get_old_widths(xref):
+            """Retrieve old font '/W' and '/DW' values."""
+            df = doc.xref_get_key(xref, "DescendantFonts")
+            if df[0] != "array":  # only handle xref specifications
+                return None, None
+            df_xref = int(df[1][1:-1].replace("0 R", ""))
+            widths = doc.xref_get_key(df_xref, "W")
+            if widths[0] != "array":  # no widths key found
+                widths = None
+            else:
+                widths = widths[1]
+            dwidths = doc.xref_get_key(df_xref, "DW")
+            if dwidths[0] != "int":
+                dwidths = None
+            else:
+                dwidths = dwidths[1]
+            return widths, dwidths
+
+        def set_old_widths(xref, widths, dwidths):
+            """Restore the old '/W' and '/DW' in subsetted font.
+
+            If either parameter is None or evaluates to False, the corresponding
+            dictionary key will be set to null.
+            """
+            df = doc.xref_get_key(xref, "DescendantFonts")
+            if df[0] != "array":  # only handle xref specs
+                return None
+            df_xref = int(df[1][1:-1].replace("0 R", ""))
+            if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[
+                0
+            ] != "null":
+                doc.xref_set_key(df_xref, "W", "null")
+            else:
+                doc.xref_set_key(df_xref, "W", widths)
+            if (type(dwidths) is not str or not dwidths) and doc.xref_get_key(
+                df_xref, "DW"
+            )[0] != "null":
+                doc.xref_set_key(df_xref, "DW", "null")
+            else:
+                doc.xref_set_key(df_xref, "DW", dwidths)
+            return None
+
+        def set_subset_fontname(new_xref):
+            """Generate a name prefix to tag a font as subset.
+
+            We use a random generator to select 6 upper case ASCII characters.
+            The prefixed name must be put in the font xref as the "/BaseFont" value
+            and in the FontDescriptor object as the '/FontName' value.
+            """
+            # The following generates a prefix like 'ABCDEF+'
+            import random
+            import string
+            prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
+            font_str = doc.xref_object(new_xref, compressed=True)
+            font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
+            df = doc.xref_get_key(new_xref, "DescendantFonts")
+            if df[0] == "array":
+                df_xref = int(df[1][1:-1].replace("0 R", ""))
+                fd = doc.xref_get_key(df_xref, "FontDescriptor")
+                if fd[0] == "xref":
+                    fd_xref = int(fd[1].replace("0 R", ""))
+                    fd_str = doc.xref_object(fd_xref, compressed=True)
+                    fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
+                    doc.update_object(fd_xref, fd_str)
+            doc.update_object(new_xref, font_str)
+
+        def build_subset(buffer, unc_set, gid_set):
+            """Build font subset using fontTools.
+
+            Args:
+                buffer: (bytes) the font given as a binary buffer.
+                unc_set: (set) required glyph ids.
+            Returns:
+                Either None if subsetting is unsuccessful or the subset font buffer.
+            """
+            try:
+                import fontTools.subset as fts
+            except ImportError:
+                if g_exceptions_verbose:    exception_info()
+                message("This method requires fontTools to be installed.")
+                raise
+            import tempfile
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                oldfont_path = f"{tmp_dir}/oldfont.ttf"
+                newfont_path = f"{tmp_dir}/newfont.ttf"
+                uncfile_path = f"{tmp_dir}/uncfile.txt"
+                args = [
+                    oldfont_path,
+                    "--retain-gids",
+                    f"--output-file={newfont_path}",
+                    "--layout-features=*",
+                    "--passthrough-tables",
+                    "--ignore-missing-glyphs",
+                    "--ignore-missing-unicodes",
+                    "--symbol-cmap",
+                ]
+
+                # store glyph ids or unicodes as file
+                with open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file:
+                    if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
+                        args.append(f"--gids-file={uncfile_path}")
+                        gid_set.add(189)
+                        unc_list = list(gid_set)
+                        for unc in unc_list:
+                            unc_file.write("%i\n" % unc)
+                    else:
+                        args.append(f"--unicodes-file={uncfile_path}")
+                        unc_set.add(255)
+                        unc_list = list(unc_set)
+                        for unc in unc_list:
+                            unc_file.write("%04x\n" % unc)
+
+                # store fontbuffer as a file
+                with open(oldfont_path, "wb") as fontfile:
+                    fontfile.write(buffer)
+                try:
+                    os.remove(newfont_path)  # remove old file
+                except Exception:
+                    pass
+                try:  # invoke fontTools subsetter
+                    fts.main(args)
+                    font = Font(fontfile=newfont_path)
+                    new_buffer = font.buffer  # subset font binary
+                    if font.glyph_count == 0:  # intercept empty font
+                        new_buffer = None
+                except Exception:
+                    exception_info()
+                    new_buffer = None
+            return new_buffer
+
+        def repl_fontnames(doc):
+            """Populate 'font_buffers'.
+
+            For each font candidate, store its xref and the list of names
+            by which PDF text may refer to it (there may be multiple).
+            """
+
+            def norm_name(name):
+                """Recreate font name that contains PDF hex codes.
+
+                E.g. #20 -> space, chr(32)
+                """
+                while "#" in name:
+                    p = name.find("#")
+                    c = int(name[p + 1 : p + 3], 16)
+                    name = name.replace(name[p : p + 3], chr(c))
+                return name
+
+            def get_fontnames(doc, item):
+                """Return a list of fontnames for an item of page.get_fonts().
+
+                There may be multiple names e.g. for Type0 fonts.
+                """
+                fontname = item[3]
+                names = [fontname]
+                fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
+                fontname = norm_name(fontname)
+                if fontname not in names:
+                    names.append(fontname)
+                descendents = doc.xref_get_key(item[0], "DescendantFonts")
+                if descendents[0] != "array":
+                    return names
+                descendents = descendents[1][1:-1]
+                if descendents.endswith(" 0 R"):
+                    xref = int(descendents[:-4])
+                    descendents = doc.xref_object(xref, compressed=True)
+                p1 = descendents.find("/BaseFont")
+                if p1 >= 0:
+                    p2 = descendents.find("/", p1 + 1)
+                    p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1))
+                    fontname = descendents[p2 + 1 : p1]
+                    fontname = norm_name(fontname)
+                    if fontname not in names:
+                        names.append(fontname)
+                return names
+
+            for i in range(doc.page_count):
+                for f in doc.get_page_fonts(i, full=True):
+                    font_xref = f[0]  # font xref
+                    font_ext = f[1]  # font file extension
+                    basename = f[3]  # font basename
+
+                    if font_ext not in (  # skip if not supported by fontTools
+                        "otf",
+                        "ttf",
+                        "woff",
+                        "woff2",
+                    ):
+                        continue
+                    # skip fonts which already are subsets
+                    if len(basename) > 6 and basename[6] == "+":
+                        continue
+
+                    extr = doc.extract_font(font_xref)
+                    fontbuffer = extr[-1]
+                    names = get_fontnames(doc, f)
+                    name_set, xref_set, subsets = font_buffers.get(
+                        fontbuffer, (set(), set(), (set(), set()))
+                    )
+                    xref_set.add(font_xref)
+                    for name in names:
+                        name_set.add(name)
+                    font = Font(fontbuffer=fontbuffer)
+                    name_set.add(font.name)
+                    del font
+                    font_buffers[fontbuffer] = (name_set, xref_set, subsets)
+
+        def find_buffer_by_name(name):
+            for buffer, (name_set, _, _) in font_buffers.items():
+                if name in name_set:
+                    return buffer
+            return None
+
+        # -----------------
+        # main function
+        # -----------------
+        repl_fontnames(doc)  # populate font information
+        if not font_buffers:  # nothing found to do
+            if verbose:
+                message(f'No fonts to subset.')
+            return 0
+
+        old_fontsize = 0
+        new_fontsize = 0
+        for fontbuffer in font_buffers.keys():
+            old_fontsize += len(fontbuffer)
+
+        # Scan page text for usage of subsettable fonts
+        for page in doc:
+            # go through the text and extend set of used glyphs by font
+            # we use a modified MuPDF trace device, which delivers us glyph ids.
+            for span in page.get_texttrace():
+                if type(span) is not dict:  # skip useless information
+                    continue
+                fontname = span["font"][:33]  # fontname for the span
+                buffer = find_buffer_by_name(fontname)
+                if buffer is None:
+                    continue
+                name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
+                for c in span["chars"]:
+                    set_ucs.add(c[0])  # unicode
+                    set_gid.add(c[1])  # glyph id
+                font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))
+
+        # build the font subsets
+        for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
+            new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
+            fontname = list(name_set)[0]
+            if new_buffer is None or len(new_buffer) >= len(old_buffer):
+                # subset was not created or did not get smaller
+                if verbose:
+                    message(f'Cannot subset {fontname!r}.')
+                continue
+            if verbose:
+                message(f"Built subset of font {fontname!r}.")
+            val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
+            new_xref = val[0]  # get its xref
+            set_subset_fontname(new_xref)  # tag fontname as subset font
+            font_str = doc.xref_object(  # get its object definition
+                new_xref,
+                compressed=True,
+            )
+            # walk through the original font xrefs and replace each by the subset def
+            for font_xref in xref_set:
+                # we need the original '/W' and '/DW' width values
+                width_table, def_width = get_old_widths(font_xref)
+                # ... and replace original font definition at xref with it
+                doc.update_object(font_xref, font_str)
+                # now copy over old '/W' and '/DW' values
+                if width_table or def_width:
+                    set_old_widths(font_xref, width_table, def_width)
+            # 'new_xref' remains unused in the PDF and must be removed
+            # by garbage collection.
+            new_fontsize += len(new_buffer)
+
+        return old_fontsize - new_fontsize
+
     def switch_layer(self, config, as_default=0):
         """Activate an OC layer."""
         pdf = _as_pdf_document(self)
@@ -5972,6 +7848,9 @@
                 compression_effort=compression_effort,
         )
         return bio.getvalue()
+    
+    def tobytes(self, *args, **kwargs):
+        return self.write(*args, **kwargs)
 
     @property
     def xref(self):
@@ -5979,6 +7858,41 @@
         CheckParent(self)
         return self.parent.page_xref(self.number)
 
+    def xref_copy(doc: 'Document', source: int, target: int, *, keep: list = None) -> None:
+        """Copy a PDF dictionary object to another one given their xref numbers.
+
+        Args:
+            doc: PDF document object
+            source: source xref number
+            target: target xref number, the xref must already exist
+            keep: an optional list of 1st level keys in target that should not be
+                  removed before copying.
+        Notes:
+            This works similar to the copy() method of dictionaries in Python. The
+            source may be a stream object.
+        """
+        if doc.xref_is_stream(source):
+            # read new xref stream, maintaining compression
+            stream = doc.xref_stream_raw(source)
+            doc.update_stream(
+                target,
+                stream,
+                compress=False,  # keeps source compression
+                new=True,  # in case target is no stream
+            )
+
+        # empty the target completely, observe exceptions
+        if keep is None:
+            keep = []
+        for key in doc.xref_get_keys(target):
+            if key in keep:
+                continue
+            doc.xref_set_key(target, key, "null")
+        # copy over all source dict items
+        for key in doc.xref_get_keys(source):
+            item = doc.xref_get_key(source, key)
+            doc.xref_set_key(target, key, item[1])
+    
     def xref_get_key(self, xref, key):
         """Get PDF dict key value of object at 'xref'."""
         pdf = _as_pdf_document(self)
@@ -6195,7 +8109,6 @@
     __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
     
     outline = property(lambda self: self._outline)
-    tobytes = write
     is_stream = xref_is_stream
 
 open = Document
@@ -8734,6 +10647,117 @@
             annot._yielded=True
             yield annot
 
+    def apply_redactions(
+            page: 'Page',
+            images: int = 2,
+            graphics: int = 1,
+            text: int = 0,
+            ) -> bool:
+        """Apply the redaction annotations of the page.
+
+        Args:
+            page: the PDF page.
+            images:
+                  0 - ignore images
+                  1 - remove all overlapping images
+                  2 - blank out overlapping image parts
+                  3 - remove image unless invisible
+            graphics:
+                  0 - ignore graphics
+                  1 - remove graphics if contained in rectangle
+                  2 - remove all overlapping graphics
+            text:
+                  0 - remove text
+                  1 - ignore text
+        """
+
+        def center_rect(annot_rect, new_text, font, fsize):
+            """Calculate minimal sub-rectangle for the overlay text.
+
+            Notes:
+                Because 'insert_textbox' supports no vertical text centering,
+                we calculate an approximate number of lines here and return a
+                sub-rect with smaller height, which should still be sufficient.
+            Args:
+                annot_rect: the annotation rectangle
+                new_text: the text to insert.
+                font: the fontname. Must be one of the CJK or Base-14 set, else
+                    the rectangle is returned unchanged.
+                fsize: the fontsize
+            Returns:
+                A rectangle to use instead of the annot rectangle.
+            """
+            if not new_text or annot_rect.width <= EPSILON:
+                return annot_rect
+            try:
+                text_width = get_text_length(new_text, font, fsize)
+            except (ValueError, mupdf.FzErrorBase):  # unsupported font
+                if g_exceptions_verbose:
+                    exception_info()
+                return annot_rect
+            line_height = fsize * 1.2
+            limit = annot_rect.width
+            h = math.ceil(text_width / limit) * line_height  # estimate rect height
+            if h >= annot_rect.height:
+                return annot_rect
+            r = annot_rect
+            y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
+            r.y0 = y
+            return r
+
+        CheckParent(page)
+        doc = page.parent
+        if doc.is_encrypted or doc.is_closed:
+            raise ValueError("document closed or encrypted")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+
+        redact_annots = []  # storage of annot values
+        for annot in page.annots(
+            types=(mupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
+        ):
+            # loop redactions
+            redact_annots.append(annot._get_redact_values())  # save annot values
+
+        if redact_annots == []:  # any redactions on this page?
+            return False  # no redactions
+
+        rc = page._apply_redactions(text, images, graphics)  # call MuPDF
+        if not rc:  # should not happen really
+            raise ValueError("Error applying redactions.")
+
+        # now write replacement text in old redact rectangles
+        shape = page.new_shape()
+        for redact in redact_annots:
+            annot_rect = redact["rect"]
+            fill = redact["fill"]
+            if fill:
+                shape.draw_rect(annot_rect)  # colorize the rect background
+                shape.finish(fill=fill, color=fill)
+            if "text" in redact.keys():  # if we also have text
+                new_text = redact["text"]
+                align = redact.get("align", 0)
+                fname = redact["fontname"]
+                fsize = redact["fontsize"]
+                color = redact["text_color"]
+                # try finding vertical centered sub-rect
+                trect = center_rect(annot_rect, new_text, fname, fsize)
+
+                rc = -1
+                while rc < 0 and fsize >= 4:  # while not enough room
+                    # (re-) try insertion
+                    rc = shape.insert_textbox(
+                        trect,
+                        new_text,
+                        fontname=fname,
+                        fontsize=fsize,
+                        color=color,
+                        align=align,
+                    )
+                    fsize -= 0.5  # reduce font if unsuccessful
+        shape.commit()  # append new contents object
+        return True
+
     def recolor(self, components=1):
         """Convert colorspaces of objects on the page.
         
@@ -8842,6 +10866,19 @@
         annot._erase()
         return val
 
+    def delete_image(page: 'Page', xref: int):
+        """Delete the image referred to by xef.
+
+        Actually replaces by a small transparent Pixmap using method Page.replace_image.
+
+        Args:
+            xref: xref of the image to delete.
+        """
+        # make a small 100% transparent pixmap (of just any dimension)
+        pix = Pixmap(csGRAY, (0, 0, 1, 1), 1)
+        pix.clear_with()  # clear all samples bytes to 0x00
+        page.replace_image(xref, pixmap=pix)
+
     def delete_link(self, linkdict):
         """Delete a Link."""
         CheckParent(self)
@@ -8886,6 +10923,20 @@
 
         return finished()
 
+    def delete_widget(page: 'Page', widget: Widget) -> Widget:
+        """Delete widget from page and return the next one."""
+        CheckParent(page)
+        annot = getattr(widget, "_annot", None)
+        if annot is None:
+            raise ValueError("bad type: widget")
+        nextwidget = widget.next
+        page.delete_annot(annot)
+        widget._annot.parent = None
+        keylist = list(widget.__dict__.keys())
+        for key in keylist:
+            del widget.__dict__[key]
+        return nextwidget
+
     @property
     def derotation_matrix(self) -> Matrix:
         """Reflects page de-rotation."""
@@ -8896,6 +10947,408 @@
             return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
         return Matrix(JM_derotate_page_matrix(pdfpage))
 
+    def draw_bezier(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            p3: point_like,
+            p4: point_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            morph: OptStr = None,
+            closePath: bool = False,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3."""
+        img = page.new_shape()
+        Q = img.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                closePath=closePath,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_circle(
+            page: 'Page',
+            center: point_like,
+            radius: float,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            morph: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a circle given its center and radius."""
+        img = page.new_shape()
+        Q = img.draw_circle(Point(center), radius)
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+        return Q
+
+    def draw_curve(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            p3: point_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            morph: OptSeq = None,
+            closePath: bool = False,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3."""
+        img = page.new_shape()
+        Q = img.draw_curve(Point(p1), Point(p2), Point(p3))
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                closePath=closePath,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_line(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            color: OptSeq = (0,),
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            morph: OptSeq = None,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc=0,
+            ) -> Point:
+        """Draw a line from point p1 to point p2."""
+        img = page.new_shape()
+        p = img.draw_line(Point(p1), Point(p2))
+        img.finish(
+                color=color,
+                dashes=dashes,
+                width=width,
+                closePath=False,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return p
+
+    def draw_oval(
+            page: 'Page',
+            rect: typing.Union[rect_like, quad_like],
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            morph: OptSeq = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw an oval given its containing rectangle or quad."""
+        img = page.new_shape()
+        Q = img.draw_oval(rect)
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_polyline(
+            page: 'Page',
+            points: list,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            morph: OptSeq = None,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            closePath: bool = False,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw multiple connected line segments."""
+        img = page.new_shape()
+        Q = img.draw_polyline(points)
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                closePath=closePath,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_quad(
+            page: 'Page',
+            quad: quad_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a quadrilateral."""
+        img = page.new_shape()
+        Q = img.draw_quad(Quad(quad))
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_rect(
+            page: 'Page',
+            rect: rect_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            radius=None,
+            ) -> Point:
+        '''
+        Draw a rectangle. See Shape class method for details.
+        '''
+        img = page.new_shape()
+        Q = img.draw_rect(Rect(rect), radius=radius)
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_sector(
+            page: 'Page',
+            center: point_like,
+            point: point_like,
+            beta: float,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            fullSector: bool = True,
+            morph: OptSeq = None,
+            width: float = 1,
+            closePath: bool = False,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a circle sector given circle center, one arc end point and the angle of the arc.
+
+        Parameters:
+            center -- center of circle
+            point -- arc end point
+            beta -- angle of arc (degrees)
+            fullSector -- connect arc ends with center
+        """
+        img = page.new_shape()
+        Q = img.draw_sector(Point(center), Point(point), beta, fullSector=fullSector)
+        img.finish(
+                color=color,
+                fill=fill,
+                dashes=dashes,
+                width=width,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                closePath=closePath,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return Q
+
+    def draw_squiggle(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            breadth: float = 2,
+            color: OptSeq = (0,),
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            morph: OptSeq = None,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a squiggly line from point p1 to point p2."""
+        img = page.new_shape()
+        p = img.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
+        img.finish(
+                color=color,
+                dashes=dashes,
+                width=width,
+                closePath=False,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return p
+
+    def draw_zigzag(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            breadth: float = 2,
+            color: OptSeq = (0,),
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            morph: OptSeq = None,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a zigzag line from point p1 to point p2."""
+        img = page.new_shape()
+        p = img.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
+        img.finish(
+                color=color,
+                dashes=dashes,
+                width=width,
+                closePath=False,
+                lineCap=lineCap,
+                lineJoin=lineJoin,
+                morph=morph,
+                stroke_opacity=stroke_opacity,
+                fill_opacity=fill_opacity,
+                oc=oc,
+                )
+        img.commit(overlay)
+
+        return p
+
     def extend_textpage(self, tpage, flags=0, matrix=None):
         page = self.this
         tp = tpage.this
@@ -9218,6 +11671,168 @@
             val = None
             return paths
 
+    def get_image_info(
+            page: 'Page',
+            hashes: bool = False,
+            xrefs: bool = False
+            ) -> list:
+        """Extract image information only from a pymupdf.TextPage.
+
+        Args:
+            hashes: (bool) include MD5 hash for each image.
+            xrefs: (bool) try to find the xref for each image. Sets hashes to true.
+        """
+        doc = page.parent
+        if xrefs and doc.is_pdf:
+            hashes = True
+        if not doc.is_pdf:
+            xrefs = False
+        imginfo = getattr(page, "_image_info", None)
+        if imginfo and not xrefs:
+            return imginfo
+        if not imginfo:
+            tp = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
+            imginfo = tp.extractIMGINFO(hashes=hashes)
+            del tp
+            if hashes:
+                page._image_info = imginfo
+        if not xrefs or not doc.is_pdf:
+            return imginfo
+        imglist = page.get_images()
+        digests = {}
+        for item in imglist:
+            xref = item[0]
+            pix = Pixmap(doc, xref)
+            digests[pix.digest] = xref
+            del pix
+        for i in range(len(imginfo)):
+            item = imginfo[i]
+            xref = digests.get(item["digest"], 0)
+            item["xref"] = xref
+            imginfo[i] = item
+        return imginfo
+
+    def get_image_rects(page: 'Page', name, transform=False) -> list:
+        """Return list of image positions on a page.
+
+        Args:
+            name: (str, list, int) image identification. May be reference name, an
+                  item of the page's image list or an xref.
+            transform: (bool) whether to also return the transformation matrix.
+        Returns:
+            A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
+            for all image locations on the page.
+        """
+        if type(name) in (list, tuple):
+            xref = name[0]
+        elif type(name) is int:
+            xref = name
+        else:
+            imglist = [i for i in page.get_images() if i[7] == name]
+            if imglist == []:
+                raise ValueError("bad image name")
+            elif len(imglist) != 1:
+                raise ValueError("multiple image names found")
+            xref = imglist[0][0]
+        pix = Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
+        digest = pix.digest
+        del pix
+        infos = page.get_image_info(hashes=True)
+        if not transform:
+            bboxes = [Rect(im["bbox"]) for im in infos if im["digest"] == digest]
+        else:
+            bboxes = [
+                (Rect(im["bbox"]), Matrix(im["transform"]))
+                for im in infos
+                if im["digest"] == digest
+            ]
+        return bboxes
+
+    def get_label(page):
+        """Return the label for this PDF page.
+
+        Args:
+            page: page object.
+        Returns:
+            The label (str) of the page. Errors return an empty string.
+        """
+        # Jorj McKie, 2021-01-06
+
+        labels = page.parent._get_page_labels()
+        if not labels:
+            return ""
+        labels.sort()
+        return utils.get_label_pno(page.number, labels)
+
+    def get_links(page: 'Page') -> list:
+        """Create a list of all links contained in a PDF page.
+
+        Notes:
+            see PyMuPDF ducmentation for details.
+        """
+
+        CheckParent(page)
+        ln = page.first_link
+        links = []
+        while ln:
+            nl = utils.getLinkDict(ln, page.parent)
+            links.append(nl)
+            ln = ln.next
+        if links != [] and page.parent.is_pdf:
+            linkxrefs = [x for x in
+                    #page.annot_xrefs()
+                    JM_get_annot_xref_list2(page)
+                    if x[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
+                    ]
+            if len(linkxrefs) == len(links):
+                for i in range(len(linkxrefs)):
+                    links[i]["xref"] = linkxrefs[i][0]
+                    links[i]["id"] = linkxrefs[i][2]
+        return links
+
+    def get_pixmap(
+                page: 'Page',
+                *,
+                matrix: matrix_like=Identity,
+                dpi=None,
+                colorspace: Colorspace=None,
+                clip: rect_like=None,
+                alpha: bool=False,
+                annots: bool=True,
+                ) -> 'Pixmap':
+        """Create pixmap of page.
+
+        Keyword args:
+            matrix: Matrix for transformation (default: Identity).
+            dpi: desired dots per inch. If given, matrix is ignored.
+            colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
+            clip: (irect-like) restrict rendering to this area.
+            alpha: (bool) whether to include alpha channel
+            annots: (bool) whether to also render annotations
+        """
+        if colorspace is None:
+            colorspace = csRGB
+        if dpi:
+            zoom = dpi / 72
+            matrix = Matrix(zoom, zoom)
+
+        if type(colorspace) is str:
+            if colorspace.upper() == "GRAY":
+                colorspace = csGRAY
+            elif colorspace.upper() == "CMYK":
+                colorspace = csCMYK
+            else:
+                colorspace = csRGB
+        if colorspace.n not in (1, 3, 4):
+            raise ValueError("unsupported colorspace")
+
+        dl = page.get_displaylist(annots=annots)
+        pix = dl.get_pixmap(matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip)
+        dl = None
+        if dpi:
+            pix.set_dpi(dpi, dpi)
+        return pix
+
     def remove_rotation(self):
         """Set page rotation to 0 while maintaining visual appearance."""
         rot = self.rotation  # normalized rotation value
@@ -9503,6 +12118,21 @@
             del tp
         return rc
 
+    def get_text(self, *args, **kwargs):
+        return utils.get_text(self, *args, **kwargs)
+
+    def get_text_blocks(self, *args, **kwargs):
+        return utils.get_text_blocks(self, *args, **kwargs)
+    
+    def get_text_selection(self, *args, **kwargs):
+        return utils.get_text_selection(self, *args, **kwargs)
+    
+    def get_text_words(self, *args, **kwargs):
+        return utils.get_text_words(self, *args, **kwargs)
+    
+    def get_textpage_ocr(self, *args, **kwargs):
+        return utils.get_textpage_ocr(self, *args, **kwargs)
+    
     def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
         CheckParent(self)
         if matrix is None:
@@ -9628,6 +12258,406 @@
         doc.get_char_widths(xref, fontdict=fontdict)
         return xref
 
+    def insert_htmlbox(
+        page,
+        rect,
+        text,
+        *,
+        css=None,
+        scale_low=0,
+        archive=None,
+        rotate=0,
+        oc=0,
+        opacity=1,
+        overlay=True,
+        _scale_word_width=True,
+        _verbose=False,
+    ) -> tuple:
+        """Insert text with optional HTML tags and stylings into a rectangle.
+
+        Args:
+            rect: (rect-like) rectangle into which the text should be placed.
+            text: (str) text with optional HTML tags and stylings.
+            css: (str) CSS styling commands.
+            scale_low: (float) force-fit content by scaling it down. Must be in
+                range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
+                down-scaling is acceptable. A value of 0.1 would mean that content
+                may be scaled down by at most 90%.
+            archive: Archive object pointing to locations of used fonts or images
+            rotate: (int) rotate the text in the box by a multiple of 90 degrees.
+            oc: (int) the xref of an OCG / OCMD (Optional Content).
+            opacity: (float) set opacity of inserted content.
+            overlay: (bool) put text on top of page content.
+            _scale_word_width: internal, for testing only.
+            _verbose: internal, for testing only.
+        Returns:
+            A tuple of floats (spare_height, scale).
+            spare_height:
+                The height of the remaining space in <rect> below the
+                text, or -1 if we failed to fit.
+            scale:
+                The scaling required; `0 < scale <= 1`.
+                Will be less than `scale_low` if we failed to fit.
+        """
+        # normalize rotation angle
+        if not rotate % 90 == 0:
+            raise ValueError("bad rotation angle")
+        while rotate < 0:
+            rotate += 360
+        while rotate >= 360:
+            rotate -= 360
+
+        if not 0 <= scale_low <= 1:
+            raise ValueError("'scale_low' must be in [0, 1]")
+
+        if css is None:
+            css = ""
+
+        rect = Rect(rect)
+        if rotate in (90, 270):
+            temp_rect = Rect(0, 0, rect.height, rect.width)
+        else:
+            temp_rect = Rect(0, 0, rect.width, rect.height)
+
+        # use a small border by default
+        mycss = "body {margin:1px;}" + css  # append user CSS
+
+        # either make a story, or accept a given one
+        if isinstance(text, str):  # if a string, convert to a Story
+            story = Story(html=text, user_css=mycss, archive=archive)
+        elif isinstance(text, Story):
+            story = text
+        else:
+            raise ValueError("'text' must be a string or a Story")
+        
+        # ----------------------------------------------------------------
+        # Find a scaling factor that lets our story fit in. Instead of scaling
+        # the text smaller, we instead look at how much bigger the rect needs
+        # to be to fit the text, then reverse the scaling to get how much we
+        # need to scale down the text.
+        # ----------------------------------------------------------------
+        rect_scale_max = None if scale_low == 0 else 1 / scale_low
+
+        fit = story.fit_scale(
+                temp_rect,
+                scale_min=1,
+                scale_max=rect_scale_max,
+                flags=mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW if _scale_word_width else 0,
+                verbose=_verbose,
+                )
+        
+        if not fit.big_enough:  # there was no fit
+            scale = 1 / fit.parameter
+            return (-1, scale)
+
+        # fit.filled is a tuple; we convert it in place to a Rect for
+        # convenience. (fit.rect is already a Rect.)
+        fit.filled = Rect(fit.filled)
+        assert (fit.rect.x0, fit.rect.y0) == (0, 0)
+        assert (fit.filled.x0, fit.filled.y0) == (0, 0)
+        
+        scale = 1 / fit.parameter
+        assert scale >= scale_low, f'{scale_low=} {scale=}'
+        
+        spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)
+
+        def rect_function(*args):
+            return fit.rect, fit.rect, None
+
+        # draw story on temp PDF page
+        doc = story.write_with_links(rect_function)
+
+        # Insert opacity if requested.
+        # For this, we prepend a command to the /Contents.
+        if 0 <= opacity < 1:
+            tpage = doc[0]  # load page
+            # generate /ExtGstate for the page
+            alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
+            s = f"/{alp0} gs\n"  # generate graphic state command
+            TOOLS._insert_contents(tpage, s.encode(), 0)
+
+        # put result in target page
+        page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)
+
+        # -------------------------------------------------------------------------
+        # re-insert links in target rect (show_pdf_page cannot copy annotations)
+        # -------------------------------------------------------------------------
+        # scaled center point of fit.rect
+        mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale
+
+        # center point of target rect
+        mp2 = (rect.tl + rect.br) / 2
+
+        # compute link positioning matrix:
+        # - move center of scaled-down fit.rect to (0,0)
+        # - rotate
+        # - move (0,0) to center of target rect
+        mat = (
+            Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
+            * Matrix(-rotate)
+            * Matrix(1, 0, 0, 1, mp2.x, mp2.y)
+        )
+
+        # copy over links
+        for link in doc[0].get_links():
+            link["from"] *= mat
+            page.insert_link(link)
+
+        return spare_height, scale
+
+    def insert_image(
+            page,
+            rect,
+            *,
+            alpha=-1,
+            filename=None,
+            height=0,
+            keep_proportion=True,
+            mask=None,
+            oc=0,
+            overlay=True,
+            pixmap=None,
+            rotate=0,
+            stream=None,
+            width=0,
+            xref=0,
+            ):
+        """Insert an image for display in a rectangle.
+
+        Args:
+            rect: (rect_like) position of image on the page.
+            alpha: (int, optional) set to 0 if image has no transparency.
+            filename: (str, Path, file object) image filename.
+            height: (int)
+            keep_proportion: (bool) keep width / height ratio (default).
+            mask: (bytes, optional) image consisting of alpha values to use.
+            oc: (int) xref of OCG or OCMD to declare as Optional Content.
+            overlay: (bool) put in foreground (default) or background.
+            pixmap: (pymupdf.Pixmap) use this as image.
+            rotate: (int) rotate by 0, 90, 180 or 270 degrees.
+            stream: (bytes) use this as image.
+            width: (int)
+            xref: (int) use this as image.
+
+        'page' and 'rect' are positional, all other parameters are keywords.
+
+        If 'xref' is given, that image is used. Other input options are ignored.
+        Else, exactly one of pixmap, stream or filename must be given.
+
+        'alpha=0' for non-transparent images improves performance significantly.
+        Affects stream and filename only.
+
+        Optimum transparent insertions are possible by using filename / stream in
+        conjunction with a 'mask' image of alpha values.
+
+        Returns:
+            xref (int) of inserted image. Re-use as argument for multiple insertions.
+        """
+        CheckParent(page)
+        doc = page.parent
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+
+        if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
+            raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")
+
+        if filename:
+            if type(filename) is str:
+                pass
+            elif hasattr(filename, "absolute"):
+                filename = str(filename)
+            elif hasattr(filename, "name"):
+                filename = filename.name
+            else:
+                raise ValueError("bad filename")
+
+        if filename and not os.path.exists(filename):
+            raise FileNotFoundError("No such file: '%s'" % filename)
+        elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
+            raise ValueError("stream must be bytes-like / BytesIO")
+        elif pixmap and type(pixmap) is not Pixmap:
+            raise ValueError("pixmap must be a Pixmap")
+        if mask and not (stream or filename):
+            raise ValueError("mask requires stream or filename")
+        if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
+            raise ValueError("mask must be bytes-like / BytesIO")
+        while rotate < 0:
+            rotate += 360
+        while rotate >= 360:
+            rotate -= 360
+        if rotate not in (0, 90, 180, 270):
+            raise ValueError("bad rotate value")
+
+        r = Rect(rect)
+        if r.is_empty or r.is_infinite:
+            raise ValueError("rect must be finite and not empty")
+        clip = r * ~page.transformation_matrix
+
+        # Create a unique image reference name.
+        ilst = [i[7] for i in doc.get_page_images(page.number)]
+        ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
+        ilst += [i[4] for i in doc.get_page_fonts(page.number)]
+        n = "fzImg"  # 'pymupdf image'
+        i = 0
+        _imgname = n + "0"  # first name candidate
+        while _imgname in ilst:
+            i += 1
+            _imgname = n + str(i)  # try new name
+
+        if overlay:
+            page.wrap_contents()  # ensure a balanced graphics state
+        digests = doc.InsertedImages
+        xref, digests = page._insert_image(
+            filename=filename,
+            pixmap=pixmap,
+            stream=stream,
+            imask=mask,
+            clip=clip,
+            overlay=overlay,
+            oc=oc,
+            xref=xref,
+            rotate=rotate,
+            keep_proportion=keep_proportion,
+            width=width,
+            height=height,
+            alpha=alpha,
+            _imgname=_imgname,
+            digests=digests,
+        )
+        if digests is not None:
+            doc.InsertedImages = digests
+
+        return xref
+
+    def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None:
+        """Insert a new link for the current page."""
+        CheckParent(page)
+        annot = utils.getLinkText(page, lnk)
+        if annot == "":
+            raise ValueError("link kind not supported")
+        page._addAnnot_FromString((annot,))
+
+    def insert_text(
+            page: 'Page',
+            point: point_like,
+            text: typing.Union[str, list],
+            *,
+            fontsize: float = 11,
+            lineheight: OptFloat = None,
+            fontname: str = "helv",
+            fontfile: OptStr = None,
+            set_simple: int = 0,
+            encoding: int = 0,
+            color: OptSeq = None,
+            fill: OptSeq = None,
+            border_width: float = 0.05,
+            miter_limit: float = 1,
+            render_mode: int = 0,
+            rotate: int = 0,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ):
+
+        img = page.new_shape()
+        rc = img.insert_text(
+            point,
+            text,
+            fontsize=fontsize,
+            lineheight=lineheight,
+            fontname=fontname,
+            fontfile=fontfile,
+            set_simple=set_simple,
+            encoding=encoding,
+            color=color,
+            fill=fill,
+            border_width=border_width,
+            render_mode=render_mode,
+            miter_limit=miter_limit,
+            rotate=rotate,
+            morph=morph,
+            stroke_opacity=stroke_opacity,
+            fill_opacity=fill_opacity,
+            oc=oc,
+        )
+        if rc >= 0:
+            img.commit(overlay)
+        return rc
+
+    def insert_textbox(
+            page: 'Page',
+            rect: rect_like,
+            buffer: typing.Union[str, list],
+            *,
+            fontname: str = "helv",
+            fontfile: OptStr = None,
+            set_simple: int = 0,
+            encoding: int = 0,
+            fontsize: float = 11,
+            lineheight: OptFloat = None,
+            color: OptSeq = None,
+            fill: OptSeq = None,
+            expandtabs: int = 1,
+            align: int = 0,
+            rotate: int = 0,
+            render_mode: int = 0,
+            miter_limit: float = 1,
+            border_width: float = 0.05,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> float:
+        """Insert text into a given rectangle.
+
+        Notes:
+            Creates a Shape object, uses its same-named method and commits it.
+        Parameters:
+            rect: (rect-like) area to use for text.
+            buffer: text to be inserted
+            fontname: a Base-14 font, font name or '/name'
+            fontfile: name of a font file
+            fontsize: font size
+            lineheight: overwrite the font property
+            color: RGB color triple
+            expandtabs: handles tabulators with string function
+            align: left, center, right, justified
+            rotate: 0, 90, 180, or 270 degrees
+            morph: morph box with a matrix and a fixpoint
+            overlay: put text in foreground or background
+        Returns:
+            unused or deficit rectangle area (float)
+        """
+        img = page.new_shape()
+        rc = img.insert_textbox(
+            rect,
+            buffer,
+            fontsize=fontsize,
+            lineheight=lineheight,
+            fontname=fontname,
+            fontfile=fontfile,
+            set_simple=set_simple,
+            encoding=encoding,
+            color=color,
+            fill=fill,
+            expandtabs=expandtabs,
+            render_mode=render_mode,
+            miter_limit=miter_limit,
+            border_width=border_width,
+            align=align,
+            rotate=rotate,
+            morph=morph,
+            stroke_opacity=stroke_opacity,
+            fill_opacity=fill_opacity,
+            oc=oc,
+        )
+        if rc >= 0:
+            img.commit(overlay)
+        return rc
+
     @property
     def is_wrapped(self):
         """Check if /Contents is in a balanced graphics state."""
@@ -9740,6 +12770,9 @@
     def mediabox_size(self):
         return Point(self.mediabox.x1, self.mediabox.y1)
 
+    def new_shape(self):
+        return Shape(self)
+
     #@property
     #def parent( self):
     #    assert self._parent
@@ -9759,6 +12792,44 @@
         # fixme this looks wrong.
         self.this = page
 
+    def replace_image(
+            page: 'Page',
+            xref: int,
+            *,
+            filename=None,
+            pixmap=None,
+            stream=None,
+            ):
+        """Replace the image referred to by xref.
+
+        Replace the image by changing the object definition stored under xref. This
+        will leave the pages appearance instructions intact, so the new image is
+        being displayed with the same bbox, rotation etc.
+        By providing a small fully transparent image, an effect as if the image had
+        been deleted can be achieved.
+        A typical use may include replacing large images by a smaller version,
+        e.g. with a lower resolution or graylevel instead of colored.
+
+        Args:
+            xref: the xref of the image to replace.
+            filename, pixmap, stream: exactly one of these must be provided. The
+                meaning being the same as in Page.insert_image.
+        """
+        doc = page.parent  # the owning document
+        if not doc.xref_is_image(xref):
+            raise ValueError("xref not an image")  # insert new image anywhere in page
+        if bool(filename) + bool(stream) + bool(pixmap) != 1:
+            raise ValueError("Exactly one of filename/stream/pixmap must be given")
+        new_xref = page.insert_image(
+            page.rect, filename=filename, stream=stream, pixmap=pixmap
+        )
+        doc.xref_copy(new_xref, xref)  # copy over new to old
+        last_contents_xref = page.get_contents()[-1]
+        # new image insertion has created a new /Contents source,
+        # which we will set to spaces now
+        doc.update_stream(last_contents_xref, b" ")
+        page._image_info = None  # clear cache of extracted image information
+
     @property
     def rotation(self):
         """Page rotation."""
@@ -9780,6 +12851,47 @@
         CheckParent(self)
         mupdf.fz_run_page(self.this, dw.device, JM_matrix_from_py(m), mupdf.FzCookie())
 
+    def search_for(
+            page,
+            text,
+            *,
+            clip=None,
+            quads=False,
+            flags=None,
+            textpage=None,
+            ) -> list:
+        """Search for a string on a page.
+
+        Args:
+            text: string to be searched for
+            clip: restrict search to this rectangle
+            quads: (bool) return quads instead of rectangles
+            flags: bit switches, default: join hyphened words
+            textpage: a pre-created pymupdf.TextPage
+        Returns:
+            a list of rectangles or quads, each containing one occurrence.
+        """
+        if flags is None:
+            flags=(0
+                | TEXT_DEHYPHENATE
+                | TEXT_PRESERVE_WHITESPACE
+                | TEXT_PRESERVE_LIGATURES
+                | TEXT_MEDIABOX_CLIP
+                )
+        if clip is not None:
+            clip = Rect(clip)
+
+        CheckParent(page)
+        tp = textpage
+        if tp is None:
+            tp = page.get_textpage(clip=clip, flags=flags)  # create pymupdf.TextPage
+        elif getattr(tp, "parent") != page:
+            raise ValueError("not a textpage of this page")
+        rlist = tp.search(text, quads=quads)
+        if textpage is None:
+            del tp
+        return rlist
+
     def set_artbox(self, rect):
         """Set the ArtBox."""
         return self._set_pagebox("ArtBox", rect)
@@ -9847,6 +12959,130 @@
         """Set the TrimBox."""
         return self._set_pagebox("TrimBox", rect)
 
+    def show_pdf_page(
+            page,
+            rect,
+            docsrc,
+            pno=0,
+            keep_proportion=True,
+            overlay=True,
+            oc=0,
+            rotate=0,
+            clip=None,
+            ) -> int:
+        """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.
+
+        Args:
+            rect: (rect-like) where to place the source image
+            docsrc: (document) source PDF
+            pno: (int) source page number
+            keep_proportion: (bool) do not change width-height-ratio
+            overlay: (bool) put in foreground
+            oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
+            rotate: (int) degrees (multiple of 90)
+            clip: (rect-like) part of source page rectangle
+        Returns:
+            xref of inserted object (for reuse)
+        """
+        def calc_matrix(sr, tr, keep=True, rotate=0):
+            """Calculate transformation matrix from source to target rect.
+
+            Notes:
+                The product of four matrices in this sequence: (1) translate correct
+                source corner to origin, (2) rotate, (3) scale, (4) translate to
+                target's top-left corner.
+            Args:
+                sr: source rect in PDF (!) coordinate system
+                tr: target rect in PDF coordinate system
+                keep: whether to keep source ratio of width to height
+                rotate: rotation angle in degrees
+            Returns:
+                Transformation matrix.
+            """
+            # calc center point of source rect
+            smp = (sr.tl + sr.br) / 2.0
+            # calc center point of target rect
+            tmp = (tr.tl + tr.br) / 2.0
+
+            # m moves to (0, 0), then rotates
+            m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)
+
+            sr1 = sr * m  # resulting source rect to calculate scale factors
+
+            fw = tr.width / sr1.width  # scale the width
+            fh = tr.height / sr1.height  # scale the height
+            if keep:
+                fw = fh = min(fw, fh)  # take min if keeping aspect ratio
+
+            m *= Matrix(fw, fh)  # concat scale matrix
+            m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
+            return JM_TUPLE(m)
+
+        CheckParent(page)
+        doc = page.parent
+
+        if not doc.is_pdf or not docsrc.is_pdf:
+            raise ValueError("is no PDF")
+
+        if rect.is_empty or rect.is_infinite:
+            raise ValueError("rect must be finite and not empty")
+
+        while pno < 0:  # support negative page numbers
+            pno += docsrc.page_count
+        src_page = docsrc[pno]  # load source page
+
+        tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates
+
+        src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
+        if src_rect.is_empty or src_rect.is_infinite:
+            raise ValueError("clip must be finite and not empty")
+        src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord
+
+        matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)
+
+        # list of existing /Form /XObjects
+        ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
+        ilst += [i[7] for i in doc.get_page_images(page.number)]
+        ilst += [i[4] for i in doc.get_page_fonts(page.number)]
+
+        # create a name not in that list
+        n = "fzFrm"
+        i = 0
+        _imgname = n + "0"
+        while _imgname in ilst:
+            i += 1
+            _imgname = n + str(i)
+
+        isrc = docsrc._graft_id  # used as key for graftmaps
+        if doc._graft_id == isrc:
+            raise ValueError("source document must not equal target")
+
+        # retrieve / make Graftmap for source PDF
+        gmap = doc.Graftmaps.get(isrc, None)
+        if gmap is None:
+            gmap = Graftmap(doc)
+            doc.Graftmaps[isrc] = gmap
+
+        # take note of generated xref for automatic reuse
+        pno_id = (isrc, pno)  # id of docsrc[pno]
+        xref = doc.ShownPages.get(pno_id, 0)
+
+        if overlay:
+            page.wrap_contents()  # ensure a balanced graphics state
+        xref = page._show_pdf_page(
+            src_page,
+            overlay=overlay,
+            matrix=matrix,
+            xref=xref,
+            oc=oc,
+            clip=src_rect,
+            graftmap=gmap,
+            _imgname=_imgname,
+        )
+        doc.ShownPages[pno_id] = xref
+
+        return xref
+
     @property
     def transformation_matrix(self):
         """Page transformation matrix."""
@@ -9875,6 +13111,15 @@
         mb = self.mediabox
         return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
 
+    def update_link(page: 'Page', lnk: dict) -> None:
+        """Update a link on the current page."""
+        CheckParent(page)
+        annot = utils.getLinkText(page, lnk)
+        if annot == "":
+            raise ValueError("link kind not supported")
+
+        page.parent.update_object(lnk["xref"], annot, page=page)
+
     def widgets(self, types=None):
         """ Generator over the widgets of a page.
 
@@ -9902,6 +13147,57 @@
             append = b"\nQ" * pop + b"\n"
             TOOLS._insert_contents(self, append, True)
 
+    def write_text(
+            page: 'Page',
+            rect=None,
+            writers=None,
+            overlay=True,
+            color=None,
+            opacity=None,
+            keep_proportion=True,
+            rotate=0,
+            oc=0,
+            ) -> None:
+        """Write the text of one or more pymupdf.TextWriter objects.
+
+        Args:
+            rect: target rectangle. If None, the union of the text writers is used.
+            writers: one or more pymupdf.TextWriter objects.
+            overlay: put in foreground or background.
+            keep_proportion: maintain aspect ratio of rectangle sides.
+            rotate: arbitrary rotation angle.
+            oc: the xref of an optional content object
+        """
+        assert isinstance(page, Page)
+        if not writers:
+            raise ValueError("need at least one pymupdf.TextWriter")
+        if type(writers) is TextWriter:
+            if rotate == 0 and rect is None:
+                writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
+                return None
+            else:
+                writers = (writers,)
+        clip = writers[0].text_rect
+        textdoc = Document()
+        tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
+        for writer in writers:
+            clip |= writer.text_rect
+            writer.write_text(tpage, opacity=opacity, color=color)
+        if rect is None:
+            rect = clip
+        page.show_pdf_page(
+            rect,
+            textdoc,
+            0,
+            overlay=overlay,
+            keep_proportion=keep_proportion,
+            rotate=rotate,
+            clip=clip,
+            oc=oc,
+        )
+        textdoc = None
+        tpage = None
+
     @property
     def xref(self):
         """PDF xref number of page."""
@@ -11502,6 +14798,996 @@
     tr = top_right
 
 
+class Shape:
+    """Create a new shape."""
+
+    @staticmethod
+    def horizontal_angle(C, P):
+        """Return the angle to the horizontal for the connection from C to P.
+        This uses the arcus sine function and resolves its inherent ambiguity by
+        looking up in which quadrant vector S = P - C is located.
+        """
+        S = Point(P - C).unit  # unit vector 'C' -> 'P'
+        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
+        if S.x < 0:  # make arcsin result unique
+            if S.y <= 0:  # bottom-left
+                alfa = -(math.pi - alfa)
+            else:  # top-left
+                alfa = math.pi - alfa
+        else:
+            if S.y >= 0:  # top-right
+                pass
+            else:  # bottom-right
+                alfa = -alfa
+        return alfa
+
+    def __init__(self, page: Page):
+        CheckParent(page)
+        self.page = page
+        self.doc = page.parent
+        if not self.doc.is_pdf:
+            raise ValueError("is no PDF")
+        self.height = page.mediabox_size.y
+        self.width = page.mediabox_size.x
+        self.x = page.cropbox_position.x
+        self.y = page.cropbox_position.y
+
+        self.pctm = page.transformation_matrix  # page transf. matrix
+        self.ipctm = ~self.pctm  # inverted transf. matrix
+
+        self.draw_cont = ""
+        self.text_cont = ""
+        self.totalcont = ""
+        self.last_point = None
+        self.rect = None
+
+    def updateRect(self, x):
+        if self.rect is None:
+            if len(x) == 2:
+                self.rect = Rect(x, x)
+            else:
+                self.rect = Rect(x)
+
+        else:
+            if len(x) == 2:
+                x = Point(x)
+                self.rect.x0 = min(self.rect.x0, x.x)
+                self.rect.y0 = min(self.rect.y0, x.y)
+                self.rect.x1 = max(self.rect.x1, x.x)
+                self.rect.y1 = max(self.rect.y1, x.y)
+            else:
+                x = Rect(x)
+                self.rect.x0 = min(self.rect.x0, x.x0)
+                self.rect.y0 = min(self.rect.y0, x.y0)
+                self.rect.x1 = max(self.rect.x1, x.x1)
+                self.rect.y1 = max(self.rect.y1, x.y1)
+
+    def draw_line(self, p1: point_like, p2: point_like) -> Point:
+        """Draw a line between two points."""
+        p1 = Point(p1)
+        p2 = Point(p2)
+        if not (self.last_point == p1):
+            self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
+            self.last_point = p1
+            self.updateRect(p1)
+
+        self.draw_cont += _format_g(JM_TUPLE(p2 * self.ipctm)) + " l\n"
+        self.updateRect(p2)
+        self.last_point = p2
+        return self.last_point
+
+    def draw_polyline(self, points: list) -> Point:
+        """Draw several connected line segments."""
+        for i, p in enumerate(points):
+            if i == 0:
+                if not (self.last_point == Point(p)):
+                    self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " m\n"
+                    self.last_point = Point(p)
+            else:
+                self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " l\n"
+            self.updateRect(p)
+
+        self.last_point = Point(points[-1])
+        return self.last_point
+
+    def draw_bezier(
+        self,
+        p1: point_like,
+        p2: point_like,
+        p3: point_like,
+        p4: point_like,
+    ) -> Point:
+        """Draw a standard cubic Bezier curve."""
+        p1 = Point(p1)
+        p2 = Point(p2)
+        p3 = Point(p3)
+        p4 = Point(p4)
+        if not (self.last_point == p1):
+            self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
+        args = JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
+        self.draw_cont += _format_g(args) + " c\n"
+        self.updateRect(p1)
+        self.updateRect(p2)
+        self.updateRect(p3)
+        self.updateRect(p4)
+        self.last_point = p4
+        return self.last_point
+
+    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
+        """Draw an ellipse inside a tetrapod."""
+        if len(tetra) != 4:
+            raise ValueError("invalid arg length")
+        if hasattr(tetra[0], "__float__"):
+            q = Rect(tetra).quad
+        else:
+            q = Quad(tetra)
+
+        mt = q.ul + (q.ur - q.ul) * 0.5
+        mr = q.ur + (q.lr - q.ur) * 0.5
+        mb = q.ll + (q.lr - q.ll) * 0.5
+        ml = q.ul + (q.ll - q.ul) * 0.5
+        if not (self.last_point == ml):
+            self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n"
+            self.last_point = ml
+        self.draw_curve(ml, q.ll, mb)
+        self.draw_curve(mb, q.lr, mr)
+        self.draw_curve(mr, q.ur, mt)
+        self.draw_curve(mt, q.ul, ml)
+        self.updateRect(q.rect)
+        self.last_point = ml
+        return self.last_point
+
+    def draw_circle(self, center: point_like, radius: float) -> Point:
+        """Draw a circle given its center and radius."""
+        if not radius > EPSILON:
+            raise ValueError("radius must be positive")
+        center = Point(center)
+        p1 = center - (radius, 0)
+        return self.draw_sector(center, p1, 360, fullSector=False)
+
+    def draw_curve(
+        self,
+        p1: point_like,
+        p2: point_like,
+        p3: point_like,
+    ) -> Point:
+        """Draw a curve between points using one control point."""
+        kappa = 0.55228474983
+        p1 = Point(p1)
+        p2 = Point(p2)
+        p3 = Point(p3)
+        k1 = p1 + (p2 - p1) * kappa
+        k2 = p3 + (p2 - p3) * kappa
+        return self.draw_bezier(p1, k1, k2, p3)
+
+    def draw_sector(
+        self,
+        center: point_like,
+        point: point_like,
+        beta: float,
+        fullSector: bool = True,
+    ) -> Point:
+        """Draw a circle sector."""
+        center = Point(center)
+        point = Point(point)
+        l3 = lambda a, b: _format_g((a, b)) + " m\n"
+        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
+        l5 = lambda a, b: _format_g((a, b)) + " l\n"
+        betar = math.radians(-beta)
+        w360 = math.radians(math.copysign(360, betar)) * (-1)
+        w90 = math.radians(math.copysign(90, betar))
+        w45 = w90 / 2
+        while abs(betar) > 2 * math.pi:
+            betar += w360  # bring angle below 360 degrees
+        if not (self.last_point == point):
+            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
+            self.last_point = point
+        Q = Point(0, 0)  # just make sure it exists
+        C = center
+        P = point
+        S = P - C  # vector 'center' -> 'point'
+        rad = abs(S)  # circle radius
+
+        if not rad > EPSILON:
+            raise ValueError("radius must be positive")
+
+        alfa = self.horizontal_angle(center, point)
+        while abs(betar) > abs(w90):  # draw 90 degree arcs
+            q1 = C.x + math.cos(alfa + w90) * rad
+            q2 = C.y + math.sin(alfa + w90) * rad
+            Q = Point(q1, q2)  # the arc's end point
+            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
+            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
+            R = Point(r1, r2)  # crossing point of tangents
+            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
+            kappa = kappah * abs(P - Q)
+            cp1 = P + (R - P) * kappa  # control point 1
+            cp2 = Q + (R - Q) * kappa  # control point 2
+            self.draw_cont += l4(*JM_TUPLE(
+                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+            ))
+
+            betar -= w90  # reduce param angle by 90 deg
+            alfa += w90  # advance start angle by 90 deg
+            P = Q  # advance to arc end point
+        # draw (remaining) arc
+        if abs(betar) > 1e-3:  # significant degrees left?
+            beta2 = betar / 2
+            q1 = C.x + math.cos(alfa + betar) * rad
+            q2 = C.y + math.sin(alfa + betar) * rad
+            Q = Point(q1, q2)  # the arc's end point
+            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
+            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
+            R = Point(r1, r2)  # crossing point of tangents
+            # kappa height is 4/3 of segment height
+            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
+            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
+            cp1 = P + (R - P) * kappa  # control point 1
+            cp2 = Q + (R - Q) * kappa  # control point 2
+            self.draw_cont += l4(*JM_TUPLE(
+                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+            ))
+        if fullSector:
+            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
+            self.draw_cont += l5(*JM_TUPLE(center * self.ipctm))
+            self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm))
+        self.last_point = Q
+        return self.last_point
+
+    def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
+        """Draw a rectangle.
+
+        Args:
+            radius: if not None, the rectangle will have rounded corners.
+                This is the radius of the curvature, given as percentage of
+                the rectangle width or height. Valid are values 0 < v <= 0.5.
+                For a sequence of two values, the corners will have different
+                radii. Otherwise, the percentage will be computed from the
+                shorter side. A value of (0.5, 0.5) will draw an ellipse.
+        """
+        r = Rect(rect)
+        if radius is None:  # standard rectangle
+            self.draw_cont += _format_g(JM_TUPLE(
+                list(r.bl * self.ipctm) + [r.width, r.height]
+            )) + " re\n"
+            self.updateRect(r)
+            self.last_point = r.tl
+            return self.last_point
+        # rounded corners requested. This requires 1 or 2 values, each
+        # with 0 < value <= 0.5
+        if hasattr(radius, "__float__"):
+            if radius <= 0 or radius > 0.5:
+                raise ValueError(f"bad radius value {radius}.")
+            d = min(r.width, r.height) * radius
+            px = (d, 0)
+            py = (0, d)
+        elif hasattr(radius, "__len__") and len(radius) == 2:
+            rx, ry = radius
+            px = (rx * r.width, 0)
+            py = (0, ry * r.height)
+            if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
+                raise ValueError(f"bad radius value {radius}.")
+        else:
+            raise ValueError(f"bad radius value {radius}.")
+
+        lp = self.draw_line(r.tl + py, r.bl - py)
+        lp = self.draw_curve(lp, r.bl, r.bl + px)
+
+        lp = self.draw_line(lp, r.br - px)
+        lp = self.draw_curve(lp, r.br, r.br - py)
+
+        lp = self.draw_line(lp, r.tr + py)
+        lp = self.draw_curve(lp, r.tr, r.tr - px)
+
+        lp = self.draw_line(lp, r.tl + px)
+        self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
+
+        self.updateRect(r)
+        return self.last_point
+
+    def draw_quad(self, quad: quad_like) -> Point:
+        """Draw a Quad."""
+        q = Quad(quad)
+        return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])
+
+    def draw_zigzag(
+        self,
+        p1: point_like,
+        p2: point_like,
+        breadth: float = 2,
+    ) -> Point:
+        """Draw a zig-zagged line from p1 to p2."""
+        p1 = Point(p1)
+        p2 = Point(p2)
+        S = p2 - p1  # vector start - end
+        rad = abs(S)  # distance of points
+        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+        if cnt < 4:
+            raise ValueError("points too close")
+        mb = rad / cnt  # revised breadth
+        matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
+        i_mat = ~matrix  # get original position
+        points = []  # stores edges
+        for i in range(1, cnt):
+            if i % 4 == 1:  # point "above" connection
+                p = Point(i, -1) * mb
+            elif i % 4 == 3:  # point "below" connection
+                p = Point(i, 1) * mb
+            else:  # ignore others
+                continue
+            points.append(p * i_mat)
+        self.draw_polyline([p1] + points + [p2])  # add start and end points
+        return p2
+
+    def draw_squiggle(
+        self,
+        p1: point_like,
+        p2: point_like,
+        breadth=2,
+    ) -> Point:
+        """Draw a squiggly line from p1 to p2."""
+        p1 = Point(p1)
+        p2 = Point(p2)
+        S = p2 - p1  # vector start - end
+        rad = abs(S)  # distance of points
+        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+        if cnt < 4:
+            raise ValueError("points too close")
+        mb = rad / cnt  # revised breadth
+        matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
+        i_mat = ~matrix  # get original position
+        k = 2.4142135623765633  # y of draw_curve helper point
+
+        points = []  # stores edges
+        for i in range(1, cnt):
+            if i % 4 == 1:  # point "above" connection
+                p = Point(i, -k) * mb
+            elif i % 4 == 3:  # point "below" connection
+                p = Point(i, k) * mb
+            else:  # else on connection line
+                p = Point(i, 0) * mb
+            points.append(p * i_mat)
+
+        points = [p1] + points + [p2]
+        cnt = len(points)
+        i = 0
+        while i + 2 < cnt:
+            self.draw_curve(points[i], points[i + 1], points[i + 2])
+            i += 2
+        return p2
+
+    # ==============================================================================
+    # Shape.insert_text
+    # ==============================================================================
+    def insert_text(
+        self,
+        point: point_like,
+        buffer: typing.Union[str, list],
+        *,
+        fontsize: float = 11,
+        lineheight: OptFloat = None,
+        fontname: str = "helv",
+        fontfile: OptStr = None,
+        set_simple: bool = 0,
+        encoding: int = 0,
+        color: OptSeq = None,
+        fill: OptSeq = None,
+        render_mode: int = 0,
+        border_width: float = 0.05,
+        miter_limit: float = 1,
+        rotate: int = 0,
+        morph: OptSeq = None,
+        stroke_opacity: float = 1,
+        fill_opacity: float = 1,
+        oc: int = 0,
+    ) -> int:
+
+        # ensure 'text' is a list of strings, worth dealing with
+        if not bool(buffer):
+            return 0
+
+        if type(buffer) not in (list, tuple):
+            text = buffer.splitlines()
+        else:
+            text = buffer
+
+        if not len(text) > 0:
+            return 0
+
+        point = Point(point)
+        try:
+            maxcode = max([ord(c) for c in " ".join(text)])
+        except Exception:
+            exception_info()
+            return 0
+
+        # ensure valid 'fontname'
+        fname = fontname
+        if fname.startswith("/"):
+            fname = fname[1:]
+
+        xref = self.page.insert_font(
+            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+        )
+        fontinfo = CheckFontInfo(self.doc, xref)
+
+        fontdict = fontinfo[1]
+        ordering = fontdict["ordering"]
+        simple = fontdict["simple"]
+        bfname = fontdict["name"]
+        ascender = fontdict["ascender"]
+        descender = fontdict["descender"]
+        if lineheight:
+            lheight = fontsize * lineheight
+        elif ascender - descender <= 1:
+            lheight = fontsize * 1.2
+        else:
+            lheight = fontsize * (ascender - descender)
+
+        if maxcode > 255:
+            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+        else:
+            glyphs = fontdict["glyphs"]
+
+        tab = []
+        for t in text:
+            if simple and bfname not in ("Symbol", "ZapfDingbats"):
+                g = None
+            else:
+                g = glyphs
+            tab.append(getTJstr(t, g, simple, ordering))
+        text = tab
+
+        color_str = ColorCode(color, "c")
+        fill_str = ColorCode(fill, "f")
+        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
+            fill = color
+            fill_str = ColorCode(color, "f")
+
+        morphing = CheckMorph(morph)
+        rot = rotate
+        if rot % 90 != 0:
+            raise ValueError("bad rotate value")
+
+        while rot < 0:
+            rot += 360
+        rot = rot % 360  # text rotate = 0, 90, 270, 180
+
+        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
+        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
+        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
+        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
+        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+        height = self.height
+        width = self.width
+
+        # setting up for standard rotation directions
+        # case rotate = 0
+        if morphing:
+            m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
+            mat = ~m1 * morph[1] * m1
+            cm = _format_g(JM_TUPLE(mat)) + " cm\n"
+        else:
+            cm = ""
+        top = height - point.y - self.y  # start of 1st char
+        left = point.x + self.x  # start of 1. char
+        space = top  # space available
+        #headroom = point.y + self.y  # distance to page border
+        if rot == 90:
+            left = height - point.y - self.y
+            top = -point.x - self.x
+            cm += cmp90
+            space = width - abs(top)
+            #headroom = point.x + self.x
+
+        elif rot == 270:
+            left = -height + point.y + self.y
+            top = point.x + self.x
+            cm += cmm90
+            space = abs(top)
+            #headroom = width - point.x - self.x
+
+        elif rot == 180:
+            left = -point.x - self.x
+            top = -height + point.y + self.y
+            cm += cm180
+            space = abs(point.y + self.y)
+            #headroom = height - point.y - self.y
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            bdc = "/OC /%s BDC\n" % optcont
+            emc = "EMC\n"
+        else:
+            bdc = emc = ""
+
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is None:
+            alpha = ""
+        else:
+            alpha = "/%s gs\n" % alpha
+        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)
+
+        if render_mode > 0:
+            nres += "%i Tr " % render_mode
+            nres += _format_g(border_width * fontsize) + " w "
+            if miter_limit is not None:
+                nres += _format_g(miter_limit) + " M "
+        if color is not None:
+            nres += color_str
+        if fill is not None:
+            nres += fill_str
+
+        # =========================================================================
+        #   start text insertion
+        # =========================================================================
+        nres += text[0]
+        nlines = 1  # set output line counter
+        if len(text) > 1:
+            nres += templ2(lheight)  # line 1
+        else:
+            nres += 'TJ'
+        for i in range(1, len(text)):
+            if space < lheight:
+                break  # no space left on page
+            if i > 1:
+                nres += "\nT* "
+            nres += text[i] + 'TJ'
+            space -= lheight
+            nlines += 1
+
+        nres += "\nET\n%sQ\n" % emc
+
+        # =========================================================================
+        #   end of text insertion
+        # =========================================================================
+        # update the /Contents object
+        self.text_cont += nres
+        return nlines
+
+    # ==============================================================================
+    # Shape.insert_textbox
+    # ==============================================================================
+    def insert_textbox(
+        self,
+        rect: rect_like,
+        buffer: typing.Union[str, list],
+        *,
+        fontname: OptStr = "helv",
+        fontfile: OptStr = None,
+        fontsize: float = 11,
+        lineheight: OptFloat = None,
+        set_simple: bool = 0,
+        encoding: int = 0,
+        color: OptSeq = None,
+        fill: OptSeq = None,
+        expandtabs: int = 1,
+        border_width: float = 0.05,
+        miter_limit: float = 1,
+        align: int = 0,
+        render_mode: int = 0,
+        rotate: int = 0,
+        morph: OptSeq = None,
+        stroke_opacity: float = 1,
+        fill_opacity: float = 1,
+        oc: int = 0,
+    ) -> float:
+        """Insert text into a given rectangle.
+
+        Args:
+            rect -- the textbox to fill
+            buffer -- text to be inserted
+            fontname -- a Base-14 font, font name or '/name'
+            fontfile -- name of a font file
+            fontsize -- font size
+            lineheight -- overwrite the font property
+            color -- RGB stroke color triple
+            fill -- RGB fill color triple
+            render_mode -- text rendering control
+            border_width -- thickness of glyph borders as percentage of fontsize
+            expandtabs -- handles tabulators with string function
+            align -- left, center, right, justified
+            rotate -- 0, 90, 180, or 270 degrees
+            morph -- morph box with a matrix and a fixpoint
+        Returns:
+            unused or deficit rectangle area (float)
+        """
+        rect = Rect(rect)
+        if rect.is_empty or rect.is_infinite:
+            raise ValueError("text box must be finite and not empty")
+
+        color_str = ColorCode(color, "c")
+        fill_str = ColorCode(fill, "f")
+        if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
+            fill = color
+            fill_str = ColorCode(color, "f")
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            bdc = "/OC /%s BDC\n" % optcont
+            emc = "EMC\n"
+        else:
+            bdc = emc = ""
+
+        # determine opacity / transparency
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is None:
+            alpha = ""
+        else:
+            alpha = "/%s gs\n" % alpha
+
+        if rotate % 90 != 0:
+            raise ValueError("rotate must be multiple of 90")
+
+        rot = rotate
+        while rot < 0:
+            rot += 360
+        rot = rot % 360
+
+        # is buffer worth of dealing with?
+        if not bool(buffer):
+            return rect.height if rot in (0, 180) else rect.width
+
+        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
+        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
+        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+        height = self.height
+
+        fname = fontname
+        if fname.startswith("/"):
+            fname = fname[1:]
+
+        xref = self.page.insert_font(
+            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+        )
+        fontinfo = CheckFontInfo(self.doc, xref)
+
+        fontdict = fontinfo[1]
+        ordering = fontdict["ordering"]
+        simple = fontdict["simple"]
+        glyphs = fontdict["glyphs"]
+        bfname = fontdict["name"]
+        ascender = fontdict["ascender"]
+        descender = fontdict["descender"]
+
+        if lineheight:
+            lheight_factor = lineheight
+        elif ascender - descender <= 1:
+            lheight_factor = 1.2
+        else:
+            lheight_factor = ascender - descender
+        lheight = fontsize * lheight_factor
+
+        # create a list from buffer, split into its lines
+        if type(buffer) in (list, tuple):
+            t0 = "\n".join(buffer)
+        else:
+            t0 = buffer
+
+        maxcode = max([ord(c) for c in t0])
+        # replace invalid char codes for simple fonts
+        if simple and maxcode > 255:
+            t0 = "".join([c if ord(c) < 256 else "?" for c in t0])
+
+        t0 = t0.splitlines()
+
+        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+        if simple and bfname not in ("Symbol", "ZapfDingbats"):
+            tj_glyphs = None
+        else:
+            tj_glyphs = glyphs
+
+        # ----------------------------------------------------------------------
+        # calculate pixel length of a string
+        # ----------------------------------------------------------------------
+        def pixlen(x):
+            """Calculate pixel length of x."""
+            if ordering < 0:
+                return sum([glyphs[ord(c)][1] for c in x]) * fontsize
+            else:
+                return len(x) * fontsize
+
+        # ---------------------------------------------------------------------
+
+        if ordering < 0:
+            blen = glyphs[32][1] * fontsize  # pixel size of space character
+        else:
+            blen = fontsize
+
+        text = ""  # output buffer
+
+        if CheckMorph(morph):
+            m1 = Matrix(
+                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+            )
+            mat = ~m1 * morph[1] * m1
+            cm = _format_g(JM_TUPLE(mat)) + " cm\n"
+        else:
+            cm = ""
+
+        # ---------------------------------------------------------------------
+        # adjust for text orientation / rotation
+        # ---------------------------------------------------------------------
+        progr = 1  # direction of line progress
+        c_pnt = Point(0, fontsize * ascender)  # used for line progress
+        if rot == 0:  # normal orientation
+            point = rect.tl + c_pnt  # line 1 is 'lheight' below top
+            maxwidth = rect.width  # pixels available in one line
+            maxheight = rect.height  # available text height
+
+        elif rot == 90:  # rotate counter clockwise
+            c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
+            point = rect.bl + c_pnt  # line 1 'lheight' away from left
+            maxwidth = rect.height  # pixels available in one line
+            maxheight = rect.width  # available text height
+            cm += cmp90
+
+        elif rot == 180:  # text upside down
+            # progress upwards in y direction
+            c_pnt = -Point(0, fontsize * ascender)
+            point = rect.br + c_pnt  # line 1 'lheight' above bottom
+            maxwidth = rect.width  # pixels available in one line
+            progr = -1  # subtract lheight for next line
+            maxheight =rect.height  # available text height
+            cm += cm180
+
+        else:  # rotate clockwise (270 or -90)
+            # progress from right to left
+            c_pnt = -Point(fontsize * ascender, 0)
+            point = rect.tr + c_pnt  # line 1 'lheight' left of right
+            maxwidth = rect.height  # pixels available in one line
+            progr = -1  # subtract lheight for next line
+            maxheight = rect.width  # available text height
+            cm += cmm90
+
+        # =====================================================================
+        # line loop
+        # =====================================================================
+        just_tab = []  # 'justify' indicators per line
+
+        for i, line in enumerate(t0):
+            line_t = line.expandtabs(expandtabs).split(" ")  # split into words
+            num_words = len(line_t)
+            lbuff = ""  # init line buffer
+            rest = maxwidth  # available line pixels
+            # =================================================================
+            # word loop
+            # =================================================================
+            for j in range(num_words):
+                word = line_t[j]
+                pl_w = pixlen(word)  # pixel len of word
+                if rest >= pl_w:  # does it fit on the line?
+                    lbuff += word + " "  # yes, append word
+                    rest -= pl_w + blen  # update available line space
+                    continue  # next word
+
+                # word doesn't fit - output line (if not empty)
+                if lbuff:
+                    lbuff = lbuff.rstrip() + "\n"  # line full, append line break
+                    text += lbuff  # append to total text
+                    just_tab.append(True)  # can align-justify
+
+                lbuff = ""  # re-init line buffer
+                rest = maxwidth  # re-init avail. space
+
+                if pl_w <= maxwidth:  # word shorter than 1 line?
+                    lbuff = word + " "  # start the line with it
+                    rest = maxwidth - pl_w - blen  # update free space
+                    continue
+
+                # long word: split across multiple lines - char by char ...
+                if len(just_tab) > 0:
+                    just_tab[-1] = False  # cannot align-justify
+                for c in word:
+                    if pixlen(lbuff) <= maxwidth - pixlen(c):
+                        lbuff += c
+                    else:  # line full
+                        lbuff += "\n"  # close line
+                        text += lbuff  # append to text
+                        just_tab.append(False)  # cannot align-justify
+                        lbuff = c  # start new line with this char
+
+                lbuff += " "  # finish long word
+                rest = maxwidth - pixlen(lbuff)  # long word stored
+
+            if lbuff:  # unprocessed line content?
+                text += lbuff.rstrip()  # append to text
+                just_tab.append(False)  # cannot align-justify
+
+            if i < len(t0) - 1:  # not the last line?
+                text += "\n"  # insert line break
+
+        # compute used part of the textbox
+        if text.endswith("\n"):
+            text = text[:-1]
+        lb_count = text.count("\n") + 1  # number of lines written
+
+        # text height = line count * line height plus one descender value
+        text_height = lheight * lb_count - descender * fontsize
+
+        more = text_height - maxheight  # difference to height limit
+        if more > EPSILON:  # landed too much outside rect
+            return (-1) * more  # return deficit, don't output
+
+        more = abs(more)
+        if more < EPSILON:
+            more = 0  # don't bother with epsilons
+        nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
+        templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
+        # center, right, justify: output each line with its own specifics
+        text_t = text.splitlines()  # split text in lines again
+        just_tab[-1] = False  # never justify last line
+        for i, t in enumerate(text_t):
+            spacing = 0
+            pl = maxwidth - pixlen(t)  # length of empty line part
+            pnt = point + c_pnt * (i * lheight_factor)  # text start of line
+            if align == 1:  # center: right shift by half width
+                if rot in (0, 180):
+                    pnt = pnt + Point(pl / 2, 0) * progr
+                else:
+                    pnt = pnt - Point(0, pl / 2) * progr
+            elif align == 2:  # right: right shift by full width
+                if rot in (0, 180):
+                    pnt = pnt + Point(pl, 0) * progr
+                else:
+                    pnt = pnt - Point(0, pl) * progr
+            elif align == 3:  # justify
+                spaces = t.count(" ")  # number of spaces in line
+                if spaces > 0 and just_tab[i]:  # if any, and we may justify
+                    spacing = pl / spaces  # make every space this much larger
+                else:
+                    spacing = 0  # keep normal space length
+            top = height - pnt.y - self.y
+            left = pnt.x + self.x
+            if rot == 90:
+                left = height - pnt.y - self.y
+                top = -pnt.x - self.x
+            elif rot == 270:
+                left = -height + pnt.y + self.y
+                top = pnt.x + self.x
+            elif rot == 180:
+                left = -pnt.x - self.x
+                top = -height + pnt.y + self.y
+
+            nres += templ(left, top, fname, fontsize)
+
+            if render_mode > 0:
+                nres += "%i Tr " % render_mode
+                nres += _format_g(border_width * fontsize) + " w "
+                if miter_limit is not None:
+                    nres += _format_g(miter_limit) + " M "
+
+            if align == 3:
+                nres += _format_g(spacing) + " Tw "
+
+            if color is not None:
+                nres += color_str
+            if fill is not None:
+                nres += fill_str
+            nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)
+
+        nres += "ET\n%sQ\n" % emc
+
+        self.text_cont += nres
+        self.updateRect(rect)
+        return more
+
+    def finish(
+        self,
+        width: float = 1,
+        color: OptSeq = (0,),
+        fill: OptSeq = None,
+        lineCap: int = 0,
+        lineJoin: int = 0,
+        dashes: OptStr = None,
+        even_odd: bool = False,
+        morph: OptSeq = None,
+        closePath: bool = True,
+        fill_opacity: float = 1,
+        stroke_opacity: float = 1,
+        oc: int = 0,
+    ) -> None:
+        """Finish the current drawing segment.
+
+        Notes:
+            Apply colors, opacity, dashes, line style and width, or
+            morphing. Also whether to close the path
+            by connecting last to first point.
+        """
+        if self.draw_cont == "":  # treat empty contents as no-op
+            return
+
+        if width == 0:  # border color makes no sense then
+            color = None
+        elif color is None:  # vice versa
+            width = 0
+        # if color == None and fill == None:
+        #     raise ValueError("at least one of 'color' or 'fill' must be given")
+        color_str = ColorCode(color, "c")  # ensure proper color string
+        fill_str = ColorCode(fill, "f")  # ensure proper fill string
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
+            emc = "EMC\n"
+        else:
+            emc = ""
+
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is not None:
+            self.draw_cont = "/%s gs\n" % alpha + self.draw_cont
+
+        if width != 1 and width != 0:
+            self.draw_cont += _format_g(width) + " w\n"
+
+        if lineCap != 0:
+            self.draw_cont = "%i J\n" % lineCap + self.draw_cont
+        if lineJoin != 0:
+            self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
+
+        if dashes not in (None, "", "[] 0"):
+            self.draw_cont = "%s d\n" % dashes + self.draw_cont
+
+        if closePath:
+            self.draw_cont += "h\n"
+            self.last_point = None
+
+        if color is not None:
+            self.draw_cont += color_str
+
+        if fill is not None:
+            self.draw_cont += fill_str
+            if color is not None:
+                if not even_odd:
+                    self.draw_cont += "B\n"
+                else:
+                    self.draw_cont += "B*\n"
+            else:
+                if not even_odd:
+                    self.draw_cont += "f\n"
+                else:
+                    self.draw_cont += "f*\n"
+        else:
+            self.draw_cont += "S\n"
+
+        self.draw_cont += emc
+        if CheckMorph(morph):
+            m1 = Matrix(
+                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+            )
+            mat = ~m1 * morph[1] * m1
+            self.draw_cont = _format_g(JM_TUPLE(mat)) + " cm\n" + self.draw_cont
+
+        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
+        self.draw_cont = ""
+        self.last_point = None
+        return
+
+    def commit(self, overlay: bool = True) -> None:
+        """Update the page's /Contents object with Shape data.
+
+        The argument controls whether data appear in foreground (default)
+        or background.
+        """
+        CheckParent(self.page)  # doc may have died meanwhile
+        self.totalcont += self.text_cont
+        self.totalcont = self.totalcont.encode()
+
+        if self.totalcont:
+            if overlay:
+                self.page.wrap_contents()  # ensure a balanced graphics state
+            # make /Contents object with dummy stream
+            xref = TOOLS._insert_contents(self.page, b" ", overlay)
+            # update it with potential compression
+            self.doc.update_stream(xref, self.totalcont)
+
+        self.last_point = None  # clean up ...
+        self.rect = None  #
+        self.draw_cont = ""  # for potential ...
+        self.text_cont = ""  # ...
+        self.totalcont = ""  # re-use
+
+
 class Story:
 
     def __init__( self, html='', user_css=None, em=12, archive=None):
@@ -11663,10 +15949,13 @@
             function( position2)
         mupdf.fz_story_positions( self.this, function2)
 
-    def place( self, where):
+    def place( self, where, flags=0):
+        '''
+        Wrapper for fz_place_story_flags().
+        '''
         where = JM_rect_from_py( where)
         filled = mupdf.FzRect()
-        more = mupdf.fz_place_story( self.this, where, filled)
+        more = mupdf.fz_place_story_flags( self.this, where, filled, flags)
         return more, JM_py_from_rect( filled)
 
     def reset( self):
@@ -11783,7 +16072,9 @@
         `big_enough`:
             `True` if the fit succeeded.
         `filled`:
-            From the last call to `Story.place()`.
+            Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
+            will be wider than .rect if any single word (which we never split)
+            was too wide for .rect.
         `more`:
             `False` if the fit succeeded.
         `numcalls`:
@@ -11791,7 +16082,7 @@
         `parameter`:
             The successful parameter value, or the largest failing value.
         `rect`:
-            The rect created from `parameter`.
+            The pumupdf.Rect created from `parameter`.
         '''
         def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
             self.big_enough = big_enough
@@ -11811,7 +16102,7 @@
                     f' rect={self.rect}'
                     )
 
-    def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
+    def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0):
         '''
         Finds optimal rect that contains the story `self`.
         
@@ -11838,6 +16129,9 @@
             Maximum error in returned `parameter`.
         :arg verbose:
             If true we output diagnostics.
+        :arg flags:
+            Passed to mupdf.fz_place_story_flags(). e.g.
+            zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`.
         '''
         def log(text):
             assert verbose
@@ -11893,7 +16187,7 @@
                 if verbose:
                     log(f'update(): not calling self.place() because rect is empty.')
             else:
-                more, filled = self.place(rect)
+                more, filled = self.place(rect, flags)
                 state.numcalls += 1
                 big_enough = not more
                 result = Story.FitResult(
@@ -11962,12 +16256,12 @@
             parameter = (state.pmin + state.pmax) / 2
             update(parameter)
 
-    def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
+    def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0):
         '''
         Finds smallest value `scale` in range `scale_min..scale_max` where
         `scale * rect` is large enough to contain the story `self`.
 
-        Returns a `Story.FitResult` instance.
+        Returns a `Story.FitResult` instance with `.parameter` set to `scale`.
 
         :arg width:
             width of rect.
@@ -11982,13 +16276,15 @@
             Maximum error in returned scale.
         :arg verbose:
             If true we output diagnostics.
+        :arg flags:
+            Passed to Story.place().
         '''
         x0, y0, x1, y1 = rect
         width = x1 - x0
         height = y1 - y0
         def fn(scale):
             return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
-        return self.fit(fn, scale_min, scale_max, delta, verbose)
+        return self.fit(fn, scale_min, scale_max, delta, verbose, flags)
 
     def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
         '''
@@ -12315,6 +16611,10 @@
                             and not mupdf.fz_is_infinite_rect(tp_rect)
                             ):
                         continue
+
+                    if buflen == 0 and ch.m_internal.c == 0x200d:
+                        # ZERO WIDTH JOINER cannot start a word
+                        continue
                     word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                     this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
                     if word_delimiter or this_char_rtl != last_char_rtl:
@@ -12515,6 +16815,232 @@
         text = " ".join(words)
         return text
 
+    def fill_textbox(
+            writer: 'TextWriter',
+            rect: rect_like,
+            text: typing.Union[str, list],
+            pos: point_like = None,
+            font: typing.Optional[Font] = None,
+            fontsize: float = 11,
+            lineheight: OptFloat = None,
+            align: int = 0,
+            warn: bool = None,
+            right_to_left: bool = False,
+            small_caps: bool = False,
+            ) -> tuple:
+        """Fill a rectangle with text.
+
+        Args:
+            writer: pymupdf.TextWriter object (= "self")
+            rect: rect-like to receive the text.
+            text: string or list/tuple of strings.
+            pos: point-like start position of first word.
+            font: pymupdf.Font object (default pymupdf.Font('helv')).
+            fontsize: the fontsize.
+            lineheight: overwrite the font property
+            align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
+            warn: (bool) text overflow action: none, warn, or exception
+            right_to_left: (bool) indicate right-to-left language.
+        """
+        rect = Rect(rect)
+        if rect.is_empty:
+            raise ValueError("fill rect must not empty.")
+        if type(font) is not Font:
+            font = Font("helv")
+
+        def textlen(x):
+            """Return length of a string."""
+            return font.text_length(
+                x, fontsize=fontsize, small_caps=small_caps
+            )  # abbreviation
+
+        def char_lengths(x):
+            """Return list of single character lengths for a string."""
+            return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)
+
+        def append_this(pos, text):
+            ret = writer.append(
+                    pos, text, font=font, fontsize=fontsize, small_caps=small_caps
+                    )
+            return ret
+
+        tolerance = fontsize * 0.2  # extra distance to left border
+        space_len = textlen(" ")
+        std_width = rect.width - tolerance
+        std_start = rect.x0 + tolerance
+
+        def norm_words(width, words):
+            """Cut any word in pieces no longer than 'width'."""
+            nwords = []
+            word_lengths = []
+            for w in words:
+                wl_lst = char_lengths(w)
+                wl = sum(wl_lst)
+                if wl <= width:  # nothing to do - copy over
+                    nwords.append(w)
+                    word_lengths.append(wl)
+                    continue
+
+                # word longer than rect width - split it in parts
+                n = len(wl_lst)
+                while n > 0:
+                    wl = sum(wl_lst[:n])
+                    if wl <= width:
+                        nwords.append(w[:n])
+                        word_lengths.append(wl)
+                        w = w[n:]
+                        wl_lst = wl_lst[n:]
+                        n = len(wl_lst)
+                    else:
+                        n -= 1
+            return nwords, word_lengths
+
+        def output_justify(start, line):
+            """Justified output of a line."""
+            # ignore leading / trailing / multiple spaces
+            words = [w for w in line.split(" ") if w != ""]
+            nwords = len(words)
+            if nwords == 0:
+                return
+            if nwords == 1:  # single word cannot be justified
+                append_this(start, words[0])
+                return
+            tl = sum([textlen(w) for w in words])  # total word lengths
+            gaps = nwords - 1  # number of word gaps
+            gapl = (std_width - tl) / gaps  # width of each gap
+            for w in words:
+                _, lp = append_this(start, w)  # output one word
+                start.x = lp.x + gapl  # next start at word end plus gap
+            return
+
+        asc = font.ascender
+        dsc = font.descender
+        if not lineheight:
+            if asc - dsc <= 1:
+                lheight = 1.2
+            else:
+                lheight = asc - dsc
+        else:
+            lheight = lineheight
+
+        LINEHEIGHT = fontsize * lheight  # effective line height
+        width = std_width  # available horizontal space
+
+        # starting point of text
+        if pos is not None:
+            pos = Point(pos)
+        else:  # default is just below rect top-left
+            pos = rect.tl + (tolerance, fontsize * asc)
+        if pos not in rect:
+            raise ValueError("Text must start in rectangle.")
+
+        # calculate displacement factor for alignment
+        if align == TEXT_ALIGN_CENTER:
+            factor = 0.5
+        elif align == TEXT_ALIGN_RIGHT:
+            factor = 1.0
+        else:
+            factor = 0
+
+        # split in lines if just a string was given
+        if type(text) is str:
+            textlines = text.splitlines()
+        else:
+            textlines = []
+            for line in text:
+                textlines.extend(line.splitlines())
+
+        max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1
+
+        new_lines = []  # the final list of textbox lines
+        no_justify = []  # no justify for these line numbers
+        for i, line in enumerate(textlines):
+            if line in ("", " "):
+                new_lines.append((line, space_len))
+                width = rect.width - tolerance
+                no_justify.append((len(new_lines) - 1))
+                continue
+            if i == 0:
+                width = rect.x1 - pos.x
+            else:
+                width = rect.width - tolerance
+
+            if right_to_left:  # reverses Arabic / Hebrew text front to back
+                line = writer.clean_rtl(line)
+            tl = textlen(line)
+            if tl <= width:  # line short enough
+                new_lines.append((line, tl))
+                no_justify.append((len(new_lines) - 1))
+                continue
+
+            # we need to split the line in fitting parts
+            words = line.split(" ")  # the words in the line
+
+            # cut in parts any words that are longer than rect width
+            words, word_lengths = norm_words(width, words)
+
+            n = len(words)
+            while True:
+                line0 = " ".join(words[:n])
+                wl = sum(word_lengths[:n]) + space_len * (n - 1)
+                if wl <= width:
+                    new_lines.append((line0, wl))
+                    words = words[n:]
+                    word_lengths = word_lengths[n:]
+                    n = len(words)
+                    line0 = None
+                else:
+                    n -= 1
+
+                if len(words) == 0:
+                    break
+                assert n
+
+        # -------------------------------------------------------------------------
+        # List of lines created. Each item is (text, tl), where 'tl' is the PDF
+        # output length (float) and 'text' is the text. Except for justified text,
+        # this is output-ready.
+        # -------------------------------------------------------------------------
+        nlines = len(new_lines)
+        if nlines > max_lines:
+            msg = "Only fitting %i of %i lines." % (max_lines, nlines)
+            if warn is None:
+                pass
+            elif warn:
+                message("Warning: " + msg)
+            else:
+                raise ValueError(msg)
+
+        start = Point()
+        no_justify += [len(new_lines) - 1]  # no justifying of last line
+        for i in range(max_lines):
+            try:
+                line, tl = new_lines.pop(0)
+            except IndexError:
+                if g_exceptions_verbose >= 2:   exception_info()
+                break
+
+            if right_to_left:  # Arabic, Hebrew
+                line = "".join(reversed(line))
+
+            if i == 0:  # may have different start for first line
+                start = pos
+
+            if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
+                output_justify(start, line)
+                start.x = std_start
+                start.y += LINEHEIGHT
+                continue
+
+            if i > 0 or pos.x == std_start:  # left, center, right alignments
+                start.x += (width - tl) * factor
+
+            append_this(start, line)
+            start.x = std_start
+            start.y += LINEHEIGHT
+
+        return new_lines  # return non-written lines
+
     def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
         """Write the text to a PDF page having the TextWriter's page size.
 
@@ -12735,6 +17261,16 @@
         """Check if x is in the rectangle."""
         return self.__contains__(x)
 
+    def get_area(self, *args) -> float:
+        """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'."""
+        if args:
+            unit = args[0]
+        else:
+            unit = "px"
+        u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)}
+        f = (u[unit][0] / u[unit][1]) ** 2
+        return f * self.width * self.height
+
     def include_point(self, p):
         """Extend rectangle to include point p."""
         rect = self.rect.include_point(p)
@@ -20924,6 +25460,82 @@
     return _wxcolors
 
 
+def _mupdf_devel(make_links=True):
+    '''
+    Allows PyMuPDF installation to be used to compile and link programmes that
+    use the MuPDF C/C++ API.
+    
+    Args:
+        make_links:
+            If true, then on non-windows we also create softlinks to any shared
+            libraries that are supplied with a version suffix; this allows them
+            to be used in a link command.
+
+            For example we create links such as:
+
+            site-packages/pymupdf/
+                libmupdf.so -> libmupdf.so.26.7
+                libmupdfcpp.so -> libmupdfcpp.so.26.7
+    
+    Returns: (mupdf_include, mupdf_lib).
+        mupdf_include:
+            Path of MuPDF include directory within PyMuPDF install.
+        mupdf_lib
+            Path of MuPDF library directory within PyMuPDF install.
+    '''
+    import platform
+    
+    log(f'{mupdf_version=}')
+    
+    p = os.path.normpath(f'{__file__}/..')
+
+    mupdf_include = f'{p}/mupdf-devel/include'
+    
+    if platform.system() == 'Windows':
+        # Separate .lib files are used at build time.
+        mupdf_lib = f'{p}/mupdf-devel/lib'
+    else:
+        # .so files are used for both buildtime and runtime linking.
+        mupdf_lib = p
+    log(f'Within installed PyMuPDF:')
+    log(f'    {mupdf_include=}')
+    log(f'    {mupdf_lib=}')
+
+    assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.'
+    assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.'
+
+    if platform.system() != 'Windows' and make_links:
+        # Make symbolic links within the installed pymupdf module so
+        # that ld can find libmupdf.so etc. This is a bit of a hack, but
+        # necessary because wheels cannot contain symbolic links.
+        #
+        # For example we create `libmupdf.so -> libmupdf.so.24.8`.
+        #
+        # We are careful to only create symlinks for the expected MuPDF
+        # version, in case old .so files from a previous install are still
+        # in place.
+        #
+        log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.')
+        regex_suffix = mupdf_version.split('.')[1:3]
+        regex_suffix = '[.]'.join(regex_suffix)
+        mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{regex_suffix}$'
+        log(f'{mupdf_lib_regex=}.')
+        for leaf in os.listdir(mupdf_lib):
+            m = re.match(mupdf_lib_regex, leaf)
+            if m:
+                pfrom = f'{mupdf_lib}/{m.group(1)}'
+                # os.path.exists() can return false if softlink exists
+                # but points to non-existent file, so we also use
+                # `os.path.islink()`.
+                if os.path.islink(pfrom) or os.path.exists(pfrom):
+                    log(f'Removing existing link {pfrom=}.')
+                    os.remove(pfrom)
+                log(f'Creating symlink: {pfrom} -> {leaf}')
+                os.symlink(leaf, pfrom)
+    
+    return mupdf_include, mupdf_lib
+
+
 # We cannot import utils earlier because it imports this .py file itself and
 # uses some pymupdf.* types in function typing.
 #
@@ -20938,83 +25550,9 @@
 recover_quad                = utils.recover_quad
 recover_span_quad           = utils.recover_span_quad
 
-Annot.get_text              = utils.get_text
-Annot.get_textbox           = utils.get_textbox
-
-Document._do_links          = utils.do_links
-Document._do_widgets        = utils.do_widgets
-Document.del_toc_item       = utils.del_toc_item
-Document.get_char_widths    = utils.get_char_widths
-Document.get_oc             = utils.get_oc
-Document.get_ocmd           = utils.get_ocmd
-Document.get_page_labels    = utils.get_page_labels
-Document.get_page_numbers   = utils.get_page_numbers
-Document.get_page_pixmap    = utils.get_page_pixmap
-Document.get_page_text      = utils.get_page_text
-Document.get_toc            = utils.get_toc
-Document.has_annots         = utils.has_annots
-Document.has_links          = utils.has_links
-Document.insert_page        = utils.insert_page
-Document.new_page           = utils.new_page
-Document.scrub              = utils.scrub
-Document.search_page_for    = utils.search_page_for
-Document.set_metadata       = utils.set_metadata
-Document.set_oc             = utils.set_oc
-Document.set_ocmd           = utils.set_ocmd
-Document.set_page_labels    = utils.set_page_labels
-Document.set_toc            = utils.set_toc
-Document.set_toc_item       = utils.set_toc_item
-Document.subset_fonts       = utils.subset_fonts
-Document.tobytes            = Document.write
-Document.xref_copy          = utils.xref_copy
-
-IRect.get_area              = utils.get_area
-
-Page.apply_redactions       = utils.apply_redactions
-Page.delete_image           = utils.delete_image
-Page.delete_widget          = utils.delete_widget
-Page.draw_bezier            = utils.draw_bezier
-Page.draw_circle            = utils.draw_circle
-Page.draw_curve             = utils.draw_curve
-Page.draw_line              = utils.draw_line
-Page.draw_oval              = utils.draw_oval
-Page.draw_polyline          = utils.draw_polyline
-Page.draw_quad              = utils.draw_quad
-Page.draw_rect              = utils.draw_rect
-Page.draw_sector            = utils.draw_sector
-Page.draw_squiggle          = utils.draw_squiggle
-Page.draw_zigzag            = utils.draw_zigzag
-Page.get_image_info         = utils.get_image_info
-Page.get_image_rects        = utils.get_image_rects
-Page.get_label              = utils.get_label
-Page.get_links              = utils.get_links
-Page.get_pixmap             = utils.get_pixmap
-Page.get_text               = utils.get_text
-Page.get_text_blocks        = utils.get_text_blocks
-Page.get_text_selection     = utils.get_text_selection
-Page.get_text_words         = utils.get_text_words
-Page.get_textbox            = utils.get_textbox
-Page.get_textpage_ocr       = utils.get_textpage_ocr
-Page.insert_image           = utils.insert_image
-Page.insert_link            = utils.insert_link
-Page.insert_text            = utils.insert_text
-Page.insert_textbox         = utils.insert_textbox
-Page.insert_htmlbox         = utils.insert_htmlbox
-Page.new_shape              = lambda x: utils.Shape(x)
-Page.replace_image          = utils.replace_image
-Page.search_for             = utils.search_for
-Page.show_pdf_page          = utils.show_pdf_page
-Page.update_link            = utils.update_link
-Page.write_text             = utils.write_text
-Shape                       = utils.Shape
 from .table import find_tables
-
 Page.find_tables = find_tables
 
-Rect.get_area               = utils.get_area
-
-TextWriter.fill_textbox     = utils.fill_textbox
-
 
 class FitzDeprecation(DeprecationWarning):
     pass
@@ -21284,19 +25822,19 @@
     _alias( Rect, 'is_infinite')
     _alias( TextWriter, 'fill_textbox')
     _alias( TextWriter, 'write_text')
-    _alias( utils.Shape, 'draw_bezier')
-    _alias( utils.Shape, 'draw_circle')
-    _alias( utils.Shape, 'draw_curve')
-    _alias( utils.Shape, 'draw_line')
-    _alias( utils.Shape, 'draw_oval')
-    _alias( utils.Shape, 'draw_polyline')
-    _alias( utils.Shape, 'draw_quad')
-    _alias( utils.Shape, 'draw_rect')
-    _alias( utils.Shape, 'draw_sector')
-    _alias( utils.Shape, 'draw_squiggle')
-    _alias( utils.Shape, 'draw_zigzag')
-    _alias( utils.Shape, 'insert_text')
-    _alias( utils.Shape, 'insert_textbox')
+    _alias( Shape, 'draw_bezier')
+    _alias( Shape, 'draw_circle')
+    _alias( Shape, 'draw_curve')
+    _alias( Shape, 'draw_line')
+    _alias( Shape, 'draw_oval')
+    _alias( Shape, 'draw_polyline')
+    _alias( Shape, 'draw_quad')
+    _alias( Shape, 'draw_rect')
+    _alias( Shape, 'draw_sector')
+    _alias( Shape, 'draw_squiggle')
+    _alias( Shape, 'draw_zigzag')
+    _alias( Shape, 'insert_text')
+    _alias( Shape, 'insert_textbox')
 
 if 0:
     restore_aliases()
--- a/src/extra.i	Mon Sep 15 11:43:07 2025 +0200
+++ b/src/extra.i	Sat Oct 11 11:19:58 2025 +0200
@@ -1,5 +1,3 @@
-%module fitz_extra
-
 %pythoncode %{
 # pylint: disable=all
 %}
@@ -3297,7 +3295,11 @@
                 {
                     continue;
                 }
-
+                // prevent Unicode ZWJ 0x200d to start a word
+                if (buflen == 0 && ch.m_internal->c == 0x200d)
+                {
+                    continue;
+                }
                 int word_delimiter = JM_is_word_delimiter(ch.m_internal->c, delimiters);
                 int this_char_rtl = JM_is_rtl_char(ch.m_internal->c);
                 if (word_delimiter || this_char_rtl != last_char_rtl)
--- a/src/utils.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/src/utils.py	Sat Oct 11 11:19:58 2025 +0200
@@ -6,9 +6,7 @@
 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is
 # maintained and developed by Artifex Software, Inc. https://artifex.com.
 # ------------------------------------------------------------------------
-import io
 import math
-import os
 import typing
 import weakref
 
@@ -51,428 +49,6 @@
 """
 
 
-def write_text(
-        page: pymupdf.Page,
-        rect=None,
-        writers=None,
-        overlay=True,
-        color=None,
-        opacity=None,
-        keep_proportion=True,
-        rotate=0,
-        oc=0,
-        ) -> None:
-    """Write the text of one or more pymupdf.TextWriter objects.
-
-    Args:
-        rect: target rectangle. If None, the union of the text writers is used.
-        writers: one or more pymupdf.TextWriter objects.
-        overlay: put in foreground or background.
-        keep_proportion: maintain aspect ratio of rectangle sides.
-        rotate: arbitrary rotation angle.
-        oc: the xref of an optional content object
-    """
-    assert isinstance(page, pymupdf.Page)
-    if not writers:
-        raise ValueError("need at least one pymupdf.TextWriter")
-    if type(writers) is pymupdf.TextWriter:
-        if rotate == 0 and rect is None:
-            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
-            return None
-        else:
-            writers = (writers,)
-    clip = writers[0].text_rect
-    textdoc = pymupdf.Document()
-    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
-    for writer in writers:
-        clip |= writer.text_rect
-        writer.write_text(tpage, opacity=opacity, color=color)
-    if rect is None:
-        rect = clip
-    page.show_pdf_page(
-        rect,
-        textdoc,
-        0,
-        overlay=overlay,
-        keep_proportion=keep_proportion,
-        rotate=rotate,
-        clip=clip,
-        oc=oc,
-    )
-    textdoc = None
-    tpage = None
-
-
-def show_pdf_page(
-        page,
-        rect,
-        docsrc,
-        pno=0,
-        keep_proportion=True,
-        overlay=True,
-        oc=0,
-        rotate=0,
-        clip=None,
-        ) -> int:
-    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.
-
-    Args:
-        rect: (rect-like) where to place the source image
-        docsrc: (document) source PDF
-        pno: (int) source page number
-        keep_proportion: (bool) do not change width-height-ratio
-        overlay: (bool) put in foreground
-        oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
-        rotate: (int) degrees (multiple of 90)
-        clip: (rect-like) part of source page rectangle
-    Returns:
-        xref of inserted object (for reuse)
-    """
-    def calc_matrix(sr, tr, keep=True, rotate=0):
-        """Calculate transformation matrix from source to target rect.
-
-        Notes:
-            The product of four matrices in this sequence: (1) translate correct
-            source corner to origin, (2) rotate, (3) scale, (4) translate to
-            target's top-left corner.
-        Args:
-            sr: source rect in PDF (!) coordinate system
-            tr: target rect in PDF coordinate system
-            keep: whether to keep source ratio of width to height
-            rotate: rotation angle in degrees
-        Returns:
-            Transformation matrix.
-        """
-        # calc center point of source rect
-        smp = (sr.tl + sr.br) / 2.0
-        # calc center point of target rect
-        tmp = (tr.tl + tr.br) / 2.0
-
-        # m moves to (0, 0), then rotates
-        m = pymupdf.Matrix(1, 0, 0, 1, -smp.x, -smp.y) * pymupdf.Matrix(rotate)
-
-        sr1 = sr * m  # resulting source rect to calculate scale factors
-
-        fw = tr.width / sr1.width  # scale the width
-        fh = tr.height / sr1.height  # scale the height
-        if keep:
-            fw = fh = min(fw, fh)  # take min if keeping aspect ratio
-
-        m *= pymupdf.Matrix(fw, fh)  # concat scale matrix
-        m *= pymupdf.Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
-        return pymupdf.JM_TUPLE(m)
-
-    pymupdf.CheckParent(page)
-    doc = page.parent
-
-    if not doc.is_pdf or not docsrc.is_pdf:
-        raise ValueError("is no PDF")
-
-    if rect.is_empty or rect.is_infinite:
-        raise ValueError("rect must be finite and not empty")
-
-    while pno < 0:  # support negative page numbers
-        pno += docsrc.page_count
-    src_page = docsrc[pno]  # load source page
-
-    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates
-
-    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
-    if src_rect.is_empty or src_rect.is_infinite:
-        raise ValueError("clip must be finite and not empty")
-    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord
-
-    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)
-
-    # list of existing /Form /XObjects
-    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
-    ilst += [i[7] for i in doc.get_page_images(page.number)]
-    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
-
-    # create a name not in that list
-    n = "fzFrm"
-    i = 0
-    _imgname = n + "0"
-    while _imgname in ilst:
-        i += 1
-        _imgname = n + str(i)
-
-    isrc = docsrc._graft_id  # used as key for graftmaps
-    if doc._graft_id == isrc:
-        raise ValueError("source document must not equal target")
-
-    # retrieve / make pymupdf.Graftmap for source PDF
-    gmap = doc.Graftmaps.get(isrc, None)
-    if gmap is None:
-        gmap = pymupdf.Graftmap(doc)
-        doc.Graftmaps[isrc] = gmap
-
-    # take note of generated xref for automatic reuse
-    pno_id = (isrc, pno)  # id of docsrc[pno]
-    xref = doc.ShownPages.get(pno_id, 0)
-
-    if overlay:
-        page.wrap_contents()  # ensure a balanced graphics state
-    xref = page._show_pdf_page(
-        src_page,
-        overlay=overlay,
-        matrix=matrix,
-        xref=xref,
-        oc=oc,
-        clip=src_rect,
-        graftmap=gmap,
-        _imgname=_imgname,
-    )
-    doc.ShownPages[pno_id] = xref
-
-    return xref
-
-
-def replace_image(page: pymupdf.Page, xref: int, *, filename=None, pixmap=None, stream=None):
-    """Replace the image referred to by xref.
-
-    Replace the image by changing the object definition stored under xref. This
-    will leave the pages appearance instructions intact, so the new image is
-    being displayed with the same bbox, rotation etc.
-    By providing a small fully transparent image, an effect as if the image had
-    been deleted can be achieved.
-    A typical use may include replacing large images by a smaller version,
-    e.g. with a lower resolution or graylevel instead of colored.
-
-    Args:
-        xref: the xref of the image to replace.
-        filename, pixmap, stream: exactly one of these must be provided. The
-            meaning being the same as in Page.insert_image.
-    """
-    doc = page.parent  # the owning document
-    if not doc.xref_is_image(xref):
-        raise ValueError("xref not an image")  # insert new image anywhere in page
-    if bool(filename) + bool(stream) + bool(pixmap) != 1:
-        raise ValueError("Exactly one of filename/stream/pixmap must be given")
-    new_xref = page.insert_image(
-        page.rect, filename=filename, stream=stream, pixmap=pixmap
-    )
-    doc.xref_copy(new_xref, xref)  # copy over new to old
-    last_contents_xref = page.get_contents()[-1]
-    # new image insertion has created a new /Contents source,
-    # which we will set to spaces now
-    doc.update_stream(last_contents_xref, b" ")
-    page._image_info = None  # clear cache of extracted image information
-
-
-def delete_image(page: pymupdf.Page, xref: int):
-    """Delete the image referred to by xef.
-
-    Actually replaces by a small transparent Pixmap using method Page.replace_image.
-
-    Args:
-        xref: xref of the image to delete.
-    """
-    # make a small 100% transparent pixmap (of just any dimension)
-    pix = pymupdf.Pixmap(pymupdf.csGRAY, (0, 0, 1, 1), 1)
-    pix.clear_with()  # clear all samples bytes to 0x00
-    page.replace_image(xref, pixmap=pix)
-
-
-def insert_image(
-        page,
-        rect,
-        *,
-        alpha=-1,
-        filename=None,
-        height=0,
-        keep_proportion=True,
-        mask=None,
-        oc=0,
-        overlay=True,
-        pixmap=None,
-        rotate=0,
-        stream=None,
-        width=0,
-        xref=0,
-        ):
-    """Insert an image for display in a rectangle.
-
-    Args:
-        rect: (rect_like) position of image on the page.
-        alpha: (int, optional) set to 0 if image has no transparency.
-        filename: (str, Path, file object) image filename.
-        height: (int)
-        keep_proportion: (bool) keep width / height ratio (default).
-        mask: (bytes, optional) image consisting of alpha values to use.
-        oc: (int) xref of OCG or OCMD to declare as Optional Content.
-        overlay: (bool) put in foreground (default) or background.
-        pixmap: (pymupdf.Pixmap) use this as image.
-        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
-        stream: (bytes) use this as image.
-        width: (int)
-        xref: (int) use this as image.
-
-    'page' and 'rect' are positional, all other parameters are keywords.
-
-    If 'xref' is given, that image is used. Other input options are ignored.
-    Else, exactly one of pixmap, stream or filename must be given.
-
-    'alpha=0' for non-transparent images improves performance significantly.
-    Affects stream and filename only.
-
-    Optimum transparent insertions are possible by using filename / stream in
-    conjunction with a 'mask' image of alpha values.
-
-    Returns:
-        xref (int) of inserted image. Re-use as argument for multiple insertions.
-    """
-    pymupdf.CheckParent(page)
-    doc = page.parent
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-
-    if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
-        raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")
-
-    if filename:
-        if type(filename) is str:
-            pass
-        elif hasattr(filename, "absolute"):
-            filename = str(filename)
-        elif hasattr(filename, "name"):
-            filename = filename.name
-        else:
-            raise ValueError("bad filename")
-
-    if filename and not os.path.exists(filename):
-        raise FileNotFoundError("No such file: '%s'" % filename)
-    elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
-        raise ValueError("stream must be bytes-like / BytesIO")
-    elif pixmap and type(pixmap) is not pymupdf.Pixmap:
-        raise ValueError("pixmap must be a pymupdf.Pixmap")
-    if mask and not (stream or filename):
-        raise ValueError("mask requires stream or filename")
-    if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
-        raise ValueError("mask must be bytes-like / BytesIO")
-    while rotate < 0:
-        rotate += 360
-    while rotate >= 360:
-        rotate -= 360
-    if rotate not in (0, 90, 180, 270):
-        raise ValueError("bad rotate value")
-
-    r = pymupdf.Rect(rect)
-    if r.is_empty or r.is_infinite:
-        raise ValueError("rect must be finite and not empty")
-    clip = r * ~page.transformation_matrix
-
-    # Create a unique image reference name.
-    ilst = [i[7] for i in doc.get_page_images(page.number)]
-    ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
-    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
-    n = "fzImg"  # 'pymupdf image'
-    i = 0
-    _imgname = n + "0"  # first name candidate
-    while _imgname in ilst:
-        i += 1
-        _imgname = n + str(i)  # try new name
-
-    if overlay:
-        page.wrap_contents()  # ensure a balanced graphics state
-    digests = doc.InsertedImages
-    xref, digests = page._insert_image(
-        filename=filename,
-        pixmap=pixmap,
-        stream=stream,
-        imask=mask,
-        clip=clip,
-        overlay=overlay,
-        oc=oc,
-        xref=xref,
-        rotate=rotate,
-        keep_proportion=keep_proportion,
-        width=width,
-        height=height,
-        alpha=alpha,
-        _imgname=_imgname,
-        digests=digests,
-    )
-    if digests is not None:
-        doc.InsertedImages = digests
-
-    return xref
-
-
-def search_for(
-        page,
-        text,
-        *,
-        clip=None,
-        quads=False,
-        flags=pymupdf.TEXT_DEHYPHENATE
-            | pymupdf.TEXT_PRESERVE_WHITESPACE
-            | pymupdf.TEXT_PRESERVE_LIGATURES
-            | pymupdf.TEXT_MEDIABOX_CLIP
-            ,
-        textpage=None,
-        ) -> list:
-    """Search for a string on a page.
-
-    Args:
-        text: string to be searched for
-        clip: restrict search to this rectangle
-        quads: (bool) return quads instead of rectangles
-        flags: bit switches, default: join hyphened words
-        textpage: a pre-created pymupdf.TextPage
-    Returns:
-        a list of rectangles or quads, each containing one occurrence.
-    """
-    if clip is not None:
-        clip = pymupdf.Rect(clip)
-
-    pymupdf.CheckParent(page)
-    tp = textpage
-    if tp is None:
-        tp = page.get_textpage(clip=clip, flags=flags)  # create pymupdf.TextPage
-    elif getattr(tp, "parent") != page:
-        raise ValueError("not a textpage of this page")
-    rlist = tp.search(text, quads=quads)
-    if textpage is None:
-        del tp
-    return rlist
-
-
-def search_page_for(
-    doc: pymupdf.Document,
-    pno: int,
-    text: str,
-    quads: bool = False,
-    clip: rect_like = None,
-    flags: int = pymupdf.TEXT_DEHYPHENATE
-            | pymupdf.TEXT_PRESERVE_LIGATURES
-            | pymupdf.TEXT_PRESERVE_WHITESPACE
-            | pymupdf.TEXT_MEDIABOX_CLIP
-            ,
-    textpage: pymupdf.TextPage = None,
-) -> list:
-    """Search for a string on a page.
-
-    Args:
-        pno: page number
-        text: string to be searched for
-        clip: restrict search to this rectangle
-        quads: (bool) return quads instead of rectangles
-        flags: bit switches, default: join hyphened words
-        textpage: reuse a prepared textpage
-    Returns:
-        a list of rectangles or quads, each containing an occurrence.
-    """
-
-    return doc[pno].search_for(
-        text,
-        quads=quads,
-        clip=clip,
-        flags=flags,
-        textpage=textpage,
-    )
-
-
 def get_text_blocks(
     page: pymupdf.Page,
     clip: rect_like = None,
@@ -822,81 +398,6 @@
     return tpage
 
 
-def get_image_info(page: pymupdf.Page, hashes: bool = False, xrefs: bool = False) -> list:
-    """Extract image information only from a pymupdf.TextPage.
-
-    Args:
-        hashes: (bool) include MD5 hash for each image.
-        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
-    """
-    doc = page.parent
-    if xrefs and doc.is_pdf:
-        hashes = True
-    if not doc.is_pdf:
-        xrefs = False
-    imginfo = getattr(page, "_image_info", None)
-    if imginfo and not xrefs:
-        return imginfo
-    if not imginfo:
-        tp = page.get_textpage(flags=pymupdf.TEXT_PRESERVE_IMAGES)
-        imginfo = tp.extractIMGINFO(hashes=hashes)
-        del tp
-        if hashes:
-            page._image_info = imginfo
-    if not xrefs or not doc.is_pdf:
-        return imginfo
-    imglist = page.get_images()
-    digests = {}
-    for item in imglist:
-        xref = item[0]
-        pix = pymupdf.Pixmap(doc, xref)
-        digests[pix.digest] = xref
-        del pix
-    for i in range(len(imginfo)):
-        item = imginfo[i]
-        xref = digests.get(item["digest"], 0)
-        item["xref"] = xref
-        imginfo[i] = item
-    return imginfo
-
-
-def get_image_rects(page: pymupdf.Page, name, transform=False) -> list:
-    """Return list of image positions on a page.
-
-    Args:
-        name: (str, list, int) image identification. May be reference name, an
-              item of the page's image list or an xref.
-        transform: (bool) whether to also return the transformation matrix.
-    Returns:
-        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
-        for all image locations on the page.
-    """
-    if type(name) in (list, tuple):
-        xref = name[0]
-    elif type(name) is int:
-        xref = name
-    else:
-        imglist = [i for i in page.get_images() if i[7] == name]
-        if imglist == []:
-            raise ValueError("bad image name")
-        elif len(imglist) != 1:
-            raise ValueError("multiple image names found")
-        xref = imglist[0][0]
-    pix = pymupdf.Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
-    digest = pix.digest
-    del pix
-    infos = page.get_image_info(hashes=True)
-    if not transform:
-        bboxes = [pymupdf.Rect(im["bbox"]) for im in infos if im["digest"] == digest]
-    else:
-        bboxes = [
-            (pymupdf.Rect(im["bbox"]), pymupdf.Matrix(im["transform"]))
-            for im in infos
-            if im["digest"] == digest
-        ]
-    return bboxes
-
-
 def get_text(
     page: pymupdf.Page,
     option: str = "text",
@@ -1006,101 +507,6 @@
     return t
 
 
-def get_page_text(
-    doc: pymupdf.Document,
-    pno: int,
-    option: str = "text",
-    clip: rect_like = None,
-    flags: OptInt = None,
-    textpage: pymupdf.TextPage = None,
-    sort: bool = False,
-) -> typing.Any:
-    """Extract a document page's text by page number.
-
-    Notes:
-        Convenience function calling page.get_text().
-    Args:
-        pno: page number
-        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
-    Returns:
-        output from page.TextPage().
-    """
-    return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort)
-
-def get_pixmap(
-        page: pymupdf.Page,
-        *,
-        matrix: matrix_like=pymupdf.Identity,
-        dpi=None,
-        colorspace: pymupdf.Colorspace=pymupdf.csRGB,
-        clip: rect_like=None,
-        alpha: bool=False,
-        annots: bool=True,
-        ) -> pymupdf.Pixmap:
-    """Create pixmap of page.
-
-    Keyword args:
-        matrix: Matrix for transformation (default: Identity).
-        dpi: desired dots per inch. If given, matrix is ignored.
-        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
-        clip: (irect-like) restrict rendering to this area.
-        alpha: (bool) whether to include alpha channel
-        annots: (bool) whether to also render annotations
-    """
-    if dpi:
-        zoom = dpi / 72
-        matrix = pymupdf.Matrix(zoom, zoom)
-
-    if type(colorspace) is str:
-        if colorspace.upper() == "GRAY":
-            colorspace = pymupdf.csGRAY
-        elif colorspace.upper() == "CMYK":
-            colorspace = pymupdf.csCMYK
-        else:
-            colorspace = pymupdf.csRGB
-    if colorspace.n not in (1, 3, 4):
-        raise ValueError("unsupported colorspace")
-
-    dl = page.get_displaylist(annots=annots)
-    pix = dl.get_pixmap(matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip)
-    dl = None
-    if dpi:
-        pix.set_dpi(dpi, dpi)
-    return pix
-
-
-def get_page_pixmap(
-    doc: pymupdf.Document,
-    pno: int,
-    *,
-    matrix: matrix_like = pymupdf.Identity,
-    dpi=None,
-    colorspace: pymupdf.Colorspace = pymupdf.csRGB,
-    clip: rect_like = None,
-    alpha: bool = False,
-    annots: bool = True,
-) -> pymupdf.Pixmap:
-    """Create pixmap of document page by page number.
-
-    Notes:
-        Convenience function calling page.get_pixmap.
-    Args:
-        pno: (int) page number
-        matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
-        colorspace: (str,pymupdf.Colorspace) rgb, rgb, gray - case ignored, default csRGB.
-        clip: (irect-like) restrict rendering to this area.
-        alpha: (bool) include alpha channel
-        annots: (bool) also render annotations
-    """
-    return doc[pno].get_pixmap(
-            matrix=matrix,
-            dpi=dpi, colorspace=colorspace,
-            clip=clip,
-            alpha=alpha,
-            annots=annots
-            )
-
-
 def getLinkDict(ln, document=None) -> dict:
     if isinstance(ln, pymupdf.Outline):
         dest = ln.destination(document)
@@ -1160,280 +566,6 @@
     return nl
 
 
-def get_links(page: pymupdf.Page) -> list:
-    """Create a list of all links contained in a PDF page.
-
-    Notes:
-        see PyMuPDF ducmentation for details.
-    """
-
-    pymupdf.CheckParent(page)
-    ln = page.first_link
-    links = []
-    while ln:
-        nl = getLinkDict(ln, page.parent)
-        links.append(nl)
-        ln = ln.next
-    if links != [] and page.parent.is_pdf:
-        linkxrefs = [x for x in
-                #page.annot_xrefs()
-                pymupdf.JM_get_annot_xref_list2(page)
-                if x[1] == pymupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
-                ]
-        if len(linkxrefs) == len(links):
-            for i in range(len(linkxrefs)):
-                links[i]["xref"] = linkxrefs[i][0]
-                links[i]["id"] = linkxrefs[i][2]
-    return links
-
-
-def get_toc(
-    doc: pymupdf.Document,
-    simple: bool = True,
-) -> list:
-    """Create a table of contents.
-
-    Args:
-        simple: a bool to control output. Returns a list, where each entry consists of outline level, title, page number and link destination (if simple = False). For details see PyMuPDF's documentation.
-    """
-    def recurse(olItem, liste, lvl):
-        """Recursively follow the outline item chain and record item information in a list."""
-        while olItem and olItem.this.m_internal:
-            if olItem.title:
-                title = olItem.title
-            else:
-                title = " "
-
-            if not olItem.is_external:
-                if olItem.uri:
-                    if olItem.page == -1:
-                        resolve = doc.resolve_link(olItem.uri)
-                        page = resolve[0] + 1
-                    else:
-                        page = olItem.page + 1
-                else:
-                    page = -1
-            else:
-                page = -1
-
-            if not simple:
-                link = getLinkDict(olItem, doc)
-                liste.append([lvl, title, page, link])
-            else:
-                liste.append([lvl, title, page])
-
-            if olItem.down:
-                liste = recurse(olItem.down, liste, lvl + 1)
-            olItem = olItem.next
-        return liste
-
-    # ensure document is open
-    if doc.is_closed:
-        raise ValueError("document closed")
-    doc.init_doc()
-    olItem = doc.outline
-    if not olItem:
-        return []
-    lvl = 1
-    liste = []
-    toc = recurse(olItem, liste, lvl)
-    if doc.is_pdf and not simple:
-        doc._extend_toc_items(toc)
-    return toc
-
-
-def del_toc_item(
-    doc: pymupdf.Document,
-    idx: int,
-) -> None:
-    """Delete TOC / bookmark item by index."""
-    xref = doc.get_outline_xrefs()[idx]
-    doc._remove_toc_item(xref)
-
-
-def set_toc_item(
-    doc: pymupdf.Document,
-    idx: int,
-    dest_dict: OptDict = None,
-    kind: OptInt = None,
-    pno: OptInt = None,
-    uri: OptStr = None,
-    title: OptStr = None,
-    to: point_like = None,
-    filename: OptStr = None,
-    zoom: float = 0,
-) -> None:
-    """Update TOC item by index.
-
-    It allows changing the item's title and link destination.
-
-    Args:
-        idx:
-            (int) desired index of the TOC list, as created by get_toc.
-        dest_dict:
-            (dict) destination dictionary as created by get_toc(False).
-            Outrules all other parameters. If None, the remaining parameters
-            are used to make a dest dictionary.
-        kind:
-            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
-            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
-            be deleted.
-        pno:
-            (int) page number (1-based like in get_toc). Required if
-            pymupdf.LINK_GOTO.
-        uri:
-            (str) the URL, required if pymupdf.LINK_URI.
-        title:
-            (str) the new title. No change if None.
-        to:
-            (point-like) destination on the target page. If omitted, (72, 36)
-            will be used as target coordinates.
-        filename:
-            (str) destination filename, required for pymupdf.LINK_GOTOR and
-            pymupdf.LINK_LAUNCH.
-        name:
-            (str) a destination name for pymupdf.LINK_NAMED.
-        zoom:
-            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
-    """
-    xref = doc.get_outline_xrefs()[idx]
-    page_xref = 0
-    if type(dest_dict) is dict:
-        if dest_dict["kind"] == pymupdf.LINK_GOTO:
-            pno = dest_dict["page"]
-            page_xref = doc.page_xref(pno)
-            page_height = doc.page_cropbox(pno).height
-            to = dest_dict.get('to', pymupdf.Point(72, 36))
-            to.y = page_height - to.y
-            dest_dict["to"] = to
-        action = getDestStr(page_xref, dest_dict)
-        if not action.startswith("/A"):
-            raise ValueError("bad bookmark dest")
-        color = dest_dict.get("color")
-        if color:
-            color = list(map(float, color))
-            if len(color) != 3 or min(color) < 0 or max(color) > 1:
-                raise ValueError("bad color value")
-        bold = dest_dict.get("bold", False)
-        italic = dest_dict.get("italic", False)
-        flags = italic + 2 * bold
-        collapse = dest_dict.get("collapse")
-        return doc._update_toc_item(
-            xref,
-            action=action[2:],
-            title=title,
-            color=color,
-            flags=flags,
-            collapse=collapse,
-        )
-
-    if kind == pymupdf.LINK_NONE:  # delete bookmark item
-        return doc.del_toc_item(idx)
-    if kind is None and title is None:  # treat as no-op
-        return None
-    if kind is None:  # only update title text
-        return doc._update_toc_item(xref, action=None, title=title)
-
-    if kind == pymupdf.LINK_GOTO:
-        if pno is None or pno not in range(1, doc.page_count + 1):
-            raise ValueError("bad page number")
-        page_xref = doc.page_xref(pno - 1)
-        page_height = doc.page_cropbox(pno - 1).height
-        if to is None:
-            to = pymupdf.Point(72, page_height - 36)
-        else:
-            to = pymupdf.Point(to)
-            to.y = page_height - to.y
-
-    ddict = {
-        "kind": kind,
-        "to": to,
-        "uri": uri,
-        "page": pno,
-        "file": filename,
-        "zoom": zoom,
-    }
-    action = getDestStr(page_xref, ddict)
-    if action == "" or not action.startswith("/A"):
-        raise ValueError("bad bookmark dest")
-
-    return doc._update_toc_item(xref, action=action[2:], title=title)
-
-
-def get_area(*args) -> float:
-    """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'."""
-    rect = args[0]
-    if len(args) > 1:
-        unit = args[1]
-    else:
-        unit = "px"
-    u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)}
-    f = (u[unit][0] / u[unit][1]) ** 2
-    return f * rect.width * rect.height
-
-
-def set_metadata(doc: pymupdf.Document, m: dict = None) -> None:
-    """Update the PDF /Info object.
-
-    Args:
-        m: a dictionary like doc.metadata.
-    """
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document closed or encrypted")
-    if m is None:
-        m = {}
-    elif type(m) is not dict:
-        raise ValueError("bad metadata")
-    keymap = {
-        "author": "Author",
-        "producer": "Producer",
-        "creator": "Creator",
-        "title": "Title",
-        "format": None,
-        "encryption": None,
-        "creationDate": "CreationDate",
-        "modDate": "ModDate",
-        "subject": "Subject",
-        "keywords": "Keywords",
-        "trapped": "Trapped",
-    }
-    valid_keys = set(keymap.keys())
-    diff_set = set(m.keys()).difference(valid_keys)
-    if diff_set != set():
-        msg = "bad dict key(s): %s" % diff_set
-        raise ValueError(msg)
-
-    t, temp = doc.xref_get_key(-1, "Info")
-    if t != "xref":
-        info_xref = 0
-    else:
-        info_xref = int(temp.replace("0 R", ""))
-
-    if m == {} and info_xref == 0:  # nothing to do
-        return
-
-    if info_xref == 0:  # no prev metadata: get new xref
-        info_xref = doc.get_new_xref()
-        doc.update_object(info_xref, "<<>>")  # fill it with empty object
-        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
-    elif m == {}:  # remove existing metadata
-        doc.xref_set_key(-1, "Info", "null")
-        doc.init_doc()
-        return
-
-    for key, val in [(k, v) for k, v in m.items() if keymap[k] is not None]:
-        pdf_key = keymap[key]
-        if not bool(val) or val in ("none", "null"):
-            val = "null"
-        else:
-            val = pymupdf.get_pdf_str(val)
-        doc.xref_set_key(info_xref, pdf_key, val)
-    doc.init_doc()
-    return
-
-
 def getDestStr(xref: int, ddict: dict) -> str:
     """Calculate the PDF action string.
 
@@ -1492,647 +624,6 @@
     return ""
 
 
-def set_toc(
-    doc: pymupdf.Document,
-    toc: list,
-    collapse: int = 1,
-) -> int:
-    """Create new outline tree (table of contents, TOC).
-
-    Args:
-        toc: (list, tuple) each entry must contain level, title, page and
-            optionally top margin on the page. None or '()' remove the TOC.
-        collapse: (int) collapses entries beyond this level. Zero or None
-            shows all entries unfolded.
-    Returns:
-        the number of inserted items, or the number of removed items respectively.
-    """
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document closed or encrypted")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    if not toc:  # remove all entries
-        return len(doc._delToC())
-
-    # validity checks --------------------------------------------------------
-    if type(toc) not in (list, tuple):
-        raise ValueError("'toc' must be list or tuple")
-    toclen = len(toc)
-    page_count = doc.page_count
-    t0 = toc[0]
-    if type(t0) not in (list, tuple):
-        raise ValueError("items must be sequences of 3 or 4 items")
-    if t0[0] != 1:
-        raise ValueError("hierarchy level of item 0 must be 1")
-    for i in list(range(toclen - 1)):
-        t1 = toc[i]
-        t2 = toc[i + 1]
-        if not -1 <= t1[2] <= page_count:
-            raise ValueError("row %i: page number out of range" % i)
-        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
-            raise ValueError("bad row %i" % (i + 1))
-        if (type(t2[0]) is not int) or t2[0] < 1:
-            raise ValueError("bad hierarchy level in row %i" % (i + 1))
-        if t2[0] > t1[0] + 1:
-            raise ValueError("bad hierarchy level in row %i" % (i + 1))
-    # no formal errors in toc --------------------------------------------------
-
-    # --------------------------------------------------------------------------
-    # make a list of xref numbers, which we can use for our TOC entries
-    # --------------------------------------------------------------------------
-    old_xrefs = doc._delToC()  # del old outlines, get their xref numbers
-
-    # prepare table of xrefs for new bookmarks
-    old_xrefs = []
-    xref = [0] + old_xrefs
-    xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
-    if toclen > len(old_xrefs):  # too few old xrefs?
-        for i in range((toclen - len(old_xrefs))):
-            xref.append(doc.get_new_xref())  # acquire new ones
-
-    lvltab = {0: 0}  # to store last entry per hierarchy level
-
-    # ------------------------------------------------------------------------------
-    # contains new outline objects as strings - first one is the outline root
-    # ------------------------------------------------------------------------------
-    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
-    # ------------------------------------------------------------------------------
-    # build olitems as a list of PDF-like connected dictionaries
-    # ------------------------------------------------------------------------------
-    for i in range(toclen):
-        o = toc[i]
-        lvl = o[0]  # level
-        title = pymupdf.get_pdf_str(o[1])  # title
-        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
-        page_xref = doc.page_xref(pno)
-        page_height = doc.page_cropbox(pno).height
-        top = pymupdf.Point(72, page_height - 36)
-        dest_dict = {"to": top, "kind": pymupdf.LINK_GOTO}  # fall back target
-        if o[2] < 0:
-            dest_dict["kind"] = pymupdf.LINK_NONE
-        if len(o) > 3:  # some target is specified
-            if type(o[3]) in (int, float):  # convert a number to a point
-                dest_dict["to"] = pymupdf.Point(72, page_height - o[3])
-            else:  # if something else, make sure we have a dict
-                # We make a copy of o[3] to avoid modifying our caller's data.
-                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
-                if "to" not in dest_dict:  # target point not in dict?
-                    dest_dict["to"] = top  # put default in
-                else:  # transform target to PDF coordinates
-                    page = doc[pno]
-                    point = pymupdf.Point(dest_dict["to"])
-                    point.y = page.cropbox.height - point.y
-                    point = point * page.rotation_matrix
-                    dest_dict["to"] = (point.x, point.y)
-        d = {}
-        d["first"] = -1
-        d["count"] = 0
-        d["last"] = -1
-        d["prev"] = -1
-        d["next"] = -1
-        d["dest"] = getDestStr(page_xref, dest_dict)
-        d["top"] = dest_dict["to"]
-        d["title"] = title
-        d["parent"] = lvltab[lvl - 1]
-        d["xref"] = xref[i + 1]
-        d["color"] = dest_dict.get("color")
-        d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
-        lvltab[lvl] = i + 1
-        parent = olitems[lvltab[lvl - 1]]  # the parent entry
-
-        if (
-            dest_dict.get("collapse") or collapse and lvl > collapse
-        ):  # suppress expansion
-            parent["count"] -= 1  # make /Count negative
-        else:
-            parent["count"] += 1  # positive /Count
-
-        if parent["first"] == -1:
-            parent["first"] = i + 1
-            parent["last"] = i + 1
-        else:
-            d["prev"] = parent["last"]
-            prev = olitems[parent["last"]]
-            prev["next"] = i + 1
-            parent["last"] = i + 1
-        olitems.append(d)
-
-    # ------------------------------------------------------------------------------
-    # now create each outline item as a string and insert it in the PDF
-    # ------------------------------------------------------------------------------
-    for i, ol in enumerate(olitems):
-        txt = "<<"
-        if ol["count"] != 0:
-            txt += "/Count %i" % ol["count"]
-        try:
-            txt += ol["dest"]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["first"] > -1:
-                txt += "/First %i 0 R" % xref[ol["first"]]
-        except Exception:
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["last"] > -1:
-                txt += "/Last %i 0 R" % xref[ol["last"]]
-        except Exception:
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["next"] > -1:
-                txt += "/Next %i 0 R" % xref[ol["next"]]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["parent"] > -1:
-                txt += "/Parent %i 0 R" % xref[ol["parent"]]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["prev"] > -1:
-                txt += "/Prev %i 0 R" % xref[ol["prev"]]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            txt += "/Title" + ol["title"]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-
-        if ol.get("color") and len(ol["color"]) == 3:
-            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
-        if ol.get("flags", 0) > 0:
-            txt += "/F %i" % ol["flags"]
-
-        if i == 0:  # special: this is the outline root
-            txt += "/Type/Outlines"  # so add the /Type entry
-        txt += ">>"
-        doc.update_object(xref[i], txt)  # insert the PDF object
-
-    doc.init_doc()
-    return toclen
-
-
-def do_widgets(
-    tar: pymupdf.Document,
-    src: pymupdf.Document,
-    graftmap,
-    from_page: int = -1,
-    to_page: int = -1,
-    start_at: int = -1,
-    join_duplicates=0,
-) -> None:
-    """Insert widgets of copied page range into target PDF.
-
-    Parameter values **must** equal those of method insert_pdf() which
-    must have been previously executed.
-    """
-    if not src.is_form_pdf:  # nothing to do: source PDF has no fields
-        return
-
-    def clean_kid_parents(acro_fields):
-        """ Make sure all kids have correct "Parent" pointers."""
-        for i in range(acro_fields.pdf_array_len()):
-            parent = acro_fields.pdf_array_get(i)
-            kids = parent.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
-            for j in range(kids.pdf_array_len()):
-                kid = kids.pdf_array_get(j)
-                kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), parent)
-
-    def join_widgets(pdf, acro_fields, xref1, xref2, name):
-        """Called for each pair of widgets having the same name.
-
-        Args:
-            pdf: target MuPDF document
-            acro_fields: object Root/AcroForm/Fields
-            xref1, xref2: widget xrefs having same names
-            name: (str) the name
-
-        Result:
-            Defined or updated widget parent that points to both widgets.
-        """
-
-        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
-            """Merge widget in xref2 into "Kids" list of widget xref1.
-
-            Args:
-                xref1, kids1: target widget and its "Kids" array.
-                xref2, kids2: source wwidget and its "Kids" array (may be empty).
-            """
-            # make indirect objects from widgets
-            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
-            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
-            # find source widget in "Fields" array
-            idx = acro_fields.pdf_array_find(w2_ind)
-            acro_fields.pdf_array_delete(idx)
-
-            if not kids2.pdf_is_array():  # source widget has no kids
-                widget = mupdf.pdf_load_object(pdf, xref2)
-
-                # delete name from widget and insert target as parent
-                widget.pdf_dict_del(pymupdf.PDF_NAME("T"))
-                widget.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
-
-                # put in target Kids
-                kids1.pdf_array_push(w2_ind)
-            else:  # copy source kids to target kids
-                for i in range(kids2.pdf_array_len()):
-                    kid = kids2.pdf_array_get(i)
-                    kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
-                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
-                    kids1.pdf_array_push(kid_ind)
-
-        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
-            """Make new "Parent" for two widgets with same name.
-
-            Args:
-                xref1, w1: first widget
-                xref2, w2: second widget
-                name: field name
-
-            Result:
-                Both widgets have no "Kids". We create a new object with the
-                name and a "Kids" array containing the widgets.
-                Original widgets must be removed from AcroForm/Fields.
-            """
-            # make new "Parent" object
-            new = mupdf.pdf_new_dict(pdf, 5)
-            new.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), name)
-            kids = new.pdf_dict_put_array(pymupdf.PDF_NAME("Kids"), 2)
-            new_obj = mupdf.pdf_add_object(pdf, new)
-            new_obj_xref = new_obj.pdf_to_num()
-            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)
-
-            # copy over some required source widget properties
-            ft = w1.pdf_dict_get(pymupdf.PDF_NAME("FT"))
-            w1.pdf_dict_del(pymupdf.PDF_NAME("FT"))
-            new_obj.pdf_dict_put(pymupdf.PDF_NAME("FT"), ft)
-
-            aa = w1.pdf_dict_get(pymupdf.PDF_NAME("AA"))
-            w1.pdf_dict_del(pymupdf.PDF_NAME("AA"))
-            new_obj.pdf_dict_put(pymupdf.PDF_NAME("AA"), aa)
-
-            # remove name field, insert "Parent" field in source widgets
-            w1.pdf_dict_del(pymupdf.PDF_NAME("T"))
-            w1.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
-            w2.pdf_dict_del(pymupdf.PDF_NAME("T"))
-            w2.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
-
-            # put source widgets in "kids" array
-            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
-            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
-            kids.pdf_array_push(ind1)
-            kids.pdf_array_push(ind2)
-
-            # remove source widgets from "AcroForm/Fields"
-            idx = acro_fields.pdf_array_find(ind1)
-            acro_fields.pdf_array_delete(idx)
-            idx = acro_fields.pdf_array_find(ind2)
-            acro_fields.pdf_array_delete(idx)
-
-            acro_fields.pdf_array_push(new_ind)
-
-        w1 = mupdf.pdf_load_object(pdf, xref1)
-        w2 = mupdf.pdf_load_object(pdf, xref2)
-        kids1 = w1.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
-        kids2 = w2.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
-
-        # check which widget has a suitable "Kids" array
-        if kids1.pdf_is_array():
-            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
-        elif kids2.pdf_is_array():
-            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
-        else:
-            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order
-
-    def get_kids(parent, kids_list):
-        """Return xref list of leaf kids for a parent.
-
-        Call with an empty list.
-        """
-        kids = mupdf.pdf_dict_get(parent, pymupdf.PDF_NAME("Kids"))
-        if not kids.pdf_is_array():
-            return kids_list
-        for i in range(kids.pdf_array_len()):
-            kid = kids.pdf_array_get(i)
-            if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, pymupdf.PDF_NAME("Kids"))):
-                kids_list = get_kids(kid, kids_list)
-            else:
-                kids_list.append(kid.pdf_to_num())
-        return kids_list
-
-    def kids_xrefs(widget):
-        """Get the xref of top "Parent" and the list of leaf widgets."""
-        kids_list = []
-        parent = mupdf.pdf_dict_get(widget, pymupdf.PDF_NAME("Parent"))
-        parent_xref = parent.pdf_to_num()
-        if parent_xref == 0:
-            return parent_xref, kids_list
-        kids_list = get_kids(parent, kids_list)
-        return parent_xref, kids_list
-
-    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
-        """Handle any widget name duplicates caused by the merge."""
-        names = {}  # key is a widget name, value a list of widgets having it.
-
-        # extract all names and widgets in "AcroForm/Fields"
-        for i in range(mupdf.pdf_array_len(acro_fields)):
-            wobject = mupdf.pdf_array_get(acro_fields, i)
-            xref = wobject.pdf_to_num()
-
-            # extract widget name and collect widget(s) using it
-            T = mupdf.pdf_dict_get_text_string(wobject, pymupdf.PDF_NAME("T"))
-            xrefs = names.get(T, [])
-            xrefs.append(xref)
-            names[T] = xrefs
-
-        for name, xrefs in names.items():
-            if len(xrefs) < 2:
-                continue
-            xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
-            if join_duplicates:  # combine fields with equal names
-                join_widgets(pdf, acro_fields, xref0, xref1, name)
-            else:  # make field names unique
-                newname = name + f" [{xref1}]"  # append this to the name
-                wobject = mupdf.pdf_load_object(pdf, xref1)
-                wobject.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), newname)
-
-        clean_kid_parents(acro_fields)
-
-    def get_acroform(doc):
-        """Retrieve the AcroForm dictionary form a PDF."""
-        pdf = mupdf.pdf_document_from_fz_document(doc)
-        # AcroForm (= central form field info)
-        return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")
-
-    tarpdf = mupdf.pdf_document_from_fz_document(tar)
-    srcpdf = mupdf.pdf_document_from_fz_document(src)
-
-    if tar.is_form_pdf:
-        # target is a Form PDF, so use it to include source fields
-        acro = get_acroform(tar)
-        # Important arrays in AcroForm
-        acro_fields = acro.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
-        tar_co = acro.pdf_dict_get(pymupdf.PDF_NAME("CO"))
-        if not tar_co.pdf_is_array():
-            tar_co = acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
-    else:
-        # target is no Form PDF, so copy over source AcroForm
-        acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy
-
-        # Clear "Fields" and "CO" arrays: will be populated by page fields.
-        # This is required to avoid copying unneeded objects.
-        acro.pdf_dict_del(pymupdf.PDF_NAME("Fields"))
-        acro.pdf_dict_put_array(pymupdf.PDF_NAME("Fields"), 5)
-        acro.pdf_dict_del(pymupdf.PDF_NAME("CO"))
-        acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
-
-        # Enrich AcroForm for copying to target
-        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)
-
-        # Insert AcroForm into target PDF
-        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
-        acro_fields = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
-        tar_co = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("CO"))
-
-        # get its xref and insert it into target catalog
-        tar_xref = acro_tar.pdf_to_num()
-        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
-        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), pymupdf.PDF_NAME("Root"))
-        root.pdf_dict_put(pymupdf.PDF_NAME("AcroForm"), acro_tar_ind)
-
-    if from_page <= to_page:
-        src_range = range(from_page, to_page + 1)
-    else:
-        src_range = range(from_page, to_page - 1, -1)
-
-    parents = {}  # information about widget parents
-
-    # remove "P" owning page reference from all widgets of all source pages
-    for i in src_range:
-        src_page = src[i]
-        for xref in [
-            xref
-            for xref, wtype, _ in src_page.annot_xrefs()
-            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
-        ]:
-            w_obj = mupdf.pdf_load_object(srcpdf, xref)
-            w_obj.pdf_dict_del(pymupdf.PDF_NAME("P"))
-
-            # get the widget's parent structure
-            parent_xref, old_kids = kids_xrefs(w_obj)
-            if parent_xref:
-                parents[parent_xref] = {
-                    "new_xref": 0,
-                    "old_kids": old_kids,
-                    "new_kids": [],
-                }
-    # Copy over Parent widgets first - they are not page-dependent
-    for xref in parents.keys():  # pylint: disable=consider-using-dict-items
-        parent = mupdf.pdf_load_object(srcpdf, xref)
-        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
-        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
-        kids_xrefs_new = get_kids(parent_tar, [])
-        parent_xref_new = parent_tar.pdf_to_num()
-        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
-        acro_fields.pdf_array_push(parent_ind)
-        parents[xref]["new_xref"] = parent_xref_new
-        parents[xref]["new_kids"] = kids_xrefs_new
-
-    for i in range(len(src_range)):
-        # read first copied over page in target
-        tar_page = tar[start_at + i]
-
-        # read the original page in the source PDF
-        src_page = src[src_range[i]]
-
-        # now walk through source page widgets and copy over
-        w_xrefs = [  # widget xrefs of the source page
-            xref
-            for xref, wtype, _ in src_page.annot_xrefs()
-            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
-        ]
-        if not w_xrefs:  # no widgets on this source page
-            continue
-
-        # convert to formal PDF page
-        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)
-
-        # extract annotations array
-        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"))
-        if not mupdf.pdf_is_array(tar_annots):
-            tar_annots = mupdf.pdf_dict_put_array(
-                tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"), 5
-            )
-
-        for xref in w_xrefs:
-            w_obj = mupdf.pdf_load_object(srcpdf, xref)
-
-            # check if field takes part in inter-field validations
-            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))
-
-            # check if parent of widget already in target
-            parent_xref = mupdf.pdf_to_num(
-                w_obj.pdf_dict_get(pymupdf.PDF_NAME("Parent"))
-            )
-            if parent_xref == 0:  # parent not in target yet
-                try:
-                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
-                except Exception as e:
-                    pymupdf.message_warning(f"cannot copy widget at {xref=}: {e}")
-                    continue
-                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
-                tar_xref = w_obj_tar.pdf_to_num()
-                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
-                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
-                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
-            else:
-                parent = parents[parent_xref]
-                idx = parent["old_kids"].index(xref)  # search for xref in parent
-                tar_xref = parent["new_kids"][idx]
-                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
-                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
-
-            # Into "AcroForm/CO" if a computation field.
-            if is_aac:
-                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)
-
-    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
-
-def do_links(
-    doc1: pymupdf.Document,
-    doc2: pymupdf.Document,
-    from_page: int = -1,
-    to_page: int = -1,
-    start_at: int = -1,
-) -> None:
-    """Insert links contained in copied page range into destination PDF.
-
-    Parameter values **must** equal those of method insert_pdf(), which must
-    have been previously executed.
-    """
-    #pymupdf.log( 'utils.do_links()')
-    # --------------------------------------------------------------------------
-    # internal function to create the actual "/Annots" object string
-    # --------------------------------------------------------------------------
-    def cre_annot(lnk, xref_dst, pno_src, ctm):
-        """Create annotation object string for a passed-in link."""
-
-        r = lnk["from"] * ctm  # rect in PDF coordinates
-        rect = _format_g(tuple(r))
-        if lnk["kind"] == pymupdf.LINK_GOTO:
-            txt = pymupdf.annot_skel["goto1"]  # annot_goto
-            idx = pno_src.index(lnk["page"])
-            p = lnk["to"] * ctm  # target point in PDF coordinates
-            annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
-
-        elif lnk["kind"] == pymupdf.LINK_GOTOR:
-            if lnk["page"] >= 0:
-                txt = pymupdf.annot_skel["gotor1"]  # annot_gotor
-                pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
-                if type(pnt) is not pymupdf.Point:
-                    pnt = pymupdf.Point(0, 0)
-                annot = txt(
-                    lnk["page"],
-                    pnt.x,
-                    pnt.y,
-                    lnk["zoom"],
-                    lnk["file"],
-                    lnk["file"],
-                    rect,
-                )
-            else:
-                txt = pymupdf.annot_skel["gotor2"]  # annot_gotor_n
-                to = pymupdf.get_pdf_str(lnk["to"])
-                to = to[1:-1]
-                f = lnk["file"]
-                annot = txt(to, f, rect)
-
-        elif lnk["kind"] == pymupdf.LINK_LAUNCH:
-            txt = pymupdf.annot_skel["launch"]  # annot_launch
-            annot = txt(lnk["file"], lnk["file"], rect)
-
-        elif lnk["kind"] == pymupdf.LINK_URI:
-            txt = pymupdf.annot_skel["uri"]  # annot_uri
-            annot = txt(lnk["uri"], rect)
-
-        else:
-            annot = ""
-
-        return annot
-
-    # --------------------------------------------------------------------------
-
-    # validate & normalize parameters
-    if from_page < 0:
-        fp = 0
-    elif from_page >= doc2.page_count:
-        fp = doc2.page_count - 1
-    else:
-        fp = from_page
-
-    if to_page < 0 or to_page >= doc2.page_count:
-        tp = doc2.page_count - 1
-    else:
-        tp = to_page
-
-    if start_at < 0:
-        raise ValueError("'start_at' must be >= 0")
-    sa = start_at
-
-    incr = 1 if fp <= tp else -1  # page range could be reversed
-
-    # lists of source / destination page numbers
-    pno_src = list(range(fp, tp + incr, incr))
-    pno_dst = [sa + i for i in range(len(pno_src))]
-
-    # lists of source / destination page xrefs
-    xref_src = []
-    xref_dst = []
-    for i in range(len(pno_src)):
-        p_src = pno_src[i]
-        p_dst = pno_dst[i]
-        old_xref = doc2.page_xref(p_src)
-        new_xref = doc1.page_xref(p_dst)
-        xref_src.append(old_xref)
-        xref_dst.append(new_xref)
-
-    # create the links for each copied page in destination PDF
-    for i in range(len(xref_src)):
-        page_src = doc2[pno_src[i]]  # load source page
-        links = page_src.get_links()  # get all its links
-        #pymupdf.log( '{pno_src=}')
-        #pymupdf.log( '{type(page_src)=}')
-        #pymupdf.log( '{page_src=}')
-        #pymupdf.log( '{=i len(links)}')
-        if len(links) == 0:  # no links there
-            page_src = None
-            continue
-        ctm = ~page_src.transformation_matrix  # calc page transformation matrix
-        page_dst = doc1[pno_dst[i]]  # load destination page
-        link_tab = []  # store all link definitions here
-        for l in links:
-            if l["kind"] == pymupdf.LINK_GOTO and (l["page"] not in pno_src):
-                continue  # GOTO link target not in copied pages
-            annot_text = cre_annot(l, xref_dst, pno_src, ctm)
-            if annot_text:
-                link_tab.append(annot_text)
-        if link_tab != []:
-            page_dst._addAnnot_FromString( tuple(link_tab))
-    #pymupdf.log( 'utils.do_links() returning.')
-
-
 def getLinkText(page: pymupdf.Page, lnk: dict) -> str:
     # --------------------------------------------------------------------------
     # define skeletons for /Annots object texts
@@ -2216,754 +707,6 @@
     return annot
 
 
-def delete_widget(page: pymupdf.Page, widget: pymupdf.Widget) -> pymupdf.Widget:
-    """Delete widget from page and return the next one."""
-    pymupdf.CheckParent(page)
-    annot = getattr(widget, "_annot", None)
-    if annot is None:
-        raise ValueError("bad type: widget")
-    nextwidget = widget.next
-    page.delete_annot(annot)
-    widget._annot.parent = None
-    keylist = list(widget.__dict__.keys())
-    for key in keylist:
-        del widget.__dict__[key]
-    return nextwidget
-
-
-def update_link(page: pymupdf.Page, lnk: dict) -> None:
-    """Update a link on the current page."""
-    pymupdf.CheckParent(page)
-    annot = getLinkText(page, lnk)
-    if annot == "":
-        raise ValueError("link kind not supported")
-
-    page.parent.update_object(lnk["xref"], annot, page=page)
-
-
-def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None:
-    """Insert a new link for the current page."""
-    pymupdf.CheckParent(page)
-    annot = getLinkText(page, lnk)
-    if annot == "":
-        raise ValueError("link kind not supported")
-    page._addAnnot_FromString((annot,))
-
-
-def insert_textbox(
-    page: pymupdf.Page,
-    rect: rect_like,
-    buffer: typing.Union[str, list],
-    *,
-    fontname: str = "helv",
-    fontfile: OptStr = None,
-    set_simple: int = 0,
-    encoding: int = 0,
-    fontsize: float = 11,
-    lineheight: OptFloat = None,
-    color: OptSeq = None,
-    fill: OptSeq = None,
-    expandtabs: int = 1,
-    align: int = 0,
-    rotate: int = 0,
-    render_mode: int = 0,
-    miter_limit: float = 1,
-    border_width: float = 0.05,
-    morph: OptSeq = None,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> float:
-    """Insert text into a given rectangle.
-
-    Notes:
-        Creates a Shape object, uses its same-named method and commits it.
-    Parameters:
-        rect: (rect-like) area to use for text.
-        buffer: text to be inserted
-        fontname: a Base-14 font, font name or '/name'
-        fontfile: name of a font file
-        fontsize: font size
-        lineheight: overwrite the font property
-        color: RGB color triple
-        expandtabs: handles tabulators with string function
-        align: left, center, right, justified
-        rotate: 0, 90, 180, or 270 degrees
-        morph: morph box with a matrix and a fixpoint
-        overlay: put text in foreground or background
-    Returns:
-        unused or deficit rectangle area (float)
-    """
-    img = page.new_shape()
-    rc = img.insert_textbox(
-        rect,
-        buffer,
-        fontsize=fontsize,
-        lineheight=lineheight,
-        fontname=fontname,
-        fontfile=fontfile,
-        set_simple=set_simple,
-        encoding=encoding,
-        color=color,
-        fill=fill,
-        expandtabs=expandtabs,
-        render_mode=render_mode,
-        miter_limit=miter_limit,
-        border_width=border_width,
-        align=align,
-        rotate=rotate,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    if rc >= 0:
-        img.commit(overlay)
-    return rc
-
-
-def insert_text(
-    page: pymupdf.Page,
-    point: point_like,
-    text: typing.Union[str, list],
-    *,
-    fontsize: float = 11,
-    lineheight: OptFloat = None,
-    fontname: str = "helv",
-    fontfile: OptStr = None,
-    set_simple: int = 0,
-    encoding: int = 0,
-    color: OptSeq = None,
-    fill: OptSeq = None,
-    border_width: float = 0.05,
-    miter_limit: float = 1,
-    render_mode: int = 0,
-    rotate: int = 0,
-    morph: OptSeq = None,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-):
-
-    img = page.new_shape()
-    rc = img.insert_text(
-        point,
-        text,
-        fontsize=fontsize,
-        lineheight=lineheight,
-        fontname=fontname,
-        fontfile=fontfile,
-        set_simple=set_simple,
-        encoding=encoding,
-        color=color,
-        fill=fill,
-        border_width=border_width,
-        render_mode=render_mode,
-        miter_limit=miter_limit,
-        rotate=rotate,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    if rc >= 0:
-        img.commit(overlay)
-    return rc
-
-
-def insert_htmlbox(
-    page,
-    rect,
-    text,
-    *,
-    css=None,
-    scale_low=0,
-    archive=None,
-    rotate=0,
-    oc=0,
-    opacity=1,
-    overlay=True,
-) -> float:
-    """Insert text with optional HTML tags and stylings into a rectangle.
-
-    Args:
-        rect: (rect-like) rectangle into which the text should be placed.
-        text: (str) text with optional HTML tags and stylings.
-        css: (str) CSS styling commands.
-        scale_low: (float) force-fit content by scaling it down. Must be in
-            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
-            down-scaling is acceptable. A value of 0.1 would mean that content
-            may be scaled down by at most 90%.
-        archive: Archive object pointing to locations of used fonts or images
-        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
-        oc: (int) the xref of an OCG / OCMD (Optional Content).
-        opacity: (float) set opacity of inserted content.
-        overlay: (bool) put text on top of page content.
-    Returns:
-        A tuple of floats (spare_height, scale).
-        spare_height: -1 if content did not fit, else >= 0. It is the height of the
-               unused (still available) rectangle stripe. Positive only if
-               scale_min = 1 (no down scaling).
-        scale: downscaling factor, 0 < scale <= 1. Set to 0 if spare_height = -1 (no fit).
-    """
-
-    # normalize rotation angle
-    if not rotate % 90 == 0:
-        raise ValueError("bad rotation angle")
-    while rotate < 0:
-        rotate += 360
-    while rotate >= 360:
-        rotate -= 360
-
-    if not 0 <= scale_low <= 1:
-        raise ValueError("'scale_low' must be in [0, 1]")
-
-    if css is None:
-        css = ""
-
-    rect = pymupdf.Rect(rect)
-    if rotate in (90, 270):
-        temp_rect = pymupdf.Rect(0, 0, rect.height, rect.width)
-    else:
-        temp_rect = pymupdf.Rect(0, 0, rect.width, rect.height)
-
-    # use a small border by default
-    mycss = "body {margin:1px;}" + css  # append user CSS
-
-    # either make a story, or accept a given one
-    if isinstance(text, str):  # if a string, convert to a Story
-        story = pymupdf.Story(html=text, user_css=mycss, archive=archive)
-    elif isinstance(text, pymupdf.Story):
-        story = text
-    else:
-        raise ValueError("'text' must be a string or a Story")
-    # ----------------------------------------------------------------
-    # Find a scaling factor that lets our story fit in
-    # ----------------------------------------------------------------
-    scale_max = None if scale_low == 0 else 1 / scale_low
-
-    fit = story.fit_scale(temp_rect, scale_min=1, scale_max=scale_max)
-    if not fit.big_enough:  # there was no fit
-        return (-1, scale_low)
-
-    filled = fit.filled
-    scale = 1 / fit.parameter  # shrink factor
-
-    spare_height = fit.rect.y1 - filled[3]  # unused room at rectangle bottom
-    # Note: due to MuPDF's logic this may be negative even for successful fits.
-    if scale != 1 or spare_height < 0:  # if scaling occurred, set spare_height to 0
-        spare_height = 0
-
-    def rect_function(*args):
-        return fit.rect, fit.rect, pymupdf.Identity
-
-    # draw story on temp PDF page
-    doc = story.write_with_links(rect_function)
-
-    # Insert opacity if requested.
-    # For this, we prepend a command to the /Contents.
-    if 0 <= opacity < 1:
-        tpage = doc[0]  # load page
-        # generate /ExtGstate for the page
-        alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
-        s = f"/{alp0} gs\n"  # generate graphic state command
-        pymupdf.TOOLS._insert_contents(tpage, s.encode(), 0)
-
-    # put result in target page
-    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)
-
-    # -------------------------------------------------------------------------
-    # re-insert links in target rect (show_pdf_page cannot copy annotations)
-    # -------------------------------------------------------------------------
-    # scaled center point of fit.rect
-    mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale
-
-    # center point of target rect
-    mp2 = (rect.tl + rect.br) / 2
-
-    # compute link positioning matrix:
-    # - move center of scaled-down fit.rect to (0,0)
-    # - rotate
-    # - move (0,0) to center of target rect
-    mat = (
-        pymupdf.Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
-        * pymupdf.Matrix(-rotate)
-        * pymupdf.Matrix(1, 0, 0, 1, mp2.x, mp2.y)
-    )
-
-    # copy over links
-    for link in doc[0].get_links():
-        link["from"] *= mat
-        page.insert_link(link)
-
-    return spare_height, scale
-
-
-def new_page(
-    doc: pymupdf.Document,
-    pno: int = -1,
-    width: float = 595,
-    height: float = 842,
-) -> pymupdf.Page:
-    """Create and return a new page object.
-
-    Args:
-        pno: (int) insert before this page. Default: after last page.
-        width: (float) page width in points. Default: 595 (ISO A4 width).
-        height: (float) page height in points. Default 842 (ISO A4 height).
-    Returns:
-        A pymupdf.Page object.
-    """
-    doc._newPage(pno, width=width, height=height)
-    return doc[pno]
-
-
-def insert_page(
-    doc: pymupdf.Document,
-    pno: int,
-    text: typing.Union[str, list, None] = None,
-    fontsize: float = 11,
-    width: float = 595,
-    height: float = 842,
-    fontname: str = "helv",
-    fontfile: OptStr = None,
-    color: OptSeq = (0,),
-) -> int:
-    """Create a new PDF page and insert some text.
-
-    Notes:
-        Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
-        For parameter details see these methods.
-    """
-    page = doc.new_page(pno=pno, width=width, height=height)
-    if not bool(text):
-        return 0
-    rc = page.insert_text(
-        (50, 72),
-        text,
-        fontsize=fontsize,
-        fontname=fontname,
-        fontfile=fontfile,
-        color=color,
-    )
-    return rc
-
-
-def draw_line(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    color: OptSeq = (0,),
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    morph: OptSeq = None,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc=0,
-) -> pymupdf.Point:
-    """Draw a line from point p1 to point p2."""
-    img = page.new_shape()
-    p = img.draw_line(pymupdf.Point(p1), pymupdf.Point(p2))
-    img.finish(
-        color=color,
-        dashes=dashes,
-        width=width,
-        closePath=False,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return p
-
-
-def draw_squiggle(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    breadth: float = 2,
-    color: OptSeq = (0,),
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    morph: OptSeq = None,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a squiggly line from point p1 to point p2."""
-    img = page.new_shape()
-    p = img.draw_squiggle(pymupdf.Point(p1), pymupdf.Point(p2), breadth=breadth)
-    img.finish(
-        color=color,
-        dashes=dashes,
-        width=width,
-        closePath=False,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return p
-
-
-def draw_zigzag(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    breadth: float = 2,
-    color: OptSeq = (0,),
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    morph: OptSeq = None,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a zigzag line from point p1 to point p2."""
-    img = page.new_shape()
-    p = img.draw_zigzag(pymupdf.Point(p1), pymupdf.Point(p2), breadth=breadth)
-    img.finish(
-        color=color,
-        dashes=dashes,
-        width=width,
-        closePath=False,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return p
-
-
-def draw_rect(
-        page: pymupdf.Page,
-        rect: rect_like,
-        color: OptSeq = (0,),
-        fill: OptSeq = None,
-        dashes: OptStr = None,
-        width: float = 1,
-        lineCap: int = 0,
-        lineJoin: int = 0,
-        morph: OptSeq = None,
-        overlay: bool = True,
-        stroke_opacity: float = 1,
-        fill_opacity: float = 1,
-        oc: int = 0,
-        radius=None,
-        ) -> pymupdf.Point:
-    '''
-    Draw a rectangle. See Shape class method for details.
-    '''
-    img = page.new_shape()
-    Q = img.draw_rect(pymupdf.Rect(rect), radius=radius)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_quad(
-    page: pymupdf.Page,
-    quad: quad_like,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    morph: OptSeq = None,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a quadrilateral."""
-    img = page.new_shape()
-    Q = img.draw_quad(pymupdf.Quad(quad))
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_polyline(
-    page: pymupdf.Page,
-    points: list,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    morph: OptSeq = None,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    closePath: bool = False,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw multiple connected line segments."""
-    img = page.new_shape()
-    Q = img.draw_polyline(points)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_circle(
-    page: pymupdf.Page,
-    center: point_like,
-    radius: float,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    morph: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a circle given its center and radius."""
-    img = page.new_shape()
-    Q = img.draw_circle(pymupdf.Point(center), radius)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-    return Q
-
-
-def draw_oval(
-    page: pymupdf.Page,
-    rect: typing.Union[rect_like, quad_like],
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    morph: OptSeq = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw an oval given its containing rectangle or quad."""
-    img = page.new_shape()
-    Q = img.draw_oval(rect)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_curve(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    p3: point_like,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    morph: OptSeq = None,
-    closePath: bool = False,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3."""
-    img = page.new_shape()
-    Q = img.draw_curve(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3))
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_bezier(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    p3: point_like,
-    p4: point_like,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    morph: OptStr = None,
-    closePath: bool = False,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3."""
-    img = page.new_shape()
-    Q = img.draw_bezier(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3), pymupdf.Point(p4))
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_sector(
-    page: pymupdf.Page,
-    center: point_like,
-    point: point_like,
-    beta: float,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    fullSector: bool = True,
-    morph: OptSeq = None,
-    width: float = 1,
-    closePath: bool = False,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a circle sector given circle center, one arc end point and the angle of the arc.
-
-    Parameters:
-        center -- center of circle
-        point -- arc end point
-        beta -- angle of arc (degrees)
-        fullSector -- connect arc ends with center
-    """
-    img = page.new_shape()
-    Q = img.draw_sector(pymupdf.Point(center), pymupdf.Point(point), beta, fullSector=fullSector)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
 # ----------------------------------------------------------------------
 # Name:        wx.lib.colourdb.py
 # Purpose:     Adds a bunch of colour names and RGB values to the
@@ -3081,1360 +824,6 @@
     return fontname, ext, stype, asc, dsc
 
 
-def get_char_widths(
-    doc: pymupdf.Document, xref: int, limit: int = 256, idx: int = 0, fontdict: OptDict = None
-) -> list:
-    """Get list of glyph information of a font.
-
-    Notes:
-        Must be provided by its XREF number. If we already dealt with the
-        font, it will be recorded in doc.FontInfos. Otherwise we insert an
-        entry there.
-        Finally we return the glyphs for the font. This is a list of
-        (glyph, width) where glyph is an integer controlling the char
-        appearance, and width is a float controlling the char's spacing:
-        width * fontsize is the actual space.
-        For 'simple' fonts, glyph == ord(char) will usually be true.
-        Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.
-    """
-    fontinfo = pymupdf.CheckFontInfo(doc, xref)
-    if fontinfo is None:  # not recorded yet: create it
-        if fontdict is None:
-            name, ext, stype, asc, dsc = _get_font_properties(doc, xref)
-            fontdict = {
-                "name": name,
-                "type": stype,
-                "ext": ext,
-                "ascender": asc,
-                "descender": dsc,
-            }
-        else:
-            name = fontdict["name"]
-            ext = fontdict["ext"]
-            stype = fontdict["type"]
-            ordering = fontdict["ordering"]
-            simple = fontdict["simple"]
-
-        if ext == "":
-            raise ValueError("xref is not a font")
-
-        # check for 'simple' fonts
-        if stype in ("Type1", "MMType1", "TrueType"):
-            simple = True
-        else:
-            simple = False
-
-        # check for CJK fonts
-        if name in ("Fangti", "Ming"):
-            ordering = 0
-        elif name in ("Heiti", "Song"):
-            ordering = 1
-        elif name in ("Gothic", "Mincho"):
-            ordering = 2
-        elif name in ("Dotum", "Batang"):
-            ordering = 3
-        else:
-            ordering = -1
-
-        fontdict["simple"] = simple
-
-        if name == "ZapfDingbats":
-            glyphs = pymupdf.zapf_glyphs
-        elif name == "Symbol":
-            glyphs = pymupdf.symbol_glyphs
-        else:
-            glyphs = None
-
-        fontdict["glyphs"] = glyphs
-        fontdict["ordering"] = ordering
-        fontinfo = [xref, fontdict]
-        doc.FontInfos.append(fontinfo)
-    else:
-        fontdict = fontinfo[1]
-        glyphs = fontdict["glyphs"]
-        simple = fontdict["simple"]
-        ordering = fontdict["ordering"]
-
-    if glyphs is None:
-        oldlimit = 0
-    else:
-        oldlimit = len(glyphs)
-
-    mylimit = max(256, limit)
-
-    if mylimit <= oldlimit:
-        return glyphs
-
-    if ordering < 0:  # not a CJK font
-        glyphs = doc._get_char_widths(
-            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
-        )
-    else:  # CJK fonts use char codes and width = 1
-        glyphs = None
-
-    fontdict["glyphs"] = glyphs
-    fontinfo[1] = fontdict
-    pymupdf.UpdateFontInfo(doc, fontinfo)
-
-    return glyphs
-
-
-class Shape:
-    """Create a new shape."""
-
-    @staticmethod
-    def horizontal_angle(C, P):
-        """Return the angle to the horizontal for the connection from C to P.
-        This uses the arcus sine function and resolves its inherent ambiguity by
-        looking up in which quadrant vector S = P - C is located.
-        """
-        S = pymupdf.Point(P - C).unit  # unit vector 'C' -> 'P'
-        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
-        if S.x < 0:  # make arcsin result unique
-            if S.y <= 0:  # bottom-left
-                alfa = -(math.pi - alfa)
-            else:  # top-left
-                alfa = math.pi - alfa
-        else:
-            if S.y >= 0:  # top-right
-                pass
-            else:  # bottom-right
-                alfa = -alfa
-        return alfa
-
-    def __init__(self, page: pymupdf.Page):
-        pymupdf.CheckParent(page)
-        self.page = page
-        self.doc = page.parent
-        if not self.doc.is_pdf:
-            raise ValueError("is no PDF")
-        self.height = page.mediabox_size.y
-        self.width = page.mediabox_size.x
-        self.x = page.cropbox_position.x
-        self.y = page.cropbox_position.y
-
-        self.pctm = page.transformation_matrix  # page transf. matrix
-        self.ipctm = ~self.pctm  # inverted transf. matrix
-
-        self.draw_cont = ""
-        self.text_cont = ""
-        self.totalcont = ""
-        self.last_point = None
-        self.rect = None
-
-    def updateRect(self, x):
-        if self.rect is None:
-            if len(x) == 2:
-                self.rect = pymupdf.Rect(x, x)
-            else:
-                self.rect = pymupdf.Rect(x)
-
-        else:
-            if len(x) == 2:
-                x = pymupdf.Point(x)
-                self.rect.x0 = min(self.rect.x0, x.x)
-                self.rect.y0 = min(self.rect.y0, x.y)
-                self.rect.x1 = max(self.rect.x1, x.x)
-                self.rect.y1 = max(self.rect.y1, x.y)
-            else:
-                x = pymupdf.Rect(x)
-                self.rect.x0 = min(self.rect.x0, x.x0)
-                self.rect.y0 = min(self.rect.y0, x.y0)
-                self.rect.x1 = max(self.rect.x1, x.x1)
-                self.rect.y1 = max(self.rect.y1, x.y1)
-
-    def draw_line(self, p1: point_like, p2: point_like) -> pymupdf.Point:
-        """Draw a line between two points."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        if not (self.last_point == p1):
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
-            self.last_point = p1
-            self.updateRect(p1)
-
-        self.draw_cont += _format_g(pymupdf.JM_TUPLE(p2 * self.ipctm)) + " l\n"
-        self.updateRect(p2)
-        self.last_point = p2
-        return self.last_point
-
-    def draw_polyline(self, points: list) -> pymupdf.Point:
-        """Draw several connected line segments."""
-        for i, p in enumerate(points):
-            if i == 0:
-                if not (self.last_point == pymupdf.Point(p)):
-                    self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " m\n"
-                    self.last_point = pymupdf.Point(p)
-            else:
-                self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " l\n"
-            self.updateRect(p)
-
-        self.last_point = pymupdf.Point(points[-1])
-        return self.last_point
-
-    def draw_bezier(
-        self,
-        p1: point_like,
-        p2: point_like,
-        p3: point_like,
-        p4: point_like,
-    ) -> pymupdf.Point:
-        """Draw a standard cubic Bezier curve."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        p3 = pymupdf.Point(p3)
-        p4 = pymupdf.Point(p4)
-        if not (self.last_point == p1):
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
-        args = pymupdf.JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
-        self.draw_cont += _format_g(args) + " c\n"
-        self.updateRect(p1)
-        self.updateRect(p2)
-        self.updateRect(p3)
-        self.updateRect(p4)
-        self.last_point = p4
-        return self.last_point
-
-    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> pymupdf.Point:
-        """Draw an ellipse inside a tetrapod."""
-        if len(tetra) != 4:
-            raise ValueError("invalid arg length")
-        if hasattr(tetra[0], "__float__"):
-            q = pymupdf.Rect(tetra).quad
-        else:
-            q = pymupdf.Quad(tetra)
-
-        mt = q.ul + (q.ur - q.ul) * 0.5
-        mr = q.ur + (q.lr - q.ur) * 0.5
-        mb = q.ll + (q.lr - q.ll) * 0.5
-        ml = q.ul + (q.ll - q.ul) * 0.5
-        if not (self.last_point == ml):
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(ml * self.ipctm)) + " m\n"
-            self.last_point = ml
-        self.draw_curve(ml, q.ll, mb)
-        self.draw_curve(mb, q.lr, mr)
-        self.draw_curve(mr, q.ur, mt)
-        self.draw_curve(mt, q.ul, ml)
-        self.updateRect(q.rect)
-        self.last_point = ml
-        return self.last_point
-
-    def draw_circle(self, center: point_like, radius: float) -> pymupdf.Point:
-        """Draw a circle given its center and radius."""
-        if not radius > pymupdf.EPSILON:
-            raise ValueError("radius must be positive")
-        center = pymupdf.Point(center)
-        p1 = center - (radius, 0)
-        return self.draw_sector(center, p1, 360, fullSector=False)
-
-    def draw_curve(
-        self,
-        p1: point_like,
-        p2: point_like,
-        p3: point_like,
-    ) -> pymupdf.Point:
-        """Draw a curve between points using one control point."""
-        kappa = 0.55228474983
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        p3 = pymupdf.Point(p3)
-        k1 = p1 + (p2 - p1) * kappa
-        k2 = p3 + (p2 - p3) * kappa
-        return self.draw_bezier(p1, k1, k2, p3)
-
-    def draw_sector(
-        self,
-        center: point_like,
-        point: point_like,
-        beta: float,
-        fullSector: bool = True,
-    ) -> pymupdf.Point:
-        """Draw a circle sector."""
-        center = pymupdf.Point(center)
-        point = pymupdf.Point(point)
-        l3 = lambda a, b: _format_g((a, b)) + " m\n"
-        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
-        l5 = lambda a, b: _format_g((a, b)) + " l\n"
-        betar = math.radians(-beta)
-        w360 = math.radians(math.copysign(360, betar)) * (-1)
-        w90 = math.radians(math.copysign(90, betar))
-        w45 = w90 / 2
-        while abs(betar) > 2 * math.pi:
-            betar += w360  # bring angle below 360 degrees
-        if not (self.last_point == point):
-            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
-            self.last_point = point
-        Q = pymupdf.Point(0, 0)  # just make sure it exists
-        C = center
-        P = point
-        S = P - C  # vector 'center' -> 'point'
-        rad = abs(S)  # circle radius
-
-        if not rad > pymupdf.EPSILON:
-            raise ValueError("radius must be positive")
-
-        alfa = self.horizontal_angle(center, point)
-        while abs(betar) > abs(w90):  # draw 90 degree arcs
-            q1 = C.x + math.cos(alfa + w90) * rad
-            q2 = C.y + math.sin(alfa + w90) * rad
-            Q = pymupdf.Point(q1, q2)  # the arc's end point
-            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
-            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
-            R = pymupdf.Point(r1, r2)  # crossing point of tangents
-            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
-            kappa = kappah * abs(P - Q)
-            cp1 = P + (R - P) * kappa  # control point 1
-            cp2 = Q + (R - Q) * kappa  # control point 2
-            self.draw_cont += l4(*pymupdf.JM_TUPLE(
-                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
-            ))
-
-            betar -= w90  # reduce param angle by 90 deg
-            alfa += w90  # advance start angle by 90 deg
-            P = Q  # advance to arc end point
-        # draw (remaining) arc
-        if abs(betar) > 1e-3:  # significant degrees left?
-            beta2 = betar / 2
-            q1 = C.x + math.cos(alfa + betar) * rad
-            q2 = C.y + math.sin(alfa + betar) * rad
-            Q = pymupdf.Point(q1, q2)  # the arc's end point
-            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
-            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
-            R = pymupdf.Point(r1, r2)  # crossing point of tangents
-            # kappa height is 4/3 of segment height
-            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
-            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
-            cp1 = P + (R - P) * kappa  # control point 1
-            cp2 = Q + (R - Q) * kappa  # control point 2
-            self.draw_cont += l4(*pymupdf.JM_TUPLE(
-                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
-            ))
-        if fullSector:
-            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
-            self.draw_cont += l5(*pymupdf.JM_TUPLE(center * self.ipctm))
-            self.draw_cont += l5(*pymupdf.JM_TUPLE(Q * self.ipctm))
-        self.last_point = Q
-        return self.last_point
-
-    def draw_rect(self, rect: rect_like, *, radius=None) -> pymupdf.Point:
-        """Draw a rectangle.
-
-        Args:
-            radius: if not None, the rectangle will have rounded corners.
-                This is the radius of the curvature, given as percentage of
-                the rectangle width or height. Valid are values 0 < v <= 0.5.
-                For a sequence of two values, the corners will have different
-                radii. Otherwise, the percentage will be computed from the
-                shorter side. A value of (0.5, 0.5) will draw an ellipse.
-        """
-        r = pymupdf.Rect(rect)
-        if radius is None:  # standard rectangle
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(
-                list(r.bl * self.ipctm) + [r.width, r.height]
-            )) + " re\n"
-            self.updateRect(r)
-            self.last_point = r.tl
-            return self.last_point
-        # rounded corners requested. This requires 1 or 2 values, each
-        # with 0 < value <= 0.5
-        if hasattr(radius, "__float__"):
-            if radius <= 0 or radius > 0.5:
-                raise ValueError(f"bad radius value {radius}.")
-            d = min(r.width, r.height) * radius
-            px = (d, 0)
-            py = (0, d)
-        elif hasattr(radius, "__len__") and len(radius) == 2:
-            rx, ry = radius
-            px = (rx * r.width, 0)
-            py = (0, ry * r.height)
-            if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
-                raise ValueError(f"bad radius value {radius}.")
-        else:
-            raise ValueError(f"bad radius value {radius}.")
-
-        lp = self.draw_line(r.tl + py, r.bl - py)
-        lp = self.draw_curve(lp, r.bl, r.bl + px)
-
-        lp = self.draw_line(lp, r.br - px)
-        lp = self.draw_curve(lp, r.br, r.br - py)
-
-        lp = self.draw_line(lp, r.tr + py)
-        lp = self.draw_curve(lp, r.tr, r.tr - px)
-
-        lp = self.draw_line(lp, r.tl + px)
-        self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
-
-        self.updateRect(r)
-        return self.last_point
-
-    def draw_quad(self, quad: quad_like) -> pymupdf.Point:
-        """Draw a Quad."""
-        q = pymupdf.Quad(quad)
-        return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])
-
-    def draw_zigzag(
-        self,
-        p1: point_like,
-        p2: point_like,
-        breadth: float = 2,
-    ) -> pymupdf.Point:
-        """Draw a zig-zagged line from p1 to p2."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        S = p2 - p1  # vector start - end
-        rad = abs(S)  # distance of points
-        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
-        if cnt < 4:
-            raise ValueError("points too close")
-        mb = rad / cnt  # revised breadth
-        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
-        i_mat = ~matrix  # get original position
-        points = []  # stores edges
-        for i in range(1, cnt):
-            if i % 4 == 1:  # point "above" connection
-                p = pymupdf.Point(i, -1) * mb
-            elif i % 4 == 3:  # point "below" connection
-                p = pymupdf.Point(i, 1) * mb
-            else:  # ignore others
-                continue
-            points.append(p * i_mat)
-        self.draw_polyline([p1] + points + [p2])  # add start and end points
-        return p2
-
-    def draw_squiggle(
-        self,
-        p1: point_like,
-        p2: point_like,
-        breadth=2,
-    ) -> pymupdf.Point:
-        """Draw a squiggly line from p1 to p2."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        S = p2 - p1  # vector start - end
-        rad = abs(S)  # distance of points
-        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
-        if cnt < 4:
-            raise ValueError("points too close")
-        mb = rad / cnt  # revised breadth
-        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
-        i_mat = ~matrix  # get original position
-        k = 2.4142135623765633  # y of draw_curve helper point
-
-        points = []  # stores edges
-        for i in range(1, cnt):
-            if i % 4 == 1:  # point "above" connection
-                p = pymupdf.Point(i, -k) * mb
-            elif i % 4 == 3:  # point "below" connection
-                p = pymupdf.Point(i, k) * mb
-            else:  # else on connection line
-                p = pymupdf.Point(i, 0) * mb
-            points.append(p * i_mat)
-
-        points = [p1] + points + [p2]
-        cnt = len(points)
-        i = 0
-        while i + 2 < cnt:
-            self.draw_curve(points[i], points[i + 1], points[i + 2])
-            i += 2
-        return p2
-
-    # ==============================================================================
-    # Shape.insert_text
-    # ==============================================================================
-    def insert_text(
-        self,
-        point: point_like,
-        buffer: typing.Union[str, list],
-        *,
-        fontsize: float = 11,
-        lineheight: OptFloat = None,
-        fontname: str = "helv",
-        fontfile: OptStr = None,
-        set_simple: bool = 0,
-        encoding: int = 0,
-        color: OptSeq = None,
-        fill: OptSeq = None,
-        render_mode: int = 0,
-        border_width: float = 0.05,
-        miter_limit: float = 1,
-        rotate: int = 0,
-        morph: OptSeq = None,
-        stroke_opacity: float = 1,
-        fill_opacity: float = 1,
-        oc: int = 0,
-    ) -> int:
-
-        # ensure 'text' is a list of strings, worth dealing with
-        if not bool(buffer):
-            return 0
-
-        if type(buffer) not in (list, tuple):
-            text = buffer.splitlines()
-        else:
-            text = buffer
-
-        if not len(text) > 0:
-            return 0
-
-        point = pymupdf.Point(point)
-        try:
-            maxcode = max([ord(c) for c in " ".join(text)])
-        except Exception:
-            pymupdf.exception_info()
-            return 0
-
-        # ensure valid 'fontname'
-        fname = fontname
-        if fname.startswith("/"):
-            fname = fname[1:]
-
-        xref = self.page.insert_font(
-            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
-        )
-        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)
-
-        fontdict = fontinfo[1]
-        ordering = fontdict["ordering"]
-        simple = fontdict["simple"]
-        bfname = fontdict["name"]
-        ascender = fontdict["ascender"]
-        descender = fontdict["descender"]
-        if lineheight:
-            lheight = fontsize * lineheight
-        elif ascender - descender <= 1:
-            lheight = fontsize * 1.2
-        else:
-            lheight = fontsize * (ascender - descender)
-
-        if maxcode > 255:
-            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
-        else:
-            glyphs = fontdict["glyphs"]
-
-        tab = []
-        for t in text:
-            if simple and bfname not in ("Symbol", "ZapfDingbats"):
-                g = None
-            else:
-                g = glyphs
-            tab.append(pymupdf.getTJstr(t, g, simple, ordering))
-        text = tab
-
-        color_str = pymupdf.ColorCode(color, "c")
-        fill_str = pymupdf.ColorCode(fill, "f")
-        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
-            fill = color
-            fill_str = pymupdf.ColorCode(color, "f")
-
-        morphing = pymupdf.CheckMorph(morph)
-        rot = rotate
-        if rot % 90 != 0:
-            raise ValueError("bad rotate value")
-
-        while rot < 0:
-            rot += 360
-        rot = rot % 360  # text rotate = 0, 90, 270, 180
-
-        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
-        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
-        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
-        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
-        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
-        height = self.height
-        width = self.width
-
-        # setting up for standard rotation directions
-        # case rotate = 0
-        if morphing:
-            m1 = pymupdf.Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
-            mat = ~m1 * morph[1] * m1
-            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
-        else:
-            cm = ""
-        top = height - point.y - self.y  # start of 1st char
-        left = point.x + self.x  # start of 1. char
-        space = top  # space available
-        #headroom = point.y + self.y  # distance to page border
-        if rot == 90:
-            left = height - point.y - self.y
-            top = -point.x - self.x
-            cm += cmp90
-            space = width - abs(top)
-            #headroom = point.x + self.x
-
-        elif rot == 270:
-            left = -height + point.y + self.y
-            top = point.x + self.x
-            cm += cmm90
-            space = abs(top)
-            #headroom = width - point.x - self.x
-
-        elif rot == 180:
-            left = -point.x - self.x
-            top = -height + point.y + self.y
-            cm += cm180
-            space = abs(point.y + self.y)
-            #headroom = height - point.y - self.y
-
-        optcont = self.page._get_optional_content(oc)
-        if optcont is not None:
-            bdc = "/OC /%s BDC\n" % optcont
-            emc = "EMC\n"
-        else:
-            bdc = emc = ""
-
-        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
-        if alpha is None:
-            alpha = ""
-        else:
-            alpha = "/%s gs\n" % alpha
-        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)
-
-        if render_mode > 0:
-            nres += "%i Tr " % render_mode
-            nres += _format_g(border_width * fontsize) + " w "
-            if miter_limit is not None:
-                nres += _format_g(miter_limit) + " M "
-        if color is not None:
-            nres += color_str
-        if fill is not None:
-            nres += fill_str
-
-        # =========================================================================
-        #   start text insertion
-        # =========================================================================
-        nres += text[0]
-        nlines = 1  # set output line counter
-        if len(text) > 1:
-            nres += templ2(lheight)  # line 1
-        else:
-            nres += 'TJ'
-        for i in range(1, len(text)):
-            if space < lheight:
-                break  # no space left on page
-            if i > 1:
-                nres += "\nT* "
-            nres += text[i] + 'TJ'
-            space -= lheight
-            nlines += 1
-
-        nres += "\nET\n%sQ\n" % emc
-
-        # =========================================================================
-        #   end of text insertion
-        # =========================================================================
-        # update the /Contents object
-        self.text_cont += nres
-        return nlines
-
-    # ==============================================================================
-    # Shape.insert_textbox
-    # ==============================================================================
-    def insert_textbox(
-        self,
-        rect: rect_like,
-        buffer: typing.Union[str, list],
-        *,
-        fontname: OptStr = "helv",
-        fontfile: OptStr = None,
-        fontsize: float = 11,
-        lineheight: OptFloat = None,
-        set_simple: bool = 0,
-        encoding: int = 0,
-        color: OptSeq = None,
-        fill: OptSeq = None,
-        expandtabs: int = 1,
-        border_width: float = 0.05,
-        miter_limit: float = 1,
-        align: int = 0,
-        render_mode: int = 0,
-        rotate: int = 0,
-        morph: OptSeq = None,
-        stroke_opacity: float = 1,
-        fill_opacity: float = 1,
-        oc: int = 0,
-    ) -> float:
-        """Insert text into a given rectangle.
-
-        Args:
-            rect -- the textbox to fill
-            buffer -- text to be inserted
-            fontname -- a Base-14 font, font name or '/name'
-            fontfile -- name of a font file
-            fontsize -- font size
-            lineheight -- overwrite the font property
-            color -- RGB stroke color triple
-            fill -- RGB fill color triple
-            render_mode -- text rendering control
-            border_width -- thickness of glyph borders as percentage of fontsize
-            expandtabs -- handles tabulators with string function
-            align -- left, center, right, justified
-            rotate -- 0, 90, 180, or 270 degrees
-            morph -- morph box with a matrix and a fixpoint
-        Returns:
-            unused or deficit rectangle area (float)
-        """
-        rect = pymupdf.Rect(rect)
-        if rect.is_empty or rect.is_infinite:
-            raise ValueError("text box must be finite and not empty")
-
-        color_str = pymupdf.ColorCode(color, "c")
-        fill_str = pymupdf.ColorCode(fill, "f")
-        if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
-            fill = color
-            fill_str = pymupdf.ColorCode(color, "f")
-
-        optcont = self.page._get_optional_content(oc)
-        if optcont is not None:
-            bdc = "/OC /%s BDC\n" % optcont
-            emc = "EMC\n"
-        else:
-            bdc = emc = ""
-
-        # determine opacity / transparency
-        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
-        if alpha is None:
-            alpha = ""
-        else:
-            alpha = "/%s gs\n" % alpha
-
-        if rotate % 90 != 0:
-            raise ValueError("rotate must be multiple of 90")
-
-        rot = rotate
-        while rot < 0:
-            rot += 360
-        rot = rot % 360
-
-        # is buffer worth of dealing with?
-        if not bool(buffer):
-            return rect.height if rot in (0, 180) else rect.width
-
-        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
-        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
-        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
-        height = self.height
-
-        fname = fontname
-        if fname.startswith("/"):
-            fname = fname[1:]
-
-        xref = self.page.insert_font(
-            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
-        )
-        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)
-
-        fontdict = fontinfo[1]
-        ordering = fontdict["ordering"]
-        simple = fontdict["simple"]
-        glyphs = fontdict["glyphs"]
-        bfname = fontdict["name"]
-        ascender = fontdict["ascender"]
-        descender = fontdict["descender"]
-
-        if lineheight:
-            lheight_factor = lineheight
-        elif ascender - descender <= 1:
-            lheight_factor = 1.2
-        else:
-            lheight_factor = ascender - descender
-        lheight = fontsize * lheight_factor
-
-        # create a list from buffer, split into its lines
-        if type(buffer) in (list, tuple):
-            t0 = "\n".join(buffer)
-        else:
-            t0 = buffer
-
-        maxcode = max([ord(c) for c in t0])
-        # replace invalid char codes for simple fonts
-        if simple and maxcode > 255:
-            t0 = "".join([c if ord(c) < 256 else "?" for c in t0])
-
-        t0 = t0.splitlines()
-
-        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
-        if simple and bfname not in ("Symbol", "ZapfDingbats"):
-            tj_glyphs = None
-        else:
-            tj_glyphs = glyphs
-
-        # ----------------------------------------------------------------------
-        # calculate pixel length of a string
-        # ----------------------------------------------------------------------
-        def pixlen(x):
-            """Calculate pixel length of x."""
-            if ordering < 0:
-                return sum([glyphs[ord(c)][1] for c in x]) * fontsize
-            else:
-                return len(x) * fontsize
-
-        # ---------------------------------------------------------------------
-
-        if ordering < 0:
-            blen = glyphs[32][1] * fontsize  # pixel size of space character
-        else:
-            blen = fontsize
-
-        text = ""  # output buffer
-
-        if pymupdf.CheckMorph(morph):
-            m1 = pymupdf.Matrix(
-                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
-            )
-            mat = ~m1 * morph[1] * m1
-            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
-        else:
-            cm = ""
-
-        # ---------------------------------------------------------------------
-        # adjust for text orientation / rotation
-        # ---------------------------------------------------------------------
-        progr = 1  # direction of line progress
-        c_pnt = pymupdf.Point(0, fontsize * ascender)  # used for line progress
-        if rot == 0:  # normal orientation
-            point = rect.tl + c_pnt  # line 1 is 'lheight' below top
-            maxwidth = rect.width  # pixels available in one line
-            maxheight = rect.height  # available text height
-
-        elif rot == 90:  # rotate counter clockwise
-            c_pnt = pymupdf.Point(fontsize * ascender, 0)  # progress in x-direction
-            point = rect.bl + c_pnt  # line 1 'lheight' away from left
-            maxwidth = rect.height  # pixels available in one line
-            maxheight = rect.width  # available text height
-            cm += cmp90
-
-        elif rot == 180:  # text upside down
-            # progress upwards in y direction
-            c_pnt = -pymupdf.Point(0, fontsize * ascender)
-            point = rect.br + c_pnt  # line 1 'lheight' above bottom
-            maxwidth = rect.width  # pixels available in one line
-            progr = -1  # subtract lheight for next line
-            maxheight =rect.height  # available text height
-            cm += cm180
-
-        else:  # rotate clockwise (270 or -90)
-            # progress from right to left
-            c_pnt = -pymupdf.Point(fontsize * ascender, 0)
-            point = rect.tr + c_pnt  # line 1 'lheight' left of right
-            maxwidth = rect.height  # pixels available in one line
-            progr = -1  # subtract lheight for next line
-            maxheight = rect.width  # available text height
-            cm += cmm90
-
-        # =====================================================================
-        # line loop
-        # =====================================================================
-        just_tab = []  # 'justify' indicators per line
-
-        for i, line in enumerate(t0):
-            line_t = line.expandtabs(expandtabs).split(" ")  # split into words
-            num_words = len(line_t)
-            lbuff = ""  # init line buffer
-            rest = maxwidth  # available line pixels
-            # =================================================================
-            # word loop
-            # =================================================================
-            for j in range(num_words):
-                word = line_t[j]
-                pl_w = pixlen(word)  # pixel len of word
-                if rest >= pl_w:  # does it fit on the line?
-                    lbuff += word + " "  # yes, append word
-                    rest -= pl_w + blen  # update available line space
-                    continue  # next word
-
-                # word doesn't fit - output line (if not empty)
-                if lbuff:
-                    lbuff = lbuff.rstrip() + "\n"  # line full, append line break
-                    text += lbuff  # append to total text
-                    just_tab.append(True)  # can align-justify
-
-                lbuff = ""  # re-init line buffer
-                rest = maxwidth  # re-init avail. space
-
-                if pl_w <= maxwidth:  # word shorter than 1 line?
-                    lbuff = word + " "  # start the line with it
-                    rest = maxwidth - pl_w - blen  # update free space
-                    continue
-
-                # long word: split across multiple lines - char by char ...
-                if len(just_tab) > 0:
-                    just_tab[-1] = False  # cannot align-justify
-                for c in word:
-                    if pixlen(lbuff) <= maxwidth - pixlen(c):
-                        lbuff += c
-                    else:  # line full
-                        lbuff += "\n"  # close line
-                        text += lbuff  # append to text
-                        just_tab.append(False)  # cannot align-justify
-                        lbuff = c  # start new line with this char
-
-                lbuff += " "  # finish long word
-                rest = maxwidth - pixlen(lbuff)  # long word stored
-
-            if lbuff:  # unprocessed line content?
-                text += lbuff.rstrip()  # append to text
-                just_tab.append(False)  # cannot align-justify
-
-            if i < len(t0) - 1:  # not the last line?
-                text += "\n"  # insert line break
-
-        # compute used part of the textbox
-        if text.endswith("\n"):
-            text = text[:-1]
-        lb_count = text.count("\n") + 1  # number of lines written
-
-        # text height = line count * line height plus one descender value
-        text_height = lheight * lb_count - descender * fontsize
-
-        more = text_height - maxheight  # difference to height limit
-        if more > pymupdf.EPSILON:  # landed too much outside rect
-            return (-1) * more  # return deficit, don't output
-
-        more = abs(more)
-        if more < pymupdf.EPSILON:
-            more = 0  # don't bother with epsilons
-        nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
-        templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
-        # center, right, justify: output each line with its own specifics
-        text_t = text.splitlines()  # split text in lines again
-        just_tab[-1] = False  # never justify last line
-        for i, t in enumerate(text_t):
-            spacing = 0
-            pl = maxwidth - pixlen(t)  # length of empty line part
-            pnt = point + c_pnt * (i * lheight_factor)  # text start of line
-            if align == 1:  # center: right shift by half width
-                if rot in (0, 180):
-                    pnt = pnt + pymupdf.Point(pl / 2, 0) * progr
-                else:
-                    pnt = pnt - pymupdf.Point(0, pl / 2) * progr
-            elif align == 2:  # right: right shift by full width
-                if rot in (0, 180):
-                    pnt = pnt + pymupdf.Point(pl, 0) * progr
-                else:
-                    pnt = pnt - pymupdf.Point(0, pl) * progr
-            elif align == 3:  # justify
-                spaces = t.count(" ")  # number of spaces in line
-                if spaces > 0 and just_tab[i]:  # if any, and we may justify
-                    spacing = pl / spaces  # make every space this much larger
-                else:
-                    spacing = 0  # keep normal space length
-            top = height - pnt.y - self.y
-            left = pnt.x + self.x
-            if rot == 90:
-                left = height - pnt.y - self.y
-                top = -pnt.x - self.x
-            elif rot == 270:
-                left = -height + pnt.y + self.y
-                top = pnt.x + self.x
-            elif rot == 180:
-                left = -pnt.x - self.x
-                top = -height + pnt.y + self.y
-
-            nres += templ(left, top, fname, fontsize)
-
-            if render_mode > 0:
-                nres += "%i Tr " % render_mode
-                nres += _format_g(border_width * fontsize) + " w "
-                if miter_limit is not None:
-                    nres += _format_g(miter_limit) + " M "
-
-            if align == 3:
-                nres += _format_g(spacing) + " Tw "
-
-            if color is not None:
-                nres += color_str
-            if fill is not None:
-                nres += fill_str
-            nres += "%sTJ\n" % pymupdf.getTJstr(t, tj_glyphs, simple, ordering)
-
-        nres += "ET\n%sQ\n" % emc
-
-        self.text_cont += nres
-        self.updateRect(rect)
-        return more
-
-    def finish(
-        self,
-        width: float = 1,
-        color: OptSeq = (0,),
-        fill: OptSeq = None,
-        lineCap: int = 0,
-        lineJoin: int = 0,
-        dashes: OptStr = None,
-        even_odd: bool = False,
-        morph: OptSeq = None,
-        closePath: bool = True,
-        fill_opacity: float = 1,
-        stroke_opacity: float = 1,
-        oc: int = 0,
-    ) -> None:
-        """Finish the current drawing segment.
-
-        Notes:
-            Apply colors, opacity, dashes, line style and width, or
-            morphing. Also whether to close the path
-            by connecting last to first point.
-        """
-        if self.draw_cont == "":  # treat empty contents as no-op
-            return
-
-        if width == 0:  # border color makes no sense then
-            color = None
-        elif color is None:  # vice versa
-            width = 0
-        # if color == None and fill == None:
-        #     raise ValueError("at least one of 'color' or 'fill' must be given")
-        color_str = pymupdf.ColorCode(color, "c")  # ensure proper color string
-        fill_str = pymupdf.ColorCode(fill, "f")  # ensure proper fill string
-
-        optcont = self.page._get_optional_content(oc)
-        if optcont is not None:
-            self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
-            emc = "EMC\n"
-        else:
-            emc = ""
-
-        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
-        if alpha is not None:
-            self.draw_cont = "/%s gs\n" % alpha + self.draw_cont
-
-        if width != 1 and width != 0:
-            self.draw_cont += _format_g(width) + " w\n"
-
-        if lineCap != 0:
-            self.draw_cont = "%i J\n" % lineCap + self.draw_cont
-        if lineJoin != 0:
-            self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
-
-        if dashes not in (None, "", "[] 0"):
-            self.draw_cont = "%s d\n" % dashes + self.draw_cont
-
-        if closePath:
-            self.draw_cont += "h\n"
-            self.last_point = None
-
-        if color is not None:
-            self.draw_cont += color_str
-
-        if fill is not None:
-            self.draw_cont += fill_str
-            if color is not None:
-                if not even_odd:
-                    self.draw_cont += "B\n"
-                else:
-                    self.draw_cont += "B*\n"
-            else:
-                if not even_odd:
-                    self.draw_cont += "f\n"
-                else:
-                    self.draw_cont += "f*\n"
-        else:
-            self.draw_cont += "S\n"
-
-        self.draw_cont += emc
-        if pymupdf.CheckMorph(morph):
-            m1 = pymupdf.Matrix(
-                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
-            )
-            mat = ~m1 * morph[1] * m1
-            self.draw_cont = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n" + self.draw_cont
-
-        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
-        self.draw_cont = ""
-        self.last_point = None
-        return
-
-    def commit(self, overlay: bool = True) -> None:
-        """Update the page's /Contents object with Shape data.
-
-        The argument controls whether data appear in foreground (default)
-        or background.
-        """
-        pymupdf.CheckParent(self.page)  # doc may have died meanwhile
-        self.totalcont += self.text_cont
-        self.totalcont = self.totalcont.encode()
-
-        if self.totalcont:
-            if overlay:
-                self.page.wrap_contents()  # ensure a balanced graphics state
-            # make /Contents object with dummy stream
-            xref = pymupdf.TOOLS._insert_contents(self.page, b" ", overlay)
-            # update it with potential compression
-            self.doc.update_stream(xref, self.totalcont)
-
-        self.last_point = None  # clean up ...
-        self.rect = None  #
-        self.draw_cont = ""  # for potential ...
-        self.text_cont = ""  # ...
-        self.totalcont = ""  # re-use
-
-
-def apply_redactions(
-    page: pymupdf.Page, images: int = 2, graphics: int = 1, text: int = 0
-) -> bool:
-    """Apply the redaction annotations of the page.
-
-    Args:
-        page: the PDF page.
-        images:
-              0 - ignore images
-              1 - remove all overlapping images
-              2 - blank out overlapping image parts
-              3 - remove image unless invisible
-        graphics:
-              0 - ignore graphics
-              1 - remove graphics if contained in rectangle
-              2 - remove all overlapping graphics
-        text:
-              0 - remove text
-              1 - ignore text
-    """
-
-    def center_rect(annot_rect, new_text, font, fsize):
-        """Calculate minimal sub-rectangle for the overlay text.
-
-        Notes:
-            Because 'insert_textbox' supports no vertical text centering,
-            we calculate an approximate number of lines here and return a
-            sub-rect with smaller height, which should still be sufficient.
-        Args:
-            annot_rect: the annotation rectangle
-            new_text: the text to insert.
-            font: the fontname. Must be one of the CJK or Base-14 set, else
-                the rectangle is returned unchanged.
-            fsize: the fontsize
-        Returns:
-            A rectangle to use instead of the annot rectangle.
-        """
-        if not new_text or annot_rect.width <= pymupdf.EPSILON:
-            return annot_rect
-        try:
-            text_width = pymupdf.get_text_length(new_text, font, fsize)
-        except (ValueError, mupdf.FzErrorBase):  # unsupported font
-            if g_exceptions_verbose:
-                pymupdf.exception_info()
-            return annot_rect
-        line_height = fsize * 1.2
-        limit = annot_rect.width
-        h = math.ceil(text_width / limit) * line_height  # estimate rect height
-        if h >= annot_rect.height:
-            return annot_rect
-        r = annot_rect
-        y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
-        r.y0 = y
-        return r
-
-    pymupdf.CheckParent(page)
-    doc = page.parent
-    if doc.is_encrypted or doc.is_closed:
-        raise ValueError("document closed or encrypted")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-
-    redact_annots = []  # storage of annot values
-    for annot in page.annots(
-        types=(pymupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
-    ):
-        # loop redactions
-        redact_annots.append(annot._get_redact_values())  # save annot values
-
-    if redact_annots == []:  # any redactions on this page?
-        return False  # no redactions
-
-    rc = page._apply_redactions(text, images, graphics)  # call MuPDF
-    if not rc:  # should not happen really
-        raise ValueError("Error applying redactions.")
-
-    # now write replacement text in old redact rectangles
-    shape = page.new_shape()
-    for redact in redact_annots:
-        annot_rect = redact["rect"]
-        fill = redact["fill"]
-        if fill:
-            shape.draw_rect(annot_rect)  # colorize the rect background
-            shape.finish(fill=fill, color=fill)
-        if "text" in redact.keys():  # if we also have text
-            new_text = redact["text"]
-            align = redact.get("align", 0)
-            fname = redact["fontname"]
-            fsize = redact["fontsize"]
-            color = redact["text_color"]
-            # try finding vertical centered sub-rect
-            trect = center_rect(annot_rect, new_text, fname, fsize)
-
-            rc = -1
-            while rc < 0 and fsize >= 4:  # while not enough room
-                # (re-) try insertion
-                rc = shape.insert_textbox(
-                    trect,
-                    new_text,
-                    fontname=fname,
-                    fontsize=fsize,
-                    color=color,
-                    align=align,
-                )
-                fsize -= 0.5  # reduce font if unsuccessful
-    shape.commit()  # append new contents object
-    return True
-
-
-# ------------------------------------------------------------------------------
-# Remove potentially sensitive data from a PDF. Similar to the Adobe
-# Acrobat 'sanitize' function
-# ------------------------------------------------------------------------------
-def scrub(
-    doc: pymupdf.Document,
-    attached_files: bool = True,
-    clean_pages: bool = True,
-    embedded_files: bool = True,
-    hidden_text: bool = True,
-    javascript: bool = True,
-    metadata: bool = True,
-    redactions: bool = True,
-    redact_images: int = 0,
-    remove_links: bool = True,
-    reset_fields: bool = True,
-    reset_responses: bool = True,
-    thumbnails: bool = True,
-    xml_metadata: bool = True,
-) -> None:
-    def remove_hidden(cont_lines):
-        """Remove hidden text from a PDF page.
-
-        Args:
-            cont_lines: list of lines with /Contents content. Should have status
-                from after page.cleanContents().
-
-        Returns:
-            List of /Contents lines from which hidden text has been removed.
-
-        Notes:
-            The input must have been created after the page's /Contents object(s)
-            have been cleaned with page.cleanContents(). This ensures a standard
-            formatting: one command per line, single spaces between operators.
-            This allows for drastic simplification of this code.
-        """
-        out_lines = []  # will return this
-        in_text = False  # indicate if within BT/ET object
-        suppress = False  # indicate text suppression active
-        make_return = False
-        for line in cont_lines:
-            if line == b"BT":  # start of text object
-                in_text = True  # switch on
-                out_lines.append(line)  # output it
-                continue
-            if line == b"ET":  # end of text object
-                in_text = False  # switch off
-                out_lines.append(line)  # output it
-                continue
-            if line == b"3 Tr":  # text suppression operator
-                suppress = True  # switch on
-                make_return = True
-                continue
-            if line[-2:] == b"Tr" and line[0] != b"3":
-                suppress = False  # text rendering changed
-                out_lines.append(line)
-                continue
-            if line == b"Q":  # unstack command also switches off
-                suppress = False
-                out_lines.append(line)
-                continue
-            if suppress and in_text:  # suppress hidden lines
-                continue
-            out_lines.append(line)
-        if make_return:
-            return out_lines
-        else:
-            return None
-
-    if not doc.is_pdf:  # only works for PDF
-        raise ValueError("is no PDF")
-    if doc.is_encrypted or doc.is_closed:
-        raise ValueError("closed or encrypted doc")
-
-    if not clean_pages:
-        hidden_text = False
-        redactions = False
-
-    if metadata:
-        doc.set_metadata({})  # remove standard metadata
-
-    for page in doc:
-        if reset_fields:
-            # reset form fields (widgets)
-            for widget in page.widgets():
-                widget.reset()
-
-        if remove_links:
-            links = page.get_links()  # list of all links on page
-            for link in links:  # remove all links
-                page.delete_link(link)
-
-        found_redacts = False
-        for annot in page.annots():
-            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
-                annot.update_file(buffer_=b" ")  # set file content to empty
-            if reset_responses:
-                annot.delete_responses()
-            if annot.type[0] == pymupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
-                found_redacts = True
-
-        if redactions and found_redacts:
-            page.apply_redactions(images=redact_images)
-
-        if not (clean_pages or hidden_text):
-            continue  # done with the page
-
-        page.clean_contents()
-        if not page.get_contents():
-            continue
-        if hidden_text:
-            xref = page.get_contents()[0]  # only one b/o cleaning!
-            cont = doc.xref_stream(xref)
-            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
-            if cont_lines:  # something was actually removed
-                cont = b"\n".join(cont_lines)
-                doc.update_stream(xref, cont)  # rewrite the page /Contents
-
-        if thumbnails:  # remove page thumbnails?
-            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
-                doc.xref_set_key(page.xref, "Thumb", "null")
-
-    # pages are scrubbed, now perform document-wide scrubbing
-    # remove embedded files
-    if embedded_files:
-        for name in doc.embfile_names():
-            doc.embfile_del(name)
-
-    if xml_metadata:
-        doc.del_xml_metadata()
-    if not (xml_metadata or javascript):
-        xref_limit = 0
-    else:
-        xref_limit = doc.xref_length()
-    for xref in range(1, xref_limit):
-        if not doc.xref_object(xref):
-            msg = "bad xref %i - clean PDF before scrubbing" % xref
-            raise ValueError(msg)
-        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
-            # a /JavaScript action object
-            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
-            doc.update_object(xref, obj)  # update this object
-            continue  # no further handling
-
-        if not xml_metadata:
-            continue
-
-        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
-            # delete any metadata object directly
-            doc.update_object(xref, "<<>>")
-            doc.update_stream(xref, b"deleted", new=True)
-            continue
-
-        if doc.xref_get_key(xref, "Metadata")[0] != "null":
-            doc.xref_set_key(xref, "Metadata", "null")
-
-
 def _show_fz_text( text):
     #if mupdf_cppyy:
     #    assert isinstance( text, cppyy.gbl.mupdf.Text)
@@ -4451,418 +840,6 @@
         span = span.next
     return f'num_spans={num_spans} num_chars={num_chars}'
 
-def fill_textbox(
-    writer: pymupdf.TextWriter,
-    rect: rect_like,
-    text: typing.Union[str, list],
-    pos: point_like = None,
-    font: typing.Optional[pymupdf.Font] = None,
-    fontsize: float = 11,
-    lineheight: OptFloat = None,
-    align: int = 0,
-    warn: bool = None,
-    right_to_left: bool = False,
-    small_caps: bool = False,
-) -> tuple:
-    """Fill a rectangle with text.
-
-    Args:
-        writer: pymupdf.TextWriter object (= "self")
-        rect: rect-like to receive the text.
-        text: string or list/tuple of strings.
-        pos: point-like start position of first word.
-        font: pymupdf.Font object (default pymupdf.Font('helv')).
-        fontsize: the fontsize.
-        lineheight: overwrite the font property
-        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
-        warn: (bool) text overflow action: none, warn, or exception
-        right_to_left: (bool) indicate right-to-left language.
-    """
-    rect = pymupdf.Rect(rect)
-    if rect.is_empty:
-        raise ValueError("fill rect must not empty.")
-    if type(font) is not pymupdf.Font:
-        font = pymupdf.Font("helv")
-
-    def textlen(x):
-        """Return length of a string."""
-        return font.text_length(
-            x, fontsize=fontsize, small_caps=small_caps
-        )  # abbreviation
-
-    def char_lengths(x):
-        """Return list of single character lengths for a string."""
-        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)
-
-    def append_this(pos, text):
-        ret = writer.append(
-                pos, text, font=font, fontsize=fontsize, small_caps=small_caps
-                )
-        return ret
-
-    tolerance = fontsize * 0.2  # extra distance to left border
-    space_len = textlen(" ")
-    std_width = rect.width - tolerance
-    std_start = rect.x0 + tolerance
-
-    def norm_words(width, words):
-        """Cut any word in pieces no longer than 'width'."""
-        nwords = []
-        word_lengths = []
-        for w in words:
-            wl_lst = char_lengths(w)
-            wl = sum(wl_lst)
-            if wl <= width:  # nothing to do - copy over
-                nwords.append(w)
-                word_lengths.append(wl)
-                continue
-
-            # word longer than rect width - split it in parts
-            n = len(wl_lst)
-            while n > 0:
-                wl = sum(wl_lst[:n])
-                if wl <= width:
-                    nwords.append(w[:n])
-                    word_lengths.append(wl)
-                    w = w[n:]
-                    wl_lst = wl_lst[n:]
-                    n = len(wl_lst)
-                else:
-                    n -= 1
-        return nwords, word_lengths
-
-    def output_justify(start, line):
-        """Justified output of a line."""
-        # ignore leading / trailing / multiple spaces
-        words = [w for w in line.split(" ") if w != ""]
-        nwords = len(words)
-        if nwords == 0:
-            return
-        if nwords == 1:  # single word cannot be justified
-            append_this(start, words[0])
-            return
-        tl = sum([textlen(w) for w in words])  # total word lengths
-        gaps = nwords - 1  # number of word gaps
-        gapl = (std_width - tl) / gaps  # width of each gap
-        for w in words:
-            _, lp = append_this(start, w)  # output one word
-            start.x = lp.x + gapl  # next start at word end plus gap
-        return
-
-    asc = font.ascender
-    dsc = font.descender
-    if not lineheight:
-        if asc - dsc <= 1:
-            lheight = 1.2
-        else:
-            lheight = asc - dsc
-    else:
-        lheight = lineheight
-
-    LINEHEIGHT = fontsize * lheight  # effective line height
-    width = std_width  # available horizontal space
-
-    # starting point of text
-    if pos is not None:
-        pos = pymupdf.Point(pos)
-    else:  # default is just below rect top-left
-        pos = rect.tl + (tolerance, fontsize * asc)
-    if pos not in rect:
-        raise ValueError("Text must start in rectangle.")
-
-    # calculate displacement factor for alignment
-    if align == pymupdf.TEXT_ALIGN_CENTER:
-        factor = 0.5
-    elif align == pymupdf.TEXT_ALIGN_RIGHT:
-        factor = 1.0
-    else:
-        factor = 0
-
-    # split in lines if just a string was given
-    if type(text) is str:
-        textlines = text.splitlines()
-    else:
-        textlines = []
-        for line in text:
-            textlines.extend(line.splitlines())
-
-    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1
-
-    new_lines = []  # the final list of textbox lines
-    no_justify = []  # no justify for these line numbers
-    for i, line in enumerate(textlines):
-        if line in ("", " "):
-            new_lines.append((line, space_len))
-            width = rect.width - tolerance
-            no_justify.append((len(new_lines) - 1))
-            continue
-        if i == 0:
-            width = rect.x1 - pos.x
-        else:
-            width = rect.width - tolerance
-
-        if right_to_left:  # reverses Arabic / Hebrew text front to back
-            line = writer.clean_rtl(line)
-        tl = textlen(line)
-        if tl <= width:  # line short enough
-            new_lines.append((line, tl))
-            no_justify.append((len(new_lines) - 1))
-            continue
-
-        # we need to split the line in fitting parts
-        words = line.split(" ")  # the words in the line
-
-        # cut in parts any words that are longer than rect width
-        words, word_lengths = norm_words(width, words)
-
-        n = len(words)
-        while True:
-            line0 = " ".join(words[:n])
-            wl = sum(word_lengths[:n]) + space_len * (n - 1)
-            if wl <= width:
-                new_lines.append((line0, wl))
-                words = words[n:]
-                word_lengths = word_lengths[n:]
-                n = len(words)
-                line0 = None
-            else:
-                n -= 1
-
-            if len(words) == 0:
-                break
-            assert n
-
-    # -------------------------------------------------------------------------
-    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
-    # output length (float) and 'text' is the text. Except for justified text,
-    # this is output-ready.
-    # -------------------------------------------------------------------------
-    nlines = len(new_lines)
-    if nlines > max_lines:
-        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
-        if warn is None:
-            pass
-        elif warn:
-            pymupdf.message("Warning: " + msg)
-        else:
-            raise ValueError(msg)
-
-    start = pymupdf.Point()
-    no_justify += [len(new_lines) - 1]  # no justifying of last line
-    for i in range(max_lines):
-        try:
-            line, tl = new_lines.pop(0)
-        except IndexError:
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            break
-
-        if right_to_left:  # Arabic, Hebrew
-            line = "".join(reversed(line))
-
-        if i == 0:  # may have different start for first line
-            start = pos
-
-        if align == pymupdf.TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
-            output_justify(start, line)
-            start.x = std_start
-            start.y += LINEHEIGHT
-            continue
-
-        if i > 0 or pos.x == std_start:  # left, center, right alignments
-            start.x += (width - tl) * factor
-
-        append_this(start, line)
-        start.x = std_start
-        start.y += LINEHEIGHT
-
-    return new_lines  # return non-written lines
-
-
-# ------------------------------------------------------------------------
-# Optional Content functions
-# ------------------------------------------------------------------------
-def get_oc(doc: pymupdf.Document, xref: int) -> int:
-    """Return optional content object xref for an image or form xobject.
-
-    Args:
-        xref: (int) xref number of an image or form xobject.
-    """
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document close or encrypted")
-    t, name = doc.xref_get_key(xref, "Subtype")
-    if t != "name" or name not in ("/Image", "/Form"):
-        raise ValueError("bad object type at xref %i" % xref)
-    t, oc = doc.xref_get_key(xref, "OC")
-    if t != "xref":
-        return 0
-    rc = int(oc.replace("0 R", ""))
-    return rc
-
-
-def set_oc(doc: pymupdf.Document, xref: int, oc: int) -> None:
-    """Attach optional content object to image or form xobject.
-
-    Args:
-        xref: (int) xref number of an image or form xobject
-        oc: (int) xref number of an OCG or OCMD
-    """
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document close or encrypted")
-    t, name = doc.xref_get_key(xref, "Subtype")
-    if t != "name" or name not in ("/Image", "/Form"):
-        raise ValueError("bad object type at xref %i" % xref)
-    if oc > 0:
-        t, name = doc.xref_get_key(oc, "Type")
-        if t != "name" or name not in ("/OCG", "/OCMD"):
-            raise ValueError("bad object type at xref %i" % oc)
-    if oc == 0 and "OC" in doc.xref_get_keys(xref):
-        doc.xref_set_key(xref, "OC", "null")
-        return None
-    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
-    return None
-
-
-def set_ocmd(
-    doc: pymupdf.Document,
-    xref: int = 0,
-    ocgs: typing.Union[list, None] = None,
-    policy: OptStr = None,
-    ve: typing.Union[list, None] = None,
-) -> int:
-    """Create or update an OCMD object in a PDF document.
-
-    Args:
-        xref: (int) 0 for creating a new object, otherwise update existing one.
-        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
-        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
-        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.
-
-    Returns:
-        Xref of the created or updated OCMD.
-    """
-
-    all_ocgs = set(doc.get_ocgs().keys())
-
-    def ve_maker(ve):
-        if type(ve) not in (list, tuple) or len(ve) < 2:
-            raise ValueError("bad 've' format: %s" % ve)
-        if ve[0].lower() not in ("and", "or", "not"):
-            raise ValueError("bad operand: %s" % ve[0])
-        if ve[0].lower() == "not" and len(ve) != 2:
-            raise ValueError("bad 've' format: %s" % ve)
-        item = "[/%s" % ve[0].title()
-        for x in ve[1:]:
-            if type(x) is int:
-                if x not in all_ocgs:
-                    raise ValueError("bad OCG %i" % x)
-                item += " %i 0 R" % x
-            else:
-                item += " %s" % ve_maker(x)
-        item += "]"
-        return item
-
-    text = "<</Type/OCMD"
-
-    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
-        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
-        if s != set():
-            msg = "bad OCGs: %s" % s
-            raise ValueError(msg)
-        text += "/OCGs[" + " ".join(map(lambda x: "%i 0 R" % x, ocgs)) + "]"
-
-    if policy:
-        policy = str(policy).lower()
-        pols = {
-            "anyon": "AnyOn",
-            "allon": "AllOn",
-            "anyoff": "AnyOff",
-            "alloff": "AllOff",
-        }
-        if policy not in ("anyon", "allon", "anyoff", "alloff"):
-            raise ValueError("bad policy: %s" % policy)
-        text += "/P/%s" % pols[policy]
-
-    if ve:
-        text += "/VE%s" % ve_maker(ve)
-
-    text += ">>"
-
-    # make new object or replace old OCMD (check type first)
-    if xref == 0:
-        xref = doc.get_new_xref()
-    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
-        raise ValueError("bad xref or not an OCMD")
-    doc.update_object(xref, text)
-    return xref
-
-
-def get_ocmd(doc: pymupdf.Document, xref: int) -> dict:
-    """Return the definition of an OCMD (optional content membership dictionary).
-
-    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
-    /VE (visibility expression, PDF array). Via string manipulation, this
-    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
-    and "ve" - ready to recycle as input for 'set_ocmd()'.
-    """
-
-    if xref not in range(doc.xref_length()):
-        raise ValueError("bad xref")
-    text = doc.xref_object(xref, compressed=True)
-    if "/Type/OCMD" not in text:
-        raise ValueError("bad object type")
-    textlen = len(text)
-
-    p0 = text.find("/OCGs[")  # look for /OCGs key
-    p1 = text.find("]", p0)
-    if p0 < 0 or p1 < 0:  # no OCGs found
-        ocgs = None
-    else:
-        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
-        ocgs = list(map(int, ocgs))
-
-    p0 = text.find("/P/")  # look for /P policy key
-    if p0 < 0:
-        policy = None
-    else:
-        p1 = text.find("ff", p0)
-        if p1 < 0:
-            p1 = text.find("on", p0)
-        if p1 < 0:  # some irregular syntax
-            raise ValueError("bad object at xref")
-        else:
-            policy = text[p0 + 3 : p1 + 2]
-
-    p0 = text.find("/VE[")  # look for /VE visibility expression key
-    if p0 < 0:  # no visibility expression found
-        ve = None
-    else:
-        lp = rp = 0  # find end of /VE by finding last ']'.
-        p1 = p0
-        while lp < 1 or lp != rp:
-            p1 += 1
-            if not p1 < textlen:  # some irregular syntax
-                raise ValueError("bad object at xref")
-            if text[p1] == "[":
-                lp += 1
-            if text[p1] == "]":
-                rp += 1
-        # p1 now positioned at the last "]"
-        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
-        ve = (
-            ve.replace("/And", '"and",')
-            .replace("/Not", '"not",')
-            .replace("/Or", '"or",')
-        )
-        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
-        import json
-        try:
-            ve = json.loads(ve)
-        except Exception:
-            pymupdf.exception_info()
-            pymupdf.message(f"bad /VE key: {ve!r}")
-            raise
-    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
-
 
 """
 Handle page labels for PDF documents.
@@ -4937,50 +914,6 @@
     return construct_label(style, prefix, pagenumber)
 
 
-def get_label(page):
-    """Return the label for this PDF page.
-
-    Args:
-        page: page object.
-    Returns:
-        The label (str) of the page. Errors return an empty string.
-    """
-    # Jorj McKie, 2021-01-06
-
-    labels = page.parent._get_page_labels()
-    if not labels:
-        return ""
-    labels.sort()
-    return get_label_pno(page.number, labels)
-
-
-def get_page_numbers(doc, label, only_one=False):
-    """Return a list of page numbers with the given label.
-
-    Args:
-        doc: PDF document object (resp. 'self').
-        label: (str) label.
-        only_one: (bool) stop searching after first hit.
-    Returns:
-        List of page numbers having this label.
-    """
-    # Jorj McKie, 2021-01-06
-
-    numbers = []
-    if not label:
-        return numbers
-    labels = doc._get_page_labels()
-    if labels == []:
-        return numbers
-    for i in range(doc.page_count):
-        plabel = get_label_pno(i, labels)
-        if plabel == label:
-            numbers.append(i)
-            if only_one:
-                break
-    return numbers
-
-
 def construct_label(style, prefix, pno) -> str:
     """Construct a label based on style, prefix and page number."""
     # William Chapman, 2021-01-06
@@ -5049,94 +982,6 @@
     return "".join([a for a in roman_num(num)])
 
 
-def get_page_labels(doc):
-    """Return page label definitions in PDF document.
-
-    Args:
-        doc: PDF document (resp. 'self').
-    Returns:
-        A list of dictionaries with the following format:
-        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
-    """
-    # Jorj McKie, 2021-01-10
-    return [rule_dict(item) for item in doc._get_page_labels()]
-
-
-def set_page_labels(doc, labels):
-    """Add / replace page label definitions in PDF document.
-
-    Args:
-        doc: PDF document (resp. 'self').
-        labels: list of label dictionaries like:
-        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
-        as returned by get_page_labels().
-    """
-    # William Chapman, 2021-01-06
-
-    def create_label_str(label):
-        """Convert Python label dict to corresponding PDF rule string.
-
-        Args:
-            label: (dict) build rule for the label.
-        Returns:
-            PDF label rule string wrapped in "<<", ">>".
-        """
-        s = "%i<<" % label["startpage"]
-        if label.get("prefix", "") != "":
-            s += "/P(%s)" % label["prefix"]
-        if label.get("style", "") != "":
-            s += "/S/%s" % label["style"]
-        if label.get("firstpagenum", 1) > 1:
-            s += "/St %i" % label["firstpagenum"]
-        s += ">>"
-        return s
-
-    def create_nums(labels):
-        """Return concatenated string of all labels rules.
-
-        Args:
-            labels: (list) dictionaries as created by function 'rule_dict'.
-        Returns:
-            PDF compatible string for page label definitions, ready to be
-            enclosed in PDF array 'Nums[...]'.
-        """
-        labels.sort(key=lambda x: x["startpage"])
-        s = "".join([create_label_str(label) for label in labels])
-        return s
-
-    doc._set_page_labels(create_nums(labels))
-
-
-# End of Page Label Code -------------------------------------------------
-
-
-def has_links(doc: pymupdf.Document) -> bool:
-    """Check whether there are links on any page."""
-    if doc.is_closed:
-        raise ValueError("document closed")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    for i in range(doc.page_count):
-        for item in doc.page_annot_xrefs(i):
-            if item[1] == pymupdf.PDF_ANNOT_LINK:  # pylint: disable=no-member
-                return True
-    return False
-
-
-def has_annots(doc: pymupdf.Document) -> bool:
-    """Check whether there are annotations on any page."""
-    if doc.is_closed:
-        raise ValueError("document closed")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    for i in range(doc.page_count):
-        for item in doc.page_annot_xrefs(i):
-            # pylint: disable=no-member
-            if not (item[1] == pymupdf.PDF_ANNOT_LINK or item[1] == pymupdf.PDF_ANNOT_WIDGET):  # pylint: disable=no-member
-                return True
-    return False
-
-
 # -------------------------------------------------------------------
 # Functions to recover the quad contained in a text extraction bbox
 # -------------------------------------------------------------------
@@ -5322,358 +1167,3 @@
         raise ValueError("bad span argument")
 
     return recover_bbox_quad(line_dir, span, bbox)
-
-
-# -------------------------------------------------------------------
-# Building font subsets using fontTools
-# -------------------------------------------------------------------
-def subset_fonts(doc: pymupdf.Document, verbose: bool = False, fallback: bool = False) -> OptInt:
-    """Build font subsets in a PDF.
-
-    Eligible fonts are potentially replaced by smaller versions. Page text is
-    NOT rewritten and thus should retain properties like being hidden or
-    controlled by optional content.
-
-    This method by default uses MuPDF's own internal feature to create subset
-    fonts. As this is a new function, errors may still occur. In this case,
-    please fall back to using the previous version by using "fallback=True".
-    Fallback mode requires the external package 'fontTools'.
-
-    Args:
-        fallback: use the older deprecated implementation.
-        verbose: only used by fallback mode.
-
-    Returns:
-        The new MuPDF-based code returns None.  The deprecated fallback
-        mode returns 0 if there are no fonts to subset.  Otherwise, it
-        returns the decrease in fontsize (the difference in fontsize),
-        measured in bytes.
-    """
-    # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
-    # An embedded font is uniquely defined by its fontbuffer only. It may have
-    # multiple names and xrefs.
-    # Once the sets of used unicodes and glyphs are known, we compute a
-    # smaller version of the buffer user package fontTools.
-
-    if not fallback:  # by default use MuPDF function
-        pdf = mupdf.pdf_document_from_fz_document(doc)
-        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
-        return
-
-    font_buffers = {}
-
-    def get_old_widths(xref):
-        """Retrieve old font '/W' and '/DW' values."""
-        df = doc.xref_get_key(xref, "DescendantFonts")
-        if df[0] != "array":  # only handle xref specifications
-            return None, None
-        df_xref = int(df[1][1:-1].replace("0 R", ""))
-        widths = doc.xref_get_key(df_xref, "W")
-        if widths[0] != "array":  # no widths key found
-            widths = None
-        else:
-            widths = widths[1]
-        dwidths = doc.xref_get_key(df_xref, "DW")
-        if dwidths[0] != "int":
-            dwidths = None
-        else:
-            dwidths = dwidths[1]
-        return widths, dwidths
-
-    def set_old_widths(xref, widths, dwidths):
-        """Restore the old '/W' and '/DW' in subsetted font.
-
-        If either parameter is None or evaluates to False, the corresponding
-        dictionary key will be set to null.
-        """
-        df = doc.xref_get_key(xref, "DescendantFonts")
-        if df[0] != "array":  # only handle xref specs
-            return None
-        df_xref = int(df[1][1:-1].replace("0 R", ""))
-        if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[
-            0
-        ] != "null":
-            doc.xref_set_key(df_xref, "W", "null")
-        else:
-            doc.xref_set_key(df_xref, "W", widths)
-        if (type(dwidths) is not str or not dwidths) and doc.xref_get_key(
-            df_xref, "DW"
-        )[0] != "null":
-            doc.xref_set_key(df_xref, "DW", "null")
-        else:
-            doc.xref_set_key(df_xref, "DW", dwidths)
-        return None
-
-    def set_subset_fontname(new_xref):
-        """Generate a name prefix to tag a font as subset.
-
-        We use a random generator to select 6 upper case ASCII characters.
-        The prefixed name must be put in the font xref as the "/BaseFont" value
-        and in the FontDescriptor object as the '/FontName' value.
-        """
-        # The following generates a prefix like 'ABCDEF+'
-        import random
-        import string
-        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
-        font_str = doc.xref_object(new_xref, compressed=True)
-        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
-        df = doc.xref_get_key(new_xref, "DescendantFonts")
-        if df[0] == "array":
-            df_xref = int(df[1][1:-1].replace("0 R", ""))
-            fd = doc.xref_get_key(df_xref, "FontDescriptor")
-            if fd[0] == "xref":
-                fd_xref = int(fd[1].replace("0 R", ""))
-                fd_str = doc.xref_object(fd_xref, compressed=True)
-                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
-                doc.update_object(fd_xref, fd_str)
-        doc.update_object(new_xref, font_str)
-
-    def build_subset(buffer, unc_set, gid_set):
-        """Build font subset using fontTools.
-
-        Args:
-            buffer: (bytes) the font given as a binary buffer.
-            unc_set: (set) required glyph ids.
-        Returns:
-            Either None if subsetting is unsuccessful or the subset font buffer.
-        """
-        try:
-            import fontTools.subset as fts
-        except ImportError:
-            if g_exceptions_verbose:    pymupdf.exception_info()
-            pymupdf.message("This method requires fontTools to be installed.")
-            raise
-        import tempfile
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            oldfont_path = f"{tmp_dir}/oldfont.ttf"
-            newfont_path = f"{tmp_dir}/newfont.ttf"
-            uncfile_path = f"{tmp_dir}/uncfile.txt"
-            args = [
-                oldfont_path,
-                "--retain-gids",
-                f"--output-file={newfont_path}",
-                "--layout-features=*",
-                "--passthrough-tables",
-                "--ignore-missing-glyphs",
-                "--ignore-missing-unicodes",
-                "--symbol-cmap",
-            ]
-
-            # store glyph ids or unicodes as file
-            with open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file:
-                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
-                    args.append(f"--gids-file={uncfile_path}")
-                    gid_set.add(189)
-                    unc_list = list(gid_set)
-                    for unc in unc_list:
-                        unc_file.write("%i\n" % unc)
-                else:
-                    args.append(f"--unicodes-file={uncfile_path}")
-                    unc_set.add(255)
-                    unc_list = list(unc_set)
-                    for unc in unc_list:
-                        unc_file.write("%04x\n" % unc)
-
-            # store fontbuffer as a file
-            with open(oldfont_path, "wb") as fontfile:
-                fontfile.write(buffer)
-            try:
-                os.remove(newfont_path)  # remove old file
-            except Exception:
-                pass
-            try:  # invoke fontTools subsetter
-                fts.main(args)
-                font = pymupdf.Font(fontfile=newfont_path)
-                new_buffer = font.buffer  # subset font binary
-                if font.glyph_count == 0:  # intercept empty font
-                    new_buffer = None
-            except Exception:
-                pymupdf.exception_info()
-                new_buffer = None
-        return new_buffer
-
-    def repl_fontnames(doc):
-        """Populate 'font_buffers'.
-
-        For each font candidate, store its xref and the list of names
-        by which PDF text may refer to it (there may be multiple).
-        """
-
-        def norm_name(name):
-            """Recreate font name that contains PDF hex codes.
-
-            E.g. #20 -> space, chr(32)
-            """
-            while "#" in name:
-                p = name.find("#")
-                c = int(name[p + 1 : p + 3], 16)
-                name = name.replace(name[p : p + 3], chr(c))
-            return name
-
-        def get_fontnames(doc, item):
-            """Return a list of fontnames for an item of page.get_fonts().
-
-            There may be multiple names e.g. for Type0 fonts.
-            """
-            fontname = item[3]
-            names = [fontname]
-            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
-            fontname = norm_name(fontname)
-            if fontname not in names:
-                names.append(fontname)
-            descendents = doc.xref_get_key(item[0], "DescendantFonts")
-            if descendents[0] != "array":
-                return names
-            descendents = descendents[1][1:-1]
-            if descendents.endswith(" 0 R"):
-                xref = int(descendents[:-4])
-                descendents = doc.xref_object(xref, compressed=True)
-            p1 = descendents.find("/BaseFont")
-            if p1 >= 0:
-                p2 = descendents.find("/", p1 + 1)
-                p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1))
-                fontname = descendents[p2 + 1 : p1]
-                fontname = norm_name(fontname)
-                if fontname not in names:
-                    names.append(fontname)
-            return names
-
-        for i in range(doc.page_count):
-            for f in doc.get_page_fonts(i, full=True):
-                font_xref = f[0]  # font xref
-                font_ext = f[1]  # font file extension
-                basename = f[3]  # font basename
-
-                if font_ext not in (  # skip if not supported by fontTools
-                    "otf",
-                    "ttf",
-                    "woff",
-                    "woff2",
-                ):
-                    continue
-                # skip fonts which already are subsets
-                if len(basename) > 6 and basename[6] == "+":
-                    continue
-
-                extr = doc.extract_font(font_xref)
-                fontbuffer = extr[-1]
-                names = get_fontnames(doc, f)
-                name_set, xref_set, subsets = font_buffers.get(
-                    fontbuffer, (set(), set(), (set(), set()))
-                )
-                xref_set.add(font_xref)
-                for name in names:
-                    name_set.add(name)
-                font = pymupdf.Font(fontbuffer=fontbuffer)
-                name_set.add(font.name)
-                del font
-                font_buffers[fontbuffer] = (name_set, xref_set, subsets)
-
-    def find_buffer_by_name(name):
-        for buffer, (name_set, _, _) in font_buffers.items():
-            if name in name_set:
-                return buffer
-        return None
-
-    # -----------------
-    # main function
-    # -----------------
-    repl_fontnames(doc)  # populate font information
-    if not font_buffers:  # nothing found to do
-        if verbose:
-            pymupdf.message(f'No fonts to subset.')
-        return 0
-
-    old_fontsize = 0
-    new_fontsize = 0
-    for fontbuffer in font_buffers.keys():
-        old_fontsize += len(fontbuffer)
-
-    # Scan page text for usage of subsettable fonts
-    for page in doc:
-        # go through the text and extend set of used glyphs by font
-        # we use a modified MuPDF trace device, which delivers us glyph ids.
-        for span in page.get_texttrace():
-            if type(span) is not dict:  # skip useless information
-                continue
-            fontname = span["font"][:33]  # fontname for the span
-            buffer = find_buffer_by_name(fontname)
-            if buffer is None:
-                continue
-            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
-            for c in span["chars"]:
-                set_ucs.add(c[0])  # unicode
-                set_gid.add(c[1])  # glyph id
-            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))
-
-    # build the font subsets
-    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
-        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
-        fontname = list(name_set)[0]
-        if new_buffer is None or len(new_buffer) >= len(old_buffer):
-            # subset was not created or did not get smaller
-            if verbose:
-                pymupdf.message(f'Cannot subset {fontname!r}.')
-            continue
-        if verbose:
-            pymupdf.message(f"Built subset of font {fontname!r}.")
-        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
-        new_xref = val[0]  # get its xref
-        set_subset_fontname(new_xref)  # tag fontname as subset font
-        font_str = doc.xref_object(  # get its object definition
-            new_xref,
-            compressed=True,
-        )
-        # walk through the original font xrefs and replace each by the subset def
-        for font_xref in xref_set:
-            # we need the original '/W' and '/DW' width values
-            width_table, def_width = get_old_widths(font_xref)
-            # ... and replace original font definition at xref with it
-            doc.update_object(font_xref, font_str)
-            # now copy over old '/W' and '/DW' values
-            if width_table or def_width:
-                set_old_widths(font_xref, width_table, def_width)
-        # 'new_xref' remains unused in the PDF and must be removed
-        # by garbage collection.
-        new_fontsize += len(new_buffer)
-
-    return old_fontsize - new_fontsize
-
-
-# -------------------------------------------------------------------
-# Copy XREF object to another XREF
-# -------------------------------------------------------------------
-def xref_copy(doc: pymupdf.Document, source: int, target: int, *, keep: list = None) -> None:
-    """Copy a PDF dictionary object to another one given their xref numbers.
-
-    Args:
-        doc: PDF document object
-        source: source xref number
-        target: target xref number, the xref must already exist
-        keep: an optional list of 1st level keys in target that should not be
-              removed before copying.
-    Notes:
-        This works similar to the copy() method of dictionaries in Python. The
-        source may be a stream object.
-    """
-    if doc.xref_is_stream(source):
-        # read new xref stream, maintaining compression
-        stream = doc.xref_stream_raw(source)
-        doc.update_stream(
-            target,
-            stream,
-            compress=False,  # keeps source compression
-            new=True,  # in case target is no stream
-        )
-
-    # empty the target completely, observe exceptions
-    if keep is None:
-        keep = []
-    for key in doc.xref_get_keys(target):
-        if key in keep:
-            continue
-        doc.xref_set_key(target, key, "null")
-    # copy over all source dict items
-    for key in doc.xref_get_keys(source):
-        item = doc.xref_get_key(source, key)
-        doc.xref_set_key(target, key, item[1])
--- a/tests/conftest.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/conftest.py	Sat Oct 11 11:19:58 2025 +0200
@@ -77,11 +77,25 @@
     # Allow post-test checking that pymupdf._globals has not changed.
     _globals_pre = get_members(pymupdf._globals)
     
+    testsfailed_before = request.session.testsfailed
+    
     # Run the test.
     rep = yield
     
     sys.stdout.flush()
     
+    # This seems the only way for us to tell that a test has failed. In
+    # particular, <rep> is always None. We're implicitly relying on tests not
+    # being run in parallel.
+    #
+    failed = request.session.testsfailed - testsfailed_before
+    assert failed in (0, 1)
+    
+    if failed:
+        # Do not check post-test conditions if the test as failed. This avoids
+        # additional confusing `ERROR` status for failed tests.
+        return
+    
     # Test has run; check it did not create any MuPDF warnings etc.
     wt = pymupdf.TOOLS.mupdf_warnings()
     if not hasattr(pymupdf, 'mupdf'):
--- a/tests/gentle_compare.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/gentle_compare.py	Sat Oct 11 11:19:58 2025 +0200
@@ -32,8 +32,6 @@
     '''
     Returns RMS diff of raw bytes of two sequences.
     '''
-    if verbose is True:
-        verbose = 100000
     assert len(a) == len(b)
     e = 0
     for i, (aa, bb) in enumerate(zip(a, b)):
@@ -62,7 +60,7 @@
     a_mv = a.samples_mv
     b_mv = b.samples_mv
     assert len(a_mv) == len(b_mv)
-    ret = rms(a_mv, b_mv, verbose=True, out_prefix=out_prefix)
+    ret = rms(a_mv, b_mv, out_prefix=out_prefix)
     print(f'{out_prefix}pixmaps_rms(): {ret=}.')
     return ret
 
Binary file tests/resources/test_1645_expected-after-1.27.0.pdf has changed
Binary file tests/resources/test_4613.png has changed
Binary file tests/resources/test_4699.pdf has changed
Binary file tests/resources/test_4699.png has changed
Binary file tests/resources/test_4712_a.pdf has changed
Binary file tests/resources/test_4712_b.pdf has changed
Binary file tests/resources/test_4716.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_4716.py	Sat Oct 11 11:19:58 2025 +0200
@@ -0,0 +1,15 @@
+import pymupdf
+import os
+
+def test_4716():
+    """Confirm that ZERO WIDTH JOINER will never start a word."""
+    script_dir = os.path.dirname(__file__)
+    filename = os.path.join(script_dir, "resources", "test_4716.pdf")
+    doc = pymupdf.open(filename)
+    expected = set(["+25.00", "Любимый", "-10.00"])
+    word_text = set()
+    for page in doc:
+        words = page.get_text("words")
+        for w in words:
+            word_text.add(w[4])
+    assert word_text == expected
--- a/tests/test_annots.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_annots.py	Sat Oct 11 11:19:58 2025 +0200
@@ -236,7 +236,10 @@
     pymupdf.TOOLS.set_annot_stem('jorj')
     try:
         path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf')
-        path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected.pdf')
+        if pymupdf.mupdf_version_tuple >= (1, 27):
+            path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected-after-1.27.0.pdf')
+        else:
+            path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected.pdf')
         path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf')
         doc = pymupdf.open(path_in)
         page = doc[0]
@@ -254,11 +257,13 @@
                 )
         doc.save(path_out, garbage=1, deflate=True, no_new_id=True)
         print(f'Have created {path_out}. comparing with {path_expected}.')
-        with open( path_out, 'rb') as f:
-            out = f.read()
-        with open( path_expected, 'rb') as f:
-            expected = f.read()
-        assert out == expected, f'Files differ: {path_out} {path_expected}'
+        with pymupdf.open(path_expected) as doc_expected, pymupdf.open(path_out) as doc_out:
+            rms = gentle_compare.pixmaps_rms(
+                    doc_expected[0].get_pixmap(),
+                    doc_out[0].get_pixmap(),
+                    )
+        print(f'test_1645: {rms=}')
+        assert rms < 0.1, f'Pixmaps differ: {path_expected=} {path_out=}'
     finally:
         # Restore annot_stem.
         pymupdf.TOOLS.set_annot_stem(annot_stem)
--- a/tests/test_codespell.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_codespell.py	Sat Oct 11 11:19:58 2025 +0200
@@ -12,6 +12,10 @@
     '''
     Check rebased Python code with codespell.
     '''
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_codespell(): not running on Pyodide - cannot run child processes.')
+        return
+        
     if not hasattr(pymupdf, 'mupdf'):
         print('Not running codespell with classic implementation.')
         return
--- a/tests/test_flake8.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_flake8.py	Sat Oct 11 11:19:58 2025 +0200
@@ -9,6 +9,10 @@
     '''
     Check rebased Python code with flake8.
     '''
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_flake8(): not running on Pyodide - cannot run child processes.')
+        return
+        
     if not hasattr(pymupdf, 'mupdf'):
         print(f'Not running flake8 with classic implementation.')
         return
--- a/tests/test_font.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_font.py	Sat Oct 11 11:19:58 2025 +0200
@@ -83,6 +83,10 @@
         assert text == expected
 
 def test_fontarchive():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_fontarchive(): not running on Pyodide - we get ValueError: No font code \'notos\' found in pymupdf-fonts..')
+        return
+        
     import subprocess
     arch = pymupdf.Archive()
     css = pymupdf.css_for_pymupdf_font("notos", archive=arch, name="sans-serif")
@@ -234,6 +238,10 @@
 
 
 def test_4457():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4457(): not running on Pyodide - cannot run child processes.')
+        return
+        
     print()
     files = (
             ('https://github.com/user-attachments/files/20862923/test_4457_a.pdf', 'test_4457_a.pdf', None, 4),
--- a/tests/test_general.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_general.py	Sat Oct 11 11:19:58 2025 +0200
@@ -785,6 +785,9 @@
 
 
 def test_subset_fonts():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_subset_fonts(): not running on Pyodide - ValueError: No font code \'ubuntu\' found in pymupdf-fonts.')
+        return
     """Confirm subset_fonts is working."""
     if not hasattr(pymupdf, "mupdf"):
         print("Not testing 'test_subset_fonts' in classic.")
@@ -1026,6 +1029,10 @@
     os.remove(oldfile)
 
 def test_cli():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_cli(): not running on Pyodide - cannot run child processes.')
+        return
+        
     if not hasattr(pymupdf, 'mupdf'):
         print('test_cli(): Not running on classic because of fitz_old.')
         return
@@ -1063,6 +1070,10 @@
     Check redirection of messages and log diagnostics with environment
     variables PYMUPDF_LOG and PYMUPDF_MESSAGE.
     '''
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_cli_out(): not running on Pyodide - cannot run child processes.')
+        return
+        
     if not hasattr(pymupdf, 'mupdf'):
         print('test_cli(): Not running on classic because of fitz_old.')
         return
@@ -1150,6 +1161,10 @@
     '''
     Checks pymupdf.use_python_logging().
     '''
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_cli(): not running on Pyodide - cannot run child processes.')
+        return
+        
     log_prefix = None
     if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
         log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''
@@ -1433,6 +1448,10 @@
     Checks behaviour of fz_open_document() and fz_open_document_with_stream()
     with different filenames/magic values.
     '''
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_open2(): not running on Pyodide - cannot run child processes.')
+        return
+        
     if platform.system() == 'Windows':
         print(f'test_open2(): not running on Windows because `git ls-files` known fail on Github Windows runners.')
         return
@@ -1789,6 +1808,10 @@
     document.delete_page()
 
 def test_4263():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4263(): not running on Pyodide - cannot run child processes.')
+        return
+        
     path = os.path.normpath(f'{__file__}/../../tests/resources/test_4263.pdf')
     path_out = f'{path}.linerarized.pdf'
     command = f'pymupdf clean -linear {path} {path_out}'
@@ -1915,6 +1938,10 @@
         
 
 def test_4533():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4533(): not running on Pyodide - cannot run child processes.')
+        return
+        
     print()
     path = util.download(
             'https://github.com/user-attachments/files/20497146/NineData_user_manual_V3.0.5.pdf',
@@ -1962,10 +1989,16 @@
     print(f'{pymupdf.pymupdf_git_branch=}')
     print(f'{pymupdf.pymupdf_git_sha=}')
     print(f'{pymupdf.pymupdf_version=}')
-    print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, "    ")}')
+    print(f'{pymupdf.pymupdf_git_diff=}')
+    if pymupdf.pymupdf_git_diff:
+        print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, "    ")}')
     
 
 def test_4392():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4392(): not running on Pyodide - cannot run child processes.')
+        return
+        
     print()
     path = os.path.normpath(f'{__file__}/../../tests/test_4392.py')
     with open(path, 'w') as f:
@@ -2051,3 +2084,99 @@
     # Check pymupdf.Document.scrub() works.
     with pymupdf.open(path) as document:
         document.scrub()
+
+
+def test_4702():
+    if os.environ.get('PYODIDE_ROOT'):
+        # util.download() uses subprocess.
+        print('test_4702(): not running on Pyodide - cannot run child processes.')
+        return
+
+    path = util.download(
+            'https://github.com/user-attachments/files/22403483/01995b6ca7837b52abaa24e38e8c076d.pdf',
+            'test_4702.pdf',
+            )
+    with pymupdf.open(path) as document:
+        for xref in range(1, document.xref_length()):
+            print(f'{xref=}')
+            try:
+                _ = document.xref_object(xref)
+            except Exception as e1:
+                print(f'{e1=}')
+                try:
+                    document.update_object(xref, "<<>>")
+                except Exception as e2:
+                    print(f'{e2=}')
+                    raise
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    assert wt == 'repairing PDF document'
+    
+    with pymupdf.open(path) as document:
+        for xref in range(1, document.xref_length()):
+            print(f'{xref=}')
+            _ = document.xref_object(xref)
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    assert wt == 'repairing PDF document'
+
+
+def test_4712():
+    '''
+    Crash with "corrupted double-linked list
+    '''
+    if 1:
+        print(f'test_4712(): Not running because known to fail.')
+        return
+    path_a = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_a.pdf')
+    path_b = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_b.pdf')
+    doc1 = pymupdf.open(path_a)
+    for i in range(6):
+        doc1.load_page(i).get_pixmap()
+    doc2 = pymupdf.open(path_b)
+    for i in range(6):
+        doc2.load_page(i).get_pixmap()
+
+
+def test_4712m():
+    if 1:
+        print(f'test_4712b(): Not running because known to fail.')
+        return
+    
+    path_a = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_a.pdf')
+    path_b = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_b.pdf')
+    
+    mupdf = pymupdf.mupdf
+    def get_pixmap(page):
+        displaylist = mupdf.fz_new_display_list_from_page(page)
+        rect = mupdf.fz_bound_display_list(displaylist)
+        irect = mupdf.fz_round_rect(rect)
+        pixmap = mupdf.fz_new_pixmap_with_bbox(
+                mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB),
+                irect,
+                mupdf.FzSeparations(),
+                0,  # alpha
+                )
+        mupdf.fz_clear_pixmap_with_value(pixmap, 0xFF)
+        matrix = mupdf.FzMatrix()
+        device = mupdf.fz_new_draw_device(matrix, pixmap)
+        mupdf.fz_run_display_list(
+                displaylist,
+                device,
+                mupdf.FzMatrix(),
+                mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE),
+                mupdf.FzCookie(),
+                )
+        mupdf.fz_close_device(device)
+    
+    def process_document(document):
+        for i in range(6):
+            print(f'    {i=}', flush=1)
+            page = mupdf.fz_load_page(document, i)
+            get_pixmap(page)
+
+    print(f'Processing {path_a=}', flush=1)
+    document_a = mupdf.fz_open_document(path_a)
+    process_document(document_a)
+
+    print(f'Processing {path_b=}', flush=1)
+    document_b = mupdf.fz_open_document(path_b)
+    process_document(document_b)
--- a/tests/test_import.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_import.py	Sat Oct 11 11:19:58 2025 +0200
@@ -5,6 +5,10 @@
 
 
 def test_import():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_import(): not running on Pyodide - cannot run child processes.')
+        return
+        
     root = os.path.abspath(f'{__file__}/../../')
     p = f'{root}/tests/resources_test_import.py'
     with open(p, 'w') as f:
--- a/tests/test_memory.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_memory.py	Sat Oct 11 11:19:58 2025 +0200
@@ -17,6 +17,10 @@
     '''
     Check for memory leaks.
     '''
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_2791(): not running on Pyodide - No module named \'psutil\'.')
+        return
+        
     if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
         print(f'test_2791(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
         return
@@ -94,6 +98,10 @@
 
 
 def test_4090():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4090(): not running on Pyodide - No module named \'psutil\'.')
+        return
+        
     print(f'test_4090(): {os.environ.get("PYTHONMALLOC")=}.')
     import psutil
     process = psutil.Process()
@@ -148,6 +156,10 @@
 
 
 def test_4125():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4125(): not running on Pyodide - No module named \'psutil\'.')
+        return
+        
     if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
         print(f'test_4125(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
         return
--- a/tests/test_pixmap.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_pixmap.py	Sat Oct 11 11:19:58 2025 +0200
@@ -70,7 +70,7 @@
         pix2 = pymupdf.Pixmap(stream)
         assert repr(pix1) == repr(pix2)
     except ModuleNotFoundError:
-        assert platform.system() == 'Windows' and sys.maxsize == 2**31 - 1
+        assert platform.system() in ('Windows', 'Emscripten') and sys.maxsize == 2**31 - 1
 
 
 def test_save(tmpdir):
@@ -556,6 +556,9 @@
 
 
 def test_4445():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4445(): not running on Pyodide - cannot run child processes.')
+        return
     print()
     # Test case is large so we download it instead of having it in PyMuPDF
     # git. We put it in `cache/` directory do it is not removed by `git clean`
@@ -628,3 +631,22 @@
         assert rms == 0
     else:
         assert rms >= 10
+
+
+def test_4699():
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4699.pdf')
+    path_png_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4699.png')
+    path_png_actual = os.path.normpath(f'{__file__}/../../tests/test_4699.png')
+    with pymupdf.open(path) as document:
+        page = document[0]
+        pixmap = page.get_pixmap()
+        pixmap.save(path_png_actual)
+    print(f'Have saved to {path_png_actual=}.')
+    rms = gentle_compare.pixmaps_rms(path_png_expected, pixmap)
+    print(f'test_4699(): {rms=}')
+    if pymupdf.mupdf_version_tuple >= (1, 27):
+        assert rms == 0
+    else:
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        assert 'syntax error: cannot find ExtGState resource' in wt
+        assert rms > 20
--- a/tests/test_pylint.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_pylint.py	Sat Oct 11 11:19:58 2025 +0200
@@ -7,6 +7,10 @@
 
 def test_pylint():
     
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_pylint(): not running on Pyodide - cannot run child processes.')
+        return
+    
     if not hasattr(pymupdf, 'mupdf'):
         print(f'test_pylint(): Not running with classic implementation.')
         return
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_release.py	Sat Oct 11 11:19:58 2025 +0200
@@ -0,0 +1,84 @@
+import pymupdf
+
+import os
+import re
+import sys
+
+
+g_root_abs = os.path.normpath(f'{__file__}/../../')
+
+sys.path.insert(0, g_root_abs)
+try:
+    import pipcl
+    import setup
+finally:
+    del sys.path[0]
+
+g_root = pipcl.relpath(g_root_abs)
+
+
+def _file_line(path, text, re_match, offset=+2):
+    '''
+    Returns <file>:<line> for location of regex match.
+    
+    path:
+        filename.
+    text:
+        Contents of <filename>.
+    re_match:
+        A re.Match.
+    offset:
+        Added to line number of start of <re_match>. Default offset=2 is
+        because callers usually grep for leading newline, and line numbers are
+        generally 1-based.
+    '''
+    text_before = text[:re_match.start()]
+    line = text_before.count('\n') + offset
+    return f'{path}:{line}'
+
+
+def test_release_versions():
+    '''
+    PyMuPDF and default MuPDF must have same major.minor version.
+    '''
+    version_p_tuple = [int(i) for i in setup.version_p.split('.')]
+    version_mupdf_tuple = [int(i) for i in setup.version_mupdf.split('.')]
+    assert version_p_tuple[:2] == version_mupdf_tuple[:2], \
+            f'PyMuPDF and MuPDF major.minor versions do not match. {setup.version_p=} {setup.version_mupdf=}.'
+
+
+def test_release_bug_template():
+    '''
+    Bug report template must list current PyMuPDF version.
+    '''
+    p = f'{g_root}/.github/ISSUE_TEMPLATE/bug_report.yml'
+    expected = f'\n        - {setup.version_p}\n'
+    with open(p) as f:
+        text = f.read()
+    assert expected in text, f'{p}:1: Failed to find line for {setup.version_p=}, {expected!r}.'
+
+
+def test_release_changelog_version():
+    '''
+    In changes.txt, first item must match setup.version_p.
+    '''
+    p = f'{g_root}/changes.txt'
+    with open(p) as f:
+        text = f.read()
+    m = re.search(f'\n[*][*]Changes in version ([0-9.]+)[*][*]\n', text)
+    assert m, f'Cannot parse {p}.'
+    assert m[1] == setup.version_p, \
+            f'{_file_line(p, text, m)}: Cannot find {setup.version_p=} in first changelog item: {m[0].strip()!r}.'
+    
+
+def test_release_changelog_mupdf_version():
+    '''
+    In changes.txt, first mentioned of MuPDF must match setup.version_mupdf.
+    '''
+    p = f'{g_root}/changes.txt'
+    with open(p) as f:
+        text = f.read()
+    m = re.search(f'\n[*] Use MuPDF-([0-9.]+)[.]\n', text)
+    assert m, f'Cannot parse {p}.'
+    assert m[1] == setup.version_mupdf, \
+            f'{_file_line(p, text, m)}: First mentioned MuPDF version does not match {setup.version_mupdf=}: {m[0].strip()!r}.'
--- a/tests/test_tables.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_tables.py	Sat Oct 11 11:19:58 2025 +0200
@@ -184,7 +184,12 @@
     ), f"{pymupdf.TOOLS.set_small_glyph_heights()=}"
 
     wt = pymupdf.TOOLS.mupdf_warnings()
-    if pymupdf.mupdf_version_tuple >= (1, 26, 0):
+    if pymupdf.mupdf_version_tuple >= (1, 26, 8):
+        assert (
+            wt
+            == "bogus font ascent/descent values (3117 / -2463)\n... repeated 2 times...\nActualtext with no position. Text may be lost or mispositioned.\n... repeated 96 times..."
+        )
+    elif pymupdf.mupdf_version_tuple >= (1, 26, 0):
         assert (
             wt
             == "bogus font ascent/descent values (3117 / -2463)\n... repeated 2 times..."
--- a/tests/test_tesseract.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_tesseract.py	Sat Oct 11 11:19:58 2025 +0200
@@ -24,14 +24,18 @@
             tail = 'OCR initialisation failed'
         else:
             tail = 'Tesseract language initialisation failed'
-        e_expected = f'code=3: {tail}'
-        if platform.system() == 'OpenBSD':
-            # 2023-12-12: For some reason the SWIG catch code only catches
-            # the exception as FzErrorBase.
-            e_expected_type = pymupdf.mupdf.FzErrorBase
-            print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.')
+        if os.environ.get('PYODIDE_ROOT'):
+            e_expected = 'code=6: No OCR support in this build'
+            e_expected_type = pymupdf.mupdf.FzErrorUnsupported
         else:
-            e_expected_type = pymupdf.mupdf.FzErrorLibrary
+            e_expected = f'code=3: {tail}'
+            if platform.system() == 'OpenBSD':
+                # 2023-12-12: For some reason the SWIG catch code only catches
+                # the exception as FzErrorBase.
+                e_expected_type = pymupdf.mupdf.FzErrorBase
+                print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.')
+            else:
+                e_expected_type = pymupdf.mupdf.FzErrorLibrary
     else:
         # classic.
         e_expected = 'OCR initialisation failed'
@@ -71,6 +75,10 @@
     #
     # Note that Tesseract seems to output its own diagnostics.
     #
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_3842b(): not running on Pyodide - cannot run child processes.')
+        return
+        
     path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf')
     with pymupdf.open(path) as document:
         page = document[6]
@@ -91,6 +99,10 @@
 
 
 def test_3842():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_3842(): not running on Pyodide - cannot run child processes.')
+        return
+        
     path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf')
     with pymupdf.open(path) as document:
         page = document[6]
--- a/tests/test_textbox.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_textbox.py	Sat Oct 11 11:19:58 2025 +0200
@@ -7,6 +7,11 @@
 """
 import pymupdf
 
+import gentle_compare
+
+import os
+import textwrap
+
 # codespell:ignore-begin
 text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.
 
@@ -182,7 +187,9 @@
         assert spare_height < 0
         assert scale == 1
         spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0)
-        assert spare_height == 0
+        page.draw_rect(rect, (1, 0, 0))
+        doc.save(os.path.normpath(f'{__file__}/../../tests/test_htmlbox1.pdf'))
+        assert abs(spare_height - 3.8507) < 0.001
         assert 0 < scale < 1
         page = doc.reload_page(page)
         link = page.get_links()[0]  # extracts the links on the page
@@ -286,3 +293,77 @@
         text = '111111111'
         print(f'Calling writer.fill_textbox().', flush=1)
         writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8)
+
+
+def test_4613():
+    print()
+    text = 3 * 'abcdefghijklmnopqrstuvwxyz\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n'
+    story = pymupdf.Story(text)
+    rect = pymupdf.Rect(10, 10, 100, 100)
+    
+    # Test default operation where we get additional scaling down because of
+    # the long words in our text.
+    print(f'test_4613(): ### Testing default operation.')
+    with pymupdf.open() as doc:
+        page = doc.new_page()
+        spare_height, scale = page.insert_htmlbox(rect, story)
+        print(f'test_4613(): {spare_height=} {scale=}')
+        # The additional down-scaling from the long word widths results in
+        # spare vertical space.
+        page.draw_rect(rect, (1, 0, 0))
+        path = os.path.normpath(f'{__file__}/../../tests/test_4613.pdf')
+        doc.save(path)
+
+        path_pixmap = os.path.normpath(f'{__file__}/../../tests/test_4613.png')
+        path_pixmap_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4613.png')
+        pixmap = page.get_pixmap(dpi=300)
+        pixmap.save(path_pixmap)
+        
+        pixmap_diff = gentle_compare.pixmaps_diff(path_pixmap_expected, pixmap)
+        pixmap_diff.save(os.path.normpath(f'{__file__}/../../tests/test_4613-diff.png'))
+        
+        rms = gentle_compare.pixmaps_rms(pixmap, path_pixmap_expected)
+        print(f'{rms=}')
+        assert rms == 0, f'{rms=}'
+    
+        assert abs(spare_height - 45.7536) < 0.1
+        assert abs(scale - 0.4009) < 0.01
+
+        new_text = page.get_text('text', clip=rect)
+        print(f'test_4613(): new_text:')
+        print(textwrap.indent(new_text, '    '))
+        assert new_text == text
+
+    # Check with _scale_word_width=False - ignore too-wide words.
+    print(f'test_4613(): ### Testing with _scale_word_width=False.')
+    with pymupdf.open() as doc:
+        page = doc.new_page()
+        spare_height, scale = page.insert_htmlbox(rect, story, _scale_word_width=False)
+        print(f'test_4613(): _scale_word_width=False: {spare_height=} {scale=}')
+        # With _scale_word_width=False we allow long words to extend beyond the
+        # rect, so we should have spare_height == 0 and only a small amount of
+        # down-scaling.
+        assert spare_height == 0
+        assert abs(scale - 0.914) < 0.01
+        new_text = page.get_text('text', clip=rect)
+        print(f'test_4613(): new_text:')
+        print(textwrap.indent(new_text, '    '))
+        assert new_text == textwrap.dedent('''
+                abcdefghijklmno
+                ABCDEFGHIJKLM
+                abcdefghijklmno
+                ABCDEFGHIJKLM
+                abcdefghijklmno
+                ABCDEFGHIJKLM
+                ''')[1:]
+        
+
+    # Check that we get no fit if scale_low is not low enough.
+    print(f'test_4613(): ### Testing with scale_low too high to allow a fit.')
+    with pymupdf.open() as doc:
+        page = doc.new_page()
+        scale_low=0.6
+        spare_height, scale = page.insert_htmlbox(rect, story, scale_low=scale_low)
+        print(f'test_4613(): {scale_low=}: {spare_height=} {scale=}')
+        assert spare_height == -1
+        assert scale == scale_low
--- a/tests/test_textextract.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_textextract.py	Sat Oct 11 11:19:58 2025 +0200
@@ -263,6 +263,10 @@
 
 
 def test_document_text():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_document_text(): not running on Pyodide - multiprocessing not available.')
+        return
+        
     import platform
     import time
     
@@ -310,6 +314,9 @@
 
 
 def test_4524():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4524(): not running on Pyodide - multiprocessing not available.')
+        return
     path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf')
     print('')
     document = pymupdf.Document(path)
@@ -331,6 +338,11 @@
             for line in text.split('\n'):
                 print(f'    {line!r}')
             print('='*40)
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple < (1, 26, 8):
+        assert not wt
+    else:
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 2 times...'
 
 
 def test_3687():
@@ -375,12 +387,14 @@
     assert texts1 == texts0
 
     wt = pymupdf.TOOLS.mupdf_warnings()
-    if pymupdf.mupdf_version_tuple < (1, 27):
-        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...'
-    else:
+    if pymupdf.mupdf_version_tuple >= (1, 27):
         expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing'
         expected = '\n'.join([expected] * 56)
         assert wt == expected
+    elif pymupdf.mupdf_version_tuple >= (1, 26, 8):
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 7684 times...'
+    else:
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...'
 
 def test_3650():
     path = os.path.normpath(f'{__file__}/../../tests/resources/test_3650.pdf')
@@ -878,6 +892,9 @@
     
     # This output is different from expected_1_23_5.
     expected_mupdf_1_26_1 = b'JOB No.: Shipper (complete name and address) \xe5\x8f\x91\xe8\xb4\xa7\xe4\xba\xba(\xe5\x90\x8d\xe7\xa7\xb0\xe5\x8f\x8a\xe5\x9c\xb0\xe5\x9d\x80)  Tel:                                  Fax: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81     \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88'.decode()
+
+    # This output is different from either of the two expected strings.
+    expected_mupdf_1_27_0 = b'JOB No.: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81   \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88572702 \n \xe6\x93\x8d\xe4\xbd\x9c\xe9\x83\xa8: \n88570008 \n88570004 \n \xe6\x96\x87\xe4\xbb\xb6\xe9\x83\xa8: \n88570003\n \nNotify Party(complete name and address, '.decode()
     
     print(f'expected_1_23_5\n{textwrap.indent(expected_1_23_5, "    ")}')
     print(f'expected_mupdf_1_26_1\n{textwrap.indent(expected_mupdf_1_26_1, "    ")}')
@@ -887,10 +904,16 @@
     print(f'{text=}')
     print(f'{text.encode()=}')
     
-    if pymupdf.mupdf_version_tuple >= (1, 26, 1):
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple >= (1, 26, 8):
+        assert text == expected_mupdf_1_27_0
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...'
+    elif pymupdf.mupdf_version_tuple >= (1, 26, 1):
         assert text == expected_mupdf_1_26_1
+        assert not wt
     else:
         print(f'No expected output for {pymupdf.mupdf_version_tuple=}')
+        assert not wt
 
 
 def test_4503():