Mercurial > hgrepos > Python2 > PyMuPDF

diff src/utils.py @ 41:71bcc18e306f
MERGE: New upstream PyMuPDF v1.26.5 including MuPDF v1.26.10 BUGS: Needs some additional changes yet. Not yet tested.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Sat, 11 Oct 2025 15:24:40 +0200
parents: a6bc019ac0b2
--- a/src/utils.py	Tue Sep 23 10:27:15 2025 +0200
+++ b/src/utils.py	Sat Oct 11 15:24:40 2025 +0200
@@ -6,9 +6,7 @@
 # lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is
 # maintained and developed by Artifex Software, Inc. https://artifex.com.
 # ------------------------------------------------------------------------
-import io
 import math
-import os
 import typing
 import weakref
 
@@ -51,428 +49,6 @@
 """
 
 
-def write_text(
-        page: pymupdf.Page,
-        rect=None,
-        writers=None,
-        overlay=True,
-        color=None,
-        opacity=None,
-        keep_proportion=True,
-        rotate=0,
-        oc=0,
-        ) -> None:
-    """Write the text of one or more pymupdf.TextWriter objects.
-
-    Args:
-        rect: target rectangle. If None, the union of the text writers is used.
-        writers: one or more pymupdf.TextWriter objects.
-        overlay: put in foreground or background.
-        keep_proportion: maintain aspect ratio of rectangle sides.
-        rotate: arbitrary rotation angle.
-        oc: the xref of an optional content object
-    """
-    assert isinstance(page, pymupdf.Page)
-    if not writers:
-        raise ValueError("need at least one pymupdf.TextWriter")
-    if type(writers) is pymupdf.TextWriter:
-        if rotate == 0 and rect is None:
-            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
-            return None
-        else:
-            writers = (writers,)
-    clip = writers[0].text_rect
-    textdoc = pymupdf.Document()
-    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
-    for writer in writers:
-        clip |= writer.text_rect
-        writer.write_text(tpage, opacity=opacity, color=color)
-    if rect is None:
-        rect = clip
-    page.show_pdf_page(
-        rect,
-        textdoc,
-        0,
-        overlay=overlay,
-        keep_proportion=keep_proportion,
-        rotate=rotate,
-        clip=clip,
-        oc=oc,
-    )
-    textdoc = None
-    tpage = None
-
-
-def show_pdf_page(
-        page,
-        rect,
-        docsrc,
-        pno=0,
-        keep_proportion=True,
-        overlay=True,
-        oc=0,
-        rotate=0,
-        clip=None,
-        ) -> int:
-    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.
-
-    Args:
-        rect: (rect-like) where to place the source image
-        docsrc: (document) source PDF
-        pno: (int) source page number
-        keep_proportion: (bool) do not change width-height-ratio
-        overlay: (bool) put in foreground
-        oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
-        rotate: (int) degrees (multiple of 90)
-        clip: (rect-like) part of source page rectangle
-    Returns:
-        xref of inserted object (for reuse)
-    """
-    def calc_matrix(sr, tr, keep=True, rotate=0):
-        """Calculate transformation matrix from source to target rect.
-
-        Notes:
-            The product of four matrices in this sequence: (1) translate the
-            source rect's center to the origin, (2) rotate, (3) scale,
-            (4) translate to the target rect's center.
-        Args:
-            sr: source rect in PDF (!) coordinate system
-            tr: target rect in PDF coordinate system
-            keep: whether to keep source ratio of width to height
-            rotate: rotation angle in degrees
-        Returns:
-            Transformation matrix.
-        """
-        # calc center point of source rect
-        smp = (sr.tl + sr.br) / 2.0
-        # calc center point of target rect
-        tmp = (tr.tl + tr.br) / 2.0
-
-        # m moves to (0, 0), then rotates
-        m = pymupdf.Matrix(1, 0, 0, 1, -smp.x, -smp.y) * pymupdf.Matrix(rotate)
-
-        sr1 = sr * m  # resulting source rect to calculate scale factors
-
-        fw = tr.width / sr1.width  # scale the width
-        fh = tr.height / sr1.height  # scale the height
-        if keep:
-            fw = fh = min(fw, fh)  # take min if keeping aspect ratio
-
-        m *= pymupdf.Matrix(fw, fh)  # concat scale matrix
-        m *= pymupdf.Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
-        return pymupdf.JM_TUPLE(m)
-
-    pymupdf.CheckParent(page)
-    doc = page.parent
-
-    if not doc.is_pdf or not docsrc.is_pdf:
-        raise ValueError("is no PDF")
-
-    if rect.is_empty or rect.is_infinite:
-        raise ValueError("rect must be finite and not empty")
-
-    while pno < 0:  # support negative page numbers
-        pno += docsrc.page_count
-    src_page = docsrc[pno]  # load source page
-
-    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates
-
-    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
-    if src_rect.is_empty or src_rect.is_infinite:
-        raise ValueError("clip must be finite and not empty")
-    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord
-
-    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)
-
-    # list of existing /Form /XObjects
-    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
-    ilst += [i[7] for i in doc.get_page_images(page.number)]
-    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
-
-    # create a name not in that list
-    n = "fzFrm"
-    i = 0
-    _imgname = n + "0"
-    while _imgname in ilst:
-        i += 1
-        _imgname = n + str(i)
-
-    isrc = docsrc._graft_id  # used as key for graftmaps
-    if doc._graft_id == isrc:
-        raise ValueError("source document must not equal target")
-
-    # retrieve / make pymupdf.Graftmap for source PDF
-    gmap = doc.Graftmaps.get(isrc, None)
-    if gmap is None:
-        gmap = pymupdf.Graftmap(doc)
-        doc.Graftmaps[isrc] = gmap
-
-    # take note of generated xref for automatic reuse
-    pno_id = (isrc, pno)  # id of docsrc[pno]
-    xref = doc.ShownPages.get(pno_id, 0)
-
-    if overlay:
-        page.wrap_contents()  # ensure a balanced graphics state
-    xref = page._show_pdf_page(
-        src_page,
-        overlay=overlay,
-        matrix=matrix,
-        xref=xref,
-        oc=oc,
-        clip=src_rect,
-        graftmap=gmap,
-        _imgname=_imgname,
-    )
-    doc.ShownPages[pno_id] = xref
-
-    return xref
-
-
-def replace_image(page: pymupdf.Page, xref: int, *, filename=None, pixmap=None, stream=None):
-    """Replace the image referred to by xref.
-
-    Replace the image by changing the object definition stored under xref. This
-    will leave the page's appearance instructions intact, so the new image is
-    displayed with the same bbox, rotation etc.
-    By providing a small fully transparent image, an effect as if the image had
-    been deleted can be achieved.
-    A typical use may include replacing large images by a smaller version,
-    e.g. with a lower resolution or graylevel instead of colored.
-
-    Args:
-        xref: the xref of the image to replace.
-        filename, pixmap, stream: exactly one of these must be provided. The
-            meaning being the same as in Page.insert_image.
-    """
-    doc = page.parent  # the owning document
-    if not doc.xref_is_image(xref):
-        raise ValueError("xref not an image")  # insert new image anywhere in page
-    if bool(filename) + bool(stream) + bool(pixmap) != 1:
-        raise ValueError("Exactly one of filename/stream/pixmap must be given")
-    new_xref = page.insert_image(
-        page.rect, filename=filename, stream=stream, pixmap=pixmap
-    )
-    doc.xref_copy(new_xref, xref)  # copy over new to old
-    last_contents_xref = page.get_contents()[-1]
-    # new image insertion has created a new /Contents source,
-    # which we will set to spaces now
-    doc.update_stream(last_contents_xref, b" ")
-    page._image_info = None  # clear cache of extracted image information
-
-
-def delete_image(page: pymupdf.Page, xref: int):
-    """Delete the image referred to by xref.
-
-    Actually replaces it with a small transparent Pixmap using method Page.replace_image.
-
-    Args:
-        xref: xref of the image to delete.
-    """
-    # make a small 100% transparent pixmap (of just any dimension)
-    pix = pymupdf.Pixmap(pymupdf.csGRAY, (0, 0, 1, 1), 1)
-    pix.clear_with()  # clear all samples bytes to 0x00
-    page.replace_image(xref, pixmap=pix)
-
-
-def insert_image(
-        page,
-        rect,
-        *,
-        alpha=-1,
-        filename=None,
-        height=0,
-        keep_proportion=True,
-        mask=None,
-        oc=0,
-        overlay=True,
-        pixmap=None,
-        rotate=0,
-        stream=None,
-        width=0,
-        xref=0,
-        ):
-    """Insert an image for display in a rectangle.
-
-    Args:
-        rect: (rect_like) position of image on the page.
-        alpha: (int, optional) set to 0 if image has no transparency.
-        filename: (str, Path, file object) image filename.
-        height: (int)
-        keep_proportion: (bool) keep width / height ratio (default).
-        mask: (bytes, optional) image consisting of alpha values to use.
-        oc: (int) xref of OCG or OCMD to declare as Optional Content.
-        overlay: (bool) put in foreground (default) or background.
-        pixmap: (pymupdf.Pixmap) use this as image.
-        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
-        stream: (bytes) use this as image.
-        width: (int)
-        xref: (int) use this as image.
-
-    'page' and 'rect' are positional, all other parameters are keywords.
-
-    If 'xref' is given, that image is used. Other input options are ignored.
-    Else, exactly one of pixmap, stream or filename must be given.
-
-    'alpha=0' for non-transparent images improves performance significantly.
-    Affects stream and filename only.
-
-    Optimum transparent insertions are possible by using filename / stream in
-    conjunction with a 'mask' image of alpha values.
-
-    Returns:
-        xref (int) of inserted image. Re-use as argument for multiple insertions.
-    """
-    pymupdf.CheckParent(page)
-    doc = page.parent
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-
-    if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
-        raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")
-
-    if filename:
-        if type(filename) is str:
-            pass
-        elif hasattr(filename, "absolute"):
-            filename = str(filename)
-        elif hasattr(filename, "name"):
-            filename = filename.name
-        else:
-            raise ValueError("bad filename")
-
-    if filename and not os.path.exists(filename):
-        raise FileNotFoundError("No such file: '%s'" % filename)
-    elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
-        raise ValueError("stream must be bytes-like / BytesIO")
-    elif pixmap and type(pixmap) is not pymupdf.Pixmap:
-        raise ValueError("pixmap must be a pymupdf.Pixmap")
-    if mask and not (stream or filename):
-        raise ValueError("mask requires stream or filename")
-    if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
-        raise ValueError("mask must be bytes-like / BytesIO")
-    while rotate < 0:
-        rotate += 360
-    while rotate >= 360:
-        rotate -= 360
-    if rotate not in (0, 90, 180, 270):
-        raise ValueError("bad rotate value")
-
-    r = pymupdf.Rect(rect)
-    if r.is_empty or r.is_infinite:
-        raise ValueError("rect must be finite and not empty")
-    clip = r * ~page.transformation_matrix
-
-    # Create a unique image reference name.
-    ilst = [i[7] for i in doc.get_page_images(page.number)]
-    ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
-    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
-    n = "fzImg"  # 'pymupdf image'
-    i = 0
-    _imgname = n + "0"  # first name candidate
-    while _imgname in ilst:
-        i += 1
-        _imgname = n + str(i)  # try new name
-
-    if overlay:
-        page.wrap_contents()  # ensure a balanced graphics state
-    digests = doc.InsertedImages
-    xref, digests = page._insert_image(
-        filename=filename,
-        pixmap=pixmap,
-        stream=stream,
-        imask=mask,
-        clip=clip,
-        overlay=overlay,
-        oc=oc,
-        xref=xref,
-        rotate=rotate,
-        keep_proportion=keep_proportion,
-        width=width,
-        height=height,
-        alpha=alpha,
-        _imgname=_imgname,
-        digests=digests,
-    )
-    if digests is not None:
-        doc.InsertedImages = digests
-
-    return xref
-
-
-def search_for(
-        page,
-        text,
-        *,
-        clip=None,
-        quads=False,
-        flags=pymupdf.TEXT_DEHYPHENATE
-            | pymupdf.TEXT_PRESERVE_WHITESPACE
-            | pymupdf.TEXT_PRESERVE_LIGATURES
-            | pymupdf.TEXT_MEDIABOX_CLIP
-            ,
-        textpage=None,
-        ) -> list:
-    """Search for a string on a page.
-
-    Args:
-        text: string to be searched for
-        clip: restrict search to this rectangle
-        quads: (bool) return quads instead of rectangles
-        flags: bit switches, default: join hyphenated words
-        textpage: a pre-created pymupdf.TextPage
-    Returns:
-        a list of rectangles or quads, each containing one occurrence.
-    """
-    if clip is not None:
-        clip = pymupdf.Rect(clip)
-
-    pymupdf.CheckParent(page)
-    tp = textpage
-    if tp is None:
-        tp = page.get_textpage(clip=clip, flags=flags)  # create pymupdf.TextPage
-    elif getattr(tp, "parent") != page:
-        raise ValueError("not a textpage of this page")
-    rlist = tp.search(text, quads=quads)
-    if textpage is None:
-        del tp
-    return rlist
-
-
-def search_page_for(
-    doc: pymupdf.Document,
-    pno: int,
-    text: str,
-    quads: bool = False,
-    clip: rect_like = None,
-    flags: int = pymupdf.TEXT_DEHYPHENATE
-            | pymupdf.TEXT_PRESERVE_LIGATURES
-            | pymupdf.TEXT_PRESERVE_WHITESPACE
-            | pymupdf.TEXT_MEDIABOX_CLIP
-            ,
-    textpage: pymupdf.TextPage = None,
-) -> list:
-    """Search for a string on a page.
-
-    Args:
-        pno: page number
-        text: string to be searched for
-        clip: restrict search to this rectangle
-        quads: (bool) return quads instead of rectangles
-        flags: bit switches, default: join hyphenated words
-        textpage: reuse a prepared textpage
-    Returns:
-        a list of rectangles or quads, each containing an occurrence.
-    """
-
-    return doc[pno].search_for(
-        text,
-        quads=quads,
-        clip=clip,
-        flags=flags,
-        textpage=textpage,
-    )
-
-
 def get_text_blocks(
     page: pymupdf.Page,
     clip: rect_like = None,
@@ -822,81 +398,6 @@
     return tpage
 
 
-def get_image_info(page: pymupdf.Page, hashes: bool = False, xrefs: bool = False) -> list:
-    """Extract image information only from a pymupdf.TextPage.
-
-    Args:
-        hashes: (bool) include MD5 hash for each image.
-        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
-    """
-    doc = page.parent
-    if xrefs and doc.is_pdf:
-        hashes = True
-    if not doc.is_pdf:
-        xrefs = False
-    imginfo = getattr(page, "_image_info", None)
-    if imginfo and not xrefs:
-        return imginfo
-    if not imginfo:
-        tp = page.get_textpage(flags=pymupdf.TEXT_PRESERVE_IMAGES)
-        imginfo = tp.extractIMGINFO(hashes=hashes)
-        del tp
-        if hashes:
-            page._image_info = imginfo
-    if not xrefs or not doc.is_pdf:
-        return imginfo
-    imglist = page.get_images()
-    digests = {}
-    for item in imglist:
-        xref = item[0]
-        pix = pymupdf.Pixmap(doc, xref)
-        digests[pix.digest] = xref
-        del pix
-    for i in range(len(imginfo)):
-        item = imginfo[i]
-        xref = digests.get(item["digest"], 0)
-        item["xref"] = xref
-        imginfo[i] = item
-    return imginfo
-
-
-def get_image_rects(page: pymupdf.Page, name, transform=False) -> list:
-    """Return list of image positions on a page.
-
-    Args:
-        name: (str, list, int) image identification. May be reference name, an
-              item of the page's image list or an xref.
-        transform: (bool) whether to also return the transformation matrix.
-    Returns:
-        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
-        for all image locations on the page.
-    """
-    if type(name) in (list, tuple):
-        xref = name[0]
-    elif type(name) is int:
-        xref = name
-    else:
-        imglist = [i for i in page.get_images() if i[7] == name]
-        if imglist == []:
-            raise ValueError("bad image name")
-        elif len(imglist) != 1:
-            raise ValueError("multiple image names found")
-        xref = imglist[0][0]
-    pix = pymupdf.Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
-    digest = pix.digest
-    del pix
-    infos = page.get_image_info(hashes=True)
-    if not transform:
-        bboxes = [pymupdf.Rect(im["bbox"]) for im in infos if im["digest"] == digest]
-    else:
-        bboxes = [
-            (pymupdf.Rect(im["bbox"]), pymupdf.Matrix(im["transform"]))
-            for im in infos
-            if im["digest"] == digest
-        ]
-    return bboxes
-
-
 def get_text(
     page: pymupdf.Page,
     option: str = "text",
@@ -1006,101 +507,6 @@
     return t
 
 
-def get_page_text(
-    doc: pymupdf.Document,
-    pno: int,
-    option: str = "text",
-    clip: rect_like = None,
-    flags: OptInt = None,
-    textpage: pymupdf.TextPage = None,
-    sort: bool = False,
-) -> typing.Any:
-    """Extract a document page's text by page number.
-
-    Notes:
-        Convenience function calling page.get_text().
-    Args:
-        pno: page number
-        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
-    Returns:
-        output of page.get_text() for the requested option.
-    """
-    return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort)
-
-def get_pixmap(
-        page: pymupdf.Page,
-        *,
-        matrix: matrix_like=pymupdf.Identity,
-        dpi=None,
-        colorspace: pymupdf.Colorspace=pymupdf.csRGB,
-        clip: rect_like=None,
-        alpha: bool=False,
-        annots: bool=True,
-        ) -> pymupdf.Pixmap:
-    """Create pixmap of page.
-
-    Keyword args:
-        matrix: Matrix for transformation (default: Identity).
-        dpi: desired dots per inch. If given, matrix is ignored.
-        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
-        clip: (irect-like) restrict rendering to this area.
-        alpha: (bool) whether to include alpha channel
-        annots: (bool) whether to also render annotations
-    """
-    if dpi:
-        zoom = dpi / 72
-        matrix = pymupdf.Matrix(zoom, zoom)
-
-    if type(colorspace) is str:
-        if colorspace.upper() == "GRAY":
-            colorspace = pymupdf.csGRAY
-        elif colorspace.upper() == "CMYK":
-            colorspace = pymupdf.csCMYK
-        else:
-            colorspace = pymupdf.csRGB
-    if colorspace.n not in (1, 3, 4):
-        raise ValueError("unsupported colorspace")
-
-    dl = page.get_displaylist(annots=annots)
-    pix = dl.get_pixmap(matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip)
-    dl = None
-    if dpi:
-        pix.set_dpi(dpi, dpi)
-    return pix
-
-
-def get_page_pixmap(
-    doc: pymupdf.Document,
-    pno: int,
-    *,
-    matrix: matrix_like = pymupdf.Identity,
-    dpi=None,
-    colorspace: pymupdf.Colorspace = pymupdf.csRGB,
-    clip: rect_like = None,
-    alpha: bool = False,
-    annots: bool = True,
-) -> pymupdf.Pixmap:
-    """Create pixmap of document page by page number.
-
-    Notes:
-        Convenience function calling page.get_pixmap.
-    Args:
-        pno: (int) page number
-        matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
-        colorspace: (str,pymupdf.Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
-        clip: (irect-like) restrict rendering to this area.
-        alpha: (bool) include alpha channel
-        annots: (bool) also render annotations
-    """
-    return doc[pno].get_pixmap(
-            matrix=matrix,
-            dpi=dpi, colorspace=colorspace,
-            clip=clip,
-            alpha=alpha,
-            annots=annots
-            )
-
-
 def getLinkDict(ln, document=None) -> dict:
     if isinstance(ln, pymupdf.Outline):
         dest = ln.destination(document)
@@ -1160,280 +566,6 @@
     return nl
 
 
-def get_links(page: pymupdf.Page) -> list:
-    """Create a list of all links contained in a PDF page.
-
-    Notes:
-        see PyMuPDF documentation for details.
-    """
-
-    pymupdf.CheckParent(page)
-    ln = page.first_link
-    links = []
-    while ln:
-        nl = getLinkDict(ln, page.parent)
-        links.append(nl)
-        ln = ln.next
-    if links != [] and page.parent.is_pdf:
-        linkxrefs = [x for x in
-                #page.annot_xrefs()
-                pymupdf.JM_get_annot_xref_list2(page)
-                if x[1] == pymupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
-                ]
-        if len(linkxrefs) == len(links):
-            for i in range(len(linkxrefs)):
-                links[i]["xref"] = linkxrefs[i][0]
-                links[i]["id"] = linkxrefs[i][2]
-    return links
-
-
-def get_toc(
-    doc: pymupdf.Document,
-    simple: bool = True,
-) -> list:
-    """Create a table of contents.
-
-    Args:
-        simple: a bool to control output. Returns a list, where each entry consists of outline level, title, page number and link destination (if simple = False). For details see PyMuPDF's documentation.
-    """
-    def recurse(olItem, liste, lvl):
-        """Recursively follow the outline item chain and record item information in a list."""
-        while olItem and olItem.this.m_internal:
-            if olItem.title:
-                title = olItem.title
-            else:
-                title = " "
-
-            if not olItem.is_external:
-                if olItem.uri:
-                    if olItem.page == -1:
-                        resolve = doc.resolve_link(olItem.uri)
-                        page = resolve[0] + 1
-                    else:
-                        page = olItem.page + 1
-                else:
-                    page = -1
-            else:
-                page = -1
-
-            if not simple:
-                link = getLinkDict(olItem, doc)
-                liste.append([lvl, title, page, link])
-            else:
-                liste.append([lvl, title, page])
-
-            if olItem.down:
-                liste = recurse(olItem.down, liste, lvl + 1)
-            olItem = olItem.next
-        return liste
-
-    # ensure document is open
-    if doc.is_closed:
-        raise ValueError("document closed")
-    doc.init_doc()
-    olItem = doc.outline
-    if not olItem:
-        return []
-    lvl = 1
-    liste = []
-    toc = recurse(olItem, liste, lvl)
-    if doc.is_pdf and not simple:
-        doc._extend_toc_items(toc)
-    return toc
-
-
-def del_toc_item(
-    doc: pymupdf.Document,
-    idx: int,
-) -> None:
-    """Delete TOC / bookmark item by index."""
-    xref = doc.get_outline_xrefs()[idx]
-    doc._remove_toc_item(xref)
-
-
-def set_toc_item(
-    doc: pymupdf.Document,
-    idx: int,
-    dest_dict: OptDict = None,
-    kind: OptInt = None,
-    pno: OptInt = None,
-    uri: OptStr = None,
-    title: OptStr = None,
-    to: point_like = None,
-    filename: OptStr = None,
-    zoom: float = 0,
-) -> None:
-    """Update TOC item by index.
-
-    It allows changing the item's title and link destination.
-
-    Args:
-        idx:
-            (int) desired index of the TOC list, as created by get_toc.
-        dest_dict:
-            (dict) destination dictionary as created by get_toc(False).
-            Overrides all other parameters. If None, the remaining parameters
-            are used to make a dest dictionary.
-        kind:
-            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
-            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
-            be deleted.
-        pno:
-            (int) page number (1-based like in get_toc). Required if
-            pymupdf.LINK_GOTO.
-        uri:
-            (str) the URL, required if pymupdf.LINK_URI.
-        title:
-            (str) the new title. No change if None.
-        to:
-            (point-like) destination on the target page. If omitted, (72, 36)
-            will be used as target coordinates.
-        filename:
-            (str) destination filename, required for pymupdf.LINK_GOTOR and
-            pymupdf.LINK_LAUNCH.
-        name:
-            (str) a destination name for pymupdf.LINK_NAMED.
-        zoom:
-            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
-    """
-    xref = doc.get_outline_xrefs()[idx]
-    page_xref = 0
-    if type(dest_dict) is dict:
-        if dest_dict["kind"] == pymupdf.LINK_GOTO:
-            pno = dest_dict["page"]
-            page_xref = doc.page_xref(pno)
-            page_height = doc.page_cropbox(pno).height
-            to = dest_dict.get('to', pymupdf.Point(72, 36))
-            to.y = page_height - to.y
-            dest_dict["to"] = to
-        action = getDestStr(page_xref, dest_dict)
-        if not action.startswith("/A"):
-            raise ValueError("bad bookmark dest")
-        color = dest_dict.get("color")
-        if color:
-            color = list(map(float, color))
-            if len(color) != 3 or min(color) < 0 or max(color) > 1:
-                raise ValueError("bad color value")
-        bold = dest_dict.get("bold", False)
-        italic = dest_dict.get("italic", False)
-        flags = italic + 2 * bold
-        collapse = dest_dict.get("collapse")
-        return doc._update_toc_item(
-            xref,
-            action=action[2:],
-            title=title,
-            color=color,
-            flags=flags,
-            collapse=collapse,
-        )
-
-    if kind == pymupdf.LINK_NONE:  # delete bookmark item
-        return doc.del_toc_item(idx)
-    if kind is None and title is None:  # treat as no-op
-        return None
-    if kind is None:  # only update title text
-        return doc._update_toc_item(xref, action=None, title=title)
-
-    if kind == pymupdf.LINK_GOTO:
-        if pno is None or pno not in range(1, doc.page_count + 1):
-            raise ValueError("bad page number")
-        page_xref = doc.page_xref(pno - 1)
-        page_height = doc.page_cropbox(pno - 1).height
-        if to is None:
-            to = pymupdf.Point(72, page_height - 36)
-        else:
-            to = pymupdf.Point(to)
-            to.y = page_height - to.y
-
-    ddict = {
-        "kind": kind,
-        "to": to,
-        "uri": uri,
-        "page": pno,
-        "file": filename,
-        "zoom": zoom,
-    }
-    action = getDestStr(page_xref, ddict)
-    if action == "" or not action.startswith("/A"):
-        raise ValueError("bad bookmark dest")
-
-    return doc._update_toc_item(xref, action=action[2:], title=title)
-
-
-def get_area(*args) -> float:
-    """Calculate area of rectangle.\nThe optional second argument is the unit: one of 'px' (default), 'in', 'cm', or 'mm'."""
-    rect = args[0]
-    if len(args) > 1:
-        unit = args[1]
-    else:
-        unit = "px"
-    u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)}
-    f = (u[unit][0] / u[unit][1]) ** 2
-    return f * rect.width * rect.height
-
-
-def set_metadata(doc: pymupdf.Document, m: dict = None) -> None:
-    """Update the PDF /Info object.
-
-    Args:
-        m: a dictionary like doc.metadata.
-    """
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document closed or encrypted")
-    if m is None:
-        m = {}
-    elif type(m) is not dict:
-        raise ValueError("bad metadata")
-    keymap = {
-        "author": "Author",
-        "producer": "Producer",
-        "creator": "Creator",
-        "title": "Title",
-        "format": None,
-        "encryption": None,
-        "creationDate": "CreationDate",
-        "modDate": "ModDate",
-        "subject": "Subject",
-        "keywords": "Keywords",
-        "trapped": "Trapped",
-    }
-    valid_keys = set(keymap.keys())
-    diff_set = set(m.keys()).difference(valid_keys)
-    if diff_set != set():
-        msg = "bad dict key(s): %s" % diff_set
-        raise ValueError(msg)
-
-    t, temp = doc.xref_get_key(-1, "Info")
-    if t != "xref":
-        info_xref = 0
-    else:
-        info_xref = int(temp.replace("0 R", ""))
-
-    if m == {} and info_xref == 0:  # nothing to do
-        return
-
-    if info_xref == 0:  # no prev metadata: get new xref
-        info_xref = doc.get_new_xref()
-        doc.update_object(info_xref, "<<>>")  # fill it with empty object
-        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
-    elif m == {}:  # remove existing metadata
-        doc.xref_set_key(-1, "Info", "null")
-        doc.init_doc()
-        return
-
-    for key, val in [(k, v) for k, v in m.items() if keymap[k] is not None]:
-        pdf_key = keymap[key]
-        if not bool(val) or val in ("none", "null"):
-            val = "null"
-        else:
-            val = pymupdf.get_pdf_str(val)
-        doc.xref_set_key(info_xref, pdf_key, val)
-    doc.init_doc()
-    return
-
-
 def getDestStr(xref: int, ddict: dict) -> str:
     """Calculate the PDF action string.
 
@@ -1492,647 +624,6 @@
     return ""
 
 
-def set_toc(
-    doc: pymupdf.Document,
-    toc: list,
-    collapse: int = 1,
-) -> int:
-    """Create new outline tree (table of contents, TOC).
-
-    Args:
-        toc: (list, tuple) each entry must contain level, title, page and
-            optionally top margin on the page. None or '()' remove the TOC.
-        collapse: (int) collapses entries beyond this level. Zero or None
-            shows all entries unfolded.
-    Returns:
-        the number of inserted items, or the number of removed items respectively.
-    """
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document closed or encrypted")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    if not toc:  # remove all entries
-        return len(doc._delToC())
-
-    # validity checks --------------------------------------------------------
-    if type(toc) not in (list, tuple):
-        raise ValueError("'toc' must be list or tuple")
-    toclen = len(toc)
-    page_count = doc.page_count
-    t0 = toc[0]
-    if type(t0) not in (list, tuple):
-        raise ValueError("items must be sequences of 3 or 4 items")
-    if t0[0] != 1:
-        raise ValueError("hierarchy level of item 0 must be 1")
-    for i in list(range(toclen - 1)):
-        t1 = toc[i]
-        t2 = toc[i + 1]
-        if not -1 <= t1[2] <= page_count:
-            raise ValueError("row %i: page number out of range" % i)
-        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
-            raise ValueError("bad row %i" % (i + 1))
-        if (type(t2[0]) is not int) or t2[0] < 1:
-            raise ValueError("bad hierarchy level in row %i" % (i + 1))
-        if t2[0] > t1[0] + 1:
-            raise ValueError("bad hierarchy level in row %i" % (i + 1))
-    # no formal errors in toc --------------------------------------------------
-
-    # --------------------------------------------------------------------------
-    # make a list of xref numbers, which we can use for our TOC entries
-    # --------------------------------------------------------------------------
-    old_xrefs = doc._delToC()  # del old outlines, get their xref numbers
-
-    # prepare table of xrefs for new bookmarks
-    old_xrefs = []
-    xref = [0] + old_xrefs
-    xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
-    if toclen > len(old_xrefs):  # too few old xrefs?
-        for i in range((toclen - len(old_xrefs))):
-            xref.append(doc.get_new_xref())  # acquire new ones
-
-    lvltab = {0: 0}  # to store last entry per hierarchy level
-
-    # ------------------------------------------------------------------------------
-    # contains new outline objects as strings - first one is the outline root
-    # ------------------------------------------------------------------------------
-    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
-    # ------------------------------------------------------------------------------
-    # build olitems as a list of PDF-like connected dictionaries
-    # ------------------------------------------------------------------------------
-    for i in range(toclen):
-        o = toc[i]
-        lvl = o[0]  # level
-        title = pymupdf.get_pdf_str(o[1])  # title
-        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
-        page_xref = doc.page_xref(pno)
-        page_height = doc.page_cropbox(pno).height
-        top = pymupdf.Point(72, page_height - 36)
-        dest_dict = {"to": top, "kind": pymupdf.LINK_GOTO}  # fall back target
-        if o[2] < 0:
-            dest_dict["kind"] = pymupdf.LINK_NONE
-        if len(o) > 3:  # some target is specified
-            if type(o[3]) in (int, float):  # convert a number to a point
-                dest_dict["to"] = pymupdf.Point(72, page_height - o[3])
-            else:  # if something else, make sure we have a dict
-                # We make a copy of o[3] to avoid modifying our caller's data.
-                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
-                if "to" not in dest_dict:  # target point not in dict?
-                    dest_dict["to"] = top  # put default in
-                else:  # transform target to PDF coordinates
-                    page = doc[pno]
-                    point = pymupdf.Point(dest_dict["to"])
-                    point.y = page.cropbox.height - point.y
-                    point = point * page.rotation_matrix
-                    dest_dict["to"] = (point.x, point.y)
-        d = {}
-        d["first"] = -1
-        d["count"] = 0
-        d["last"] = -1
-        d["prev"] = -1
-        d["next"] = -1
-        d["dest"] = getDestStr(page_xref, dest_dict)
-        d["top"] = dest_dict["to"]
-        d["title"] = title
-        d["parent"] = lvltab[lvl - 1]
-        d["xref"] = xref[i + 1]
-        d["color"] = dest_dict.get("color")
-        d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
-        lvltab[lvl] = i + 1
-        parent = olitems[lvltab[lvl - 1]]  # the parent entry
-
-        if (
-            dest_dict.get("collapse") or collapse and lvl > collapse
-        ):  # suppress expansion
-            parent["count"] -= 1  # make /Count negative
-        else:
-            parent["count"] += 1  # positive /Count
-
-        if parent["first"] == -1:
-            parent["first"] = i + 1
-            parent["last"] = i + 1
-        else:
-            d["prev"] = parent["last"]
-            prev = olitems[parent["last"]]
-            prev["next"] = i + 1
-            parent["last"] = i + 1
-        olitems.append(d)
-
-    # ------------------------------------------------------------------------------
-    # now create each outline item as a string and insert it in the PDF
-    # ------------------------------------------------------------------------------
-    for i, ol in enumerate(olitems):
-        txt = "<<"
-        if ol["count"] != 0:
-            txt += "/Count %i" % ol["count"]
-        try:
-            txt += ol["dest"]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["first"] > -1:
-                txt += "/First %i 0 R" % xref[ol["first"]]
-        except Exception:
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["last"] > -1:
-                txt += "/Last %i 0 R" % xref[ol["last"]]
-        except Exception:
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["next"] > -1:
-                txt += "/Next %i 0 R" % xref[ol["next"]]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["parent"] > -1:
-                txt += "/Parent %i 0 R" % xref[ol["parent"]]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            if ol["prev"] > -1:
-                txt += "/Prev %i 0 R" % xref[ol["prev"]]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-        try:
-            txt += "/Title" + ol["title"]
-        except Exception:
-            # Verbose in PyMuPDF/tests.
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            pass
-
-        if ol.get("color") and len(ol["color"]) == 3:
-            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
-        if ol.get("flags", 0) > 0:
-            txt += "/F %i" % ol["flags"]
-
-        if i == 0:  # special: this is the outline root
-            txt += "/Type/Outlines"  # so add the /Type entry
-        txt += ">>"
-        doc.update_object(xref[i], txt)  # insert the PDF object
-
-    doc.init_doc()
-    return toclen
-
-
-def do_widgets(
-    tar: pymupdf.Document,
-    src: pymupdf.Document,
-    graftmap,
-    from_page: int = -1,
-    to_page: int = -1,
-    start_at: int = -1,
-    join_duplicates=0,
-) -> None:
-    """Insert widgets of copied page range into target PDF.
-
-    Parameter values **must** equal those of method insert_pdf() which
-    must have been previously executed.
-    """
-    if not src.is_form_pdf:  # nothing to do: source PDF has no fields
-        return
-
-    def clean_kid_parents(acro_fields):
-        """ Make sure all kids have correct "Parent" pointers."""
-        for i in range(acro_fields.pdf_array_len()):
-            parent = acro_fields.pdf_array_get(i)
-            kids = parent.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
-            for j in range(kids.pdf_array_len()):
-                kid = kids.pdf_array_get(j)
-                kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), parent)
-
-    def join_widgets(pdf, acro_fields, xref1, xref2, name):
-        """Called for each pair of widgets having the same name.
-
-        Args:
-            pdf: target MuPDF document
-            acro_fields: object Root/AcroForm/Fields
-            xref1, xref2: widget xrefs having same names
-            name: (str) the name
-
-        Result:
-            Defined or updated widget parent that points to both widgets.
-        """
-
-        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
-            """Merge widget in xref2 into "Kids" list of widget xref1.
-
-            Args:
-                xref1, kids1: target widget and its "Kids" array.
-                xref2, kids2: source wwidget and its "Kids" array (may be empty).
-            """
-            # make indirect objects from widgets
-            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
-            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
-            # find source widget in "Fields" array
-            idx = acro_fields.pdf_array_find(w2_ind)
-            acro_fields.pdf_array_delete(idx)
-
-            if not kids2.pdf_is_array():  # source widget has no kids
-                widget = mupdf.pdf_load_object(pdf, xref2)
-
-                # delete name from widget and insert target as parent
-                widget.pdf_dict_del(pymupdf.PDF_NAME("T"))
-                widget.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
-
-                # put in target Kids
-                kids1.pdf_array_push(w2_ind)
-            else:  # copy source kids to target kids
-                for i in range(kids2.pdf_array_len()):
-                    kid = kids2.pdf_array_get(i)
-                    kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
-                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
-                    kids1.pdf_array_push(kid_ind)
-
-        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
-            """Make new "Parent" for two widgets with same name.
-
-            Args:
-                xref1, w1: first widget
-                xref2, w2: second widget
-                name: field name
-
-            Result:
-                Both widgets have no "Kids". We create a new object with the
-                name and a "Kids" array containing the widgets.
-                Original widgets must be removed from AcroForm/Fields.
-            """
-            # make new "Parent" object
-            new = mupdf.pdf_new_dict(pdf, 5)
-            new.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), name)
-            kids = new.pdf_dict_put_array(pymupdf.PDF_NAME("Kids"), 2)
-            new_obj = mupdf.pdf_add_object(pdf, new)
-            new_obj_xref = new_obj.pdf_to_num()
-            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)
-
-            # copy over some required source widget properties
-            ft = w1.pdf_dict_get(pymupdf.PDF_NAME("FT"))
-            w1.pdf_dict_del(pymupdf.PDF_NAME("FT"))
-            new_obj.pdf_dict_put(pymupdf.PDF_NAME("FT"), ft)
-
-            aa = w1.pdf_dict_get(pymupdf.PDF_NAME("AA"))
-            w1.pdf_dict_del(pymupdf.PDF_NAME("AA"))
-            new_obj.pdf_dict_put(pymupdf.PDF_NAME("AA"), aa)
-
-            # remove name field, insert "Parent" field in source widgets
-            w1.pdf_dict_del(pymupdf.PDF_NAME("T"))
-            w1.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
-            w2.pdf_dict_del(pymupdf.PDF_NAME("T"))
-            w2.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
-
-            # put source widgets in "kids" array
-            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
-            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
-            kids.pdf_array_push(ind1)
-            kids.pdf_array_push(ind2)
-
-            # remove source widgets from "AcroForm/Fields"
-            idx = acro_fields.pdf_array_find(ind1)
-            acro_fields.pdf_array_delete(idx)
-            idx = acro_fields.pdf_array_find(ind2)
-            acro_fields.pdf_array_delete(idx)
-
-            acro_fields.pdf_array_push(new_ind)
-
-        w1 = mupdf.pdf_load_object(pdf, xref1)
-        w2 = mupdf.pdf_load_object(pdf, xref2)
-        kids1 = w1.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
-        kids2 = w2.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
-
-        # check which widget has a suitable "Kids" array
-        if kids1.pdf_is_array():
-            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
-        elif kids2.pdf_is_array():
-            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
-        else:
-            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order
-
-    def get_kids(parent, kids_list):
-        """Return xref list of leaf kids for a parent.
-
-        Call with an empty list.
-        """
-        kids = mupdf.pdf_dict_get(parent, pymupdf.PDF_NAME("Kids"))
-        if not kids.pdf_is_array():
-            return kids_list
-        for i in range(kids.pdf_array_len()):
-            kid = kids.pdf_array_get(i)
-            if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, pymupdf.PDF_NAME("Kids"))):
-                kids_list = get_kids(kid, kids_list)
-            else:
-                kids_list.append(kid.pdf_to_num())
-        return kids_list
-
-    def kids_xrefs(widget):
-        """Get the xref of top "Parent" and the list of leaf widgets."""
-        kids_list = []
-        parent = mupdf.pdf_dict_get(widget, pymupdf.PDF_NAME("Parent"))
-        parent_xref = parent.pdf_to_num()
-        if parent_xref == 0:
-            return parent_xref, kids_list
-        kids_list = get_kids(parent, kids_list)
-        return parent_xref, kids_list
-
-    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
-        """Handle any widget name duplicates caused by the merge."""
-        names = {}  # key is a widget name, value a list of widgets having it.
-
-        # extract all names and widgets in "AcroForm/Fields"
-        for i in range(mupdf.pdf_array_len(acro_fields)):
-            wobject = mupdf.pdf_array_get(acro_fields, i)
-            xref = wobject.pdf_to_num()
-
-            # extract widget name and collect widget(s) using it
-            T = mupdf.pdf_dict_get_text_string(wobject, pymupdf.PDF_NAME("T"))
-            xrefs = names.get(T, [])
-            xrefs.append(xref)
-            names[T] = xrefs
-
-        for name, xrefs in names.items():
-            if len(xrefs) < 2:
-                continue
-            xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
-            if join_duplicates:  # combine fields with equal names
-                join_widgets(pdf, acro_fields, xref0, xref1, name)
-            else:  # make field names unique
-                newname = name + f" [{xref1}]"  # append this to the name
-                wobject = mupdf.pdf_load_object(pdf, xref1)
-                wobject.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), newname)
-
-        clean_kid_parents(acro_fields)
-
-    def get_acroform(doc):
-        """Retrieve the AcroForm dictionary form a PDF."""
-        pdf = mupdf.pdf_document_from_fz_document(doc)
-        # AcroForm (= central form field info)
-        return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")
-
-    tarpdf = mupdf.pdf_document_from_fz_document(tar)
-    srcpdf = mupdf.pdf_document_from_fz_document(src)
-
-    if tar.is_form_pdf:
-        # target is a Form PDF, so use it to include source fields
-        acro = get_acroform(tar)
-        # Important arrays in AcroForm
-        acro_fields = acro.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
-        tar_co = acro.pdf_dict_get(pymupdf.PDF_NAME("CO"))
-        if not tar_co.pdf_is_array():
-            tar_co = acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
-    else:
-        # target is no Form PDF, so copy over source AcroForm
-        acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy
-
-        # Clear "Fields" and "CO" arrays: will be populated by page fields.
-        # This is required to avoid copying unneeded objects.
-        acro.pdf_dict_del(pymupdf.PDF_NAME("Fields"))
-        acro.pdf_dict_put_array(pymupdf.PDF_NAME("Fields"), 5)
-        acro.pdf_dict_del(pymupdf.PDF_NAME("CO"))
-        acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
-
-        # Enrich AcroForm for copying to target
-        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)
-
-        # Insert AcroForm into target PDF
-        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
-        acro_fields = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
-        tar_co = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("CO"))
-
-        # get its xref and insert it into target catalog
-        tar_xref = acro_tar.pdf_to_num()
-        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
-        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), pymupdf.PDF_NAME("Root"))
-        root.pdf_dict_put(pymupdf.PDF_NAME("AcroForm"), acro_tar_ind)
-
-    if from_page <= to_page:
-        src_range = range(from_page, to_page + 1)
-    else:
-        src_range = range(from_page, to_page - 1, -1)
-
-    parents = {}  # information about widget parents
-
-    # remove "P" owning page reference from all widgets of all source pages
-    for i in src_range:
-        src_page = src[i]
-        for xref in [
-            xref
-            for xref, wtype, _ in src_page.annot_xrefs()
-            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
-        ]:
-            w_obj = mupdf.pdf_load_object(srcpdf, xref)
-            w_obj.pdf_dict_del(pymupdf.PDF_NAME("P"))
-
-            # get the widget's parent structure
-            parent_xref, old_kids = kids_xrefs(w_obj)
-            if parent_xref:
-                parents[parent_xref] = {
-                    "new_xref": 0,
-                    "old_kids": old_kids,
-                    "new_kids": [],
-                }
-    # Copy over Parent widgets first - they are not page-dependent
-    for xref in parents.keys():  # pylint: disable=consider-using-dict-items
-        parent = mupdf.pdf_load_object(srcpdf, xref)
-        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
-        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
-        kids_xrefs_new = get_kids(parent_tar, [])
-        parent_xref_new = parent_tar.pdf_to_num()
-        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
-        acro_fields.pdf_array_push(parent_ind)
-        parents[xref]["new_xref"] = parent_xref_new
-        parents[xref]["new_kids"] = kids_xrefs_new
-
-    for i in range(len(src_range)):
-        # read first copied over page in target
-        tar_page = tar[start_at + i]
-
-        # read the original page in the source PDF
-        src_page = src[src_range[i]]
-
-        # now walk through source page widgets and copy over
-        w_xrefs = [  # widget xrefs of the source page
-            xref
-            for xref, wtype, _ in src_page.annot_xrefs()
-            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
-        ]
-        if not w_xrefs:  # no widgets on this source page
-            continue
-
-        # convert to formal PDF page
-        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)
-
-        # extract annotations array
-        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"))
-        if not mupdf.pdf_is_array(tar_annots):
-            tar_annots = mupdf.pdf_dict_put_array(
-                tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"), 5
-            )
-
-        for xref in w_xrefs:
-            w_obj = mupdf.pdf_load_object(srcpdf, xref)
-
-            # check if field takes part in inter-field validations
-            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))
-
-            # check if parent of widget already in target
-            parent_xref = mupdf.pdf_to_num(
-                w_obj.pdf_dict_get(pymupdf.PDF_NAME("Parent"))
-            )
-            if parent_xref == 0:  # parent not in target yet
-                try:
-                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
-                except Exception as e:
-                    pymupdf.message_warning(f"cannot copy widget at {xref=}: {e}")
-                    continue
-                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
-                tar_xref = w_obj_tar.pdf_to_num()
-                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
-                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
-                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
-            else:
-                parent = parents[parent_xref]
-                idx = parent["old_kids"].index(xref)  # search for xref in parent
-                tar_xref = parent["new_kids"][idx]
-                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
-                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
-
-            # Into "AcroForm/CO" if a computation field.
-            if is_aac:
-                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)
-
-    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
-
-def do_links(
-    doc1: pymupdf.Document,
-    doc2: pymupdf.Document,
-    from_page: int = -1,
-    to_page: int = -1,
-    start_at: int = -1,
-) -> None:
-    """Insert links contained in copied page range into destination PDF.
-
-    Parameter values **must** equal those of method insert_pdf(), which must
-    have been previously executed.
-    """
-    #pymupdf.log( 'utils.do_links()')
-    # --------------------------------------------------------------------------
-    # internal function to create the actual "/Annots" object string
-    # --------------------------------------------------------------------------
-    def cre_annot(lnk, xref_dst, pno_src, ctm):
-        """Create annotation object string for a passed-in link."""
-
-        r = lnk["from"] * ctm  # rect in PDF coordinates
-        rect = _format_g(tuple(r))
-        if lnk["kind"] == pymupdf.LINK_GOTO:
-            txt = pymupdf.annot_skel["goto1"]  # annot_goto
-            idx = pno_src.index(lnk["page"])
-            p = lnk["to"] * ctm  # target point in PDF coordinates
-            annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
-
-        elif lnk["kind"] == pymupdf.LINK_GOTOR:
-            if lnk["page"] >= 0:
-                txt = pymupdf.annot_skel["gotor1"]  # annot_gotor
-                pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
-                if type(pnt) is not pymupdf.Point:
-                    pnt = pymupdf.Point(0, 0)
-                annot = txt(
-                    lnk["page"],
-                    pnt.x,
-                    pnt.y,
-                    lnk["zoom"],
-                    lnk["file"],
-                    lnk["file"],
-                    rect,
-                )
-            else:
-                txt = pymupdf.annot_skel["gotor2"]  # annot_gotor_n
-                to = pymupdf.get_pdf_str(lnk["to"])
-                to = to[1:-1]
-                f = lnk["file"]
-                annot = txt(to, f, rect)
-
-        elif lnk["kind"] == pymupdf.LINK_LAUNCH:
-            txt = pymupdf.annot_skel["launch"]  # annot_launch
-            annot = txt(lnk["file"], lnk["file"], rect)
-
-        elif lnk["kind"] == pymupdf.LINK_URI:
-            txt = pymupdf.annot_skel["uri"]  # annot_uri
-            annot = txt(lnk["uri"], rect)
-
-        else:
-            annot = ""
-
-        return annot
-
-    # --------------------------------------------------------------------------
-
-    # validate & normalize parameters
-    if from_page < 0:
-        fp = 0
-    elif from_page >= doc2.page_count:
-        fp = doc2.page_count - 1
-    else:
-        fp = from_page
-
-    if to_page < 0 or to_page >= doc2.page_count:
-        tp = doc2.page_count - 1
-    else:
-        tp = to_page
-
-    if start_at < 0:
-        raise ValueError("'start_at' must be >= 0")
-    sa = start_at
-
-    incr = 1 if fp <= tp else -1  # page range could be reversed
-
-    # lists of source / destination page numbers
-    pno_src = list(range(fp, tp + incr, incr))
-    pno_dst = [sa + i for i in range(len(pno_src))]
-
-    # lists of source / destination page xrefs
-    xref_src = []
-    xref_dst = []
-    for i in range(len(pno_src)):
-        p_src = pno_src[i]
-        p_dst = pno_dst[i]
-        old_xref = doc2.page_xref(p_src)
-        new_xref = doc1.page_xref(p_dst)
-        xref_src.append(old_xref)
-        xref_dst.append(new_xref)
-
-    # create the links for each copied page in destination PDF
-    for i in range(len(xref_src)):
-        page_src = doc2[pno_src[i]]  # load source page
-        links = page_src.get_links()  # get all its links
-        #pymupdf.log( '{pno_src=}')
-        #pymupdf.log( '{type(page_src)=}')
-        #pymupdf.log( '{page_src=}')
-        #pymupdf.log( '{=i len(links)}')
-        if len(links) == 0:  # no links there
-            page_src = None
-            continue
-        ctm = ~page_src.transformation_matrix  # calc page transformation matrix
-        page_dst = doc1[pno_dst[i]]  # load destination page
-        link_tab = []  # store all link definitions here
-        for l in links:
-            if l["kind"] == pymupdf.LINK_GOTO and (l["page"] not in pno_src):
-                continue  # GOTO link target not in copied pages
-            annot_text = cre_annot(l, xref_dst, pno_src, ctm)
-            if annot_text:
-                link_tab.append(annot_text)
-        if link_tab != []:
-            page_dst._addAnnot_FromString( tuple(link_tab))
-    #pymupdf.log( 'utils.do_links() returning.')
-
-
 def getLinkText(page: pymupdf.Page, lnk: dict) -> str:
     # --------------------------------------------------------------------------
     # define skeletons for /Annots object texts
@@ -2216,754 +707,6 @@
     return annot
 
 
-def delete_widget(page: pymupdf.Page, widget: pymupdf.Widget) -> pymupdf.Widget:
-    """Delete widget from page and return the next one."""
-    pymupdf.CheckParent(page)
-    annot = getattr(widget, "_annot", None)
-    if annot is None:
-        raise ValueError("bad type: widget")
-    nextwidget = widget.next
-    page.delete_annot(annot)
-    widget._annot.parent = None
-    keylist = list(widget.__dict__.keys())
-    for key in keylist:
-        del widget.__dict__[key]
-    return nextwidget
-
-
-def update_link(page: pymupdf.Page, lnk: dict) -> None:
-    """Update a link on the current page."""
-    pymupdf.CheckParent(page)
-    annot = getLinkText(page, lnk)
-    if annot == "":
-        raise ValueError("link kind not supported")
-
-    page.parent.update_object(lnk["xref"], annot, page=page)
-
-
-def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None:
-    """Insert a new link for the current page."""
-    pymupdf.CheckParent(page)
-    annot = getLinkText(page, lnk)
-    if annot == "":
-        raise ValueError("link kind not supported")
-    page._addAnnot_FromString((annot,))
-
-
-def insert_textbox(
-    page: pymupdf.Page,
-    rect: rect_like,
-    buffer: typing.Union[str, list],
-    *,
-    fontname: str = "helv",
-    fontfile: OptStr = None,
-    set_simple: int = 0,
-    encoding: int = 0,
-    fontsize: float = 11,
-    lineheight: OptFloat = None,
-    color: OptSeq = None,
-    fill: OptSeq = None,
-    expandtabs: int = 1,
-    align: int = 0,
-    rotate: int = 0,
-    render_mode: int = 0,
-    miter_limit: float = 1,
-    border_width: float = 0.05,
-    morph: OptSeq = None,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> float:
-    """Insert text into a given rectangle.
-
-    Notes:
-        Creates a Shape object, uses its same-named method and commits it.
-    Parameters:
-        rect: (rect-like) area to use for text.
-        buffer: text to be inserted
-        fontname: a Base-14 font, font name or '/name'
-        fontfile: name of a font file
-        fontsize: font size
-        lineheight: overwrite the font property
-        color: RGB color triple
-        expandtabs: handles tabulators with string function
-        align: left, center, right, justified
-        rotate: 0, 90, 180, or 270 degrees
-        morph: morph box with a matrix and a fixpoint
-        overlay: put text in foreground or background
-    Returns:
-        unused or deficit rectangle area (float)
-    """
-    img = page.new_shape()
-    rc = img.insert_textbox(
-        rect,
-        buffer,
-        fontsize=fontsize,
-        lineheight=lineheight,
-        fontname=fontname,
-        fontfile=fontfile,
-        set_simple=set_simple,
-        encoding=encoding,
-        color=color,
-        fill=fill,
-        expandtabs=expandtabs,
-        render_mode=render_mode,
-        miter_limit=miter_limit,
-        border_width=border_width,
-        align=align,
-        rotate=rotate,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    if rc >= 0:
-        img.commit(overlay)
-    return rc
-
-
-def insert_text(
-    page: pymupdf.Page,
-    point: point_like,
-    text: typing.Union[str, list],
-    *,
-    fontsize: float = 11,
-    lineheight: OptFloat = None,
-    fontname: str = "helv",
-    fontfile: OptStr = None,
-    set_simple: int = 0,
-    encoding: int = 0,
-    color: OptSeq = None,
-    fill: OptSeq = None,
-    border_width: float = 0.05,
-    miter_limit: float = 1,
-    render_mode: int = 0,
-    rotate: int = 0,
-    morph: OptSeq = None,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-):
-
-    img = page.new_shape()
-    rc = img.insert_text(
-        point,
-        text,
-        fontsize=fontsize,
-        lineheight=lineheight,
-        fontname=fontname,
-        fontfile=fontfile,
-        set_simple=set_simple,
-        encoding=encoding,
-        color=color,
-        fill=fill,
-        border_width=border_width,
-        render_mode=render_mode,
-        miter_limit=miter_limit,
-        rotate=rotate,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    if rc >= 0:
-        img.commit(overlay)
-    return rc
-
-
-def insert_htmlbox(
-    page,
-    rect,
-    text,
-    *,
-    css=None,
-    scale_low=0,
-    archive=None,
-    rotate=0,
-    oc=0,
-    opacity=1,
-    overlay=True,
-) -> float:
-    """Insert text with optional HTML tags and stylings into a rectangle.
-
-    Args:
-        rect: (rect-like) rectangle into which the text should be placed.
-        text: (str) text with optional HTML tags and stylings.
-        css: (str) CSS styling commands.
-        scale_low: (float) force-fit content by scaling it down. Must be in
-            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
-            down-scaling is acceptable. A value of 0.1 would mean that content
-            may be scaled down by at most 90%.
-        archive: Archive object pointing to locations of used fonts or images
-        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
-        oc: (int) the xref of an OCG / OCMD (Optional Content).
-        opacity: (float) set opacity of inserted content.
-        overlay: (bool) put text on top of page content.
-    Returns:
-        A tuple of floats (spare_height, scale).
-        spare_height: -1 if content did not fit, else >= 0. It is the height of the
-               unused (still available) rectangle stripe. Positive only if
-               scale_min = 1 (no down scaling).
-        scale: downscaling factor, 0 < scale <= 1. Set to 0 if spare_height = -1 (no fit).
-    """
-
-    # normalize rotation angle
-    if not rotate % 90 == 0:
-        raise ValueError("bad rotation angle")
-    while rotate < 0:
-        rotate += 360
-    while rotate >= 360:
-        rotate -= 360
-
-    if not 0 <= scale_low <= 1:
-        raise ValueError("'scale_low' must be in [0, 1]")
-
-    if css is None:
-        css = ""
-
-    rect = pymupdf.Rect(rect)
-    if rotate in (90, 270):
-        temp_rect = pymupdf.Rect(0, 0, rect.height, rect.width)
-    else:
-        temp_rect = pymupdf.Rect(0, 0, rect.width, rect.height)
-
-    # use a small border by default
-    mycss = "body {margin:1px;}" + css  # append user CSS
-
-    # either make a story, or accept a given one
-    if isinstance(text, str):  # if a string, convert to a Story
-        story = pymupdf.Story(html=text, user_css=mycss, archive=archive)
-    elif isinstance(text, pymupdf.Story):
-        story = text
-    else:
-        raise ValueError("'text' must be a string or a Story")
-    # ----------------------------------------------------------------
-    # Find a scaling factor that lets our story fit in
-    # ----------------------------------------------------------------
-    scale_max = None if scale_low == 0 else 1 / scale_low
-
-    fit = story.fit_scale(temp_rect, scale_min=1, scale_max=scale_max)
-    if not fit.big_enough:  # there was no fit
-        return (-1, scale_low)
-
-    filled = fit.filled
-    scale = 1 / fit.parameter  # shrink factor
-
-    spare_height = fit.rect.y1 - filled[3]  # unused room at rectangle bottom
-    # Note: due to MuPDF's logic this may be negative even for successful fits.
-    if scale != 1 or spare_height < 0:  # if scaling occurred, set spare_height to 0
-        spare_height = 0
-
-    def rect_function(*args):
-        return fit.rect, fit.rect, pymupdf.Identity
-
-    # draw story on temp PDF page
-    doc = story.write_with_links(rect_function)
-
-    # Insert opacity if requested.
-    # For this, we prepend a command to the /Contents.
-    if 0 <= opacity < 1:
-        tpage = doc[0]  # load page
-        # generate /ExtGstate for the page
-        alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
-        s = f"/{alp0} gs\n"  # generate graphic state command
-        pymupdf.TOOLS._insert_contents(tpage, s.encode(), 0)
-
-    # put result in target page
-    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)
-
-    # -------------------------------------------------------------------------
-    # re-insert links in target rect (show_pdf_page cannot copy annotations)
-    # -------------------------------------------------------------------------
-    # scaled center point of fit.rect
-    mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale
-
-    # center point of target rect
-    mp2 = (rect.tl + rect.br) / 2
-
-    # compute link positioning matrix:
-    # - move center of scaled-down fit.rect to (0,0)
-    # - rotate
-    # - move (0,0) to center of target rect
-    mat = (
-        pymupdf.Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
-        * pymupdf.Matrix(-rotate)
-        * pymupdf.Matrix(1, 0, 0, 1, mp2.x, mp2.y)
-    )
-
-    # copy over links
-    for link in doc[0].get_links():
-        link["from"] *= mat
-        page.insert_link(link)
-
-    return spare_height, scale
-
-
-def new_page(
-    doc: pymupdf.Document,
-    pno: int = -1,
-    width: float = 595,
-    height: float = 842,
-) -> pymupdf.Page:
-    """Create and return a new page object.
-
-    Args:
-        pno: (int) insert before this page. Default: after last page.
-        width: (float) page width in points. Default: 595 (ISO A4 width).
-        height: (float) page height in points. Default 842 (ISO A4 height).
-    Returns:
-        A pymupdf.Page object.
-    """
-    doc._newPage(pno, width=width, height=height)
-    return doc[pno]
-
-
-def insert_page(
-    doc: pymupdf.Document,
-    pno: int,
-    text: typing.Union[str, list, None] = None,
-    fontsize: float = 11,
-    width: float = 595,
-    height: float = 842,
-    fontname: str = "helv",
-    fontfile: OptStr = None,
-    color: OptSeq = (0,),
-) -> int:
-    """Create a new PDF page and insert some text.
-
-    Notes:
-        Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text().
-        For parameter details see these methods.
-    """
-    page = doc.new_page(pno=pno, width=width, height=height)
-    if not bool(text):
-        return 0
-    rc = page.insert_text(
-        (50, 72),
-        text,
-        fontsize=fontsize,
-        fontname=fontname,
-        fontfile=fontfile,
-        color=color,
-    )
-    return rc
-
-
-def draw_line(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    color: OptSeq = (0,),
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    morph: OptSeq = None,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc=0,
-) -> pymupdf.Point:
-    """Draw a line from point p1 to point p2."""
-    img = page.new_shape()
-    p = img.draw_line(pymupdf.Point(p1), pymupdf.Point(p2))
-    img.finish(
-        color=color,
-        dashes=dashes,
-        width=width,
-        closePath=False,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return p
-
-
-def draw_squiggle(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    breadth: float = 2,
-    color: OptSeq = (0,),
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    morph: OptSeq = None,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a squiggly line from point p1 to point p2."""
-    img = page.new_shape()
-    p = img.draw_squiggle(pymupdf.Point(p1), pymupdf.Point(p2), breadth=breadth)
-    img.finish(
-        color=color,
-        dashes=dashes,
-        width=width,
-        closePath=False,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return p
-
-
-def draw_zigzag(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    breadth: float = 2,
-    color: OptSeq = (0,),
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    morph: OptSeq = None,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a zigzag line from point p1 to point p2."""
-    img = page.new_shape()
-    p = img.draw_zigzag(pymupdf.Point(p1), pymupdf.Point(p2), breadth=breadth)
-    img.finish(
-        color=color,
-        dashes=dashes,
-        width=width,
-        closePath=False,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return p
-
-
-def draw_rect(
-        page: pymupdf.Page,
-        rect: rect_like,
-        color: OptSeq = (0,),
-        fill: OptSeq = None,
-        dashes: OptStr = None,
-        width: float = 1,
-        lineCap: int = 0,
-        lineJoin: int = 0,
-        morph: OptSeq = None,
-        overlay: bool = True,
-        stroke_opacity: float = 1,
-        fill_opacity: float = 1,
-        oc: int = 0,
-        radius=None,
-        ) -> pymupdf.Point:
-    '''
-    Draw a rectangle. See Shape class method for details.
-    '''
-    img = page.new_shape()
-    Q = img.draw_rect(pymupdf.Rect(rect), radius=radius)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_quad(
-    page: pymupdf.Page,
-    quad: quad_like,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    morph: OptSeq = None,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a quadrilateral."""
-    img = page.new_shape()
-    Q = img.draw_quad(pymupdf.Quad(quad))
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_polyline(
-    page: pymupdf.Page,
-    points: list,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    morph: OptSeq = None,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    closePath: bool = False,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw multiple connected line segments."""
-    img = page.new_shape()
-    Q = img.draw_polyline(points)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_circle(
-    page: pymupdf.Page,
-    center: point_like,
-    radius: float,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    morph: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a circle given its center and radius."""
-    img = page.new_shape()
-    Q = img.draw_circle(pymupdf.Point(center), radius)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-    return Q
-
-
-def draw_oval(
-    page: pymupdf.Page,
-    rect: typing.Union[rect_like, quad_like],
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    morph: OptSeq = None,
-    width: float = 1,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw an oval given its containing rectangle or quad."""
-    img = page.new_shape()
-    Q = img.draw_oval(rect)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_curve(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    p3: point_like,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    morph: OptSeq = None,
-    closePath: bool = False,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3."""
-    img = page.new_shape()
-    Q = img.draw_curve(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3))
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_bezier(
-    page: pymupdf.Page,
-    p1: point_like,
-    p2: point_like,
-    p3: point_like,
-    p4: point_like,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    width: float = 1,
-    morph: OptStr = None,
-    closePath: bool = False,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3."""
-    img = page.new_shape()
-    Q = img.draw_bezier(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3), pymupdf.Point(p4))
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
-def draw_sector(
-    page: pymupdf.Page,
-    center: point_like,
-    point: point_like,
-    beta: float,
-    color: OptSeq = (0,),
-    fill: OptSeq = None,
-    dashes: OptStr = None,
-    fullSector: bool = True,
-    morph: OptSeq = None,
-    width: float = 1,
-    closePath: bool = False,
-    lineCap: int = 0,
-    lineJoin: int = 0,
-    overlay: bool = True,
-    stroke_opacity: float = 1,
-    fill_opacity: float = 1,
-    oc: int = 0,
-) -> pymupdf.Point:
-    """Draw a circle sector given circle center, one arc end point and the angle of the arc.
-
-    Parameters:
-        center -- center of circle
-        point -- arc end point
-        beta -- angle of arc (degrees)
-        fullSector -- connect arc ends with center
-    """
-    img = page.new_shape()
-    Q = img.draw_sector(pymupdf.Point(center), pymupdf.Point(point), beta, fullSector=fullSector)
-    img.finish(
-        color=color,
-        fill=fill,
-        dashes=dashes,
-        width=width,
-        lineCap=lineCap,
-        lineJoin=lineJoin,
-        morph=morph,
-        closePath=closePath,
-        stroke_opacity=stroke_opacity,
-        fill_opacity=fill_opacity,
-        oc=oc,
-    )
-    img.commit(overlay)
-
-    return Q
-
-
 # ----------------------------------------------------------------------
 # Name:        wx.lib.colourdb.py
 # Purpose:     Adds a bunch of colour names and RGB values to the
@@ -3081,1360 +824,6 @@
     return fontname, ext, stype, asc, dsc
 
 
-def get_char_widths(
-    doc: pymupdf.Document, xref: int, limit: int = 256, idx: int = 0, fontdict: OptDict = None
-) -> list:
-    """Get list of glyph information of a font.
-
-    Notes:
-        Must be provided by its XREF number. If we already dealt with the
-        font, it will be recorded in doc.FontInfos. Otherwise we insert an
-        entry there.
-        Finally we return the glyphs for the font. This is a list of
-        (glyph, width) where glyph is an integer controlling the char
-        appearance, and width is a float controlling the char's spacing:
-        width * fontsize is the actual space.
-        For 'simple' fonts, glyph == ord(char) will usually be true.
-        Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.
-    """
-    fontinfo = pymupdf.CheckFontInfo(doc, xref)
-    if fontinfo is None:  # not recorded yet: create it
-        if fontdict is None:
-            name, ext, stype, asc, dsc = _get_font_properties(doc, xref)
-            fontdict = {
-                "name": name,
-                "type": stype,
-                "ext": ext,
-                "ascender": asc,
-                "descender": dsc,
-            }
-        else:
-            name = fontdict["name"]
-            ext = fontdict["ext"]
-            stype = fontdict["type"]
-            ordering = fontdict["ordering"]
-            simple = fontdict["simple"]
-
-        if ext == "":
-            raise ValueError("xref is not a font")
-
-        # check for 'simple' fonts
-        if stype in ("Type1", "MMType1", "TrueType"):
-            simple = True
-        else:
-            simple = False
-
-        # check for CJK fonts
-        if name in ("Fangti", "Ming"):
-            ordering = 0
-        elif name in ("Heiti", "Song"):
-            ordering = 1
-        elif name in ("Gothic", "Mincho"):
-            ordering = 2
-        elif name in ("Dotum", "Batang"):
-            ordering = 3
-        else:
-            ordering = -1
-
-        fontdict["simple"] = simple
-
-        if name == "ZapfDingbats":
-            glyphs = pymupdf.zapf_glyphs
-        elif name == "Symbol":
-            glyphs = pymupdf.symbol_glyphs
-        else:
-            glyphs = None
-
-        fontdict["glyphs"] = glyphs
-        fontdict["ordering"] = ordering
-        fontinfo = [xref, fontdict]
-        doc.FontInfos.append(fontinfo)
-    else:
-        fontdict = fontinfo[1]
-        glyphs = fontdict["glyphs"]
-        simple = fontdict["simple"]
-        ordering = fontdict["ordering"]
-
-    if glyphs is None:
-        oldlimit = 0
-    else:
-        oldlimit = len(glyphs)
-
-    mylimit = max(256, limit)
-
-    if mylimit <= oldlimit:
-        return glyphs
-
-    if ordering < 0:  # not a CJK font
-        glyphs = doc._get_char_widths(
-            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
-        )
-    else:  # CJK fonts use char codes and width = 1
-        glyphs = None
-
-    fontdict["glyphs"] = glyphs
-    fontinfo[1] = fontdict
-    pymupdf.UpdateFontInfo(doc, fontinfo)
-
-    return glyphs
-
-
-class Shape:
-    """Create a new shape."""
-
-    @staticmethod
-    def horizontal_angle(C, P):
-        """Return the angle to the horizontal for the connection from C to P.
-        This uses the arcus sine function and resolves its inherent ambiguity by
-        looking up in which quadrant vector S = P - C is located.
-        """
-        S = pymupdf.Point(P - C).unit  # unit vector 'C' -> 'P'
-        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
-        if S.x < 0:  # make arcsin result unique
-            if S.y <= 0:  # bottom-left
-                alfa = -(math.pi - alfa)
-            else:  # top-left
-                alfa = math.pi - alfa
-        else:
-            if S.y >= 0:  # top-right
-                pass
-            else:  # bottom-right
-                alfa = -alfa
-        return alfa
-
-    def __init__(self, page: pymupdf.Page):
-        pymupdf.CheckParent(page)
-        self.page = page
-        self.doc = page.parent
-        if not self.doc.is_pdf:
-            raise ValueError("is no PDF")
-        self.height = page.mediabox_size.y
-        self.width = page.mediabox_size.x
-        self.x = page.cropbox_position.x
-        self.y = page.cropbox_position.y
-
-        self.pctm = page.transformation_matrix  # page transf. matrix
-        self.ipctm = ~self.pctm  # inverted transf. matrix
-
-        self.draw_cont = ""
-        self.text_cont = ""
-        self.totalcont = ""
-        self.last_point = None
-        self.rect = None
-
-    def updateRect(self, x):
-        if self.rect is None:
-            if len(x) == 2:
-                self.rect = pymupdf.Rect(x, x)
-            else:
-                self.rect = pymupdf.Rect(x)
-
-        else:
-            if len(x) == 2:
-                x = pymupdf.Point(x)
-                self.rect.x0 = min(self.rect.x0, x.x)
-                self.rect.y0 = min(self.rect.y0, x.y)
-                self.rect.x1 = max(self.rect.x1, x.x)
-                self.rect.y1 = max(self.rect.y1, x.y)
-            else:
-                x = pymupdf.Rect(x)
-                self.rect.x0 = min(self.rect.x0, x.x0)
-                self.rect.y0 = min(self.rect.y0, x.y0)
-                self.rect.x1 = max(self.rect.x1, x.x1)
-                self.rect.y1 = max(self.rect.y1, x.y1)
-
-    def draw_line(self, p1: point_like, p2: point_like) -> pymupdf.Point:
-        """Draw a line between two points."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        if not (self.last_point == p1):
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
-            self.last_point = p1
-            self.updateRect(p1)
-
-        self.draw_cont += _format_g(pymupdf.JM_TUPLE(p2 * self.ipctm)) + " l\n"
-        self.updateRect(p2)
-        self.last_point = p2
-        return self.last_point
-
-    def draw_polyline(self, points: list) -> pymupdf.Point:
-        """Draw several connected line segments."""
-        for i, p in enumerate(points):
-            if i == 0:
-                if not (self.last_point == pymupdf.Point(p)):
-                    self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " m\n"
-                    self.last_point = pymupdf.Point(p)
-            else:
-                self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " l\n"
-            self.updateRect(p)
-
-        self.last_point = pymupdf.Point(points[-1])
-        return self.last_point
-
-    def draw_bezier(
-        self,
-        p1: point_like,
-        p2: point_like,
-        p3: point_like,
-        p4: point_like,
-    ) -> pymupdf.Point:
-        """Draw a standard cubic Bezier curve."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        p3 = pymupdf.Point(p3)
-        p4 = pymupdf.Point(p4)
-        if not (self.last_point == p1):
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
-        args = pymupdf.JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
-        self.draw_cont += _format_g(args) + " c\n"
-        self.updateRect(p1)
-        self.updateRect(p2)
-        self.updateRect(p3)
-        self.updateRect(p4)
-        self.last_point = p4
-        return self.last_point
-
-    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> pymupdf.Point:
-        """Draw an ellipse inside a tetrapod."""
-        if len(tetra) != 4:
-            raise ValueError("invalid arg length")
-        if hasattr(tetra[0], "__float__"):
-            q = pymupdf.Rect(tetra).quad
-        else:
-            q = pymupdf.Quad(tetra)
-
-        mt = q.ul + (q.ur - q.ul) * 0.5
-        mr = q.ur + (q.lr - q.ur) * 0.5
-        mb = q.ll + (q.lr - q.ll) * 0.5
-        ml = q.ul + (q.ll - q.ul) * 0.5
-        if not (self.last_point == ml):
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(ml * self.ipctm)) + " m\n"
-            self.last_point = ml
-        self.draw_curve(ml, q.ll, mb)
-        self.draw_curve(mb, q.lr, mr)
-        self.draw_curve(mr, q.ur, mt)
-        self.draw_curve(mt, q.ul, ml)
-        self.updateRect(q.rect)
-        self.last_point = ml
-        return self.last_point
-
-    def draw_circle(self, center: point_like, radius: float) -> pymupdf.Point:
-        """Draw a circle given its center and radius."""
-        if not radius > pymupdf.EPSILON:
-            raise ValueError("radius must be positive")
-        center = pymupdf.Point(center)
-        p1 = center - (radius, 0)
-        return self.draw_sector(center, p1, 360, fullSector=False)
-
-    def draw_curve(
-        self,
-        p1: point_like,
-        p2: point_like,
-        p3: point_like,
-    ) -> pymupdf.Point:
-        """Draw a curve between points using one control point."""
-        kappa = 0.55228474983
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        p3 = pymupdf.Point(p3)
-        k1 = p1 + (p2 - p1) * kappa
-        k2 = p3 + (p2 - p3) * kappa
-        return self.draw_bezier(p1, k1, k2, p3)
-
-    def draw_sector(
-        self,
-        center: point_like,
-        point: point_like,
-        beta: float,
-        fullSector: bool = True,
-    ) -> pymupdf.Point:
-        """Draw a circle sector."""
-        center = pymupdf.Point(center)
-        point = pymupdf.Point(point)
-        l3 = lambda a, b: _format_g((a, b)) + " m\n"
-        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
-        l5 = lambda a, b: _format_g((a, b)) + " l\n"
-        betar = math.radians(-beta)
-        w360 = math.radians(math.copysign(360, betar)) * (-1)
-        w90 = math.radians(math.copysign(90, betar))
-        w45 = w90 / 2
-        while abs(betar) > 2 * math.pi:
-            betar += w360  # bring angle below 360 degrees
-        if not (self.last_point == point):
-            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
-            self.last_point = point
-        Q = pymupdf.Point(0, 0)  # just make sure it exists
-        C = center
-        P = point
-        S = P - C  # vector 'center' -> 'point'
-        rad = abs(S)  # circle radius
-
-        if not rad > pymupdf.EPSILON:
-            raise ValueError("radius must be positive")
-
-        alfa = self.horizontal_angle(center, point)
-        while abs(betar) > abs(w90):  # draw 90 degree arcs
-            q1 = C.x + math.cos(alfa + w90) * rad
-            q2 = C.y + math.sin(alfa + w90) * rad
-            Q = pymupdf.Point(q1, q2)  # the arc's end point
-            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
-            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
-            R = pymupdf.Point(r1, r2)  # crossing point of tangents
-            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
-            kappa = kappah * abs(P - Q)
-            cp1 = P + (R - P) * kappa  # control point 1
-            cp2 = Q + (R - Q) * kappa  # control point 2
-            self.draw_cont += l4(*pymupdf.JM_TUPLE(
-                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
-            ))
-
-            betar -= w90  # reduce param angle by 90 deg
-            alfa += w90  # advance start angle by 90 deg
-            P = Q  # advance to arc end point
-        # draw (remaining) arc
-        if abs(betar) > 1e-3:  # significant degrees left?
-            beta2 = betar / 2
-            q1 = C.x + math.cos(alfa + betar) * rad
-            q2 = C.y + math.sin(alfa + betar) * rad
-            Q = pymupdf.Point(q1, q2)  # the arc's end point
-            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
-            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
-            R = pymupdf.Point(r1, r2)  # crossing point of tangents
-            # kappa height is 4/3 of segment height
-            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
-            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
-            cp1 = P + (R - P) * kappa  # control point 1
-            cp2 = Q + (R - Q) * kappa  # control point 2
-            self.draw_cont += l4(*pymupdf.JM_TUPLE(
-                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
-            ))
-        if fullSector:
-            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
-            self.draw_cont += l5(*pymupdf.JM_TUPLE(center * self.ipctm))
-            self.draw_cont += l5(*pymupdf.JM_TUPLE(Q * self.ipctm))
-        self.last_point = Q
-        return self.last_point
-
-    def draw_rect(self, rect: rect_like, *, radius=None) -> pymupdf.Point:
-        """Draw a rectangle.
-
-        Args:
-            radius: if not None, the rectangle will have rounded corners.
-                This is the radius of the curvature, given as percentage of
-                the rectangle width or height. Valid are values 0 < v <= 0.5.
-                For a sequence of two values, the corners will have different
-                radii. Otherwise, the percentage will be computed from the
-                shorter side. A value of (0.5, 0.5) will draw an ellipse.
-        """
-        r = pymupdf.Rect(rect)
-        if radius is None:  # standard rectangle
-            self.draw_cont += _format_g(pymupdf.JM_TUPLE(
-                list(r.bl * self.ipctm) + [r.width, r.height]
-            )) + " re\n"
-            self.updateRect(r)
-            self.last_point = r.tl
-            return self.last_point
-        # rounded corners requested. This requires 1 or 2 values, each
-        # with 0 < value <= 0.5
-        if hasattr(radius, "__float__"):
-            if radius <= 0 or radius > 0.5:
-                raise ValueError(f"bad radius value {radius}.")
-            d = min(r.width, r.height) * radius
-            px = (d, 0)
-            py = (0, d)
-        elif hasattr(radius, "__len__") and len(radius) == 2:
-            rx, ry = radius
-            px = (rx * r.width, 0)
-            py = (0, ry * r.height)
-            if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
-                raise ValueError(f"bad radius value {radius}.")
-        else:
-            raise ValueError(f"bad radius value {radius}.")
-
-        lp = self.draw_line(r.tl + py, r.bl - py)
-        lp = self.draw_curve(lp, r.bl, r.bl + px)
-
-        lp = self.draw_line(lp, r.br - px)
-        lp = self.draw_curve(lp, r.br, r.br - py)
-
-        lp = self.draw_line(lp, r.tr + py)
-        lp = self.draw_curve(lp, r.tr, r.tr - px)
-
-        lp = self.draw_line(lp, r.tl + px)
-        self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
-
-        self.updateRect(r)
-        return self.last_point
-
-    def draw_quad(self, quad: quad_like) -> pymupdf.Point:
-        """Draw a Quad."""
-        q = pymupdf.Quad(quad)
-        return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])
-
-    def draw_zigzag(
-        self,
-        p1: point_like,
-        p2: point_like,
-        breadth: float = 2,
-    ) -> pymupdf.Point:
-        """Draw a zig-zagged line from p1 to p2."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        S = p2 - p1  # vector start - end
-        rad = abs(S)  # distance of points
-        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
-        if cnt < 4:
-            raise ValueError("points too close")
-        mb = rad / cnt  # revised breadth
-        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
-        i_mat = ~matrix  # get original position
-        points = []  # stores edges
-        for i in range(1, cnt):
-            if i % 4 == 1:  # point "above" connection
-                p = pymupdf.Point(i, -1) * mb
-            elif i % 4 == 3:  # point "below" connection
-                p = pymupdf.Point(i, 1) * mb
-            else:  # ignore others
-                continue
-            points.append(p * i_mat)
-        self.draw_polyline([p1] + points + [p2])  # add start and end points
-        return p2
-
-    def draw_squiggle(
-        self,
-        p1: point_like,
-        p2: point_like,
-        breadth=2,
-    ) -> pymupdf.Point:
-        """Draw a squiggly line from p1 to p2."""
-        p1 = pymupdf.Point(p1)
-        p2 = pymupdf.Point(p2)
-        S = p2 - p1  # vector start - end
-        rad = abs(S)  # distance of points
-        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
-        if cnt < 4:
-            raise ValueError("points too close")
-        mb = rad / cnt  # revised breadth
-        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
-        i_mat = ~matrix  # get original position
-        k = 2.4142135623765633  # y of draw_curve helper point
-
-        points = []  # stores edges
-        for i in range(1, cnt):
-            if i % 4 == 1:  # point "above" connection
-                p = pymupdf.Point(i, -k) * mb
-            elif i % 4 == 3:  # point "below" connection
-                p = pymupdf.Point(i, k) * mb
-            else:  # else on connection line
-                p = pymupdf.Point(i, 0) * mb
-            points.append(p * i_mat)
-
-        points = [p1] + points + [p2]
-        cnt = len(points)
-        i = 0
-        while i + 2 < cnt:
-            self.draw_curve(points[i], points[i + 1], points[i + 2])
-            i += 2
-        return p2
-
-    # ==============================================================================
-    # Shape.insert_text
-    # ==============================================================================
-    def insert_text(
-        self,
-        point: point_like,
-        buffer: typing.Union[str, list],
-        *,
-        fontsize: float = 11,
-        lineheight: OptFloat = None,
-        fontname: str = "helv",
-        fontfile: OptStr = None,
-        set_simple: bool = 0,
-        encoding: int = 0,
-        color: OptSeq = None,
-        fill: OptSeq = None,
-        render_mode: int = 0,
-        border_width: float = 0.05,
-        miter_limit: float = 1,
-        rotate: int = 0,
-        morph: OptSeq = None,
-        stroke_opacity: float = 1,
-        fill_opacity: float = 1,
-        oc: int = 0,
-    ) -> int:
-
-        # ensure 'text' is a list of strings, worth dealing with
-        if not bool(buffer):
-            return 0
-
-        if type(buffer) not in (list, tuple):
-            text = buffer.splitlines()
-        else:
-            text = buffer
-
-        if not len(text) > 0:
-            return 0
-
-        point = pymupdf.Point(point)
-        try:
-            maxcode = max([ord(c) for c in " ".join(text)])
-        except Exception:
-            pymupdf.exception_info()
-            return 0
-
-        # ensure valid 'fontname'
-        fname = fontname
-        if fname.startswith("/"):
-            fname = fname[1:]
-
-        xref = self.page.insert_font(
-            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
-        )
-        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)
-
-        fontdict = fontinfo[1]
-        ordering = fontdict["ordering"]
-        simple = fontdict["simple"]
-        bfname = fontdict["name"]
-        ascender = fontdict["ascender"]
-        descender = fontdict["descender"]
-        if lineheight:
-            lheight = fontsize * lineheight
-        elif ascender - descender <= 1:
-            lheight = fontsize * 1.2
-        else:
-            lheight = fontsize * (ascender - descender)
-
-        if maxcode > 255:
-            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
-        else:
-            glyphs = fontdict["glyphs"]
-
-        tab = []
-        for t in text:
-            if simple and bfname not in ("Symbol", "ZapfDingbats"):
-                g = None
-            else:
-                g = glyphs
-            tab.append(pymupdf.getTJstr(t, g, simple, ordering))
-        text = tab
-
-        color_str = pymupdf.ColorCode(color, "c")
-        fill_str = pymupdf.ColorCode(fill, "f")
-        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
-            fill = color
-            fill_str = pymupdf.ColorCode(color, "f")
-
-        morphing = pymupdf.CheckMorph(morph)
-        rot = rotate
-        if rot % 90 != 0:
-            raise ValueError("bad rotate value")
-
-        while rot < 0:
-            rot += 360
-        rot = rot % 360  # text rotate = 0, 90, 270, 180
-
-        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
-        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
-        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
-        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
-        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
-        height = self.height
-        width = self.width
-
-        # setting up for standard rotation directions
-        # case rotate = 0
-        if morphing:
-            m1 = pymupdf.Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
-            mat = ~m1 * morph[1] * m1
-            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
-        else:
-            cm = ""
-        top = height - point.y - self.y  # start of 1st char
-        left = point.x + self.x  # start of 1. char
-        space = top  # space available
-        #headroom = point.y + self.y  # distance to page border
-        if rot == 90:
-            left = height - point.y - self.y
-            top = -point.x - self.x
-            cm += cmp90
-            space = width - abs(top)
-            #headroom = point.x + self.x
-
-        elif rot == 270:
-            left = -height + point.y + self.y
-            top = point.x + self.x
-            cm += cmm90
-            space = abs(top)
-            #headroom = width - point.x - self.x
-
-        elif rot == 180:
-            left = -point.x - self.x
-            top = -height + point.y + self.y
-            cm += cm180
-            space = abs(point.y + self.y)
-            #headroom = height - point.y - self.y
-
-        optcont = self.page._get_optional_content(oc)
-        if optcont is not None:
-            bdc = "/OC /%s BDC\n" % optcont
-            emc = "EMC\n"
-        else:
-            bdc = emc = ""
-
-        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
-        if alpha is None:
-            alpha = ""
-        else:
-            alpha = "/%s gs\n" % alpha
-        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)
-
-        if render_mode > 0:
-            nres += "%i Tr " % render_mode
-            nres += _format_g(border_width * fontsize) + " w "
-            if miter_limit is not None:
-                nres += _format_g(miter_limit) + " M "
-        if color is not None:
-            nres += color_str
-        if fill is not None:
-            nres += fill_str
-
-        # =========================================================================
-        #   start text insertion
-        # =========================================================================
-        nres += text[0]
-        nlines = 1  # set output line counter
-        if len(text) > 1:
-            nres += templ2(lheight)  # line 1
-        else:
-            nres += 'TJ'
-        for i in range(1, len(text)):
-            if space < lheight:
-                break  # no space left on page
-            if i > 1:
-                nres += "\nT* "
-            nres += text[i] + 'TJ'
-            space -= lheight
-            nlines += 1
-
-        nres += "\nET\n%sQ\n" % emc
-
-        # =========================================================================
-        #   end of text insertion
-        # =========================================================================
-        # update the /Contents object
-        self.text_cont += nres
-        return nlines
-
-    # ==============================================================================
-    # Shape.insert_textbox
-    # ==============================================================================
-    def insert_textbox(
-        self,
-        rect: rect_like,
-        buffer: typing.Union[str, list],
-        *,
-        fontname: OptStr = "helv",
-        fontfile: OptStr = None,
-        fontsize: float = 11,
-        lineheight: OptFloat = None,
-        set_simple: bool = 0,
-        encoding: int = 0,
-        color: OptSeq = None,
-        fill: OptSeq = None,
-        expandtabs: int = 1,
-        border_width: float = 0.05,
-        miter_limit: float = 1,
-        align: int = 0,
-        render_mode: int = 0,
-        rotate: int = 0,
-        morph: OptSeq = None,
-        stroke_opacity: float = 1,
-        fill_opacity: float = 1,
-        oc: int = 0,
-    ) -> float:
-        """Insert text into a given rectangle.
-
-        Args:
-            rect -- the textbox to fill
-            buffer -- text to be inserted
-            fontname -- a Base-14 font, font name or '/name'
-            fontfile -- name of a font file
-            fontsize -- font size
-            lineheight -- overwrite the font property
-            color -- RGB stroke color triple
-            fill -- RGB fill color triple
-            render_mode -- text rendering control
-            border_width -- thickness of glyph borders as percentage of fontsize
-            expandtabs -- handles tabulators with string function
-            align -- left, center, right, justified
-            rotate -- 0, 90, 180, or 270 degrees
-            morph -- morph box with a matrix and a fixpoint
-        Returns:
-            unused or deficit rectangle area (float)
-        """
-        rect = pymupdf.Rect(rect)
-        if rect.is_empty or rect.is_infinite:
-            raise ValueError("text box must be finite and not empty")
-
-        color_str = pymupdf.ColorCode(color, "c")
-        fill_str = pymupdf.ColorCode(fill, "f")
-        if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
-            fill = color
-            fill_str = pymupdf.ColorCode(color, "f")
-
-        optcont = self.page._get_optional_content(oc)
-        if optcont is not None:
-            bdc = "/OC /%s BDC\n" % optcont
-            emc = "EMC\n"
-        else:
-            bdc = emc = ""
-
-        # determine opacity / transparency
-        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
-        if alpha is None:
-            alpha = ""
-        else:
-            alpha = "/%s gs\n" % alpha
-
-        if rotate % 90 != 0:
-            raise ValueError("rotate must be multiple of 90")
-
-        rot = rotate
-        while rot < 0:
-            rot += 360
-        rot = rot % 360
-
-        # is buffer worth of dealing with?
-        if not bool(buffer):
-            return rect.height if rot in (0, 180) else rect.width
-
-        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
-        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
-        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
-        height = self.height
-
-        fname = fontname
-        if fname.startswith("/"):
-            fname = fname[1:]
-
-        xref = self.page.insert_font(
-            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
-        )
-        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)
-
-        fontdict = fontinfo[1]
-        ordering = fontdict["ordering"]
-        simple = fontdict["simple"]
-        glyphs = fontdict["glyphs"]
-        bfname = fontdict["name"]
-        ascender = fontdict["ascender"]
-        descender = fontdict["descender"]
-
-        if lineheight:
-            lheight_factor = lineheight
-        elif ascender - descender <= 1:
-            lheight_factor = 1.2
-        else:
-            lheight_factor = ascender - descender
-        lheight = fontsize * lheight_factor
-
-        # create a list from buffer, split into its lines
-        if type(buffer) in (list, tuple):
-            t0 = "\n".join(buffer)
-        else:
-            t0 = buffer
-
-        maxcode = max([ord(c) for c in t0])
-        # replace invalid char codes for simple fonts
-        if simple and maxcode > 255:
-            t0 = "".join([c if ord(c) < 256 else "?" for c in t0])
-
-        t0 = t0.splitlines()
-
-        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
-        if simple and bfname not in ("Symbol", "ZapfDingbats"):
-            tj_glyphs = None
-        else:
-            tj_glyphs = glyphs
-
-        # ----------------------------------------------------------------------
-        # calculate pixel length of a string
-        # ----------------------------------------------------------------------
-        def pixlen(x):
-            """Calculate pixel length of x."""
-            if ordering < 0:
-                return sum([glyphs[ord(c)][1] for c in x]) * fontsize
-            else:
-                return len(x) * fontsize
-
-        # ---------------------------------------------------------------------
-
-        if ordering < 0:
-            blen = glyphs[32][1] * fontsize  # pixel size of space character
-        else:
-            blen = fontsize
-
-        text = ""  # output buffer
-
-        if pymupdf.CheckMorph(morph):
-            m1 = pymupdf.Matrix(
-                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
-            )
-            mat = ~m1 * morph[1] * m1
-            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
-        else:
-            cm = ""
-
-        # ---------------------------------------------------------------------
-        # adjust for text orientation / rotation
-        # ---------------------------------------------------------------------
-        progr = 1  # direction of line progress
-        c_pnt = pymupdf.Point(0, fontsize * ascender)  # used for line progress
-        if rot == 0:  # normal orientation
-            point = rect.tl + c_pnt  # line 1 is 'lheight' below top
-            maxwidth = rect.width  # pixels available in one line
-            maxheight = rect.height  # available text height
-
-        elif rot == 90:  # rotate counter clockwise
-            c_pnt = pymupdf.Point(fontsize * ascender, 0)  # progress in x-direction
-            point = rect.bl + c_pnt  # line 1 'lheight' away from left
-            maxwidth = rect.height  # pixels available in one line
-            maxheight = rect.width  # available text height
-            cm += cmp90
-
-        elif rot == 180:  # text upside down
-            # progress upwards in y direction
-            c_pnt = -pymupdf.Point(0, fontsize * ascender)
-            point = rect.br + c_pnt  # line 1 'lheight' above bottom
-            maxwidth = rect.width  # pixels available in one line
-            progr = -1  # subtract lheight for next line
-            maxheight =rect.height  # available text height
-            cm += cm180
-
-        else:  # rotate clockwise (270 or -90)
-            # progress from right to left
-            c_pnt = -pymupdf.Point(fontsize * ascender, 0)
-            point = rect.tr + c_pnt  # line 1 'lheight' left of right
-            maxwidth = rect.height  # pixels available in one line
-            progr = -1  # subtract lheight for next line
-            maxheight = rect.width  # available text height
-            cm += cmm90
-
-        # =====================================================================
-        # line loop
-        # =====================================================================
-        just_tab = []  # 'justify' indicators per line
-
-        for i, line in enumerate(t0):
-            line_t = line.expandtabs(expandtabs).split(" ")  # split into words
-            num_words = len(line_t)
-            lbuff = ""  # init line buffer
-            rest = maxwidth  # available line pixels
-            # =================================================================
-            # word loop
-            # =================================================================
-            for j in range(num_words):
-                word = line_t[j]
-                pl_w = pixlen(word)  # pixel len of word
-                if rest >= pl_w:  # does it fit on the line?
-                    lbuff += word + " "  # yes, append word
-                    rest -= pl_w + blen  # update available line space
-                    continue  # next word
-
-                # word doesn't fit - output line (if not empty)
-                if lbuff:
-                    lbuff = lbuff.rstrip() + "\n"  # line full, append line break
-                    text += lbuff  # append to total text
-                    just_tab.append(True)  # can align-justify
-
-                lbuff = ""  # re-init line buffer
-                rest = maxwidth  # re-init avail. space
-
-                if pl_w <= maxwidth:  # word shorter than 1 line?
-                    lbuff = word + " "  # start the line with it
-                    rest = maxwidth - pl_w - blen  # update free space
-                    continue
-
-                # long word: split across multiple lines - char by char ...
-                if len(just_tab) > 0:
-                    just_tab[-1] = False  # cannot align-justify
-                for c in word:
-                    if pixlen(lbuff) <= maxwidth - pixlen(c):
-                        lbuff += c
-                    else:  # line full
-                        lbuff += "\n"  # close line
-                        text += lbuff  # append to text
-                        just_tab.append(False)  # cannot align-justify
-                        lbuff = c  # start new line with this char
-
-                lbuff += " "  # finish long word
-                rest = maxwidth - pixlen(lbuff)  # long word stored
-
-            if lbuff:  # unprocessed line content?
-                text += lbuff.rstrip()  # append to text
-                just_tab.append(False)  # cannot align-justify
-
-            if i < len(t0) - 1:  # not the last line?
-                text += "\n"  # insert line break
-
-        # compute used part of the textbox
-        if text.endswith("\n"):
-            text = text[:-1]
-        lb_count = text.count("\n") + 1  # number of lines written
-
-        # text height = line count * line height plus one descender value
-        text_height = lheight * lb_count - descender * fontsize
-
-        more = text_height - maxheight  # difference to height limit
-        if more > pymupdf.EPSILON:  # landed too much outside rect
-            return (-1) * more  # return deficit, don't output
-
-        more = abs(more)
-        if more < pymupdf.EPSILON:
-            more = 0  # don't bother with epsilons
-        nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
-        templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
-        # center, right, justify: output each line with its own specifics
-        text_t = text.splitlines()  # split text in lines again
-        just_tab[-1] = False  # never justify last line
-        for i, t in enumerate(text_t):
-            spacing = 0
-            pl = maxwidth - pixlen(t)  # length of empty line part
-            pnt = point + c_pnt * (i * lheight_factor)  # text start of line
-            if align == 1:  # center: right shift by half width
-                if rot in (0, 180):
-                    pnt = pnt + pymupdf.Point(pl / 2, 0) * progr
-                else:
-                    pnt = pnt - pymupdf.Point(0, pl / 2) * progr
-            elif align == 2:  # right: right shift by full width
-                if rot in (0, 180):
-                    pnt = pnt + pymupdf.Point(pl, 0) * progr
-                else:
-                    pnt = pnt - pymupdf.Point(0, pl) * progr
-            elif align == 3:  # justify
-                spaces = t.count(" ")  # number of spaces in line
-                if spaces > 0 and just_tab[i]:  # if any, and we may justify
-                    spacing = pl / spaces  # make every space this much larger
-                else:
-                    spacing = 0  # keep normal space length
-            top = height - pnt.y - self.y
-            left = pnt.x + self.x
-            if rot == 90:
-                left = height - pnt.y - self.y
-                top = -pnt.x - self.x
-            elif rot == 270:
-                left = -height + pnt.y + self.y
-                top = pnt.x + self.x
-            elif rot == 180:
-                left = -pnt.x - self.x
-                top = -height + pnt.y + self.y
-
-            nres += templ(left, top, fname, fontsize)
-
-            if render_mode > 0:
-                nres += "%i Tr " % render_mode
-                nres += _format_g(border_width * fontsize) + " w "
-                if miter_limit is not None:
-                    nres += _format_g(miter_limit) + " M "
-
-            if align == 3:
-                nres += _format_g(spacing) + " Tw "
-
-            if color is not None:
-                nres += color_str
-            if fill is not None:
-                nres += fill_str
-            nres += "%sTJ\n" % pymupdf.getTJstr(t, tj_glyphs, simple, ordering)
-
-        nres += "ET\n%sQ\n" % emc
-
-        self.text_cont += nres
-        self.updateRect(rect)
-        return more
-
-    def finish(
-        self,
-        width: float = 1,
-        color: OptSeq = (0,),
-        fill: OptSeq = None,
-        lineCap: int = 0,
-        lineJoin: int = 0,
-        dashes: OptStr = None,
-        even_odd: bool = False,
-        morph: OptSeq = None,
-        closePath: bool = True,
-        fill_opacity: float = 1,
-        stroke_opacity: float = 1,
-        oc: int = 0,
-    ) -> None:
-        """Finish the current drawing segment.
-
-        Notes:
-            Apply colors, opacity, dashes, line style and width, or
-            morphing. Also whether to close the path
-            by connecting last to first point.
-        """
-        if self.draw_cont == "":  # treat empty contents as no-op
-            return
-
-        if width == 0:  # border color makes no sense then
-            color = None
-        elif color is None:  # vice versa
-            width = 0
-        # if color == None and fill == None:
-        #     raise ValueError("at least one of 'color' or 'fill' must be given")
-        color_str = pymupdf.ColorCode(color, "c")  # ensure proper color string
-        fill_str = pymupdf.ColorCode(fill, "f")  # ensure proper fill string
-
-        optcont = self.page._get_optional_content(oc)
-        if optcont is not None:
-            self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
-            emc = "EMC\n"
-        else:
-            emc = ""
-
-        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
-        if alpha is not None:
-            self.draw_cont = "/%s gs\n" % alpha + self.draw_cont
-
-        if width != 1 and width != 0:
-            self.draw_cont += _format_g(width) + " w\n"
-
-        if lineCap != 0:
-            self.draw_cont = "%i J\n" % lineCap + self.draw_cont
-        if lineJoin != 0:
-            self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
-
-        if dashes not in (None, "", "[] 0"):
-            self.draw_cont = "%s d\n" % dashes + self.draw_cont
-
-        if closePath:
-            self.draw_cont += "h\n"
-            self.last_point = None
-
-        if color is not None:
-            self.draw_cont += color_str
-
-        if fill is not None:
-            self.draw_cont += fill_str
-            if color is not None:
-                if not even_odd:
-                    self.draw_cont += "B\n"
-                else:
-                    self.draw_cont += "B*\n"
-            else:
-                if not even_odd:
-                    self.draw_cont += "f\n"
-                else:
-                    self.draw_cont += "f*\n"
-        else:
-            self.draw_cont += "S\n"
-
-        self.draw_cont += emc
-        if pymupdf.CheckMorph(morph):
-            m1 = pymupdf.Matrix(
-                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
-            )
-            mat = ~m1 * morph[1] * m1
-            self.draw_cont = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n" + self.draw_cont
-
-        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
-        self.draw_cont = ""
-        self.last_point = None
-        return
-
-    def commit(self, overlay: bool = True) -> None:
-        """Update the page's /Contents object with Shape data.
-
-        The argument controls whether data appear in foreground (default)
-        or background.
-        """
-        pymupdf.CheckParent(self.page)  # doc may have died meanwhile
-        self.totalcont += self.text_cont
-        self.totalcont = self.totalcont.encode()
-
-        if self.totalcont:
-            if overlay:
-                self.page.wrap_contents()  # ensure a balanced graphics state
-            # make /Contents object with dummy stream
-            xref = pymupdf.TOOLS._insert_contents(self.page, b" ", overlay)
-            # update it with potential compression
-            self.doc.update_stream(xref, self.totalcont)
-
-        self.last_point = None  # clean up ...
-        self.rect = None  #
-        self.draw_cont = ""  # for potential ...
-        self.text_cont = ""  # ...
-        self.totalcont = ""  # re-use
-
-
-def apply_redactions(
-    page: pymupdf.Page, images: int = 2, graphics: int = 1, text: int = 0
-) -> bool:
-    """Apply the redaction annotations of the page.
-
-    Args:
-        page: the PDF page.
-        images:
-              0 - ignore images
-              1 - remove all overlapping images
-              2 - blank out overlapping image parts
-              3 - remove image unless invisible
-        graphics:
-              0 - ignore graphics
-              1 - remove graphics if contained in rectangle
-              2 - remove all overlapping graphics
-        text:
-              0 - remove text
-              1 - ignore text
-    """
-
-    def center_rect(annot_rect, new_text, font, fsize):
-        """Calculate minimal sub-rectangle for the overlay text.
-
-        Notes:
-            Because 'insert_textbox' supports no vertical text centering,
-            we calculate an approximate number of lines here and return a
-            sub-rect with smaller height, which should still be sufficient.
-        Args:
-            annot_rect: the annotation rectangle
-            new_text: the text to insert.
-            font: the fontname. Must be one of the CJK or Base-14 set, else
-                the rectangle is returned unchanged.
-            fsize: the fontsize
-        Returns:
-            A rectangle to use instead of the annot rectangle.
-        """
-        if not new_text or annot_rect.width <= pymupdf.EPSILON:
-            return annot_rect
-        try:
-            text_width = pymupdf.get_text_length(new_text, font, fsize)
-        except (ValueError, mupdf.FzErrorBase):  # unsupported font
-            if g_exceptions_verbose:
-                pymupdf.exception_info()
-            return annot_rect
-        line_height = fsize * 1.2
-        limit = annot_rect.width
-        h = math.ceil(text_width / limit) * line_height  # estimate rect height
-        if h >= annot_rect.height:
-            return annot_rect
-        r = annot_rect
-        y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
-        r.y0 = y
-        return r
-
-    pymupdf.CheckParent(page)
-    doc = page.parent
-    if doc.is_encrypted or doc.is_closed:
-        raise ValueError("document closed or encrypted")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-
-    redact_annots = []  # storage of annot values
-    for annot in page.annots(
-        types=(pymupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
-    ):
-        # loop redactions
-        redact_annots.append(annot._get_redact_values())  # save annot values
-
-    if redact_annots == []:  # any redactions on this page?
-        return False  # no redactions
-
-    rc = page._apply_redactions(text, images, graphics)  # call MuPDF
-    if not rc:  # should not happen really
-        raise ValueError("Error applying redactions.")
-
-    # now write replacement text in old redact rectangles
-    shape = page.new_shape()
-    for redact in redact_annots:
-        annot_rect = redact["rect"]
-        fill = redact["fill"]
-        if fill:
-            shape.draw_rect(annot_rect)  # colorize the rect background
-            shape.finish(fill=fill, color=fill)
-        if "text" in redact.keys():  # if we also have text
-            new_text = redact["text"]
-            align = redact.get("align", 0)
-            fname = redact["fontname"]
-            fsize = redact["fontsize"]
-            color = redact["text_color"]
-            # try finding vertical centered sub-rect
-            trect = center_rect(annot_rect, new_text, fname, fsize)
-
-            rc = -1
-            while rc < 0 and fsize >= 4:  # while not enough room
-                # (re-) try insertion
-                rc = shape.insert_textbox(
-                    trect,
-                    new_text,
-                    fontname=fname,
-                    fontsize=fsize,
-                    color=color,
-                    align=align,
-                )
-                fsize -= 0.5  # reduce font if unsuccessful
-    shape.commit()  # append new contents object
-    return True
-
-
-# ------------------------------------------------------------------------------
-# Remove potentially sensitive data from a PDF. Similar to the Adobe
-# Acrobat 'sanitize' function
-# ------------------------------------------------------------------------------
-def scrub(
-    doc: pymupdf.Document,
-    attached_files: bool = True,
-    clean_pages: bool = True,
-    embedded_files: bool = True,
-    hidden_text: bool = True,
-    javascript: bool = True,
-    metadata: bool = True,
-    redactions: bool = True,
-    redact_images: int = 0,
-    remove_links: bool = True,
-    reset_fields: bool = True,
-    reset_responses: bool = True,
-    thumbnails: bool = True,
-    xml_metadata: bool = True,
-) -> None:
-    def remove_hidden(cont_lines):
-        """Remove hidden text from a PDF page.
-
-        Args:
-            cont_lines: list of lines with /Contents content. Should have status
-                from after page.cleanContents().
-
-        Returns:
-            List of /Contents lines from which hidden text has been removed.
-
-        Notes:
-            The input must have been created after the page's /Contents object(s)
-            have been cleaned with page.cleanContents(). This ensures a standard
-            formatting: one command per line, single spaces between operators.
-            This allows for drastic simplification of this code.
-        """
-        out_lines = []  # will return this
-        in_text = False  # indicate if within BT/ET object
-        suppress = False  # indicate text suppression active
-        make_return = False
-        for line in cont_lines:
-            if line == b"BT":  # start of text object
-                in_text = True  # switch on
-                out_lines.append(line)  # output it
-                continue
-            if line == b"ET":  # end of text object
-                in_text = False  # switch off
-                out_lines.append(line)  # output it
-                continue
-            if line == b"3 Tr":  # text suppression operator
-                suppress = True  # switch on
-                make_return = True
-                continue
-            if line[-2:] == b"Tr" and line[0] != b"3":
-                suppress = False  # text rendering changed
-                out_lines.append(line)
-                continue
-            if line == b"Q":  # unstack command also switches off
-                suppress = False
-                out_lines.append(line)
-                continue
-            if suppress and in_text:  # suppress hidden lines
-                continue
-            out_lines.append(line)
-        if make_return:
-            return out_lines
-        else:
-            return None
-
-    if not doc.is_pdf:  # only works for PDF
-        raise ValueError("is no PDF")
-    if doc.is_encrypted or doc.is_closed:
-        raise ValueError("closed or encrypted doc")
-
-    if not clean_pages:
-        hidden_text = False
-        redactions = False
-
-    if metadata:
-        doc.set_metadata({})  # remove standard metadata
-
-    for page in doc:
-        if reset_fields:
-            # reset form fields (widgets)
-            for widget in page.widgets():
-                widget.reset()
-
-        if remove_links:
-            links = page.get_links()  # list of all links on page
-            for link in links:  # remove all links
-                page.delete_link(link)
-
-        found_redacts = False
-        for annot in page.annots():
-            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
-                annot.update_file(buffer_=b" ")  # set file content to empty
-            if reset_responses:
-                annot.delete_responses()
-            if annot.type[0] == pymupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
-                found_redacts = True
-
-        if redactions and found_redacts:
-            page.apply_redactions(images=redact_images)
-
-        if not (clean_pages or hidden_text):
-            continue  # done with the page
-
-        page.clean_contents()
-        if not page.get_contents():
-            continue
-        if hidden_text:
-            xref = page.get_contents()[0]  # only one b/o cleaning!
-            cont = doc.xref_stream(xref)
-            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
-            if cont_lines:  # something was actually removed
-                cont = b"\n".join(cont_lines)
-                doc.update_stream(xref, cont)  # rewrite the page /Contents
-
-        if thumbnails:  # remove page thumbnails?
-            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
-                doc.xref_set_key(page.xref, "Thumb", "null")
-
-    # pages are scrubbed, now perform document-wide scrubbing
-    # remove embedded files
-    if embedded_files:
-        for name in doc.embfile_names():
-            doc.embfile_del(name)
-
-    if xml_metadata:
-        doc.del_xml_metadata()
-    if not (xml_metadata or javascript):
-        xref_limit = 0
-    else:
-        xref_limit = doc.xref_length()
-    for xref in range(1, xref_limit):
-        if not doc.xref_object(xref):
-            msg = "bad xref %i - clean PDF before scrubbing" % xref
-            raise ValueError(msg)
-        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
-            # a /JavaScript action object
-            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
-            doc.update_object(xref, obj)  # update this object
-            continue  # no further handling
-
-        if not xml_metadata:
-            continue
-
-        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
-            # delete any metadata object directly
-            doc.update_object(xref, "<<>>")
-            doc.update_stream(xref, b"deleted", new=True)
-            continue
-
-        if doc.xref_get_key(xref, "Metadata")[0] != "null":
-            doc.xref_set_key(xref, "Metadata", "null")
-
-
 def _show_fz_text( text):
     #if mupdf_cppyy:
     #    assert isinstance( text, cppyy.gbl.mupdf.Text)
@@ -4451,418 +840,6 @@
         span = span.next
     return f'num_spans={num_spans} num_chars={num_chars}'
 
-def fill_textbox(
-    writer: pymupdf.TextWriter,
-    rect: rect_like,
-    text: typing.Union[str, list],
-    pos: point_like = None,
-    font: typing.Optional[pymupdf.Font] = None,
-    fontsize: float = 11,
-    lineheight: OptFloat = None,
-    align: int = 0,
-    warn: bool = None,
-    right_to_left: bool = False,
-    small_caps: bool = False,
-) -> tuple:
-    """Fill a rectangle with text.
-
-    Args:
-        writer: pymupdf.TextWriter object (= "self")
-        rect: rect-like to receive the text.
-        text: string or list/tuple of strings.
-        pos: point-like start position of first word.
-        font: pymupdf.Font object (default pymupdf.Font('helv')).
-        fontsize: the fontsize.
-        lineheight: overwrite the font property
-        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
-        warn: (bool) text overflow action: none, warn, or exception
-        right_to_left: (bool) indicate right-to-left language.
-    """
-    rect = pymupdf.Rect(rect)
-    if rect.is_empty:
-        raise ValueError("fill rect must not empty.")
-    if type(font) is not pymupdf.Font:
-        font = pymupdf.Font("helv")
-
-    def textlen(x):
-        """Return length of a string."""
-        return font.text_length(
-            x, fontsize=fontsize, small_caps=small_caps
-        )  # abbreviation
-
-    def char_lengths(x):
-        """Return list of single character lengths for a string."""
-        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)
-
-    def append_this(pos, text):
-        ret = writer.append(
-                pos, text, font=font, fontsize=fontsize, small_caps=small_caps
-                )
-        return ret
-
-    tolerance = fontsize * 0.2  # extra distance to left border
-    space_len = textlen(" ")
-    std_width = rect.width - tolerance
-    std_start = rect.x0 + tolerance
-
-    def norm_words(width, words):
-        """Cut any word in pieces no longer than 'width'."""
-        nwords = []
-        word_lengths = []
-        for w in words:
-            wl_lst = char_lengths(w)
-            wl = sum(wl_lst)
-            if wl <= width:  # nothing to do - copy over
-                nwords.append(w)
-                word_lengths.append(wl)
-                continue
-
-            # word longer than rect width - split it in parts
-            n = len(wl_lst)
-            while n > 0:
-                wl = sum(wl_lst[:n])
-                if wl <= width:
-                    nwords.append(w[:n])
-                    word_lengths.append(wl)
-                    w = w[n:]
-                    wl_lst = wl_lst[n:]
-                    n = len(wl_lst)
-                else:
-                    n -= 1
-        return nwords, word_lengths
-
-    def output_justify(start, line):
-        """Justified output of a line."""
-        # ignore leading / trailing / multiple spaces
-        words = [w for w in line.split(" ") if w != ""]
-        nwords = len(words)
-        if nwords == 0:
-            return
-        if nwords == 1:  # single word cannot be justified
-            append_this(start, words[0])
-            return
-        tl = sum([textlen(w) for w in words])  # total word lengths
-        gaps = nwords - 1  # number of word gaps
-        gapl = (std_width - tl) / gaps  # width of each gap
-        for w in words:
-            _, lp = append_this(start, w)  # output one word
-            start.x = lp.x + gapl  # next start at word end plus gap
-        return
-
-    asc = font.ascender
-    dsc = font.descender
-    if not lineheight:
-        if asc - dsc <= 1:
-            lheight = 1.2
-        else:
-            lheight = asc - dsc
-    else:
-        lheight = lineheight
-
-    LINEHEIGHT = fontsize * lheight  # effective line height
-    width = std_width  # available horizontal space
-
-    # starting point of text
-    if pos is not None:
-        pos = pymupdf.Point(pos)
-    else:  # default is just below rect top-left
-        pos = rect.tl + (tolerance, fontsize * asc)
-    if pos not in rect:
-        raise ValueError("Text must start in rectangle.")
-
-    # calculate displacement factor for alignment
-    if align == pymupdf.TEXT_ALIGN_CENTER:
-        factor = 0.5
-    elif align == pymupdf.TEXT_ALIGN_RIGHT:
-        factor = 1.0
-    else:
-        factor = 0
-
-    # split in lines if just a string was given
-    if type(text) is str:
-        textlines = text.splitlines()
-    else:
-        textlines = []
-        for line in text:
-            textlines.extend(line.splitlines())
-
-    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1
-
-    new_lines = []  # the final list of textbox lines
-    no_justify = []  # no justify for these line numbers
-    for i, line in enumerate(textlines):
-        if line in ("", " "):
-            new_lines.append((line, space_len))
-            width = rect.width - tolerance
-            no_justify.append((len(new_lines) - 1))
-            continue
-        if i == 0:
-            width = rect.x1 - pos.x
-        else:
-            width = rect.width - tolerance
-
-        if right_to_left:  # reverses Arabic / Hebrew text front to back
-            line = writer.clean_rtl(line)
-        tl = textlen(line)
-        if tl <= width:  # line short enough
-            new_lines.append((line, tl))
-            no_justify.append((len(new_lines) - 1))
-            continue
-
-        # we need to split the line in fitting parts
-        words = line.split(" ")  # the words in the line
-
-        # cut in parts any words that are longer than rect width
-        words, word_lengths = norm_words(width, words)
-
-        n = len(words)
-        while True:
-            line0 = " ".join(words[:n])
-            wl = sum(word_lengths[:n]) + space_len * (n - 1)
-            if wl <= width:
-                new_lines.append((line0, wl))
-                words = words[n:]
-                word_lengths = word_lengths[n:]
-                n = len(words)
-                line0 = None
-            else:
-                n -= 1
-
-            if len(words) == 0:
-                break
-            assert n
-
-    # -------------------------------------------------------------------------
-    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
-    # output length (float) and 'text' is the text. Except for justified text,
-    # this is output-ready.
-    # -------------------------------------------------------------------------
-    nlines = len(new_lines)
-    if nlines > max_lines:
-        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
-        if warn is None:
-            pass
-        elif warn:
-            pymupdf.message("Warning: " + msg)
-        else:
-            raise ValueError(msg)
-
-    start = pymupdf.Point()
-    no_justify += [len(new_lines) - 1]  # no justifying of last line
-    for i in range(max_lines):
-        try:
-            line, tl = new_lines.pop(0)
-        except IndexError:
-            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
-            break
-
-        if right_to_left:  # Arabic, Hebrew
-            line = "".join(reversed(line))
-
-        if i == 0:  # may have different start for first line
-            start = pos
-
-        if align == pymupdf.TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
-            output_justify(start, line)
-            start.x = std_start
-            start.y += LINEHEIGHT
-            continue
-
-        if i > 0 or pos.x == std_start:  # left, center, right alignments
-            start.x += (width - tl) * factor
-
-        append_this(start, line)
-        start.x = std_start
-        start.y += LINEHEIGHT
-
-    return new_lines  # return non-written lines
-
-
-# ------------------------------------------------------------------------
-# Optional Content functions
-# ------------------------------------------------------------------------
-def get_oc(doc: pymupdf.Document, xref: int) -> int:
-    """Return optional content object xref for an image or form xobject.
-
-    Args:
-        xref: (int) xref number of an image or form xobject.
-    """
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document close or encrypted")
-    t, name = doc.xref_get_key(xref, "Subtype")
-    if t != "name" or name not in ("/Image", "/Form"):
-        raise ValueError("bad object type at xref %i" % xref)
-    t, oc = doc.xref_get_key(xref, "OC")
-    if t != "xref":
-        return 0
-    rc = int(oc.replace("0 R", ""))
-    return rc
-
-
-def set_oc(doc: pymupdf.Document, xref: int, oc: int) -> None:
-    """Attach optional content object to image or form xobject.
-
-    Args:
-        xref: (int) xref number of an image or form xobject
-        oc: (int) xref number of an OCG or OCMD
-    """
-    if doc.is_closed or doc.is_encrypted:
-        raise ValueError("document close or encrypted")
-    t, name = doc.xref_get_key(xref, "Subtype")
-    if t != "name" or name not in ("/Image", "/Form"):
-        raise ValueError("bad object type at xref %i" % xref)
-    if oc > 0:
-        t, name = doc.xref_get_key(oc, "Type")
-        if t != "name" or name not in ("/OCG", "/OCMD"):
-            raise ValueError("bad object type at xref %i" % oc)
-    if oc == 0 and "OC" in doc.xref_get_keys(xref):
-        doc.xref_set_key(xref, "OC", "null")
-        return None
-    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
-    return None
-
-
-def set_ocmd(
-    doc: pymupdf.Document,
-    xref: int = 0,
-    ocgs: typing.Union[list, None] = None,
-    policy: OptStr = None,
-    ve: typing.Union[list, None] = None,
-) -> int:
-    """Create or update an OCMD object in a PDF document.
-
-    Args:
-        xref: (int) 0 for creating a new object, otherwise update existing one.
-        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
-        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
-        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.
-
-    Returns:
-        Xref of the created or updated OCMD.
-    """
-
-    all_ocgs = set(doc.get_ocgs().keys())
-
-    def ve_maker(ve):
-        if type(ve) not in (list, tuple) or len(ve) < 2:
-            raise ValueError("bad 've' format: %s" % ve)
-        if ve[0].lower() not in ("and", "or", "not"):
-            raise ValueError("bad operand: %s" % ve[0])
-        if ve[0].lower() == "not" and len(ve) != 2:
-            raise ValueError("bad 've' format: %s" % ve)
-        item = "[/%s" % ve[0].title()
-        for x in ve[1:]:
-            if type(x) is int:
-                if x not in all_ocgs:
-                    raise ValueError("bad OCG %i" % x)
-                item += " %i 0 R" % x
-            else:
-                item += " %s" % ve_maker(x)
-        item += "]"
-        return item
-
-    text = "<</Type/OCMD"
-
-    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
-        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
-        if s != set():
-            msg = "bad OCGs: %s" % s
-            raise ValueError(msg)
-        text += "/OCGs[" + " ".join(map(lambda x: "%i 0 R" % x, ocgs)) + "]"
-
-    if policy:
-        policy = str(policy).lower()
-        pols = {
-            "anyon": "AnyOn",
-            "allon": "AllOn",
-            "anyoff": "AnyOff",
-            "alloff": "AllOff",
-        }
-        if policy not in ("anyon", "allon", "anyoff", "alloff"):
-            raise ValueError("bad policy: %s" % policy)
-        text += "/P/%s" % pols[policy]
-
-    if ve:
-        text += "/VE%s" % ve_maker(ve)
-
-    text += ">>"
-
-    # make new object or replace old OCMD (check type first)
-    if xref == 0:
-        xref = doc.get_new_xref()
-    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
-        raise ValueError("bad xref or not an OCMD")
-    doc.update_object(xref, text)
-    return xref
-
-
-def get_ocmd(doc: pymupdf.Document, xref: int) -> dict:
-    """Return the definition of an OCMD (optional content membership dictionary).
-
-    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
-    /VE (visibility expression, PDF array). Via string manipulation, this
-    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
-    and "ve" - ready to recycle as input for 'set_ocmd()'.
-    """
-
-    if xref not in range(doc.xref_length()):
-        raise ValueError("bad xref")
-    text = doc.xref_object(xref, compressed=True)
-    if "/Type/OCMD" not in text:
-        raise ValueError("bad object type")
-    textlen = len(text)
-
-    p0 = text.find("/OCGs[")  # look for /OCGs key
-    p1 = text.find("]", p0)
-    if p0 < 0 or p1 < 0:  # no OCGs found
-        ocgs = None
-    else:
-        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
-        ocgs = list(map(int, ocgs))
-
-    p0 = text.find("/P/")  # look for /P policy key
-    if p0 < 0:
-        policy = None
-    else:
-        p1 = text.find("ff", p0)
-        if p1 < 0:
-            p1 = text.find("on", p0)
-        if p1 < 0:  # some irregular syntax
-            raise ValueError("bad object at xref")
-        else:
-            policy = text[p0 + 3 : p1 + 2]
-
-    p0 = text.find("/VE[")  # look for /VE visibility expression key
-    if p0 < 0:  # no visibility expression found
-        ve = None
-    else:
-        lp = rp = 0  # find end of /VE by finding last ']'.
-        p1 = p0
-        while lp < 1 or lp != rp:
-            p1 += 1
-            if not p1 < textlen:  # some irregular syntax
-                raise ValueError("bad object at xref")
-            if text[p1] == "[":
-                lp += 1
-            if text[p1] == "]":
-                rp += 1
-        # p1 now positioned at the last "]"
-        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
-        ve = (
-            ve.replace("/And", '"and",')
-            .replace("/Not", '"not",')
-            .replace("/Or", '"or",')
-        )
-        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
-        import json
-        try:
-            ve = json.loads(ve)
-        except Exception:
-            pymupdf.exception_info()
-            pymupdf.message(f"bad /VE key: {ve!r}")
-            raise
-    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
-
 
 """
 Handle page labels for PDF documents.
@@ -4937,50 +914,6 @@
     return construct_label(style, prefix, pagenumber)
 
 
-def get_label(page):
-    """Return the label for this PDF page.
-
-    Args:
-        page: page object.
-    Returns:
-        The label (str) of the page. Errors return an empty string.
-    """
-    # Jorj McKie, 2021-01-06
-
-    labels = page.parent._get_page_labels()
-    if not labels:
-        return ""
-    labels.sort()
-    return get_label_pno(page.number, labels)
-
-
-def get_page_numbers(doc, label, only_one=False):
-    """Return a list of page numbers with the given label.
-
-    Args:
-        doc: PDF document object (resp. 'self').
-        label: (str) label.
-        only_one: (bool) stop searching after first hit.
-    Returns:
-        List of page numbers having this label.
-    """
-    # Jorj McKie, 2021-01-06
-
-    numbers = []
-    if not label:
-        return numbers
-    labels = doc._get_page_labels()
-    if labels == []:
-        return numbers
-    for i in range(doc.page_count):
-        plabel = get_label_pno(i, labels)
-        if plabel == label:
-            numbers.append(i)
-            if only_one:
-                break
-    return numbers
-
-
 def construct_label(style, prefix, pno) -> str:
     """Construct a label based on style, prefix and page number."""
     # William Chapman, 2021-01-06
@@ -5049,94 +982,6 @@
     return "".join([a for a in roman_num(num)])
 
 
-def get_page_labels(doc):
-    """Return page label definitions in PDF document.
-
-    Args:
-        doc: PDF document (resp. 'self').
-    Returns:
-        A list of dictionaries with the following format:
-        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
-    """
-    # Jorj McKie, 2021-01-10
-    return [rule_dict(item) for item in doc._get_page_labels()]
-
-
-def set_page_labels(doc, labels):
-    """Add / replace page label definitions in PDF document.
-
-    Args:
-        doc: PDF document (resp. 'self').
-        labels: list of label dictionaries like:
-        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
-        as returned by get_page_labels().
-    """
-    # William Chapman, 2021-01-06
-
-    def create_label_str(label):
-        """Convert Python label dict to corresponding PDF rule string.
-
-        Args:
-            label: (dict) build rule for the label.
-        Returns:
-            PDF label rule string wrapped in "<<", ">>".
-        """
-        s = "%i<<" % label["startpage"]
-        if label.get("prefix", "") != "":
-            s += "/P(%s)" % label["prefix"]
-        if label.get("style", "") != "":
-            s += "/S/%s" % label["style"]
-        if label.get("firstpagenum", 1) > 1:
-            s += "/St %i" % label["firstpagenum"]
-        s += ">>"
-        return s
-
-    def create_nums(labels):
-        """Return concatenated string of all labels rules.
-
-        Args:
-            labels: (list) dictionaries as created by function 'rule_dict'.
-        Returns:
-            PDF compatible string for page label definitions, ready to be
-            enclosed in PDF array 'Nums[...]'.
-        """
-        labels.sort(key=lambda x: x["startpage"])
-        s = "".join([create_label_str(label) for label in labels])
-        return s
-
-    doc._set_page_labels(create_nums(labels))
-
-
-# End of Page Label Code -------------------------------------------------
-
-
-def has_links(doc: pymupdf.Document) -> bool:
-    """Check whether there are links on any page."""
-    if doc.is_closed:
-        raise ValueError("document closed")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    for i in range(doc.page_count):
-        for item in doc.page_annot_xrefs(i):
-            if item[1] == pymupdf.PDF_ANNOT_LINK:  # pylint: disable=no-member
-                return True
-    return False
-
-
-def has_annots(doc: pymupdf.Document) -> bool:
-    """Check whether there are annotations on any page."""
-    if doc.is_closed:
-        raise ValueError("document closed")
-    if not doc.is_pdf:
-        raise ValueError("is no PDF")
-    for i in range(doc.page_count):
-        for item in doc.page_annot_xrefs(i):
-            # pylint: disable=no-member
-            if not (item[1] == pymupdf.PDF_ANNOT_LINK or item[1] == pymupdf.PDF_ANNOT_WIDGET):  # pylint: disable=no-member
-                return True
-    return False
-
-
 # -------------------------------------------------------------------
 # Functions to recover the quad contained in a text extraction bbox
 # -------------------------------------------------------------------
@@ -5322,358 +1167,3 @@
         raise ValueError("bad span argument")
 
     return recover_bbox_quad(line_dir, span, bbox)
-
-
-# -------------------------------------------------------------------
-# Building font subsets using fontTools
-# -------------------------------------------------------------------
-def subset_fonts(doc: pymupdf.Document, verbose: bool = False, fallback: bool = False) -> OptInt:
-    """Build font subsets in a PDF.
-
-    Eligible fonts are potentially replaced by smaller versions. Page text is
-    NOT rewritten and thus should retain properties like being hidden or
-    controlled by optional content.
-
-    This method by default uses MuPDF's own internal feature to create subset
-    fonts. As this is a new function, errors may still occur. In this case,
-    please fall back to using the previous version by using "fallback=True".
-    Fallback mode requires the external package 'fontTools'.
-
-    Args:
-        fallback: use the older deprecated implementation.
-        verbose: only used by fallback mode.
-
-    Returns:
-        The new MuPDF-based code returns None.  The deprecated fallback
-        mode returns 0 if there are no fonts to subset.  Otherwise, it
-        returns the decrease in fontsize (the difference in fontsize),
-        measured in bytes.
-    """
-    # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
-    # An embedded font is uniquely defined by its fontbuffer only. It may have
-    # multiple names and xrefs.
-    # Once the sets of used unicodes and glyphs are known, we compute a
-    # smaller version of the buffer user package fontTools.
-
-    if not fallback:  # by default use MuPDF function
-        pdf = mupdf.pdf_document_from_fz_document(doc)
-        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
-        return
-
-    font_buffers = {}
-
-    def get_old_widths(xref):
-        """Retrieve old font '/W' and '/DW' values."""
-        df = doc.xref_get_key(xref, "DescendantFonts")
-        if df[0] != "array":  # only handle xref specifications
-            return None, None
-        df_xref = int(df[1][1:-1].replace("0 R", ""))
-        widths = doc.xref_get_key(df_xref, "W")
-        if widths[0] != "array":  # no widths key found
-            widths = None
-        else:
-            widths = widths[1]
-        dwidths = doc.xref_get_key(df_xref, "DW")
-        if dwidths[0] != "int":
-            dwidths = None
-        else:
-            dwidths = dwidths[1]
-        return widths, dwidths
-
-    def set_old_widths(xref, widths, dwidths):
-        """Restore the old '/W' and '/DW' in subsetted font.
-
-        If either parameter is None or evaluates to False, the corresponding
-        dictionary key will be set to null.
-        """
-        df = doc.xref_get_key(xref, "DescendantFonts")
-        if df[0] != "array":  # only handle xref specs
-            return None
-        df_xref = int(df[1][1:-1].replace("0 R", ""))
-        if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[
-            0
-        ] != "null":
-            doc.xref_set_key(df_xref, "W", "null")
-        else:
-            doc.xref_set_key(df_xref, "W", widths)
-        if (type(dwidths) is not str or not dwidths) and doc.xref_get_key(
-            df_xref, "DW"
-        )[0] != "null":
-            doc.xref_set_key(df_xref, "DW", "null")
-        else:
-            doc.xref_set_key(df_xref, "DW", dwidths)
-        return None
-
-    def set_subset_fontname(new_xref):
-        """Generate a name prefix to tag a font as subset.
-
-        We use a random generator to select 6 upper case ASCII characters.
-        The prefixed name must be put in the font xref as the "/BaseFont" value
-        and in the FontDescriptor object as the '/FontName' value.
-        """
-        # The following generates a prefix like 'ABCDEF+'
-        import random
-        import string
-        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
-        font_str = doc.xref_object(new_xref, compressed=True)
-        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
-        df = doc.xref_get_key(new_xref, "DescendantFonts")
-        if df[0] == "array":
-            df_xref = int(df[1][1:-1].replace("0 R", ""))
-            fd = doc.xref_get_key(df_xref, "FontDescriptor")
-            if fd[0] == "xref":
-                fd_xref = int(fd[1].replace("0 R", ""))
-                fd_str = doc.xref_object(fd_xref, compressed=True)
-                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
-                doc.update_object(fd_xref, fd_str)
-        doc.update_object(new_xref, font_str)
-
-    def build_subset(buffer, unc_set, gid_set):
-        """Build font subset using fontTools.
-
-        Args:
-            buffer: (bytes) the font given as a binary buffer.
-            unc_set: (set) required glyph ids.
-        Returns:
-            Either None if subsetting is unsuccessful or the subset font buffer.
-        """
-        try:
-            import fontTools.subset as fts
-        except ImportError:
-            if g_exceptions_verbose:    pymupdf.exception_info()
-            pymupdf.message("This method requires fontTools to be installed.")
-            raise
-        import tempfile
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            oldfont_path = f"{tmp_dir}/oldfont.ttf"
-            newfont_path = f"{tmp_dir}/newfont.ttf"
-            uncfile_path = f"{tmp_dir}/uncfile.txt"
-            args = [
-                oldfont_path,
-                "--retain-gids",
-                f"--output-file={newfont_path}",
-                "--layout-features=*",
-                "--passthrough-tables",
-                "--ignore-missing-glyphs",
-                "--ignore-missing-unicodes",
-                "--symbol-cmap",
-            ]
-
-            # store glyph ids or unicodes as file
-            with open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file:
-                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
-                    args.append(f"--gids-file={uncfile_path}")
-                    gid_set.add(189)
-                    unc_list = list(gid_set)
-                    for unc in unc_list:
-                        unc_file.write("%i\n" % unc)
-                else:
-                    args.append(f"--unicodes-file={uncfile_path}")
-                    unc_set.add(255)
-                    unc_list = list(unc_set)
-                    for unc in unc_list:
-                        unc_file.write("%04x\n" % unc)
-
-            # store fontbuffer as a file
-            with open(oldfont_path, "wb") as fontfile:
-                fontfile.write(buffer)
-            try:
-                os.remove(newfont_path)  # remove old file
-            except Exception:
-                pass
-            try:  # invoke fontTools subsetter
-                fts.main(args)
-                font = pymupdf.Font(fontfile=newfont_path)
-                new_buffer = font.buffer  # subset font binary
-                if font.glyph_count == 0:  # intercept empty font
-                    new_buffer = None
-            except Exception:
-                pymupdf.exception_info()
-                new_buffer = None
-        return new_buffer
-
-    def repl_fontnames(doc):
-        """Populate 'font_buffers'.
-
-        For each font candidate, store its xref and the list of names
-        by which PDF text may refer to it (there may be multiple).
-        """
-
-        def norm_name(name):
-            """Recreate font name that contains PDF hex codes.
-
-            E.g. #20 -> space, chr(32)
-            """
-            while "#" in name:
-                p = name.find("#")
-                c = int(name[p + 1 : p + 3], 16)
-                name = name.replace(name[p : p + 3], chr(c))
-            return name
-
-        def get_fontnames(doc, item):
-            """Return a list of fontnames for an item of page.get_fonts().
-
-            There may be multiple names e.g. for Type0 fonts.
-            """
-            fontname = item[3]
-            names = [fontname]
-            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
-            fontname = norm_name(fontname)
-            if fontname not in names:
-                names.append(fontname)
-            descendents = doc.xref_get_key(item[0], "DescendantFonts")
-            if descendents[0] != "array":
-                return names
-            descendents = descendents[1][1:-1]
-            if descendents.endswith(" 0 R"):
-                xref = int(descendents[:-4])
-                descendents = doc.xref_object(xref, compressed=True)
-            p1 = descendents.find("/BaseFont")
-            if p1 >= 0:
-                p2 = descendents.find("/", p1 + 1)
-                p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1))
-                fontname = descendents[p2 + 1 : p1]
-                fontname = norm_name(fontname)
-                if fontname not in names:
-                    names.append(fontname)
-            return names
-
-        for i in range(doc.page_count):
-            for f in doc.get_page_fonts(i, full=True):
-                font_xref = f[0]  # font xref
-                font_ext = f[1]  # font file extension
-                basename = f[3]  # font basename
-
-                if font_ext not in (  # skip if not supported by fontTools
-                    "otf",
-                    "ttf",
-                    "woff",
-                    "woff2",
-                ):
-                    continue
-                # skip fonts which already are subsets
-                if len(basename) > 6 and basename[6] == "+":
-                    continue
-
-                extr = doc.extract_font(font_xref)
-                fontbuffer = extr[-1]
-                names = get_fontnames(doc, f)
-                name_set, xref_set, subsets = font_buffers.get(
-                    fontbuffer, (set(), set(), (set(), set()))
-                )
-                xref_set.add(font_xref)
-                for name in names:
-                    name_set.add(name)
-                font = pymupdf.Font(fontbuffer=fontbuffer)
-                name_set.add(font.name)
-                del font
-                font_buffers[fontbuffer] = (name_set, xref_set, subsets)
-
-    def find_buffer_by_name(name):
-        for buffer, (name_set, _, _) in font_buffers.items():
-            if name in name_set:
-                return buffer
-        return None
-
-    # -----------------
-    # main function
-    # -----------------
-    repl_fontnames(doc)  # populate font information
-    if not font_buffers:  # nothing found to do
-        if verbose:
-            pymupdf.message(f'No fonts to subset.')
-        return 0
-
-    old_fontsize = 0
-    new_fontsize = 0
-    for fontbuffer in font_buffers.keys():
-        old_fontsize += len(fontbuffer)
-
-    # Scan page text for usage of subsettable fonts
-    for page in doc:
-        # go through the text and extend set of used glyphs by font
-        # we use a modified MuPDF trace device, which delivers us glyph ids.
-        for span in page.get_texttrace():
-            if type(span) is not dict:  # skip useless information
-                continue
-            fontname = span["font"][:33]  # fontname for the span
-            buffer = find_buffer_by_name(fontname)
-            if buffer is None:
-                continue
-            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
-            for c in span["chars"]:
-                set_ucs.add(c[0])  # unicode
-                set_gid.add(c[1])  # glyph id
-            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))
-
-    # build the font subsets
-    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
-        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
-        fontname = list(name_set)[0]
-        if new_buffer is None or len(new_buffer) >= len(old_buffer):
-            # subset was not created or did not get smaller
-            if verbose:
-                pymupdf.message(f'Cannot subset {fontname!r}.')
-            continue
-        if verbose:
-            pymupdf.message(f"Built subset of font {fontname!r}.")
-        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
-        new_xref = val[0]  # get its xref
-        set_subset_fontname(new_xref)  # tag fontname as subset font
-        font_str = doc.xref_object(  # get its object definition
-            new_xref,
-            compressed=True,
-        )
-        # walk through the original font xrefs and replace each by the subset def
-        for font_xref in xref_set:
-            # we need the original '/W' and '/DW' width values
-            width_table, def_width = get_old_widths(font_xref)
-            # ... and replace original font definition at xref with it
-            doc.update_object(font_xref, font_str)
-            # now copy over old '/W' and '/DW' values
-            if width_table or def_width:
-                set_old_widths(font_xref, width_table, def_width)
-        # 'new_xref' remains unused in the PDF and must be removed
-        # by garbage collection.
-        new_fontsize += len(new_buffer)
-
-    return old_fontsize - new_fontsize
-
-
-# -------------------------------------------------------------------
-# Copy XREF object to another XREF
-# -------------------------------------------------------------------
-def xref_copy(doc: pymupdf.Document, source: int, target: int, *, keep: list = None) -> None:
-    """Copy a PDF dictionary object to another one given their xref numbers.
-
-    Args:
-        doc: PDF document object
-        source: source xref number
-        target: target xref number, the xref must already exist
-        keep: an optional list of 1st level keys in target that should not be
-              removed before copying.
-    Notes:
-        This works similar to the copy() method of dictionaries in Python. The
-        source may be a stream object.
-    """
-    if doc.xref_is_stream(source):
-        # read new xref stream, maintaining compression
-        stream = doc.xref_stream_raw(source)
-        doc.update_stream(
-            target,
-            stream,
-            compress=False,  # keeps source compression
-            new=True,  # in case target is no stream
-        )
-
-    # empty the target completely, observe exceptions
-    if keep is None:
-        keep = []
-    for key in doc.xref_get_keys(target):
-        if key in keep:
-            continue
-        doc.xref_set_key(target, key, "null")
-    # copy over all source dict items
-    for key in doc.xref_get_keys(source):
-        item = doc.xref_get_key(source, key)
-        doc.xref_set_key(target, key, item[1])
author	Franz Glasner <fzglas.hg@dom66.de>
date	Sat, 11 Oct 2025 15:24:40 +0200
parents	a6bc019ac0b2
children