Mercurial > hgrepos > Python2 > PyMuPDF

diff src/utils.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:37:51 +0200
children: a6bc019ac0b2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/utils.py	Mon Sep 15 11:37:51 2025 +0200
@@ -0,0 +1,5679 @@
+# ------------------------------------------------------------------------
+# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com
+# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html
+#
+# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a
+# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is
+# maintained and developed by Artifex Software, Inc. https://artifex.com.
+# ------------------------------------------------------------------------
+import io
+import math
+import os
+import typing
+import weakref
+
+try:
+    from . import pymupdf
+except Exception:
+    import pymupdf
+try:
+    from . import mupdf
+except Exception:
+    import mupdf
+
+# Module-level shortcut for pymupdf.format_g.
+_format_g = pymupdf.format_g
+
+# Value of pymupdf.g_exceptions_verbose, copied once when this module is imported.
+g_exceptions_verbose = pymupdf.g_exceptions_verbose
+
+# Plain-string placeholders used as parameter annotations in this module
+# (for example "clip: rect_like").
+point_like = "point_like"
+rect_like = "rect_like"
+matrix_like = "matrix_like"
+quad_like = "quad_like"
+
+# ByteString is gone from typing in 3.14.
+# collections.abc.Buffer available from 3.12 only
+try:
+    ByteString = typing.ByteString
+except AttributeError:
+    # Fallback when typing.ByteString does not exist: a union of the
+    # built-in binary types.
+    # pylint: disable=unsupported-binary-operation
+    ByteString = bytes | bytearray | memoryview
+
+# Short aliases for frequently used annotations.
+AnyType = typing.Any
+OptInt = typing.Union[int, None]
+OptFloat = typing.Optional[float]
+OptStr = typing.Optional[str]
+OptDict = typing.Optional[dict]
+OptBytes = typing.Optional[ByteString]
+OptSeq = typing.Optional[typing.Sequence]
+
+"""
+This is a collection of functions to extend PyMuPDF.
+"""
+
+
+def write_text(
+        page: pymupdf.Page,
+        rect=None,
+        writers=None,
+        overlay=True,
+        color=None,
+        opacity=None,
+        keep_proportion=True,
+        rotate=0,
+        oc=0,
+        ) -> None:
+    """Write the text of one or more pymupdf.TextWriter objects.
+
+    A single writer with no target rectangle and no rotation is written
+    directly to 'page'. In every other case all writers are first written to
+    a page of a temporary PDF, which is then displayed on 'page' via
+    Page.show_pdf_page().
+
+    Args:
+        page: the target pymupdf.Page.
+        rect: target rectangle. If None, the union of the text writers is used.
+        writers: one or more pymupdf.TextWriter objects.
+        overlay: put in foreground or background.
+        color: handed to each writer's write_text().
+        opacity: handed to each writer's write_text().
+        keep_proportion: maintain aspect ratio of rectangle sides.
+        rotate: arbitrary rotation angle.
+        oc: the xref of an optional content object
+    Raises:
+        ValueError: if no writer was given.
+    """
+    assert isinstance(page, pymupdf.Page)
+    if not writers:
+        raise ValueError("need at least one pymupdf.TextWriter")
+    if type(writers) is pymupdf.TextWriter:
+        if rotate == 0 and rect is None:
+            # Direct write: no intermediate PDF needed. 'oc' must be forwarded
+            # here, otherwise the optional content setting is silently lost.
+            writers.write_text(
+                page, opacity=opacity, color=color, overlay=overlay, oc=oc
+            )
+            return None
+        writers = (writers,)
+
+    # Join the text rectangles of all writers while writing them to a
+    # temporary page having the same dimensions as the target page.
+    clip = writers[0].text_rect
+    textdoc = pymupdf.Document()
+    tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
+    for writer in writers:
+        clip |= writer.text_rect
+        # 'oc' is an xref of the target PDF, so it is applied by
+        # show_pdf_page() below and not on the temporary page.
+        writer.write_text(tpage, opacity=opacity, color=color)
+    if rect is None:
+        rect = clip
+    page.show_pdf_page(
+        rect,
+        textdoc,
+        0,
+        overlay=overlay,
+        keep_proportion=keep_proportion,
+        rotate=rotate,
+        clip=clip,
+        oc=oc,
+    )
+    # drop the references to the temporary objects
+    textdoc = None
+    tpage = None
+
+
+def show_pdf_page(
+        page,
+        rect,
+        docsrc,
+        pno=0,
+        keep_proportion=True,
+        overlay=True,
+        oc=0,
+        rotate=0,
+        clip=None,
+        ) -> int:
+    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.
+
+    Args:
+        rect: (rect-like) where to place the source image
+        docsrc: (document) source PDF
+        pno: (int) source page number, negative values count from the end
+        keep_proportion: (bool) do not change width-height-ratio
+        overlay: (bool) put in foreground
+        oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
+        rotate: (int) degrees (multiple of 90)
+        clip: (rect-like) part of source page rectangle
+    Returns:
+        xref of inserted object (for reuse)
+    Raises:
+        ValueError: if source or target is no PDF, if 'rect' or the clipped
+            source area is empty or infinite, if a negative 'pno' is given
+            for a source without pages, or if source and target are the
+            same document.
+    """
+    def calc_matrix(sr, tr, keep=True, rotate=0):
+        """Calculate transformation matrix from source to target rect.
+
+        Notes:
+            The product of four matrices in this sequence: (1) translate correct
+            source corner to origin, (2) rotate, (3) scale, (4) translate to
+            target's top-left corner.
+        Args:
+            sr: source rect in PDF (!) coordinate system
+            tr: target rect in PDF coordinate system
+            keep: whether to keep source ratio of width to height
+            rotate: rotation angle in degrees
+        Returns:
+            Transformation matrix.
+        """
+        # calc center point of source rect
+        smp = (sr.tl + sr.br) / 2.0
+        # calc center point of target rect
+        tmp = (tr.tl + tr.br) / 2.0
+
+        # m moves to (0, 0), then rotates
+        m = pymupdf.Matrix(1, 0, 0, 1, -smp.x, -smp.y) * pymupdf.Matrix(rotate)
+
+        sr1 = sr * m  # resulting source rect to calculate scale factors
+
+        fw = tr.width / sr1.width  # scale the width
+        fh = tr.height / sr1.height  # scale the height
+        if keep:
+            fw = fh = min(fw, fh)  # take min if keeping aspect ratio
+
+        m *= pymupdf.Matrix(fw, fh)  # concat scale matrix
+        m *= pymupdf.Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
+        return pymupdf.JM_TUPLE(m)
+
+    pymupdf.CheckParent(page)
+    doc = page.parent
+
+    if not doc.is_pdf or not docsrc.is_pdf:
+        raise ValueError("is no PDF")
+
+    # Accept any rect-like value (as documented), not only pymupdf.Rect.
+    rect = pymupdf.Rect(rect)
+    if rect.is_empty or rect.is_infinite:
+        raise ValueError("rect must be finite and not empty")
+
+    if pno < 0:  # support negative page numbers
+        page_count = docsrc.page_count
+        if page_count < 1:
+            # without this guard the loop below would never terminate
+            raise ValueError("source document has no pages")
+        while pno < 0:
+            pno += page_count
+    src_page = docsrc[pno]  # load source page
+
+    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates
+
+    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
+    if src_rect.is_empty or src_rect.is_infinite:
+        raise ValueError("clip must be finite and not empty")
+    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord
+
+    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)
+
+    # list of existing /Form /XObjects
+    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
+    ilst += [i[7] for i in doc.get_page_images(page.number)]
+    ilst += [i[4] for i in doc.get_page_fonts(page.number)]
+
+    # create a name not in that list
+    n = "fzFrm"
+    i = 0
+    _imgname = n + "0"
+    while _imgname in ilst:
+        i += 1
+        _imgname = n + str(i)
+
+    isrc = docsrc._graft_id  # used as key for graftmaps
+    if doc._graft_id == isrc:
+        raise ValueError("source document must not equal target")
+
+    # retrieve / make pymupdf.Graftmap for source PDF
+    gmap = doc.Graftmaps.get(isrc, None)
+    if gmap is None:
+        gmap = pymupdf.Graftmap(doc)
+        doc.Graftmaps[isrc] = gmap
+
+    # take note of generated xref for automatic reuse
+    pno_id = (isrc, pno)  # id of docsrc[pno]
+    xref = doc.ShownPages.get(pno_id, 0)
+
+    if overlay:
+        page.wrap_contents()  # ensure a balanced graphics state
+    xref = page._show_pdf_page(
+        src_page,
+        overlay=overlay,
+        matrix=matrix,
+        xref=xref,
+        oc=oc,
+        clip=src_rect,
+        graftmap=gmap,
+        _imgname=_imgname,
+    )
+    doc.ShownPages[pno_id] = xref
+
+    return xref
+
+
+def replace_image(page: pymupdf.Page, xref: int, *, filename=None, pixmap=None, stream=None):
+    """Replace the image referred to by xref.
+
+    The object definition stored under 'xref' is overwritten with a new image.
+    The page's appearance instructions stay untouched, so the new image is
+    displayed with the same bbox, rotation etc. as the old one.
+    Providing a small, fully transparent image gives the effect of a deleted
+    image.
+    A typical use is replacing large images by smaller versions, e.g. with a
+    lower resolution or graylevel instead of colored.
+
+    Args:
+        xref: the xref of the image to replace.
+        filename, pixmap, stream: exactly one of these must be provided. The
+            meaning being the same as in Page.insert_image.
+    Raises:
+        ValueError: if 'xref' is no image, or if not exactly one image source
+            was given.
+    """
+    pdf = page.parent  # the owning document
+    if not pdf.xref_is_image(xref):
+        raise ValueError("xref not an image")
+    sources_given = bool(filename) + bool(stream) + bool(pixmap)
+    if sources_given != 1:
+        raise ValueError("Exactly one of filename/stream/pixmap must be given")
+
+    # insert the new image anywhere on the page ...
+    inserted_xref = page.insert_image(
+        page.rect,
+        filename=filename,
+        stream=stream,
+        pixmap=pixmap,
+    )
+    # ... and copy its definition over the old one
+    pdf.xref_copy(inserted_xref, xref)
+
+    # The insertion has created a new /Contents source as the last entry.
+    # Set it to spaces so the extra display command disappears.
+    newest_contents = page.get_contents()[-1]
+    pdf.update_stream(newest_contents, b" ")
+
+    page._image_info = None  # clear cache of extracted image information
+
+
+def delete_image(page: pymupdf.Page, xref: int):
+    """Delete the image referred to by xref.
+
+    The image is not removed but replaced by a small transparent Pixmap via
+    method Page.replace_image.
+
+    Args:
+        xref: xref of the image to delete.
+    """
+    # a 1 x 1 gray pixmap with alpha channel - the dimension is arbitrary
+    blank = pymupdf.Pixmap(pymupdf.csGRAY, (0, 0, 1, 1), 1)
+    blank.clear_with()  # all sample bytes become 0x00, i.e. 100% transparent
+    page.replace_image(xref, pixmap=blank)
+
+
+def insert_image(
+        page,
+        rect,
+        *,
+        alpha=-1,
+        filename=None,
+        height=0,
+        keep_proportion=True,
+        mask=None,
+        oc=0,
+        overlay=True,
+        pixmap=None,
+        rotate=0,
+        stream=None,
+        width=0,
+        xref=0,
+        ):
+    """Insert an image for display in a rectangle.
+
+    Args:
+        rect: (rect_like) position of image on the page.
+        alpha: (int, optional) set to 0 if image has no transparency.
+        filename: (str, Path, file object) image filename.
+        height: (int)
+        keep_proportion: (bool) keep width / height ratio (default).
+        mask: (bytes, optional) image consisting of alpha values to use.
+        oc: (int) xref of OCG or OCMD to declare as Optional Content.
+        overlay: (bool) put in foreground (default) or background.
+        pixmap: (pymupdf.Pixmap) use this as image.
+        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
+        stream: (bytes) use this as image.
+        width: (int)
+        xref: (int) use this as image.
+
+    'page' and 'rect' are positional, all other parameters are keywords.
+
+    If 'xref' is given, that image is used. Other input options are ignored.
+    Else, exactly one of pixmap, stream or filename must be given.
+
+    'alpha=0' for non-transparent images improves performance significantly.
+    Affects stream and filename only.
+
+    Optimum transparent insertions are possible by using filename / stream in
+    conjunction with a 'mask' image of alpha values.
+
+    Returns:
+        xref (int) of inserted image. Re-use as argument for multiple insertions.
+    Raises:
+        ValueError: for a non-PDF document or invalid arguments.
+        FileNotFoundError: if 'filename' does not exist.
+    """
+    pymupdf.CheckParent(page)
+    doc = page.parent
+    if not doc.is_pdf:
+        raise ValueError("is no PDF")
+
+    if xref == 0:
+        sources_given = bool(filename) + bool(stream) + bool(pixmap)
+        if sources_given != 1:
+            raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")
+
+    # Reduce 'filename' to a plain string: path-like objects are converted,
+    # file objects contribute their 'name' attribute.
+    if filename and type(filename) is not str:
+        if hasattr(filename, "absolute"):
+            filename = str(filename)
+        elif hasattr(filename, "name"):
+            filename = filename.name
+        else:
+            raise ValueError("bad filename")
+
+    binary_types = (bytes, bytearray, io.BytesIO)
+    if filename and not os.path.exists(filename):
+        raise FileNotFoundError("No such file: '%s'" % filename)
+    if stream and type(stream) not in binary_types:
+        raise ValueError("stream must be bytes-like / BytesIO")
+    if pixmap and type(pixmap) is not pymupdf.Pixmap:
+        raise ValueError("pixmap must be a pymupdf.Pixmap")
+    if mask:
+        if not (stream or filename):
+            raise ValueError("mask requires stream or filename")
+        if type(mask) not in binary_types:
+            raise ValueError("mask must be bytes-like / BytesIO")
+
+    # bring the angle into the range 0 <= rotate < 360
+    while rotate < 0:
+        rotate += 360
+    while rotate >= 360:
+        rotate -= 360
+    if rotate not in (0, 90, 180, 270):
+        raise ValueError("bad rotate value")
+
+    target = pymupdf.Rect(rect)
+    if target.is_empty or target.is_infinite:
+        raise ValueError("rect must be finite and not empty")
+    clip = target * ~page.transformation_matrix
+
+    # Collect the reference names already used on the page ...
+    used_names = [img[7] for img in doc.get_page_images(page.number)]
+    used_names.extend(xobj[1] for xobj in doc.get_page_xobjects(page.number))
+    used_names.extend(font[4] for font in doc.get_page_fonts(page.number))
+    # ... and pick the first "fzImg<number>" that is still free.
+    prefix = "fzImg"  # 'pymupdf image'
+    counter = 0
+    while prefix + str(counter) in used_names:
+        counter += 1
+    _imgname = prefix + str(counter)
+
+    if overlay:
+        page.wrap_contents()  # ensure a balanced graphics state
+    xref, digests = page._insert_image(
+        filename=filename,
+        pixmap=pixmap,
+        stream=stream,
+        imask=mask,
+        clip=clip,
+        overlay=overlay,
+        oc=oc,
+        xref=xref,
+        rotate=rotate,
+        keep_proportion=keep_proportion,
+        width=width,
+        height=height,
+        alpha=alpha,
+        _imgname=_imgname,
+        digests=doc.InsertedImages,
+    )
+    if digests is not None:
+        doc.InsertedImages = digests
+
+    return xref
+
+
+def search_for(
+        page,
+        text,
+        *,
+        clip=None,
+        quads=False,
+        flags=pymupdf.TEXT_DEHYPHENATE
+            | pymupdf.TEXT_PRESERVE_WHITESPACE
+            | pymupdf.TEXT_PRESERVE_LIGATURES
+            | pymupdf.TEXT_MEDIABOX_CLIP
+            ,
+        textpage=None,
+        ) -> list:
+    """Search for a string on a page.
+
+    Args:
+        text: string to be searched for
+        clip: restrict search to this rectangle
+        quads: (bool) return quads instead of rectangles
+        flags: bit switches, default: join hyphened words
+        textpage: a pre-created pymupdf.TextPage
+    Returns:
+        a list of rectangles or quads, each containing one occurrence.
+    Raises:
+        ValueError: if 'textpage' does not belong to this page.
+    """
+    search_area = None if clip is None else pymupdf.Rect(clip)
+
+    pymupdf.CheckParent(page)
+    own_textpage = textpage is None
+    if own_textpage:
+        # no textpage passed in: create a temporary one
+        tp = page.get_textpage(clip=search_area, flags=flags)
+    else:
+        tp = textpage
+        if getattr(tp, "parent") != page:
+            raise ValueError("not a textpage of this page")
+
+    hits = tp.search(text, quads=quads)
+    if own_textpage:
+        del tp  # release the temporary textpage
+    return hits
+
+
+def search_page_for(
+    doc: pymupdf.Document,
+    pno: int,
+    text: str,
+    quads: bool = False,
+    clip: rect_like = None,
+    flags: int = pymupdf.TEXT_DEHYPHENATE
+            | pymupdf.TEXT_PRESERVE_LIGATURES
+            | pymupdf.TEXT_PRESERVE_WHITESPACE
+            | pymupdf.TEXT_MEDIABOX_CLIP
+            ,
+    textpage: pymupdf.TextPage = None,
+) -> list:
+    """Search for a string on the page with number 'pno'.
+
+    Loads the page and delegates to its search_for() method.
+
+    Args:
+        pno: page number
+        text: string to be searched for
+        clip: restrict search to this rectangle
+        quads: (bool) return quads instead of rectangles
+        flags: bit switches, default: join hyphened words
+        textpage: reuse a prepared textpage
+    Returns:
+        a list of rectangles or quads, each containing an occurrence.
+    """
+    target_page = doc[pno]
+    search_options = {
+        "quads": quads,
+        "clip": clip,
+        "flags": flags,
+        "textpage": textpage,
+    }
+    return target_page.search_for(text, **search_options)
+
+
+def get_text_blocks(
+    page: pymupdf.Page,
+    clip: rect_like = None,
+    flags: OptInt = None,
+    textpage: pymupdf.TextPage = None,
+    sort: bool = False,
+) -> list:
+    """Return the text blocks on a page.
+
+    Notes:
+        Lines in a block are concatenated with line breaks.
+    Args:
+        clip: area to consider when a new textpage is created.
+        flags: (int) control the amount of data parsed into the textpage.
+        textpage: reuse this textpage instead of creating a new one.
+        sort: (bool) sort the blocks by items 3 and 0 of each block.
+    Returns:
+        A list of the blocks. Each item contains the containing rectangle
+        coordinates, text lines, running block number and block type.
+    Raises:
+        ValueError: if 'textpage' does not belong to this page.
+    """
+    pymupdf.CheckParent(page)
+    if flags is None:
+        flags = pymupdf.TEXTFLAGS_BLOCKS
+
+    own_textpage = textpage is None
+    if own_textpage:
+        tp = page.get_textpage(clip=clip, flags=flags)
+    else:
+        tp = textpage
+        if getattr(tp, "parent") != page:
+            raise ValueError("not a textpage of this page")
+
+    result = tp.extractBLOCKS()
+    if own_textpage:
+        del tp  # release the temporary textpage
+    if sort:
+        result.sort(key=lambda blk: (blk[3], blk[0]))
+    return result
+
+
+def get_text_words(
+    page: pymupdf.Page,
+    clip: rect_like = None,
+    flags: OptInt = None,
+    textpage: pymupdf.TextPage = None,
+    sort: bool = False,
+    delimiters=None,
+    tolerance=3,
+) -> list:
+    """Return the text words as a list with the bbox for each word.
+
+    Args:
+        page: pymupdf.Page
+        clip: (rect-like) area on page to consider
+        flags: (int) control the amount of data parsed into the textpage.
+        textpage: (pymupdf.TextPage) either passed-in or None.
+        sort: (bool) sort the words in reading sequence.
+        delimiters: (str,list) characters to use as word delimiters.
+        tolerance: (float) consider words to be part of the same line if
+            top or bottom coordinate are not larger than this. Relevant
+            only if sort=True.
+
+    Returns:
+        Word tuples (x0, y0, x1, y1, "word", bno, lno, wno).
+    """
+
+    def sort_words(words):
+        """Order words line by line, tolerating small vertical deviations."""
+        words.sort(key=lambda w: (w[3], w[0]))
+        ordered = []  # the result
+        current = [words[0]]  # words collected for the current line
+        line_box = pymupdf.Rect(words[0][:4])  # bbox of the current line
+        for word in words[1:]:
+            word_box = pymupdf.Rect(word[:4])
+            same_line = (
+                abs(word_box.y0 - line_box.y0) <= tolerance
+                or abs(word_box.y1 - line_box.y1) <= tolerance
+            )
+            if same_line:
+                current.append(word)
+                line_box |= word_box
+                continue
+            # the word opens a new line: flush the finished one left-to-right
+            current.sort(key=lambda w: w[0])
+            ordered.extend(current)
+            current = [word]
+            line_box = word_box
+
+        # flush the last line
+        current.sort(key=lambda w: w[0])
+        ordered.extend(current)
+
+        return ordered
+
+    pymupdf.CheckParent(page)
+    if flags is None:
+        flags = pymupdf.TEXTFLAGS_WORDS
+
+    own_textpage = textpage is None
+    if own_textpage:
+        tp = page.get_textpage(clip=clip, flags=flags)
+    else:
+        tp = textpage
+        if getattr(tp, "parent") != page:
+            raise ValueError("not a textpage of this page")
+
+    words = tp.extractWORDS(delimiters)
+
+    if own_textpage:
+        del tp  # release the temporary textpage
+    elif clip is not None:
+        # A passed-in textpage may cover more than 'clip': keep only words
+        # having at least half of their bbox area inside it.
+        clip = pymupdf.Rect(clip)
+        words = [
+            w for w in words if abs(clip & w[:4]) >= 0.5 * abs(pymupdf.Rect(w[:4]))
+        ]
+
+    if words and sort:
+        # advanced sort if any words found
+        words = sort_words(words)
+
+    return words
+
+
+def get_sorted_text(
+    page: pymupdf.Page,
+    clip: rect_like = None,
+    flags: OptInt = None,
+    textpage: pymupdf.TextPage = None,
+    tolerance=3,
+) -> str:
+    """Extract plain text avoiding unacceptable line breaks.
+
+    Text contained in clip will be sorted in reading sequence. Some effort
+    is also spent to simulate layout vertically and horizontally.
+
+    Args:
+        page: pymupdf.Page
+        clip: (rect-like) only consider text inside
+        flags: (int) text extraction flags
+        textpage: pymupdf.TextPage
+        tolerance: (float) consider words to be on the same line if their top
+            or bottom coordinates do not differ more than this.
+
+    Notes:
+        If a TextPage is provided, all text is checked for being inside clip
+        with at least 50% of its bbox.
+        This allows to use some "global" TextPage in conjunction with sub-
+        selecting words in parts of the defined TextPage rectangle.
+
+    Returns:
+        A text string in reading sequence. Left indentation of each line,
+        inter-line and inter-word distances strive to reflect the layout.
+        Two different lines are always separated by at least one line break.
+        An empty string is returned if no words were found.
+    """
+
+    def line_text(clip, line):
+        """Create the string of one text line.
+
+        We are trying to simulate some horizontal layout here, too.
+
+        Args:
+            clip: (pymupdf.Rect) the area from which all text is being read.
+            line: (list) word tuples (rect, text) contained in the line
+        Returns:
+            Text in this line. Generated from words in 'line'. Distance from
+            predecessor is translated to multiple spaces, thus simulating
+            text indentations and large horizontal distances.
+        """
+        line.sort(key=lambda w: w[0].x0)
+        ltext = ""  # text in the line
+        x1 = clip.x0  # end coordinate of ltext
+        for r, t in line:
+            # At least one space between words, except at the line start or
+            # when the word overlaps its predecessor.
+            min_spaces = 0 if (x1 == clip.x0 or r.x0 <= x1) else 1
+            # Convert the distance to the previous word to a number of
+            # spaces, using the word's average character width. A word
+            # without width cannot provide a character width.
+            if r.width > 0:
+                spaces = int(round((r.x0 - x1) / r.width * len(t)))
+            else:
+                spaces = 0
+            ltext += " " * max(spaces, min_spaces) + t  # append word string
+            x1 = r.x1  # update new end position
+        return ltext
+
+    # Extract words in correct sequence first.
+    words = [
+        (pymupdf.Rect(w[:4]), w[4])
+        for w in get_text_words(
+            page,
+            clip=clip,
+            flags=flags,
+            textpage=textpage,
+            sort=True,
+            tolerance=tolerance,
+        )
+    ]
+
+    if not words:  # no text present
+        return ""
+    totalbox = pymupdf.EMPTY_RECT()  # area covering all text
+    for wr, text in words:
+        totalbox |= wr
+
+    lines = []  # list of reconstituted lines
+    line = [words[0]]  # current line
+    lrect = words[0][0]  # the line's rectangle
+
+    # walk through the words
+    for wr, text in words[1:]:  # start with second word
+        # if this word matches top or bottom of the line, append it
+        if abs(lrect.y0 - wr.y0) <= tolerance or abs(lrect.y1 - wr.y1) <= tolerance:
+            line.append((wr, text))
+            lrect |= wr
+        else:
+            # output current line and re-initialize
+            ltext = line_text(totalbox, line)
+            lines.append((lrect, ltext))
+            line = [(wr, text)]
+            lrect = wr
+
+    # also append unfinished last line
+    ltext = line_text(totalbox, line)
+    lines.append((lrect, ltext))
+
+    # sort all lines vertically
+    lines.sort(key=lambda l: (l[0].y1))
+
+    text = lines[0][1]  # text of first line
+    y1 = lines[0][0].y1  # its bottom coordinate
+    for lrect, ltext in lines[1:]:
+        # Number of empty lines to insert: the vertical gap measured in line
+        # heights, limited to 5. Lines overlapping vertically give a negative
+        # gap: clamp it to 0, otherwise no line break would be written and
+        # the two lines would be glued together.
+        if lrect.height > 0:
+            distance = int(round((lrect.y0 - y1) / lrect.height))
+        else:
+            distance = 0
+        distance = max(0, min(distance, 5))
+        breaks = "\n" * (distance + 1)
+        text += breaks + ltext
+        y1 = lrect.y1
+
+    # return text in clip
+    return text
+
+
+def get_textbox(
+    page: pymupdf.Page,
+    rect: rect_like,
+    textpage: pymupdf.TextPage = None,
+) -> str:
+    """Return the result of TextPage.extractTextbox() for 'rect'.
+
+    Args:
+        page: pymupdf.Page
+        rect: (rect-like) handed to extractTextbox().
+        textpage: reuse this pymupdf.TextPage. If None, a temporary one is
+            created with default arguments.
+    Returns:
+        Whatever extractTextbox() returns.
+    Raises:
+        ValueError: if 'textpage' does not belong to this page.
+    """
+    own_textpage = textpage is None
+    if own_textpage:
+        tp = page.get_textpage()
+    else:
+        tp = textpage
+        if getattr(tp, "parent") != page:
+            raise ValueError("not a textpage of this page")
+
+    box_text = tp.extractTextbox(rect)
+    if own_textpage:
+        del tp  # release the temporary textpage
+    return box_text
+
+
+def get_text_selection(
+    page: pymupdf.Page,
+    p1: point_like,
+    p2: point_like,
+    clip: rect_like = None,
+    textpage: pymupdf.TextPage = None,
+):
+    """Return the result of TextPage.extractSelection() for two points.
+
+    Args:
+        page: pymupdf.Page
+        p1: (point-like) first argument of extractSelection().
+        p2: (point-like) second argument of extractSelection().
+        clip: (rect-like) only used when a new textpage is created.
+        textpage: reuse this pymupdf.TextPage. If None, a temporary one is
+            created with flag pymupdf.TEXT_DEHYPHENATE.
+    Returns:
+        Whatever extractSelection() returns.
+    Raises:
+        ValueError: if 'textpage' does not belong to this page.
+    """
+    pymupdf.CheckParent(page)
+
+    own_textpage = textpage is None
+    if own_textpage:
+        tp = page.get_textpage(clip=clip, flags=pymupdf.TEXT_DEHYPHENATE)
+    else:
+        tp = textpage
+        if getattr(tp, "parent") != page:
+            raise ValueError("not a textpage of this page")
+
+    selection = tp.extractSelection(p1, p2)
+    if own_textpage:
+        del tp  # release the temporary textpage
+    return selection
+
+
+def get_textpage_ocr(
+    page: pymupdf.Page,
+    flags: int = 0,
+    language: str = "eng",
+    dpi: int = 72,
+    full: bool = False,
+    tessdata: str = None,
+) -> pymupdf.TextPage:
+    """Create a Textpage from combined results of normal and OCR text parsing.
+
+    With full=True the whole page is rendered to a pixmap and OCRed. Otherwise
+    a normal textpage is made and extended with the OCR text of each image
+    block on the page. If OCRing an image raises RuntimeError or
+    mupdf.FzErrorBase, a message is emitted and the full page is OCRed
+    instead.
+
+    Args:
+        flags: (int) control content becoming part of the result.
+        language: (str) specify expected language(s). Default is "eng" (English).
+        dpi: (int) resolution in dpi, default 72.
+        full: (bool) whether to OCR the full page image, or only its images (default)
+        tessdata: (str) handed to pymupdf.get_tessdata(); its result is used
+            for all OCR calls.
+    Returns:
+        The resulting pymupdf.TextPage.
+    """
+    pymupdf.CheckParent(page)
+    tessdata = pymupdf.get_tessdata(tessdata)
+
+    def full_ocr(page, dpi, language, flags):
+        """OCR a rendering of the whole page and return the resulting textpage."""
+        zoom = dpi / 72  # scale factor relative to 72 dpi
+        mat = pymupdf.Matrix(zoom, zoom)
+        pix = page.get_pixmap(matrix=mat)
+        # open the bytes returned by pdfocr_tobytes() as a PDF
+        ocr_pdf = pymupdf.Document(
+                "pdf",
+                pix.pdfocr_tobytes(
+                    compress=False,
+                    language=language,
+                    tessdata=tessdata,
+                    ),
+                )
+        ocr_page = ocr_pdf.load_page(0)
+        # scale the OCR page back to the width of 'page', then apply the
+        # page's derotation matrix
+        unzoom = page.rect.width / ocr_page.rect.width
+        ctm = pymupdf.Matrix(unzoom, unzoom) * page.derotation_matrix
+        tpage = ocr_page.get_textpage(flags=flags, matrix=ctm)
+        ocr_pdf.close()
+        pix = None
+        # make the textpage refer to the original page (weakly), not to the
+        # page of the temporary OCR document
+        tpage.parent = weakref.proxy(page)
+        return tpage
+
+    # if OCR for the full page, OCR its pixmap @ desired dpi
+    if full:
+        return full_ocr(page, dpi, language, flags)
+
+    # For partial OCR, make a normal textpage, then extend it with text that
+    # is OCRed from each image.
+    # The image blocks are taken from a separate "dict" extraction that has
+    # the images flag bit set ON.
+    tpage = page.get_textpage(flags=flags)
+    for block in page.get_text("dict", flags=pymupdf.TEXT_PRESERVE_IMAGES)["blocks"]:
+        if block["type"] != 1:  # only look at images
+            continue
+        bbox = pymupdf.Rect(block["bbox"])
+        if bbox.width <= 3 or bbox.height <= 3:  # ignore tiny stuff
+            continue
+        try:
+            pix = pymupdf.Pixmap(block["image"])  # get image pixmap
+            if pix.n - pix.alpha != 3:  # we need to convert this to RGB!
+                pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
+            if pix.alpha:  # must remove alpha channel
+                pix = pymupdf.Pixmap(pix, 0)
+            imgdoc = pymupdf.Document(
+                    "pdf",
+                    pix.pdfocr_tobytes(language=language, tessdata=tessdata),
+                    )  # pdf with OCRed page
+            imgpage = imgdoc.load_page(0)  # read image as a page
+            pix = None
+            # compute matrix to transform coordinates back to that of 'page'
+            imgrect = imgpage.rect  # page size of image PDF
+            shrink = pymupdf.Matrix(1 / imgrect.width, 1 / imgrect.height)
+            mat = shrink * block["transform"]
+            imgpage.extend_textpage(tpage, flags=0, matrix=mat)
+            imgdoc.close()
+        except (RuntimeError, mupdf.FzErrorBase):
+            # "if 0" keeps this diagnostic output deliberately switched off.
+            if 0 and g_exceptions_verbose:
+                # Don't show exception info here because it can happen in
+                # normal operation (see test_3842b).
+                pymupdf.exception_info()
+            # discard the partially extended textpage and OCR the whole page
+            tpage = None
+            pymupdf.message("Falling back to full page OCR")
+            return full_ocr(page, dpi, language, flags)
+
+    return tpage
+
+
+def get_image_info(page: pymupdf.Page, hashes: bool = False, xrefs: bool = False) -> list:
+    """Extract image information only from a pymupdf.TextPage.
+
+    The extracted list is cached in 'page._image_info' when it was created
+    with hashes, and reused by later calls.
+
+    Args:
+        hashes: (bool) include MD5 hash for each image.
+        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
+            Ignored if the document is no PDF.
+    Returns:
+        A list of image information dictionaries. With xrefs, each of them
+        gets an "xref" item, which is 0 if no xref was found.
+    """
+    doc = page.parent
+    is_pdf = doc.is_pdf
+    if not is_pdf:
+        xrefs = False  # xrefs only exist in PDFs
+    elif xrefs:
+        hashes = True  # xrefs are found by comparing digests
+
+    infos = getattr(page, "_image_info", None)
+    if infos:
+        if not xrefs:
+            return infos  # the cached version is good enough
+    else:
+        textpage = page.get_textpage(flags=pymupdf.TEXT_PRESERVE_IMAGES)
+        infos = textpage.extractIMGINFO(hashes=hashes)
+        del textpage
+        if hashes:
+            page._image_info = infos
+
+    if not xrefs:  # at this point 'xrefs' can only be true for a PDF
+        return infos
+
+    # map the digest of each image in the page's image list to its xref
+    xref_of_digest = {}
+    for entry in page.get_images():
+        image_xref = entry[0]
+        pixmap = pymupdf.Pixmap(doc, image_xref)
+        xref_of_digest[pixmap.digest] = image_xref
+        del pixmap
+
+    # store the xref (0 if not found) in each image information item
+    for info in infos:
+        info["xref"] = xref_of_digest.get(info["digest"], 0)
+    return infos
+
+
+def get_image_rects(page: pymupdf.Page, name, transform=False) -> list:
+    """Return list of image positions on a page.
+
+    The image is identified by comparing its digest with the digests in the
+    page's image information.
+
+    Args:
+        name: (str, list, int) image identification. May be reference name, an
+              item of the page's image list or an xref.
+        transform: (bool) whether to also return the transformation matrix.
+    Returns:
+        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
+        for all image locations on the page.
+    Raises:
+        ValueError: if a reference name matches none or more than one image.
+    """
+    name_type = type(name)
+    if name_type in (list, tuple):
+        xref = name[0]  # item of the page's image list
+    elif name_type is int:
+        xref = name
+    else:
+        # a reference name: it must identify exactly one image of the page
+        candidates = [img for img in page.get_images() if img[7] == name]
+        if not candidates:
+            raise ValueError("bad image name")
+        if len(candidates) != 1:
+            raise ValueError("multiple image names found")
+        xref = candidates[0][0]
+
+    # make pixmap of the image to compute MD5
+    pixmap = pymupdf.Pixmap(page.parent, xref)
+    digest = pixmap.digest
+    del pixmap
+
+    matching = [
+        info for info in page.get_image_info(hashes=True) if info["digest"] == digest
+    ]
+    if transform:
+        return [
+            (pymupdf.Rect(info["bbox"]), pymupdf.Matrix(info["transform"]))
+            for info in matching
+        ]
+    return [pymupdf.Rect(info["bbox"]) for info in matching]
+
+
+def get_text(
+    page: pymupdf.Page,
+    option: str = "text",
+    *,
+    clip: rect_like = None,
+    flags: OptInt = None,
+    textpage: pymupdf.TextPage = None,
+    sort: bool = False,
+    delimiters=None,
+    tolerance=3,
+):
+    """Extract text from a page or an annotation.
+
+    This is a unifying wrapper for various methods of the pymupdf.TextPage class.
+
+    Args:
+        option: (str) text, words, blocks, html, dict, json, rawdict, rawjson,
+            xhtml or xml - case ignored. Any other value is treated as "text".
+        clip: (rect-like) restrict output to this area. For html, xml and
+            xhtml it is replaced by the page cropbox.
+        flags: bit switches to e.g. exclude images or decompose ligatures.
+            If None, the default flags of the chosen option are used.
+        textpage: reuse this pymupdf.TextPage instead of creating a new one
+            from 'clip' and 'flags'.
+        sort: (bool) handed on to the extraction method of the chosen option
+            (not used for html, xml and xhtml).
+        delimiters: handed on to get_text_words (option "words" only).
+        tolerance: handed on to get_sorted_text (option "text" with sort=True
+            only).
+
+    Returns:
+        the output of get_text_words / get_text_blocks / get_sorted_text or of
+        the pymupdf.TextPage methods extractText, extractHTML, extractDICT,
+        extractJSON, extractRAWDICT, extractRAWJSON, extractXHTML or
+        extractXML respectively.
+
+    Raises:
+        ValueError: if 'textpage' is given but its parent is not this page.
+    """
+    formats = {
+        "text": pymupdf.TEXTFLAGS_TEXT,
+        "html": pymupdf.TEXTFLAGS_HTML,
+        "json": pymupdf.TEXTFLAGS_DICT,
+        "rawjson": pymupdf.TEXTFLAGS_RAWDICT,
+        "xml": pymupdf.TEXTFLAGS_XML,
+        "xhtml": pymupdf.TEXTFLAGS_XHTML,
+        "dict": pymupdf.TEXTFLAGS_DICT,
+        "rawdict": pymupdf.TEXTFLAGS_RAWDICT,
+        "words": pymupdf.TEXTFLAGS_WORDS,
+        "blocks": pymupdf.TEXTFLAGS_BLOCKS,
+    }
+    option = option.lower()
+    # Unknown / misspelled options fall back to plain text. A preceding
+    # 'assert' used to make this fallback unreachable unless Python ran
+    # with -O; the fallback is now the behavior in every mode.
+    if option not in formats:
+        option = "text"
+    if flags is None:
+        flags = formats[option]
+
+    # Options implemented by dedicated helper functions.
+    if option == "words":
+        return get_text_words(
+            page,
+            clip=clip,
+            flags=flags,
+            textpage=textpage,
+            sort=sort,
+            delimiters=delimiters,
+        )
+    if option == "blocks":
+        return get_text_blocks(
+            page, clip=clip, flags=flags, textpage=textpage, sort=sort
+        )
+    if option == "text" and sort:
+        return get_sorted_text(
+            page,
+            clip=clip,
+            flags=flags,
+            textpage=textpage,
+            tolerance=tolerance,
+        )
+
+    pymupdf.CheckParent(page)
+    if option in ("html", "xml", "xhtml"):  # no clipping for MuPDF functions
+        clip = page.cropbox
+
+    # 'cb' is only handed to the dict / json extractors, and only when no
+    # clip is in effect and 'page' is a real Page object.
+    cb = None
+    if clip is not None:
+        clip = pymupdf.Rect(clip)
+    elif type(page) is pymupdf.Page:
+        cb = page.cropbox
+
+    # pymupdf.TextPage with or without images
+    tp = textpage
+    if tp is None:
+        tp = page.get_textpage(clip=clip, flags=flags)
+    elif getattr(tp, "parent") != page:
+        raise ValueError("not a textpage of this page")
+
+    if option == "json":
+        return tp.extractJSON(cb=cb, sort=sort)
+    if option == "rawjson":
+        return tp.extractRAWJSON(cb=cb, sort=sort)
+    if option == "dict":
+        return tp.extractDICT(cb=cb, sort=sort)
+    if option == "rawdict":
+        return tp.extractRAWDICT(cb=cb, sort=sort)
+    if option == "html":
+        return tp.extractHTML()
+    if option == "xml":
+        return tp.extractXML()
+    if option == "xhtml":
+        return tp.extractXHTML()
+    return tp.extractText(sort=sort)
+
+
+def get_page_text(
+    doc: pymupdf.Document,
+    pno: int,
+    option: str = "text",
+    clip: rect_like = None,
+    flags: OptInt = None,
+    textpage: pymupdf.TextPage = None,
+    sort: bool = False,
+) -> typing.Any:
+    """Return the text of the document page with number 'pno'.
+
+    Notes:
+        Shortcut for doc[pno].get_text(). The 'textpage' argument is
+        accepted, but it is not handed on to that call.
+    Args:
+        pno: page number
+        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
+        clip: handed on to page.get_text().
+        flags: handed on to page.get_text().
+        sort: handed on to page.get_text().
+    Returns:
+        whatever page.get_text() returns for the chosen option.
+    """
+    page = doc[pno]
+    return page.get_text(option, clip=clip, flags=flags, sort=sort)
+
+def get_pixmap(
+        page: pymupdf.Page,
+        *,
+        matrix: matrix_like=pymupdf.Identity,
+        dpi=None,
+        colorspace: pymupdf.Colorspace=pymupdf.csRGB,
+        clip: rect_like=None,
+        alpha: bool=False,
+        annots: bool=True,
+        ) -> pymupdf.Pixmap:
+    """Render a page to a pixmap.
+
+    Keyword args:
+        matrix: Matrix for transformation (default: Identity).
+        dpi: desired dots per inch. If given, 'matrix' is replaced by a
+            scaling matrix with factor dpi / 72 and the pixmap's dpi is set.
+        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default
+            csRGB. Any other string also selects csRGB.
+        clip: (irect-like) restrict rendering to this area.
+        alpha: (bool) whether to include alpha channel
+        annots: (bool) whether to also render annotations
+
+    Raises:
+        ValueError: if the colorspace does not have 1, 3 or 4 components.
+    """
+    if dpi:
+        scale = dpi / 72
+        matrix = pymupdf.Matrix(scale, scale)
+
+    if type(colorspace) is str:
+        # translate a colorspace name, everything unknown means RGB
+        by_name = {"GRAY": pymupdf.csGRAY, "CMYK": pymupdf.csCMYK}
+        colorspace = by_name.get(colorspace.upper(), pymupdf.csRGB)
+    if colorspace.n not in (1, 3, 4):
+        raise ValueError("unsupported colorspace")
+
+    display_list = page.get_displaylist(annots=annots)
+    pix = display_list.get_pixmap(
+        matrix=matrix,
+        colorspace=colorspace,
+        alpha=alpha,
+        clip=clip,
+    )
+    display_list = None  # drop the display list reference right away
+    if dpi:
+        pix.set_dpi(dpi, dpi)
+    return pix
+
+
+def get_page_pixmap(
+    doc: pymupdf.Document,
+    pno: int,
+    *,
+    matrix: matrix_like = pymupdf.Identity,
+    dpi=None,
+    colorspace: pymupdf.Colorspace = pymupdf.csRGB,
+    clip: rect_like = None,
+    alpha: bool = False,
+    annots: bool = True,
+) -> pymupdf.Pixmap:
+    """Render the document page with number 'pno' to a pixmap.
+
+    Notes:
+        Shortcut for doc[pno].get_pixmap(); all keyword arguments are
+        handed on unchanged.
+    Args:
+        pno: (int) page number
+        matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
+        dpi: desired dots per inch.
+        colorspace: (str,pymupdf.Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
+        clip: (irect-like) restrict rendering to this area.
+        alpha: (bool) include alpha channel
+        annots: (bool) also render annotations
+    """
+    page = doc[pno]
+    render_args = {
+        "matrix": matrix,
+        "dpi": dpi,
+        "colorspace": colorspace,
+        "clip": clip,
+        "alpha": alpha,
+        "annots": annots,
+    }
+    return page.get_pixmap(**render_args)
+
+
+def getLinkDict(ln, document=None) -> dict:
+    """Convert a link or an outline item into a link dictionary.
+
+    Args:
+        ln: a pymupdf.Link or a pymupdf.Outline object.
+        document: handed to ln.destination() when 'ln' is a pymupdf.Outline;
+            not used for a pymupdf.Link.
+    Returns:
+        A dict with the keys "kind" and "xref" (always 0 here), "from" when
+        'ln' has a readable 'rect', and further keys depending on the
+        destination kind ("uri", "page", "to", "zoom", "file" or the items
+        of a named destination).
+    Raises:
+        AssertionError: if 'ln' is neither an Outline nor a Link.
+    """
+    if isinstance(ln, pymupdf.Outline):
+        dest = ln.destination(document)
+    elif isinstance(ln, pymupdf.Link):
+        dest = ln.dest
+    else:
+        # Raised explicitly instead of 'assert 0': an assert statement is
+        # stripped under 'python -O', which left 'dest' unbound further down.
+        raise AssertionError(f'Unexpected {type(ln)=}.')
+    nl = {"kind": dest.kind, "xref": 0}
+    try:
+        if hasattr(ln, 'rect'):
+            nl["from"] = ln.rect
+    except Exception:
+        # Deliberate best effort: reading 'rect' fails quite often in
+        # PyMuPDF/tests; the link dict is then returned without "from".
+        if g_exceptions_verbose >= 2:   pymupdf.exception_info()
+
+    # Target point: only coordinates flagged as valid are taken over.
+    pnt = pymupdf.Point(0, 0)
+    if dest.flags & pymupdf.LINK_FLAG_L_VALID:
+        pnt.x = dest.lt.x
+    if dest.flags & pymupdf.LINK_FLAG_T_VALID:
+        pnt.y = dest.lt.y
+
+    if dest.kind == pymupdf.LINK_URI:
+        nl["uri"] = dest.uri
+
+    elif dest.kind == pymupdf.LINK_GOTO:
+        nl["page"] = dest.page
+        nl["to"] = pnt
+        nl["zoom"] = dest.rb.x if dest.flags & pymupdf.LINK_FLAG_R_IS_ZOOM else 0.0
+
+    elif dest.kind == pymupdf.LINK_GOTOR:
+        nl["file"] = dest.file_spec.replace("\\", "/")
+        nl["page"] = dest.page
+        if dest.page < 0:
+            nl["to"] = dest.dest
+        else:
+            nl["to"] = pnt
+            nl["zoom"] = dest.rb.x if dest.flags & pymupdf.LINK_FLAG_R_IS_ZOOM else 0.0
+
+    elif dest.kind == pymupdf.LINK_LAUNCH:
+        nl["file"] = dest.file_spec.replace("\\", "/")
+
+    elif dest.kind == pymupdf.LINK_NAMED:
+        # The dicts should not have same key(s).
+        assert not (dest.named.keys() & nl.keys())
+        nl.update(dest.named)
+        if 'to' in nl:
+            nl['to'] = pymupdf.Point(nl['to'])
+
+    else:
+        nl["page"] = dest.page
+    return nl
+
+
+def get_links(page: pymupdf.Page) -> list:
+    """Return the links of a page as a list of link dictionaries.
+
+    Notes:
+        see PyMuPDF documentation for details.
+
+    Each link reachable from page.first_link is converted with getLinkDict().
+    For PDF pages, "xref" and "id" are filled in from the page's link
+    annotations, provided their number equals the number of links found.
+    """
+
+    pymupdf.CheckParent(page)
+    links = []
+    link = page.first_link
+    while link:
+        links.append(getLinkDict(link, page.parent))
+        link = link.next
+
+    if links and page.parent.is_pdf:
+        link_annots = [
+            annot
+            for annot in pymupdf.JM_get_annot_xref_list2(page)
+            if annot[1] == pymupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
+        ]
+        # only trust the pairing if both lists have the same length
+        if len(link_annots) == len(links):
+            for entry, annot in zip(links, link_annots):
+                entry["xref"] = annot[0]
+                entry["id"] = annot[2]
+    return links
+
+
+def get_toc(
+    doc: pymupdf.Document,
+    simple: bool = True,
+) -> list:
+    """Return the table of contents as a list.
+
+    Args:
+        simple: a bool to control output. Each entry of the returned list
+            consists of outline level, title and page number, plus the link
+            destination dictionary if simple = False. For details see
+            PyMuPDF's documentation.
+
+    Raises:
+        ValueError: if the document is closed.
+    """
+    def collect(item, entries, level):
+        """Walk the chain starting at 'item' and append its entries (and those of all sub-chains) to 'entries'."""
+        while item and item.this.m_internal:
+            title = item.title or " "
+
+            # 1-based page number, or -1 for external items / items without uri
+            page = -1
+            if not item.is_external and item.uri:
+                if item.page == -1:
+                    page = doc.resolve_link(item.uri)[0] + 1
+                else:
+                    page = item.page + 1
+
+            entry = [level, title, page]
+            if not simple:
+                entry.append(getLinkDict(item, doc))
+            entries.append(entry)
+
+            if item.down:
+                entries = collect(item.down, entries, level + 1)
+            item = item.next
+        return entries
+
+    # ensure document is open
+    if doc.is_closed:
+        raise ValueError("document closed")
+    doc.init_doc()
+    first_item = doc.outline
+    if not first_item:
+        return []
+    toc = collect(first_item, [], 1)
+    if doc.is_pdf and not simple:
+        doc._extend_toc_items(toc)
+    return toc
+
+
+def del_toc_item(
+    doc: pymupdf.Document,
+    idx: int,
+) -> None:
+    """Remove the TOC / bookmark item at position 'idx' of the outline xref list."""
+    outline_xrefs = doc.get_outline_xrefs()
+    doc._remove_toc_item(outline_xrefs[idx])
+
+
+def set_toc_item(
+    doc: pymupdf.Document,
+    idx: int,
+    dest_dict: OptDict = None,
+    kind: OptInt = None,
+    pno: OptInt = None,
+    uri: OptStr = None,
+    title: OptStr = None,
+    to: point_like = None,
+    filename: OptStr = None,
+    zoom: float = 0,
+) -> None:
+    """Update TOC item by index.
+
+    It allows changing the item's title and link destination.
+
+    Args:
+        idx:
+            (int) desired index of the TOC list, as created by get_toc.
+        dest_dict:
+            (dict) destination dictionary as created by get_toc(False).
+            Outrules all other parameters except 'title'. If None, the
+            remaining parameters are used to make a dest dictionary.
+            The dictionary is not modified.
+        kind:
+            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
+            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
+            be deleted.
+        pno:
+            (int) page number (1-based like in get_toc). Required if
+            pymupdf.LINK_GOTO.
+        uri:
+            (str) the URL, required if pymupdf.LINK_URI.
+        title:
+            (str) the new title. No change if None.
+        to:
+            (point-like) destination on the target page. If omitted, (72, 36)
+            will be used as target coordinates.
+        filename:
+            (str) destination filename, required for pymupdf.LINK_GOTOR and
+            pymupdf.LINK_LAUNCH.
+        zoom:
+            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
+
+    Raises:
+        ValueError: for a bad page number, a bad color value, or when no
+            valid bookmark destination can be built.
+    """
+    xref = doc.get_outline_xrefs()[idx]
+    page_xref = 0
+    if type(dest_dict) is dict:
+        # Work on a shallow copy and a fresh Point: the original code wrote
+        # the flipped target back into the caller's dictionary, so calling
+        # this function twice with the same dict flipped the y coordinate
+        # back and forth.
+        dest_dict = dict(dest_dict)
+        if dest_dict["kind"] == pymupdf.LINK_GOTO:
+            pno = dest_dict["page"]
+            page_xref = doc.page_xref(pno)
+            page_height = doc.page_cropbox(pno).height
+            to = pymupdf.Point(dest_dict.get('to', pymupdf.Point(72, 36)))
+            to.y = page_height - to.y  # y measured from the opposite page edge
+            dest_dict["to"] = to
+        action = getDestStr(page_xref, dest_dict)
+        if not action.startswith("/A"):
+            raise ValueError("bad bookmark dest")
+        color = dest_dict.get("color")
+        if color:
+            color = list(map(float, color))
+            if len(color) != 3 or min(color) < 0 or max(color) > 1:
+                raise ValueError("bad color value")
+        bold = dest_dict.get("bold", False)
+        italic = dest_dict.get("italic", False)
+        flags = italic + 2 * bold  # bit 0: italic, bit 1: bold
+        collapse = dest_dict.get("collapse")
+        return doc._update_toc_item(
+            xref,
+            action=action[2:],  # strip the leading "/A"
+            title=title,
+            color=color,
+            flags=flags,
+            collapse=collapse,
+        )
+
+    if kind == pymupdf.LINK_NONE:  # delete bookmark item
+        return doc.del_toc_item(idx)
+    if kind is None and title is None:  # treat as no-op
+        return None
+    if kind is None:  # only update title text
+        return doc._update_toc_item(xref, action=None, title=title)
+
+    if kind == pymupdf.LINK_GOTO:
+        if pno is None or pno not in range(1, doc.page_count + 1):
+            raise ValueError("bad page number")
+        page_xref = doc.page_xref(pno - 1)
+        page_height = doc.page_cropbox(pno - 1).height
+        if to is None:
+            to = pymupdf.Point(72, page_height - 36)
+        else:
+            to = pymupdf.Point(to)
+            to.y = page_height - to.y
+
+    ddict = {
+        "kind": kind,
+        "to": to,
+        "uri": uri,
+        "page": pno,
+        "file": filename,
+        "zoom": zoom,
+    }
+    action = getDestStr(page_xref, ddict)
+    # an empty string (unsupported kind) fails this test as well
+    if not action.startswith("/A"):
+        raise ValueError("bad bookmark dest")
+
+    return doc._update_toc_item(xref, action=action[2:], title=title)
+
+
+def get_area(*args) -> float:
+    """Calculate area of rectangle.
+
+    Args:
+        args: the rectangle (its 'width' and 'height' are used), optionally
+            followed by the unit, one of 'px' (default), 'in', 'cm', or 'mm'.
+    """
+    rect = args[0]
+    unit = args[1] if len(args) > 1 else "px"
+    # per unit: (numerator, denominator) of the length conversion factor
+    conversions = {
+        "px": (1, 1),
+        "in": (1.0, 72.0),
+        "cm": (2.54, 72.0),
+        "mm": (25.4, 72.0),
+    }
+    numerator, denominator = conversions[unit]
+    factor = (numerator / denominator) ** 2  # squared, because this is an area
+    return factor * rect.width * rect.height
+
+
+def set_metadata(doc: pymupdf.Document, m: dict = None) -> None:
+    """Update the PDF /Info object.
+
+    Args:
+        m: a dictionary like doc.metadata. None or an empty dictionary
+            removes the /Info entry from the trailer. The keys "format" and
+            "encryption" are accepted but not written. Empty values and the
+            strings "none" / "null" are written as PDF null.
+
+    Raises:
+        ValueError: if the document is no PDF, is closed or encrypted, if
+            'm' is not a dict, or if it contains unknown keys.
+    """
+    if not doc.is_pdf:
+        raise ValueError("is no PDF")
+    if doc.is_closed or doc.is_encrypted:
+        raise ValueError("document closed or encrypted")
+    if m is None:
+        m = {}
+    elif type(m) is not dict:
+        raise ValueError("bad metadata")
+
+    # metadata key -> key in the /Info dictionary (None: not stored there)
+    keymap = {
+        "author": "Author",
+        "producer": "Producer",
+        "creator": "Creator",
+        "title": "Title",
+        "format": None,
+        "encryption": None,
+        "creationDate": "CreationDate",
+        "modDate": "ModDate",
+        "subject": "Subject",
+        "keywords": "Keywords",
+        "trapped": "Trapped",
+    }
+    unknown_keys = set(m).difference(set(keymap))
+    if unknown_keys:
+        raise ValueError("bad dict key(s): %s" % unknown_keys)
+
+    # locate the current /Info object (xref 0 means: there is none)
+    key_type, key_value = doc.xref_get_key(-1, "Info")
+    info_xref = int(key_value.replace("0 R", "")) if key_type == "xref" else 0
+
+    if not m:
+        if info_xref != 0:  # remove existing metadata
+            doc.xref_set_key(-1, "Info", "null")
+            doc.init_doc()
+        return  # otherwise there was nothing to do
+
+    if info_xref == 0:  # no prev metadata: get new xref
+        info_xref = doc.get_new_xref()
+        doc.update_object(info_xref, "<<>>")  # fill it with empty object
+        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
+
+    writable = [(keymap[k], v) for k, v in m.items() if keymap[k] is not None]
+    for pdf_key, val in writable:
+        if not bool(val) or val in ("none", "null"):
+            pdf_val = "null"
+        else:
+            pdf_val = pymupdf.get_pdf_str(val)
+        doc.xref_set_key(info_xref, pdf_key, pdf_val)
+    doc.init_doc()
+    return
+
+
+def getDestStr(xref: int, ddict: dict) -> str:
+    """Build the PDF action string ("/A<<...>>") for a destination.
+
+    Notes:
+        Supports Link annotations and outline items (bookmarks).
+    Args:
+        xref: number written as the target page reference of GoTo actions.
+        ddict: destination dictionary, or a plain number which is used as
+            the second value after /XYZ of a GoTo action.
+    Returns:
+        The action string, or "" if 'ddict' is empty or its kind is
+        pymupdf.LINK_NONE or not handled here.
+    """
+    if not ddict:
+        return ""
+
+    def goto_action(left, top, zoom):
+        """GoTo action pointing into this document."""
+        return f"/A<</S/GoTo/D[{xref} 0 R/XYZ {_format_g((left, top, zoom))}]>>"
+
+    def file_spec(name):
+        """File specification dictionary for an already PDF-encoded name."""
+        return f"<</F{name}/UF{name}/Type/Filespec>>"
+
+    if type(ddict) in (int, float):
+        return goto_action(0, ddict, 0)
+
+    kind = ddict.get("kind", pymupdf.LINK_NONE)
+    if kind == pymupdf.LINK_NONE:
+        return ""
+
+    if kind == pymupdf.LINK_GOTO:
+        zoom = ddict.get("zoom", 0)
+        left, top = ddict.get("to", pymupdf.Point(0, 0))
+        return goto_action(left, top, zoom)
+
+    if kind == pymupdf.LINK_URI:
+        target = pymupdf.get_pdf_str(ddict["uri"])
+        return f"/A<</S/URI/URI{target}>>"
+
+    if kind == pymupdf.LINK_LAUNCH:
+        name = pymupdf.get_pdf_str(ddict["file"])
+        return f"/A<</S/Launch/F{file_spec(name)}>>"
+
+    if kind == pymupdf.LINK_GOTOR:
+        if ddict["page"] < 0:  # destination in the other file given by name
+            name = pymupdf.get_pdf_str(ddict["file"])
+            target = pymupdf.get_pdf_str(ddict["to"])
+            return f"/A<</S/GoToR/D{target}/F{file_spec(name)}>>"
+        if ddict["page"] >= 0:  # destination given by page number and point
+            name = pymupdf.get_pdf_str(ddict["file"])
+            page = ddict["page"]
+            position = (ddict["to"].x, ddict["to"].y, ddict["zoom"])
+            return f"/A<</S/GoToR/D[{page} /XYZ {_format_g(position)}]/F{file_spec(name)}>>"
+
+    return ""
+
+
+def set_toc(
+    doc: pymupdf.Document,
+    toc: list,
+    collapse: int = 1,
+) -> int:
+    """Create new outline tree (table of contents, TOC).
+
+    Args:
+        toc: (list, tuple) each entry must contain level, title, page and
+            optionally top margin on the page. None or '()' remove the TOC.
+        collapse: (int) collapses entries beyond this level. Zero or None
+            shows all entries unfolded.
+    Returns:
+        the number of inserted items, or the number of removed items respectively.
+    Raises:
+        ValueError: if the document is closed, encrypted or no PDF, or if
+            'toc' fails one of the validity checks.
+    """
+    if doc.is_closed or doc.is_encrypted:
+        raise ValueError("document closed or encrypted")
+    if not doc.is_pdf:
+        raise ValueError("is no PDF")
+    if not toc:  # remove all entries
+        return len(doc._delToC())
+
+    # validity checks --------------------------------------------------------
+    if type(toc) not in (list, tuple):
+        raise ValueError("'toc' must be list or tuple")
+    toclen = len(toc)
+    page_count = doc.page_count
+    t0 = toc[0]
+    if type(t0) not in (list, tuple):
+        raise ValueError("items must be sequences of 3 or 4 items")
+    if t0[0] != 1:
+        raise ValueError("hierarchy level of item 0 must be 1")
+    # Each row is compared with its successor. NOTE: the page number of the
+    # last row is not range-checked here; page numbers are clamped to the
+    # valid range when the items are built below.
+    for i in range(toclen - 1):
+        t1 = toc[i]
+        t2 = toc[i + 1]
+        if not -1 <= t1[2] <= page_count:
+            raise ValueError("row %i: page number out of range" % i)
+        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
+            raise ValueError("bad row %i" % (i + 1))
+        if (type(t2[0]) is not int) or t2[0] < 1:
+            raise ValueError("bad hierarchy level in row %i" % (i + 1))
+        if t2[0] > t1[0] + 1:
+            raise ValueError("bad hierarchy level in row %i" % (i + 1))
+    # no formal errors in toc --------------------------------------------------
+
+    # --------------------------------------------------------------------------
+    # make a list of xref numbers, which we can use for our TOC entries
+    # --------------------------------------------------------------------------
+    # Delete the old outlines. Their xref numbers are not reused: every new
+    # item gets a newly acquired xref.
+    doc._delToC()
+
+    # entry zero is the outline root xref number, xref[i + 1] belongs to toc[i]
+    xref = [doc._getOLRootNumber()]
+    xref.extend(doc.get_new_xref() for _ in range(toclen))
+
+    lvltab = {0: 0}  # to store last entry per hierarchy level
+
+    # ------------------------------------------------------------------------------
+    # build olitems as a list of PDF-like connected dictionaries;
+    # the first one is the outline root
+    # ------------------------------------------------------------------------------
+    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
+    for i, o in enumerate(toc):
+        lvl = o[0]  # level
+        title = pymupdf.get_pdf_str(o[1])  # title
+        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
+        page_xref = doc.page_xref(pno)
+        page_height = doc.page_cropbox(pno).height
+        top = pymupdf.Point(72, page_height - 36)
+        dest_dict = {"to": top, "kind": pymupdf.LINK_GOTO}  # fall back target
+        if o[2] < 0:
+            dest_dict["kind"] = pymupdf.LINK_NONE
+        if len(o) > 3:  # some target is specified
+            if type(o[3]) in (int, float):  # convert a number to a point
+                dest_dict["to"] = pymupdf.Point(72, page_height - o[3])
+            else:  # if something else, make sure we have a dict
+                # We make a copy of o[3] to avoid modifying our caller's data.
+                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
+                if "to" not in dest_dict:  # target point not in dict?
+                    dest_dict["to"] = top  # put default in
+                else:  # transform target to PDF coordinates
+                    page = doc[pno]
+                    point = pymupdf.Point(dest_dict["to"])
+                    point.y = page.cropbox.height - point.y
+                    point = point * page.rotation_matrix
+                    dest_dict["to"] = (point.x, point.y)
+
+        number = i + 1  # index of this item in 'olitems' and 'xref'
+        d = {
+            "first": -1,
+            "count": 0,
+            "last": -1,
+            "prev": -1,
+            "next": -1,
+            "dest": getDestStr(page_xref, dest_dict),
+            "title": title,
+            "parent": lvltab[lvl - 1],
+            "xref": xref[number],
+            "color": dest_dict.get("color"),
+            "flags": dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0),
+        }
+        lvltab[lvl] = number
+        parent = olitems[lvltab[lvl - 1]]  # the parent entry
+
+        if dest_dict.get("collapse") or (collapse and lvl > collapse):
+            parent["count"] -= 1  # suppress expansion: make /Count negative
+        else:
+            parent["count"] += 1  # positive /Count
+
+        if parent["first"] == -1:  # first kid of this parent
+            parent["first"] = number
+        else:  # chain it to the previous kid
+            d["prev"] = parent["last"]
+            olitems[parent["last"]]["next"] = number
+        parent["last"] = number
+        olitems.append(d)
+
+    # ------------------------------------------------------------------------------
+    # now create each outline item as a string and insert it in the PDF
+    # ------------------------------------------------------------------------------
+    # The outline root (item 0) has no "dest", "next", "parent", "prev" and
+    # "title" keys. They are looked up with explicit defaults instead of
+    # catching and ignoring the resulting exceptions.
+    for i, ol in enumerate(olitems):
+        txt = "<<"
+        if ol["count"] != 0:
+            txt += "/Count %i" % ol["count"]
+        txt += ol.get("dest", "")
+        if ol["first"] > -1:
+            txt += "/First %i 0 R" % xref[ol["first"]]
+        if ol["last"] > -1:
+            txt += "/Last %i 0 R" % xref[ol["last"]]
+        if ol.get("next", -1) > -1:
+            txt += "/Next %i 0 R" % xref[ol["next"]]
+        if ol.get("parent", -1) > -1:
+            txt += "/Parent %i 0 R" % xref[ol["parent"]]
+        if ol.get("prev", -1) > -1:
+            txt += "/Prev %i 0 R" % xref[ol["prev"]]
+        if "title" in ol:
+            txt += "/Title" + ol["title"]
+
+        if ol.get("color") and len(ol["color"]) == 3:
+            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
+        if ol.get("flags", 0) > 0:
+            txt += "/F %i" % ol["flags"]
+
+        if i == 0:  # special: this is the outline root
+            txt += "/Type/Outlines"  # so add the /Type entry
+        txt += ">>"
+        doc.update_object(xref[i], txt)  # insert the PDF object
+
+    doc.init_doc()
+    return toclen
+
+
+def do_widgets(
+    tar: pymupdf.Document,
+    src: pymupdf.Document,
+    graftmap,
+    from_page: int = -1,
+    to_page: int = -1,
+    start_at: int = -1,
+    join_duplicates=0,
+) -> None:
+    """Insert widgets of copied page range into target PDF.
+
+    Parameter values **must** equal those of method insert_pdf() which
+    must have been previously executed.
+    """
+    if not src.is_form_pdf:  # nothing to do: source PDF has no fields
+        return
+
+    def clean_kid_parents(acro_fields):
+        """ Make sure all kids have correct "Parent" pointers."""
+        for i in range(acro_fields.pdf_array_len()):
+            parent = acro_fields.pdf_array_get(i)
+            kids = parent.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
+            for j in range(kids.pdf_array_len()):
+                kid = kids.pdf_array_get(j)
+                kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), parent)
+
+    def join_widgets(pdf, acro_fields, xref1, xref2, name):
+        """Called for each pair of widgets having the same name.
+
+        Args:
+            pdf: target MuPDF document
+            acro_fields: object Root/AcroForm/Fields
+            xref1, xref2: widget xrefs having same names
+            name: (str) the name
+
+        Result:
+            Defined or updated widget parent that points to both widgets.
+        """
+
+        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
+            """Merge widget in xref2 into "Kids" list of widget xref1.
+
+            Args:
+                xref1, kids1: target widget and its "Kids" array.
+                xref2, kids2: source wwidget and its "Kids" array (may be empty).
+            """
+            # make indirect objects from widgets
+            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
+            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
+            # find source widget in "Fields" array
+            idx = acro_fields.pdf_array_find(w2_ind)
+            acro_fields.pdf_array_delete(idx)
+
+            if not kids2.pdf_is_array():  # source widget has no kids
+                widget = mupdf.pdf_load_object(pdf, xref2)
+
+                # delete name from widget and insert target as parent
+                widget.pdf_dict_del(pymupdf.PDF_NAME("T"))
+                widget.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
+
+                # put in target Kids
+                kids1.pdf_array_push(w2_ind)
+            else:  # copy source kids to target kids
+                for i in range(kids2.pdf_array_len()):
+                    kid = kids2.pdf_array_get(i)
+                    kid.pdf_dict_put(pymupdf.PDF_NAME("Parent"), w1_ind)
+                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
+                    kids1.pdf_array_push(kid_ind)
+
+        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
+            """Make new "Parent" for two widgets with same name.
+
+            Args:
+                xref1, w1: first widget
+                xref2, w2: second widget
+                name: field name
+
+            Result:
+                Both widgets have no "Kids". We create a new object with the
+                name and a "Kids" array containing the widgets.
+                Original widgets must be removed from AcroForm/Fields.
+            """
+            # make new "Parent" object
+            new = mupdf.pdf_new_dict(pdf, 5)
+            new.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), name)
+            kids = new.pdf_dict_put_array(pymupdf.PDF_NAME("Kids"), 2)
+            new_obj = mupdf.pdf_add_object(pdf, new)
+            new_obj_xref = new_obj.pdf_to_num()
+            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)
+
+            # copy over some required source widget properties
+            ft = w1.pdf_dict_get(pymupdf.PDF_NAME("FT"))
+            w1.pdf_dict_del(pymupdf.PDF_NAME("FT"))
+            new_obj.pdf_dict_put(pymupdf.PDF_NAME("FT"), ft)
+
+            aa = w1.pdf_dict_get(pymupdf.PDF_NAME("AA"))
+            w1.pdf_dict_del(pymupdf.PDF_NAME("AA"))
+            new_obj.pdf_dict_put(pymupdf.PDF_NAME("AA"), aa)
+
+            # remove name field, insert "Parent" field in source widgets
+            w1.pdf_dict_del(pymupdf.PDF_NAME("T"))
+            w1.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
+            w2.pdf_dict_del(pymupdf.PDF_NAME("T"))
+            w2.pdf_dict_put(pymupdf.PDF_NAME("Parent"), new_ind)
+
+            # put source widgets in "kids" array
+            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
+            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
+            kids.pdf_array_push(ind1)
+            kids.pdf_array_push(ind2)
+
+            # remove source widgets from "AcroForm/Fields"
+            idx = acro_fields.pdf_array_find(ind1)
+            acro_fields.pdf_array_delete(idx)
+            idx = acro_fields.pdf_array_find(ind2)
+            acro_fields.pdf_array_delete(idx)
+
+            acro_fields.pdf_array_push(new_ind)
+
+        w1 = mupdf.pdf_load_object(pdf, xref1)
+        w2 = mupdf.pdf_load_object(pdf, xref2)
+        kids1 = w1.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
+        kids2 = w2.pdf_dict_get(pymupdf.PDF_NAME("Kids"))
+
+        # check which widget has a suitable "Kids" array
+        if kids1.pdf_is_array():
+            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
+        elif kids2.pdf_is_array():
+            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
+        else:
+            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order
+
+    def get_kids(parent, kids_list):
+        """Return the xrefs of all leaf kids below a parent field.
+
+        Walks the "Kids" array of ``parent``. A kid that owns a "Kids" array
+        itself is an intermediate node and is descended into; every other
+        kid counts as a leaf and its xref is collected.
+
+        Args:
+            parent: PDF object of the field whose kids are wanted.
+            kids_list: list receiving the xrefs. Call with an empty list.
+        Returns:
+            ``kids_list``, extended in place by the leaf xrefs found.
+        """
+        kids = mupdf.pdf_dict_get(parent, pymupdf.PDF_NAME("Kids"))
+        if not kids.pdf_is_array():
+            return kids_list
+        for i in range(kids.pdf_array_len()):
+            kid = kids.pdf_array_get(i)
+            # "Kids" is an array, not a dictionary: checking for a dict here
+            # would make the recursion unreachable and report intermediate
+            # nodes as leaves.
+            sub_kids = mupdf.pdf_dict_get(kid, pymupdf.PDF_NAME("Kids"))
+            if mupdf.pdf_is_array(sub_kids):
+                kids_list = get_kids(kid, kids_list)
+            else:
+                kids_list.append(kid.pdf_to_num())
+        return kids_list
+
+    def kids_xrefs(widget):
+        """Return the xref of a widget's "Parent" and that parent's leaf kids.
+
+        Returns:
+            A pair (parent_xref, kids_list). When the "Parent" entry yields
+            xref 0, the pair is (0, []).
+        """
+        parent_obj = mupdf.pdf_dict_get(widget, pymupdf.PDF_NAME("Parent"))
+        xref = parent_obj.pdf_to_num()
+        # only ask for kids if there is a parent xref to start from
+        leaf_xrefs = get_kids(parent_obj, []) if xref != 0 else []
+        return xref, leaf_xrefs
+
+    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
+        """Resolve field names that occur more than once in "AcroForm/Fields".
+
+        Args:
+            pdf: the PDF document owning the fields.
+            acro_fields: the "Fields" array of the AcroForm.
+            join_duplicates: if true, equally named fields are combined via
+                join_widgets(); otherwise the second field is renamed by
+                appending its xref in brackets.
+        """
+        xrefs_by_name = {}  # widget name -> xrefs of the fields carrying it
+
+        # collect name and xref of every entry in "AcroForm/Fields"
+        for idx in range(mupdf.pdf_array_len(acro_fields)):
+            field = mupdf.pdf_array_get(acro_fields, idx)
+            field_xref = field.pdf_to_num()
+            field_name = mupdf.pdf_dict_get_text_string(field, pymupdf.PDF_NAME("T"))
+            xrefs_by_name.setdefault(field_name, []).append(field_xref)
+
+        for name, xrefs in xrefs_by_name.items():
+            if len(xrefs) < 2:  # this name is unique already
+                continue
+            # only the first two holders are handled - more are not expected
+            first_xref, second_xref = xrefs[:2]
+            if not join_duplicates:
+                # make the field name unique by appending the xref
+                unique_name = name + f" [{second_xref}]"
+                field = mupdf.pdf_load_object(pdf, second_xref)
+                field.pdf_dict_put_text_string(pymupdf.PDF_NAME("T"), unique_name)
+            else:
+                # combine the fields with equal names
+                join_widgets(pdf, acro_fields, first_xref, second_xref, name)
+
+        clean_kid_parents(acro_fields)
+
+    def get_acroform(doc):
+        """Return the AcroForm dictionary of a PDF.
+
+        The object is looked up via the trailer path "Root/AcroForm", which
+        holds the central form field information.
+        """
+        pdf_doc = mupdf.pdf_document_from_fz_document(doc)
+        trailer = mupdf.pdf_trailer(pdf_doc)
+        return mupdf.pdf_dict_getp(trailer, "Root/AcroForm")
+
+    tarpdf = mupdf.pdf_document_from_fz_document(tar)
+    srcpdf = mupdf.pdf_document_from_fz_document(src)
+
+    if tar.is_form_pdf:
+        # target is a Form PDF, so use it to include source fields
+        acro = get_acroform(tar)
+        # Important arrays in AcroForm
+        acro_fields = acro.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
+        tar_co = acro.pdf_dict_get(pymupdf.PDF_NAME("CO"))
+        if not tar_co.pdf_is_array():
+            tar_co = acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
+    else:
+        # target is no Form PDF, so copy over source AcroForm
+        acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy
+
+        # Clear "Fields" and "CO" arrays: will be populated by page fields.
+        # This is required to avoid copying unneeded objects.
+        acro.pdf_dict_del(pymupdf.PDF_NAME("Fields"))
+        acro.pdf_dict_put_array(pymupdf.PDF_NAME("Fields"), 5)
+        acro.pdf_dict_del(pymupdf.PDF_NAME("CO"))
+        acro.pdf_dict_put_array(pymupdf.PDF_NAME("CO"), 5)
+
+        # Enrich AcroForm for copying to target
+        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)
+
+        # Insert AcroForm into target PDF
+        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
+        acro_fields = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("Fields"))
+        tar_co = acro_tar.pdf_dict_get(pymupdf.PDF_NAME("CO"))
+
+        # get its xref and insert it into target catalog
+        tar_xref = acro_tar.pdf_to_num()
+        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), pymupdf.PDF_NAME("Root"))
+        root.pdf_dict_put(pymupdf.PDF_NAME("AcroForm"), acro_tar_ind)
+
+    if from_page <= to_page:
+        src_range = range(from_page, to_page + 1)
+    else:
+        src_range = range(from_page, to_page - 1, -1)
+
+    parents = {}  # information about widget parents
+
+    # remove "P" owning page reference from all widgets of all source pages
+    for i in src_range:
+        src_page = src[i]
+        for xref in [
+            xref
+            for xref, wtype, _ in src_page.annot_xrefs()
+            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
+        ]:
+            w_obj = mupdf.pdf_load_object(srcpdf, xref)
+            w_obj.pdf_dict_del(pymupdf.PDF_NAME("P"))
+
+            # get the widget's parent structure
+            parent_xref, old_kids = kids_xrefs(w_obj)
+            if parent_xref:
+                parents[parent_xref] = {
+                    "new_xref": 0,
+                    "old_kids": old_kids,
+                    "new_kids": [],
+                }
+    # Copy over Parent widgets first - they are not page-dependent
+    for xref in parents.keys():  # pylint: disable=consider-using-dict-items
+        parent = mupdf.pdf_load_object(srcpdf, xref)
+        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
+        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
+        kids_xrefs_new = get_kids(parent_tar, [])
+        parent_xref_new = parent_tar.pdf_to_num()
+        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
+        acro_fields.pdf_array_push(parent_ind)
+        parents[xref]["new_xref"] = parent_xref_new
+        parents[xref]["new_kids"] = kids_xrefs_new
+
+    for i in range(len(src_range)):
+        # read first copied over page in target
+        tar_page = tar[start_at + i]
+
+        # read the original page in the source PDF
+        src_page = src[src_range[i]]
+
+        # now walk through source page widgets and copy over
+        w_xrefs = [  # widget xrefs of the source page
+            xref
+            for xref, wtype, _ in src_page.annot_xrefs()
+            if wtype == pymupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
+        ]
+        if not w_xrefs:  # no widgets on this source page
+            continue
+
+        # convert to formal PDF page
+        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)
+
+        # extract annotations array
+        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"))
+        if not mupdf.pdf_is_array(tar_annots):
+            tar_annots = mupdf.pdf_dict_put_array(
+                tar_page_pdf.obj(), pymupdf.PDF_NAME("Annots"), 5
+            )
+
+        for xref in w_xrefs:
+            w_obj = mupdf.pdf_load_object(srcpdf, xref)
+
+            # check if field takes part in inter-field validations
+            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))
+
+            # check if parent of widget already in target
+            parent_xref = mupdf.pdf_to_num(
+                w_obj.pdf_dict_get(pymupdf.PDF_NAME("Parent"))
+            )
+            if parent_xref == 0:  # parent not in target yet
+                try:
+                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
+                except Exception as e:
+                    pymupdf.message_warning(f"cannot copy widget at {xref=}: {e}")
+                    continue
+                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
+                tar_xref = w_obj_tar.pdf_to_num()
+                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
+                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
+            else:
+                parent = parents[parent_xref]
+                idx = parent["old_kids"].index(xref)  # search for xref in parent
+                tar_xref = parent["new_kids"][idx]
+                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
+
+            # Into "AcroForm/CO" if a computation field.
+            if is_aac:
+                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)
+
+    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
+
+def do_links(
+    doc1: pymupdf.Document,
+    doc2: pymupdf.Document,
+    from_page: int = -1,
+    to_page: int = -1,
+    start_at: int = -1,
+) -> None:
+    """Re-create the links of a copied page range in the destination PDF.
+
+    The arguments **must** be the same as those of the preceding call of
+    method insert_pdf().
+
+    Args:
+        doc1: destination PDF, receives the links.
+        doc2: source PDF, the pages were copied from here.
+        from_page: first source page number of the copied range.
+        to_page: last source page number of the copied range.
+        start_at: number of the first copied page in the destination PDF.
+    Raises:
+        ValueError: if 'start_at' is negative.
+    """
+
+    def cre_annot(lnk, xref_dst, pno_src, ctm):
+        """Return the object source for one link, or "" if unsupported."""
+        rect = _format_g(tuple(lnk["from"] * ctm))  # rect in PDF coordinates
+        kind = lnk["kind"]
+
+        if kind == pymupdf.LINK_GOTO:
+            make = pymupdf.annot_skel["goto1"]  # annot_goto
+            dst_idx = pno_src.index(lnk["page"])
+            target = lnk["to"] * ctm  # target point in PDF coordinates
+            return make(xref_dst[dst_idx], target.x, target.y, lnk["zoom"], rect)
+
+        if kind == pymupdf.LINK_GOTOR:
+            if lnk["page"] < 0:
+                make = pymupdf.annot_skel["gotor2"]  # annot_gotor_n
+                # drop first and last character of the PDF string
+                dest_name = pymupdf.get_pdf_str(lnk["to"])[1:-1]
+                file_name = lnk["file"]
+                return make(dest_name, file_name, rect)
+            make = pymupdf.annot_skel["gotor1"]  # annot_gotor
+            point = lnk.get("to", pymupdf.Point(0, 0))  # destination point
+            if type(point) is not pymupdf.Point:
+                point = pymupdf.Point(0, 0)
+            return make(
+                lnk["page"],
+                point.x,
+                point.y,
+                lnk["zoom"],
+                lnk["file"],
+                lnk["file"],
+                rect,
+            )
+
+        if kind == pymupdf.LINK_LAUNCH:
+            make = pymupdf.annot_skel["launch"]  # annot_launch
+            return make(lnk["file"], lnk["file"], rect)
+
+        if kind == pymupdf.LINK_URI:
+            make = pymupdf.annot_skel["uri"]  # annot_uri
+            return make(lnk["uri"], rect)
+
+        return ""  # any other link kind is not copied
+
+    # clamp the source page range to the pages of the source PDF
+    fp = 0 if from_page < 0 else min(from_page, doc2.page_count - 1)
+    tp = to_page if 0 <= to_page < doc2.page_count else doc2.page_count - 1
+
+    if start_at < 0:
+        raise ValueError("'start_at' must be >= 0")
+
+    step = 1 if fp <= tp else -1  # the page range may be reversed
+
+    # source page numbers and the destination page numbers they map to
+    pno_src = list(range(fp, tp + step, step))
+    pno_dst = list(range(start_at, start_at + len(pno_src)))
+
+    # page xrefs of both sides, in the same order
+    xref_src = []
+    xref_dst = []
+    for p_src, p_dst in zip(pno_src, pno_dst):
+        xref_src.append(doc2.page_xref(p_src))
+        xref_dst.append(doc1.page_xref(p_dst))
+
+    # build the links of every copied page in the destination PDF
+    for src_no, dst_no in zip(pno_src, pno_dst):
+        page_src = doc2[src_no]
+        links = page_src.get_links()
+        if not links:  # nothing to copy for this page
+            page_src = None
+            continue
+        ctm = ~page_src.transformation_matrix  # inverted page transformation
+        page_dst = doc1[dst_no]
+        link_tab = []  # object sources of all links of this page
+        for link in links:
+            if link["kind"] == pymupdf.LINK_GOTO and link["page"] not in pno_src:
+                continue  # GOTO target is outside the copied pages
+            annot_text = cre_annot(link, xref_dst, pno_src, ctm)
+            if annot_text:
+                link_tab.append(annot_text)
+        if link_tab:
+            page_dst._addAnnot_FromString(tuple(link_tab))
+
+
+def getLinkText(page: pymupdf.Page, lnk: dict) -> str:
+    """Return the PDF object source of a link annotation for a page.
+
+    Args:
+        page: the page owning the link.
+        lnk: link dictionary. "kind" and "from" are always read; depending
+            on the kind also "page", "to", "zoom", "file", "uri", "name" or
+            "nameddest". Optional "id" and "xref" identify an existing link.
+    Returns:
+        The object definition with a "/NM" entry added, or "" if the link
+        kind is not supported.
+    """
+    ictm = ~page.transformation_matrix
+    rect = _format_g(tuple(lnk["from"] * ictm))  # link rect, PDF coordinates
+    kind = lnk["kind"]
+
+    annot = ""
+    if kind == pymupdf.LINK_GOTO:
+        if lnk["page"] >= 0:
+            txt = pymupdf.annot_skel["goto1"]  # annot_goto
+            pno = lnk["page"]
+            xref = page.parent.page_xref(pno)
+            pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
+            # the destination point is converted with the matrix of the
+            # destination page, not the one of the link's own page
+            dest_ictm = ~page.parent[pno].transformation_matrix
+            ipnt = pnt * dest_ictm
+            annot = txt(xref, ipnt.x, ipnt.y, lnk.get("zoom", 0), rect)
+        else:
+            txt = pymupdf.annot_skel["goto2"]  # annot_goto_n
+            annot = txt(pymupdf.get_pdf_str(lnk["to"]), rect)
+
+    elif kind == pymupdf.LINK_GOTOR:
+        if lnk["page"] >= 0:
+            txt = pymupdf.annot_skel["gotor1"]  # annot_gotor
+            pnt = lnk.get("to", pymupdf.Point(0, 0))  # destination point
+            if type(pnt) is not pymupdf.Point:
+                pnt = pymupdf.Point(0, 0)
+            annot = txt(
+                lnk["page"],
+                pnt.x,
+                pnt.y,
+                lnk.get("zoom", 0),
+                lnk["file"],
+                lnk["file"],
+                rect,
+            )
+        else:
+            txt = pymupdf.annot_skel["gotor2"]  # annot_gotor_n
+            annot = txt(pymupdf.get_pdf_str(lnk["to"]), lnk["file"], rect)
+
+    elif kind == pymupdf.LINK_LAUNCH:
+        txt = pymupdf.annot_skel["launch"]  # annot_launch
+        annot = txt(lnk["file"], lnk["file"], rect)
+
+    elif kind == pymupdf.LINK_URI:
+        txt = pymupdf.annot_skel["uri"]  # annot_uri
+        annot = txt(lnk["uri"], rect)
+
+    elif kind == pymupdf.LINK_NAMED:
+        txt = pymupdf.annot_skel["named"]  # annot_named
+        lname = lnk.get("name")  # check presence of key
+        if lname is None:  # if missing, fall back to alternative
+            lname = lnk["nameddest"]
+        annot = txt(lname, rect)
+
+    if not annot:  # unsupported link kind
+        return annot
+
+    # ids of the links already present on the page, keyed by their xref
+    link_names = {
+        x[0]: x[2]
+        for x in page.annot_xrefs()
+        if x[1] == pymupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
+    }
+
+    old_name = lnk.get("id", "")  # id value in the argument
+
+    # A dictionary for a new link may carry an "id" but no "xref": use
+    # .get() so this case receives a fresh name instead of a KeyError.
+    if old_name and (lnk.get("xref"), old_name) in link_names.items():
+        name = old_name  # no new name if this is an update only
+    else:
+        used_names = set(link_names.values())
+        stem = pymupdf.TOOLS.set_annot_stem() + "-L%i"
+        i = 0
+        while True:  # find the first unused name
+            name = stem % i
+            if name not in used_names:
+                break
+            i += 1
+
+    # add /NM key to object definition
+    # NOTE(review): str.replace() changes every "/Link" occurrence, so a
+    # URI or file name containing "/Link" is modified as well - confirm
+    # against the annot_skel templates.
+    annot = annot.replace("/Link", "/Link/NM(%s)" % name)
+    return annot
+
+
+def delete_widget(page: pymupdf.Page, widget: pymupdf.Widget) -> pymupdf.Widget:
+    """Remove a widget from the page and return its successor.
+
+    Args:
+        page: the page owning the widget.
+        widget: the widget to delete. It must have an "_annot" attribute.
+    Returns:
+        The value of ``widget.next``, read before the deletion.
+    Raises:
+        ValueError: if the widget has no "_annot" attribute or it is None.
+    """
+    pymupdf.CheckParent(page)
+    underlying = getattr(widget, "_annot", None)
+    if underlying is None:
+        raise ValueError("bad type: widget")
+    successor = widget.next  # must be read while the widget is intact
+    page.delete_annot(underlying)
+    widget._annot.parent = None
+    # strip all instance attributes from the deleted widget
+    for attr_name in tuple(widget.__dict__):
+        del widget.__dict__[attr_name]
+    return successor
+
+
+def update_link(page: pymupdf.Page, lnk: dict) -> None:
+    """Replace the definition of an existing link on the page.
+
+    The object with xref ``lnk["xref"]`` is overwritten by the definition
+    that getLinkText() builds from the link dictionary.
+
+    Raises:
+        ValueError: if the link kind is not supported.
+    """
+    pymupdf.CheckParent(page)
+    link_source = getLinkText(page, lnk)
+    if not link_source:
+        raise ValueError("link kind not supported")
+
+    page.parent.update_object(lnk["xref"], link_source, page=page)
+
+
+def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None:
+    """Add a new link to the page.
+
+    The link definition is built by getLinkText() from the link dictionary.
+    Parameter 'mark' is accepted but not used here.
+
+    Raises:
+        ValueError: if the link kind is not supported.
+    """
+    pymupdf.CheckParent(page)
+    link_source = getLinkText(page, lnk)
+    if not link_source:
+        raise ValueError("link kind not supported")
+    page._addAnnot_FromString((link_source,))
+
+
+def insert_textbox(
+    page: pymupdf.Page,
+    rect: rect_like,
+    buffer: typing.Union[str, list],
+    *,
+    fontname: str = "helv",
+    fontfile: OptStr = None,
+    set_simple: int = 0,
+    encoding: int = 0,
+    fontsize: float = 11,
+    lineheight: OptFloat = None,
+    color: OptSeq = None,
+    fill: OptSeq = None,
+    expandtabs: int = 1,
+    align: int = 0,
+    rotate: int = 0,
+    render_mode: int = 0,
+    miter_limit: float = 1,
+    border_width: float = 0.05,
+    morph: OptSeq = None,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> float:
+    """Put text into a rectangle of the page.
+
+    Notes:
+        A Shape object is created, its same-named method does the work, and
+        the shape is committed only if that method returned a value >= 0.
+    Parameters:
+        rect: (rect-like) area to use for text.
+        buffer: text to be inserted
+        fontname: a Base-14 font, font name or '/name'
+        fontfile: name of a font file
+        fontsize: font size
+        lineheight: overwrite the font property
+        color: RGB color triple
+        expandtabs: handles tabulators with string function
+        align: left, center, right, justified
+        rotate: 0, 90, 180, or 270 degrees
+        morph: morph box with a matrix and a fixpoint
+        overlay: put text in foreground or background
+    Returns:
+        unused or deficit rectangle area (float)
+    """
+    shape = page.new_shape()
+    result = shape.insert_textbox(
+        rect,
+        buffer,
+        fontname=fontname,
+        fontfile=fontfile,
+        set_simple=set_simple,
+        encoding=encoding,
+        fontsize=fontsize,
+        lineheight=lineheight,
+        color=color,
+        fill=fill,
+        expandtabs=expandtabs,
+        align=align,
+        rotate=rotate,
+        render_mode=render_mode,
+        miter_limit=miter_limit,
+        border_width=border_width,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    if result >= 0:  # a negative value means: nothing to commit
+        shape.commit(overlay)
+    return result
+
+
+def insert_text(
+    page: pymupdf.Page,
+    point: point_like,
+    text: typing.Union[str, list],
+    *,
+    fontsize: float = 11,
+    lineheight: OptFloat = None,
+    fontname: str = "helv",
+    fontfile: OptStr = None,
+    set_simple: int = 0,
+    encoding: int = 0,
+    color: OptSeq = None,
+    fill: OptSeq = None,
+    border_width: float = 0.05,
+    miter_limit: float = 1,
+    render_mode: int = 0,
+    rotate: int = 0,
+    morph: OptSeq = None,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+):
+    """Put text on the page, starting at a point.
+
+    Notes:
+        A Shape object is created, its same-named method does the work, and
+        the shape is committed only if that method returned a value >= 0.
+        All text properties are passed on to the Shape method unchanged.
+    Returns:
+        The value returned by Shape.insert_text().
+    """
+    shape = page.new_shape()
+    result = shape.insert_text(
+        point,
+        text,
+        fontsize=fontsize,
+        lineheight=lineheight,
+        fontname=fontname,
+        fontfile=fontfile,
+        set_simple=set_simple,
+        encoding=encoding,
+        color=color,
+        fill=fill,
+        border_width=border_width,
+        miter_limit=miter_limit,
+        render_mode=render_mode,
+        rotate=rotate,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    if result >= 0:  # a negative value means: nothing to commit
+        shape.commit(overlay)
+    return result
+
+
+def insert_htmlbox(
+    page,
+    rect,
+    text,
+    *,
+    css=None,
+    scale_low=0,
+    archive=None,
+    rotate=0,
+    oc=0,
+    opacity=1,
+    overlay=True,
+) -> tuple:
+    """Insert text with optional HTML tags and stylings into a rectangle.
+
+    Args:
+        rect: (rect-like) rectangle into which the text should be placed.
+        text: (str or Story) text with optional HTML tags and stylings, or
+            a ready-made Story object.
+        css: (str) CSS styling commands.
+        scale_low: (float) force-fit content by scaling it down. Must be in
+            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
+            down-scaling is acceptable. A value of 0.1 would mean that content
+            may be scaled down by at most 90%.
+        archive: Archive object pointing to locations of used fonts or images
+        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
+        oc: (int) the xref of an OCG / OCMD (Optional Content).
+        opacity: (float) set opacity of inserted content.
+        overlay: (bool) put text on top of page content.
+    Returns:
+        A tuple of floats (spare_height, scale).
+        spare_height: -1 if content did not fit, else >= 0. It is the height of the
+               unused (still available) rectangle stripe. Positive only if
+               no down scaling occurred (scale = 1).
+        scale: downscaling factor, 0 < scale <= 1. If the content did not
+               fit (spare_height = -1), the value of 'scale_low' is returned.
+    Raises:
+        ValueError: if 'rotate' is no multiple of 90, 'scale_low' is outside
+            [0, 1], or 'text' is neither a string nor a Story.
+    """
+
+    # normalize rotation angle to the range [0, 360)
+    if rotate % 90 != 0:
+        raise ValueError("bad rotation angle")
+    rotate %= 360
+
+    if not 0 <= scale_low <= 1:
+        raise ValueError("'scale_low' must be in [0, 1]")
+
+    if css is None:
+        css = ""
+
+    rect = pymupdf.Rect(rect)
+    # the story is laid out unrotated: swap width and height for 90 / 270
+    if rotate in (90, 270):
+        temp_rect = pymupdf.Rect(0, 0, rect.height, rect.width)
+    else:
+        temp_rect = pymupdf.Rect(0, 0, rect.width, rect.height)
+
+    # use a small border by default
+    mycss = "body {margin:1px;}" + css  # append user CSS
+
+    # either make a story, or accept a given one
+    if isinstance(text, str):  # if a string, convert to a Story
+        story = pymupdf.Story(html=text, user_css=mycss, archive=archive)
+    elif isinstance(text, pymupdf.Story):
+        story = text
+    else:
+        raise ValueError("'text' must be a string or a Story")
+    # ----------------------------------------------------------------
+    # Find a scaling factor that lets our story fit in
+    # ----------------------------------------------------------------
+    scale_max = None if scale_low == 0 else 1 / scale_low
+
+    fit = story.fit_scale(temp_rect, scale_min=1, scale_max=scale_max)
+    if not fit.big_enough:  # there was no fit
+        return (-1, scale_low)
+
+    filled = fit.filled
+    scale = 1 / fit.parameter  # shrink factor
+
+    spare_height = fit.rect.y1 - filled[3]  # unused room at rectangle bottom
+    # Note: due to MuPDF's logic this may be negative even for successful fits.
+    if scale != 1 or spare_height < 0:  # if scaling occurred, set spare_height to 0
+        spare_height = 0
+
+    def rect_function(*args):
+        # always offer the rectangle found by the fit, without transformation
+        return fit.rect, fit.rect, pymupdf.Identity
+
+    # draw story on temp PDF page
+    doc = story.write_with_links(rect_function)
+
+    # Insert opacity if requested.
+    # For this, we prepend a command to the /Contents.
+    if 0 <= opacity < 1:
+        tpage = doc[0]  # load page
+        # generate /ExtGstate for the page
+        alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
+        s = f"/{alp0} gs\n"  # generate graphic state command
+        pymupdf.TOOLS._insert_contents(tpage, s.encode(), 0)
+
+    # put result in target page
+    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)
+
+    # -------------------------------------------------------------------------
+    # re-insert links in target rect (show_pdf_page cannot copy annotations)
+    # -------------------------------------------------------------------------
+    # scaled center point of fit.rect
+    mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale
+
+    # center point of target rect
+    mp2 = (rect.tl + rect.br) / 2
+
+    # compute link positioning matrix:
+    # - move center of scaled-down fit.rect to (0,0)
+    # - rotate
+    # - move (0,0) to center of target rect
+    mat = (
+        pymupdf.Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
+        * pymupdf.Matrix(-rotate)
+        * pymupdf.Matrix(1, 0, 0, 1, mp2.x, mp2.y)
+    )
+
+    # copy over links
+    for link in doc[0].get_links():
+        link["from"] *= mat
+        page.insert_link(link)
+
+    return spare_height, scale
+
+
+def new_page(
+    doc: pymupdf.Document,
+    pno: int = -1,
+    width: float = 595,
+    height: float = 842,
+) -> pymupdf.Page:
+    """Add an empty page to the document and return it.
+
+    Args:
+        pno: (int) the new page is inserted before this page number.
+            Default: after the last page.
+        width: (float) width of the page in points. Default: 595 (ISO A4).
+        height: (float) height of the page in points. Default: 842 (ISO A4).
+    Returns:
+        The pymupdf.Page object found at index 'pno' after the insertion.
+    """
+    doc._newPage(pno, width=width, height=height)
+    created = doc[pno]
+    return created
+
+
+def insert_page(
+    doc: pymupdf.Document,
+    pno: int,
+    text: typing.Union[str, list, None] = None,
+    fontsize: float = 11,
+    width: float = 595,
+    height: float = 842,
+    fontname: str = "helv",
+    fontfile: OptStr = None,
+    color: OptSeq = (0,),
+) -> int:
+    """Add a new PDF page and optionally write some text on it.
+
+    Notes:
+        Combines pymupdf.Document.new_page() and pymupdf.Page.insert_text().
+        See these methods for details of the parameters. The text starts
+        at point (50, 72).
+    Returns:
+        0 if no text was given, else the result of Page.insert_text().
+    """
+    page = doc.new_page(pno=pno, width=width, height=height)
+    if not text:  # None or empty: leave the page blank
+        return 0
+    return page.insert_text(
+        (50, 72),
+        text,
+        fontsize=fontsize,
+        fontname=fontname,
+        fontfile=fontfile,
+        color=color,
+    )
+
+
+def draw_line(
+    page: pymupdf.Page,
+    p1: point_like,
+    p2: point_like,
+    color: OptSeq = (0,),
+    dashes: OptStr = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    morph: OptSeq = None,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc=0,
+) -> pymupdf.Point:
+    """Draw a straight line between the points p1 and p2.
+
+    Notes:
+        Creates a Shape object, uses its same-named method, applies the
+        drawing properties via finish() and commits the shape.
+    Returns:
+        The value returned by Shape.draw_line().
+    """
+    shape = page.new_shape()
+    start, stop = pymupdf.Point(p1), pymupdf.Point(p2)
+    result = shape.draw_line(start, stop)
+    shape.finish(
+        color=color,
+        width=width,
+        dashes=dashes,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        closePath=False,  # a line is never a closed path
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    shape.commit(overlay)
+    return result
+
+
+def draw_squiggle(
+    page: pymupdf.Page,
+    p1: point_like,
+    p2: point_like,
+    breadth: float = 2,
+    color: OptSeq = (0,),
+    dashes: OptStr = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    morph: OptSeq = None,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a squiggly line between the points p1 and p2.
+
+    Notes:
+        Creates a Shape object, uses its same-named method, applies the
+        drawing properties via finish() and commits the shape.
+    Returns:
+        The value returned by Shape.draw_squiggle().
+    """
+    shape = page.new_shape()
+    start, stop = pymupdf.Point(p1), pymupdf.Point(p2)
+    result = shape.draw_squiggle(start, stop, breadth=breadth)
+    shape.finish(
+        color=color,
+        width=width,
+        dashes=dashes,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        closePath=False,  # the squiggle stays an open path
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    shape.commit(overlay)
+    return result
+
+
+def draw_zigzag(
+    page: pymupdf.Page,
+    p1: point_like,
+    p2: point_like,
+    breadth: float = 2,
+    color: OptSeq = (0,),
+    dashes: OptStr = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    morph: OptSeq = None,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a zigzag line between the points p1 and p2.
+
+    Notes:
+        Creates a Shape object, uses its same-named method, applies the
+        drawing properties via finish() and commits the shape.
+    Returns:
+        The value returned by Shape.draw_zigzag().
+    """
+    shape = page.new_shape()
+    start, stop = pymupdf.Point(p1), pymupdf.Point(p2)
+    result = shape.draw_zigzag(start, stop, breadth=breadth)
+    shape.finish(
+        color=color,
+        width=width,
+        dashes=dashes,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        closePath=False,  # the zigzag stays an open path
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    shape.commit(overlay)
+    return result
+
+
+def draw_rect(
+    page: pymupdf.Page,
+    rect: rect_like,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    morph: OptSeq = None,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+    radius=None,
+) -> pymupdf.Point:
+    """Draw a rectangle.
+
+    Notes:
+        Creates a Shape object, uses its same-named method, applies the
+        drawing properties via finish() and commits the shape. See the
+        Shape class method for details, e.g. on 'radius'.
+    Returns:
+        The value returned by Shape.draw_rect().
+    """
+    shape = page.new_shape()
+    result = shape.draw_rect(pymupdf.Rect(rect), radius=radius)
+    shape.finish(
+        color=color,
+        fill=fill,
+        width=width,
+        dashes=dashes,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    shape.commit(overlay)
+    return result
+
+
+def draw_quad(
+    page: pymupdf.Page,
+    quad: quad_like,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    morph: OptSeq = None,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a quadrilateral.
+
+    Notes:
+        Creates a Shape object, uses its same-named method, applies the
+        drawing properties via finish() and commits the shape.
+    Returns:
+        The value returned by Shape.draw_quad().
+    """
+    shape = page.new_shape()
+    result = shape.draw_quad(pymupdf.Quad(quad))
+    shape.finish(
+        color=color,
+        fill=fill,
+        width=width,
+        dashes=dashes,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    shape.commit(overlay)
+    return result
+
+
+def draw_polyline(
+    page: pymupdf.Page,
+    points: list,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    width: float = 1,
+    morph: OptSeq = None,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    closePath: bool = False,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw several line segments connecting a list of points.
+
+    Notes:
+        Creates a Shape object, uses its same-named method, applies the
+        drawing properties via finish() and commits the shape. The list
+        of points is passed to the Shape method as is.
+    Returns:
+        The value returned by Shape.draw_polyline().
+    """
+    shape = page.new_shape()
+    result = shape.draw_polyline(points)
+    shape.finish(
+        color=color,
+        fill=fill,
+        width=width,
+        dashes=dashes,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        closePath=closePath,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    shape.commit(overlay)
+    return result
+
+
+def draw_circle(
+    page: pymupdf.Page,
+    center: point_like,
+    radius: float,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    morph: OptSeq = None,
+    dashes: OptStr = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a circle given its center and radius."""
+    img = page.new_shape()
+    Q = img.draw_circle(pymupdf.Point(center), radius)
+    img.finish(
+        color=color,
+        fill=fill,
+        dashes=dashes,
+        width=width,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    img.commit(overlay)
+    return Q
+
+
+def draw_oval(
+    page: pymupdf.Page,
+    rect: typing.Union[rect_like, quad_like],
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    morph: OptSeq = None,
+    width: float = 1,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw an oval given its containing rectangle or quad."""
+    img = page.new_shape()
+    Q = img.draw_oval(rect)
+    img.finish(
+        color=color,
+        fill=fill,
+        dashes=dashes,
+        width=width,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    img.commit(overlay)
+
+    return Q
+
+
+def draw_curve(
+    page: pymupdf.Page,
+    p1: point_like,
+    p2: point_like,
+    p3: point_like,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    width: float = 1,
+    morph: OptSeq = None,
+    closePath: bool = False,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3."""
+    img = page.new_shape()
+    Q = img.draw_curve(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3))
+    img.finish(
+        color=color,
+        fill=fill,
+        dashes=dashes,
+        width=width,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        closePath=closePath,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    img.commit(overlay)
+
+    return Q
+
+
+def draw_bezier(
+    page: pymupdf.Page,
+    p1: point_like,
+    p2: point_like,
+    p3: point_like,
+    p4: point_like,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    width: float = 1,
+    morph: OptStr = None,
+    closePath: bool = False,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3."""
+    img = page.new_shape()
+    Q = img.draw_bezier(pymupdf.Point(p1), pymupdf.Point(p2), pymupdf.Point(p3), pymupdf.Point(p4))
+    img.finish(
+        color=color,
+        fill=fill,
+        dashes=dashes,
+        width=width,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        closePath=closePath,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    img.commit(overlay)
+
+    return Q
+
+
+def draw_sector(
+    page: pymupdf.Page,
+    center: point_like,
+    point: point_like,
+    beta: float,
+    color: OptSeq = (0,),
+    fill: OptSeq = None,
+    dashes: OptStr = None,
+    fullSector: bool = True,
+    morph: OptSeq = None,
+    width: float = 1,
+    closePath: bool = False,
+    lineCap: int = 0,
+    lineJoin: int = 0,
+    overlay: bool = True,
+    stroke_opacity: float = 1,
+    fill_opacity: float = 1,
+    oc: int = 0,
+) -> pymupdf.Point:
+    """Draw a circle sector given circle center, one arc end point and the angle of the arc.
+
+    Parameters:
+        center -- center of circle
+        point -- arc end point
+        beta -- angle of arc (degrees)
+        fullSector -- connect arc ends with center
+    """
+    img = page.new_shape()
+    Q = img.draw_sector(pymupdf.Point(center), pymupdf.Point(point), beta, fullSector=fullSector)
+    img.finish(
+        color=color,
+        fill=fill,
+        dashes=dashes,
+        width=width,
+        lineCap=lineCap,
+        lineJoin=lineJoin,
+        morph=morph,
+        closePath=closePath,
+        stroke_opacity=stroke_opacity,
+        fill_opacity=fill_opacity,
+        oc=oc,
+    )
+    img.commit(overlay)
+
+    return Q
+
+
+# ----------------------------------------------------------------------
+# Name:        wx.lib.colourdb.py
+# Purpose:     Adds a bunch of colour names and RGB values to the
+#              colour database so they can be found by name
+#
+# Author:      Robin Dunn
+#
+# Created:     13-March-2001
+# Copyright:   (c) 2001-2017 by Total Control Software
+# Licence:     wxWindows license
+# Tags:        phoenix-port, unittest, documented
+# ----------------------------------------------------------------------
+
+
+def getColorList() -> list:
+    """
+    Returns a list of upper-case colour names.
+    :rtype: list of strings
+    """
+    return [name for name, r, g, b in pymupdf.colors_wx_list()]
+
+
+def getColorInfoList() -> list:
+    """
+    Returns list of (name, red, green, blue) tuples, where:
+        name: upper-case color name.
+        red, green, blue: integers in range 0..255.
+    :rtype: list of tuples
+    """
+    return pymupdf.colors_wx_list()
+
+
+def getColor(name: str) -> tuple:
+    """Retrieve RGB color in PDF format by name.
+
+    Returns:
+        a triple of floats in range 0 to 1. In case of name-not-found, "white" is returned.
+    """
+    return pymupdf.colors_pdf_dict().get(name.lower(), (1, 1, 1))
+
+
+def getColorHSV(name: str) -> tuple:
+    """Retrieve the hue, saturation, value triple of a color name.
+
+    Returns:
+        a triple (degree, percent, percent). If not found (-1, -1, -1) is returned.
+    """
+    try:
+        x = getColorInfoList()[getColorList().index(name.upper())]
+    except Exception:
+        if g_exceptions_verbose:    pymupdf.exception_info()
+        return (-1, -1, -1)
+
+    r = x[1] / 255.0
+    g = x[2] / 255.0
+    b = x[3] / 255.0
+    cmax = max(r, g, b)
+    V = round(cmax * 100, 1)
+    cmin = min(r, g, b)
+    delta = cmax - cmin
+    if delta == 0:
+        hue = 0
+    elif cmax == r:
+        hue = 60.0 * (((g - b) / delta) % 6)
+    elif cmax == g:
+        hue = 60.0 * (((b - r) / delta) + 2)
+    else:
+        hue = 60.0 * (((r - g) / delta) + 4)
+
+    H = int(round(hue))
+
+    if cmax == 0:
+        sat = 0
+    else:
+        sat = delta / cmax
+    S = int(round(sat * 100))
+
+    return (H, S, V)
+
+
+def _get_font_properties(doc: pymupdf.Document, xref: int) -> tuple:
+    fontname, ext, stype, buffer = doc.extract_font(xref)
+    asc = 0.8
+    dsc = -0.2
+    if ext == "":
+        return fontname, ext, stype, asc, dsc
+
+    if buffer:
+        try:
+            font = pymupdf.Font(fontbuffer=buffer)
+            asc = font.ascender
+            dsc = font.descender
+            bbox = font.bbox
+            if asc - dsc < 1:
+                if bbox.y0 < dsc:
+                    dsc = bbox.y0
+                asc = 1 - dsc
+        except Exception:
+            pymupdf.exception_info()
+            asc *= 1.2
+            dsc *= 1.2
+        return fontname, ext, stype, asc, dsc
+    if ext != "n/a":
+        try:
+            font = pymupdf.Font(fontname)
+            asc = font.ascender
+            dsc = font.descender
+        except Exception:
+            pymupdf.exception_info()
+            asc *= 1.2
+            dsc *= 1.2
+    else:
+        asc *= 1.2
+        dsc *= 1.2
+    return fontname, ext, stype, asc, dsc
+
+
+def get_char_widths(
+    doc: pymupdf.Document, xref: int, limit: int = 256, idx: int = 0, fontdict: OptDict = None
+) -> list:
+    """Get list of glyph information of a font.
+
+    Args:
+        doc: the document containing the font.
+        xref: xref number of the font.
+        limit: requested number of glyph entries; values below 256 are
+            raised to 256.
+        idx: passed through unchanged to doc._get_char_widths().
+        fontdict: optional font information. If given for a font that is
+            not yet recorded, the keys "name", "ext", "type", "ordering"
+            and "simple" are read from it.
+
+    Returns:
+        The glyph list, or None if the font is treated as a CJK font here.
+
+    Raises:
+        ValueError: if the font's "ext" value is the empty string.
+
+    Notes:
+        Must be provided by its XREF number. If we already dealt with the
+        font, it will be recorded in doc.FontInfos. Otherwise we insert an
+        entry there.
+        Finally we return the glyphs for the font. This is a list of
+        (glyph, width) where glyph is an integer controlling the char
+        appearance, and width is a float controlling the char's spacing:
+        width * fontsize is the actual space.
+        For 'simple' fonts, glyph == ord(char) will usually be true.
+        Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.
+    """
+    fontinfo = pymupdf.CheckFontInfo(doc, xref)
+    if fontinfo is None:  # not recorded yet: create it
+        if fontdict is None:
+            # no font information supplied: derive it from the document
+            name, ext, stype, asc, dsc = _get_font_properties(doc, xref)
+            fontdict = {
+                "name": name,
+                "type": stype,
+                "ext": ext,
+                "ascender": asc,
+                "descender": dsc,
+            }
+        else:
+            name = fontdict["name"]
+            ext = fontdict["ext"]
+            stype = fontdict["type"]
+            # NOTE(review): 'ordering' and 'simple' read here are recomputed
+            # unconditionally below, so the supplied values are not used
+            # (the keys must still exist) -- confirm this is intended.
+            ordering = fontdict["ordering"]
+            simple = fontdict["simple"]
+
+        if ext == "":
+            raise ValueError("xref is not a font")
+
+        # check for 'simple' fonts
+        if stype in ("Type1", "MMType1", "TrueType"):
+            simple = True
+        else:
+            simple = False
+
+        # check for CJK fonts
+        if name in ("Fangti", "Ming"):
+            ordering = 0
+        elif name in ("Heiti", "Song"):
+            ordering = 1
+        elif name in ("Gothic", "Mincho"):
+            ordering = 2
+        elif name in ("Dotum", "Batang"):
+            ordering = 3
+        else:
+            ordering = -1
+
+        fontdict["simple"] = simple
+
+        # these two fonts use glyph tables provided by pymupdf
+        if name == "ZapfDingbats":
+            glyphs = pymupdf.zapf_glyphs
+        elif name == "Symbol":
+            glyphs = pymupdf.symbol_glyphs
+        else:
+            glyphs = None
+
+        fontdict["glyphs"] = glyphs
+        fontdict["ordering"] = ordering
+        # record the new entry in the document's font list
+        fontinfo = [xref, fontdict]
+        doc.FontInfos.append(fontinfo)
+    else:
+        # font already recorded: reuse the stored values
+        fontdict = fontinfo[1]
+        glyphs = fontdict["glyphs"]
+        simple = fontdict["simple"]
+        ordering = fontdict["ordering"]
+
+    # number of glyph entries we already have
+    if glyphs is None:
+        oldlimit = 0
+    else:
+        oldlimit = len(glyphs)
+
+    mylimit = max(256, limit)
+
+    # the stored list is long enough: nothing to compute
+    if mylimit <= oldlimit:
+        return glyphs
+
+    if ordering < 0:  # not a CJK font
+        glyphs = doc._get_char_widths(
+            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
+        )
+    else:  # CJK fonts use char codes and width = 1
+        glyphs = None
+
+    # store the (possibly extended) glyph list with the font information
+    fontdict["glyphs"] = glyphs
+    fontinfo[1] = fontdict
+    pymupdf.UpdateFontInfo(doc, fontinfo)
+
+    return glyphs
+
+
+class Shape:
+    """Create a new shape."""
+
+    @staticmethod
+    def horizontal_angle(C, P):
+        """Return the angle to the horizontal for the connection from C to P.
+        This uses the arcus sine function and resolves its inherent ambiguity by
+        looking up in which quadrant vector S = P - C is located.
+        """
+        S = pymupdf.Point(P - C).unit  # unit vector 'C' -> 'P'
+        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
+        if S.x < 0:  # make arcsin result unique
+            if S.y <= 0:  # bottom-left
+                alfa = -(math.pi - alfa)
+            else:  # top-left
+                alfa = math.pi - alfa
+        else:
+            if S.y >= 0:  # top-right
+                pass
+            else:  # bottom-right
+                alfa = -alfa
+        return alfa
+
+    def __init__(self, page: pymupdf.Page):
+        pymupdf.CheckParent(page)
+        self.page = page
+        self.doc = page.parent
+        if not self.doc.is_pdf:
+            raise ValueError("is no PDF")
+        self.height = page.mediabox_size.y
+        self.width = page.mediabox_size.x
+        self.x = page.cropbox_position.x
+        self.y = page.cropbox_position.y
+
+        self.pctm = page.transformation_matrix  # page transf. matrix
+        self.ipctm = ~self.pctm  # inverted transf. matrix
+
+        self.draw_cont = ""
+        self.text_cont = ""
+        self.totalcont = ""
+        self.last_point = None
+        self.rect = None
+
+    def updateRect(self, x):
+        if self.rect is None:
+            if len(x) == 2:
+                self.rect = pymupdf.Rect(x, x)
+            else:
+                self.rect = pymupdf.Rect(x)
+
+        else:
+            if len(x) == 2:
+                x = pymupdf.Point(x)
+                self.rect.x0 = min(self.rect.x0, x.x)
+                self.rect.y0 = min(self.rect.y0, x.y)
+                self.rect.x1 = max(self.rect.x1, x.x)
+                self.rect.y1 = max(self.rect.y1, x.y)
+            else:
+                x = pymupdf.Rect(x)
+                self.rect.x0 = min(self.rect.x0, x.x0)
+                self.rect.y0 = min(self.rect.y0, x.y0)
+                self.rect.x1 = max(self.rect.x1, x.x1)
+                self.rect.y1 = max(self.rect.y1, x.y1)
+
+    def draw_line(self, p1: point_like, p2: point_like) -> pymupdf.Point:
+        """Draw a line between two points."""
+        p1 = pymupdf.Point(p1)
+        p2 = pymupdf.Point(p2)
+        if not (self.last_point == p1):
+            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
+            self.last_point = p1
+            self.updateRect(p1)
+
+        self.draw_cont += _format_g(pymupdf.JM_TUPLE(p2 * self.ipctm)) + " l\n"
+        self.updateRect(p2)
+        self.last_point = p2
+        return self.last_point
+
+    def draw_polyline(self, points: list) -> pymupdf.Point:
+        """Draw several connected line segments."""
+        for i, p in enumerate(points):
+            if i == 0:
+                if not (self.last_point == pymupdf.Point(p)):
+                    self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " m\n"
+                    self.last_point = pymupdf.Point(p)
+            else:
+                self.draw_cont += _format_g(pymupdf.JM_TUPLE(pymupdf.Point(p) * self.ipctm)) + " l\n"
+            self.updateRect(p)
+
+        self.last_point = pymupdf.Point(points[-1])
+        return self.last_point
+
+    def draw_bezier(
+        self,
+        p1: point_like,
+        p2: point_like,
+        p3: point_like,
+        p4: point_like,
+    ) -> pymupdf.Point:
+        """Draw a standard cubic Bezier curve."""
+        p1 = pymupdf.Point(p1)
+        p2 = pymupdf.Point(p2)
+        p3 = pymupdf.Point(p3)
+        p4 = pymupdf.Point(p4)
+        if not (self.last_point == p1):
+            self.draw_cont += _format_g(pymupdf.JM_TUPLE(p1 * self.ipctm)) + " m\n"
+        args = pymupdf.JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
+        self.draw_cont += _format_g(args) + " c\n"
+        self.updateRect(p1)
+        self.updateRect(p2)
+        self.updateRect(p3)
+        self.updateRect(p4)
+        self.last_point = p4
+        return self.last_point
+
+    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> pymupdf.Point:
+        """Draw an ellipse inside a tetrapod."""
+        if len(tetra) != 4:
+            raise ValueError("invalid arg length")
+        if hasattr(tetra[0], "__float__"):
+            q = pymupdf.Rect(tetra).quad
+        else:
+            q = pymupdf.Quad(tetra)
+
+        mt = q.ul + (q.ur - q.ul) * 0.5
+        mr = q.ur + (q.lr - q.ur) * 0.5
+        mb = q.ll + (q.lr - q.ll) * 0.5
+        ml = q.ul + (q.ll - q.ul) * 0.5
+        if not (self.last_point == ml):
+            self.draw_cont += _format_g(pymupdf.JM_TUPLE(ml * self.ipctm)) + " m\n"
+            self.last_point = ml
+        self.draw_curve(ml, q.ll, mb)
+        self.draw_curve(mb, q.lr, mr)
+        self.draw_curve(mr, q.ur, mt)
+        self.draw_curve(mt, q.ul, ml)
+        self.updateRect(q.rect)
+        self.last_point = ml
+        return self.last_point
+
+    def draw_circle(self, center: point_like, radius: float) -> pymupdf.Point:
+        """Draw a circle given its center and radius."""
+        if not radius > pymupdf.EPSILON:
+            raise ValueError("radius must be positive")
+        center = pymupdf.Point(center)
+        p1 = center - (radius, 0)
+        return self.draw_sector(center, p1, 360, fullSector=False)
+
+    def draw_curve(
+        self,
+        p1: point_like,
+        p2: point_like,
+        p3: point_like,
+    ) -> pymupdf.Point:
+        """Draw a curve between points using one control point."""
+        kappa = 0.55228474983
+        p1 = pymupdf.Point(p1)
+        p2 = pymupdf.Point(p2)
+        p3 = pymupdf.Point(p3)
+        k1 = p1 + (p2 - p1) * kappa
+        k2 = p3 + (p2 - p3) * kappa
+        return self.draw_bezier(p1, k1, k2, p3)
+
+    def draw_sector(
+        self,
+        center: point_like,
+        point: point_like,
+        beta: float,
+        fullSector: bool = True,
+    ) -> pymupdf.Point:
+        """Draw a circle sector.
+
+        The arc starts at 'point' and is approximated by cubic Bezier
+        curves: one per full 90 degrees plus one for the remainder.
+
+        Args:
+            center: center of the circle.
+            point: start point of the arc; its distance from 'center' is
+                the radius.
+            beta: angle of the arc in degrees. Its sign selects the
+                direction of rotation.
+            fullSector: if true, also emit lines connecting the arc start,
+                the center and the arc end.
+
+        Returns:
+            The end point of the arc, which becomes the shape's last point.
+
+        Raises:
+            ValueError: if the radius is not greater than pymupdf.EPSILON.
+        """
+        center = pymupdf.Point(center)
+        point = pymupdf.Point(point)
+        # formatters for the "m" (move), "c" (curve) and "l" (line) operators
+        l3 = lambda a, b: _format_g((a, b)) + " m\n"
+        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
+        l5 = lambda a, b: _format_g((a, b)) + " l\n"
+        betar = math.radians(-beta)
+        # w360 has the opposite sign of betar, w90 and w45 the same sign
+        w360 = math.radians(math.copysign(360, betar)) * (-1)
+        w90 = math.radians(math.copysign(90, betar))
+        w45 = w90 / 2
+        while abs(betar) > 2 * math.pi:
+            betar += w360  # bring angle below 360 degrees
+        # NOTE(review): the move below is appended before the radius is
+        # validated, so a ValueError leaves it in draw_cont -- confirm
+        # whether that matters to callers.
+        if not (self.last_point == point):
+            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
+            self.last_point = point
+        Q = pymupdf.Point(0, 0)  # just make sure it exists
+        C = center
+        P = point
+        S = P - C  # vector 'center' -> 'point'
+        rad = abs(S)  # circle radius
+
+        if not rad > pymupdf.EPSILON:
+            raise ValueError("radius must be positive")
+
+        alfa = self.horizontal_angle(center, point)
+        while abs(betar) > abs(w90):  # draw 90 degree arcs
+            q1 = C.x + math.cos(alfa + w90) * rad
+            q2 = C.y + math.sin(alfa + w90) * rad
+            Q = pymupdf.Point(q1, q2)  # the arc's end point
+            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
+            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
+            R = pymupdf.Point(r1, r2)  # crossing point of tangents
+            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
+            kappa = kappah * abs(P - Q)
+            cp1 = P + (R - P) * kappa  # control point 1
+            cp2 = Q + (R - Q) * kappa  # control point 2
+            self.draw_cont += l4(*pymupdf.JM_TUPLE(
+                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+            ))
+
+            betar -= w90  # reduce param angle by 90 deg
+            alfa += w90  # advance start angle by 90 deg
+            P = Q  # advance to arc end point
+        # draw (remaining) arc
+        # if no 90 degree arc was drawn and the rest is skipped here, Q is
+        # still the Point(0, 0) set above
+        if abs(betar) > 1e-3:  # significant degrees left?
+            beta2 = betar / 2
+            q1 = C.x + math.cos(alfa + betar) * rad
+            q2 = C.y + math.sin(alfa + betar) * rad
+            Q = pymupdf.Point(q1, q2)  # the arc's end point
+            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
+            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
+            R = pymupdf.Point(r1, r2)  # crossing point of tangents
+            # kappa height is 4/3 of segment height
+            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
+            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
+            cp1 = P + (R - P) * kappa  # control point 1
+            cp2 = Q + (R - Q) * kappa  # control point 2
+            self.draw_cont += l4(*pymupdf.JM_TUPLE(
+                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+            ))
+        if fullSector:
+            # move to the arc start, then lines to the center and the arc end
+            self.draw_cont += l3(*pymupdf.JM_TUPLE(point * self.ipctm))
+            self.draw_cont += l5(*pymupdf.JM_TUPLE(center * self.ipctm))
+            self.draw_cont += l5(*pymupdf.JM_TUPLE(Q * self.ipctm))
+        # NOTE(review): this method does not call updateRect(), unlike
+        # draw_line / draw_bezier / draw_rect -- confirm this is intended.
+        self.last_point = Q
+        return self.last_point
+
+    def draw_rect(self, rect: rect_like, *, radius=None) -> pymupdf.Point:
+        """Draw a rectangle.
+
+        Args:
+            radius: if not None, the rectangle will have rounded corners.
+                This is the radius of the curvature, given as percentage of
+                the rectangle width or height. Valid are values 0 < v <= 0.5.
+                For a sequence of two values, the corners will have different
+                radii. Otherwise, the percentage will be computed from the
+                shorter side. A value of (0.5, 0.5) will draw an ellipse.
+        """
+        r = pymupdf.Rect(rect)
+        if radius is None:  # standard rectangle
+            self.draw_cont += _format_g(pymupdf.JM_TUPLE(
+                list(r.bl * self.ipctm) + [r.width, r.height]
+            )) + " re\n"
+            self.updateRect(r)
+            self.last_point = r.tl
+            return self.last_point
+        # rounded corners requested. This requires 1 or 2 values, each
+        # with 0 < value <= 0.5
+        if hasattr(radius, "__float__"):
+            if radius <= 0 or radius > 0.5:
+                raise ValueError(f"bad radius value {radius}.")
+            d = min(r.width, r.height) * radius
+            px = (d, 0)
+            py = (0, d)
+        elif hasattr(radius, "__len__") and len(radius) == 2:
+            rx, ry = radius
+            px = (rx * r.width, 0)
+            py = (0, ry * r.height)
+            if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
+                raise ValueError(f"bad radius value {radius}.")
+        else:
+            raise ValueError(f"bad radius value {radius}.")
+
+        lp = self.draw_line(r.tl + py, r.bl - py)
+        lp = self.draw_curve(lp, r.bl, r.bl + px)
+
+        lp = self.draw_line(lp, r.br - px)
+        lp = self.draw_curve(lp, r.br, r.br - py)
+
+        lp = self.draw_line(lp, r.tr + py)
+        lp = self.draw_curve(lp, r.tr, r.tr - px)
+
+        lp = self.draw_line(lp, r.tl + px)
+        self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
+
+        self.updateRect(r)
+        return self.last_point
+
+    def draw_quad(self, quad: quad_like) -> pymupdf.Point:
+        """Draw a Quad."""
+        q = pymupdf.Quad(quad)
+        return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])
+
+    def draw_zigzag(
+        self,
+        p1: point_like,
+        p2: point_like,
+        breadth: float = 2,
+    ) -> pymupdf.Point:
+        """Draw a zig-zagged line from p1 to p2."""
+        p1 = pymupdf.Point(p1)
+        p2 = pymupdf.Point(p2)
+        S = p2 - p1  # vector start - end
+        rad = abs(S)  # distance of points
+        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+        if cnt < 4:
+            raise ValueError("points too close")
+        mb = rad / cnt  # revised breadth
+        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
+        i_mat = ~matrix  # get original position
+        points = []  # stores edges
+        for i in range(1, cnt):
+            if i % 4 == 1:  # point "above" connection
+                p = pymupdf.Point(i, -1) * mb
+            elif i % 4 == 3:  # point "below" connection
+                p = pymupdf.Point(i, 1) * mb
+            else:  # ignore others
+                continue
+            points.append(p * i_mat)
+        self.draw_polyline([p1] + points + [p2])  # add start and end points
+        return p2
+
+    def draw_squiggle(
+        self,
+        p1: point_like,
+        p2: point_like,
+        breadth=2,
+    ) -> pymupdf.Point:
+        """Draw a squiggly line from p1 to p2."""
+        p1 = pymupdf.Point(p1)
+        p2 = pymupdf.Point(p2)
+        S = p2 - p1  # vector start - end
+        rad = abs(S)  # distance of points
+        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+        if cnt < 4:
+            raise ValueError("points too close")
+        mb = rad / cnt  # revised breadth
+        matrix = pymupdf.Matrix(pymupdf.util_hor_matrix(p1, p2))  # normalize line to x-axis
+        i_mat = ~matrix  # get original position
+        k = 2.4142135623765633  # y of draw_curve helper point
+
+        points = []  # stores edges
+        for i in range(1, cnt):
+            if i % 4 == 1:  # point "above" connection
+                p = pymupdf.Point(i, -k) * mb
+            elif i % 4 == 3:  # point "below" connection
+                p = pymupdf.Point(i, k) * mb
+            else:  # else on connection line
+                p = pymupdf.Point(i, 0) * mb
+            points.append(p * i_mat)
+
+        points = [p1] + points + [p2]
+        cnt = len(points)
+        i = 0
+        while i + 2 < cnt:
+            self.draw_curve(points[i], points[i + 1], points[i + 2])
+            i += 2
+        return p2
+
+    # ==============================================================================
+    # Shape.insert_text
+    # ==============================================================================
+    def insert_text(
+        self,
+        point: point_like,
+        buffer: typing.Union[str, list],
+        *,
+        fontsize: float = 11,
+        lineheight: OptFloat = None,
+        fontname: str = "helv",
+        fontfile: OptStr = None,
+        set_simple: bool = 0,
+        encoding: int = 0,
+        color: OptSeq = None,
+        fill: OptSeq = None,
+        render_mode: int = 0,
+        border_width: float = 0.05,
+        miter_limit: float = 1,
+        rotate: int = 0,
+        morph: OptSeq = None,
+        stroke_opacity: float = 1,
+        fill_opacity: float = 1,
+        oc: int = 0,
+    ) -> int:
+        """Insert text lines starting at a point.
+
+        The generated PDF text operators are appended to self.text_cont.
+
+        Args:
+            point: start position of the first line.
+            buffer: the text. A list or tuple is used as is (one item per
+                line), anything else is split via its splitlines() method.
+            fontsize: font size.
+            lineheight: if true-like, the line height is
+                fontsize * lineheight. Otherwise it is
+                fontsize * (ascender - descender) of the font, or
+                fontsize * 1.2 if that difference is not above 1.
+            fontname: font name; a leading "/" is removed.
+            fontfile, set_simple, encoding: passed to page.insert_font().
+            color, fill: stroke and fill color. If 'fill' is false-like
+                and render_mode is 0, 'color' is used for filling.
+            render_mode: if > 0, written as "Tr" value together with the
+                border width and the miter limit.
+            border_width: multiplied by fontsize to give the line width
+                written for render_mode > 0.
+            miter_limit: written for render_mode > 0 unless it is None.
+            rotate: rotation in degrees, must be a multiple of 90.
+            morph: checked by pymupdf.CheckMorph(); if accepted, morph[0]
+                is used as a point and morph[1] as a matrix.
+            stroke_opacity, fill_opacity: passed to page._set_opacity().
+            oc: passed to page._get_optional_content().
+
+        Returns:
+            Number of lines written. 0 if 'buffer' is empty or its
+            character codes cannot be determined.
+
+        Raises:
+            ValueError: if 'rotate' is not a multiple of 90.
+        """
+
+        # ensure 'text' is a list of strings, worth dealing with
+        if not bool(buffer):
+            return 0
+
+        if type(buffer) not in (list, tuple):
+            text = buffer.splitlines()
+        else:
+            text = buffer
+
+        if not len(text) > 0:
+            return 0
+
+        point = pymupdf.Point(point)
+        # highest character code occurring in the text
+        try:
+            maxcode = max([ord(c) for c in " ".join(text)])
+        except Exception:
+            pymupdf.exception_info()
+            return 0
+
+        # ensure valid 'fontname'
+        fname = fontname
+        if fname.startswith("/"):
+            fname = fname[1:]
+
+        xref = self.page.insert_font(
+            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+        )
+        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)
+
+        fontdict = fontinfo[1]
+        ordering = fontdict["ordering"]
+        simple = fontdict["simple"]
+        bfname = fontdict["name"]
+        ascender = fontdict["ascender"]
+        descender = fontdict["descender"]
+        # determine the line height
+        if lineheight:
+            lheight = fontsize * lineheight
+        elif ascender - descender <= 1:
+            lheight = fontsize * 1.2
+        else:
+            lheight = fontsize * (ascender - descender)
+
+        # codes above 255 need a glyph list covering the highest code
+        if maxcode > 255:
+            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+        else:
+            glyphs = fontdict["glyphs"]
+
+        # convert every line to its string for the "TJ" operator
+        tab = []
+        for t in text:
+            if simple and bfname not in ("Symbol", "ZapfDingbats"):
+                g = None
+            else:
+                g = glyphs
+            tab.append(pymupdf.getTJstr(t, g, simple, ordering))
+        text = tab
+
+        color_str = pymupdf.ColorCode(color, "c")
+        fill_str = pymupdf.ColorCode(fill, "f")
+        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
+            fill = color
+            fill_str = pymupdf.ColorCode(color, "f")
+
+        morphing = pymupdf.CheckMorph(morph)
+        rot = rotate
+        if rot % 90 != 0:
+            raise ValueError("bad rotate value")
+
+        while rot < 0:
+            rot += 360
+        rot = rot % 360  # text rotate = 0, 90, 270, 180
+
+        # templ1: start of the text object, templ2: end of the first of several lines
+        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
+        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
+        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
+        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
+        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+        height = self.height
+        width = self.width
+
+        # setting up for standard rotation directions
+        # case rotate = 0
+        if morphing:
+            m1 = pymupdf.Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
+            mat = ~m1 * morph[1] * m1
+            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
+        else:
+            cm = ""
+        top = height - point.y - self.y  # start of 1st char
+        left = point.x + self.x  # start of 1. char
+        space = top  # space available
+        #headroom = point.y + self.y  # distance to page border
+        if rot == 90:
+            left = height - point.y - self.y
+            top = -point.x - self.x
+            cm += cmp90
+            space = width - abs(top)
+            #headroom = point.x + self.x
+
+        elif rot == 270:
+            left = -height + point.y + self.y
+            top = point.x + self.x
+            cm += cmm90
+            space = abs(top)
+            #headroom = width - point.x - self.x
+
+        elif rot == 180:
+            left = -point.x - self.x
+            top = -height + point.y + self.y
+            cm += cm180
+            space = abs(point.y + self.y)
+            #headroom = height - point.y - self.y
+
+        # optional content: wrap the text in "BDC" / "EMC" if requested
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            bdc = "/OC /%s BDC\n" % optcont
+            emc = "EMC\n"
+        else:
+            bdc = emc = ""
+
+        # opacity: reference the graphics state returned by the page, if any
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is None:
+            alpha = ""
+        else:
+            alpha = "/%s gs\n" % alpha
+        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)
+
+        if render_mode > 0:
+            nres += "%i Tr " % render_mode
+            nres += _format_g(border_width * fontsize) + " w "
+            if miter_limit is not None:
+                nres += _format_g(miter_limit) + " M "
+        if color is not None:
+            nres += color_str
+        if fill is not None:
+            nres += fill_str
+
+        # =========================================================================
+        #   start text insertion
+        # =========================================================================
+        # the first line is always written; 'space' is only checked for
+        # the lines that follow
+        nres += text[0]
+        nlines = 1  # set output line counter
+        if len(text) > 1:
+            nres += templ2(lheight)  # line 1
+        else:
+            nres += 'TJ'
+        for i in range(1, len(text)):
+            if space < lheight:
+                break  # no space left on page
+            if i > 1:
+                nres += "\nT* "
+            nres += text[i] + 'TJ'
+            space -= lheight
+            nlines += 1
+
+        nres += "\nET\n%sQ\n" % emc
+
+        # =========================================================================
+        #   end of text insertion
+        # =========================================================================
+        # update the /Contents object
+        self.text_cont += nres
+        return nlines
+
+    # ==============================================================================
+    # Shape.insert_textbox
+    # ==============================================================================
+    def insert_textbox(
+        self,
+        rect: rect_like,
+        buffer: typing.Union[str, list],
+        *,
+        fontname: OptStr = "helv",
+        fontfile: OptStr = None,
+        fontsize: float = 11,
+        lineheight: OptFloat = None,
+        set_simple: bool = 0,
+        encoding: int = 0,
+        color: OptSeq = None,
+        fill: OptSeq = None,
+        expandtabs: int = 1,
+        border_width: float = 0.05,
+        miter_limit: float = 1,
+        align: int = 0,
+        render_mode: int = 0,
+        rotate: int = 0,
+        morph: OptSeq = None,
+        stroke_opacity: float = 1,
+        fill_opacity: float = 1,
+        oc: int = 0,
+    ) -> float:
+        """Insert text into a given rectangle.
+
+        Args:
+            rect -- the textbox to fill
+            buffer -- text to be inserted
+            fontname -- a Base-14 font, font name or '/name'
+            fontfile -- name of a font file
+            fontsize -- font size
+            lineheight -- overwrite the font property
+            color -- RGB stroke color triple
+            fill -- RGB fill color triple
+            render_mode -- text rendering control
+            border_width -- thickness of glyph borders as percentage of fontsize
+            expandtabs -- handles tabulators with string function
+            align -- left, center, right, justified
+            rotate -- 0, 90, 180, or 270 degrees
+            morph -- morph box with a matrix and a fixpoint
+        Returns:
+            unused or deficit rectangle area (float)
+        """
+        rect = pymupdf.Rect(rect)
+        if rect.is_empty or rect.is_infinite:
+            raise ValueError("text box must be finite and not empty")
+
+        color_str = pymupdf.ColorCode(color, "c")
+        fill_str = pymupdf.ColorCode(fill, "f")
+        if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
+            fill = color
+            fill_str = pymupdf.ColorCode(color, "f")
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            bdc = "/OC /%s BDC\n" % optcont
+            emc = "EMC\n"
+        else:
+            bdc = emc = ""
+
+        # determine opacity / transparency
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is None:
+            alpha = ""
+        else:
+            alpha = "/%s gs\n" % alpha
+
+        if rotate % 90 != 0:
+            raise ValueError("rotate must be multiple of 90")
+
+        rot = rotate
+        while rot < 0:
+            rot += 360
+        rot = rot % 360
+
+        # is buffer worth of dealing with?
+        if not bool(buffer):
+            return rect.height if rot in (0, 180) else rect.width
+
+        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
+        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
+        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+        height = self.height
+
+        fname = fontname
+        if fname.startswith("/"):
+            fname = fname[1:]
+
+        xref = self.page.insert_font(
+            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+        )
+        fontinfo = pymupdf.CheckFontInfo(self.doc, xref)
+
+        fontdict = fontinfo[1]
+        ordering = fontdict["ordering"]
+        simple = fontdict["simple"]
+        glyphs = fontdict["glyphs"]
+        bfname = fontdict["name"]
+        ascender = fontdict["ascender"]
+        descender = fontdict["descender"]
+
+        if lineheight:
+            lheight_factor = lineheight
+        elif ascender - descender <= 1:
+            lheight_factor = 1.2
+        else:
+            lheight_factor = ascender - descender
+        lheight = fontsize * lheight_factor
+
+        # create a list from buffer, split into its lines
+        if type(buffer) in (list, tuple):
+            t0 = "\n".join(buffer)
+        else:
+            t0 = buffer
+
+        maxcode = max([ord(c) for c in t0])
+        # replace invalid char codes for simple fonts
+        if simple and maxcode > 255:
+            t0 = "".join([c if ord(c) < 256 else "?" for c in t0])
+
+        t0 = t0.splitlines()
+
+        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+        if simple and bfname not in ("Symbol", "ZapfDingbats"):
+            tj_glyphs = None
+        else:
+            tj_glyphs = glyphs
+
+        # ----------------------------------------------------------------------
+        # calculate pixel length of a string
+        # ----------------------------------------------------------------------
+        def pixlen(x):
+            """Calculate pixel length of x."""
+            if ordering < 0:
+                return sum([glyphs[ord(c)][1] for c in x]) * fontsize
+            else:
+                return len(x) * fontsize
+
+        # ---------------------------------------------------------------------
+
+        if ordering < 0:
+            blen = glyphs[32][1] * fontsize  # pixel size of space character
+        else:
+            blen = fontsize
+
+        text = ""  # output buffer
+
+        if pymupdf.CheckMorph(morph):
+            m1 = pymupdf.Matrix(
+                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+            )
+            mat = ~m1 * morph[1] * m1
+            cm = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n"
+        else:
+            cm = ""
+
+        # ---------------------------------------------------------------------
+        # adjust for text orientation / rotation
+        # ---------------------------------------------------------------------
+        progr = 1  # direction of line progress
+        c_pnt = pymupdf.Point(0, fontsize * ascender)  # used for line progress
+        if rot == 0:  # normal orientation
+            point = rect.tl + c_pnt  # line 1 is 'lheight' below top
+            maxwidth = rect.width  # pixels available in one line
+            maxheight = rect.height  # available text height
+
+        elif rot == 90:  # rotate counter clockwise
+            c_pnt = pymupdf.Point(fontsize * ascender, 0)  # progress in x-direction
+            point = rect.bl + c_pnt  # line 1 'lheight' away from left
+            maxwidth = rect.height  # pixels available in one line
+            maxheight = rect.width  # available text height
+            cm += cmp90
+
+        elif rot == 180:  # text upside down
+            # progress upwards in y direction
+            c_pnt = -pymupdf.Point(0, fontsize * ascender)
+            point = rect.br + c_pnt  # line 1 'lheight' above bottom
+            maxwidth = rect.width  # pixels available in one line
+            progr = -1  # subtract lheight for next line
+            maxheight =rect.height  # available text height
+            cm += cm180
+
+        else:  # rotate clockwise (270 or -90)
+            # progress from right to left
+            c_pnt = -pymupdf.Point(fontsize * ascender, 0)
+            point = rect.tr + c_pnt  # line 1 'lheight' left of right
+            maxwidth = rect.height  # pixels available in one line
+            progr = -1  # subtract lheight for next line
+            maxheight = rect.width  # available text height
+            cm += cmm90
+
+        # =====================================================================
+        # line loop
+        # =====================================================================
+        just_tab = []  # 'justify' indicators per line
+
+        for i, line in enumerate(t0):
+            line_t = line.expandtabs(expandtabs).split(" ")  # split into words
+            num_words = len(line_t)
+            lbuff = ""  # init line buffer
+            rest = maxwidth  # available line pixels
+            # =================================================================
+            # word loop
+            # =================================================================
+            for j in range(num_words):
+                word = line_t[j]
+                pl_w = pixlen(word)  # pixel len of word
+                if rest >= pl_w:  # does it fit on the line?
+                    lbuff += word + " "  # yes, append word
+                    rest -= pl_w + blen  # update available line space
+                    continue  # next word
+
+                # word doesn't fit - output line (if not empty)
+                if lbuff:
+                    lbuff = lbuff.rstrip() + "\n"  # line full, append line break
+                    text += lbuff  # append to total text
+                    just_tab.append(True)  # can align-justify
+
+                lbuff = ""  # re-init line buffer
+                rest = maxwidth  # re-init avail. space
+
+                if pl_w <= maxwidth:  # word shorter than 1 line?
+                    lbuff = word + " "  # start the line with it
+                    rest = maxwidth - pl_w - blen  # update free space
+                    continue
+
+                # long word: split across multiple lines - char by char ...
+                if len(just_tab) > 0:
+                    just_tab[-1] = False  # cannot align-justify
+                for c in word:
+                    if pixlen(lbuff) <= maxwidth - pixlen(c):
+                        lbuff += c
+                    else:  # line full
+                        lbuff += "\n"  # close line
+                        text += lbuff  # append to text
+                        just_tab.append(False)  # cannot align-justify
+                        lbuff = c  # start new line with this char
+
+                lbuff += " "  # finish long word
+                rest = maxwidth - pixlen(lbuff)  # long word stored
+
+            if lbuff:  # unprocessed line content?
+                text += lbuff.rstrip()  # append to text
+                just_tab.append(False)  # cannot align-justify
+
+            if i < len(t0) - 1:  # not the last line?
+                text += "\n"  # insert line break
+
+        # compute used part of the textbox
+        if text.endswith("\n"):
+            text = text[:-1]
+        lb_count = text.count("\n") + 1  # number of lines written
+
+        # text height = line count * line height plus one descender value
+        text_height = lheight * lb_count - descender * fontsize
+
+        more = text_height - maxheight  # difference to height limit
+        if more > pymupdf.EPSILON:  # landed too much outside rect
+            return (-1) * more  # return deficit, don't output
+
+        more = abs(more)
+        if more < pymupdf.EPSILON:
+            more = 0  # don't bother with epsilons
+        nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
+        templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
+        # center, right, justify: output each line with its own specifics
+        text_t = text.splitlines()  # split text in lines again
+        just_tab[-1] = False  # never justify last line
+        for i, t in enumerate(text_t):
+            spacing = 0
+            pl = maxwidth - pixlen(t)  # length of empty line part
+            pnt = point + c_pnt * (i * lheight_factor)  # text start of line
+            if align == 1:  # center: right shift by half width
+                if rot in (0, 180):
+                    pnt = pnt + pymupdf.Point(pl / 2, 0) * progr
+                else:
+                    pnt = pnt - pymupdf.Point(0, pl / 2) * progr
+            elif align == 2:  # right: right shift by full width
+                if rot in (0, 180):
+                    pnt = pnt + pymupdf.Point(pl, 0) * progr
+                else:
+                    pnt = pnt - pymupdf.Point(0, pl) * progr
+            elif align == 3:  # justify
+                spaces = t.count(" ")  # number of spaces in line
+                if spaces > 0 and just_tab[i]:  # if any, and we may justify
+                    spacing = pl / spaces  # make every space this much larger
+                else:
+                    spacing = 0  # keep normal space length
+            top = height - pnt.y - self.y
+            left = pnt.x + self.x
+            if rot == 90:
+                left = height - pnt.y - self.y
+                top = -pnt.x - self.x
+            elif rot == 270:
+                left = -height + pnt.y + self.y
+                top = pnt.x + self.x
+            elif rot == 180:
+                left = -pnt.x - self.x
+                top = -height + pnt.y + self.y
+
+            nres += templ(left, top, fname, fontsize)
+
+            if render_mode > 0:
+                nres += "%i Tr " % render_mode
+                nres += _format_g(border_width * fontsize) + " w "
+                if miter_limit is not None:
+                    nres += _format_g(miter_limit) + " M "
+
+            if align == 3:
+                nres += _format_g(spacing) + " Tw "
+
+            if color is not None:
+                nres += color_str
+            if fill is not None:
+                nres += fill_str
+            nres += "%sTJ\n" % pymupdf.getTJstr(t, tj_glyphs, simple, ordering)
+
+        nres += "ET\n%sQ\n" % emc
+
+        self.text_cont += nres
+        self.updateRect(rect)
+        return more
+
+    def finish(
+        self,
+        width: float = 1,
+        color: OptSeq = (0,),
+        fill: OptSeq = None,
+        lineCap: int = 0,
+        lineJoin: int = 0,
+        dashes: OptStr = None,
+        even_odd: bool = False,
+        morph: OptSeq = None,
+        closePath: bool = True,
+        fill_opacity: float = 1,
+        stroke_opacity: float = 1,
+        oc: int = 0,
+    ) -> None:
+        """Finish the current drawing segment.
+
+        Notes:
+            Apply colors, opacity, dashes, line style and width, or
+            morphing. Also whether to close the path
+            by connecting last to first point.
+        """
+        if self.draw_cont == "":  # treat empty contents as no-op
+            return
+
+        if width == 0:  # border color makes no sense then
+            color = None
+        elif color is None:  # vice versa
+            width = 0
+        # if color == None and fill == None:
+        #     raise ValueError("at least one of 'color' or 'fill' must be given")
+        color_str = pymupdf.ColorCode(color, "c")  # ensure proper color string
+        fill_str = pymupdf.ColorCode(fill, "f")  # ensure proper fill string
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
+            emc = "EMC\n"
+        else:
+            emc = ""
+
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is not None:
+            self.draw_cont = "/%s gs\n" % alpha + self.draw_cont
+
+        if width != 1 and width != 0:
+            self.draw_cont += _format_g(width) + " w\n"
+
+        if lineCap != 0:
+            self.draw_cont = "%i J\n" % lineCap + self.draw_cont
+        if lineJoin != 0:
+            self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
+
+        if dashes not in (None, "", "[] 0"):
+            self.draw_cont = "%s d\n" % dashes + self.draw_cont
+
+        if closePath:
+            self.draw_cont += "h\n"
+            self.last_point = None
+
+        if color is not None:
+            self.draw_cont += color_str
+
+        if fill is not None:
+            self.draw_cont += fill_str
+            if color is not None:
+                if not even_odd:
+                    self.draw_cont += "B\n"
+                else:
+                    self.draw_cont += "B*\n"
+            else:
+                if not even_odd:
+                    self.draw_cont += "f\n"
+                else:
+                    self.draw_cont += "f*\n"
+        else:
+            self.draw_cont += "S\n"
+
+        self.draw_cont += emc
+        if pymupdf.CheckMorph(morph):
+            m1 = pymupdf.Matrix(
+                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+            )
+            mat = ~m1 * morph[1] * m1
+            self.draw_cont = _format_g(pymupdf.JM_TUPLE(mat)) + " cm\n" + self.draw_cont
+
+        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
+        self.draw_cont = ""
+        self.last_point = None
+        return
+
+    def commit(self, overlay: bool = True) -> None:
+        """Update the page's /Contents object with Shape data.
+
+        The argument controls whether data appear in foreground (default)
+        or background.
+        """
+        pymupdf.CheckParent(self.page)  # doc may have died meanwhile
+        self.totalcont += self.text_cont
+        self.totalcont = self.totalcont.encode()
+
+        if self.totalcont:
+            if overlay:
+                self.page.wrap_contents()  # ensure a balanced graphics state
+            # make /Contents object with dummy stream
+            xref = pymupdf.TOOLS._insert_contents(self.page, b" ", overlay)
+            # update it with potential compression
+            self.doc.update_stream(xref, self.totalcont)
+
+        self.last_point = None  # clean up ...
+        self.rect = None  #
+        self.draw_cont = ""  # for potential ...
+        self.text_cont = ""  # ...
+        self.totalcont = ""  # re-use
+
+
+def apply_redactions(
+    page: pymupdf.Page, images: int = 2, graphics: int = 1, text: int = 0
+) -> bool:
+    """Apply the redaction annotations of the page.
+
+    Args:
+        page: the PDF page.
+        images:
+              0 - ignore images
+              1 - remove all overlapping images
+              2 - blank out overlapping image parts
+              3 - remove image unless invisible
+        graphics:
+              0 - ignore graphics
+              1 - remove graphics if contained in rectangle
+              2 - remove all overlapping graphics
+        text:
+              0 - remove text
+              1 - ignore text
+    """
+
+    def center_rect(annot_rect, new_text, font, fsize):
+        """Calculate minimal sub-rectangle for the overlay text.
+
+        Notes:
+            Because 'insert_textbox' supports no vertical text centering,
+            we calculate an approximate number of lines here and return a
+            sub-rect with smaller height, which should still be sufficient.
+        Args:
+            annot_rect: the annotation rectangle
+            new_text: the text to insert.
+            font: the fontname. Must be one of the CJK or Base-14 set, else
+                the rectangle is returned unchanged.
+            fsize: the fontsize
+        Returns:
+            A rectangle to use instead of the annot rectangle.
+        """
+        if not new_text or annot_rect.width <= pymupdf.EPSILON:
+            return annot_rect
+        try:
+            text_width = pymupdf.get_text_length(new_text, font, fsize)
+        except (ValueError, mupdf.FzErrorBase):  # unsupported font
+            if g_exceptions_verbose:
+                pymupdf.exception_info()
+            return annot_rect
+        line_height = fsize * 1.2
+        limit = annot_rect.width
+        h = math.ceil(text_width / limit) * line_height  # estimate rect height
+        if h >= annot_rect.height:
+            return annot_rect
+        r = annot_rect
+        y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
+        r.y0 = y
+        return r
+
+    pymupdf.CheckParent(page)
+    doc = page.parent
+    if doc.is_encrypted or doc.is_closed:
+        raise ValueError("document closed or encrypted")
+    if not doc.is_pdf:
+        raise ValueError("is no PDF")
+
+    redact_annots = []  # storage of annot values
+    for annot in page.annots(
+        types=(pymupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
+    ):
+        # loop redactions
+        redact_annots.append(annot._get_redact_values())  # save annot values
+
+    if redact_annots == []:  # any redactions on this page?
+        return False  # no redactions
+
+    rc = page._apply_redactions(text, images, graphics)  # call MuPDF
+    if not rc:  # should not happen really
+        raise ValueError("Error applying redactions.")
+
+    # now write replacement text in old redact rectangles
+    shape = page.new_shape()
+    for redact in redact_annots:
+        annot_rect = redact["rect"]
+        fill = redact["fill"]
+        if fill:
+            shape.draw_rect(annot_rect)  # colorize the rect background
+            shape.finish(fill=fill, color=fill)
+        if "text" in redact.keys():  # if we also have text
+            new_text = redact["text"]
+            align = redact.get("align", 0)
+            fname = redact["fontname"]
+            fsize = redact["fontsize"]
+            color = redact["text_color"]
+            # try finding vertical centered sub-rect
+            trect = center_rect(annot_rect, new_text, fname, fsize)
+
+            rc = -1
+            while rc < 0 and fsize >= 4:  # while not enough room
+                # (re-) try insertion
+                rc = shape.insert_textbox(
+                    trect,
+                    new_text,
+                    fontname=fname,
+                    fontsize=fsize,
+                    color=color,
+                    align=align,
+                )
+                fsize -= 0.5  # reduce font if unsuccessful
+    shape.commit()  # append new contents object
+    return True
+
+
+# ------------------------------------------------------------------------------
+# Remove potentially sensitive data from a PDF. Similar to the Adobe
+# Acrobat 'sanitize' function
+# ------------------------------------------------------------------------------
+def scrub(
+    doc: pymupdf.Document,
+    attached_files: bool = True,
+    clean_pages: bool = True,
+    embedded_files: bool = True,
+    hidden_text: bool = True,
+    javascript: bool = True,
+    metadata: bool = True,
+    redactions: bool = True,
+    redact_images: int = 0,
+    remove_links: bool = True,
+    reset_fields: bool = True,
+    reset_responses: bool = True,
+    thumbnails: bool = True,
+    xml_metadata: bool = True,
+) -> None:
+    def remove_hidden(cont_lines):
+        """Remove hidden text from a PDF page.
+
+        Args:
+            cont_lines: list of lines with /Contents content. Should have status
+                from after page.cleanContents().
+
+        Returns:
+            List of /Contents lines from which hidden text has been removed.
+
+        Notes:
+            The input must have been created after the page's /Contents object(s)
+            have been cleaned with page.cleanContents(). This ensures a standard
+            formatting: one command per line, single spaces between operators.
+            This allows for drastic simplification of this code.
+        """
+        out_lines = []  # will return this
+        in_text = False  # indicate if within BT/ET object
+        suppress = False  # indicate text suppression active
+        make_return = False
+        for line in cont_lines:
+            if line == b"BT":  # start of text object
+                in_text = True  # switch on
+                out_lines.append(line)  # output it
+                continue
+            if line == b"ET":  # end of text object
+                in_text = False  # switch off
+                out_lines.append(line)  # output it
+                continue
+            if line == b"3 Tr":  # text suppression operator
+                suppress = True  # switch on
+                make_return = True
+                continue
+            if line[-2:] == b"Tr" and line[0] != b"3":
+                suppress = False  # text rendering changed
+                out_lines.append(line)
+                continue
+            if line == b"Q":  # unstack command also switches off
+                suppress = False
+                out_lines.append(line)
+                continue
+            if suppress and in_text:  # suppress hidden lines
+                continue
+            out_lines.append(line)
+        if make_return:
+            return out_lines
+        else:
+            return None
+
+    if not doc.is_pdf:  # only works for PDF
+        raise ValueError("is no PDF")
+    if doc.is_encrypted or doc.is_closed:
+        raise ValueError("closed or encrypted doc")
+
+    if not clean_pages:
+        hidden_text = False
+        redactions = False
+
+    if metadata:
+        doc.set_metadata({})  # remove standard metadata
+
+    for page in doc:
+        if reset_fields:
+            # reset form fields (widgets)
+            for widget in page.widgets():
+                widget.reset()
+
+        if remove_links:
+            links = page.get_links()  # list of all links on page
+            for link in links:  # remove all links
+                page.delete_link(link)
+
+        found_redacts = False
+        for annot in page.annots():
+            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
+                annot.update_file(buffer_=b" ")  # set file content to empty
+            if reset_responses:
+                annot.delete_responses()
+            if annot.type[0] == pymupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
+                found_redacts = True
+
+        if redactions and found_redacts:
+            page.apply_redactions(images=redact_images)
+
+        if not (clean_pages or hidden_text):
+            continue  # done with the page
+
+        page.clean_contents()
+        if not page.get_contents():
+            continue
+        if hidden_text:
+            xref = page.get_contents()[0]  # only one b/o cleaning!
+            cont = doc.xref_stream(xref)
+            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
+            if cont_lines:  # something was actually removed
+                cont = b"\n".join(cont_lines)
+                doc.update_stream(xref, cont)  # rewrite the page /Contents
+
+        if thumbnails:  # remove page thumbnails?
+            if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
+                doc.xref_set_key(page.xref, "Thumb", "null")
+
+    # pages are scrubbed, now perform document-wide scrubbing
+    # remove embedded files
+    if embedded_files:
+        for name in doc.embfile_names():
+            doc.embfile_del(name)
+
+    if xml_metadata:
+        doc.del_xml_metadata()
+    if not (xml_metadata or javascript):
+        xref_limit = 0
+    else:
+        xref_limit = doc.xref_length()
+    for xref in range(1, xref_limit):
+        if not doc.xref_object(xref):
+            msg = "bad xref %i - clean PDF before scrubbing" % xref
+            raise ValueError(msg)
+        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
+            # a /JavaScript action object
+            obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
+            doc.update_object(xref, obj)  # update this object
+            continue  # no further handling
+
+        if not xml_metadata:
+            continue
+
+        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
+            # delete any metadata object directly
+            doc.update_object(xref, "<<>>")
+            doc.update_stream(xref, b"deleted", new=True)
+            continue
+
+        if doc.xref_get_key(xref, "Metadata")[0] != "null":
+            doc.xref_set_key(xref, "Metadata", "null")
+
+
+def _show_fz_text( text):
+    #if mupdf_cppyy:
+    #    assert isinstance( text, cppyy.gbl.mupdf.Text)
+    #else:
+    #    assert isinstance( text, mupdf.Text)
+    num_spans = 0
+    num_chars = 0
+    span = text.m_internal.head
+    while 1:
+        if not span:
+            break
+        num_spans += 1
+        num_chars += span.len
+        span = span.next
+    return f'num_spans={num_spans} num_chars={num_chars}'
+
+def fill_textbox(
+    writer: pymupdf.TextWriter,
+    rect: rect_like,
+    text: typing.Union[str, list],
+    pos: point_like = None,
+    font: typing.Optional[pymupdf.Font] = None,
+    fontsize: float = 11,
+    lineheight: OptFloat = None,
+    align: int = 0,
+    warn: bool = None,
+    right_to_left: bool = False,
+    small_caps: bool = False,
+) -> tuple:
+    """Fill a rectangle with text.
+
+    Args:
+        writer: pymupdf.TextWriter object (= "self")
+        rect: rect-like to receive the text.
+        text: string or list/tuple of strings.
+        pos: point-like start position of first word.
+        font: pymupdf.Font object (default pymupdf.Font('helv')).
+        fontsize: the fontsize.
+        lineheight: overwrite the font property
+        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
+        warn: (bool) text overflow action: none, warn, or exception
+        right_to_left: (bool) indicate right-to-left language.
+    """
+    rect = pymupdf.Rect(rect)
+    if rect.is_empty:
+        raise ValueError("fill rect must not empty.")
+    if type(font) is not pymupdf.Font:
+        font = pymupdf.Font("helv")
+
+    def textlen(x):
+        """Return length of a string."""
+        return font.text_length(
+            x, fontsize=fontsize, small_caps=small_caps
+        )  # abbreviation
+
+    def char_lengths(x):
+        """Return list of single character lengths for a string."""
+        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)
+
+    def append_this(pos, text):
+        ret = writer.append(
+                pos, text, font=font, fontsize=fontsize, small_caps=small_caps
+                )
+        return ret
+
+    tolerance = fontsize * 0.2  # extra distance to left border
+    space_len = textlen(" ")
+    std_width = rect.width - tolerance
+    std_start = rect.x0 + tolerance
+
+    def norm_words(width, words):
+        """Cut any word in pieces no longer than 'width'."""
+        nwords = []
+        word_lengths = []
+        for w in words:
+            wl_lst = char_lengths(w)
+            wl = sum(wl_lst)
+            if wl <= width:  # nothing to do - copy over
+                nwords.append(w)
+                word_lengths.append(wl)
+                continue
+
+            # word longer than rect width - split it in parts
+            n = len(wl_lst)
+            while n > 0:
+                wl = sum(wl_lst[:n])
+                if wl <= width:
+                    nwords.append(w[:n])
+                    word_lengths.append(wl)
+                    w = w[n:]
+                    wl_lst = wl_lst[n:]
+                    n = len(wl_lst)
+                else:
+                    n -= 1
+        return nwords, word_lengths
+
+    def output_justify(start, line):
+        """Justified output of a line."""
+        # ignore leading / trailing / multiple spaces
+        words = [w for w in line.split(" ") if w != ""]
+        nwords = len(words)
+        if nwords == 0:
+            return
+        if nwords == 1:  # single word cannot be justified
+            append_this(start, words[0])
+            return
+        tl = sum([textlen(w) for w in words])  # total word lengths
+        gaps = nwords - 1  # number of word gaps
+        gapl = (std_width - tl) / gaps  # width of each gap
+        for w in words:
+            _, lp = append_this(start, w)  # output one word
+            start.x = lp.x + gapl  # next start at word end plus gap
+        return
+
+    asc = font.ascender
+    dsc = font.descender
+    if not lineheight:
+        if asc - dsc <= 1:
+            lheight = 1.2
+        else:
+            lheight = asc - dsc
+    else:
+        lheight = lineheight
+
+    LINEHEIGHT = fontsize * lheight  # effective line height
+    width = std_width  # available horizontal space
+
+    # starting point of text
+    if pos is not None:
+        pos = pymupdf.Point(pos)
+    else:  # default is just below rect top-left
+        pos = rect.tl + (tolerance, fontsize * asc)
+    if pos not in rect:
+        raise ValueError("Text must start in rectangle.")
+
+    # calculate displacement factor for alignment
+    if align == pymupdf.TEXT_ALIGN_CENTER:
+        factor = 0.5
+    elif align == pymupdf.TEXT_ALIGN_RIGHT:
+        factor = 1.0
+    else:
+        factor = 0
+
+    # split in lines if just a string was given
+    if type(text) is str:
+        textlines = text.splitlines()
+    else:
+        textlines = []
+        for line in text:
+            textlines.extend(line.splitlines())
+
+    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1
+
+    new_lines = []  # the final list of textbox lines
+    no_justify = []  # no justify for these line numbers
+    for i, line in enumerate(textlines):
+        if line in ("", " "):
+            new_lines.append((line, space_len))
+            width = rect.width - tolerance
+            no_justify.append((len(new_lines) - 1))
+            continue
+        if i == 0:
+            width = rect.x1 - pos.x
+        else:
+            width = rect.width - tolerance
+
+        if right_to_left:  # reverses Arabic / Hebrew text front to back
+            line = writer.clean_rtl(line)
+        tl = textlen(line)
+        if tl <= width:  # line short enough
+            new_lines.append((line, tl))
+            no_justify.append((len(new_lines) - 1))
+            continue
+
+        # we need to split the line in fitting parts
+        words = line.split(" ")  # the words in the line
+
+        # cut in parts any words that are longer than rect width
+        words, word_lengths = norm_words(width, words)
+
+        n = len(words)
+        while True:
+            line0 = " ".join(words[:n])
+            wl = sum(word_lengths[:n]) + space_len * (n - 1)
+            if wl <= width:
+                new_lines.append((line0, wl))
+                words = words[n:]
+                word_lengths = word_lengths[n:]
+                n = len(words)
+                line0 = None
+            else:
+                n -= 1
+
+            if len(words) == 0:
+                break
+            assert n
+
+    # -------------------------------------------------------------------------
+    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
+    # output length (float) and 'text' is the text. Except for justified text,
+    # this is output-ready.
+    # -------------------------------------------------------------------------
+    nlines = len(new_lines)
+    if nlines > max_lines:
+        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
+        if warn is None:
+            pass
+        elif warn:
+            pymupdf.message("Warning: " + msg)
+        else:
+            raise ValueError(msg)
+
+    start = pymupdf.Point()
+    no_justify += [len(new_lines) - 1]  # no justifying of last line
+    for i in range(max_lines):
+        try:
+            line, tl = new_lines.pop(0)
+        except IndexError:
+            if g_exceptions_verbose >= 2:   pymupdf.exception_info()
+            break
+
+        if right_to_left:  # Arabic, Hebrew
+            line = "".join(reversed(line))
+
+        if i == 0:  # may have different start for first line
+            start = pos
+
+        if align == pymupdf.TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
+            output_justify(start, line)
+            start.x = std_start
+            start.y += LINEHEIGHT
+            continue
+
+        if i > 0 or pos.x == std_start:  # left, center, right alignments
+            start.x += (width - tl) * factor
+
+        append_this(start, line)
+        start.x = std_start
+        start.y += LINEHEIGHT
+
+    return new_lines  # return non-written lines
+
+
+# ------------------------------------------------------------------------
+# Optional Content functions
+# ------------------------------------------------------------------------
+def get_oc(doc: pymupdf.Document, xref: int) -> int:
+    """Look up the optional content object of an image or form xobject.
+
+    Args:
+        doc: the PDF document owning the object.
+        xref: (int) xref number of an image or form xobject.
+    Returns:
+        The xref (int) referenced by the object's "OC" key, or 0 if that
+        key does not hold an indirect reference.
+    Raises:
+        ValueError: if the document is closed or encrypted, or if the
+            object's "Subtype" is neither /Image nor /Form.
+    """
+    if doc.is_closed or doc.is_encrypted:
+        raise ValueError("document close or encrypted")
+
+    # only objects with subtype /Image or /Form are accepted
+    key_type, subtype = doc.xref_get_key(xref, "Subtype")
+    is_xobject = key_type == "name" and subtype in ("/Image", "/Form")
+    if not is_xobject:
+        raise ValueError("bad object type at xref %i" % xref)
+
+    key_type, value = doc.xref_get_key(xref, "OC")
+    if key_type == "xref":
+        # the value looks like "<number> 0 R": drop the reference suffix
+        return int(value.replace("0 R", ""))
+    return 0
+
+
+def set_oc(doc: pymupdf.Document, xref: int, oc: int) -> None:
+    """Attach optional content object to image or form xobject.
+
+    Args:
+        doc: the PDF document owning the objects.
+        xref: (int) xref number of an image or form xobject
+        oc: (int) xref number of an OCG or OCMD. Use 0 to remove an
+            existing "OC" entry; this is a no-op if there is none.
+    Raises:
+        ValueError: if the document is closed or encrypted, if 'xref' is
+            neither an image nor a form xobject, if 'oc' is negative, or
+            if 'oc' is positive but neither an OCG nor an OCMD.
+    """
+    if doc.is_closed or doc.is_encrypted:
+        raise ValueError("document close or encrypted")
+    t, name = doc.xref_get_key(xref, "Subtype")
+    if t != "name" or name not in ("/Image", "/Form"):
+        raise ValueError("bad object type at xref %i" % xref)
+    if oc < 0:  # a negative number can never be a valid reference
+        raise ValueError("bad object type at xref %i" % oc)
+    if oc == 0:
+        # Removal request. Only touch the object if the key is present and
+        # never fall through: that would store the reference "0 0 R",
+        # which does not denote any OCG / OCMD.
+        if "OC" in doc.xref_get_keys(xref):
+            doc.xref_set_key(xref, "OC", "null")
+        return None
+    t, name = doc.xref_get_key(oc, "Type")
+    if t != "name" or name not in ("/OCG", "/OCMD"):
+        raise ValueError("bad object type at xref %i" % oc)
+    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
+    return None
+
+
+def set_ocmd(
+    doc: pymupdf.Document,
+    xref: int = 0,
+    ocgs: typing.Union[list, None] = None,
+    policy: OptStr = None,
+    ve: typing.Union[list, None] = None,
+) -> int:
+    """Create or update an OCMD object in a PDF document.
+
+    Args:
+        xref: (int) 0 for creating a new object, otherwise update existing one.
+        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
+        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
+        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.
+
+    Returns:
+        Xref of the created or updated OCMD.
+
+    Raises:
+        ValueError: for OCG numbers not contained in doc.get_ocgs(), an
+            unknown policy, a malformed visibility expression, or an
+            'xref' whose object source does not contain "/Type/OCMD".
+    """
+
+    all_ocgs = set(doc.get_ocgs().keys())
+
+    def ve_maker(ve):
+        """Return the PDF array source of a (nested) visibility expression."""
+        # Error messages format '(ve,)', not 've': the expression may be a
+        # tuple, which '%' would otherwise unpack into several arguments
+        # and fail with a TypeError instead of the intended ValueError.
+        if type(ve) not in (list, tuple) or len(ve) < 2:
+            raise ValueError("bad 've' format: %s" % (ve,))
+        operator = ve[0].lower()
+        if operator not in ("and", "or", "not"):
+            raise ValueError("bad operand: %s" % (ve[0],))
+        if operator == "not" and len(ve) != 2:  # 'not' has one operand
+            raise ValueError("bad 've' format: %s" % (ve,))
+        item = "[/%s" % ve[0].title()
+        for x in ve[1:]:
+            if type(x) is int:  # an OCG xref
+                if x not in all_ocgs:
+                    raise ValueError("bad OCG %i" % x)
+                item += " %i 0 R" % x
+            else:  # anything else must be a nested expression
+                item += " %s" % ve_maker(x)
+        item += "]"
+        return item
+
+    text = "<</Type/OCMD"
+
+    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
+        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
+        if s:
+            raise ValueError("bad OCGs: %s" % s)
+        text += "/OCGs[" + " ".join("%i 0 R" % x for x in ocgs) + "]"
+
+    if policy:
+        # map the lower-cased input to the spelling written to the PDF
+        pols = {
+            "anyon": "AnyOn",
+            "allon": "AllOn",
+            "anyoff": "AnyOff",
+            "alloff": "AllOff",
+        }
+        policy = str(policy).lower()
+        if policy not in pols:
+            raise ValueError("bad policy: %s" % policy)
+        text += "/P/%s" % pols[policy]
+
+    if ve:
+        text += "/VE%s" % ve_maker(ve)
+
+    text += ">>"
+
+    # make new object or replace old OCMD (check type first)
+    if xref == 0:
+        xref = doc.get_new_xref()
+    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
+        raise ValueError("bad xref or not an OCMD")
+    doc.update_object(xref, text)
+    return xref
+
+
+def get_ocmd(doc: pymupdf.Document, xref: int) -> dict:
+    """Return the definition of an OCMD (optional content membership dictionary).
+
+    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
+    /VE (visibility expression, PDF array). Via string manipulation, this
+    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
+    and "ve" - ready to recycle as input for 'set_ocmd()'.
+
+    Args:
+        doc: the PDF document.
+        xref: (int) xref number of the OCMD.
+    Returns:
+        A dict {"xref": int, "ocgs": list of int or None,
+        "policy": str or None, "ve": list or None}.
+    Raises:
+        ValueError: if 'xref' is out of range, the object source does not
+            contain "/Type/OCMD", or its /P or /VE entry cannot be parsed.
+    """
+    import json
+    import re
+
+    if xref not in range(doc.xref_length()):
+        raise ValueError("bad xref")
+    text = doc.xref_object(xref, compressed=True)
+    if "/Type/OCMD" not in text:
+        raise ValueError("bad object type")
+    textlen = len(text)
+
+    p0 = text.find("/OCGs[")  # look for /OCGs key
+    p1 = text.find("]", p0) if p0 >= 0 else -1
+    if p0 < 0 or p1 < 0:  # no OCGs found
+        ocgs = None
+    else:
+        # array content is "n 0 R n 0 R ...": keep the object numbers only
+        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
+        ocgs = list(map(int, ocgs))
+
+    p0 = text.find("/P/")  # look for /P policy key
+    if p0 < 0:
+        policy = None
+    else:
+        # The policy is the PDF name following "/P/". Read its letters
+        # directly: searching for the substrings "ff" / "on" misses the
+        # capitalized "On" of 'AnyOn' and 'AllOn'.
+        m = re.match(r"[A-Za-z]+", text[p0 + 3 :])
+        if m is None:  # some irregular syntax
+            raise ValueError("bad object at xref")
+        policy = m.group()
+
+    p0 = text.find("/VE[")  # look for /VE visibility expression key
+    if p0 < 0:  # no visibility expression found
+        ve = None
+    else:
+        lp = rp = 0  # find end of /VE by finding last ']'.
+        p1 = p0
+        while lp < 1 or lp != rp:  # until all opened brackets are closed
+            p1 += 1
+            if not p1 < textlen:  # some irregular syntax
+                raise ValueError("bad object at xref")
+            if text[p1] == "[":
+                lp += 1
+            if text[p1] == "]":
+                rp += 1
+        # p1 now positioned at the last "]"
+        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
+        # turn the PDF array source into JSON: operators become strings,
+        # references become plain numbers, items get comma-separated
+        ve = (
+            ve.replace("/And", '"and",')
+            .replace("/Not", '"not",')
+            .replace("/Or", '"or",')
+        )
+        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
+        try:
+            ve = json.loads(ve)
+        except Exception:
+            pymupdf.exception_info()
+            pymupdf.message(f"bad /VE key: {ve!r}")
+            raise
+    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
+
+
+"""
+Handle page labels for PDF documents.
+
+Reading
+-------
+* compute the label of a page
+* find page number(s) having the given label.
+
+Writing
+-------
+Supports setting (defining) page labels for PDF documents.
+
+A big Thank You goes to WILLIAM CHAPMAN who contributed the idea and
+significant parts of the following code during late December 2020
+through early January 2021.
+"""
+
+
+def rule_dict(item):
+    """Make a Python dict from a PDF page label rule.
+
+    Args:
+        item -- a tuple (pno, rule) with the start page number and the rule
+                string like <</S/D...>>.
+    Returns:
+        A dict like
+        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
+        Key 'style' is only present if the rule contains an /S entry.
+    """
+    # Jorj McKie, 2021-01-06
+
+    pno, rule = item
+    rule = rule[2:-2].split("/")[1:]  # strip "<<" and ">>"
+    d = {"startpage": pno, "prefix": "", "firstpagenum": 1}
+    skip = False
+    for i, token in enumerate(rule):
+        if skip:  # this token has already been processed
+            skip = False  # deactivate skipping again
+            continue
+        if token == "Type":
+            # The value of an optional /Type entry (e.g. "PageLabel") is
+            # no label information. Skip it, so that it is not mistaken
+            # for a prefix specification because of its leading "P".
+            skip = True
+            continue
+        if token == "S":  # style specification
+            d["style"] = rule[i + 1]  # next token has the style
+            skip = True  # do not process next token again
+            continue
+        if token.startswith("P"):  # prefix specification: extract the string
+            d["prefix"] = token[1:].replace("(", "").replace(")", "")
+            continue
+        if token.startswith("St"):  # start page number specification
+            d["firstpagenum"] = int(token[2:])
+    return d
+
+
+def get_label_pno(pgNo, labels):
+    """Return the label for this page number.
+
+    Args:
+        pgNo: page number, 0-based.
+        labels: result of doc._get_page_labels(), i.e. items (pno, rule)
+            where 'pno' is the first page the rule applies to. The items
+            need not be sorted.
+    Returns:
+        The label (str) of the page number. An empty string is returned
+        if no rule starts at or before 'pgNo'.
+    """
+    # Jorj McKie, 2021-01-06
+
+    # Select the applicable rule: the one with the largest start page not
+    # exceeding pgNo. Among equal start pages the last one wins, which is
+    # what taking the last item of a sorted list would deliver.
+    item = None
+    for x in labels:
+        if x[0] <= pgNo and (item is None or x[0] >= item[0]):
+            item = x
+    if item is None:  # no rule covers this page
+        return ""
+
+    rule = rule_dict(item)
+    prefix = rule.get("prefix", "")
+    style = rule.get("style", "")
+    # make sure we start at 0 when enumerating the alphabet
+    delta = -1 if style in ("a", "A") else 0
+    pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"] + delta
+    return construct_label(style, prefix, pagenumber)
+
+
+def get_label(page):
+    """Compute the label of a PDF page.
+
+    Args:
+        page: page object.
+    Returns:
+        The label (str) of the page. If the owning document has no page
+        label definitions, an empty string is returned.
+    """
+    # Jorj McKie, 2021-01-06
+
+    rules = page.parent._get_page_labels()
+    if rules:
+        rules.sort()  # the lookup expects rules ordered by start page
+        return get_label_pno(page.number, rules)
+    return ""
+
+
+def get_page_numbers(doc, label, only_one=False):
+    """Return a list of page numbers with the given label.
+
+    Args:
+        doc: PDF document object (resp. 'self').
+        label: (str) label.
+        only_one: (bool) stop searching after first hit.
+    Returns:
+        List of page numbers having this label. The list is empty if
+        'label' is empty or the document has no page label definitions.
+    """
+    # Jorj McKie, 2021-01-06
+
+    numbers = []
+    if not label:
+        return numbers
+    labels = doc._get_page_labels()
+    if not labels:
+        return numbers
+    # Order the rules by start page before looking up labels, exactly as
+    # get_label() does, so both functions agree on every page's label.
+    labels = sorted(labels)
+    for i in range(doc.page_count):
+        if get_label_pno(i, labels) == label:
+            numbers.append(i)
+            if only_one:
+                break
+    return numbers
+
+
+def construct_label(style, prefix, pno) -> str:
+    """Build a page label from numbering style, prefix and page number.
+
+    'style' selects how 'pno' is rendered: "D" as a decimal number,
+    "r" / "R" as a lower / upper case roman numeral, "a" / "A" as a
+    lower / upper case letter sequence. For any other value, the label
+    consists of the prefix only.
+    """
+    # William Chapman, 2021-01-06
+
+    if style == "D":
+        number = str(pno)
+    elif style in ("r", "R"):
+        roman = integerToRoman(pno)
+        number = roman.lower() if style == "r" else roman.upper()
+    elif style in ("a", "A"):
+        letters = integerToLetter(pno)
+        number = letters.lower() if style == "a" else letters.upper()
+    else:  # no or unknown numbering style
+        number = ""
+    return prefix + number
+
+
+def integerToLetter(i) -> str:
+    """Returns letter sequence string for integer i.
+
+    The sequence is 0 -> "A", ..., 25 -> "Z", 26 -> "AA", 27 -> "AB", ...
+    All computations use exact integer arithmetic.
+
+    NOTE(review): PDF 32000-1 (Table 159) describes alphabetic labels as
+    "A to Z for the first 26 pages, AA to ZZ for the next 26", which
+    reads like repeated letters (AA, BB, ...) - confirm which sequence
+    is intended here.
+    """
+    # William Chapman, Jorj McKie, 2021-01-06
+    import string
+    ls = string.ascii_uppercase
+
+    # Determine the number of letters 'n' and reduce 'a' to the 0-based
+    # position of 'i' among all sequences having exactly 'n' letters.
+    n, a = 1, i
+    while 26 ** n <= a:
+        a -= 26 ** n
+        n += 1
+
+    # Write 'a' as a number with 'n' digits to base 26, highest digit first.
+    letters = []
+    for j in reversed(range(n)):
+        f, a = divmod(a, 26 ** j)
+        letters.append(ls[f])
+    return "".join(letters)
+
+
+def integerToRoman(num: int) -> str:
+    """Convert an integer to its (upper case) roman numeral.
+
+    Works through the numeral values from largest to smallest, emitting
+    each numeral as often as its value fits into what is left of 'num'.
+    For 0 an empty string is returned.
+    """
+    # William Chapman, Jorj McKie, 2021-01-06
+
+    numerals = (
+        (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
+        (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
+        (10, "X"), (9, "IX"), (5, "V"), (4, "IV"),
+        (1, "I"),
+    )
+
+    parts = []
+    remaining = num
+    for value, numeral in numerals:
+        count = remaining // value  # how often this numeral fits
+        parts.append(numeral * count)
+        remaining -= value * count
+        if remaining <= 0:  # nothing left to convert
+            break
+    return "".join(parts)
+
+
+def get_page_labels(doc):
+    """Return the page label definitions of a PDF document.
+
+    Args:
+        doc: PDF document (resp. 'self').
+    Returns:
+        A list with one dictionary per label rule, as made by 'rule_dict':
+        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
+    """
+    # Jorj McKie, 2021-01-10
+    definitions = []
+    for entry in doc._get_page_labels():  # entry is (pno, rule string)
+        definitions.append(rule_dict(entry))
+    return definitions
+
+
+def set_page_labels(doc, labels):
+    """Add / replace page label definitions in PDF document.
+
+    Args:
+        doc: PDF document (resp. 'self').
+        labels: list of label dictionaries like:
+        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
+        as returned by get_page_labels(). The rules are written ordered by
+        'startpage'; the passed-in list itself is left unchanged.
+    """
+    # William Chapman, 2021-01-06
+
+    def create_label_str(label):
+        """Convert Python label dict to corresponding PDF rule string.
+
+        Args:
+            label: (dict) build rule for the label.
+        Returns:
+            The start page number followed by the PDF label rule string
+            wrapped in "<<", ">>".
+        """
+        s = "%i<<" % label["startpage"]
+        if label.get("prefix", "") != "":
+            s += "/P(%s)" % label["prefix"]
+        if label.get("style", "") != "":
+            s += "/S/%s" % label["style"]
+        if label.get("firstpagenum", 1) > 1:  # 1 is assumed when omitted
+            s += "/St %i" % label["firstpagenum"]
+        s += ">>"
+        return s
+
+    def create_nums(labels):
+        """Return concatenated string of all labels rules.
+
+        Args:
+            labels: (list) dictionaries as created by function 'rule_dict'.
+        Returns:
+            PDF compatible string for page label definitions, ready to be
+            enclosed in PDF array 'Nums[...]'.
+        """
+        # sort a copy: do not reorder the caller's list as a side effect
+        ordered = sorted(labels, key=lambda x: x["startpage"])
+        return "".join(create_label_str(label) for label in ordered)
+
+    doc._set_page_labels(create_nums(labels))
+
+
+# End of Page Label Code -------------------------------------------------
+
+
+def has_links(doc: pymupdf.Document) -> bool:
+    """Tell whether any page of the PDF contains a link.
+
+    Raises:
+        ValueError: if the document is closed or is not a PDF.
+    """
+    if doc.is_closed:
+        raise ValueError("document closed")
+    if not doc.is_pdf:
+        raise ValueError("is no PDF")
+    # each item of page_annot_xrefs() has the annotation type at index 1;
+    # stop at the first link found
+    return any(
+        entry[1] == pymupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
+        for pno in range(doc.page_count)
+        for entry in doc.page_annot_xrefs(pno)
+    )
+
+
+def has_annots(doc: pymupdf.Document) -> bool:
+    """Tell whether any page of the PDF contains an annotation.
+
+    Links and widgets do not count as annotations here.
+
+    Raises:
+        ValueError: if the document is closed or is not a PDF.
+    """
+    if doc.is_closed:
+        raise ValueError("document closed")
+    if not doc.is_pdf:
+        raise ValueError("is no PDF")
+    # each item of page_annot_xrefs() has the annotation type at index 1;
+    # stop at the first item being neither a link nor a widget
+    # pylint: disable=no-member
+    return any(
+        entry[1] not in (pymupdf.PDF_ANNOT_LINK, pymupdf.PDF_ANNOT_WIDGET)
+        for pno in range(doc.page_count)
+        for entry in doc.page_annot_xrefs(pno)
+    )
+
+
+# -------------------------------------------------------------------
+# Functions to recover the quad contained in a text extraction bbox
+# -------------------------------------------------------------------
+def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> pymupdf.Quad:
+    """Compute the quad that is wrapped by a text extraction bbox.
+
+    The bbox may be any of the resp. tuples occurring inside the given span.
+
+    Args:
+        line_dir: (tuple) 'line["dir"]' of the owning line. If None,
+            'span["dir"]' is used instead.
+        span: (dict) the span. May be from get_texttrace() method.
+        bbox: (tuple) the bbox of the span or any of its characters.
+    Returns:
+        The quad which is wrapped by the bbox.
+    """
+    direction = span["dir"] if line_dir is None else line_dir
+    cosine, sine = direction
+    rect = pymupdf.Rect(bbox)  # make it a rect
+
+    # height of the quad's rectangle in multiples of the font size
+    if pymupdf.TOOLS.set_small_glyph_heights():  # ==> just fontsize as height
+        factor = 1
+    else:
+        factor = span["ascender"] - span["descender"]
+    height = factor * span["size"]
+
+    # The quad points are found at the following distances from the bbox
+    # corners. Which corners to start from depends on the quadrant in
+    # which the text writing angle is located.
+    dx = height * sine
+    dy = height * cosine
+    if dy >= 0 and dx <= 0:  # quadrant 1
+        ul = rect.bl - (0, dy)
+        ur = rect.tr + (dx, 0)
+        ll = rect.bl - (dx, 0)
+        lr = rect.tr + (0, dy)
+    elif dy <= 0 and dx <= 0:  # quadrant 2
+        ul = rect.br + (dx, 0)
+        ur = rect.tl - (0, dy)
+        ll = rect.br + (0, dy)
+        lr = rect.tl - (dx, 0)
+    elif dy <= 0 and dx >= 0:  # quadrant 3
+        ul = rect.tr - (0, dy)
+        ur = rect.bl + (dx, 0)
+        ll = rect.tr - (dx, 0)
+        lr = rect.bl + (0, dy)
+    else:  # quadrant 4
+        ul = rect.tl + (dx, 0)
+        ur = rect.br - (0, dy)
+        ll = rect.tl + (0, dy)
+        lr = rect.br - (dx, 0)
+    return pymupdf.Quad(ul, ur, ll, lr)
+
+
+def recover_quad(line_dir: tuple, span: dict) -> pymupdf.Quad:
+    """Recover the quadrilateral of a text span.
+
+    Validates the arguments and delegates to 'recover_bbox_quad' using
+    the span's own bbox.
+
+    Args:
+        line_dir: (tuple) 'line["dir"]' of the owning line.
+        span: the span.
+    Returns:
+        The quadrilateral enveloping the span's text.
+    Raises:
+        ValueError: if 'line_dir' is not a tuple of two items or 'span'
+            is not a dict.
+    """
+    dir_ok = type(line_dir) is tuple and len(line_dir) == 2
+    if not dir_ok:
+        raise ValueError("bad line dir argument")
+    if type(span) is not dict:
+        raise ValueError("bad span argument")
+    span_bbox = span["bbox"]
+    return recover_bbox_quad(line_dir, span, span_bbox)
+
+
+def recover_line_quad(line: dict, spans: list = None) -> pymupdf.Quad:
+    """Compute the quad of a line in 'dict' / 'rawdict' text extractions.
+
+    The lower quad points are taken from the quads of the first and the
+    last span, the height is the maximum height of the spans. The base
+    line is mapped to the x-axis, a rectangle of that height is erected
+    on it, converted to a quad and mapped back with the inverse matrix.
+
+    Args:
+        line: (dict) the line; its 'dir' and - if 'spans' is None - its
+            'spans' are used.
+        spans: (list, optional) sub-list of spans to consider.
+    Returns:
+        pymupdf.Quad covering selected spans.
+    Raises:
+        ValueError: if there are no spans to consider.
+    """
+    selected = line["spans"] if spans is None else spans
+    if len(selected) == 0:
+        raise ValueError("bad span list")
+
+    line_dir = line["dir"]  # text direction
+    _cos, _sin = line_dir  # values unused, but must be a pair
+
+    first_quad = recover_quad(line_dir, selected[0])
+    if len(selected) > 1:
+        last_quad = recover_quad(line_dir, selected[-1])
+    else:  # only one span: last = first
+        last_quad = first_quad
+
+    base_start = first_quad.ll  # lower-left of line quad
+    base_end = last_quad.lr  # lower-right of line quad
+
+    # matrix mapping the base line to the x-axis, base_start to (0, 0)
+    mat0 = pymupdf.planish_line(base_start, base_end)
+    x_lr = base_end * mat0
+
+    small = pymupdf.TOOLS.set_small_glyph_heights()  # small glyph heights?
+
+    def span_height(s):
+        """Return the height of span 's': its size times a factor."""
+        return s["size"] * (1 if small else (s["ascender"] - s["descender"]))
+
+    h = max(map(span_height, selected))
+
+    line_quad = pymupdf.Rect(0, -h, x_lr.x, 0).quad  # line rectangle as quad
+    line_quad *= ~mat0  # map back using the inverse matrix
+    return line_quad
+
+
+def recover_span_quad(line_dir: tuple, span: dict, chars: list = None) -> pymupdf.Quad:
+    """Compute the quad of a span in 'dict' / 'rawdict' text extractions.
+
+    Notes:
+        Without 'chars', the result of 'recover_quad' for the full span
+        is returned. With 'chars', the quads of the first and the last
+        of these characters determine the base line of the result. The
+        latter requires a span of the "rawdict" extraction option.
+
+    Args:
+        line_dir: (tuple) 'line["dir"]' of the owning line. If None,
+            'span["dir"]' is used (span from get_texttrace()).
+        span: (dict) the span.
+        chars: (list, optional) sub-list of characters to consider.
+    Returns:
+        pymupdf.Quad covering selected characters.
+    Raises:
+        ValueError: if 'chars' is given but the span has no "chars" key.
+    """
+    direction = span["dir"] if line_dir is None else line_dir
+    if chars is None:  # no sub-selection: take the full span
+        return recover_quad(direction, span)
+    if "chars" not in span:
+        raise ValueError("need 'rawdict' option to sub-select chars")
+
+    first_quad = recover_char_quad(direction, span, chars[0])
+    if len(chars) > 1:
+        last_quad = recover_char_quad(direction, span, chars[-1])
+    else:  # only one character: last = first
+        last_quad = first_quad
+
+    base_start = first_quad.ll  # lower-left of span quad
+    base_end = last_quad.lr  # lower-right of span quad
+
+    # matrix mapping the base line to the x-axis, base_start to (0, 0)
+    mat0 = pymupdf.planish_line(base_start, base_end)
+    x_lr = base_end * mat0
+
+    # height: font size, multiplied by (ascender - descender) unless
+    # small glyph heights are active
+    if pymupdf.TOOLS.set_small_glyph_heights():
+        factor = 1
+    else:
+        factor = span["ascender"] - span["descender"]
+    h = span["size"] * factor
+
+    span_quad = pymupdf.Rect(0, -h, x_lr.x, 0).quad  # span rectangle as quad
+    span_quad *= ~mat0  # rotate back and shift back
+    return span_quad
+
+
+def recover_char_quad(line_dir: tuple, span: dict, char: dict) -> pymupdf.Quad:
+    """Recover the quadrilateral of a text character.
+
+    This requires the "rawdict" option of text extraction.
+
+    Args:
+        line_dir: (tuple) 'line["dir"]' of the span's line. If None,
+            'span["dir"]' is used.
+        span: (dict) the span dict.
+        char: (dict) the character dict. A tuple is accepted as well, in
+            which case its item at index 3 is taken as the bbox.
+    Returns:
+        The quadrilateral enveloping the character.
+    Raises:
+        ValueError: if 'line_dir' is not a tuple of two items, 'span' is
+            not a dict, or 'char' is neither a dict nor a tuple.
+    """
+    if line_dir is None:
+        line_dir = span["dir"]
+    if type(line_dir) is not tuple or len(line_dir) != 2:
+        raise ValueError("bad line dir argument")
+    if type(span) is not dict:
+        raise ValueError("bad span argument")
+    if type(char) is dict:
+        bbox = pymupdf.Rect(char["bbox"])
+    elif type(char) is tuple:
+        bbox = pymupdf.Rect(char[3])
+    else:  # name the offending argument: it is 'char', not 'span'
+        raise ValueError("bad char argument")
+
+    return recover_bbox_quad(line_dir, span, bbox)
+
+
+# -------------------------------------------------------------------
+# Building font subsets using fontTools
+# -------------------------------------------------------------------
+def subset_fonts(doc: pymupdf.Document, verbose: bool = False, fallback: bool = False) -> OptInt:
+    """Build font subsets in a PDF.
+
+    Eligible fonts are potentially replaced by smaller versions. Page text is
+    NOT rewritten and thus should retain properties like being hidden or
+    controlled by optional content.
+
+    This method by default uses MuPDF's own internal feature to create subset
+    fonts. As this is a new function, errors may still occur. In this case,
+    please fall back to using the previous version by using "fallback=True".
+    Fallback mode requires the external package 'fontTools'.
+
+    Args:
+        fallback: use the older deprecated implementation.
+        verbose: only used by fallback mode.
+
+    Returns:
+        The new MuPDF-based code returns None.  The deprecated fallback
+        mode returns 0 if there are no fonts to subset.  Otherwise, it
+        returns the decrease in fontsize (the difference in fontsize),
+        measured in bytes.
+    """
+    # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
+    # An embedded font is uniquely defined by its fontbuffer only. It may have
+    # multiple names and xrefs.
+    # Once the sets of used unicodes and glyphs are known, we compute a
+    # smaller version of the buffer user package fontTools.
+
+    if not fallback:  # by default use MuPDF function
+        pdf = mupdf.pdf_document_from_fz_document(doc)
+        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
+        return
+
+    font_buffers = {}
+
+    def get_old_widths(xref):
+        """Retrieve old font '/W' and '/DW' values."""
+        df = doc.xref_get_key(xref, "DescendantFonts")
+        if df[0] != "array":  # only handle xref specifications
+            return None, None
+        df_xref = int(df[1][1:-1].replace("0 R", ""))
+        widths = doc.xref_get_key(df_xref, "W")
+        if widths[0] != "array":  # no widths key found
+            widths = None
+        else:
+            widths = widths[1]
+        dwidths = doc.xref_get_key(df_xref, "DW")
+        if dwidths[0] != "int":
+            dwidths = None
+        else:
+            dwidths = dwidths[1]
+        return widths, dwidths
+
+    def set_old_widths(xref, widths, dwidths):
+        """Restore the old '/W' and '/DW' in subsetted font.
+
+        If either parameter is None or evaluates to False, the corresponding
+        dictionary key will be set to null.
+        """
+        df = doc.xref_get_key(xref, "DescendantFonts")
+        if df[0] != "array":  # only handle xref specs
+            return None
+        df_xref = int(df[1][1:-1].replace("0 R", ""))
+        if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[
+            0
+        ] != "null":
+            doc.xref_set_key(df_xref, "W", "null")
+        else:
+            doc.xref_set_key(df_xref, "W", widths)
+        if (type(dwidths) is not str or not dwidths) and doc.xref_get_key(
+            df_xref, "DW"
+        )[0] != "null":
+            doc.xref_set_key(df_xref, "DW", "null")
+        else:
+            doc.xref_set_key(df_xref, "DW", dwidths)
+        return None
+
+    def set_subset_fontname(new_xref):
+        """Generate a name prefix to tag a font as subset.
+
+        We use a random generator to select 6 upper case ASCII characters.
+        The prefixed name must be put in the font xref as the "/BaseFont" value
+        and in the FontDescriptor object as the '/FontName' value.
+        """
+        # The following generates a prefix like 'ABCDEF+'
+        import random
+        import string
+        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
+        font_str = doc.xref_object(new_xref, compressed=True)
+        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
+        df = doc.xref_get_key(new_xref, "DescendantFonts")
+        if df[0] == "array":
+            df_xref = int(df[1][1:-1].replace("0 R", ""))
+            fd = doc.xref_get_key(df_xref, "FontDescriptor")
+            if fd[0] == "xref":
+                fd_xref = int(fd[1].replace("0 R", ""))
+                fd_str = doc.xref_object(fd_xref, compressed=True)
+                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
+                doc.update_object(fd_xref, fd_str)
+        doc.update_object(new_xref, font_str)
+
+    def build_subset(buffer, unc_set, gid_set):
+        """Build font subset using fontTools.
+
+        The font is written to a temporary directory, the fontTools
+        subsetter is invoked on that file, and the result is read back.
+        Neither 'unc_set' nor 'gid_set' is modified.
+
+        Args:
+            buffer: (bytes) the font given as a binary buffer.
+            unc_set: (set) unicodes to keep. If it contains 0xFFFD, the
+                subset is selected by glyph id ('gid_set') instead.
+            gid_set: (set) glyph ids to keep; only used in the 0xFFFD case.
+        Returns:
+            Either None if subsetting is unsuccessful or the subset font buffer.
+        Raises:
+            ImportError: if fontTools cannot be imported.
+        """
+        try:
+            import fontTools.subset as fts
+        except ImportError:
+            if g_exceptions_verbose:    pymupdf.exception_info()
+            pymupdf.message("This method requires fontTools to be installed.")
+            raise
+        import tempfile
+
+        # The directory is created fresh for this call, so no output file of
+        # an earlier run can exist in it and nothing has to be removed first.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            oldfont_path = f"{tmp_dir}/oldfont.ttf"
+            newfont_path = f"{tmp_dir}/newfont.ttf"
+            uncfile_path = f"{tmp_dir}/uncfile.txt"
+            args = [
+                oldfont_path,
+                "--retain-gids",
+                f"--output-file={newfont_path}",
+                "--layout-features=*",
+                "--passthrough-tables",
+                "--ignore-missing-glyphs",
+                "--ignore-missing-unicodes",
+                "--symbol-cmap",
+            ]
+
+            # Select what to keep, either by glyph id or by unicode. New sets
+            # are built so that the caller's sets stay untouched.
+            # Glyph id 189 resp. unicode 255 are always included; the reason
+            # is not visible in this code - TODO confirm.
+            if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
+                args.append(f"--gids-file={uncfile_path}")
+                lines = ["%i\n" % gid for gid in set(gid_set) | {189}]
+            else:
+                args.append(f"--unicodes-file={uncfile_path}")
+                lines = ["%04x\n" % unc for unc in set(unc_set) | {255}]
+
+            # store glyph ids or unicodes as file
+            with open(uncfile_path, "w", encoding='utf8') as unc_file:
+                unc_file.writelines(lines)
+
+            # store fontbuffer as a file
+            with open(oldfont_path, "wb") as fontfile:
+                fontfile.write(buffer)
+
+            # Best effort: any failure of the subsetter or of reading back its
+            # output is reported and turned into a None result.
+            try:  # invoke fontTools subsetter
+                fts.main(args)
+                font = pymupdf.Font(fontfile=newfont_path)
+                new_buffer = font.buffer  # subset font binary
+                if font.glyph_count == 0:  # intercept empty font
+                    new_buffer = None
+            except Exception:
+                pymupdf.exception_info()
+                new_buffer = None
+        return new_buffer
+
+    def repl_fontnames(doc):
+        """Populate 'font_buffers'.
+
+        For each font candidate, store its xref and the list of names
+        by which PDF text may refer to it (there may be multiple).
+
+        'font_buffers' maps the font binary to a tuple
+        (name_set, xref_set, (unicode_set, glyph_id_set)). The last two sets
+        are created empty here.
+        """
+        import re
+
+        hex_code = re.compile(r"#([0-9A-Fa-f]{2})")
+
+        def norm_name(name):
+            """Recreate font name that contains PDF hex codes.
+
+            E.g. #20 -> space, chr(32)
+
+            Each '#xx' is decoded exactly once in a single pass: a '#' that
+            results from decoding '#23' is not decoded again, and a '#' not
+            followed by two hex digits is left as it is.
+            """
+            return hex_code.sub(lambda m: chr(int(m.group(1), 16)), name)
+
+        def get_fontnames(doc, item):
+            """Return a list of fontnames for an item of page.get_fonts().
+
+            There may be multiple names e.g. for Type0 fonts.
+            """
+            names = [item[3]]
+
+            def add_name(raw):
+                fontname = norm_name(raw)
+                if fontname not in names:
+                    names.append(fontname)
+
+            basefont = doc.xref_get_key(item[0], "BaseFont")
+            if basefont[0] == "name":  # value has the form '/Name'
+                add_name(basefont[1][1:])
+
+            descendents = doc.xref_get_key(item[0], "DescendantFonts")
+            if descendents[0] != "array":
+                return names
+            descendents = descendents[1][1:-1]  # strip the array brackets
+            if descendents.endswith(" 0 R"):
+                # array holds a reference: look at the referenced object
+                xref = int(descendents[:-4])
+                descendents = doc.xref_object(xref, compressed=True)
+            p1 = descendents.find("/BaseFont")
+            if p1 < 0:
+                return names
+            p2 = descendents.find("/", p1 + 1)  # slash starting the name value
+            if p2 < 0:
+                return names
+            # The name ends at the next key or at the end of the dictionary,
+            # whichever comes first. A delimiter that does not occur (-1)
+            # must not take part in the comparison.
+            ends = [
+                p
+                for p in (
+                    descendents.find("/", p2 + 1),
+                    descendents.find(">>", p2 + 1),
+                )
+                if p >= 0
+            ]
+            stop = min(ends) if ends else len(descendents)
+            add_name(descendents[p2 + 1 : stop])
+            return names
+
+        # The same font xref is usually listed for many pages. Only the xref,
+        # the extension and the basefont of an entry are used below, so one
+        # visit per xref yields the same 'font_buffers' content and avoids
+        # extracting and loading the font again for every page.
+        seen_xrefs = set()
+        for i in range(doc.page_count):
+            for f in doc.get_page_fonts(i, full=True):
+                font_xref = f[0]  # font xref
+                if font_xref in seen_xrefs:
+                    continue
+                seen_xrefs.add(font_xref)
+                font_ext = f[1]  # font file extension
+                basename = f[3]  # font basename
+
+                if font_ext not in (  # skip if not supported by fontTools
+                    "otf",
+                    "ttf",
+                    "woff",
+                    "woff2",
+                ):
+                    continue
+                # skip fonts which already are subsets
+                if len(basename) > 6 and basename[6] == "+":
+                    continue
+
+                extr = doc.extract_font(font_xref)
+                fontbuffer = extr[-1]
+                names = get_fontnames(doc, f)
+                name_set, xref_set, subsets = font_buffers.get(
+                    fontbuffer, (set(), set(), (set(), set()))
+                )
+                xref_set.add(font_xref)
+                name_set.update(names)
+                # also register the name stored inside the font binary
+                font = pymupdf.Font(fontbuffer=fontbuffer)
+                name_set.add(font.name)
+                del font
+                font_buffers[fontbuffer] = (name_set, xref_set, subsets)
+
+    def find_buffer_by_name(name):
+        """Return the first font binary in 'font_buffers' known under 'name'.
+
+        Returns None if no entry lists that name.
+        """
+        matches = (
+            candidate
+            for candidate, entry in font_buffers.items()
+            if name in entry[0]  # entry[0] is the set of font names
+        )
+        return next(matches, None)
+
+    # -----------------
+    # main function
+    # -----------------
+    # Steps: (1) collect candidate fonts, (2) record which unicodes / glyph
+    # ids the page text uses per font, (3) build a subset per font and let
+    # every original font xref point to the subset definition.
+    repl_fontnames(doc)  # populate font information
+    if not font_buffers:  # nothing found to do
+        if verbose:
+            pymupdf.message(f'No fonts to subset.')
+        return 0
+
+    # byte totals: all candidate fonts before, successfully built subsets after
+    old_fontsize = 0
+    new_fontsize = 0
+    for fontbuffer in font_buffers.keys():
+        old_fontsize += len(fontbuffer)
+
+    # Scan page text for usage of subsettable fonts
+    for page in doc:
+        # go through the text and extend set of used glyphs by font
+        # we use a modified MuPDF trace device, which delivers us glyph ids.
+        for span in page.get_texttrace():
+            if type(span) is not dict:  # skip useless information
+                continue
+            # only the first 33 characters of the name are compared;
+            # the reason for this limit is not visible here - TODO confirm
+            fontname = span["font"][:33]  # fontname for the span
+            buffer = find_buffer_by_name(fontname)
+            if buffer is None:  # span uses a font that is no candidate
+                continue
+            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
+            for c in span["chars"]:
+                set_ucs.add(c[0])  # unicode
+                set_gid.add(c[1])  # glyph id
+            # the sets were updated in place; this stores the same objects back
+            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))
+
+    # build the font subsets
+    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
+        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
+        # any one of the known names; only used in the messages below
+        fontname = list(name_set)[0]
+        if new_buffer is None or len(new_buffer) >= len(old_buffer):
+            # subset was not created or did not get smaller
+            if verbose:
+                pymupdf.message(f'Cannot subset {fontname!r}.')
+            continue
+        if verbose:
+            pymupdf.message(f"Built subset of font {fontname!r}.")
+        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
+        new_xref = val[0]  # get its xref
+        set_subset_fontname(new_xref)  # tag fontname as subset font
+        font_str = doc.xref_object(  # get its object definition
+            new_xref,
+            compressed=True,
+        )
+        # walk through the original font xrefs and replace each by the subset def
+        for font_xref in xref_set:
+            # we need the original '/W' and '/DW' width values
+            width_table, def_width = get_old_widths(font_xref)
+            # ... and replace original font definition at xref with it
+            doc.update_object(font_xref, font_str)
+            # now copy over old '/W' and '/DW' values
+            if width_table or def_width:
+                set_old_widths(font_xref, width_table, def_width)
+        # 'new_xref' remains unused in the PDF and must be removed
+        # by garbage collection.
+        new_fontsize += len(new_buffer)
+
+    # NOTE(review): 'old_fontsize' also contains the fonts that were skipped
+    # above via 'continue', while 'new_fontsize' only contains the subsets
+    # that were built. The full size of every skipped font is therefore part
+    # of the returned difference - confirm that this is intended.
+    return old_fontsize - new_fontsize
+
+
+# -------------------------------------------------------------------
+# Copy XREF object to another XREF
+# -------------------------------------------------------------------
+def xref_copy(doc: pymupdf.Document, source: int, target: int, *, keep: list = None) -> None:
+    """Copy a PDF dictionary object to another one given their xref numbers.
+
+    Args:
+        doc: PDF document object
+        source: source xref number
+        target: target xref number, the xref must already exist
+        keep: an optional list of 1st level keys in target that should not be
+              removed before copying. A kept key is still overwritten if
+              the source contains the same key.
+    Notes:
+        This works similar to the copy() method of dictionaries in Python. The
+        source may be a stream object.
+        If source and target are the same xref, nothing is done.
+    """
+    if source == target:
+        # Emptying the target below would destroy the only copy of the data.
+        return
+
+    if doc.xref_is_stream(source):
+        # read new xref stream, maintaining compression
+        stream = doc.xref_stream_raw(source)
+        doc.update_stream(
+            target,
+            stream,
+            compress=False,  # keeps source compression
+            new=True,  # in case target is no stream
+        )
+
+    # empty the target completely, observe exceptions
+    if keep is None:
+        keep = []
+    for key in doc.xref_get_keys(target):
+        if key in keep:
+            continue
+        doc.xref_set_key(target, key, "null")
+    # copy over all source dict items
+    for key in doc.xref_get_keys(source):
+        value_type, value = doc.xref_get_key(source, key)
+        if value_type == "string":
+            # xref_get_key() delivers the decoded text of a PDF string
+            # without its delimiters, whereas xref_set_key() expects PDF
+            # syntax. Turn the text into a PDF string again.
+            value = pymupdf.get_pdf_str(value)
+        doc.xref_set_key(target, key, value)
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:37:51 +0200
parents
children	a6bc019ac0b2