Mercurial > hgrepos > Python2 > PyMuPDF

diff src/__init__.py @ 41:71bcc18e306f
MERGE: New upstream PyMuPDF v1.26.5 including MuPDF v1.26.10 BUGS: Needs some additional changes yet. Not yet tested.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Sat, 11 Oct 2025 15:24:40 +0200
parents: 3b13504f9d89 a6bc019ac0b2
children: 4621bd954a09
--- a/src/__init__.py	Tue Sep 23 10:27:15 2025 +0200
+++ b/src/__init__.py	Sat Oct 11 15:24:40 2025 +0200
@@ -17,7 +17,6 @@
 import os
 import pathlib
 import glob
-import packaging.version
 import re
 import string
 import sys
@@ -384,6 +383,7 @@
 from ._build import pymupdf_git_diff    # noqa F401
 from ._build import pymupdf_git_sha     # noqa F401
 from ._build import pymupdf_version     # noqa F401
+from ._build import pymupdf_version_tuple   # noqa F401
 from ._build import swig_version        # noqa F401
 from ._build import swig_version_tuple  # noqa F401
 
@@ -394,7 +394,6 @@
 
 # Versions as tuples; useful when comparing versions.
 #
-pymupdf_version_tuple = packaging.version.Version(pymupdf_version).release
 mupdf_version_tuple = packaging.version.Version(mupdf_version).release
 
 assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
@@ -1036,6 +1035,12 @@
         stream = JM_BinFromBuffer(buf)
         res['stream'] = stream
         return res
+    
+    def get_text(self, *args, **kwargs):
+        """Forward to utils.get_text() with this object as first argument.
+
+        All positional and keyword arguments are passed through unchanged and
+        the result of utils.get_text() is returned as is.
+        """
+        return utils.get_text(self, *args, **kwargs)
+
+    def get_textbox(self, *args, **kwargs):
+        """Forward to utils.get_textbox() with this object as first argument.
+
+        All positional and keyword arguments are passed through unchanged and
+        the result of utils.get_textbox() is returned as is.
+        """
+        return utils.get_textbox(self, *args, **kwargs)
 
     def get_textpage(self, clip=None, flags=0):
         """Make annotation TextPage."""
@@ -3059,6 +3064,14 @@
         v = JM_pdf_obj_from_str( pdf, font)
         mupdf.pdf_dict_put( fonts, k, v)
 
+    def del_toc_item(
+            self,
+            idx: int,
+            ) -> None:
+        """Delete TOC / bookmark item by index."""
+        xref = self.get_outline_xrefs()[idx]
+        self._remove_toc_item(xref)
+
     def _delToC(self):
         """Delete the TOC."""
         if self.is_closed or self.is_encrypted:
@@ -3104,6 +3117,454 @@
             raise ValueError( MSG_BAD_XREF)
         mupdf.pdf_delete_object(pdf, xref)
 
+    def _do_links(
+            doc1: 'Document',
+            doc2: 'Document',
+            from_page: int = -1,
+            to_page: int = -1,
+            start_at: int = -1,
+            ) -> None:
+        """Insert links contained in copied page range into destination PDF.
+
+        Parameter values **must** equal those of method insert_pdf(), which must
+        have been previously executed.
+        """
+        #pymupdf.log( 'utils.do_links()')
+        # --------------------------------------------------------------------------
+        # internal function to create the actual "/Annots" object string
+        # --------------------------------------------------------------------------
+        def cre_annot(lnk, xref_dst, pno_src, ctm):
+            """Create annotation object string for a passed-in link."""
+
+            r = lnk["from"] * ctm  # rect in PDF coordinates
+            rect = _format_g(tuple(r))
+            if lnk["kind"] == LINK_GOTO:
+                txt = annot_skel["goto1"]  # annot_goto
+                idx = pno_src.index(lnk["page"])
+                p = lnk["to"] * ctm  # target point in PDF coordinates
+                annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
+
+            elif lnk["kind"] == LINK_GOTOR:
+                if lnk["page"] >= 0:
+                    txt = annot_skel["gotor1"]  # annot_gotor
+                    pnt = lnk.get("to", Point(0, 0))  # destination point
+                    if type(pnt) is not Point:
+                        pnt = Point(0, 0)
+                    annot = txt(
+                        lnk["page"],
+                        pnt.x,
+                        pnt.y,
+                        lnk["zoom"],
+                        lnk["file"],
+                        lnk["file"],
+                        rect,
+                    )
+                else:
+                    txt = annot_skel["gotor2"]  # annot_gotor_n
+                    to = get_pdf_str(lnk["to"])
+                    to = to[1:-1]
+                    f = lnk["file"]
+                    annot = txt(to, f, rect)
+
+            elif lnk["kind"] == LINK_LAUNCH:
+                txt = annot_skel["launch"]  # annot_launch
+                annot = txt(lnk["file"], lnk["file"], rect)
+
+            elif lnk["kind"] == LINK_URI:
+                txt = annot_skel["uri"]  # annot_uri
+                annot = txt(lnk["uri"], rect)
+
+            else:
+                annot = ""
+
+            return annot
+
+        # --------------------------------------------------------------------------
+
+        # validate & normalize parameters
+        if from_page < 0:
+            fp = 0
+        elif from_page >= doc2.page_count:
+            fp = doc2.page_count - 1
+        else:
+            fp = from_page
+
+        if to_page < 0 or to_page >= doc2.page_count:
+            tp = doc2.page_count - 1
+        else:
+            tp = to_page
+
+        if start_at < 0:
+            raise ValueError("'start_at' must be >= 0")
+        sa = start_at
+
+        incr = 1 if fp <= tp else -1  # page range could be reversed
+
+        # lists of source / destination page numbers
+        pno_src = list(range(fp, tp + incr, incr))
+        pno_dst = [sa + i for i in range(len(pno_src))]
+
+        # lists of source / destination page xrefs
+        xref_src = []
+        xref_dst = []
+        for i in range(len(pno_src)):
+            p_src = pno_src[i]
+            p_dst = pno_dst[i]
+            old_xref = doc2.page_xref(p_src)
+            new_xref = doc1.page_xref(p_dst)
+            xref_src.append(old_xref)
+            xref_dst.append(new_xref)
+
+        # create the links for each copied page in destination PDF
+        for i in range(len(xref_src)):
+            page_src = doc2[pno_src[i]]  # load source page
+            links = page_src.get_links()  # get all its links
+            #log( '{pno_src=}')
+            #log( '{type(page_src)=}')
+            #log( '{page_src=}')
+            #log( '{=i len(links)}')
+            if len(links) == 0:  # no links there
+                page_src = None
+                continue
+            ctm = ~page_src.transformation_matrix  # calc page transformation matrix
+            page_dst = doc1[pno_dst[i]]  # load destination page
+            link_tab = []  # store all link definitions here
+            for l in links:
+                if l["kind"] == LINK_GOTO and (l["page"] not in pno_src):
+                    continue  # GOTO link target not in copied pages
+                annot_text = cre_annot(l, xref_dst, pno_src, ctm)
+                if annot_text:
+                    link_tab.append(annot_text)
+            if link_tab != []:
+                page_dst._addAnnot_FromString( tuple(link_tab))
+        #log( 'utils.do_links() returning.')
+
+    def _do_widgets(
+            tar: 'Document',
+            src: 'Document',
+            graftmap,
+            from_page: int = -1,
+            to_page: int = -1,
+            start_at: int = -1,
+            join_duplicates=0,
+            ) -> None:
+        """Insert widgets of copied page range into target PDF.
+
+        Parameter values **must** equal those of method insert_pdf() which
+        must have been previously executed.
+
+        Args:
+            tar: target PDF that received the copied pages.
+            src: source PDF the pages were copied from.
+            graftmap: graft map handed to mupdf.pdf_graft_mapped_object() for
+                every object that is copied from source to target.
+            from_page: first source page number of the copied range.
+            to_page: last source page number; if smaller than 'from_page' the
+                range is walked backwards.
+            start_at: target page number of the first copied page.
+            join_duplicates: if true, fields with equal names are joined under
+                a common parent, otherwise the duplicate field is renamed.
+        """
+        if not src.is_form_pdf:  # nothing to do: source PDF has no fields
+            return
+
+        def clean_kid_parents(acro_fields):
+            """ Make sure all kids have correct "Parent" pointers."""
+            # Only the direct kids of each entry in "Fields" are visited.
+            for i in range(acro_fields.pdf_array_len()):
+                parent = acro_fields.pdf_array_get(i)
+                kids = parent.pdf_dict_get(PDF_NAME("Kids"))
+                for j in range(kids.pdf_array_len()):
+                    kid = kids.pdf_array_get(j)
+                    kid.pdf_dict_put(PDF_NAME("Parent"), parent)
+
+        def join_widgets(pdf, acro_fields, xref1, xref2, name):
+            """Called for each pair of widgets having the same name.
+
+            Args:
+                pdf: target MuPDF document
+                acro_fields: object Root/AcroForm/Fields
+                xref1, xref2: widget xrefs having same names
+                name: (str) the name
+
+            Result:
+                Defined or updated widget parent that points to both widgets.
+            """
+
+            def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
+                """Merge widget in xref2 into "Kids" list of widget xref1.
+
+                Args:
+                    xref1, kids1: target widget and its "Kids" array.
+                    xref2, kids2: source widget and its "Kids" array (may be empty).
+                """
+                # make indirect objects from widgets
+                w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
+                w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
+                # find source widget in "Fields" array
+                idx = acro_fields.pdf_array_find(w2_ind)
+                acro_fields.pdf_array_delete(idx)
+
+                if not kids2.pdf_is_array():  # source widget has no kids
+                    widget = mupdf.pdf_load_object(pdf, xref2)
+
+                    # delete name from widget and insert target as parent
+                    widget.pdf_dict_del(PDF_NAME("T"))
+                    widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
+
+                    # put in target Kids
+                    kids1.pdf_array_push(w2_ind)
+                else:  # copy source kids to target kids
+                    for i in range(kids2.pdf_array_len()):
+                        kid = kids2.pdf_array_get(i)
+                        kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
+                        kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
+                        kids1.pdf_array_push(kid_ind)
+
+            def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
+                """Make new "Parent" for two widgets with same name.
+
+                Args:
+                    xref1, w1: first widget
+                    xref2, w2: second widget
+                    name: field name
+
+                Result:
+                    Both widgets have no "Kids". We create a new object with the
+                    name and a "Kids" array containing the widgets.
+                    Original widgets must be removed from AcroForm/Fields.
+                """
+                # make new "Parent" object
+                new = mupdf.pdf_new_dict(pdf, 5)
+                new.pdf_dict_put_text_string(PDF_NAME("T"), name)
+                kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2)
+                new_obj = mupdf.pdf_add_object(pdf, new)
+                new_obj_xref = new_obj.pdf_to_num()
+                new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)
+
+                # copy over some required source widget properties
+                # ("FT" and "AA" are moved from the first widget to the new parent)
+                ft = w1.pdf_dict_get(PDF_NAME("FT"))
+                w1.pdf_dict_del(PDF_NAME("FT"))
+                new_obj.pdf_dict_put(PDF_NAME("FT"), ft)
+
+                aa = w1.pdf_dict_get(PDF_NAME("AA"))
+                w1.pdf_dict_del(PDF_NAME("AA"))
+                new_obj.pdf_dict_put(PDF_NAME("AA"), aa)
+
+                # remove name field, insert "Parent" field in source widgets
+                w1.pdf_dict_del(PDF_NAME("T"))
+                w1.pdf_dict_put(PDF_NAME("Parent"), new_ind)
+                w2.pdf_dict_del(PDF_NAME("T"))
+                w2.pdf_dict_put(PDF_NAME("Parent"), new_ind)
+
+                # put source widgets in "kids" array
+                ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
+                ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
+                kids.pdf_array_push(ind1)
+                kids.pdf_array_push(ind2)
+
+                # remove source widgets from "AcroForm/Fields"
+                idx = acro_fields.pdf_array_find(ind1)
+                acro_fields.pdf_array_delete(idx)
+                idx = acro_fields.pdf_array_find(ind2)
+                acro_fields.pdf_array_delete(idx)
+
+                acro_fields.pdf_array_push(new_ind)
+
+            w1 = mupdf.pdf_load_object(pdf, xref1)
+            w2 = mupdf.pdf_load_object(pdf, xref2)
+            kids1 = w1.pdf_dict_get(PDF_NAME("Kids"))
+            kids2 = w2.pdf_dict_get(PDF_NAME("Kids"))
+
+            # check which widget has a suitable "Kids" array
+            if kids1.pdf_is_array():
+                re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
+            elif kids2.pdf_is_array():
+                re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
+            else:
+                new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order
+
+        def get_kids(parent, kids_list):
+            """Return xref list of leaf kids for a parent.
+
+            Call with an empty list.
+            """
+            kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids"))
+            if not kids.pdf_is_array():
+                return kids_list
+            for i in range(kids.pdf_array_len()):
+                kid = kids.pdf_array_get(i)
+                # NOTE(review): "Kids" is tested with pdf_is_array() above but
+                # with pdf_is_dict() here. If a kid's "Kids" entry is an array
+                # too, this branch presumably never recurses - confirm whether
+                # pdf_is_array() was intended.
+                if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))):
+                    kids_list = get_kids(kid, kids_list)
+                else:
+                    kids_list.append(kid.pdf_to_num())
+            return kids_list
+
+        def kids_xrefs(widget):
+            """Get the xref of the widget's "Parent" and the list of leaf widgets.
+
+            Returns (0, []) if the widget has no "Parent" entry.
+            """
+            kids_list = []
+            parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent"))
+            parent_xref = parent.pdf_to_num()
+            if parent_xref == 0:
+                return parent_xref, kids_list
+            kids_list = get_kids(parent, kids_list)
+            return parent_xref, kids_list
+
+        def deduplicate_names(pdf, acro_fields, join_duplicates=False):
+            """Handle any widget name duplicates caused by the merge."""
+            names = {}  # key is a widget name, value a list of widgets having it.
+
+            # extract all names and widgets in "AcroForm/Fields"
+            for i in range(mupdf.pdf_array_len(acro_fields)):
+                wobject = mupdf.pdf_array_get(acro_fields, i)
+                xref = wobject.pdf_to_num()
+
+                # extract widget name and collect widget(s) using it
+                T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T"))
+                xrefs = names.get(T, [])
+                xrefs.append(xref)
+                names[T] = xrefs
+
+            for name, xrefs in names.items():
+                if len(xrefs) < 2:
+                    continue
+                # Any widgets beyond the first two sharing this name are ignored.
+                xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
+                if join_duplicates:  # combine fields with equal names
+                    join_widgets(pdf, acro_fields, xref0, xref1, name)
+                else:  # make field names unique
+                    newname = name + f" [{xref1}]"  # append this to the name
+                    wobject = mupdf.pdf_load_object(pdf, xref1)
+                    wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname)
+
+            clean_kid_parents(acro_fields)
+
+        def get_acroform(doc):
+            """Retrieve the AcroForm dictionary from a PDF."""
+            pdf = mupdf.pdf_document_from_fz_document(doc)
+            # AcroForm (= central form field info)
+            return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")
+
+        tarpdf = mupdf.pdf_document_from_fz_document(tar)
+        srcpdf = mupdf.pdf_document_from_fz_document(src)
+
+        if tar.is_form_pdf:
+            # target is a Form PDF, so use it to include source fields
+            acro = get_acroform(tar)
+            # Important arrays in AcroForm
+            acro_fields = acro.pdf_dict_get(PDF_NAME("Fields"))
+            tar_co = acro.pdf_dict_get(PDF_NAME("CO"))
+            if not tar_co.pdf_is_array():
+                tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
+        else:
+            # target is no Form PDF, so copy over source AcroForm
+            acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy
+
+            # Clear "Fields" and "CO" arrays: will be populated by page fields.
+            # This is required to avoid copying unneeded objects.
+            acro.pdf_dict_del(PDF_NAME("Fields"))
+            acro.pdf_dict_put_array(PDF_NAME("Fields"), 5)
+            acro.pdf_dict_del(PDF_NAME("CO"))
+            acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
+
+            # Enrich AcroForm for copying to target
+            acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)
+
+            # Insert AcroForm into target PDF
+            acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
+            acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields"))
+            tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO"))
+
+            # get its xref and insert it into target catalog
+            tar_xref = acro_tar.pdf_to_num()
+            acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+            root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root"))
+            root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind)
+
+        # NOTE(review): from_page / to_page are used exactly as passed in (no
+        # clamping, negative defaults are not resolved here) - presumably the
+        # caller hands over already normalized values; confirm.
+        if from_page <= to_page:
+            src_range = range(from_page, to_page + 1)
+        else:
+            src_range = range(from_page, to_page - 1, -1)
+
+        parents = {}  # information about widget parents
+
+        # remove "P" owning page reference from all widgets of all source pages
+        for i in src_range:
+            src_page = src[i]
+            for xref in [
+                xref
+                for xref, wtype, _ in src_page.annot_xrefs()
+                if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
+            ]:
+                w_obj = mupdf.pdf_load_object(srcpdf, xref)
+                w_obj.pdf_dict_del(PDF_NAME("P"))
+
+                # get the widget's parent structure
+                parent_xref, old_kids = kids_xrefs(w_obj)
+                if parent_xref:
+                    # keyed by the parent's xref in the source PDF; the "new_*"
+                    # entries are filled in once the parent has been copied
+                    parents[parent_xref] = {
+                        "new_xref": 0,
+                        "old_kids": old_kids,
+                        "new_kids": [],
+                    }
+        # Copy over Parent widgets first - they are not page-dependent
+        for xref in parents.keys():  # pylint: disable=consider-using-dict-items
+            parent = mupdf.pdf_load_object(srcpdf, xref)
+            parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
+            parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
+            kids_xrefs_new = get_kids(parent_tar, [])
+            parent_xref_new = parent_tar.pdf_to_num()
+            parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
+            acro_fields.pdf_array_push(parent_ind)
+            parents[xref]["new_xref"] = parent_xref_new
+            parents[xref]["new_kids"] = kids_xrefs_new
+
+        for i in range(len(src_range)):
+            # read the i-th copied over page in target
+            tar_page = tar[start_at + i]
+
+            # read the original page in the source PDF
+            src_page = src[src_range[i]]
+
+            # now walk through source page widgets and copy over
+            w_xrefs = [  # widget xrefs of the source page
+                xref
+                for xref, wtype, _ in src_page.annot_xrefs()
+                if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
+            ]
+            if not w_xrefs:  # no widgets on this source page
+                continue
+
+            # convert to formal PDF page
+            tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)
+
+            # extract annotations array
+            tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots"))
+            if not mupdf.pdf_is_array(tar_annots):
+                tar_annots = mupdf.pdf_dict_put_array(
+                    tar_page_pdf.obj(), PDF_NAME("Annots"), 5
+                )
+
+            for xref in w_xrefs:
+                w_obj = mupdf.pdf_load_object(srcpdf, xref)
+
+                # check if field takes part in inter-field validations
+                is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))
+
+                # check if parent of widget already in target
+                parent_xref = mupdf.pdf_to_num(
+                    w_obj.pdf_dict_get(PDF_NAME("Parent"))
+                )
+                if parent_xref == 0:  # widget has no parent: copy the widget itself
+                    try:
+                        w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
+                    except Exception as e:
+                        message_warning(f"cannot copy widget at {xref=}: {e}")
+                        continue
+                    w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
+                    tar_xref = w_obj_tar.pdf_to_num()
+                    w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+                    mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
+                    mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
+                else:
+                    # parent was copied above: map the source kid to the target
+                    # kid found at the same position of the parent's kids list
+                    parent = parents[parent_xref]
+                    idx = parent["old_kids"].index(xref)  # search for xref in parent
+                    tar_xref = parent["new_kids"][idx]
+                    w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
+                    mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
+
+                # Into "AcroForm/CO" if a computation field.
+                if is_aac:
+                    mupdf.pdf_array_push(tar_co, w_obj_tar_ind)
+
+        deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
+
     def _embeddedFileGet(self, idx):
         pdf = _as_pdf_document(self)
         names = mupdf.pdf_dict_getl(
@@ -4267,6 +4728,107 @@
 
         self._reset_page_refs()
 
+    def get_char_widths(
+            doc: 'Document',
+            xref: int,
+            limit: int = 256,
+            idx: int = 0,
+            fontdict: OptDict = None,
+            ) -> list:
+        """Get list of glyph information of a font.
+
+        Notes:
+            Must be provided by its XREF number. If we already dealt with the
+            font, it will be recorded in doc.FontInfos. Otherwise we insert an
+            entry there.
+            Finally we return the glyphs for the font. This is a list of
+            (glyph, width) where glyph is an integer controlling the char
+            appearance, and width is a float controlling the char's spacing:
+            width * fontsize is the actual space.
+            For 'simple' fonts, glyph == ord(char) will usually be true.
+            Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here.
+        """
+        fontinfo = CheckFontInfo(doc, xref)
+        if fontinfo is None:  # not recorded yet: create it
+            if fontdict is None:
+                name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref)
+                fontdict = {
+                    "name": name,
+                    "type": stype,
+                    "ext": ext,
+                    "ascender": asc,
+                    "descender": dsc,
+                }
+            else:
+                name = fontdict["name"]
+                ext = fontdict["ext"]
+                stype = fontdict["type"]
+                ordering = fontdict["ordering"]
+                simple = fontdict["simple"]
+
+            if ext == "":
+                raise ValueError("xref is not a font")
+
+            # check for 'simple' fonts
+            if stype in ("Type1", "MMType1", "TrueType"):
+                simple = True
+            else:
+                simple = False
+
+            # check for CJK fonts
+            if name in ("Fangti", "Ming"):
+                ordering = 0
+            elif name in ("Heiti", "Song"):
+                ordering = 1
+            elif name in ("Gothic", "Mincho"):
+                ordering = 2
+            elif name in ("Dotum", "Batang"):
+                ordering = 3
+            else:
+                ordering = -1
+
+            fontdict["simple"] = simple
+
+            if name == "ZapfDingbats":
+                glyphs = zapf_glyphs
+            elif name == "Symbol":
+                glyphs = symbol_glyphs
+            else:
+                glyphs = None
+
+            fontdict["glyphs"] = glyphs
+            fontdict["ordering"] = ordering
+            fontinfo = [xref, fontdict]
+            doc.FontInfos.append(fontinfo)
+        else:
+            fontdict = fontinfo[1]
+            glyphs = fontdict["glyphs"]
+            simple = fontdict["simple"]
+            ordering = fontdict["ordering"]
+
+        if glyphs is None:
+            oldlimit = 0
+        else:
+            oldlimit = len(glyphs)
+
+        mylimit = max(256, limit)
+
+        if mylimit <= oldlimit:
+            return glyphs
+
+        if ordering < 0:  # not a CJK font
+            glyphs = doc._get_char_widths(
+                xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
+            )
+        else:  # CJK fonts use char codes and width = 1
+            glyphs = None
+
+        fontdict["glyphs"] = glyphs
+        fontinfo[1] = fontdict
+        UpdateFontInfo(doc, fontinfo)
+
+        return glyphs
+
     def get_layer(self, config=-1):
         """Content of ON, OFF, RBGroups of an OC layer."""
         pdf = _as_pdf_document(self)
@@ -4324,6 +4886,23 @@
         xref = mupdf.pdf_create_object(pdf)
         return xref
 
+    def get_oc(doc: 'Document', xref: int) -> int:
+        """Return optional content object xref for an image or form xobject.
+
+        Args:
+            xref: (int) xref number of an image or form xobject.
+        """
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document close or encrypted")
+        t, name = doc.xref_get_key(xref, "Subtype")
+        if t != "name" or name not in ("/Image", "/Form"):
+            raise ValueError("bad object type at xref %i" % xref)
+        t, oc = doc.xref_get_key(xref, "OC")
+        if t != "xref":
+            return 0
+        rc = int(oc.replace("0 R", ""))
+        return rc
+    
     def get_ocgs(self):
         """Show existing optional content groups."""
         ci = mupdf.pdf_new_name( "CreatorInfo")
@@ -4356,7 +4935,11 @@
                         o = mupdf.pdf_array_get( intent, j)
                         if mupdf.pdf_is_name( o):
                             intents.append( mupdf.pdf_to_name( o))
-            hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
+            if mupdf_version_tuple >= (1, 27):
+                resource_stack = mupdf.PdfResourceStack()
+                hidden = mupdf.pdf_is_ocg_hidden( pdf, resource_stack, usage, ocg)
+            else:
+                hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
             item = {
                     "name": name,
                     "intent": intents,
@@ -4367,6 +4950,73 @@
             rc[ temp] = item
         return rc
 
+    def get_ocmd(doc: 'Document', xref: int) -> dict:
+        """Return the definition of an OCMD (optional content membership dictionary).
+
+        Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
+        /VE (visibility expression, PDF array). Via string manipulation, this
+        info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
+        and "ve" - ready to recycle as input for 'set_ocmd()'.
+        """
+
+        if xref not in range(doc.xref_length()):
+            raise ValueError("bad xref")
+        text = doc.xref_object(xref, compressed=True)
+        if "/Type/OCMD" not in text:
+            raise ValueError("bad object type")
+        textlen = len(text)
+
+        p0 = text.find("/OCGs[")  # look for /OCGs key
+        p1 = text.find("]", p0)
+        if p0 < 0 or p1 < 0:  # no OCGs found
+            ocgs = None
+        else:
+            ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
+            ocgs = list(map(int, ocgs))
+
+        p0 = text.find("/P/")  # look for /P policy key
+        if p0 < 0:
+            policy = None
+        else:
+            p1 = text.find("ff", p0)
+            if p1 < 0:
+                p1 = text.find("on", p0)
+            if p1 < 0:  # some irregular syntax
+                raise ValueError("bad object at xref")
+            else:
+                policy = text[p0 + 3 : p1 + 2]
+
+        p0 = text.find("/VE[")  # look for /VE visibility expression key
+        if p0 < 0:  # no visibility expression found
+            ve = None
+        else:
+            lp = rp = 0  # find end of /VE by finding last ']'.
+            p1 = p0
+            while lp < 1 or lp != rp:
+                p1 += 1
+                if not p1 < textlen:  # some irregular syntax
+                    raise ValueError("bad object at xref")
+                if text[p1] == "[":
+                    lp += 1
+                if text[p1] == "]":
+                    rp += 1
+            # p1 now positioned at the last "]"
+            ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
+            ve = (
+                ve.replace("/And", '"and",')
+                .replace("/Not", '"not",')
+                .replace("/Or", '"or",')
+            )
+            ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
+            import json
+            try:
+                ve = json.loads(ve)
+            except Exception:
+                exception_info()
+                message(f"bad /VE key: {ve!r}")
+                raise
+        return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
+
     def get_outline_xrefs(self):
         """Get list of outline xref numbers."""
         xrefs = []
@@ -4415,6 +5065,98 @@
             return [v[:-1] for v in val]
         return val
 
+    def get_page_labels(self):
+        """Return page label definitions in PDF document.
+
+        Returns:
+            A list of dictionaries with the following format:
+            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
+        """
+        # Jorj McKie, 2021-01-10
+        return [utils.rule_dict(item) for item in self._get_page_labels()]
+
+    def get_page_numbers(doc, label, only_one=False):
+        """Return a list of page numbers with the given label.
+
+        Args:
+            doc: PDF document object (resp. 'self').
+            label: (str) label.
+            only_one: (bool) stop searching after first hit.
+        Returns:
+            List of page numbers having this label.
+        """
+        # Jorj McKie, 2021-01-06
+
+        numbers = []
+        if not label:
+            return numbers
+        labels = doc._get_page_labels()
+        if labels == []:
+            return numbers
+        for i in range(doc.page_count):
+            plabel = utils.get_label_pno(i, labels)
+            if plabel == label:
+                numbers.append(i)
+                if only_one:
+                    break
+        return numbers
+    
+    def get_page_pixmap(
+            doc: 'Document',
+            pno: int,
+            *,
+            matrix: matrix_like = None,
+            dpi=None,
+            colorspace: Colorspace = None,
+            clip: rect_like = None,
+            alpha: bool = False,
+            annots: bool = True,
+            ) -> 'Pixmap':
+        """Create pixmap of document page by page number.
+
+        Notes:
+            Convenience function calling page.get_pixmap.
+        Args:
+            pno: (int) page number
+            matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity).
+            colorspace: (str,pymupdf.Colorspace) rgb, rgb, gray - case ignored, default csRGB.
+            clip: (irect-like) restrict rendering to this area.
+            alpha: (bool) include alpha channel
+            annots: (bool) also render annotations
+        """
+        if matrix is None:
+            matrix = Identity
+        if colorspace is None:
+            colorspace = csRGB
+        return doc[pno].get_pixmap(
+                matrix=matrix,
+                dpi=dpi, colorspace=colorspace,
+                clip=clip,
+                alpha=alpha,
+                annots=annots
+                )
+    
+    def get_page_text(
+            doc: 'Document',
+            pno: int,
+            option: str = "text",
+            clip: rect_like = None,
+            flags: OptInt = None,
+            textpage: 'TextPage' = None,
+            sort: bool = False,
+            ) -> typing.Any:
+        """Extract a document page's text by page number.
+
+        Notes:
+            Convenience function calling page.get_text().
+        Args:
+            pno: page number
+            option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
+        Returns:
+            output from page.TextPage().
+        """
+        return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort)
+    
     def get_page_xobjects(self, pno: int) -> list:
         """Retrieve a list of XObjects used on a page.
         """
@@ -4441,6 +5183,60 @@
             sigflag = mupdf.pdf_to_int(sigflags)
         return sigflag
 
+    def get_toc(
+            doc: 'Document',
+            simple: bool = True,
+            ) -> list:
+        """Return the document outline (table of contents) as a flat list.
+
+        Args:
+            simple: if True (default), each entry is [level, title, page].
+                If False, a link destination dictionary is appended as a
+                fourth item. For details see PyMuPDF's documentation.
+        Returns:
+            The list of entries in outline order; [] if there is no outline.
+        Raises:
+            ValueError: if the document is closed.
+        """
+        def recurse(item, entries, level):
+            """Walk one sibling chain, descending into children, and collect entries."""
+            while item and item.this.m_internal:
+                title = item.title if item.title else " "
+
+                # Only internal items that carry a URI get a 1-based page
+                # number; everything else is reported as -1.
+                page = -1
+                if not item.is_external and item.uri:
+                    if item.page == -1:
+                        page = doc.resolve_link(item.uri)[0] + 1
+                    else:
+                        page = item.page + 1
+
+                entry = [level, title, page]
+                if not simple:
+                    entry.append(utils.getLinkDict(item, doc))
+                entries.append(entry)
+
+                if item.down:
+                    entries = recurse(item.down, entries, level + 1)
+                item = item.next
+            return entries
+
+        # ensure document is open
+        if doc.is_closed:
+            raise ValueError("document closed")
+        doc.init_doc()
+        outline = doc.outline
+        if not outline:
+            return []
+        toc = recurse(outline, [], 1)
+        if doc.is_pdf and not simple:
+            doc._extend_toc_items(toc)
+        return toc
+    
     def get_xml_metadata(self):
         """Get document XML metadata."""
         xml = None
@@ -4458,6 +5254,31 @@
             rc = ''
         return rc
 
+    def has_annots(doc: 'Document') -> bool:
+        """Tell whether any page has an annotation that is neither link nor widget.
+
+        Raises:
+            ValueError: if the document is closed or is not a PDF.
+        """
+        if doc.is_closed:
+            raise ValueError("document closed")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        # item[1] of every page_annot_xrefs() entry is compared against the
+        # MuPDF annotation type codes; the first hit ends the scan.
+        return any(
+            item[1] not in (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_WIDGET)  # pylint: disable=no-member
+            for pno in range(doc.page_count)
+            for item in doc.page_annot_xrefs(pno)
+        )
+    
+    def has_links(doc: 'Document') -> bool:
+        """Tell whether any page of the PDF has a link annotation.
+
+        Raises:
+            ValueError: if the document is closed or is not a PDF.
+        """
+        if doc.is_closed:
+            raise ValueError("document closed")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        # stop at the first page_annot_xrefs() entry whose type code is LINK
+        return any(
+            item[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
+            for pno in range(doc.page_count)
+            for item in doc.page_annot_xrefs(pno)
+        )
+    
     def init_doc(self):
         if self.is_encrypted:
             raise ValueError("cannot initialize - document still encrypted")
@@ -4523,6 +5344,36 @@
                 final=final,
                 )
 
+    def insert_page(
+            doc: 'Document',
+            pno: int,
+            text: typing.Union[str, list, None] = None,
+            fontsize: float = 11,
+            width: float = 595,
+            height: float = 842,
+            fontname: str = "helv",
+            fontfile: OptStr = None,
+            color: OptSeq = (0,),
+            ) -> int:
+        """Add a new PDF page and optionally write some text on it.
+
+        Notes:
+            Combines pymupdf.Document.new_page() and pymupdf.Page.insert_text().
+            For parameter details see these methods.
+        Returns:
+            0 if 'text' is empty or None, otherwise the return value of
+            page.insert_text().
+        """
+        new_page = doc.new_page(pno=pno, width=width, height=height)
+        if text:
+            # the text always starts at the fixed insertion point (50, 72)
+            return new_page.insert_text(
+                (50, 72),
+                text,
+                fontsize=fontsize,
+                fontname=fontname,
+                fontfile=fontfile,
+                color=color,
+            )
+        return 0
+    
     def insert_pdf(
             self,
             docsrc,
@@ -5023,6 +5874,24 @@
         ret = mupdf.fz_needs_password( document)
         return ret
 
+    def new_page(
+            doc: 'Document',
+            pno: int = -1,
+            width: float = 595,
+            height: float = 842,
+            ) -> Page:
+        """Insert an empty page and hand back its page object.
+
+        Args:
+            pno: (int) insert before this page. Default: after last page.
+            width: (float) page width in points. Default: 595 (ISO A4 width).
+            height: (float) page height in points. Default 842 (ISO A4 height).
+        Returns:
+            The page object found at index 'pno' after the insertion.
+        """
+        doc._newPage(pno, width=width, height=height)
+        # look the page up by the same index that was used for inserting
+        created = doc[pno]
+        return created
+    
     def next_location(self, page_id):
         """Get (chapter, page) of next page."""
         if self.is_closed or self.is_encrypted:
@@ -5669,6 +6538,201 @@
         """ Save PDF incrementally"""
         return self.save(self.name, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
 
+    # ------------------------------------------------------------------------------
+    # Remove potentially sensitive data from a PDF. Similar to the Adobe
+    # Acrobat 'sanitize' function
+    # ------------------------------------------------------------------------------
+    def scrub(
+            doc: 'Document',
+            attached_files: bool = True,
+            clean_pages: bool = True,
+            embedded_files: bool = True,
+            hidden_text: bool = True,
+            javascript: bool = True,
+            metadata: bool = True,
+            redactions: bool = True,
+            redact_images: int = 0,
+            remove_links: bool = True,
+            reset_fields: bool = True,
+            reset_responses: bool = True,
+            thumbnails: bool = True,
+            xml_metadata: bool = True,
+            ) -> None:
+        """Remove potentially sensitive data from a PDF.
+
+        Args:
+            attached_files: overwrite the content of file attachment annotations.
+            clean_pages: clean the page /Contents. Required by 'hidden_text'
+                and 'redactions', which are switched off without it.
+            embedded_files: delete all embedded files.
+            hidden_text: remove text written while "3 Tr" is in effect.
+            javascript: replace /JavaScript action objects by empty ones.
+            metadata: remove the standard metadata.
+            redactions: apply redaction annotations found on a page.
+            redact_images: passed as 'images' to page.apply_redactions().
+            remove_links: delete all links of every page.
+            reset_fields: reset all form fields (widgets).
+            reset_responses: delete the responses of every annotation.
+            thumbnails: set the /Thumb entry of every page to null.
+            xml_metadata: remove XML metadata objects and references to them.
+        Raises:
+            ValueError: if the document is no PDF, is closed or encrypted, or
+                if an xref without an object definition is encountered.
+        """
+
+        def remove_hidden(cont_lines):
+            """Remove hidden text from a PDF page.
+
+            Args:
+                cont_lines: list of lines with /Contents content. Should have status
+                    from after page.clean_contents().
+
+            Returns:
+                List of /Contents lines from which hidden text has been removed,
+                or None if no "3 Tr" operator was encountered.
+
+            Notes:
+                The input must have been created after the page's /Contents object(s)
+                have been cleaned with page.clean_contents(). This ensures a standard
+                formatting: one command per line, single spaces between operators.
+                This allows for drastic simplification of this code.
+            """
+            out_lines = []  # will return this
+            in_text = False  # indicate if within BT/ET object
+            suppress = False  # indicate text suppression active
+            make_return = False
+            for line in cont_lines:
+                if line == b"BT":  # start of text object
+                    in_text = True  # switch on
+                    out_lines.append(line)  # output it
+                    continue
+                if line == b"ET":  # end of text object
+                    in_text = False  # switch off
+                    out_lines.append(line)  # output it
+                    continue
+                if line == b"3 Tr":  # text suppression operator
+                    suppress = True  # switch on
+                    make_return = True
+                    continue
+                # Any other 'Tr' line changes the render mode: b"3 Tr" itself
+                # was handled above. (The former extra test 'line[0] != b"3"'
+                # compared an int with bytes and therefore was always true.)
+                if line.endswith(b"Tr"):
+                    suppress = False  # text rendering changed
+                    out_lines.append(line)
+                    continue
+                if line == b"Q":  # unstack command also switches off
+                    suppress = False
+                    out_lines.append(line)
+                    continue
+                if suppress and in_text:  # suppress hidden lines
+                    continue
+                out_lines.append(line)
+            if make_return:
+                return out_lines
+            else:
+                return None
+
+        if not doc.is_pdf:  # only works for PDF
+            raise ValueError("is no PDF")
+        if doc.is_encrypted or doc.is_closed:
+            raise ValueError("closed or encrypted doc")
+
+        if not clean_pages:
+            hidden_text = False
+            redactions = False
+
+        if metadata:
+            doc.set_metadata({})  # remove standard metadata
+
+        for page in doc:
+            if reset_fields:
+                # reset form fields (widgets)
+                for widget in page.widgets():
+                    widget.reset()
+
+            if remove_links:
+                links = page.get_links()  # list of all links on page
+                for link in links:  # remove all links
+                    page.delete_link(link)
+
+            found_redacts = False
+            for annot in page.annots():
+                if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:  # pylint: disable=no-member
+                    annot.update_file(buffer_=b" ")  # overwrite the file content
+                if reset_responses:
+                    annot.delete_responses()
+                if annot.type[0] == mupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
+                    found_redacts = True
+
+            if redactions and found_redacts:
+                page.apply_redactions(images=redact_images)
+
+            # Thumbnail removal does not depend on content cleaning, so it must
+            # happen before the early exits below. Otherwise it is skipped for
+            # clean_pages=False and for pages without /Contents.
+            if thumbnails:  # remove page thumbnails?
+                if doc.xref_get_key(page.xref, "Thumb")[0] != "null":
+                    doc.xref_set_key(page.xref, "Thumb", "null")
+
+            if not (clean_pages or hidden_text):
+                continue  # done with the page
+
+            page.clean_contents()
+            if not page.get_contents():
+                continue
+            if hidden_text:
+                xrefs = page.get_contents()
+                assert len(xrefs) == 1  # only one because of cleaning.
+                xref = xrefs[0]
+                cont = doc.xref_stream(xref)
+                cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
+                if cont_lines:  # something was actually removed
+                    cont = b"\n".join(cont_lines)
+                    doc.update_stream(xref, cont)  # rewrite the page /Contents
+
+        # pages are scrubbed, now perform document-wide scrubbing
+        # remove embedded files
+        if embedded_files:
+            for name in doc.embfile_names():
+                doc.embfile_del(name)
+
+        if xml_metadata:
+            doc.del_xml_metadata()
+        if not (xml_metadata or javascript):
+            xref_limit = 0  # makes the xref loop below a no-op
+        else:
+            xref_limit = doc.xref_length()
+        for xref in range(1, xref_limit):
+            if not doc.xref_object(xref):
+                msg = "bad xref %i - clean PDF before scrubbing" % xref
+                raise ValueError(msg)
+            if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
+                # a /JavaScript action object
+                obj = "<</S/JavaScript/JS()>>"  # replace with a null JavaScript
+                doc.update_object(xref, obj)  # update this object
+                continue  # no further handling
+
+            if not xml_metadata:
+                continue
+
+            if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
+                # delete any metadata object directly
+                doc.update_object(xref, "<<>>")
+                doc.update_stream(xref, b"deleted", new=True)
+                continue
+
+            if doc.xref_get_key(xref, "Metadata")[0] != "null":
+                doc.xref_set_key(xref, "Metadata", "null")
+    
+    def search_page_for(
+            doc: 'Document',
+            pno: int,
+            text: str,
+            quads: bool = False,
+            clip: rect_like = None,
+            flags: int = None,
+            textpage: 'TextPage' = None,
+            ) -> list:
+        """Look for a string on the page with the given number.
+
+        Args:
+            pno: page number
+            text: string to be searched for
+            quads: (bool) return quads instead of rectangles
+            clip: restrict search to this rectangle
+            flags: bit switches. If None, the combination of TEXT_DEHYPHENATE,
+                TEXT_PRESERVE_LIGATURES, TEXT_PRESERVE_WHITESPACE and
+                TEXT_MEDIABOX_CLIP is used.
+            textpage: reuse a prepared textpage
+        Returns:
+            The result of page.search_for(): a list of rectangles or quads,
+            each containing an occurrence.
+        """
+        if flags is None:
+            flags = (
+                TEXT_DEHYPHENATE
+                | TEXT_PRESERVE_LIGATURES
+                | TEXT_PRESERVE_WHITESPACE
+                | TEXT_MEDIABOX_CLIP
+            )
+        page = doc[pno]
+        return page.search_for(
+            text,
+            quads=quads,
+            clip=clip,
+            flags=flags,
+            textpage=textpage,
+        )
+    
     def select(self, pyliste):
         """Build sub-pdf with page numbers in the list."""
         if self.is_closed or self.is_encrypted:
@@ -5813,6 +6877,162 @@
         self.xref_set_key(xref, "MarkInfo", pdfdict)
         return True
 
+    def set_metadata(doc: 'Document', m: dict = None) -> None:
+        """Write metadata to the PDF /Info object.
+
+        Args:
+            m: a dictionary like doc.metadata. None or {} removes an existing
+                /Info reference from the trailer. Values that are empty,
+                "none" or "null" set the respective /Info key to null.
+        Raises:
+            ValueError: if the document is no PDF, is closed or encrypted,
+                if 'm' is not a dict, or if it contains unknown keys.
+        """
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document closed or encrypted")
+        if m is None:
+            m = {}
+        elif type(m) is not dict:
+            raise ValueError("bad metadata")
+
+        # metadata key -> /Info key; None marks accepted keys that are not written
+        keymap = {
+            "author": "Author",
+            "producer": "Producer",
+            "creator": "Creator",
+            "title": "Title",
+            "format": None,
+            "encryption": None,
+            "creationDate": "CreationDate",
+            "modDate": "ModDate",
+            "subject": "Subject",
+            "keywords": "Keywords",
+            "trapped": "Trapped",
+        }
+        valid_keys = set(keymap.keys())
+        unknown = set(m.keys()).difference(valid_keys)
+        if unknown:
+            raise ValueError("bad dict key(s): %s" % unknown)
+
+        # locate the current /Info object via the trailer (xref -1)
+        kind, value = doc.xref_get_key(-1, "Info")
+        info_xref = int(value.replace("0 R", "")) if kind == "xref" else 0
+
+        if m == {}:
+            if info_xref != 0:  # remove existing metadata
+                doc.xref_set_key(-1, "Info", "null")
+                doc.init_doc()
+            return  # otherwise nothing to do
+
+        if info_xref == 0:  # no prev metadata: get new xref
+            info_xref = doc.get_new_xref()
+            doc.update_object(info_xref, "<<>>")  # fill it with empty object
+            doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
+
+        for key, val in m.items():
+            pdf_key = keymap[key]
+            if pdf_key is None:
+                continue
+            if not bool(val) or val in ("none", "null"):
+                pdf_val = "null"
+            else:
+                pdf_val = get_pdf_str(val)
+            doc.xref_set_key(info_xref, pdf_key, pdf_val)
+        doc.init_doc()
+        return
+
+    def set_oc(doc: 'Document', xref: int, oc: int) -> None:
+        """Attach optional content object to image or form xobject.
+
+        Args:
+            xref: (int) xref number of an image or form xobject
+            oc: (int) xref number of an OCG or OCMD. Use 0 to remove an
+                existing /OC entry; this is a no-op if there is none.
+        Raises:
+            ValueError: if the document is closed or encrypted, if 'xref' is
+                no image or form xobject, or if 'oc' is no OCG / OCMD.
+        """
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document close or encrypted")
+        t, name = doc.xref_get_key(xref, "Subtype")
+        if t != "name" or name not in ("/Image", "/Form"):
+            raise ValueError("bad object type at xref %i" % xref)
+        if oc == 0:
+            # Removal request. Never fall through to the assignment below:
+            # that would store the invalid reference "0 0 R".
+            if "OC" in doc.xref_get_keys(xref):
+                doc.xref_set_key(xref, "OC", "null")
+            return None
+        if oc > 0:
+            t, name = doc.xref_get_key(oc, "Type")
+            if t != "name" or name not in ("/OCG", "/OCMD"):
+                raise ValueError("bad object type at xref %i" % oc)
+        doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
+        return None
+
+    def set_ocmd(
+            doc: 'Document',
+            xref: int = 0,
+            ocgs: typing.Union[list, None] = None,
+            policy: OptStr = None,
+            ve: typing.Union[list, None] = None,
+            ) -> int:
+        """Create or update an OCMD object in a PDF document.
+
+        Args:
+            xref: (int) 0 for creating a new object, otherwise update existing one.
+            ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
+            policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
+            ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.
+
+        Returns:
+            Xref of the created or updated OCMD.
+
+        Raises:
+            ValueError: for unknown OCG xrefs, a bad policy, a malformed
+                visibility expression, or if 'xref' is not an OCMD.
+        """
+
+        all_ocgs = set(doc.get_ocgs().keys())
+
+        def ve_maker(ve):
+            """Convert a (nested) visibility expression to its PDF array string."""
+            # Values are wrapped in 1-tuples for '%' formatting: a tuple 've'
+            # would otherwise be unpacked as format arguments and raise
+            # TypeError instead of the intended ValueError.
+            if type(ve) not in (list, tuple) or len(ve) < 2:
+                raise ValueError("bad 've' format: %s" % (ve,))
+            operator = ve[0]
+            if not isinstance(operator, str) or operator.lower() not in ("and", "or", "not"):
+                raise ValueError("bad operand: %s" % (operator,))
+            if operator.lower() == "not" and len(ve) != 2:
+                raise ValueError("bad 've' format: %s" % (ve,))
+            parts = ["[/%s" % operator.title()]
+            for x in ve[1:]:
+                if type(x) is int:
+                    if x not in all_ocgs:
+                        raise ValueError("bad OCG %i" % x)
+                    parts.append(" %i 0 R" % x)
+                else:  # anything else must be a nested expression
+                    parts.append(" %s" % ve_maker(x))
+            parts.append("]")
+            return "".join(parts)
+
+        text = "<</Type/OCMD"
+
+        if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
+            s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
+            if s:
+                msg = "bad OCGs: %s" % s
+                raise ValueError(msg)
+            text += "/OCGs[" + " ".join("%i 0 R" % x for x in ocgs) + "]"
+
+        if policy:
+            policy = str(policy).lower()
+            pols = {
+                "anyon": "AnyOn",
+                "allon": "AllOn",
+                "anyoff": "AnyOff",
+                "alloff": "AllOff",
+            }
+            if policy not in pols:
+                raise ValueError("bad policy: %s" % policy)
+            text += "/P/%s" % pols[policy]
+
+        if ve:
+            text += "/VE%s" % ve_maker(ve)
+
+        text += ">>"
+
+        # make new object or replace old OCMD (check type first)
+        if xref == 0:
+            xref = doc.get_new_xref()
+        elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
+            raise ValueError("bad xref or not an OCMD")
+        doc.update_object(xref, text)
+        return xref
+
     def set_pagelayout(self, pagelayout: str):
         """Set the PDF PageLayout value."""
         valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
@@ -5845,6 +7065,349 @@
                 return True
         raise ValueError("bad PageMode value")
 
+    def set_page_labels(doc, labels):
+        """Add / replace page label definitions in PDF document.
+
+        Args:
+            doc: PDF document (resp. 'self').
+            labels: sequence of label dictionaries like:
+            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
+            as returned by get_page_labels(). The sequence itself is not
+            modified.
+        """
+        # William Chapman, 2021-01-06
+
+        def create_label_str(label):
+            """Convert Python label dict to corresponding PDF rule string.
+
+            Args:
+                label: (dict) build rule for the label.
+            Returns:
+                The start page number followed by the PDF label rule wrapped
+                in "<<", ">>".
+            """
+            s = "%i<<" % label["startpage"]
+            if label.get("prefix", "") != "":
+                s += "/P(%s)" % label["prefix"]
+            if label.get("style", "") != "":
+                s += "/S/%s" % label["style"]
+            if label.get("firstpagenum", 1) > 1:  # 1 is the default, so omit it
+                s += "/St %i" % label["firstpagenum"]
+            s += ">>"
+            return s
+
+        def create_nums(labels):
+            """Return concatenated string of all labels rules.
+
+            Args:
+                labels: (sequence) label dictionaries.
+            Returns:
+                PDF compatible string for page label definitions, ready to be
+                enclosed in PDF array 'Nums[...]'.
+            """
+            # sorted() instead of list.sort(): do not reorder the caller's
+            # list, and accept any sequence (e.g. a tuple) of dictionaries.
+            ordered = sorted(labels, key=lambda x: x["startpage"])
+            return "".join(create_label_str(label) for label in ordered)
+
+        doc._set_page_labels(create_nums(labels))
+
+    def set_toc(
+            doc: 'Document',
+            toc: list,
+            collapse: int = 1,
+            ) -> int:
+        """Create new outline tree (table of contents, TOC).
+
+        Any existing outline is deleted first, then one outline object is
+        written per 'toc' entry plus the outline root.
+
+        Args:
+            toc: (list, tuple) each entry must contain level, title, page and
+                optionally a fourth item: either a number (top margin on the
+                page) or a destination dictionary. None or '()' remove the TOC.
+            collapse: (int) collapses entries beyond this level. Zero or None
+                shows all entries unfolded.
+        Returns:
+            the number of inserted items, or the number of removed items respectively.
+        Raises:
+            ValueError: if the document is closed, encrypted or no PDF, or if
+                'toc' fails the validity checks.
+        """
+        if doc.is_closed or doc.is_encrypted:
+            raise ValueError("document closed or encrypted")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+        if not toc:  # remove all entries
+            return len(doc._delToC())
+
+        # validity checks --------------------------------------------------------
+        if type(toc) not in (list, tuple):
+            raise ValueError("'toc' must be list or tuple")
+        toclen = len(toc)
+        page_count = doc.page_count
+        t0 = toc[0]
+        if type(t0) not in (list, tuple):
+            raise ValueError("items must be sequences of 3 or 4 items")
+        if t0[0] != 1:
+            raise ValueError("hierarchy level of item 0 must be 1")
+        # Pairwise check of consecutive rows: the page range is tested for
+        # rows 0 .. toclen-2, format and level for rows 1 .. toclen-1.
+        # NOTE(review): the last row's page number and the first row's length
+        # are not covered by this loop - confirm whether that is intended.
+        for i in list(range(toclen - 1)):
+            t1 = toc[i]
+            t2 = toc[i + 1]
+            if not -1 <= t1[2] <= page_count:
+                raise ValueError("row %i: page number out of range" % i)
+            if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
+                raise ValueError("bad row %i" % (i + 1))
+            if (type(t2[0]) is not int) or t2[0] < 1:
+                raise ValueError("bad hierarchy level in row %i" % (i + 1))
+            if t2[0] > t1[0] + 1:  # a level may increase by at most 1 per row
+                raise ValueError("bad hierarchy level in row %i" % (i + 1))
+        # no formal errors in toc --------------------------------------------------
+
+        # --------------------------------------------------------------------------
+        # make a list of xref numbers, which we can use for our TOC entries
+        # --------------------------------------------------------------------------
+        old_xrefs = doc._delToC()  # del old outlines, get their xref numbers
+
+        # prepare table of xrefs for new bookmarks
+        # NOTE(review): the xrefs returned by _delToC() are discarded right
+        # here, so a new xref is acquired below for every TOC entry. This
+        # looks deliberate (no reuse of old xrefs) - confirm.
+        old_xrefs = []
+        xref = [0] + old_xrefs
+        xref[0] = doc._getOLRootNumber()  # entry zero is outline root xref number
+        if toclen > len(old_xrefs):  # too few old xrefs?
+            for i in range((toclen - len(old_xrefs))):
+                xref.append(doc.get_new_xref())  # acquire new ones
+
+        # maps hierarchy level -> index (into olitems) of the last item seen
+        # on that level; level 0 is the outline root
+        lvltab = {0: 0}  # to store last entry per hierarchy level
+
+        # ------------------------------------------------------------------------------
+        # contains new outline objects as dictionaries - first one is the outline root
+        # ------------------------------------------------------------------------------
+        olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
+        # ------------------------------------------------------------------------------
+        # build olitems as a list of PDF-like connected dictionaries
+        # ------------------------------------------------------------------------------
+        for i in range(toclen):
+            o = toc[i]
+            lvl = o[0]  # level
+            title = get_pdf_str(o[1])  # title
+            # 1-based page number of the entry, clamped to a valid 0-based index
+            pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
+            page_xref = doc.page_xref(pno)
+            page_height = doc.page_cropbox(pno).height
+            top = Point(72, page_height - 36)
+            dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
+            if o[2] < 0:  # negative page number: entry without a target
+                dest_dict["kind"] = LINK_NONE
+            if len(o) > 3:  # some target is specified
+                if type(o[3]) in (int, float):  # convert a number to a point
+                    dest_dict["to"] = Point(72, page_height - o[3])
+                else:  # if something else, make sure we have a dict
+                    # We make a copy of o[3] to avoid modifying our caller's data.
+                    dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
+                    if "to" not in dest_dict:  # target point not in dict?
+                        dest_dict["to"] = top  # put default in
+                    else:  # transform target to PDF coordinates
+                        page = doc[pno]
+                        point = Point(dest_dict["to"])
+                        point.y = page.cropbox.height - point.y
+                        point = point * page.rotation_matrix
+                        dest_dict["to"] = (point.x, point.y)
+            # links below are indices into olitems (-1 means "none"); they are
+            # translated to xref numbers when the objects are written
+            d = {}
+            d["first"] = -1
+            d["count"] = 0
+            d["last"] = -1
+            d["prev"] = -1
+            d["next"] = -1
+            d["dest"] = utils.getDestStr(page_xref, dest_dict)
+            d["top"] = dest_dict["to"]
+            d["title"] = title
+            d["parent"] = lvltab[lvl - 1]
+            d["xref"] = xref[i + 1]
+            d["color"] = dest_dict.get("color")
+            # bit 0 = italic, bit 1 = bold
+            d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0)
+            lvltab[lvl] = i + 1
+            parent = olitems[lvltab[lvl - 1]]  # the parent entry
+
+            # evaluates as: collapse flag of the entry, or
+            # (global 'collapse' is set and the level lies beyond it)
+            if (
+                dest_dict.get("collapse") or collapse and lvl > collapse
+            ):  # suppress expansion
+                parent["count"] -= 1  # make /Count negative
+            else:
+                parent["count"] += 1  # positive /Count
+
+            # append this item to the parent's chain of children
+            if parent["first"] == -1:
+                parent["first"] = i + 1
+                parent["last"] = i + 1
+            else:
+                d["prev"] = parent["last"]
+                prev = olitems[parent["last"]]
+                prev["next"] = i + 1
+                parent["last"] = i + 1
+            olitems.append(d)
+
+        # ------------------------------------------------------------------------------
+        # now create each outline item as a string and insert it in the PDF
+        # ------------------------------------------------------------------------------
+        # The try/except blocks skip keys that an item does not have: the
+        # outline root (index 0) was created without "dest", "next", "parent",
+        # "prev" and "title".
+        for i, ol in enumerate(olitems):
+            txt = "<<"
+            if ol["count"] != 0:
+                txt += "/Count %i" % ol["count"]
+            try:
+                txt += ol["dest"]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["first"] > -1:
+                    txt += "/First %i 0 R" % xref[ol["first"]]
+            except Exception:
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["last"] > -1:
+                    txt += "/Last %i 0 R" % xref[ol["last"]]
+            except Exception:
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["next"] > -1:
+                    txt += "/Next %i 0 R" % xref[ol["next"]]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["parent"] > -1:
+                    txt += "/Parent %i 0 R" % xref[ol["parent"]]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                if ol["prev"] > -1:
+                    txt += "/Prev %i 0 R" % xref[ol["prev"]]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+            try:
+                txt += "/Title" + ol["title"]
+            except Exception:
+                # Verbose in PyMuPDF/tests.
+                if g_exceptions_verbose >= 2:   exception_info()
+                pass
+
+            # only colors with exactly three components are written
+            if ol.get("color") and len(ol["color"]) == 3:
+                txt += f"/C[ {_format_g(tuple(ol['color']))}]"
+            if ol.get("flags", 0) > 0:
+                txt += "/F %i" % ol["flags"]
+
+            if i == 0:  # special: this is the outline root
+                txt += "/Type/Outlines"  # so add the /Type entry
+            txt += ">>"
+            doc.update_object(xref[i], txt)  # insert the PDF object
+
+        doc.init_doc()
+        return toclen
+
+    def set_toc_item(
+            doc: 'Document',
+            idx: int,
+            dest_dict: OptDict = None,
+            kind: OptInt = None,
+            pno: OptInt = None,
+            uri: OptStr = None,
+            title: OptStr = None,
+            to: point_like = None,
+            filename: OptStr = None,
+            zoom: float = 0,
+            ) -> None:
+        """Update TOC item by index.
+
+        It allows changing the item's title and link destination.
+
+        Args:
+            idx:
+                (int) desired index of the TOC list, as created by get_toc.
+            dest_dict:
+                (dict) destination dictionary as created by get_toc(False).
+                Outrules all other parameters except 'title'. If None, the
+                remaining parameters are used to make a dest dictionary.
+                The dictionary is not modified.
+            kind:
+                (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
+                the title will be updated. If pymupdf.LINK_NONE, the TOC item will
+                be deleted.
+            pno:
+                (int) page number (1-based like in get_toc). Required if
+                pymupdf.LINK_GOTO.
+            uri:
+                (str) the URL, required if pymupdf.LINK_URI.
+            title:
+                (str) the new title. No change if None.
+            to:
+                (point-like) destination on the target page. If omitted, (72, 36)
+                will be used as target coordinates.
+            filename:
+                (str) destination filename, required for pymupdf.LINK_GOTOR and
+                pymupdf.LINK_LAUNCH.
+            zoom:
+                (float) a zoom factor for the target location (pymupdf.LINK_GOTO).
+        Raises:
+            ValueError: for a bad page number, a bad color value, or if no
+                valid bookmark destination can be built.
+        """
+        xref = doc.get_outline_xrefs()[idx]
+        page_xref = 0
+        if type(dest_dict) is dict:
+            # Work on a copy, like set_toc does, so that neither the caller's
+            # dictionary nor its 'to' point are modified.
+            dest_dict = dest_dict.copy()
+            if dest_dict["kind"] == LINK_GOTO:
+                pno = dest_dict["page"]
+                page_xref = doc.page_xref(pno)
+                page_height = doc.page_cropbox(pno).height
+                # Point(...) makes a private copy and accepts any point-like
+                to = Point(dest_dict.get('to', Point(72, 36)))
+                to.y = page_height - to.y  # flip to PDF coordinates
+                dest_dict["to"] = to
+            action = utils.getDestStr(page_xref, dest_dict)
+            if not action.startswith("/A"):
+                raise ValueError("bad bookmark dest")
+            color = dest_dict.get("color")
+            if color:
+                color = list(map(float, color))
+                if len(color) != 3 or min(color) < 0 or max(color) > 1:
+                    raise ValueError("bad color value")
+            bold = dest_dict.get("bold", False)
+            italic = dest_dict.get("italic", False)
+            flags = italic + 2 * bold  # bit 0 = italic, bit 1 = bold
+            collapse = dest_dict.get("collapse")
+            return doc._update_toc_item(
+                xref,
+                action=action[2:],  # strip the leading "/A"
+                title=title,
+                color=color,
+                flags=flags,
+                collapse=collapse,
+            )
+
+        if kind == LINK_NONE:  # delete bookmark item
+            return doc.del_toc_item(idx)
+        if kind is None and title is None:  # treat as no-op
+            return None
+        if kind is None:  # only update title text
+            return doc._update_toc_item(xref, action=None, title=title)
+
+        if kind == LINK_GOTO:
+            if pno is None or pno not in range(1, doc.page_count + 1):
+                raise ValueError("bad page number")
+            page_xref = doc.page_xref(pno - 1)
+            page_height = doc.page_cropbox(pno - 1).height
+            if to is None:
+                to = Point(72, page_height - 36)
+            else:
+                to = Point(to)
+                to.y = page_height - to.y  # flip to PDF coordinates
+
+        ddict = {
+            "kind": kind,
+            "to": to,
+            "uri": uri,
+            "page": pno,
+            "file": filename,
+            "zoom": zoom,
+        }
+        action = utils.getDestStr(page_xref, ddict)
+        # an empty string also fails this test
+        if not action.startswith("/A"):
+            raise ValueError("bad bookmark dest")
+
+        return doc._update_toc_item(xref, action=action[2:], title=title)
+
     def set_xml_metadata(self, metadata):
         """Store XML document level metadata."""
         if self.is_closed or self.is_encrypted:
@@ -5863,6 +7426,318 @@
             mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
             mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml)
 
+    def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt:
+        """Build font subsets in a PDF.
+
+        Eligible fonts are potentially replaced by smaller versions. Page text is
+        NOT rewritten and thus should retain properties like being hidden or
+        controlled by optional content.
+
+        This method by default uses MuPDF's own internal feature to create subset
+        fonts. As this is a new function, errors may still occur. In this case,
+        please fall back to using the previous version by using "fallback=True".
+        Fallback mode requires the external package 'fontTools'.
+
+        Args:
+            fallback: use the older deprecated implementation.
+            verbose: only used by fallback mode.
+
+        Returns:
+            The new MuPDF-based code returns None.  The deprecated fallback
+            mode returns 0 if there are no fonts to subset.  Otherwise, it
+            returns the decrease in fontsize (the difference in fontsize),
+            measured in bytes.  Fonts for which no smaller subset could be
+            built stay unchanged and do not contribute to this value.
+
+        Raises:
+            ImportError: in fallback mode, if a font subset must be built but
+                'fontTools' cannot be imported.
+        """
+        # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
+        # An embedded font is uniquely defined by its fontbuffer only. It may have
+        # multiple names and xrefs.
+        # Once the sets of used unicodes and glyphs are known, we compute a
+        # smaller version of the buffer using package fontTools.
+
+        if not fallback:  # by default use MuPDF function
+            pdf = mupdf.pdf_document_from_fz_document(doc)
+            mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
+            return
+
+        font_buffers = {}
+
+        def get_old_widths(xref):
+            """Retrieve old font '/W' and '/DW' values."""
+            df = doc.xref_get_key(xref, "DescendantFonts")
+            if df[0] != "array":  # only handle xref specifications
+                return None, None
+            df_xref = int(df[1][1:-1].replace("0 R", ""))
+            widths = doc.xref_get_key(df_xref, "W")
+            if widths[0] != "array":  # no widths key found
+                widths = None
+            else:
+                widths = widths[1]
+            dwidths = doc.xref_get_key(df_xref, "DW")
+            if dwidths[0] != "int":
+                dwidths = None
+            else:
+                dwidths = dwidths[1]
+            return widths, dwidths
+
+        def set_old_widths(xref, widths, dwidths):
+            """Restore the old '/W' and '/DW' in subsetted font.
+
+            If either parameter is None or evaluates to False, the corresponding
+            dictionary key will be set to null.
+            """
+            df = doc.xref_get_key(xref, "DescendantFonts")
+            if df[0] != "array":  # only handle xref specs
+                return None
+            df_xref = int(df[1][1:-1].replace("0 R", ""))
+            for key, value in (("W", widths), ("DW", dwidths)):
+                if type(value) is str and value:
+                    # a usable old value exists: copy it over
+                    doc.xref_set_key(df_xref, key, value)
+                elif doc.xref_get_key(df_xref, key)[0] != "null":
+                    # no old value: make sure the key is null
+                    doc.xref_set_key(df_xref, key, "null")
+                # else: key is already null - nothing to do. Never hand the
+                # missing (non-string) value to xref_set_key().
+            return None
+
+        def set_subset_fontname(new_xref):
+            """Generate a name prefix to tag a font as subset.
+
+            We use a random generator to select 6 upper case ASCII characters.
+            The prefixed name must be put in the font xref as the "/BaseFont" value
+            and in the FontDescriptor object as the '/FontName' value.
+            """
+            # The following generates a prefix like 'ABCDEF+'
+            import random
+            import string
+            prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
+            font_str = doc.xref_object(new_xref, compressed=True)
+            font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
+            df = doc.xref_get_key(new_xref, "DescendantFonts")
+            if df[0] == "array":
+                df_xref = int(df[1][1:-1].replace("0 R", ""))
+                fd = doc.xref_get_key(df_xref, "FontDescriptor")
+                if fd[0] == "xref":
+                    fd_xref = int(fd[1].replace("0 R", ""))
+                    fd_str = doc.xref_object(fd_xref, compressed=True)
+                    fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
+                    doc.update_object(fd_xref, fd_str)
+            doc.update_object(new_xref, font_str)
+
+        def build_subset(buffer, unc_set, gid_set):
+            """Build font subset using fontTools.
+
+            Args:
+                buffer: (bytes) the font given as a binary buffer.
+                unc_set: (set) required unicodes. Unicode 255 is added to it.
+                gid_set: (set) required glyph ids. Used instead of 'unc_set'
+                    if that contains the error unicode 0xFFFD; glyph id 189
+                    is added to it in this case.
+            Returns:
+                Either None if subsetting is unsuccessful or the subset font buffer.
+            """
+            try:
+                import fontTools.subset as fts
+            except ImportError:
+                if g_exceptions_verbose:    exception_info()
+                message("This method requires fontTools to be installed.")
+                raise
+            import tempfile
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                oldfont_path = f"{tmp_dir}/oldfont.ttf"
+                newfont_path = f"{tmp_dir}/newfont.ttf"
+                uncfile_path = f"{tmp_dir}/uncfile.txt"
+                args = [
+                    oldfont_path,
+                    "--retain-gids",
+                    f"--output-file={newfont_path}",
+                    "--layout-features=*",
+                    "--passthrough-tables",
+                    "--ignore-missing-glyphs",
+                    "--ignore-missing-unicodes",
+                    "--symbol-cmap",
+                ]
+
+                # store glyph ids or unicodes as file
+                with open(uncfile_path, "w", encoding='utf8') as unc_file:
+                    if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
+                        args.append(f"--gids-file={uncfile_path}")
+                        gid_set.add(189)
+                        unc_list = list(gid_set)
+                        for unc in unc_list:
+                            unc_file.write("%i\n" % unc)
+                    else:
+                        args.append(f"--unicodes-file={uncfile_path}")
+                        unc_set.add(255)
+                        unc_list = list(unc_set)
+                        for unc in unc_list:
+                            unc_file.write("%04x\n" % unc)
+
+                # store fontbuffer as a file
+                with open(oldfont_path, "wb") as fontfile:
+                    fontfile.write(buffer)
+                try:
+                    os.remove(newfont_path)  # remove old file
+                except OSError:  # best effort: usually there is no old file
+                    pass
+                try:  # invoke fontTools subsetter
+                    fts.main(args)
+                    font = Font(fontfile=newfont_path)
+                    new_buffer = font.buffer  # subset font binary
+                    if font.glyph_count == 0:  # intercept empty font
+                        new_buffer = None
+                except Exception:
+                    exception_info()
+                    new_buffer = None
+            return new_buffer
+
+        def repl_fontnames(doc):
+            """Populate 'font_buffers'.
+
+            For each font candidate, store its xref and the list of names
+            by which PDF text may refer to it (there may be multiple).
+            """
+
+            def norm_name(name):
+                """Recreate font name that contains PDF hex codes.
+
+                E.g. #20 -> space, chr(32)
+
+                Every "#xx" code is decoded exactly once in a single pass:
+                a decoded "#" (code "#23") is not interpreted again, and a
+                "#" not followed by two hex digits is left unchanged.
+                """
+                return re.sub(
+                    r"#([0-9A-Fa-f]{2})",
+                    lambda m: chr(int(m.group(1), 16)),
+                    name,
+                )
+
+            def get_fontnames(doc, item):
+                """Return a list of fontnames for an item of page.get_fonts().
+
+                There may be multiple names e.g. for Type0 fonts.
+                """
+                fontname = item[3]
+                names = [fontname]
+                fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
+                fontname = norm_name(fontname)
+                if fontname not in names:
+                    names.append(fontname)
+                descendents = doc.xref_get_key(item[0], "DescendantFonts")
+                if descendents[0] != "array":
+                    return names
+                descendents = descendents[1][1:-1]
+                if descendents.endswith(" 0 R"):
+                    xref = int(descendents[:-4])
+                    descendents = doc.xref_object(xref, compressed=True)
+                p1 = descendents.find("/BaseFont")
+                # position of the "/" that starts the name value
+                p2 = descendents.find("/", p1 + 1) if p1 >= 0 else -1
+                if p2 >= 0:
+                    # The name ends at the next "/" or ">>", whichever comes
+                    # first. A delimiter that does not occur (find() == -1)
+                    # must not take part in the comparison.
+                    ends = [
+                        p
+                        for p in (
+                            descendents.find("/", p2 + 1),
+                            descendents.find(">>", p2 + 1),
+                        )
+                        if p >= 0
+                    ]
+                    p1 = min(ends) if ends else len(descendents)
+                    fontname = descendents[p2 + 1 : p1]
+                    fontname = norm_name(fontname)
+                    if fontname not in names:
+                        names.append(fontname)
+                return names
+
+            for i in range(doc.page_count):
+                for f in doc.get_page_fonts(i, full=True):
+                    font_xref = f[0]  # font xref
+                    font_ext = f[1]  # font file extension
+                    basename = f[3]  # font basename
+
+                    if font_ext not in (  # skip if not supported by fontTools
+                        "otf",
+                        "ttf",
+                        "woff",
+                        "woff2",
+                    ):
+                        continue
+                    # skip fonts which already are subsets
+                    if len(basename) > 6 and basename[6] == "+":
+                        continue
+
+                    extr = doc.extract_font(font_xref)
+                    fontbuffer = extr[-1]
+                    names = get_fontnames(doc, f)
+                    name_set, xref_set, subsets = font_buffers.get(
+                        fontbuffer, (set(), set(), (set(), set()))
+                    )
+                    xref_set.add(font_xref)
+                    for name in names:
+                        name_set.add(name)
+                    font = Font(fontbuffer=fontbuffer)
+                    name_set.add(font.name)
+                    del font
+                    font_buffers[fontbuffer] = (name_set, xref_set, subsets)
+
+        def find_buffer_by_name(name):
+            """Return the font buffer known under 'name', or None."""
+            for buffer, (name_set, _, _) in font_buffers.items():
+                if name in name_set:
+                    return buffer
+            return None
+
+        # -----------------
+        # main function
+        # -----------------
+        repl_fontnames(doc)  # populate font information
+        if not font_buffers:  # nothing found to do
+            if verbose:
+                message('No fonts to subset.')
+            return 0
+
+        # byte sizes of the replaced fonts before / after subsetting
+        old_fontsize = 0
+        new_fontsize = 0
+
+        # Scan page text for usage of subsettable fonts
+        for page in doc:
+            # go through the text and extend set of used glyphs by font
+            # we use a modified MuPDF trace device, which delivers us glyph ids.
+            for span in page.get_texttrace():
+                if type(span) is not dict:  # skip useless information
+                    continue
+                fontname = span["font"][:33]  # fontname for the span
+                buffer = find_buffer_by_name(fontname)
+                if buffer is None:
+                    continue
+                # the sets are updated in place, no need to store them back
+                _, _, (set_ucs, set_gid) = font_buffers[buffer]
+                for c in span["chars"]:
+                    set_ucs.add(c[0])  # unicode
+                    set_gid.add(c[1])  # glyph id
+
+        # build the font subsets
+        for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
+            new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
+            fontname = list(name_set)[0]
+            if new_buffer is None or len(new_buffer) >= len(old_buffer):
+                # subset was not created or did not get smaller
+                if verbose:
+                    message(f'Cannot subset {fontname!r}.')
+                continue
+            if verbose:
+                message(f"Built subset of font {fontname!r}.")
+            val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
+            new_xref = val[0]  # get its xref
+            set_subset_fontname(new_xref)  # tag fontname as subset font
+            font_str = doc.xref_object(  # get its object definition
+                new_xref,
+                compressed=True,
+            )
+            # walk through the original font xrefs and replace each by the subset def
+            for font_xref in xref_set:
+                # we need the original '/W' and '/DW' width values
+                width_table, def_width = get_old_widths(font_xref)
+                # ... and replace original font definition at xref with it
+                doc.update_object(font_xref, font_str)
+                # now copy over old '/W' and '/DW' values
+                if width_table or def_width:
+                    set_old_widths(font_xref, width_table, def_width)
+            # 'new_xref' remains unused in the PDF and must be removed
+            # by garbage collection.
+            # Only fonts that really were replaced count for the size
+            # difference: a font that could not be subset saves nothing.
+            old_fontsize += len(old_buffer)
+            new_fontsize += len(new_buffer)
+
+        return old_fontsize - new_fontsize
+
     def switch_layer(self, config, as_default=0):
         """Activate an OC layer."""
         pdf = _as_pdf_document(self)
@@ -5973,6 +7848,9 @@
                 compression_effort=compression_effort,
         )
         return bio.getvalue()
+    
+    def tobytes(self, *args, **kwargs):
+        """Forward all arguments to write() and return its result."""
+        return self.write(*args, **kwargs)
 
     @property
     def xref(self):
@@ -5980,6 +7858,41 @@
         CheckParent(self)
         return self.parent.page_xref(self.number)
 
+    def xref_copy(doc: 'Document', source: int, target: int, *, keep: typing.Optional[list] = None) -> None:
+        """Copy a PDF dictionary object to another one given their xref numbers.
+
+        Args:
+            doc: PDF document object
+            source: source xref number
+            target: target xref number, the xref must already exist
+            keep: an optional list of 1st level keys in target that should not be
+                  removed before copying.
+        Notes:
+            This works similar to the copy() method of dictionaries in Python. The
+            source may be a stream object. Copying an object onto itself
+            (source equals target) leaves it unchanged.
+        """
+        if source == target:
+            # The target is emptied before the source keys are read. With
+            # identical xrefs this would wipe the object instead of copying it.
+            return
+
+        if doc.xref_is_stream(source):
+            # read new xref stream, maintaining compression
+            stream = doc.xref_stream_raw(source)
+            doc.update_stream(
+                target,
+                stream,
+                compress=False,  # keeps source compression
+                new=True,  # in case target is no stream
+            )
+
+        # empty the target completely, observe exceptions
+        if keep is None:
+            keep = []
+        for key in doc.xref_get_keys(target):
+            if key in keep:
+                continue
+            doc.xref_set_key(target, key, "null")
+        # copy over all source dict items
+        for key in doc.xref_get_keys(source):
+            item = doc.xref_get_key(source, key)  # (type, value) of the key
+            doc.xref_set_key(target, key, item[1])
+    
     def xref_get_key(self, xref, key):
         """Get PDF dict key value of object at 'xref'."""
         pdf = _as_pdf_document(self)
@@ -6196,7 +8109,6 @@
     __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
     
     outline = property(lambda self: self._outline)
-    tobytes = write
     is_stream = xref_is_stream
 
 open = Document
@@ -8735,6 +10647,117 @@
             annot._yielded=True
             yield annot
 
+    def apply_redactions(
+            page: 'Page',
+            images: int = 2,
+            graphics: int = 1,
+            text: int = 0,
+            ) -> bool:
+        """Apply the redaction annotations of the page.
+
+        Args:
+            page: the PDF page.
+            images:
+                  0 - ignore images
+                  1 - remove all overlapping images
+                  2 - blank out overlapping image parts
+                  3 - remove image unless invisible
+            graphics:
+                  0 - ignore graphics
+                  1 - remove graphics if contained in rectangle
+                  2 - remove all overlapping graphics
+            text:
+                  0 - remove text
+                  1 - ignore text
+        Returns:
+            False if the page has no redaction annotations, else True.
+        Raises:
+            ValueError: if the document is closed or encrypted, is no PDF,
+                or if applying the redactions reports a failure.
+        """
+
+        def center_rect(annot_rect, new_text, font, fsize):
+            """Calculate minimal sub-rectangle for the overlay text.
+
+            Notes:
+                Because 'insert_textbox' supports no vertical text centering,
+                we calculate an approximate number of lines here and return a
+                sub-rect with smaller height, which should still be sufficient.
+                The passed-in rectangle is never modified.
+            Args:
+                annot_rect: the annotation rectangle
+                new_text: the text to insert.
+                font: the fontname. Must be one of the CJK or Base-14 set, else
+                    the rectangle is returned unchanged.
+                fsize: the fontsize
+            Returns:
+                A rectangle to use instead of the annot rectangle.
+            """
+            if not new_text or annot_rect.width <= EPSILON:
+                return annot_rect
+            try:
+                text_width = get_text_length(new_text, font, fsize)
+            except (ValueError, mupdf.FzErrorBase):  # unsupported font
+                if g_exceptions_verbose:
+                    exception_info()
+                return annot_rect
+            line_height = fsize * 1.2
+            limit = annot_rect.width
+            h = math.ceil(text_width / limit) * line_height  # estimate rect height
+            if h >= annot_rect.height:
+                return annot_rect
+            # work on a copy: the caller's rectangle must stay as it is
+            r = Rect(annot_rect)
+            y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
+            r.y0 = y  # only the top edge is moved down
+            return r
+
+        CheckParent(page)
+        doc = page.parent
+        if doc.is_encrypted or doc.is_closed:
+            raise ValueError("document closed or encrypted")
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+
+        # The annotation values are saved before the MuPDF call below,
+        # because they are needed for the overlays written afterwards.
+        redact_annots = [
+            annot._get_redact_values()
+            for annot in page.annots(
+                types=(mupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
+            )
+        ]
+
+        if not redact_annots:  # any redactions on this page?
+            return False  # no redactions
+
+        rc = page._apply_redactions(text, images, graphics)  # call MuPDF
+        if not rc:  # should not happen really
+            raise ValueError("Error applying redactions.")
+
+        # now write replacement text in old redact rectangles
+        shape = page.new_shape()
+        for redact in redact_annots:
+            annot_rect = redact["rect"]
+            fill = redact["fill"]
+            if fill:
+                shape.draw_rect(annot_rect)  # colorize the rect background
+                shape.finish(fill=fill, color=fill)
+            if "text" in redact:  # if we also have text
+                new_text = redact["text"]
+                align = redact.get("align", 0)
+                fname = redact["fontname"]
+                fsize = redact["fontsize"]
+                color = redact["text_color"]
+                # try finding vertical centered sub-rect
+                trect = center_rect(annot_rect, new_text, fname, fsize)
+
+                rc = -1
+                while rc < 0 and fsize >= 4:  # while not enough room
+                    # (re-) try insertion
+                    rc = shape.insert_textbox(
+                        trect,
+                        new_text,
+                        fontname=fname,
+                        fontsize=fsize,
+                        color=color,
+                        align=align,
+                    )
+                    fsize -= 0.5  # reduce font if unsuccessful
+        shape.commit()  # append new contents object
+        return True
+
     def recolor(self, components=1):
         """Convert colorspaces of objects on the page.
         
@@ -8843,6 +10866,19 @@
         annot._erase()
         return val
 
+    def delete_image(page: 'Page', xref: int) -> None:
+        """Delete the image referred to by xref.
+
+        Actually replaces by a small transparent Pixmap using method Page.replace_image.
+
+        Args:
+            xref: xref of the image to delete.
+        """
+        # make a small 100% transparent pixmap (of just any dimension)
+        pix = Pixmap(csGRAY, (0, 0, 1, 1), 1)
+        pix.clear_with()  # clear all samples bytes to 0x00
+        # the image object at 'xref' now gets the transparent pixmap as content
+        page.replace_image(xref, pixmap=pix)
+
     def delete_link(self, linkdict):
         """Delete a Link."""
         CheckParent(self)
@@ -8887,6 +10923,20 @@
 
         return finished()
 
+    def delete_widget(page: 'Page', widget: Widget) -> Widget:
+        """Remove a widget from the page.
+
+        The widget's annotation is deleted from the page and all attributes
+        of the widget object are dropped.
+
+        Returns:
+            The value of 'widget.next' as read before the deletion.
+        Raises:
+            ValueError: if 'widget' has no '_annot' attribute (or it is None).
+        """
+        CheckParent(page)
+        widget_annot = getattr(widget, "_annot", None)
+        if widget_annot is None:
+            raise ValueError("bad type: widget")
+        successor = widget.next  # must be read before the widget is emptied
+        page.delete_annot(widget_annot)
+        widget._annot.parent = None
+        # drop every instance attribute of the widget
+        widget.__dict__.clear()
+        return successor
+
     @property
     def derotation_matrix(self) -> Matrix:
         """Reflects page de-rotation."""
@@ -8897,6 +10947,408 @@
             return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
         return Matrix(JM_derotate_page_matrix(pdfpage))
 
+    def draw_bezier(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            p3: point_like,
+            p4: point_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            morph: OptStr = None,
+            closePath: bool = False,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.
+
+        The curve is drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Returns:
+            The result of the shape's draw_bezier() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "closePath": closePath,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_circle(
+            page: 'Page',
+            center: point_like,
+            radius: float,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            morph: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a circle given its center and radius.
+
+        The circle is drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Returns:
+            The result of the shape's draw_circle() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_circle(Point(center), radius)
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_curve(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            p3: point_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            morph: OptSeq = None,
+            closePath: bool = False,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3.
+
+        The curve is drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Returns:
+            The result of the shape's draw_curve() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_curve(Point(p1), Point(p2), Point(p3))
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "closePath": closePath,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_line(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            color: OptSeq = (0,),
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            morph: OptSeq = None,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc=0,
+            ) -> Point:
+        """Draw a line from point p1 to point p2.
+
+        The line is drawn on a new shape of the page, which is finished
+        (without closing the path) and committed.
+
+        Returns:
+            The result of the shape's draw_line() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_line(Point(p1), Point(p2))
+        properties = {
+            "color": color,
+            "dashes": dashes,
+            "width": width,
+            "closePath": False,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_oval(
+            page: 'Page',
+            rect: typing.Union[rect_like, quad_like],
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            morph: OptSeq = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw an oval given its containing rectangle or quad.
+
+        The oval is drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Returns:
+            The result of the shape's draw_oval() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_oval(rect)
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_polyline(
+            page: 'Page',
+            points: list,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            morph: OptSeq = None,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            closePath: bool = False,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw multiple connected line segments.
+
+        The segments are drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Returns:
+            The result of the shape's draw_polyline() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_polyline(points)
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "closePath": closePath,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_quad(
+            page: 'Page',
+            quad: quad_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a quadrilateral.
+
+        The quad is drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Returns:
+            The result of the shape's draw_quad() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_quad(Quad(quad))
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_rect(
+            page: 'Page',
+            rect: rect_like,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            radius=None,
+            ) -> Point:
+        """Draw a rectangle. See Shape class method for details.
+
+        The rectangle is drawn on a new shape of the page, which is finished
+        with the given properties and committed. 'radius' is handed on to
+        the shape's draw_rect() call.
+
+        Returns:
+            The result of the shape's draw_rect() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_rect(Rect(rect), radius=radius)
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_sector(
+            page: 'Page',
+            center: point_like,
+            point: point_like,
+            beta: float,
+            color: OptSeq = (0,),
+            fill: OptSeq = None,
+            dashes: OptStr = None,
+            fullSector: bool = True,
+            morph: OptSeq = None,
+            width: float = 1,
+            closePath: bool = False,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a circle sector given circle center, one arc end point and the angle of the arc.
+
+        The sector is drawn on a new shape of the page, which is finished
+        with the given properties and committed.
+
+        Parameters:
+            center -- center of circle
+            point -- arc end point
+            beta -- angle of arc (degrees)
+            fullSector -- connect arc ends with center
+
+        Returns:
+            The result of the shape's draw_sector() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_sector(
+            Point(center),
+            Point(point),
+            beta,
+            fullSector=fullSector,
+        )
+        properties = {
+            "color": color,
+            "fill": fill,
+            "dashes": dashes,
+            "width": width,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "closePath": closePath,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_squiggle(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            breadth: float = 2,
+            color: OptSeq = (0,),
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            morph: OptSeq = None,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a squiggly line from point p1 to point p2.
+
+        The line is drawn on a new shape of the page, which is finished
+        (without closing the path) and committed.
+
+        Returns:
+            The result of the shape's draw_squiggle() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
+        properties = {
+            "color": color,
+            "dashes": dashes,
+            "width": width,
+            "closePath": False,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
+    def draw_zigzag(
+            page: 'Page',
+            p1: point_like,
+            p2: point_like,
+            breadth: float = 2,
+            color: OptSeq = (0,),
+            dashes: OptStr = None,
+            width: float = 1,
+            lineCap: int = 0,
+            lineJoin: int = 0,
+            overlay: bool = True,
+            morph: OptSeq = None,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> Point:
+        """Draw a zigzag line from point p1 to point p2.
+
+        The line is drawn on a new shape of the page, which is finished
+        (without closing the path) and committed.
+
+        Returns:
+            The result of the shape's draw_zigzag() call.
+        """
+        shape = page.new_shape()
+        end_point = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
+        properties = {
+            "color": color,
+            "dashes": dashes,
+            "width": width,
+            "closePath": False,
+            "lineCap": lineCap,
+            "lineJoin": lineJoin,
+            "morph": morph,
+            "stroke_opacity": stroke_opacity,
+            "fill_opacity": fill_opacity,
+            "oc": oc,
+        }
+        shape.finish(**properties)
+        shape.commit(overlay)
+        return end_point
+
     def extend_textpage(self, tpage, flags=0, matrix=None):
         page = self.this
         tp = tpage.this
@@ -9219,6 +11671,168 @@
             val = None
             return paths
 
+    def get_image_info(
+            page: 'Page',
+            hashes: bool = False,
+            xrefs: bool = False
+            ) -> list:
+        """Return the image information dictionaries of this page.
+
+        The information is extracted via a temporary TextPage created with
+        TEXT_PRESERVE_IMAGES. A result that includes hashes is cached on the
+        page as '_image_info' and reused by later calls.
+
+        Args:
+            hashes: (bool) include MD5 hash for each image.
+            xrefs: (bool) try to find the xref for each image. Sets hashes to
+                true. Ignored for non-PDF documents.
+        Returns:
+            A list of dictionaries. With 'xrefs', each one gets an "xref" key,
+            which is 0 if no page image has the same digest.
+        """
+        doc = page.parent
+        if doc.is_pdf:
+            if xrefs:
+                hashes = True  # xref lookup works by comparing digests
+        else:
+            xrefs = False  # xrefs only exist in PDFs
+
+        infos = getattr(page, "_image_info", None)
+        if not infos:
+            textpage = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
+            infos = textpage.extractIMGINFO(hashes=hashes)
+            del textpage
+            if hashes:
+                page._image_info = infos  # cache only the hashed variant
+        if not xrefs:
+            return infos
+
+        # Map the digest of every image in the page's image list to its xref.
+        xref_by_digest = {}
+        for entry in page.get_images():
+            img_xref = entry[0]
+            xref_by_digest[Pixmap(doc, img_xref).digest] = img_xref
+
+        for info in infos:
+            info["xref"] = xref_by_digest.get(info["digest"], 0)
+        return infos
+
+    def get_image_rects(page: 'Page', name, transform=False) -> list:
+        """Locate all occurrences of one image on the page.
+
+        The image is identified by its MD5 digest, which is compared against
+        the digests in the page's image information.
+
+        Args:
+            name: (str, list, int) image identification. May be reference name, an
+                  item of the page's image list or an xref.
+            transform: (bool) whether to also return the transformation matrix.
+        Returns:
+            A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
+            for all image locations on the page.
+        Raises:
+            ValueError: if a reference name matches no image or more than one.
+        """
+        name_type = type(name)
+        if name_type is int:
+            xref = name
+        elif name_type is list or name_type is tuple:
+            xref = name[0]  # first entry of an image list item is the xref
+        else:
+            candidates = [img for img in page.get_images() if img[7] == name]
+            if not candidates:
+                raise ValueError("bad image name")
+            if len(candidates) != 1:
+                raise ValueError("multiple image names found")
+            xref = candidates[0][0]
+
+        # make pixmap of the image to compute MD5
+        digest = Pixmap(page.parent, xref).digest
+
+        hits = [
+            info
+            for info in page.get_image_info(hashes=True)
+            if info["digest"] == digest
+        ]
+        if transform:
+            return [(Rect(hit["bbox"]), Matrix(hit["transform"])) for hit in hits]
+        return [Rect(hit["bbox"]) for hit in hits]
+
+    def get_label(page):
+        """Return the label for this PDF page.
+
+        Args:
+            page: page object.
+        Returns:
+            The label (str) of the page, or an empty string if the document
+            defines no page labels.
+        """
+        # Jorj McKie, 2021-01-06
+
+        page_labels = page.parent._get_page_labels()
+        if page_labels:
+            page_labels.sort()
+            return utils.get_label_pno(page.number, page_labels)
+        return ""
+
+    def get_links(page: 'Page') -> list:
+        """Create a list of all links contained in a PDF page.
+
+        Each link of the page's link chain is converted to a dictionary. For
+        PDFs, "xref" and "id" are added to each dictionary, provided the
+        number of link annotations equals the number of links found.
+
+        Notes:
+            see PyMuPDF documentation for details.
+        """
+
+        CheckParent(page)
+        links = []
+        current = page.first_link
+        while current:
+            links.append(utils.getLinkDict(current, page.parent))
+            current = current.next
+
+        if links and page.parent.is_pdf:
+            link_annots = [
+                entry
+                for entry in JM_get_annot_xref_list2(page)
+                if entry[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
+            ]
+            # Only pair them up if both lists describe the same set of links.
+            if len(link_annots) == len(links):
+                for link, entry in zip(links, link_annots):
+                    link["xref"] = entry[0]
+                    link["id"] = entry[2]
+        return links
+
+    def get_pixmap(
+                page: 'Page',
+                *,
+                matrix: matrix_like=Identity,
+                dpi=None,
+                colorspace: Colorspace=None,
+                clip: rect_like=None,
+                alpha: bool=False,
+                annots: bool=True,
+                ) -> 'Pixmap':
+        """Render the page to a pixmap.
+
+        Keyword args:
+            matrix: Matrix for transformation (default: Identity).
+            dpi: desired dots per inch. If given, matrix is ignored.
+            colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
+            clip: (irect-like) restrict rendering to this area.
+            alpha: (bool) whether to include alpha channel
+            annots: (bool) whether to also render annotations
+        Raises:
+            ValueError: if the colorspace has a component count other than
+                1, 3 or 4.
+        """
+        if dpi:
+            # An explicit resolution replaces any matrix given by the caller.
+            zoom = dpi / 72
+            matrix = Matrix(zoom, zoom)
+
+        if colorspace is None:
+            colorspace = csRGB
+        elif type(colorspace) is str:
+            # Unknown names fall back to RGB.
+            named_spaces = {"GRAY": csGRAY, "CMYK": csCMYK}
+            colorspace = named_spaces.get(colorspace.upper(), csRGB)
+        if colorspace.n not in (1, 3, 4):
+            raise ValueError("unsupported colorspace")
+
+        display_list = page.get_displaylist(annots=annots)
+        pix = display_list.get_pixmap(
+            matrix=matrix,
+            colorspace=colorspace,
+            alpha=alpha,
+            clip=clip,
+        )
+        display_list = None
+        if dpi:
+            pix.set_dpi(dpi, dpi)
+        return pix
+
     def remove_rotation(self):
         """Set page rotation to 0 while maintaining visual appearance."""
         rot = self.rotation  # normalized rotation value
@@ -9504,6 +12118,21 @@
             del tp
         return rc
 
+    def get_text(self, *args, **kwargs):
+        """Delegate to utils.get_text(), passing this object as first argument."""
+        return utils.get_text(self, *args, **kwargs)
+
+    def get_text_blocks(self, *args, **kwargs):
+        """Delegate to utils.get_text_blocks(), passing this object as first argument."""
+        return utils.get_text_blocks(self, *args, **kwargs)
+    
+    def get_text_selection(self, *args, **kwargs):
+        """Delegate to utils.get_text_selection(), passing this object as first argument."""
+        return utils.get_text_selection(self, *args, **kwargs)
+    
+    def get_text_words(self, *args, **kwargs):
+        """Delegate to utils.get_text_words(), passing this object as first argument."""
+        return utils.get_text_words(self, *args, **kwargs)
+    
+    def get_textpage_ocr(self, *args, **kwargs):
+        """Delegate to utils.get_textpage_ocr(), passing this object as first argument."""
+        return utils.get_textpage_ocr(self, *args, **kwargs)
+    
     def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
         CheckParent(self)
         if matrix is None:
@@ -9629,6 +12258,406 @@
         doc.get_char_widths(xref, fontdict=fontdict)
         return xref
 
+    def insert_htmlbox(
+        page,
+        rect,
+        text,
+        *,
+        css=None,
+        scale_low=0,
+        archive=None,
+        rotate=0,
+        oc=0,
+        opacity=1,
+        overlay=True,
+        _scale_word_width=True,
+        _verbose=False,
+    ) -> tuple:
+        """Insert text with optional HTML tags and stylings into a rectangle.
+
+        The content is laid out as a Story on a temporary PDF page, which is
+        then shown in the target rectangle via show_pdf_page(). Links of the
+        temporary page are re-created on the target page afterwards.
+
+        Args:
+            rect: (rect-like) rectangle into which the text should be placed.
+            text: (str) text with optional HTML tags and stylings. A Story
+                object is accepted as well and used as is.
+            css: (str) CSS styling commands.
+            scale_low: (float) force-fit content by scaling it down. Must be in
+                range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
+                down-scaling is acceptable. A value of 0.1 would mean that content
+                may be scaled down by at most 90%.
+            archive: Archive object pointing to locations of used fonts or images
+            rotate: (int) rotate the text in the box by a multiple of 90 degrees.
+            oc: (int) the xref of an OCG / OCMD (Optional Content).
+            opacity: (float) set opacity of inserted content.
+            overlay: (bool) put text on top of page content.
+            _scale_word_width: internal, for testing only.
+            _verbose: internal, for testing only.
+        Returns:
+            A tuple of floats (spare_height, scale).
+            spare_height:
+                The height of the remaining space in <rect> below the
+                text, or -1 if we failed to fit.
+            scale:
+                The scaling required; `0 < scale <= 1`.
+                Will be less than `scale_low` if we failed to fit.
+        Raises:
+            ValueError: if 'rotate' is not a multiple of 90, if 'scale_low' is
+                outside [0, 1], or if 'text' is neither a string nor a Story.
+        """
+        # normalize rotation angle
+        if not rotate % 90 == 0:
+            raise ValueError("bad rotation angle")
+        while rotate < 0:
+            rotate += 360
+        while rotate >= 360:
+            rotate -= 360
+
+        if not 0 <= scale_low <= 1:
+            raise ValueError("'scale_low' must be in [0, 1]")
+
+        if css is None:
+            css = ""
+
+        rect = Rect(rect)
+        # The story is laid out unrotated: for 90 / 270 degrees the layout
+        # rectangle therefore has width and height swapped.
+        if rotate in (90, 270):
+            temp_rect = Rect(0, 0, rect.height, rect.width)
+        else:
+            temp_rect = Rect(0, 0, rect.width, rect.height)
+
+        # use a small border by default
+        mycss = "body {margin:1px;}" + css  # append user CSS
+
+        # either make a story, or accept a given one
+        if isinstance(text, str):  # if a string, convert to a Story
+            story = Story(html=text, user_css=mycss, archive=archive)
+        elif isinstance(text, Story):
+            story = text
+        else:
+            raise ValueError("'text' must be a string or a Story")
+        
+        # ----------------------------------------------------------------
+        # Find a scaling factor that lets our story fit in. Instead of scaling
+        # the text smaller, we instead look at how much bigger the rect needs
+        # to be to fit the text, then reverse the scaling to get how much we
+        # need to scale down the text.
+        # ----------------------------------------------------------------
+        # scale_low == 0 means unlimited down-scaling, i.e. no upper limit
+        # for enlarging the rectangle.
+        rect_scale_max = None if scale_low == 0 else 1 / scale_low
+
+        fit = story.fit_scale(
+                temp_rect,
+                scale_min=1,
+                scale_max=rect_scale_max,
+                flags=mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW if _scale_word_width else 0,
+                verbose=_verbose,
+                )
+        
+        if not fit.big_enough:  # there was no fit
+            scale = 1 / fit.parameter
+            return (-1, scale)
+
+        # fit.filled is a tuple; we convert it in place to a Rect for
+        # convenience. (fit.rect is already a Rect.)
+        fit.filled = Rect(fit.filled)
+        assert (fit.rect.x0, fit.rect.y0) == (0, 0)
+        assert (fit.filled.x0, fit.filled.y0) == (0, 0)
+        
+        # fit.parameter is the rectangle enlargement; its inverse is the
+        # down-scaling of the content.
+        scale = 1 / fit.parameter
+        assert scale >= scale_low, f'{scale_low=} {scale=}'
+        
+        spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)
+
+        # Callback for story.write_with_links(): always hands out fit.rect.
+        def rect_function(*args):
+            return fit.rect, fit.rect, None
+
+        # draw story on temp PDF page
+        doc = story.write_with_links(rect_function)
+
+        # Insert opacity if requested.
+        # For this, we prepend a command to the /Contents.
+        if 0 <= opacity < 1:
+            tpage = doc[0]  # load page
+            # generate /ExtGstate for the page
+            alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
+            s = f"/{alp0} gs\n"  # generate graphic state command
+            TOOLS._insert_contents(tpage, s.encode(), 0)
+
+        # put result in target page
+        page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)
+
+        # -------------------------------------------------------------------------
+        # re-insert links in target rect (show_pdf_page cannot copy annotations)
+        # -------------------------------------------------------------------------
+        # scaled center point of fit.rect
+        mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale
+
+        # center point of target rect
+        mp2 = (rect.tl + rect.br) / 2
+
+        # compute link positioning matrix:
+        # - move center of scaled-down fit.rect to (0,0)
+        # - rotate
+        # - move (0,0) to center of target rect
+        mat = (
+            Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
+            * Matrix(-rotate)
+            * Matrix(1, 0, 0, 1, mp2.x, mp2.y)
+        )
+
+        # copy over links
+        for link in doc[0].get_links():
+            link["from"] *= mat
+            page.insert_link(link)
+
+        return spare_height, scale
+
+    def insert_image(
+            page,
+            rect,
+            *,
+            alpha=-1,
+            filename=None,
+            height=0,
+            keep_proportion=True,
+            mask=None,
+            oc=0,
+            overlay=True,
+            pixmap=None,
+            rotate=0,
+            stream=None,
+            width=0,
+            xref=0,
+            ):
+        """Insert an image for display in a rectangle.
+
+        Args:
+            rect: (rect_like) position of image on the page.
+            alpha: (int, optional) set to 0 if image has no transparency.
+            filename: (str, Path, file object) image filename.
+            height: (int) passed through to the low-level insertion.
+            keep_proportion: (bool) keep width / height ratio (default).
+            mask: (bytes, optional) image consisting of alpha values to use.
+            oc: (int) xref of OCG or OCMD to declare as Optional Content.
+            overlay: (bool) put in foreground (default) or background.
+            pixmap: (pymupdf.Pixmap) use this as image.
+            rotate: (int) rotate by 0, 90, 180 or 270 degrees.
+            stream: (bytes) use this as image.
+            width: (int) passed through to the low-level insertion.
+            xref: (int) use this as image.
+
+        'page' and 'rect' are positional, all other parameters are keywords.
+
+        If 'xref' is given, that image is used. Other input options are ignored.
+        Else, exactly one of pixmap, stream or filename must be given.
+
+        'alpha=0' for non-transparent images improves performance significantly.
+        Affects stream and filename only.
+
+        Optimum transparent insertions are possible by using filename / stream in
+        conjunction with a 'mask' image of alpha values.
+
+        Returns:
+            xref (int) of inserted image. Re-use as argument for multiple insertions.
+        Raises:
+            ValueError: if the document is no PDF, the image source arguments
+                are inconsistent or of a wrong type, 'rotate' is not a
+                multiple of 90, or 'rect' is empty or infinite.
+            FileNotFoundError: if 'filename' does not exist.
+        """
+        CheckParent(page)
+        doc = page.parent
+        if not doc.is_pdf:
+            raise ValueError("is no PDF")
+
+        # Without an xref, exactly one image source must be supplied.
+        if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1):
+            raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")
+
+        # Reduce 'filename' to a plain string: path-like objects (having
+        # 'absolute') via str(), file-like objects via their 'name'.
+        if filename:
+            if type(filename) is str:
+                pass
+            elif hasattr(filename, "absolute"):
+                filename = str(filename)
+            elif hasattr(filename, "name"):
+                filename = filename.name
+            else:
+                raise ValueError("bad filename")
+
+        if filename and not os.path.exists(filename):
+            raise FileNotFoundError("No such file: '%s'" % filename)
+        elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
+            raise ValueError("stream must be bytes-like / BytesIO")
+        elif pixmap and type(pixmap) is not Pixmap:
+            raise ValueError("pixmap must be a Pixmap")
+        if mask and not (stream or filename):
+            raise ValueError("mask requires stream or filename")
+        if mask and type(mask) not in (bytes, bytearray, io.BytesIO):
+            raise ValueError("mask must be bytes-like / BytesIO")
+        # normalize the rotation into [0, 360)
+        while rotate < 0:
+            rotate += 360
+        while rotate >= 360:
+            rotate -= 360
+        if rotate not in (0, 90, 180, 270):
+            raise ValueError("bad rotate value")
+
+        r = Rect(rect)
+        if r.is_empty or r.is_infinite:
+            raise ValueError("rect must be finite and not empty")
+        # target rectangle transformed with the inverted page matrix
+        clip = r * ~page.transformation_matrix
+
+        # Create a unique image reference name.
+        # Names of images, XObjects and fonts of the page are all avoided.
+        ilst = [i[7] for i in doc.get_page_images(page.number)]
+        ilst += [i[1] for i in doc.get_page_xobjects(page.number)]
+        ilst += [i[4] for i in doc.get_page_fonts(page.number)]
+        n = "fzImg"  # 'pymupdf image'
+        i = 0
+        _imgname = n + "0"  # first name candidate
+        while _imgname in ilst:
+            i += 1
+            _imgname = n + str(i)  # try new name
+
+        if overlay:
+            page.wrap_contents()  # ensure a balanced graphics state
+        # doc.InsertedImages is handed to the low-level insertion and stored
+        # back below when a (possibly updated) value is returned.
+        digests = doc.InsertedImages
+        xref, digests = page._insert_image(
+            filename=filename,
+            pixmap=pixmap,
+            stream=stream,
+            imask=mask,
+            clip=clip,
+            overlay=overlay,
+            oc=oc,
+            xref=xref,
+            rotate=rotate,
+            keep_proportion=keep_proportion,
+            width=width,
+            height=height,
+            alpha=alpha,
+            _imgname=_imgname,
+            digests=digests,
+        )
+        if digests is not None:
+            doc.InsertedImages = digests
+
+        return xref
+
+    def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None:
+        """Insert a new link for the current page.
+
+        The link dictionary is converted to its annotation source string,
+        which is then added to the page.
+
+        Args:
+            lnk: (dict) the link to insert.
+            mark: (bool) not used by this method body.
+        Raises:
+            ValueError: if no annotation text can be made for this link kind.
+        """
+        CheckParent(page)
+        link_source = utils.getLinkText(page, lnk)
+        if link_source == "":
+            raise ValueError("link kind not supported")
+        annot_strings = (link_source,)
+        page._addAnnot_FromString(annot_strings)
+
+    def insert_text(
+            page: 'Page',
+            point: point_like,
+            text: typing.Union[str, list],
+            *,
+            fontsize: float = 11,
+            lineheight: OptFloat = None,
+            fontname: str = "helv",
+            fontfile: OptStr = None,
+            set_simple: int = 0,
+            encoding: int = 0,
+            color: OptSeq = None,
+            fill: OptSeq = None,
+            border_width: float = 0.05,
+            miter_limit: float = 1,
+            render_mode: int = 0,
+            rotate: int = 0,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ):
+        """Insert text starting at a given point.
+
+        Notes:
+            Creates a Shape object, uses its same-named method and commits it
+            unless that method returned a negative value.
+        Returns:
+            The return value of Shape.insert_text().
+        """
+        shape = page.new_shape()
+        text_options = dict(
+            fontsize=fontsize,
+            lineheight=lineheight,
+            fontname=fontname,
+            fontfile=fontfile,
+            set_simple=set_simple,
+            encoding=encoding,
+            color=color,
+            fill=fill,
+            border_width=border_width,
+            render_mode=render_mode,
+            miter_limit=miter_limit,
+            rotate=rotate,
+            morph=morph,
+            stroke_opacity=stroke_opacity,
+            fill_opacity=fill_opacity,
+            oc=oc,
+        )
+        result = shape.insert_text(point, text, **text_options)
+        if not result < 0:
+            shape.commit(overlay)
+        return result
+
+    def insert_textbox(
+            page: 'Page',
+            rect: rect_like,
+            buffer: typing.Union[str, list],
+            *,
+            fontname: str = "helv",
+            fontfile: OptStr = None,
+            set_simple: int = 0,
+            encoding: int = 0,
+            fontsize: float = 11,
+            lineheight: OptFloat = None,
+            color: OptSeq = None,
+            fill: OptSeq = None,
+            expandtabs: int = 1,
+            align: int = 0,
+            rotate: int = 0,
+            render_mode: int = 0,
+            miter_limit: float = 1,
+            border_width: float = 0.05,
+            morph: OptSeq = None,
+            overlay: bool = True,
+            stroke_opacity: float = 1,
+            fill_opacity: float = 1,
+            oc: int = 0,
+            ) -> float:
+        """Insert text into a given rectangle.
+
+        Notes:
+            Creates a Shape object, uses its same-named method and commits it
+            unless that method returned a negative value.
+        Parameters:
+            rect: (rect-like) area to use for text.
+            buffer: text to be inserted
+            fontname: a Base-14 font, font name or '/name'
+            fontfile: name of a font file
+            fontsize: font size
+            lineheight: overwrite the font property
+            color: RGB color triple
+            expandtabs: handles tabulators with string function
+            align: left, center, right, justified
+            rotate: 0, 90, 180, or 270 degrees
+            morph: morph box with a matrix and a fixpoint
+            overlay: put text in foreground or background
+        Returns:
+            unused or deficit rectangle area (float)
+        """
+        shape = page.new_shape()
+        box_options = dict(
+            fontsize=fontsize,
+            lineheight=lineheight,
+            fontname=fontname,
+            fontfile=fontfile,
+            set_simple=set_simple,
+            encoding=encoding,
+            color=color,
+            fill=fill,
+            expandtabs=expandtabs,
+            render_mode=render_mode,
+            miter_limit=miter_limit,
+            border_width=border_width,
+            align=align,
+            rotate=rotate,
+            morph=morph,
+            stroke_opacity=stroke_opacity,
+            fill_opacity=fill_opacity,
+            oc=oc,
+        )
+        result = shape.insert_textbox(rect, buffer, **box_options)
+        if not result < 0:
+            shape.commit(overlay)
+        return result
+
     @property
     def is_wrapped(self):
         """Check if /Contents is in a balanced graphics state."""
@@ -9741,6 +12770,9 @@
     def mediabox_size(self):
         return Point(self.mediabox.x1, self.mediabox.y1)
 
+    def new_shape(self):
+        """Return a new Shape object created for this page."""
+        return Shape(self)
+
     #@property
     #def parent( self):
     #    assert self._parent
@@ -9760,6 +12792,44 @@
         # fixme this looks wrong.
         self.this = page
 
+    def replace_image(
+            page: 'Page',
+            xref: int,
+            *,
+            filename=None,
+            pixmap=None,
+            stream=None,
+            ):
+        """Replace the image referred to by xref.
+
+        The object definition stored under xref is overwritten with a newly
+        inserted image. The page's appearance instructions stay intact, so the
+        new image is displayed with the same bbox, rotation etc.
+        Providing a small, fully transparent image gives the effect of a
+        deleted image. A typical use is replacing large images by a smaller
+        version, e.g. with a lower resolution or graylevel instead of colored.
+
+        Args:
+            xref: the xref of the image to replace.
+            filename, pixmap, stream: exactly one of these must be provided. The
+                meaning being the same as in Page.insert_image.
+        Raises:
+            ValueError: if xref is not an image, or if not exactly one of
+                filename / stream / pixmap is given.
+        """
+        doc = page.parent  # the owning document
+        if not doc.xref_is_image(xref):
+            raise ValueError("xref not an image")
+        sources_given = sum(map(bool, (filename, stream, pixmap)))
+        if sources_given != 1:
+            raise ValueError("Exactly one of filename/stream/pixmap must be given")
+
+        # insert new image anywhere in page
+        fresh_xref = page.insert_image(
+            page.rect,
+            filename=filename,
+            stream=stream,
+            pixmap=pixmap,
+        )
+        doc.xref_copy(fresh_xref, xref)  # copy over new to old
+
+        # new image insertion has created a new /Contents source,
+        # which we will set to spaces now
+        contents_xrefs = page.get_contents()
+        doc.update_stream(contents_xrefs[-1], b" ")
+
+        page._image_info = None  # clear cache of extracted image information
+
     @property
     def rotation(self):
         """Page rotation."""
@@ -9781,6 +12851,47 @@
         CheckParent(self)
         mupdf.fz_run_page(self.this, dw.device, JM_matrix_from_py(m), mupdf.FzCookie())
 
+    def search_for(
+            page,
+            text,
+            *,
+            clip=None,
+            quads=False,
+            flags=None,
+            textpage=None,
+            ) -> list:
+        """Search for a string on a page.
+
+        Args:
+            text: string to be searched for
+            clip: restrict search to this rectangle
+            quads: (bool) return quads instead of rectangles
+            flags: bit switches, default: join hyphened words
+            textpage: a pre-created pymupdf.TextPage
+        Returns:
+            a list of rectangles or quads, each containing one occurrence.
+        Raises:
+            ValueError: if 'textpage' does not belong to this page.
+        """
+        if flags is None:
+            flags = (
+                TEXT_DEHYPHENATE
+                | TEXT_PRESERVE_WHITESPACE
+                | TEXT_PRESERVE_LIGATURES
+                | TEXT_MEDIABOX_CLIP
+            )
+        if clip is not None:
+            clip = Rect(clip)
+
+        CheckParent(page)
+        if textpage is not None:
+            # A supplied textpage is used as is ('clip' and 'flags' only
+            # apply when the textpage is made here).
+            if textpage.parent != page:
+                raise ValueError("not a textpage of this page")
+            return textpage.search(text, quads=quads)
+
+        temp_tp = page.get_textpage(clip=clip, flags=flags)  # create pymupdf.TextPage
+        hits = temp_tp.search(text, quads=quads)
+        del temp_tp
+        return hits
+
     def set_artbox(self, rect):
         """Set the ArtBox."""
         return self._set_pagebox("ArtBox", rect)
@@ -9848,6 +12959,130 @@
         """Set the TrimBox."""
         return self._set_pagebox("TrimBox", rect)
 
+    def show_pdf_page(
+            page,
+            rect,
+            docsrc,
+            pno=0,
+            keep_proportion=True,
+            overlay=True,
+            oc=0,
+            rotate=0,
+            clip=None,
+            ) -> int:
+        """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.
+
+        Args:
+            rect: (rect-like) where to place the source image
+            docsrc: (document) source PDF
+            pno: (int) source page number; negative values count from the end
+            keep_proportion: (bool) do not change width-height-ratio
+            overlay: (bool) put in foreground
+            oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF)
+            rotate: (int) degrees (multiple of 90)
+            clip: (rect-like) part of source page rectangle
+        Returns:
+            xref of inserted object (for reuse)
+        Raises:
+            ValueError: if source or target is no PDF, if 'rect' or the
+                effective clip is empty or infinite, if a negative 'pno' is
+                given for a source without pages, or if source and target
+                are the same document.
+        """
+        def calc_matrix(sr, tr, keep=True, rotate=0):
+            """Calculate transformation matrix from source to target rect.
+
+            Notes:
+                The product of four matrices in this sequence: (1) translate correct
+                source corner to origin, (2) rotate, (3) scale, (4) translate to
+                target's top-left corner.
+            Args:
+                sr: source rect in PDF (!) coordinate system
+                tr: target rect in PDF coordinate system
+                keep: whether to keep source ratio of width to height
+                rotate: rotation angle in degrees
+            Returns:
+                Transformation matrix.
+            """
+            # calc center point of source rect
+            smp = (sr.tl + sr.br) / 2.0
+            # calc center point of target rect
+            tmp = (tr.tl + tr.br) / 2.0
+
+            # m moves to (0, 0), then rotates
+            m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)
+
+            sr1 = sr * m  # resulting source rect to calculate scale factors
+
+            fw = tr.width / sr1.width  # scale the width
+            fh = tr.height / sr1.height  # scale the height
+            if keep:
+                fw = fh = min(fw, fh)  # take min if keeping aspect ratio
+
+            m *= Matrix(fw, fh)  # concat scale matrix
+            m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
+            return JM_TUPLE(m)
+
+        CheckParent(page)
+        doc = page.parent
+
+        if not doc.is_pdf or not docsrc.is_pdf:
+            raise ValueError("is no PDF")
+
+        # 'rect' is documented as rect-like: convert it first, so that plain
+        # sequences work too (same as insert_image does).
+        rect = Rect(rect)
+        if rect.is_empty or rect.is_infinite:
+            raise ValueError("rect must be finite and not empty")
+
+        if pno < 0:  # support negative page numbers
+            # Guard against a source without pages: repeatedly adding a page
+            # count of 0 would never make 'pno' non-negative.
+            if docsrc.page_count < 1:
+                raise ValueError("source document has no pages")
+            pno %= docsrc.page_count
+        src_page = docsrc[pno]  # load source page
+
+        tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates
+
+        src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
+        if src_rect.is_empty or src_rect.is_infinite:
+            raise ValueError("clip must be finite and not empty")
+        src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord
+
+        matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)
+
+        # list of existing /Form /XObjects
+        ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
+        ilst += [i[7] for i in doc.get_page_images(page.number)]
+        ilst += [i[4] for i in doc.get_page_fonts(page.number)]
+
+        # create a name not in that list
+        n = "fzFrm"
+        i = 0
+        _imgname = n + "0"
+        while _imgname in ilst:
+            i += 1
+            _imgname = n + str(i)
+
+        isrc = docsrc._graft_id  # used as key for graftmaps
+        if doc._graft_id == isrc:
+            raise ValueError("source document must not equal target")
+
+        # retrieve / make Graftmap for source PDF
+        gmap = doc.Graftmaps.get(isrc, None)
+        if gmap is None:
+            gmap = Graftmap(doc)
+            doc.Graftmaps[isrc] = gmap
+
+        # take note of generated xref for automatic reuse
+        pno_id = (isrc, pno)  # id of docsrc[pno]
+        xref = doc.ShownPages.get(pno_id, 0)
+
+        if overlay:
+            page.wrap_contents()  # ensure a balanced graphics state
+        xref = page._show_pdf_page(
+            src_page,
+            overlay=overlay,
+            matrix=matrix,
+            xref=xref,
+            oc=oc,
+            clip=src_rect,
+            graftmap=gmap,
+            _imgname=_imgname,
+        )
+        doc.ShownPages[pno_id] = xref
+
+        return xref
+
     @property
     def transformation_matrix(self):
         """Page transformation matrix."""
@@ -9876,6 +13111,15 @@
         mb = self.mediabox
         return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
 
+    def update_link(page: 'Page', lnk: dict) -> None:
+        """Update a link on the current page.
+
+        The link dictionary is converted to its annotation source string,
+        which then replaces the object stored under the link's "xref".
+
+        Raises:
+            ValueError: if no annotation text can be made for this link kind.
+        """
+        CheckParent(page)
+        link_source = utils.getLinkText(page, lnk)
+        if link_source == "":
+            raise ValueError("link kind not supported")
+
+        owner = page.parent
+        owner.update_object(lnk["xref"], link_source, page=page)
+
     def widgets(self, types=None):
         """ Generator over the widgets of a page.
 
@@ -9903,6 +13147,57 @@
             append = b"\nQ" * pop + b"\n"
             TOOLS._insert_contents(self, append, True)
 
+    def write_text(
+            page: 'Page',
+            rect=None,
+            writers=None,
+            overlay=True,
+            color=None,
+            opacity=None,
+            keep_proportion=True,
+            rotate=0,
+            oc=0,
+            ) -> None:
+        """Write the text of one or more pymupdf.TextWriter objects.
+
+        A single TextWriter without rotation and target rectangle is written
+        to the page directly. In all other cases the writers are written to a
+        page of a temporary document, which is then placed on this page via
+        show_pdf_page().
+
+        Args:
+            rect: target rectangle. If None, the union of the text writers is used.
+            writers: one or more pymupdf.TextWriter objects.
+            overlay: put in foreground or background.
+            color: passed on to each writer's write_text().
+            opacity: passed on to each writer's write_text().
+            keep_proportion: maintain aspect ratio of rectangle sides.
+            rotate: arbitrary rotation angle.
+            oc: the xref of an optional content object
+        Raises:
+            ValueError: if no writer is given.
+        """
+        assert isinstance(page, Page)
+        if not writers:
+            raise ValueError("need at least one pymupdf.TextWriter")
+        if type(writers) is TextWriter:
+            if rotate == 0 and rect is None:
+                # simple case: let the writer output to this page itself
+                writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
+                return None
+            else:
+                writers = (writers,)
+        # 'clip' becomes the union of the text rectangles of all writers
+        clip = writers[0].text_rect
+        textdoc = Document()
+        # temporary page with the same dimensions as this page
+        tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height)
+        for writer in writers:
+            clip |= writer.text_rect
+            writer.write_text(tpage, opacity=opacity, color=color)
+        if rect is None:
+            rect = clip
+        page.show_pdf_page(
+            rect,
+            textdoc,
+            0,
+            overlay=overlay,
+            keep_proportion=keep_proportion,
+            rotate=rotate,
+            clip=clip,
+            oc=oc,
+        )
+        # drop the references to the temporary document and its page
+        textdoc = None
+        tpage = None
+
     @property
     def xref(self):
         """PDF xref number of page."""
@@ -11503,6 +14798,996 @@
     tr = top_right
 
 
+class Shape:
+    """Create a new shape."""
+
+    @staticmethod
+    def horizontal_angle(C, P):
+        """Return the angle (radians, in [-pi, pi]) between the horizontal and
+        the connection from C to P. Uses the arcsine function and resolves its
+        inherent ambiguity by looking up the quadrant of vector S = P - C.
+        """
+        S = Point(P - C).unit  # unit vector 'C' -> 'P'
+        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
+        if S.x < 0:  # make arcsin result unique
+            if S.y <= 0:  # bottom-left
+                alfa = -(math.pi - alfa)
+            else:  # top-left
+                alfa = math.pi - alfa
+        else:
+            if S.y >= 0:  # top-right
+                pass
+            else:  # bottom-right
+                alfa = -alfa
+        return alfa
+
+    def __init__(self, page: Page):  # raises ValueError if the page is not from a PDF
+        CheckParent(page)  # presumably rejects pages without a valid parent - confirm
+        self.page = page
+        self.doc = page.parent
+        if not self.doc.is_pdf:
+            raise ValueError("is no PDF")
+        self.height = page.mediabox_size.y  # page size is taken from the mediabox
+        self.width = page.mediabox_size.x
+        self.x = page.cropbox_position.x  # cropbox offset, added to text positions
+        self.y = page.cropbox_position.y
+
+        self.pctm = page.transformation_matrix  # page transf. matrix
+        self.ipctm = ~self.pctm  # inverted transf. matrix
+
+        self.draw_cont = ""  # drawing commands collected by the draw_* methods
+        self.text_cont = ""  # text commands collected by insert_text / insert_textbox
+        self.totalcont = ""
+        self.last_point = None  # end point of the most recent draw operation
+        self.rect = None  # area covered so far, maintained by updateRect
+
+    def updateRect(self, x):
+        """Grow self.rect so that it also covers x (point-like or rect-like)."""
+        is_point = len(x) == 2  # two items mean a point, otherwise a rectangle
+        if self.rect is None:
+            # nothing covered yet: start with x itself
+            self.rect = Rect(x, x) if is_point else Rect(x)
+            return
+        # bounds contributed by x
+        if is_point:
+            pt = Point(x)
+            lo_x, lo_y, hi_x, hi_y = pt.x, pt.y, pt.x, pt.y
+        else:
+            box = Rect(x)
+            lo_x, lo_y, hi_x, hi_y = box.x0, box.y0, box.x1, box.y1
+        # widen the covered rectangle in place
+        area = self.rect
+        area.x0 = min(area.x0, lo_x)
+        area.y0 = min(area.y0, lo_y)
+        area.x1 = max(area.x1, hi_x)
+        area.y1 = max(area.y1, hi_y)
+
+    def draw_line(self, p1: point_like, p2: point_like) -> Point:
+        """Add a straight line from p1 to p2 to the path; return the end point."""
+        start, end = Point(p1), Point(p2)
+        inv = self.ipctm  # inverse of the page transformation matrix
+        if not (self.last_point == start):  # pen is elsewhere: move there first
+            self.draw_cont += _format_g(JM_TUPLE(start * inv)) + " m\n"
+            self.last_point = start
+            self.updateRect(start)
+        # the line segment itself
+        self.draw_cont += _format_g(JM_TUPLE(end * inv)) + " l\n"
+        self.updateRect(end)
+        self.last_point = end
+        return end
+
+    def draw_polyline(self, points: list) -> Point:
+        """Draw connected line segments through 'points'; return the last point."""
+        for i, p in enumerate(points):
+            if i == 0:
+                if not (self.last_point == Point(p)):  # move only if pen is elsewhere
+                    self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " m\n"
+                    self.last_point = Point(p)
+            else:
+                self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " l\n"
+            self.updateRect(p)
+
+        self.last_point = Point(points[-1])  # NOTE: needs a non-empty sequence
+        return self.last_point
+
+    def draw_bezier(
+        self,
+        p1: point_like,
+        p2: point_like,
+        p3: point_like,
+        p4: point_like,
+    ) -> Point:
+        """Draw a cubic Bezier curve from p1 to p4 with control points p2 and p3."""
+        start, ctrl1, ctrl2, end = Point(p1), Point(p2), Point(p3), Point(p4)
+        inv = self.ipctm  # inverse of the page transformation matrix
+        if not (self.last_point == start):  # pen is elsewhere: move to the start
+            self.draw_cont += _format_g(JM_TUPLE(start * inv)) + " m\n"
+        # the curve operator takes both control points and the end point
+        coords = []
+        for pt in (ctrl1, ctrl2, end):
+            coords.extend(pt * inv)
+        self.draw_cont += _format_g(JM_TUPLE(coords)) + " c\n"
+        # all four points count for the covered area
+        for pt in (start, ctrl1, ctrl2, end):
+            self.updateRect(pt)
+        self.last_point = end
+        return end
+
+    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
+        """Draw an ellipse inside a rectangle or quad; return the path end point."""
+        if len(tetra) != 4:
+            raise ValueError("invalid arg length")
+        if hasattr(tetra[0], "__float__"):  # four numbers: treat as rect-like
+            q = Rect(tetra).quad
+        else:  # otherwise treat as quad-like
+            q = Quad(tetra)
+
+        mt = q.ul + (q.ur - q.ul) * 0.5  # mid point of the top side
+        mr = q.ur + (q.lr - q.ur) * 0.5  # mid point of the right side
+        mb = q.ll + (q.lr - q.ll) * 0.5  # mid point of the bottom side
+        ml = q.ul + (q.ll - q.ul) * 0.5  # mid point of the left side
+        if not (self.last_point == ml):
+            self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n"
+            self.last_point = ml
+        self.draw_curve(ml, q.ll, mb)  # four curves, each using a corner as control
+        self.draw_curve(mb, q.lr, mr)
+        self.draw_curve(mr, q.ur, mt)
+        self.draw_curve(mt, q.ul, ml)  # ends where the path started
+        self.updateRect(q.rect)
+        self.last_point = ml
+        return self.last_point
+
+    def draw_circle(self, center: point_like, radius: float) -> Point:
+        """Draw a full circle around 'center'; ValueError unless radius > EPSILON."""
+        if not radius > EPSILON:
+            raise ValueError("radius must be positive")
+        mid = Point(center)
+        start = mid - (radius, 0)  # the arc begins 'radius' left of the center
+        return self.draw_sector(mid, start, 360, fullSector=False)
+
+    def draw_curve(
+        self,
+        p1: point_like,
+        p2: point_like,
+        p3: point_like,
+    ) -> Point:
+        """Draw a curve from p1 to p3, using p2 as the single control point."""
+        # fraction of the way from each end point towards p2 at which the
+        # two Bezier control points are placed
+        ratio = 0.55228474983
+        start, corner, end = Point(p1), Point(p2), Point(p3)
+        ctrl_a = start + (corner - start) * ratio
+        ctrl_b = end + (corner - end) * ratio
+        return self.draw_bezier(start, ctrl_a, ctrl_b, end)
+
+    def draw_sector(
+        self,
+        center: point_like,
+        point: point_like,
+        beta: float,
+        fullSector: bool = True,
+    ) -> Point:
+        """Draw an arc of 'beta' degrees around 'center' from 'point'; fullSector adds the radii."""
+        center = Point(center)
+        point = Point(point)
+        l3 = lambda a, b: _format_g((a, b)) + " m\n"  # "move to" command
+        l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"  # Bezier curve
+        l5 = lambda a, b: _format_g((a, b)) + " l\n"  # "line to" command
+        betar = math.radians(-beta)
+        w360 = math.radians(math.copysign(360, betar)) * (-1)
+        w90 = math.radians(math.copysign(90, betar))
+        w45 = w90 / 2
+        while abs(betar) > 2 * math.pi:
+            betar += w360  # bring angle below 360 degrees
+        if not (self.last_point == point):
+            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
+            self.last_point = point
+        Q = Point(0, 0)  # just make sure it exists
+        C = center
+        P = point
+        S = P - C  # vector 'center' -> 'point'
+        rad = abs(S)  # circle radius
+
+        if not rad > EPSILON:  # NOTE: raised after the "move to" above was stored
+            raise ValueError("radius must be positive")
+
+        alfa = self.horizontal_angle(center, point)
+        while abs(betar) > abs(w90):  # draw 90 degree arcs
+            q1 = C.x + math.cos(alfa + w90) * rad
+            q2 = C.y + math.sin(alfa + w90) * rad
+            Q = Point(q1, q2)  # the arc's end point
+            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
+            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
+            R = Point(r1, r2)  # crossing point of tangents
+            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
+            kappa = kappah * abs(P - Q)
+            cp1 = P + (R - P) * kappa  # control point 1
+            cp2 = Q + (R - Q) * kappa  # control point 2
+            self.draw_cont += l4(*JM_TUPLE(
+                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+            ))
+
+            betar -= w90  # reduce param angle by 90 deg
+            alfa += w90  # advance start angle by 90 deg
+            P = Q  # advance to arc end point
+        # draw (remaining) arc
+        if abs(betar) > 1e-3:  # significant degrees left?
+            beta2 = betar / 2
+            q1 = C.x + math.cos(alfa + betar) * rad
+            q2 = C.y + math.sin(alfa + betar) * rad
+            Q = Point(q1, q2)  # the arc's end point
+            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
+            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
+            R = Point(r1, r2)  # crossing point of tangents
+            # kappa height is 4/3 of segment height
+            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
+            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
+            cp1 = P + (R - P) * kappa  # control point 1
+            cp2 = Q + (R - Q) * kappa  # control point 2
+            self.draw_cont += l4(*JM_TUPLE(
+                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+            ))
+        if fullSector:  # connect arc start -> center -> arc end
+            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
+            self.draw_cont += l5(*JM_TUPLE(center * self.ipctm))
+            self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm))
+        self.last_point = Q
+        return self.last_point
+
+    def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
+        """Draw a rectangle.
+
+        Args:
+            radius: if not None, the rectangle will have rounded corners.
+                This is the radius of the curvature, given as a fraction of
+                the rectangle width or height. Valid are values 0 < v <= 0.5.
+                For a sequence of two values, the corners will have different
+                radii. Otherwise, the fraction will be computed from the
+                shorter side. A value of (0.5, 0.5) will draw an ellipse.
+        """
+        r = Rect(rect)
+        if radius is None:  # standard rectangle
+            self.draw_cont += _format_g(JM_TUPLE(
+                list(r.bl * self.ipctm) + [r.width, r.height]
+            )) + " re\n"
+            self.updateRect(r)
+            self.last_point = r.tl
+            return self.last_point
+        # rounded corners requested. This requires 1 or 2 values, each
+        # with 0 < value <= 0.5
+        if hasattr(radius, "__float__"):  # a single number
+            if radius <= 0 or radius > 0.5:
+                raise ValueError(f"bad radius value {radius}.")
+            d = min(r.width, r.height) * radius
+            px = (d, 0)  # horizontal offset from a corner
+            py = (0, d)  # vertical offset from a corner
+        elif hasattr(radius, "__len__") and len(radius) == 2:
+            rx, ry = radius  # separate fractions for width and height
+            px = (rx * r.width, 0)
+            py = (0, ry * r.height)
+            if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
+                raise ValueError(f"bad radius value {radius}.")
+        else:
+            raise ValueError(f"bad radius value {radius}.")
+
+        lp = self.draw_line(r.tl + py, r.bl - py)  # left side
+        lp = self.draw_curve(lp, r.bl, r.bl + px)
+
+        lp = self.draw_line(lp, r.br - px)  # bottom side
+        lp = self.draw_curve(lp, r.br, r.br - py)
+
+        lp = self.draw_line(lp, r.tr + py)  # right side
+        lp = self.draw_curve(lp, r.tr, r.tr - px)
+
+        lp = self.draw_line(lp, r.tl + px)  # top side
+        self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
+
+        self.updateRect(r)
+        return self.last_point
+
+    def draw_quad(self, quad: quad_like) -> Point:
+        """Draw the closed outline of a quad as a polyline starting at its 'ul' corner."""
+        corners = Quad(quad)
+        return self.draw_polyline([corners.ul, corners.ll, corners.lr, corners.ur, corners.ul])
+
+    def draw_zigzag(
+        self,
+        p1: point_like,
+        p2: point_like,
+        breadth: float = 2,
+    ) -> Point:
+        """Draw a zig-zagged line from p1 to p2; ValueError if the points are too close."""
+        p1 = Point(p1)
+        p2 = Point(p2)
+        S = p2 - p1  # vector start - end
+        rad = abs(S)  # distance of points
+        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+        if cnt < 4:
+            raise ValueError("points too close")
+        mb = rad / cnt  # revised breadth
+        matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
+        i_mat = ~matrix  # get original position
+        points = []  # stores edges
+        for i in range(1, cnt):
+            if i % 4 == 1:  # point "above" connection
+                p = Point(i, -1) * mb
+            elif i % 4 == 3:  # point "below" connection
+                p = Point(i, 1) * mb
+            else:  # ignore others
+                continue
+            points.append(p * i_mat)
+        self.draw_polyline([p1] + points + [p2])  # add start and end points
+        return p2
+
+    def draw_squiggle(
+        self,
+        p1: point_like,
+        p2: point_like,
+        breadth=2,
+    ) -> Point:
+        """Draw a squiggly line from p1 to p2; ValueError if the points are too close."""
+        p1 = Point(p1)
+        p2 = Point(p2)
+        S = p2 - p1  # vector start - end
+        rad = abs(S)  # distance of points
+        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+        if cnt < 4:
+            raise ValueError("points too close")
+        mb = rad / cnt  # revised breadth
+        matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
+        i_mat = ~matrix  # get original position
+        k = 2.4142135623765633  # y of draw_curve helper point
+
+        points = []  # stores edges
+        for i in range(1, cnt):
+            if i % 4 == 1:  # point "above" connection
+                p = Point(i, -k) * mb
+            elif i % 4 == 3:  # point "below" connection
+                p = Point(i, k) * mb
+            else:  # else on connection line
+                p = Point(i, 0) * mb
+            points.append(p * i_mat)
+
+        points = [p1] + points + [p2]
+        cnt = len(points)
+        i = 0
+        while i + 2 < cnt:  # one curve per point triple: start, control, end
+            self.draw_curve(points[i], points[i + 1], points[i + 2])
+            i += 2
+        return p2
+
+    # ==============================================================================
+    # Shape.insert_text
+    # ==============================================================================
+    def insert_text(
+        self,
+        point: point_like,
+        buffer: typing.Union[str, list],
+        *,
+        fontsize: float = 11,
+        lineheight: OptFloat = None,
+        fontname: str = "helv",
+        fontfile: OptStr = None,
+        set_simple: bool = 0,
+        encoding: int = 0,
+        color: OptSeq = None,
+        fill: OptSeq = None,
+        render_mode: int = 0,
+        border_width: float = 0.05,
+        miter_limit: float = 1,
+        rotate: int = 0,
+        morph: OptSeq = None,
+        stroke_opacity: float = 1,
+        fill_opacity: float = 1,
+        oc: int = 0,
+    ) -> int:
+        """Insert text lines starting at 'point'; return the number of lines stored."""
+        # ensure 'text' is a list of strings, worth dealing with
+        if not bool(buffer):
+            return 0
+
+        if type(buffer) not in (list, tuple):
+            text = buffer.splitlines()
+        else:
+            text = buffer
+
+        if not len(text) > 0:
+            return 0
+
+        point = Point(point)
+        try:
+            maxcode = max([ord(c) for c in " ".join(text)])  # highest char code used
+        except Exception:
+            exception_info()
+            return 0
+
+        # ensure valid 'fontname'
+        fname = fontname
+        if fname.startswith("/"):
+            fname = fname[1:]
+
+        xref = self.page.insert_font(
+            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+        )
+        fontinfo = CheckFontInfo(self.doc, xref)
+
+        fontdict = fontinfo[1]
+        ordering = fontdict["ordering"]
+        simple = fontdict["simple"]
+        bfname = fontdict["name"]
+        ascender = fontdict["ascender"]
+        descender = fontdict["descender"]
+        if lineheight:  # an explicit factor overrides the font's own metrics
+            lheight = fontsize * lineheight
+        elif ascender - descender <= 1:
+            lheight = fontsize * 1.2
+        else:
+            lheight = fontsize * (ascender - descender)
+
+        if maxcode > 255:
+            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+        else:
+            glyphs = fontdict["glyphs"]
+
+        tab = []
+        for t in text:
+            if simple and bfname not in ("Symbol", "ZapfDingbats"):
+                g = None
+            else:
+                g = glyphs
+            tab.append(getTJstr(t, g, simple, ordering))
+        text = tab
+
+        color_str = ColorCode(color, "c")
+        fill_str = ColorCode(fill, "f")
+        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
+            fill = color
+            fill_str = ColorCode(color, "f")
+
+        morphing = CheckMorph(morph)
+        rot = rotate
+        if rot % 90 != 0:
+            raise ValueError("bad rotate value")
+
+        while rot < 0:
+            rot += 360
+        rot = rot % 360  # text rotate = 0, 90, 270, 180
+
+        templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
+        templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
+        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
+        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
+        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+        height = self.height
+        width = self.width
+
+        # setting up for standard rotation directions
+        # case rotate = 0
+        if morphing:
+            m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
+            mat = ~m1 * morph[1] * m1
+            cm = _format_g(JM_TUPLE(mat)) + " cm\n"
+        else:
+            cm = ""
+        top = height - point.y - self.y  # start of 1st char
+        left = point.x + self.x  # start of 1. char
+        space = top  # space available
+        #headroom = point.y + self.y  # distance to page border
+        if rot == 90:
+            left = height - point.y - self.y
+            top = -point.x - self.x
+            cm += cmp90
+            space = width - abs(top)
+            #headroom = point.x + self.x
+
+        elif rot == 270:
+            left = -height + point.y + self.y
+            top = point.x + self.x
+            cm += cmm90
+            space = abs(top)
+            #headroom = width - point.x - self.x
+
+        elif rot == 180:
+            left = -point.x - self.x
+            top = -height + point.y + self.y
+            cm += cm180
+            space = abs(point.y + self.y)
+            #headroom = height - point.y - self.y
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            bdc = "/OC /%s BDC\n" % optcont
+            emc = "EMC\n"
+        else:
+            bdc = emc = ""
+
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is None:
+            alpha = ""
+        else:
+            alpha = "/%s gs\n" % alpha
+        nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)
+
+        if render_mode > 0:
+            nres += "%i Tr " % render_mode
+            nres += _format_g(border_width * fontsize) + " w "
+            if miter_limit is not None:
+                nres += _format_g(miter_limit) + " M "
+        if color is not None:
+            nres += color_str
+        if fill is not None:
+            nres += fill_str
+
+        # =========================================================================
+        #   start text insertion
+        # =========================================================================
+        nres += text[0]
+        nlines = 1  # set output line counter
+        if len(text) > 1:
+            nres += templ2(lheight)  # line 1
+        else:
+            nres += 'TJ'
+        for i in range(1, len(text)):
+            if space < lheight:
+                break  # no space left on page
+            if i > 1:
+                nres += "\nT* "
+            nres += text[i] + 'TJ'
+            space -= lheight
+            nlines += 1
+
+        nres += "\nET\n%sQ\n" % emc
+
+        # =========================================================================
+        #   end of text insertion
+        # =========================================================================
+        # update the /Contents object
+        self.text_cont += nres
+        return nlines
+
+    # ==============================================================================
+    # Shape.insert_textbox
+    # ==============================================================================
+    def insert_textbox(
+        self,
+        rect: rect_like,
+        buffer: typing.Union[str, list],
+        *,
+        fontname: OptStr = "helv",
+        fontfile: OptStr = None,
+        fontsize: float = 11,
+        lineheight: OptFloat = None,
+        set_simple: bool = 0,
+        encoding: int = 0,
+        color: OptSeq = None,
+        fill: OptSeq = None,
+        expandtabs: int = 1,
+        border_width: float = 0.05,
+        miter_limit: float = 1,
+        align: int = 0,
+        render_mode: int = 0,
+        rotate: int = 0,
+        morph: OptSeq = None,
+        stroke_opacity: float = 1,
+        fill_opacity: float = 1,
+        oc: int = 0,
+    ) -> float:
+        """Insert text into a given rectangle.
+
+        Args:
+            rect -- the textbox to fill
+            buffer -- text to be inserted
+            fontname -- a Base-14 font, font name or '/name'
+            fontfile -- name of a font file
+            fontsize -- font size
+            lineheight -- overwrite the font property
+            color -- RGB stroke color triple
+            fill -- RGB fill color triple
+            render_mode -- text rendering control
+            border_width -- thickness of glyph borders as percentage of fontsize
+            expandtabs -- handles tabulators with string function
+            align -- left, center, right, justified
+            rotate -- 0, 90, 180, or 270 degrees
+            morph -- morph box with a matrix and a fixpoint
+        Returns:
+            unused (>= 0) or deficit (< 0) text height; nothing is stored on deficit
+        """
+        rect = Rect(rect)
+        if rect.is_empty or rect.is_infinite:
+            raise ValueError("text box must be finite and not empty")
+
+        color_str = ColorCode(color, "c")
+        fill_str = ColorCode(fill, "f")
+        if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
+            fill = color
+            fill_str = ColorCode(color, "f")
+
+        optcont = self.page._get_optional_content(oc)
+        if optcont is not None:
+            bdc = "/OC /%s BDC\n" % optcont
+            emc = "EMC\n"
+        else:
+            bdc = emc = ""
+
+        # determine opacity / transparency
+        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if alpha is None:
+            alpha = ""
+        else:
+            alpha = "/%s gs\n" % alpha
+
+        if rotate % 90 != 0:
+            raise ValueError("rotate must be multiple of 90")
+
+        rot = rotate
+        while rot < 0:
+            rot += 360
+        rot = rot % 360
+
+        # is buffer worth of dealing with? (a list like [""] joins to empty text too)
+        if not bool(buffer) or (type(buffer) in (list, tuple) and not "\n".join(buffer)):
+            return rect.height if rot in (0, 180) else rect.width
+
+        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
+        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
+        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+        height = self.height
+
+        fname = fontname
+        if fname.startswith("/"):
+            fname = fname[1:]
+
+        xref = self.page.insert_font(
+            fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+        )
+        fontinfo = CheckFontInfo(self.doc, xref)
+
+        fontdict = fontinfo[1]
+        ordering = fontdict["ordering"]
+        simple = fontdict["simple"]
+        glyphs = fontdict["glyphs"]
+        bfname = fontdict["name"]
+        ascender = fontdict["ascender"]
+        descender = fontdict["descender"]
+
+        if lineheight:
+            lheight_factor = lineheight
+        elif ascender - descender <= 1:
+            lheight_factor = 1.2
+        else:
+            lheight_factor = ascender - descender
+        lheight = fontsize * lheight_factor
+
+        # create a list from buffer, split into its lines
+        if type(buffer) in (list, tuple):
+            t0 = "\n".join(buffer)
+        else:
+            t0 = buffer
+
+        maxcode = max([ord(c) for c in t0])  # t0 is non-empty here, see guard above
+        # replace invalid char codes for simple fonts
+        if simple and maxcode > 255:
+            t0 = "".join([c if ord(c) < 256 else "?" for c in t0])
+
+        t0 = t0.splitlines()
+
+        glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+        if simple and bfname not in ("Symbol", "ZapfDingbats"):
+            tj_glyphs = None
+        else:
+            tj_glyphs = glyphs
+
+        # ----------------------------------------------------------------------
+        # calculate pixel length of a string
+        # ----------------------------------------------------------------------
+        def pixlen(x):
+            """Calculate pixel length of x."""
+            if ordering < 0:
+                return sum([glyphs[ord(c)][1] for c in x]) * fontsize
+            else:
+                return len(x) * fontsize
+
+        # ---------------------------------------------------------------------
+
+        if ordering < 0:
+            blen = glyphs[32][1] * fontsize  # pixel size of space character
+        else:
+            blen = fontsize
+
+        text = ""  # output buffer
+
+        if CheckMorph(morph):
+            m1 = Matrix(
+                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+            )
+            mat = ~m1 * morph[1] * m1
+            cm = _format_g(JM_TUPLE(mat)) + " cm\n"
+        else:
+            cm = ""
+
+        # ---------------------------------------------------------------------
+        # adjust for text orientation / rotation
+        # ---------------------------------------------------------------------
+        progr = 1  # direction of line progress
+        c_pnt = Point(0, fontsize * ascender)  # used for line progress
+        if rot == 0:  # normal orientation
+            point = rect.tl + c_pnt  # line 1 is 'lheight' below top
+            maxwidth = rect.width  # pixels available in one line
+            maxheight = rect.height  # available text height
+
+        elif rot == 90:  # rotate counter clockwise
+            c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
+            point = rect.bl + c_pnt  # line 1 'lheight' away from left
+            maxwidth = rect.height  # pixels available in one line
+            maxheight = rect.width  # available text height
+            cm += cmp90
+
+        elif rot == 180:  # text upside down
+            # progress upwards in y direction
+            c_pnt = -Point(0, fontsize * ascender)
+            point = rect.br + c_pnt  # line 1 'lheight' above bottom
+            maxwidth = rect.width  # pixels available in one line
+            progr = -1  # subtract lheight for next line
+            maxheight = rect.height  # available text height
+            cm += cm180
+
+        else:  # rotate clockwise (270 or -90)
+            # progress from right to left
+            c_pnt = -Point(fontsize * ascender, 0)
+            point = rect.tr + c_pnt  # line 1 'lheight' left of right
+            maxwidth = rect.height  # pixels available in one line
+            progr = -1  # subtract lheight for next line
+            maxheight = rect.width  # available text height
+            cm += cmm90
+
+        # =====================================================================
+        # line loop
+        # =====================================================================
+        just_tab = []  # 'justify' indicators per line
+
+        for i, line in enumerate(t0):
+            line_t = line.expandtabs(expandtabs).split(" ")  # split into words
+            num_words = len(line_t)
+            lbuff = ""  # init line buffer
+            rest = maxwidth  # available line pixels
+            # =================================================================
+            # word loop
+            # =================================================================
+            for j in range(num_words):
+                word = line_t[j]
+                pl_w = pixlen(word)  # pixel len of word
+                if rest >= pl_w:  # does it fit on the line?
+                    lbuff += word + " "  # yes, append word
+                    rest -= pl_w + blen  # update available line space
+                    continue  # next word
+
+                # word doesn't fit - output line (if not empty)
+                if lbuff:
+                    lbuff = lbuff.rstrip() + "\n"  # line full, append line break
+                    text += lbuff  # append to total text
+                    just_tab.append(True)  # can align-justify
+
+                lbuff = ""  # re-init line buffer
+                rest = maxwidth  # re-init avail. space
+
+                if pl_w <= maxwidth:  # word shorter than 1 line?
+                    lbuff = word + " "  # start the line with it
+                    rest = maxwidth - pl_w - blen  # update free space
+                    continue
+
+                # long word: split across multiple lines - char by char ...
+                if len(just_tab) > 0:
+                    just_tab[-1] = False  # cannot align-justify
+                for c in word:
+                    if pixlen(lbuff) <= maxwidth - pixlen(c):
+                        lbuff += c
+                    else:  # line full
+                        lbuff += "\n"  # close line
+                        text += lbuff  # append to text
+                        just_tab.append(False)  # cannot align-justify
+                        lbuff = c  # start new line with this char
+
+                lbuff += " "  # finish long word
+                rest = maxwidth - pixlen(lbuff)  # long word stored
+
+            if lbuff:  # unprocessed line content?
+                text += lbuff.rstrip()  # append to text
+                just_tab.append(False)  # cannot align-justify
+
+            if i < len(t0) - 1:  # not the last line?
+                text += "\n"  # insert line break
+
+        # compute used part of the textbox
+        if text.endswith("\n"):
+            text = text[:-1]
+        lb_count = text.count("\n") + 1  # number of lines written
+
+        # text height = line count * line height plus one descender value
+        text_height = lheight * lb_count - descender * fontsize
+
+        more = text_height - maxheight  # difference to height limit
+        if more > EPSILON:  # landed too much outside rect
+            return (-1) * more  # return deficit, don't output
+
+        more = abs(more)
+        if more < EPSILON:
+            more = 0  # don't bother with epsilons
+        nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
+        templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
+        # center, right, justify: output each line with its own specifics
+        text_t = text.splitlines()  # split text in lines again
+        just_tab[-1] = False  # never justify last line
+        for i, t in enumerate(text_t):
+            spacing = 0
+            pl = maxwidth - pixlen(t)  # length of empty line part
+            pnt = point + c_pnt * (i * lheight_factor)  # text start of line
+            if align == 1:  # center: right shift by half width
+                if rot in (0, 180):
+                    pnt = pnt + Point(pl / 2, 0) * progr
+                else:
+                    pnt = pnt - Point(0, pl / 2) * progr
+            elif align == 2:  # right: right shift by full width
+                if rot in (0, 180):
+                    pnt = pnt + Point(pl, 0) * progr
+                else:
+                    pnt = pnt - Point(0, pl) * progr
+            elif align == 3:  # justify
+                spaces = t.count(" ")  # number of spaces in line
+                if spaces > 0 and just_tab[i]:  # if any, and we may justify
+                    spacing = pl / spaces  # make every space this much larger
+                else:
+                    spacing = 0  # keep normal space length
+            top = height - pnt.y - self.y
+            left = pnt.x + self.x
+            if rot == 90:
+                left = height - pnt.y - self.y
+                top = -pnt.x - self.x
+            elif rot == 270:
+                left = -height + pnt.y + self.y
+                top = pnt.x + self.x
+            elif rot == 180:
+                left = -pnt.x - self.x
+                top = -height + pnt.y + self.y
+
+            nres += templ(left, top, fname, fontsize)
+
+            if render_mode > 0:
+                nres += "%i Tr " % render_mode
+                nres += _format_g(border_width * fontsize) + " w "
+                if miter_limit is not None:
+                    nres += _format_g(miter_limit) + " M "
+
+            if align == 3:
+                nres += _format_g(spacing) + " Tw "
+
+            if color is not None:
+                nres += color_str
+            if fill is not None:
+                nres += fill_str
+            nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)
+
+        nres += "ET\n%sQ\n" % emc
+
+        self.text_cont += nres
+        self.updateRect(rect)
+        return more
+
+    def finish(
+        self,
+        width: float = 1,
+        color: OptSeq = (0,),
+        fill: OptSeq = None,
+        lineCap: int = 0,
+        lineJoin: int = 0,
+        dashes: OptStr = None,
+        even_odd: bool = False,
+        morph: OptSeq = None,
+        closePath: bool = True,
+        fill_opacity: float = 1,
+        stroke_opacity: float = 1,
+        oc: int = 0,
+    ) -> None:
+        """Finish the current drawing segment.
+
+        Wraps the pending draw commands with the requested graphics
+        properties and moves them to the total content buffer. Nothing
+        happens if there are no pending draw commands.
+
+        Args:
+            width: line width. Zero switches the stroke color off.
+            color: stroke color. None forces a width of zero.
+            fill: fill color, or None for no filling.
+            lineCap: line cap style, only written if non-zero.
+            lineJoin: line join style, only written if non-zero.
+            dashes: dash pattern string; None, "" and "[] 0" are ignored.
+            even_odd: use the "*" variant of the fill operators.
+            morph: (point, matrix) pair; checked with CheckMorph().
+            closePath: connect the last point to the first point.
+            fill_opacity: passed as 'ca' to the page's opacity setup.
+            stroke_opacity: passed as 'CA' to the page's opacity setup.
+            oc: handed to the page's optional content lookup.
+        """
+        if self.draw_cont == "":  # nothing drawn: nothing to finish
+            return
+
+        def prepend(ops):
+            # operators that must precede everything collected so far
+            self.draw_cont = ops + self.draw_cont
+
+        def append(ops):
+            # operators that follow everything collected so far
+            self.draw_cont += ops
+
+        # a border color needs a border width and vice versa
+        if width == 0:
+            color = None
+        elif color is None:
+            width = 0
+
+        stroke_ops = ColorCode(color, "c")  # stroke color operator string
+        fill_ops = ColorCode(fill, "f")  # fill color operator string
+
+        # optional content: bracket the segment with BDC ... EMC
+        oc_name = self.page._get_optional_content(oc)
+        if oc_name is None:
+            closing_emc = ""
+        else:
+            prepend("/OC /%s BDC\n" % oc_name)
+            closing_emc = "EMC\n"
+
+        # transparency: select the graphics state the page hands back
+        gstate = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+        if gstate is not None:
+            prepend("/%s gs\n" % gstate)
+
+        if width not in (0, 1):  # only non-default widths are written
+            append(_format_g(width) + " w\n")
+
+        if lineCap != 0:
+            prepend("%i J\n" % lineCap)
+        if lineJoin != 0:
+            prepend("%i j\n" % lineJoin)
+
+        if dashes not in (None, "", "[] 0"):
+            prepend("%s d\n" % dashes)
+
+        if closePath:
+            append("h\n")
+            self.last_point = None
+
+        if color is not None:
+            append(stroke_ops)
+
+        if fill is None:
+            # no fill: stroke only (also emitted when color is None)
+            append("S\n")
+        else:
+            append(fill_ops)
+            # "B" = fill and stroke, "f" = fill only, "*" = even-odd rule
+            paint = "B" if color is not None else "f"
+            if even_odd:
+                paint += "*"
+            append(paint + "\n")
+
+        append(closing_emc)
+
+        if CheckMorph(morph):
+            # apply the morph matrix relative to the morph fixpoint
+            shift = Matrix(
+                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+            )
+            mat = ~shift * morph[1] * shift
+            prepend(_format_g(JM_TUPLE(mat)) + " cm\n")
+
+        # isolate the segment in its own graphics state and start afresh
+        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
+        self.draw_cont = ""
+        self.last_point = None
+
+    def commit(self, overlay: bool = True) -> None:
+        """Write the accumulated Shape content to the page.
+
+        The text content is appended to the draw content and the result
+        is stored as a new /Contents stream of the page. Afterwards all
+        buffers are cleared so that the Shape can be used again.
+
+        Args:
+            overlay: put the content in the foreground (default) or in
+                the background. If true, the existing page contents are
+                wrapped first.
+        """
+        CheckParent(self.page)  # the owning document may be gone by now
+        self.totalcont += self.text_cont
+        self.totalcont = self.totalcont.encode()
+        stream = self.totalcont
+
+        if stream:  # an empty Shape stores nothing
+            if overlay:
+                # keep the graphics state balanced before adding on top
+                self.page.wrap_contents()
+            # create the /Contents entry with a placeholder stream ...
+            new_xref = TOOLS._insert_contents(self.page, b" ", overlay)
+            # ... then replace the placeholder with the real data
+            self.doc.update_stream(new_xref, stream)
+
+        # reset buffers and state for potential re-use
+        self.draw_cont = self.text_cont = self.totalcont = ""
+        self.last_point = self.rect = None
+
+
 class Story:
 
     def __init__( self, html='', user_css=None, em=12, archive=None):
@@ -11664,10 +15949,13 @@
             function( position2)
         mupdf.fz_story_positions( self.this, function2)
 
-    def place( self, where):
+    def place( self, where, flags=0):
+        '''
+        Wrapper for fz_place_story_flags().
+        '''
         where = JM_rect_from_py( where)
         filled = mupdf.FzRect()
-        more = mupdf.fz_place_story( self.this, where, filled)
+        more = mupdf.fz_place_story_flags( self.this, where, filled, flags)
         return more, JM_py_from_rect( filled)
 
     def reset( self):
@@ -11784,7 +16072,9 @@
         `big_enough`:
             `True` if the fit succeeded.
         `filled`:
-            From the last call to `Story.place()`.
+            Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
+            will be wider than .rect if any single word (which we never split)
+            was too wide for .rect.
         `more`:
             `False` if the fit succeeded.
         `numcalls`:
@@ -11792,7 +16082,7 @@
         `parameter`:
             The successful parameter value, or the largest failing value.
         `rect`:
-            The rect created from `parameter`.
+            The pymupdf.Rect created from `parameter`.
         '''
         def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
             self.big_enough = big_enough
@@ -11812,7 +16102,7 @@
                     f' rect={self.rect}'
                     )
 
-    def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
+    def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0):
         '''
         Finds optimal rect that contains the story `self`.
         
@@ -11839,6 +16129,9 @@
             Maximum error in returned `parameter`.
         :arg verbose:
             If true we output diagnostics.
+        :arg flags:
+            Passed to mupdf.fz_place_story_flags(). e.g.
+            zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`.
         '''
         def log(text):
             assert verbose
@@ -11894,7 +16187,7 @@
                 if verbose:
                     log(f'update(): not calling self.place() because rect is empty.')
             else:
-                more, filled = self.place(rect)
+                more, filled = self.place(rect, flags)
                 state.numcalls += 1
                 big_enough = not more
                 result = Story.FitResult(
@@ -11963,12 +16256,12 @@
             parameter = (state.pmin + state.pmax) / 2
             update(parameter)
 
-    def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
+    def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0):
         '''
         Finds smallest value `scale` in range `scale_min..scale_max` where
         `scale * rect` is large enough to contain the story `self`.
 
-        Returns a `Story.FitResult` instance.
+        Returns a `Story.FitResult` instance with `.parameter` set to `scale`.
 
         :arg width:
             width of rect.
@@ -11983,13 +16276,15 @@
             Maximum error in returned scale.
         :arg verbose:
             If true we output diagnostics.
+        :arg flags:
+            Passed to Story.place().
         '''
         x0, y0, x1, y1 = rect
         width = x1 - x0
         height = y1 - y0
         def fn(scale):
             return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
-        return self.fit(fn, scale_min, scale_max, delta, verbose)
+        return self.fit(fn, scale_min, scale_max, delta, verbose, flags)
 
     def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
         '''
@@ -12316,6 +16611,10 @@
                             and not mupdf.fz_is_infinite_rect(tp_rect)
                             ):
                         continue
+
+                    if buflen == 0 and ch.m_internal.c == 0x200d:
+                        # ZERO WIDTH JOINER cannot start a word
+                        continue
                     word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                     this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
                     if word_delimiter or this_char_rtl != last_char_rtl:
@@ -12516,6 +16815,232 @@
         text = " ".join(words)
         return text
 
+    def fill_textbox(
+            writer: 'TextWriter',
+            rect: rect_like,
+            text: typing.Union[str, list],
+            pos: point_like = None,
+            font: typing.Optional[Font] = None,
+            fontsize: float = 11,
+            lineheight: OptFloat = None,
+            align: int = 0,
+            warn: bool = None,
+            right_to_left: bool = False,
+            small_caps: bool = False,
+            ) -> tuple:
+        """Fill a rectangle with text.
+
+        The text is split into lines, and lines too long for the available
+        width are wrapped at spaces. Words wider than the available width
+        are cut into pieces. At most 'max_lines' lines are written, a count
+        computed from the vertical space between 'pos' and the rectangle
+        bottom.
+
+        Args:
+            writer: pymupdf.TextWriter object (= "self")
+            rect: rect-like to receive the text.
+            text: string or list/tuple of strings.
+            pos: point-like start position of first word. Default is the
+                rectangle's top-left shifted by
+                (0.2 * fontsize, fontsize * font.ascender).
+            font: pymupdf.Font object (default pymupdf.Font('helv')). Any
+                value whose type is not exactly Font is replaced by the
+                default.
+            fontsize: the fontsize.
+            lineheight: overwrite the font property. Used as a factor of
+                fontsize. If not given, ascender - descender of the font is
+                used, or 1.2 if that difference does not exceed 1.
+            align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
+            warn: (bool) text overflow action: None = ignore, true = emit a
+                warning message, any other false value = raise ValueError.
+            right_to_left: (bool) indicate right-to-left language.
+            small_caps: (bool) passed on to the font's length computations
+                and to writer.append().
+
+        Returns:
+            The list of (text, length) items for lines that were not
+            written (empty if all lines were output).
+
+        Raises:
+            ValueError: if 'rect' is empty, if the start position is not
+                inside 'rect', or on text overflow when 'warn' is false
+                but not None.
+        """
+        rect = Rect(rect)
+        if rect.is_empty:
+            raise ValueError("fill rect must not empty.")
+        if type(font) is not Font:
+            font = Font("helv")
+
+        def textlen(x):
+            """Return length of a string."""
+            return font.text_length(
+                x, fontsize=fontsize, small_caps=small_caps
+            )  # abbreviation
+
+        def char_lengths(x):
+            """Return list of single character lengths for a string."""
+            return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)
+
+        def append_this(pos, text):
+            """Append 'text' at 'pos' and return the result of writer.append()."""
+            ret = writer.append(
+                    pos, text, font=font, fontsize=fontsize, small_caps=small_caps
+                    )
+            return ret
+
+        tolerance = fontsize * 0.2  # extra distance to left border
+        space_len = textlen(" ")
+        std_width = rect.width - tolerance  # line width for all but the first line
+        std_start = rect.x0 + tolerance  # x-coordinate where standard lines start
+
+        def norm_words(width, words):
+            """Cut any word in pieces no longer than 'width'."""
+            nwords = []
+            word_lengths = []
+            for w in words:
+                wl_lst = char_lengths(w)
+                wl = sum(wl_lst)
+                if wl <= width:  # nothing to do - copy over
+                    nwords.append(w)
+                    word_lengths.append(wl)
+                    continue
+
+                # word longer than rect width - split it in parts
+                # try the longest prefix first and shorten it until it fits;
+                # then continue the same way with the remainder of the word
+                n = len(wl_lst)
+                while n > 0:
+                    wl = sum(wl_lst[:n])
+                    if wl <= width:
+                        nwords.append(w[:n])
+                        word_lengths.append(wl)
+                        w = w[n:]
+                        wl_lst = wl_lst[n:]
+                        n = len(wl_lst)
+                    else:
+                        n -= 1
+            return nwords, word_lengths
+
+        def output_justify(start, line):
+            """Justified output of a line."""
+            # ignore leading / trailing / multiple spaces
+            words = [w for w in line.split(" ") if w != ""]
+            nwords = len(words)
+            if nwords == 0:
+                return
+            if nwords == 1:  # single word cannot be justified
+                append_this(start, words[0])
+                return
+            tl = sum([textlen(w) for w in words])  # total word lengths
+            gaps = nwords - 1  # number of word gaps
+            gapl = (std_width - tl) / gaps  # width of each gap
+            for w in words:
+                # presumably 'lp' is the position after the written word;
+                # verify against TextWriter.append
+                _, lp = append_this(start, w)  # output one word
+                # this changes the caller's point object in place
+                start.x = lp.x + gapl  # next start at word end plus gap
+            return
+
+        asc = font.ascender
+        dsc = font.descender
+        # line height factor: explicit value, else derived from the font
+        if not lineheight:
+            if asc - dsc <= 1:
+                lheight = 1.2
+            else:
+                lheight = asc - dsc
+        else:
+            lheight = lineheight
+
+        LINEHEIGHT = fontsize * lheight  # effective line height
+        width = std_width  # available horizontal space
+
+        # starting point of text
+        if pos is not None:
+            pos = Point(pos)
+        else:  # default is just below rect top-left
+            pos = rect.tl + (tolerance, fontsize * asc)
+        if pos not in rect:
+            raise ValueError("Text must start in rectangle.")
+
+        # calculate displacement factor for alignment
+        if align == TEXT_ALIGN_CENTER:
+            factor = 0.5
+        elif align == TEXT_ALIGN_RIGHT:
+            factor = 1.0
+        else:
+            factor = 0
+
+        # split in lines if just a string was given
+        if type(text) is str:
+            textlines = text.splitlines()
+        else:
+            textlines = []
+            for line in text:
+                textlines.extend(line.splitlines())
+
+        # number of lines that may be written starting at pos.y
+        max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1
+
+        new_lines = []  # the final list of textbox lines
+        no_justify = []  # no justify for these line numbers
+        for i, line in enumerate(textlines):
+            if line in ("", " "):  # (almost) empty line: keep as is
+                new_lines.append((line, space_len))
+                width = rect.width - tolerance
+                no_justify.append((len(new_lines) - 1))
+                continue
+            if i == 0:  # first line may start at a custom position
+                width = rect.x1 - pos.x
+            else:
+                width = rect.width - tolerance
+
+            if right_to_left:  # reverses Arabic / Hebrew text front to back
+                line = writer.clean_rtl(line)
+            tl = textlen(line)
+            if tl <= width:  # line short enough
+                new_lines.append((line, tl))
+                no_justify.append((len(new_lines) - 1))
+                continue
+
+            # we need to split the line in fitting parts
+            words = line.split(" ")  # the words in the line
+
+            # cut in parts any words that are longer than rect width
+            words, word_lengths = norm_words(width, words)
+
+            # greedy wrapping: take as many leading words as fit, emit them
+            # as one line, then repeat with the remaining words
+            n = len(words)
+            while True:
+                line0 = " ".join(words[:n])
+                wl = sum(word_lengths[:n]) + space_len * (n - 1)
+                if wl <= width:
+                    new_lines.append((line0, wl))
+                    words = words[n:]
+                    word_lengths = word_lengths[n:]
+                    n = len(words)
+                    line0 = None
+                else:
+                    n -= 1
+
+                if len(words) == 0:
+                    break
+                assert n
+
+        # -------------------------------------------------------------------------
+        # List of lines created. Each item is (text, tl), where 'tl' is the PDF
+        # output length (float) and 'text' is the text. Except for justified text,
+        # this is output-ready.
+        # -------------------------------------------------------------------------
+        nlines = len(new_lines)
+        if nlines > max_lines:  # text overflow: act as requested by 'warn'
+            msg = "Only fitting %i of %i lines." % (max_lines, nlines)
+            if warn is None:
+                pass
+            elif warn:
+                message("Warning: " + msg)
+            else:
+                raise ValueError(msg)
+
+        start = Point()
+        no_justify += [len(new_lines) - 1]  # no justifying of last line
+        for i in range(max_lines):
+            try:
+                line, tl = new_lines.pop(0)
+            except IndexError:  # fewer lines than would fit: done
+                if g_exceptions_verbose >= 2:   exception_info()
+                break
+
+            if right_to_left:  # Arabic, Hebrew
+                line = "".join(reversed(line))
+
+            if i == 0:  # may have different start for first line
+                # from here on 'start' and 'pos' are the same object, so the
+                # updates of 'start' below also change 'pos'
+                start = pos
+
+            if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
+                output_justify(start, line)
+                start.x = std_start
+                start.y += LINEHEIGHT
+                continue
+
+            if i > 0 or pos.x == std_start:  # left, center, right alignments
+                # NOTE(review): 'width' still has the value last assigned by
+                # the line splitting loop above
+                start.x += (width - tl) * factor
+
+            append_this(start, line)
+            start.x = std_start
+            start.y += LINEHEIGHT
+
+        return new_lines  # return non-written lines
+
     def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
         """Write the text to a PDF page having the TextWriter's page size.
 
@@ -12736,6 +17261,16 @@
         """Check if x is in the rectangle."""
         return self.__contains__(x)
 
+    def get_area(self, *args) -> float:
+        """Return the area of the rectangle.
+
+        The optional first positional argument selects the unit: one of
+        'px' (default), 'in', 'cm' or 'mm'. The result is in squares of
+        that unit. Any other unit raises KeyError.
+        """
+        unit = args[0] if args else "px"
+        # per unit: (numerator, denominator) of the linear conversion factor
+        conversions = {
+            "px": (1, 1),
+            "in": (1.0, 72.0),
+            "cm": (2.54, 72.0),
+            "mm": (25.4, 72.0),
+        }
+        numer, denom = conversions[unit]
+        # square the linear factor because an area is converted
+        return (numer / denom) ** 2 * self.width * self.height
+
     def include_point(self, p):
         """Extend rectangle to include point p."""
         rect = self.rect.include_point(p)
@@ -20925,6 +25460,82 @@
     return _wxcolors
 
 
+def _mupdf_devel(make_links=True):
+    '''
+    Locate the MuPDF development files inside the PyMuPDF installation, so
+    that programmes using the MuPDF C/C++ API can be compiled and linked
+    against it.
+
+    Args:
+        make_links:
+            If true, then on non-windows we also create a softlink without
+            version suffix for each shared library that carries the
+            expected version suffix; this allows the libraries to be used
+            in a link command.
+
+            The suffix is built from the second and third component of
+            the MuPDF version. For MuPDF 1.26.7 we would for example
+            create within the PyMuPDF directory:
+
+                libmupdf.so -> libmupdf.so.26.7
+                libmupdfcpp.so -> libmupdfcpp.so.26.7
+
+    Returns: (mupdf_include, mupdf_lib).
+        mupdf_include:
+            Path of MuPDF include directory within PyMuPDF install.
+        mupdf_lib:
+            Path of MuPDF library directory within PyMuPDF install.
+    '''
+    import platform
+
+    log(f'{mupdf_version=}')
+
+    on_windows = platform.system() == 'Windows'
+    pkg_dir = os.path.normpath(f'{__file__}/..')
+
+    mupdf_include = f'{pkg_dir}/mupdf-devel/include'
+    # Windows uses separate .lib files at build time; elsewhere the .so
+    # files in the package directory serve for building and for running.
+    mupdf_lib = f'{pkg_dir}/mupdf-devel/lib' if on_windows else pkg_dir
+
+    log('Within installed PyMuPDF:')
+    log(f'    {mupdf_include=}')
+    log(f'    {mupdf_lib=}')
+
+    assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.'
+    assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.'
+
+    if on_windows or not make_links:
+        return mupdf_include, mupdf_lib
+
+    # Make symbolic links within the installed pymupdf module so that ld
+    # can find libmupdf.so etc. This is a bit of a hack, but necessary
+    # because wheels cannot contain symbolic links.
+    #
+    # Only files with the suffix of the expected MuPDF version are linked,
+    # in case old .so files from a previous install are still in place.
+    #
+    log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.')
+    version_suffix = '[.]'.join(mupdf_version.split('.')[1:3])
+    mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{version_suffix}$'
+    log(f'{mupdf_lib_regex=}.')
+    for entry in os.listdir(mupdf_lib):
+        found = re.match(mupdf_lib_regex, entry)
+        if not found:
+            continue
+        pfrom = f'{mupdf_lib}/{found.group(1)}'
+        # os.path.exists() is false for a softlink that points to a
+        # non-existent file, hence the additional os.path.islink().
+        if os.path.islink(pfrom) or os.path.exists(pfrom):
+            log(f'Removing existing link {pfrom=}.')
+            os.remove(pfrom)
+        log(f'Creating symlink: {pfrom} -> {entry}')
+        os.symlink(entry, pfrom)
+
+    return mupdf_include, mupdf_lib
+
+
 # We cannot import utils earlier because it imports this .py file itself and
 # uses some pymupdf.* types in function typing.
 #
@@ -20939,83 +25550,9 @@
 recover_quad                = utils.recover_quad
 recover_span_quad           = utils.recover_span_quad
 
-Annot.get_text              = utils.get_text
-Annot.get_textbox           = utils.get_textbox
-
-Document._do_links          = utils.do_links
-Document._do_widgets        = utils.do_widgets
-Document.del_toc_item       = utils.del_toc_item
-Document.get_char_widths    = utils.get_char_widths
-Document.get_oc             = utils.get_oc
-Document.get_ocmd           = utils.get_ocmd
-Document.get_page_labels    = utils.get_page_labels
-Document.get_page_numbers   = utils.get_page_numbers
-Document.get_page_pixmap    = utils.get_page_pixmap
-Document.get_page_text      = utils.get_page_text
-Document.get_toc            = utils.get_toc
-Document.has_annots         = utils.has_annots
-Document.has_links          = utils.has_links
-Document.insert_page        = utils.insert_page
-Document.new_page           = utils.new_page
-Document.scrub              = utils.scrub
-Document.search_page_for    = utils.search_page_for
-Document.set_metadata       = utils.set_metadata
-Document.set_oc             = utils.set_oc
-Document.set_ocmd           = utils.set_ocmd
-Document.set_page_labels    = utils.set_page_labels
-Document.set_toc            = utils.set_toc
-Document.set_toc_item       = utils.set_toc_item
-Document.subset_fonts       = utils.subset_fonts
-Document.tobytes            = Document.write
-Document.xref_copy          = utils.xref_copy
-
-IRect.get_area              = utils.get_area
-
-Page.apply_redactions       = utils.apply_redactions
-Page.delete_image           = utils.delete_image
-Page.delete_widget          = utils.delete_widget
-Page.draw_bezier            = utils.draw_bezier
-Page.draw_circle            = utils.draw_circle
-Page.draw_curve             = utils.draw_curve
-Page.draw_line              = utils.draw_line
-Page.draw_oval              = utils.draw_oval
-Page.draw_polyline          = utils.draw_polyline
-Page.draw_quad              = utils.draw_quad
-Page.draw_rect              = utils.draw_rect
-Page.draw_sector            = utils.draw_sector
-Page.draw_squiggle          = utils.draw_squiggle
-Page.draw_zigzag            = utils.draw_zigzag
-Page.get_image_info         = utils.get_image_info
-Page.get_image_rects        = utils.get_image_rects
-Page.get_label              = utils.get_label
-Page.get_links              = utils.get_links
-Page.get_pixmap             = utils.get_pixmap
-Page.get_text               = utils.get_text
-Page.get_text_blocks        = utils.get_text_blocks
-Page.get_text_selection     = utils.get_text_selection
-Page.get_text_words         = utils.get_text_words
-Page.get_textbox            = utils.get_textbox
-Page.get_textpage_ocr       = utils.get_textpage_ocr
-Page.insert_image           = utils.insert_image
-Page.insert_link            = utils.insert_link
-Page.insert_text            = utils.insert_text
-Page.insert_textbox         = utils.insert_textbox
-Page.insert_htmlbox         = utils.insert_htmlbox
-Page.new_shape              = lambda x: utils.Shape(x)
-Page.replace_image          = utils.replace_image
-Page.search_for             = utils.search_for
-Page.show_pdf_page          = utils.show_pdf_page
-Page.update_link            = utils.update_link
-Page.write_text             = utils.write_text
-Shape                       = utils.Shape
 from .table import find_tables
-
 Page.find_tables = find_tables
 
-Rect.get_area               = utils.get_area
-
-TextWriter.fill_textbox     = utils.fill_textbox
-
 
 class FitzDeprecation(DeprecationWarning):
     pass
@@ -21285,19 +25822,19 @@
     _alias( Rect, 'is_infinite')
     _alias( TextWriter, 'fill_textbox')
     _alias( TextWriter, 'write_text')
-    _alias( utils.Shape, 'draw_bezier')
-    _alias( utils.Shape, 'draw_circle')
-    _alias( utils.Shape, 'draw_curve')
-    _alias( utils.Shape, 'draw_line')
-    _alias( utils.Shape, 'draw_oval')
-    _alias( utils.Shape, 'draw_polyline')
-    _alias( utils.Shape, 'draw_quad')
-    _alias( utils.Shape, 'draw_rect')
-    _alias( utils.Shape, 'draw_sector')
-    _alias( utils.Shape, 'draw_squiggle')
-    _alias( utils.Shape, 'draw_zigzag')
-    _alias( utils.Shape, 'insert_text')
-    _alias( utils.Shape, 'insert_textbox')
+    _alias( Shape, 'draw_bezier')
+    _alias( Shape, 'draw_circle')
+    _alias( Shape, 'draw_curve')
+    _alias( Shape, 'draw_line')
+    _alias( Shape, 'draw_oval')
+    _alias( Shape, 'draw_polyline')
+    _alias( Shape, 'draw_quad')
+    _alias( Shape, 'draw_rect')
+    _alias( Shape, 'draw_sector')
+    _alias( Shape, 'draw_squiggle')
+    _alias( Shape, 'draw_zigzag')
+    _alias( Shape, 'insert_text')
+    _alias( Shape, 'insert_textbox')
 
 if 0:
     restore_aliases()
author	Franz Glasner <fzglas.hg@dom66.de>
date	Sat, 11 Oct 2025 15:24:40 +0200
parents	3b13504f9d89 a6bc019ac0b2
children	4621bd954a09