Python2/PyMuPDF: src/__init_

comparison src/init.py @ 41:71bcc18e306f

MERGE: New upstream PyMuPDF v1.26.5 including MuPDF v1.26.10 BUGS: Needs some additional changes yet. Not yet tested.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Sat, 11 Oct 2025 15:24:40 +0200
parents	3b13504f9d89 a6bc019ac0b2
children	4621bd954a09

comparison

equal deleted inserted replaced

-:8934ac156ef5
+:71bcc18e306f
 import io
 import math
 import os
 import pathlib
 import glob
-import packaging.version
 import re
 import string
 import sys
 import tarfile
 import time
 from ._build import mupdf_location      # noqa F401
 from ._build import pymupdf_git_branch  # noqa F401
 from ._build import pymupdf_git_diff    # noqa F401
 from ._build import pymupdf_git_sha     # noqa F401
 from ._build import pymupdf_version     # noqa F401
+from ._build import pymupdf_version_tuple   # noqa F401
 from ._build import swig_version        # noqa F401
 from ._build import swig_version_tuple  # noqa F401
 mupdf_version = mupdf.FZ_VERSION
 # Removed in PyMuPDF-1.26.1.
 pymupdf_date = None
 # Versions as tuples; useful when comparing versions.
 #
-pymupdf_version_tuple = packaging.version.Version(pymupdf_version).release
 mupdf_version_tuple = packaging.version.Version(mupdf_version).release
 assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
 f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'
 res['compression'] = mupdf.pdf_to_name(obj)
 buf = mupdf.pdf_load_stream(sound)
 stream = JM_BinFromBuffer(buf)
 res['stream'] = stream
 return res
def get_text(self, *args, **kwargs):
    """Return the result of utils.get_text() for this object.

    All positional and keyword arguments are passed through unchanged.
    """
    return utils.get_text(self, *args, **kwargs)
def get_textbox(self, *args, **kwargs):
    """Return the result of utils.get_textbox() for this object.

    All positional and keyword arguments are passed through unchanged.
    """
    return utils.get_textbox(self, *args, **kwargs)
 def get_textpage(self, clip=None, flags=0):
 """Make annotation TextPage."""
 CheckParent(self)
 options = mupdf.FzStextOptions(flags)
 raise RuntimeError( "PDF has no form fonts yet")
 k = mupdf.pdf_new_name( name)
 v = JM_pdf_obj_from_str( pdf, font)
 mupdf.pdf_dict_put( fonts, k, v)
def del_toc_item(
    self,
    idx: int,
) -> None:
    """Remove one TOC / bookmark entry, selected by its index.

    Args:
        idx: position of the entry in the list returned by
            get_outline_xrefs().
    """
    outline_xrefs = self.get_outline_xrefs()
    self._remove_toc_item(outline_xrefs[idx])
 def _delToC(self):
 """Delete the TOC."""
 if self.is_closed or self.is_encrypted:
 raise ValueError("document closed or encrypted")
 xrefs = []  # create Python list
 """Delete object."""
 pdf = _as_pdf_document(self)
 if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
 raise ValueError( MSG_BAD_XREF)
 mupdf.pdf_delete_object(pdf, xref)
+def _do_links(
+doc1: 'Document',
+doc2: 'Document',
+from_page: int = -1,
+to_page: int = -1,
+start_at: int = -1,
+) -> None:
+"""Insert links contained in copied page range into destination PDF.
+Parameter values **must** equal those of method insert_pdf(), which must
+have been previously executed.
+"""
+#pymupdf.log( 'utils.do_links()')
+# --------------------------------------------------------------------------
+# internal function to create the actual "/Annots" object string
+# --------------------------------------------------------------------------
+def cre_annot(lnk, xref_dst, pno_src, ctm):
+"""Create annotation object string for a passed-in link."""
+r = lnk["from"] * ctm  # rect in PDF coordinates
+rect = _format_g(tuple(r))
+if lnk["kind"] == LINK_GOTO:
+txt = annot_skel["goto1"]  # annot_goto
+idx = pno_src.index(lnk["page"])
+p = lnk["to"] * ctm  # target point in PDF coordinates
+annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect)
+elif lnk["kind"] == LINK_GOTOR:
+if lnk["page"] >= 0:
+txt = annot_skel["gotor1"]  # annot_gotor
+pnt = lnk.get("to", Point(0, 0))  # destination point
+if type(pnt) is not Point:
+pnt = Point(0, 0)
+annot = txt(
+lnk["page"],
+pnt.x,
+pnt.y,
+lnk["zoom"],
+lnk["file"],
+lnk["file"],
+rect,
+)
+else:
+txt = annot_skel["gotor2"]  # annot_gotor_n
+to = get_pdf_str(lnk["to"])
+to = to[1:-1]
+f = lnk["file"]
+annot = txt(to, f, rect)
+elif lnk["kind"] == LINK_LAUNCH:
+txt = annot_skel["launch"]  # annot_launch
+annot = txt(lnk["file"], lnk["file"], rect)
+elif lnk["kind"] == LINK_URI:
+txt = annot_skel["uri"]  # annot_uri
+annot = txt(lnk["uri"], rect)
+else:
+annot = ""
+return annot
+# --------------------------------------------------------------------------
+# validate & normalize parameters
+if from_page < 0:
+fp = 0
+elif from_page >= doc2.page_count:
+fp = doc2.page_count - 1
+else:
+fp = from_page
+if to_page < 0 or to_page >= doc2.page_count:
+tp = doc2.page_count - 1
+else:
+tp = to_page
+if start_at < 0:
+raise ValueError("'start_at' must be >= 0")
+sa = start_at
+incr = 1 if fp <= tp else -1  # page range could be reversed
+# lists of source / destination page numbers
+pno_src = list(range(fp, tp + incr, incr))
+pno_dst = [sa + i for i in range(len(pno_src))]
+# lists of source / destination page xrefs
+xref_src = []
+xref_dst = []
+for i in range(len(pno_src)):
+p_src = pno_src[i]
+p_dst = pno_dst[i]
+old_xref = doc2.page_xref(p_src)
+new_xref = doc1.page_xref(p_dst)
+xref_src.append(old_xref)
+xref_dst.append(new_xref)
+# create the links for each copied page in destination PDF
+for i in range(len(xref_src)):
+page_src = doc2[pno_src[i]]  # load source page
+links = page_src.get_links()  # get all its links
+#log( '{pno_src=}')
+#log( '{type(page_src)=}')
+#log( '{page_src=}')
+#log( '{=i len(links)}')
+if len(links) == 0:  # no links there
+page_src = None
+continue
+ctm = ~page_src.transformation_matrix  # calc page transformation matrix
+page_dst = doc1[pno_dst[i]]  # load destination page
+link_tab = []  # store all link definitions here
+for l in links:
+if l["kind"] == LINK_GOTO and (l["page"] not in pno_src):
+continue  # GOTO link target not in copied pages
+annot_text = cre_annot(l, xref_dst, pno_src, ctm)
+if annot_text:
+link_tab.append(annot_text)
+if link_tab != []:
+page_dst._addAnnot_FromString( tuple(link_tab))
+#log( 'utils.do_links() returning.')
def _do_widgets(
    tar: 'Document',
    src: 'Document',
    graftmap,
    from_page: int = -1,
    to_page: int = -1,
    start_at: int = -1,
    join_duplicates=0,
) -> None:
    """Insert widgets of copied page range into target PDF.

    Parameter values **must** equal those of method insert_pdf() which
    must have been previously executed.

    Args:
        tar: target document receiving the widgets.
        src: source document; nothing is done unless it is a Form PDF.
        graftmap: graft map handed to mupdf.pdf_graft_mapped_object().
        from_page: first source page number of the copied range.
        to_page: last source page number; if smaller than from_page the
            range is walked in descending order.
        start_at: target page number of the first copied page.
        join_duplicates: if true, widgets with equal names are joined under
            a common parent; otherwise the second widget is renamed.
    """
    if not src.is_form_pdf:  # nothing to do: source PDF has no fields
        return

    def clean_kid_parents(acro_fields):
        """ Make sure all kids have correct "Parent" pointers."""
        for i in range(acro_fields.pdf_array_len()):
            parent = acro_fields.pdf_array_get(i)
            kids = parent.pdf_dict_get(PDF_NAME("Kids"))
            for j in range(kids.pdf_array_len()):
                kid = kids.pdf_array_get(j)
                kid.pdf_dict_put(PDF_NAME("Parent"), parent)

    def join_widgets(pdf, acro_fields, xref1, xref2, name):
        """Called for each pair of widgets having the same name.

        Args:
            pdf: target MuPDF document
            acro_fields: object Root/AcroForm/Fields
            xref1, xref2: widget xrefs having same names
            name: (str) the name

        Result:
            Defined or updated widget parent that points to both widgets.
        """

        def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2):
            """Merge widget in xref2 into "Kids" list of widget xref1.

            Args:
                xref1, kids1: target widget and its "Kids" array.
                xref2, kids2: source widget and its "Kids" array (may be empty).
            """
            # make indirect objects from widgets
            w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0)
            w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0)
            # find source widget in "Fields" array
            idx = acro_fields.pdf_array_find(w2_ind)
            acro_fields.pdf_array_delete(idx)

            if not kids2.pdf_is_array():  # source widget has no kids
                widget = mupdf.pdf_load_object(pdf, xref2)

                # delete name from widget and insert target as parent
                widget.pdf_dict_del(PDF_NAME("T"))
                widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind)

                # put in target Kids
                kids1.pdf_array_push(w2_ind)
            else:  # copy source kids to target kids
                for i in range(kids2.pdf_array_len()):
                    kid = kids2.pdf_array_get(i)
                    kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind)
                    kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0)
                    kids1.pdf_array_push(kid_ind)

        def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name):
            """Make new "Parent" for two widgets with same name.

            Args:
                xref1, w1: first widget
                xref2, w2: second widget
                name: field name

            Result:
                Both widgets have no "Kids". We create a new object with the
                name and a "Kids" array containing the widgets.
                Original widgets must be removed from AcroForm/Fields.
            """
            # make new "Parent" object
            new = mupdf.pdf_new_dict(pdf, 5)
            new.pdf_dict_put_text_string(PDF_NAME("T"), name)
            kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2)
            new_obj = mupdf.pdf_add_object(pdf, new)
            new_obj_xref = new_obj.pdf_to_num()
            new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0)

            # copy over some required source widget properties
            # (only the first widget's "FT" and "AA" are moved to the parent)
            ft = w1.pdf_dict_get(PDF_NAME("FT"))
            w1.pdf_dict_del(PDF_NAME("FT"))
            new_obj.pdf_dict_put(PDF_NAME("FT"), ft)
            aa = w1.pdf_dict_get(PDF_NAME("AA"))
            w1.pdf_dict_del(PDF_NAME("AA"))
            new_obj.pdf_dict_put(PDF_NAME("AA"), aa)

            # remove name field, insert "Parent" field in source widgets
            w1.pdf_dict_del(PDF_NAME("T"))
            w1.pdf_dict_put(PDF_NAME("Parent"), new_ind)
            w2.pdf_dict_del(PDF_NAME("T"))
            w2.pdf_dict_put(PDF_NAME("Parent"), new_ind)

            # put source widgets in "kids" array
            ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0)
            ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0)
            kids.pdf_array_push(ind1)
            kids.pdf_array_push(ind2)

            # remove source widgets from "AcroForm/Fields"
            idx = acro_fields.pdf_array_find(ind1)
            acro_fields.pdf_array_delete(idx)
            idx = acro_fields.pdf_array_find(ind2)
            acro_fields.pdf_array_delete(idx)

            acro_fields.pdf_array_push(new_ind)

        w1 = mupdf.pdf_load_object(pdf, xref1)
        w2 = mupdf.pdf_load_object(pdf, xref2)
        kids1 = w1.pdf_dict_get(PDF_NAME("Kids"))
        kids2 = w2.pdf_dict_get(PDF_NAME("Kids"))

        # check which widget has a suitable "Kids" array
        if kids1.pdf_is_array():
            re_target(pdf, acro_fields, xref1, kids1, xref2, kids2)  # pylint: disable=arguments-out-of-order
        elif kids2.pdf_is_array():
            re_target(pdf, acro_fields, xref2, kids2, xref1, kids1)  # pylint: disable=arguments-out-of-order
        else:
            new_target(pdf, acro_fields, xref1, w1, xref2, w2, name)  # pylint: disable=arguments-out-of-order

    def get_kids(parent, kids_list):
        """Return xref list of leaf kids for a parent.

        Call with an empty list.
        """
        kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            return kids_list
        for i in range(kids.pdf_array_len()):
            kid = kids.pdf_array_get(i)
            # NOTE(review): "Kids" is tested with pdf_is_array() above but
            # with pdf_is_dict() here - confirm which check is intended.
            if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))):
                kids_list = get_kids(kid, kids_list)
            else:
                kids_list.append(kid.pdf_to_num())
        return kids_list

    def kids_xrefs(widget):
        """Get the xref of top "Parent" and the list of leaf widgets."""
        kids_list = []
        parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent"))
        parent_xref = parent.pdf_to_num()
        if parent_xref == 0:  # widget has no "Parent"
            return parent_xref, kids_list
        kids_list = get_kids(parent, kids_list)
        return parent_xref, kids_list

    def deduplicate_names(pdf, acro_fields, join_duplicates=False):
        """Handle any widget name duplicates caused by the merge."""
        names = {}  # key is a widget name, value a list of widgets having it.

        # extract all names and widgets in "AcroForm/Fields"
        for i in range(mupdf.pdf_array_len(acro_fields)):
            wobject = mupdf.pdf_array_get(acro_fields, i)
            xref = wobject.pdf_to_num()

            # extract widget name and collect widget(s) using it
            T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T"))
            xrefs = names.get(T, [])
            xrefs.append(xref)
            names[T] = xrefs

        for name, xrefs in names.items():
            if len(xrefs) < 2:
                continue
            xref0, xref1 = xrefs[:2]  # only exactly 2 should occur!
            if join_duplicates:  # combine fields with equal names
                join_widgets(pdf, acro_fields, xref0, xref1, name)
            else:  # make field names unique
                newname = name + f" [{xref1}]"  # append this to the name
                wobject = mupdf.pdf_load_object(pdf, xref1)
                wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname)

        clean_kid_parents(acro_fields)

    def get_acroform(doc):
        """Retrieve the AcroForm dictionary from a PDF."""
        pdf = mupdf.pdf_document_from_fz_document(doc)
        # AcroForm (= central form field info)
        return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm")

    tarpdf = mupdf.pdf_document_from_fz_document(tar)
    srcpdf = mupdf.pdf_document_from_fz_document(src)

    if tar.is_form_pdf:
        # target is a Form PDF, so use it to include source fields
        acro = get_acroform(tar)
        # Important arrays in AcroForm
        acro_fields = acro.pdf_dict_get(PDF_NAME("Fields"))
        tar_co = acro.pdf_dict_get(PDF_NAME("CO"))
        if not tar_co.pdf_is_array():
            tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5)
    else:
        # target is no Form PDF, so copy over source AcroForm
        acro = mupdf.pdf_deep_copy_obj(get_acroform(src))  # make a copy

        # Clear "Fields" and "CO" arrays: will be populated by page fields.
        # This is required to avoid copying unneeded objects.
        acro.pdf_dict_del(PDF_NAME("Fields"))
        acro.pdf_dict_put_array(PDF_NAME("Fields"), 5)
        acro.pdf_dict_del(PDF_NAME("CO"))
        acro.pdf_dict_put_array(PDF_NAME("CO"), 5)

        # Enrich AcroForm for copying to target
        acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro)

        # Insert AcroForm into target PDF
        acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft)
        acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields"))
        tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO"))

        # get its xref and insert it into target catalog
        tar_xref = acro_tar.pdf_to_num()
        acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root"))
        root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind)

    # source page numbers in copy order; from_page / to_page are used as
    # given, without the range normalization a caller may have applied
    if from_page <= to_page:
        src_range = range(from_page, to_page + 1)
    else:
        src_range = range(from_page, to_page - 1, -1)

    parents = {}  # information about widget parents

    # remove "P" owning page reference from all widgets of all source pages
    for i in src_range:
        src_page = src[i]
        for xref in [
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        ]:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)
            w_obj.pdf_dict_del(PDF_NAME("P"))

            # get the widget's parent structure
            parent_xref, old_kids = kids_xrefs(w_obj)
            if parent_xref:
                parents[parent_xref] = {
                    "new_xref": 0,
                    "old_kids": old_kids,
                    "new_kids": [],
                }

    # Copy over Parent widgets first - they are not page-dependent
    for xref in parents.keys():  # pylint: disable=consider-using-dict-items
        parent = mupdf.pdf_load_object(srcpdf, xref)
        parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent)
        parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft)
        kids_xrefs_new = get_kids(parent_tar, [])
        parent_xref_new = parent_tar.pdf_to_num()
        parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0)
        acro_fields.pdf_array_push(parent_ind)
        parents[xref]["new_xref"] = parent_xref_new
        parents[xref]["new_kids"] = kids_xrefs_new

    for i in range(len(src_range)):
        # read first copied over page in target
        tar_page = tar[start_at + i]

        # read the original page in the source PDF
        src_page = src[src_range[i]]

        # now walk through source page widgets and copy over
        w_xrefs = [  # widget xrefs of the source page
            xref
            for xref, wtype, _ in src_page.annot_xrefs()
            if wtype == mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        ]
        if not w_xrefs:  # no widgets on this source page
            continue

        # convert to formal PDF page
        tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page)

        # extract annotations array
        tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots"))
        if not mupdf.pdf_is_array(tar_annots):
            tar_annots = mupdf.pdf_dict_put_array(
                tar_page_pdf.obj(), PDF_NAME("Annots"), 5
            )

        for xref in w_xrefs:
            w_obj = mupdf.pdf_load_object(srcpdf, xref)

            # check if field takes part in inter-field validations
            is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C"))

            # check if parent of widget already in target
            parent_xref = mupdf.pdf_to_num(
                w_obj.pdf_dict_get(PDF_NAME("Parent"))
            )
            if parent_xref == 0:  # parent not in target yet
                try:
                    w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj)
                except Exception as e:
                    message_warning(f"cannot copy widget at {xref=}: {e}")
                    continue
                w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft)
                tar_xref = w_obj_tar.pdf_to_num()
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)
                mupdf.pdf_array_push(acro_fields, w_obj_tar_ind)
            else:
                # the widget was copied together with its parent above:
                # look up its new xref by its position among the old kids
                parent = parents[parent_xref]
                idx = parent["old_kids"].index(xref)  # search for xref in parent
                tar_xref = parent["new_kids"][idx]
                w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0)
                mupdf.pdf_array_push(tar_annots, w_obj_tar_ind)

            # Into "AcroForm/CO" if a computation field.
            if is_aac:
                mupdf.pdf_array_push(tar_co, w_obj_tar_ind)

    deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates)
 def _embeddedFileGet(self, idx):
 pdf = _as_pdf_document(self)
 names = mupdf.pdf_dict_getl(
 mupdf.pdf_trailer(pdf),
 finally:
 mupdf.ll_pdf_drop_page_tree( pdf.m_internal)
 self._reset_page_refs()
def get_char_widths(
    doc: 'Document',
    xref: int,
    limit: int = 256,
    idx: int = 0,
    fontdict: OptDict = None,
) -> list:
    """Return the glyph list of the font given by its xref.

    Notes:
        The font is looked up via CheckFontInfo(). If nothing is recorded
        for it yet, a new entry [xref, fontdict] is appended to
        doc.FontInfos. The returned glyphs are also stored in the font's
        dictionary under key "glyphs".

        For the fonts named 'ZapfDingbats' and 'Symbol' the module-level
        tables zapf_glyphs / symbol_glyphs are used as initial glyph list.
        For CJK fonts (recognized by name) None is returned.

    Args:
        xref: xref number of the font.
        limit: requested number of glyphs; at least 256 are used.
        idx: passed on to doc._get_char_widths().
        fontdict: optional font properties with keys "name", "ext", "type",
            "ordering" and "simple"; determined from the document if None.

    Raises:
        ValueError: if the font extension is the empty string.
    """
    fontinfo = CheckFontInfo(doc, xref)
    if fontinfo is not None:  # already recorded: reuse the stored values
        fontdict = fontinfo[1]
        glyphs = fontdict["glyphs"]
        simple = fontdict["simple"]
        ordering = fontdict["ordering"]
    else:  # not recorded yet: create the entry
        if fontdict is None:
            name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref)
            fontdict = {
                "name": name,
                "type": stype,
                "ext": ext,
                "ascender": asc,
                "descender": dsc,
            }
        else:
            name = fontdict["name"]
            ext = fontdict["ext"]
            stype = fontdict["type"]
            # both values are recomputed below; the lookups are kept so a
            # dictionary lacking these keys still raises KeyError
            ordering = fontdict["ordering"]
            simple = fontdict["simple"]

        if ext == "":
            raise ValueError("xref is not a font")

        # 'simple' fonts are recognized by their type
        simple = stype in ("Type1", "MMType1", "TrueType")

        # CJK fonts are recognized by their name
        cjk_ordering = {
            "Fangti": 0,
            "Ming": 0,
            "Heiti": 1,
            "Song": 1,
            "Gothic": 2,
            "Mincho": 2,
            "Dotum": 3,
            "Batang": 3,
        }
        ordering = cjk_ordering.get(name, -1)

        fontdict["simple"] = simple

        if name == "ZapfDingbats":
            glyphs = zapf_glyphs
        elif name == "Symbol":
            glyphs = symbol_glyphs
        else:
            glyphs = None

        fontdict["glyphs"] = glyphs
        fontdict["ordering"] = ordering

        fontinfo = [xref, fontdict]
        doc.FontInfos.append(fontinfo)

    oldlimit = 0 if glyphs is None else len(glyphs)
    mylimit = max(256, limit)

    if mylimit <= oldlimit:  # enough glyphs are available already
        return glyphs

    if ordering < 0:  # not a CJK font
        glyphs = doc._get_char_widths(
            xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx
        )
    else:  # CJK fonts use char codes and width = 1
        glyphs = None

    fontdict["glyphs"] = glyphs
    fontinfo[1] = fontdict
    UpdateFontInfo(doc, fontinfo)

    return glyphs
 def get_layer(self, config=-1):
 """Content of ON, OFF, RBGroups of an OC layer."""
 pdf = _as_pdf_document(self)
 ocp = mupdf.pdf_dict_getl(
 mupdf.pdf_trailer( pdf),
 xref = 0
 ENSURE_OPERATION(pdf)
 xref = mupdf.pdf_create_object(pdf)
 return xref
def get_oc(doc: 'Document', xref: int) -> int:
    """Return optional content object xref for an image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject.

    Returns:
        The xref number referenced by the object's "OC" key, or 0 if that
        key is not an indirect reference.

    Raises:
        ValueError: if the document is closed or encrypted, or if the
            object's "Subtype" is neither /Image nor /Form.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    t, oc = doc.xref_get_key(xref, "OC")
    if t != "xref":
        return 0
    # An indirect reference reads "<number> <generation> R"; take the number
    # so that a non-zero generation does not break the conversion.
    return int(oc.split()[0])
 def get_ocgs(self):
 """Show existing optional content groups."""
 ci = mupdf.pdf_new_name( "CreatorInfo")
 pdf = _as_pdf_document(self)
 ocgs = mupdf.pdf_dict_getl(
 m = mupdf.pdf_array_len( intent)
 for j in range(m):
 o = mupdf.pdf_array_get( intent, j)
 if mupdf.pdf_is_name( o):
 intents.append( mupdf.pdf_to_name( o))
-hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
+if mupdf_version_tuple >= (1, 27):
+resource_stack = mupdf.PdfResourceStack()
+hidden = mupdf.pdf_is_ocg_hidden( pdf, resource_stack, usage, ocg)
+else:
+hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
 item = {
 "name": name,
 "intent": intents,
 "on": not hidden,
 "usage": usage,
 }
 temp = xref
 rc[ temp] = item
 return rc
def get_ocmd(doc: 'Document', xref: int) -> dict:
    """Return the definition of an OCMD (optional content membership dictionary).

    Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and
    /VE (visibility expression, PDF array). Via string manipulation, this
    info is converted to a Python dictionary with keys "xref", "ocgs", "policy"
    and "ve" - ready to recycle as input for 'set_ocmd()'.

    Args:
        xref: xref number of the OCMD object.

    Returns:
        Dictionary {"xref": int, "ocgs": list or None, "policy": str or None,
        "ve": list or None}. A value is None if its key was not found.

    Raises:
        ValueError: for an xref outside the document's xref range, an object
            that is no OCMD, or irregular /P or /VE syntax.
    """
    if xref not in range(doc.xref_length()):
        raise ValueError("bad xref")
    text = doc.xref_object(xref, compressed=True)
    if "/Type/OCMD" not in text:
        raise ValueError("bad object type")
    textlen = len(text)

    p0 = text.find("/OCGs[")  # look for /OCGs key
    p1 = text.find("]", p0)
    if p0 < 0 or p1 < 0:  # no OCGs found
        ocgs = None
    else:
        ocgs = text[p0 + 6 : p1].replace("0 R", " ").split()
        ocgs = list(map(int, ocgs))

    # Look for the /P policy key. The policy is one of the four PDF names
    # below; they are matched literally because "On" is capitalized and a
    # search for a name ending could run into unrelated parts of the object.
    if "/P/" not in text:
        policy = None
    else:
        match = re.search(r"/P/(AllOn|AnyOn|AnyOff|AllOff)\b", text)
        if match is None:  # some irregular syntax
            raise ValueError("bad object at xref")
        policy = match.group(1)

    p0 = text.find("/VE[")  # look for /VE visibility expression key
    if p0 < 0:  # no visibility expression found
        ve = None
    else:
        lp = rp = 0  # find end of /VE by finding last ']'.
        p1 = p0
        while lp < 1 or lp != rp:
            p1 += 1
            if not p1 < textlen:  # some irregular syntax
                raise ValueError("bad object at xref")
            if text[p1] == "[":
                lp += 1
            if text[p1] == "]":
                rp += 1
        # p1 now positioned at the last "]"
        ve = text[p0 + 3 : p1 + 1]  # the PDF /VE array
        # turn the PDF array into JSON: operators become strings and
        # indirect references become plain numbers
        ve = (
            ve.replace("/And", '"and",')
            .replace("/Not", '"not",')
            .replace("/Or", '"or",')
        )
        ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[")
        import json

        try:
            ve = json.loads(ve)
        except Exception:
            exception_info()
            message(f"bad /VE key: {ve!r}")
            raise
    return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve}
 def get_outline_xrefs(self):
 """Get list of outline xref numbers."""
 xrefs = []
 pdf = _as_pdf_document(self, required=0)
 val = self._getPageInfo(pno, 2)
 if not full:
 return [v[:-1] for v in val]
 return val
def get_page_labels(self):
    """Return the page label definitions of the PDF document.

    Returns:
        A list with one dictionary per item delivered by
        self._get_page_labels(), each converted by utils.rule_dict():
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    """
    raw_rules = self._get_page_labels()
    return [utils.rule_dict(rule) for rule in raw_rules]
def get_page_numbers(doc, label, only_one=False):
    """Find the page numbers that carry a given page label.

    Args:
        doc: PDF document object (resp. 'self').
        label: (str) the label to look for.
        only_one: (bool) stop searching after the first hit.

    Returns:
        List of page numbers having this label. Empty if 'label' is empty
        or the document has no page label definitions.
    """
    hits = []
    if label:
        rules = doc._get_page_labels()
        if rules != []:
            for pno in range(doc.page_count):
                if utils.get_label_pno(pno, rules) != label:
                    continue
                hits.append(pno)
                if only_one:
                    break
    return hits
def get_page_pixmap(
    doc: 'Document',
    pno: int,
    *,
    matrix: matrix_like = None,
    dpi=None,
    colorspace: Colorspace = None,
    clip: rect_like = None,
    alpha: bool = False,
    annots: bool = True,
) -> 'Pixmap':
    """Render a document page, given by its number, to a pixmap.

    Notes:
        Convenience function: loads page 'pno' and returns the result of its
        get_pixmap() method.

    Args:
        pno: (int) page number.
        matrix: transformation matrix; Identity is used if None.
        dpi: passed on to get_pixmap() unchanged.
        colorspace: colorspace; csRGB is used if None.
        clip: passed on to get_pixmap() unchanged.
        alpha: (bool) passed on to get_pixmap() unchanged.
        annots: (bool) passed on to get_pixmap() unchanged.
    """
    # resolve the defaults before loading the page
    render_args = {
        "matrix": Identity if matrix is None else matrix,
        "dpi": dpi,
        "colorspace": csRGB if colorspace is None else colorspace,
        "clip": clip,
        "alpha": alpha,
        "annots": annots,
    }
    page = doc[pno]
    return page.get_pixmap(**render_args)
def get_page_text(
    doc: 'Document',
    pno: int,
    option: str = "text",
    clip: rect_like = None,
    flags: OptInt = None,
    textpage: 'TextPage' = None,
    sort: bool = False,
) -> typing.Any:
    """Extract a document page's text by page number.

    Notes:
        Convenience function calling page.get_text().

    Args:
        pno: page number
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: passed on to page.get_text().
        flags: passed on to page.get_text().
        textpage: passed on to page.get_text().
        sort: passed on to page.get_text().

    Returns:
        Output of page.get_text().
    """
    # 'textpage' must be forwarded as well - otherwise a TextPage supplied
    # by the caller is silently ignored.
    return doc[pno].get_text(
        option,
        clip=clip,
        flags=flags,
        textpage=textpage,
        sort=sort,
    )
 def get_page_xobjects(self, pno: int) -> list:
 """Retrieve a list of XObjects used on a page.
 """
 if self.is_closed or self.is_encrypted:
 raise ValueError("document closed or encrypted")
 sigflag = -1
 if sigflags.m_internal:
 sigflag = mupdf.pdf_to_int(sigflags)
 return sigflag
def get_toc(
    doc: 'Document',
    simple: bool = True,
) -> list:
    """Create a table of contents.

    Args:
        simple: a bool to control output. Each entry of the returned list
            consists of outline level, title and page number; if simple is
            False, the link destination is appended as a fourth item.

    Returns:
        List of entries; empty if the document has no outline.

    Raises:
        ValueError: if the document is closed.
    """

    def recurse(olItem, liste, lvl):
        """Walk the outline chain, appending one entry per item to 'liste'."""
        while olItem and olItem.this.m_internal:
            title = olItem.title if olItem.title else " "

            # 1-based page number; -1 for external items and items without uri
            page = -1
            if not olItem.is_external and olItem.uri:
                if olItem.page == -1:
                    page = doc.resolve_link(olItem.uri)[0] + 1
                else:
                    page = olItem.page + 1

            entry = [lvl, title, page]
            if not simple:
                entry.append(utils.getLinkDict(olItem, doc))
            liste.append(entry)

            if olItem.down:  # descend into sub-items first
                liste = recurse(olItem.down, liste, lvl + 1)
            olItem = olItem.next
        return liste

    # ensure document is open
    if doc.is_closed:
        raise ValueError("document closed")
    doc.init_doc()
    first_item = doc.outline
    if not first_item:
        return []
    toc = recurse(first_item, [], 1)
    if doc.is_pdf and not simple:
        doc._extend_toc_items(toc)
    return toc
 def get_xml_metadata(self):
 """Get document XML metadata."""
 xml = None
 pdf = _as_pdf_document(self, required=0)
 if pdf.m_internal:
 rc = JM_UnicodeFromBuffer(buff)
 else:
 rc = ''
 return rc
def has_annots(doc: 'Document') -> bool:
    """Tell whether any page has an annotation that is neither link nor widget.

    Raises:
        ValueError: if the document is closed or is no PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    # item[1] is the annotation type; links and widgets do not count
    return any(
        item[1] != mupdf.PDF_ANNOT_LINK and item[1] != mupdf.PDF_ANNOT_WIDGET  # pylint: disable=no-member
        for pno in range(doc.page_count)
        for item in doc.page_annot_xrefs(pno)
    )
def has_links(doc: 'Document') -> bool:
    """Tell whether any page of the document contains a link annotation.

    Raises:
        ValueError: if the document is closed or is no PDF.
    """
    if doc.is_closed:
        raise ValueError("document closed")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    # item[1] is the annotation type
    return any(
        item[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
        for pno in range(doc.page_count)
        for item in doc.page_annot_xrefs(pno)
    )
 def init_doc(self):
 if self.is_encrypted:
 raise ValueError("cannot initialize - document still encrypted")
 self._outline = self._loadOutline()
 self.metadata = dict(
 annots=annots,
 show_progress=show_progress,
 final=final,
 )
def insert_page(
    doc: 'Document',
    pno: int,
    text: typing.Union[str, list, None] = None,
    fontsize: float = 11,
    width: float = 595,
    height: float = 842,
    fontname: str = "helv",
    fontfile: OptStr = None,
    color: OptSeq = (0,),
) -> int:
    """Create a new PDF page and optionally put some text on it.

    Notes:
        Combines doc.new_page() and page.insert_text(); for parameter
        details see these methods. The text is inserted at point (50, 72).

    Returns:
        0 if 'text' is empty or None (the page is created nevertheless),
        otherwise the return value of page.insert_text().
    """
    page = doc.new_page(pno=pno, width=width, height=height)
    if text:
        return page.insert_text(
            (50, 72),
            text,
            fontsize=fontsize,
            fontname=fontname,
            fontfile=fontfile,
            color=color,
        )
    return 0
 def insert_pdf(
 self,
 docsrc,
 *,
 from_page=-1,
 raise ValueError("document closed")
 document = self.this if isinstance(self.this, mupdf.FzDocument) else self.this.super()
 ret = mupdf.fz_needs_password( document)
 return ret
def new_page(
    doc: 'Document',
    pno: int = -1,
    width: float = 595,
    height: float = 842,
) -> Page:
    """Add a new page to the document and return it.

    Args:
        pno: (int) insert before this page. Default: after last page.
        width: (float) page width in points. Default: 595 (ISO A4 width).
        height: (float) page height in points. Default 842 (ISO A4 height).

    Returns:
        The page object obtained by indexing the document with 'pno' after
        the page was created.
    """
    doc._newPage(pno, width=width, height=height)
    created = doc[pno]
    return created
 def next_location(self, page_id):
 """Get (chapter, page) of next page."""
 if self.is_closed or self.is_encrypted:
 raise ValueError("document closed or encrypted")
 if type(page_id) is int:
 def saveIncr(self):
 """ Save PDF incrementally"""
 return self.save(self.name, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
+# ------------------------------------------------------------------------------
+# Remove potentially sensitive data from a PDF. Similar to the Adobe
+# Acrobat 'sanitize' function
+# ------------------------------------------------------------------------------
def scrub(
    doc: 'Document',
    attached_files: bool = True,
    clean_pages: bool = True,
    embedded_files: bool = True,
    hidden_text: bool = True,
    javascript: bool = True,
    metadata: bool = True,
    redactions: bool = True,
    redact_images: int = 0,
    remove_links: bool = True,
    reset_fields: bool = True,
    reset_responses: bool = True,
    thumbnails: bool = True,
    xml_metadata: bool = True,
) -> None:
    """Remove potentially sensitive data from a PDF.

    Args:
        doc: the PDF document; must be open and not encrypted.
        attached_files: overwrite the content of file attachment
            annotations with a single space.
        clean_pages: clean the page /Contents. If False, 'hidden_text' and
            'redactions' are switched off, too.
        embedded_files: delete all embedded files.
        hidden_text: remove text drawn with text render mode 3.
        javascript: replace every /JavaScript action by an empty script.
        metadata: reset the standard metadata via doc.set_metadata({}).
        redactions: apply redactions on pages that contain redact annots.
        redact_images: passed as 'images' to page.apply_redactions().
        remove_links: delete all links of every page.
        reset_fields: call reset() on every widget of every page.
        reset_responses: call delete_responses() on every annotation.
        thumbnails: set the /Thumb entry of every page to null.
        xml_metadata: delete XML metadata and null every /Metadata entry.

    Raises:
        ValueError: if the document is no PDF, is closed or encrypted, or
            if an xref without object definition is encountered.
    """

    def remove_hidden(cont_lines):
        """Remove hidden text from a PDF page.

        Args:
            cont_lines: list of lines with /Contents content. Must have been
                created after page.clean_contents(), which ensures one
                command per line.

        Returns:
            The list of /Contents lines with hidden text removed, or None
            if no hidden-text operator was found at all.
        """
        out_lines = []  # will return this
        in_text = False  # indicate if within BT/ET object
        suppress = False  # indicate text suppression active
        found_hidden = False  # any "3 Tr" operator seen?
        for line in cont_lines:
            if line == b"BT":  # start of text object
                in_text = True
                out_lines.append(line)
                continue
            if line == b"ET":  # end of text object
                in_text = False
                out_lines.append(line)
                continue
            if line.endswith(b"Tr"):  # text render mode operator
                # Compare the operand as bytes. The previous check
                # 'line[0] != b"3"' compared an int with bytes and was
                # therefore always true.
                if line[:-2].strip() == b"3":  # invisible text starts
                    suppress = True
                    found_hidden = True
                    continue
                suppress = False  # text rendering changed
                out_lines.append(line)
                continue
            if line == b"Q":  # unstack command also switches off
                suppress = False
                out_lines.append(line)
                continue
            if suppress and in_text:  # suppress hidden lines
                continue
            out_lines.append(line)
        return out_lines if found_hidden else None

    if not doc.is_pdf:  # only works for PDF
        raise ValueError("is no PDF")
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("closed or encrypted doc")

    if not clean_pages:  # these two require cleaned page contents
        hidden_text = False
        redactions = False

    if metadata:
        doc.set_metadata({})  # remove standard metadata

    for page in doc:
        if reset_fields:
            # reset form fields (widgets)
            for widget in page.widgets():
                widget.reset()

        if remove_links:
            for link in page.get_links():  # remove all links
                page.delete_link(link)

        found_redacts = False
        for annot in page.annots():
            if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
                annot.update_file(buffer_=b" ")  # set file content to empty
            if reset_responses:
                annot.delete_responses()
            if annot.type[0] == mupdf.PDF_ANNOT_REDACT:  # pylint: disable=no-member
                found_redacts = True

        if redactions and found_redacts:
            page.apply_redactions(images=redact_images)

        # Thumbnail removal is independent of page cleaning, so do it before
        # any of the 'continue' statements below can skip it.
        if thumbnails and doc.xref_get_key(page.xref, "Thumb")[0] != "null":
            doc.xref_set_key(page.xref, "Thumb", "null")

        if not (clean_pages or hidden_text):
            continue  # done with the page

        page.clean_contents()
        xrefs = page.get_contents()
        if not xrefs:
            continue

        if hidden_text:
            assert len(xrefs) == 1  # only one because of cleaning.
            xref = xrefs[0]
            cont = doc.xref_stream(xref)
            cont_lines = remove_hidden(cont.splitlines())  # remove hidden text
            if cont_lines is not None:  # something was actually removed
                doc.update_stream(xref, b"\n".join(cont_lines))  # rewrite /Contents

    # pages are scrubbed, now perform document-wide scrubbing
    # remove embedded files
    if embedded_files:
        for name in doc.embfile_names():
            doc.embfile_del(name)

    if xml_metadata:
        doc.del_xml_metadata()

    # Walking all xrefs is only needed for these two options.
    xref_limit = doc.xref_length() if (xml_metadata or javascript) else 0
    for xref in range(1, xref_limit):
        if not doc.xref_object(xref):
            msg = "bad xref %i - clean PDF before scrubbing" % xref
            raise ValueError(msg)
        if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript":
            # a /JavaScript action object: replace with a null JavaScript
            doc.update_object(xref, "<</S/JavaScript/JS()>>")
            continue  # no further handling
        if not xml_metadata:
            continue
        if doc.xref_get_key(xref, "Type")[1] == "/Metadata":
            # delete any metadata object directly
            doc.update_object(xref, "<<>>")
            doc.update_stream(xref, b"deleted", new=True)
            continue
        if doc.xref_get_key(xref, "Metadata")[0] != "null":
            doc.xref_set_key(xref, "Metadata", "null")
def search_page_for(
    doc: 'Document',
    pno: int,
    text: str,
    quads: bool = False,
    clip: rect_like = None,
    flags: int = None,
    textpage: 'TextPage' = None,
) -> list:
    """Search for a string on a page of the document.

    Args:
        pno: page number
        text: string to be searched for
        quads: (bool) return quads instead of rectangles
        clip: restrict search to this rectangle
        flags: bit switches; if None, a default combination is used
            (dehyphenate, preserve ligatures and whitespace, mediabox clip)
        textpage: reuse a prepared textpage

    Returns:
        a list of rectangles or quads, each containing an occurrence.
    """
    if flags is None:
        # default text extraction flags for searching
        flags = 0
        for bit in (
            TEXT_DEHYPHENATE,
            TEXT_PRESERVE_LIGATURES,
            TEXT_PRESERVE_WHITESPACE,
            TEXT_MEDIABOX_CLIP,
        ):
            flags = flags | bit
    page = doc[pno]
    hits = page.search_for(
        text,
        quads=quads,
        clip=clip,
        flags=flags,
        textpage=textpage,
    )
    return hits
 def select(self, pyliste):
 """Build sub-pdf with page numbers in the list."""
 if self.is_closed or self.is_encrypted:
 raise ValueError("document closed or encrypted")
 if not self.is_pdf:
 pdfdict += f"/{key} {value}"
 pdfdict += ">>"
 self.xref_set_key(xref, "MarkInfo", pdfdict)
 return True
def set_metadata(doc: 'Document', m: dict = None) -> None:
    """Update the PDF /Info object.

    Args:
        m: a dictionary like doc.metadata. None or an empty dictionary
            removes the /Info entry from the trailer. Values that are
            falsy or equal to "none" / "null" set the respective key to
            null. The keys "format" and "encryption" are accepted but
            ignored.

    Raises:
        ValueError: for non-PDF, closed or encrypted documents, if 'm' is
            no dictionary, or if it contains unsupported keys.
    """
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if m is None:
        m = {}
    elif type(m) is not dict:
        raise ValueError("bad metadata")

    # maps keys of doc.metadata to /Info keys; None means "not stored there"
    keymap = {
        "author": "Author",
        "producer": "Producer",
        "creator": "Creator",
        "title": "Title",
        "format": None,
        "encryption": None,
        "creationDate": "CreationDate",
        "modDate": "ModDate",
        "subject": "Subject",
        "keywords": "Keywords",
        "trapped": "Trapped",
    }
    unknown = set(m.keys()).difference(set(keymap.keys()))
    if unknown:
        raise ValueError("bad dict key(s): %s" % unknown)

    # locate the current /Info object; zero means there is none
    info_type, info_value = doc.xref_get_key(-1, "Info")
    info_xref = int(info_value.replace("0 R", "")) if info_type == "xref" else 0

    nothing_given = m == {}
    if nothing_given and info_xref == 0:  # nothing to do
        return

    if info_xref == 0:  # no prev metadata: get new xref
        info_xref = doc.get_new_xref()
        doc.update_object(info_xref, "<<>>")  # fill it with empty object
        doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref)
    elif nothing_given:  # remove existing metadata
        doc.xref_set_key(-1, "Info", "null")
        doc.init_doc()
        return

    for key, val in m.items():
        pdf_key = keymap[key]
        if pdf_key is None:  # not part of the /Info object
            continue
        if not bool(val) or val in ("none", "null"):
            pdf_val = "null"
        else:
            pdf_val = get_pdf_str(val)
        doc.xref_set_key(info_xref, pdf_key, pdf_val)
    doc.init_doc()
    return
def set_oc(doc: 'Document', xref: int, oc: int) -> None:
    """Attach optional content object to image or form xobject.

    Args:
        xref: (int) xref number of an image or form xobject
        oc: (int) xref number of an OCG or OCMD. Zero removes an existing
            /OC entry and does nothing if there is none.

    Raises:
        ValueError: if the document is closed or encrypted, if 'xref' is
            neither an image nor a form xobject, if 'oc' is negative, or
            if 'oc' is neither an OCG nor an OCMD.
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    t, name = doc.xref_get_key(xref, "Subtype")
    if t != "name" or name not in ("/Image", "/Form"):
        raise ValueError("bad object type at xref %i" % xref)
    if oc < 0:  # would otherwise be written as an invalid reference
        raise ValueError("bad object type at xref %i" % oc)
    if oc == 0:
        # Remove the /OC entry if present. Never fall through: that would
        # store the dangling reference "0 0 R".
        if "OC" in doc.xref_get_keys(xref):
            doc.xref_set_key(xref, "OC", "null")
        return None
    t, name = doc.xref_get_key(oc, "Type")
    if t != "name" or name not in ("/OCG", "/OCMD"):
        raise ValueError("bad object type at xref %i" % oc)
    doc.xref_set_key(xref, "OC", "%i 0 R" % oc)
    return None
def set_ocmd(
    doc: 'Document',
    xref: int = 0,
    ocgs: typing.Union[list, None] = None,
    policy: OptStr = None,
    ve: typing.Union[list, None] = None,
) -> int:
    """Create or update an OCMD object in a PDF document.

    Args:
        xref: (int) 0 for creating a new object, otherwise update existing one.
        ocgs: (list) OCG xref numbers, which shall be subject to 'policy'.
        policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing).
        ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'.

    Returns:
        Xref of the created or updated OCMD.

    Raises:
        ValueError: for unknown OCG xrefs, a bad policy, a malformed
            visibility expression, or if 'xref' is no OCMD object.
    """
    all_ocgs = set(doc.get_ocgs().keys())
    policies = {
        "anyon": "AnyOn",
        "allon": "AllOn",
        "anyoff": "AnyOff",
        "alloff": "AllOff",
    }

    def ve_maker(ve):
        """Convert a (nested) visibility expression to its PDF array string."""
        # Note: messages use '% (ve,)' because 've' may be a tuple, and
        # '"%s" % some_tuple' raises TypeError instead of formatting it.
        if type(ve) not in (list, tuple) or len(ve) < 2:
            raise ValueError("bad 've' format: %s" % (ve,))
        operator = ve[0]
        if not isinstance(operator, str) or operator.lower() not in ("and", "or", "not"):
            raise ValueError("bad operand: %s" % (operator,))
        if operator.lower() == "not" and len(ve) != 2:
            raise ValueError("bad 've' format: %s" % (ve,))
        item = "[/%s" % operator.title()
        for x in ve[1:]:
            if type(x) is int:  # an OCG xref
                if x not in all_ocgs:
                    raise ValueError("bad OCG %i" % x)
                item += " %i 0 R" % x
            else:  # a nested expression
                item += " %s" % ve_maker(x)
        item += "]"
        return item

    text = "<</Type/OCMD"

    if ocgs and type(ocgs) in (list, tuple):  # some OCGs are provided
        s = set(ocgs).difference(all_ocgs)  # contains illegal xrefs
        if s != set():
            msg = "bad OCGs: %s" % s
            raise ValueError(msg)
        text += "/OCGs[" + " ".join("%i 0 R" % x for x in ocgs) + "]"

    if policy:
        policy = str(policy).lower()
        if policy not in policies:
            raise ValueError("bad policy: %s" % policy)
        text += "/P/%s" % policies[policy]

    if ve:
        text += "/VE%s" % ve_maker(ve)

    text += ">>"

    # make new object or replace old OCMD (check type first)
    if xref == 0:
        xref = doc.get_new_xref()
    elif "/Type/OCMD" not in doc.xref_object(xref, compressed=True):
        raise ValueError("bad xref or not an OCMD")
    doc.update_object(xref, text)
    return xref
 def set_pagelayout(self, pagelayout: str):
 """Set the PDF PageLayout value."""
 valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
 xref = self.pdf_catalog()
 if xref == 0:
 for v in valid:
 if pagemode.lower() == v.lower():
 self.xref_set_key(xref, "PageMode", f"/{v}")
 return True
 raise ValueError("bad PageMode value")
def set_page_labels(doc, labels):
    """Add / replace page label definitions in PDF document.

    Args:
        doc: PDF document (resp. 'self').
        labels: list of label dictionaries like:
            {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int},
            as returned by get_page_labels(). The sequence is not modified.
    """
    # William Chapman, 2021-01-06

    def create_label_str(label):
        """Convert Python label dict to corresponding PDF rule string.

        Args:
            label: (dict) build rule for the label.
        Returns:
            PDF label rule string wrapped in "<<", ">>".
        """
        s = "%i<<" % label["startpage"]
        if label.get("prefix", "") != "":
            s += "/P(%s)" % label["prefix"]
        if label.get("style", "") != "":
            s += "/S/%s" % label["style"]
        if label.get("firstpagenum", 1) > 1:
            s += "/St %i" % label["firstpagenum"]
        s += ">>"
        return s

    def create_nums(labels):
        """Return concatenated string of all labels rules.

        Args:
            labels: (list) label dictionaries.
        Returns:
            PDF compatible string for page label definitions, ready to be
            enclosed in PDF array 'Nums[...]'.
        """
        # sorted() instead of list.sort(): do not reorder the caller's list
        ordered = sorted(labels, key=lambda x: x["startpage"])
        return "".join(create_label_str(label) for label in ordered)

    doc._set_page_labels(create_nums(labels))
def set_toc(
    doc: 'Document',
    toc: list,
    collapse: int = 1,
) -> int:
    """Create new outline tree (table of contents, TOC).

    Args:
        toc: (list, tuple) each entry must contain level, title, page and
            optionally top margin on the page. None or '()' remove the TOC.
        collapse: (int) collapses entries beyond this level. Zero or None
            shows all entries unfolded.

    Returns:
        the number of inserted items, or the number of removed items respectively.

    Raises:
        ValueError: for closed, encrypted or non-PDF documents and for any
            formal error in 'toc' (bad row format, bad hierarchy level,
            page number out of range).
    """
    if doc.is_closed or doc.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if not toc:  # remove all entries
        return len(doc._delToC())

    # validity checks --------------------------------------------------------
    if type(toc) not in (list, tuple):
        raise ValueError("'toc' must be list or tuple")
    toclen = len(toc)
    page_count = doc.page_count
    t0 = toc[0]
    # the first row gets the same format check as all following rows
    if type(t0) not in (list, tuple) or len(t0) not in (3, 4):
        raise ValueError("items must be sequences of 3 or 4 items")
    if t0[0] != 1:
        raise ValueError("hierarchy level of item 0 must be 1")
    for i in range(toclen - 1):
        t1 = toc[i]
        t2 = toc[i + 1]
        if not -1 <= t1[2] <= page_count:
            raise ValueError("row %i: page number out of range" % i)
        if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4):
            raise ValueError("bad row %i" % (i + 1))
        if (type(t2[0]) is not int) or t2[0] < 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
        if t2[0] > t1[0] + 1:
            raise ValueError("bad hierarchy level in row %i" % (i + 1))
    # the pairwise loop above never looks at the page number of the last row
    if not -1 <= toc[-1][2] <= page_count:
        raise ValueError("row %i: page number out of range" % (toclen - 1))
    # no formal errors in toc --------------------------------------------------

    # --------------------------------------------------------------------------
    # make a list of xref numbers, which we can use for our TOC entries
    # --------------------------------------------------------------------------
    # Delete the old outlines. As in the previous code, the xrefs of the
    # deleted items are not reused: every new item gets a fresh xref.
    doc._delToC()
    xref = [doc._getOLRootNumber()]  # entry zero is outline root xref number
    for _ in range(toclen):
        xref.append(doc.get_new_xref())  # acquire new ones

    lvltab = {0: 0}  # to store last entry per hierarchy level

    # ------------------------------------------------------------------------------
    # contains new outline objects as dicts - first one is the outline root
    # ------------------------------------------------------------------------------
    olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
    # ------------------------------------------------------------------------------
    # build olitems as a list of PDF-like connected dictionaries
    # ------------------------------------------------------------------------------
    for i in range(toclen):
        o = toc[i]
        lvl = o[0]  # level
        title = get_pdf_str(o[1])  # title
        pno = min(doc.page_count - 1, max(0, o[2] - 1))  # page number
        page_xref = doc.page_xref(pno)
        page_height = doc.page_cropbox(pno).height
        top = Point(72, page_height - 36)
        dest_dict = {"to": top, "kind": LINK_GOTO}  # fall back target
        if o[2] < 0:
            dest_dict["kind"] = LINK_NONE
        if len(o) > 3:  # some target is specified
            if type(o[3]) in (int, float):  # convert a number to a point
                dest_dict["to"] = Point(72, page_height - o[3])
            else:  # if something else, make sure we have a dict
                # We make a copy of o[3] to avoid modifying our caller's data.
                dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict
                if "to" not in dest_dict:  # target point not in dict?
                    dest_dict["to"] = top  # put default in
                else:  # transform target to PDF coordinates
                    page = doc[pno]
                    point = Point(dest_dict["to"])
                    point.y = page.cropbox.height - point.y
                    point = point * page.rotation_matrix
                    dest_dict["to"] = (point.x, point.y)
        d = {
            "first": -1,
            "count": 0,
            "last": -1,
            "prev": -1,
            "next": -1,
            "dest": utils.getDestStr(page_xref, dest_dict),
            "top": dest_dict["to"],
            "title": title,
            "parent": lvltab[lvl - 1],
            "xref": xref[i + 1],
            "color": dest_dict.get("color"),
            "flags": dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0),
        }
        lvltab[lvl] = i + 1
        parent = olitems[lvltab[lvl - 1]]  # the parent entry

        if (
            dest_dict.get("collapse") or collapse and lvl > collapse
        ):  # suppress expansion
            parent["count"] -= 1  # make /Count negative
        else:
            parent["count"] += 1  # positive /Count

        if parent["first"] == -1:  # first child of this parent
            parent["first"] = i + 1
            parent["last"] = i + 1
        else:  # chain behind the parent's current last child
            d["prev"] = parent["last"]
            prev = olitems[parent["last"]]
            prev["next"] = i + 1
            parent["last"] = i + 1
        olitems.append(d)

    # ------------------------------------------------------------------------------
    # now create each outline item as a string and insert it in the PDF
    # ------------------------------------------------------------------------------
    # The root item (index 0) has no dest / next / parent / prev / title
    # keys; missing keys are skipped explicitly instead of swallowing
    # exceptions.
    link_keys = (
        ("first", "First"),
        ("last", "Last"),
        ("next", "Next"),
        ("parent", "Parent"),
        ("prev", "Prev"),
    )
    for i, ol in enumerate(olitems):
        txt = "<<"
        if ol["count"] != 0:
            txt += "/Count %i" % ol["count"]
        if "dest" in ol:
            txt += ol["dest"]
        for key, pdf_key in link_keys:
            target = ol.get(key, -1)
            if target > -1:  # -1 means "no such relative"
                txt += "/%s %i 0 R" % (pdf_key, xref[target])
        if "title" in ol:
            txt += "/Title" + ol["title"]

        if ol.get("color") and len(ol["color"]) == 3:
            txt += f"/C[ {_format_g(tuple(ol['color']))}]"
        if ol.get("flags", 0) > 0:
            txt += "/F %i" % ol["flags"]

        if i == 0:  # special: this is the outline root
            txt += "/Type/Outlines"  # so add the /Type entry
        txt += ">>"
        doc.update_object(xref[i], txt)  # insert the PDF object

    doc.init_doc()
    return toclen
def set_toc_item(
    doc: 'Document',
    idx: int,
    dest_dict: OptDict = None,
    kind: OptInt = None,
    pno: OptInt = None,
    uri: OptStr = None,
    title: OptStr = None,
    to: point_like = None,
    filename: OptStr = None,
    zoom: float = 0,
) -> None:
    """Update TOC item by index.

    It allows changing the item's title and link destination.

    Args:
        idx:
            (int) desired index of the TOC list, as created by get_toc.
        dest_dict:
            (dict) destination dictionary as created by get_toc(False).
            Outrules all other parameters. If None, the remaining parameters
            are used to make a dest dictionary. The dictionary is not
            modified.
        kind:
            (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only
            the title will be updated. If pymupdf.LINK_NONE, the TOC item will
            be deleted.
        pno:
            (int) page number (1-based like in get_toc). Required if
            pymupdf.LINK_GOTO.
        uri:
            (str) the URL, required if pymupdf.LINK_URI.
        title:
            (str) the new title. No change if None.
        to:
            (point-like) destination on the target page. If omitted, (72, 36)
            will be used as target coordinates.
        filename:
            (str) destination filename, required for pymupdf.LINK_GOTOR and
            pymupdf.LINK_LAUNCH.
        zoom:
            (float) a zoom factor for the target location (pymupdf.LINK_GOTO).

    Raises:
        ValueError: for a bad page number, a bad color value, or if no valid
            bookmark destination can be built.
    """
    xref = doc.get_outline_xrefs()[idx]
    page_xref = 0
    if type(dest_dict) is dict:
        # Work on a copy: the target point is converted to PDF coordinates
        # below, and doing that in the caller's dictionary would flip the
        # y-coordinate again on every repeated call.
        dest_dict = dest_dict.copy()
        if dest_dict["kind"] == LINK_GOTO:
            pno = dest_dict["page"]
            page_xref = doc.page_xref(pno)
            page_height = doc.page_cropbox(pno).height
            # Point(...) makes a private copy and accepts any point-like
            to = Point(dest_dict.get('to', Point(72, 36)))
            to.y = page_height - to.y
            dest_dict["to"] = to
        action = utils.getDestStr(page_xref, dest_dict)
        if not action.startswith("/A"):
            raise ValueError("bad bookmark dest")
        color = dest_dict.get("color")
        if color:
            color = list(map(float, color))
            if len(color) != 3 or min(color) < 0 or max(color) > 1:
                raise ValueError("bad color value")
        bold = dest_dict.get("bold", False)
        italic = dest_dict.get("italic", False)
        flags = italic + 2 * bold
        collapse = dest_dict.get("collapse")
        return doc._update_toc_item(
            xref,
            action=action[2:],  # strip the leading "/A"
            title=title,
            color=color,
            flags=flags,
            collapse=collapse,
        )

    if kind == LINK_NONE:  # delete bookmark item
        return doc.del_toc_item(idx)
    if kind is None and title is None:  # treat as no-op
        return None
    if kind is None:  # only update title text
        return doc._update_toc_item(xref, action=None, title=title)

    if kind == LINK_GOTO:
        if pno is None or pno not in range(1, doc.page_count + 1):
            raise ValueError("bad page number")
        page_xref = doc.page_xref(pno - 1)
        page_height = doc.page_cropbox(pno - 1).height
        if to is None:
            to = Point(72, page_height - 36)
        else:
            to = Point(to)
            to.y = page_height - to.y

    ddict = {
        "kind": kind,
        "to": to,
        "uri": uri,
        "page": pno,
        "file": filename,
        "zoom": zoom,
    }
    action = utils.getDestStr(page_xref, ddict)
    if action == "" or not action.startswith("/A"):
        raise ValueError("bad bookmark dest")

    return doc._update_toc_item(xref, action=action[2:], title=title)
 def set_xml_metadata(self, metadata):
 """Store XML document level metadata."""
 if self.is_closed or self.is_encrypted:
 raise ValueError("document closed or encrypted")
 else:
 xml = mupdf.pdf_add_stream( pdf, res, mupdf.PdfObj(), 0)
 mupdf.pdf_dict_put( xml, PDF_NAME('Type'), PDF_NAME('Metadata'))
 mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
 mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml)
def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt:
    """Build font subsets in a PDF.

    Eligible fonts are potentially replaced by smaller versions. Page text is
    NOT rewritten and thus should retain properties like being hidden or
    controlled by optional content.

    This method by default uses MuPDF's own internal feature to create subset
    fonts. As this is a new function, errors may still occur. In this case,
    please fall back to using the previous version by using "fallback=True".
    Fallback mode requires the external package 'fontTools'.

    Args:
        fallback: use the older deprecated implementation.
        verbose: only used by fallback mode.

    Returns:
        The new MuPDF-based code returns None.  The deprecated fallback
        mode returns 0 if there are no fonts to subset.  Otherwise, it
        returns the decrease in fontsize (the difference in fontsize),
        measured in bytes. Fonts that could not be subset do not
        contribute to this number.
    """
    # Font binaries: -  "buffer" -> (names, xrefs, (unicodes, glyphs))
    # An embedded font is uniquely defined by its fontbuffer only. It may have
    # multiple names and xrefs.
    # Once the sets of used unicodes and glyphs are known, we compute a
    # smaller version of the buffer using package fontTools.

    if not fallback:  # by default use MuPDF function
        pdf = mupdf.pdf_document_from_fz_document(doc)
        mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count)))
        return

    font_buffers = {}

    def get_old_widths(xref):
        """Retrieve old font '/W' and '/DW' values."""
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specifications
            return None, None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        widths = doc.xref_get_key(df_xref, "W")
        widths = widths[1] if widths[0] == "array" else None
        dwidths = doc.xref_get_key(df_xref, "DW")
        dwidths = dwidths[1] if dwidths[0] == "int" else None
        return widths, dwidths

    def set_old_widths(xref, widths, dwidths):
        """Restore the old '/W' and '/DW' in subsetted font.

        If either parameter is None or evaluates to False, the corresponding
        dictionary key will be set to null.
        """
        df = doc.xref_get_key(xref, "DescendantFonts")
        if df[0] != "array":  # only handle xref specs
            return None
        df_xref = int(df[1][1:-1].replace("0 R", ""))
        for key, value in (("W", widths), ("DW", dwidths)):
            if type(value) is str and value:
                doc.xref_set_key(df_xref, key, value)
            elif doc.xref_get_key(df_xref, key)[0] != "null":
                # Only null a key that exists. The previous code fell into
                # its 'else' branch here and passed a None value on.
                doc.xref_set_key(df_xref, key, "null")
        return None

    def set_subset_fontname(new_xref):
        """Generate a name prefix to tag a font as subset.

        We use a random generator to select 6 upper case ASCII characters.
        The prefixed name must be put in the font xref as the "/BaseFont" value
        and in the FontDescriptor object as the '/FontName' value.
        """
        # The following generates a prefix like 'ABCDEF+'
        import random
        import string

        prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+"
        font_str = doc.xref_object(new_xref, compressed=True)
        font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix)
        df = doc.xref_get_key(new_xref, "DescendantFonts")
        if df[0] == "array":
            df_xref = int(df[1][1:-1].replace("0 R", ""))
            fd = doc.xref_get_key(df_xref, "FontDescriptor")
            if fd[0] == "xref":
                fd_xref = int(fd[1].replace("0 R", ""))
                fd_str = doc.xref_object(fd_xref, compressed=True)
                fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix)
                doc.update_object(fd_xref, fd_str)
        doc.update_object(new_xref, font_str)

    def build_subset(buffer, unc_set, gid_set):
        """Build font subset using fontTools.

        Args:
            buffer: (bytes) the font given as a binary buffer.
            unc_set: (set) required unicodes.
            gid_set: (set) required glyph ids; used instead of the unicodes
                if the error unicode 0xFFFD is among 'unc_set'.
        Returns:
            Either None if subsetting is unsuccessful or the subset font buffer.
        """
        try:
            import fontTools.subset as fts
        except ImportError:
            if g_exceptions_verbose:
                exception_info()
            message("This method requires fontTools to be installed.")
            raise
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            oldfont_path = f"{tmp_dir}/oldfont.ttf"
            newfont_path = f"{tmp_dir}/newfont.ttf"
            uncfile_path = f"{tmp_dir}/uncfile.txt"
            args = [
                oldfont_path,
                "--retain-gids",
                f"--output-file={newfont_path}",
                "--layout-features=*",
                "--passthrough-tables",
                "--ignore-missing-glyphs",
                "--ignore-missing-unicodes",
                "--symbol-cmap",
            ]

            # store glyph ids or unicodes as file
            with open(uncfile_path, "w", encoding='utf8') as unc_file:
                if 0xFFFD in unc_set:  # error unicode exists -> use glyphs
                    args.append(f"--gids-file={uncfile_path}")
                    gid_set.add(189)
                    for gid in list(gid_set):
                        unc_file.write("%i\n" % gid)
                else:
                    args.append(f"--unicodes-file={uncfile_path}")
                    unc_set.add(255)
                    for unc in list(unc_set):
                        unc_file.write("%04x\n" % unc)

            # store fontbuffer as a file
            with open(oldfont_path, "wb") as fontfile:
                fontfile.write(buffer)

            # The temporary directory is brand-new, so there is no stale
            # output file that would have to be removed first.
            try:  # invoke fontTools subsetter
                fts.main(args)
                font = Font(fontfile=newfont_path)
                new_buffer = font.buffer  # subset font binary
                if font.glyph_count == 0:  # intercept empty font
                    new_buffer = None
            except Exception:
                exception_info()
                new_buffer = None
        return new_buffer

    def repl_fontnames(doc):
        """Populate 'font_buffers'.

        For each font candidate, store its xref and the list of names
        by which PDF text may refer to it (there may be multiple).
        """

        def norm_name(name):
            """Recreate font name that contains PDF hex codes.

            E.g. #20 -> space, chr(32)
            """
            # Single pass: a decoded '#' (from "#23") is not re-interpreted,
            # and a '#' not followed by two hex digits is left untouched.
            return re.sub(
                r"#([0-9A-Fa-f]{2})",
                lambda match: chr(int(match.group(1), 16)),
                name,
            )

        def get_fontnames(doc, item):
            """Return a list of fontnames for an item of page.get_fonts().

            There may be multiple names e.g. for Type0 fonts.
            """
            fontname = item[3]
            names = [fontname]
            fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:]
            fontname = norm_name(fontname)
            if fontname not in names:
                names.append(fontname)
            descendents = doc.xref_get_key(item[0], "DescendantFonts")
            if descendents[0] != "array":
                return names
            descendents = descendents[1][1:-1]
            if descendents.endswith(" 0 R"):
                xref = int(descendents[:-4])
                descendents = doc.xref_object(xref, compressed=True)
            p1 = descendents.find("/BaseFont")
            if p1 >= 0:
                p2 = descendents.find("/", p1 + 1)
                # The name ends at the next "/" or ">>", whichever comes
                # first. str.find() returns -1 for "not found", which must
                # not take part in min().
                ends = [
                    pos
                    for pos in (
                        descendents.find("/", p2 + 1),
                        descendents.find(">>", p2 + 1),
                    )
                    if pos >= 0
                ]
                p1 = min(ends) if ends else len(descendents)
                fontname = descendents[p2 + 1 : p1]
                fontname = norm_name(fontname)
                if fontname not in names:
                    names.append(fontname)
            return names

        for i in range(doc.page_count):
            for f in doc.get_page_fonts(i, full=True):
                font_xref = f[0]  # font xref
                font_ext = f[1]  # font file extension
                basename = f[3]  # font basename

                if font_ext not in (  # skip if not supported by fontTools
                    "otf",
                    "ttf",
                    "woff",
                    "woff2",
                ):
                    continue
                # skip fonts which already are subsets
                if len(basename) > 6 and basename[6] == "+":
                    continue

                extr = doc.extract_font(font_xref)
                fontbuffer = extr[-1]
                names = get_fontnames(doc, f)
                name_set, xref_set, subsets = font_buffers.get(
                    fontbuffer, (set(), set(), (set(), set()))
                )
                xref_set.add(font_xref)
                name_set.update(names)
                font = Font(fontbuffer=fontbuffer)
                name_set.add(font.name)
                del font
                font_buffers[fontbuffer] = (name_set, xref_set, subsets)

    def find_buffer_by_name(name):
        """Return the font buffer registered under 'name', or None."""
        for buffer, (name_set, _, _) in font_buffers.items():
            if name in name_set:
                return buffer
        return None

    # -----------------
    # main function
    # -----------------
    repl_fontnames(doc)  # populate font information
    if not font_buffers:  # nothing found to do
        if verbose:
            message('No fonts to subset.')
        return 0

    saved_bytes = 0  # total decrease in font size

    # Scan page text for usage of subsettable fonts
    for page in doc:
        # go through the text and extend set of used glyphs by font
        # we use a modified MuPDF trace device, which delivers us glyph ids.
        for span in page.get_texttrace():
            if type(span) is not dict:  # skip useless information
                continue
            fontname = span["font"][:33]  # fontname for the span
            buffer = find_buffer_by_name(fontname)
            if buffer is None:
                continue
            name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer]
            for c in span["chars"]:
                set_ucs.add(c[0])  # unicode
                set_gid.add(c[1])  # glyph id
            font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid))

    # build the font subsets
    for old_buffer, (name_set, xref_set, subsets) in font_buffers.items():
        new_buffer = build_subset(old_buffer, subsets[0], subsets[1])
        fontname = list(name_set)[0]
        if new_buffer is None or len(new_buffer) >= len(old_buffer):
            # subset was not created or did not get smaller
            if verbose:
                message(f'Cannot subset {fontname!r}.')
            continue
        if verbose:
            message(f"Built subset of font {fontname!r}.")
        val = doc._insert_font(fontbuffer=new_buffer)  # store subset font in PDF
        new_xref = val[0]  # get its xref
        set_subset_fontname(new_xref)  # tag fontname as subset font
        font_str = doc.xref_object(  # get its object definition
            new_xref,
            compressed=True,
        )
        # walk through the original font xrefs and replace each by the subset def
        for font_xref in xref_set:
            # we need the original '/W' and '/DW' width values
            width_table, def_width = get_old_widths(font_xref)
            # ... and replace original font definition at xref with it
            doc.update_object(font_xref, font_str)
            # now copy over old '/W' and '/DW' values
            if width_table or def_width:
                set_old_widths(font_xref, width_table, def_width)
        # 'new_xref' remains unused in the PDF and must be removed
        # by garbage collection.
        # Only fonts that were actually replaced count as saving. The old
        # code subtracted the new sizes from the size of ALL fonts.
        saved_bytes += len(old_buffer) - len(new_buffer)

    return saved_bytes
 def switch_layer(self, config, as_default=0):
 """Activate an OC layer."""
 pdf = _as_pdf_document(self)
 cfgs = mupdf.pdf_dict_getl(
 preserve_metadata=preserve_metadata,
 use_objstms=use_objstms,
 compression_effort=compression_effort,
 )
 return bio.getvalue()
def tobytes(self, *args, **kwargs):
    """Forward all arguments to self.write() and return its result."""
    result = self.write(*args, **kwargs)
    return result
 @property
 def xref(self):
 """PDF xref number of the page, looked up in the parent document by page number."""
 # CheckParent is defined elsewhere in this file; presumably it raises if
 # the page has lost its parent document -- TODO confirm.
 CheckParent(self)
 return self.parent.page_xref(self.number)
def xref_copy(doc: 'Document', source: int, target: int, *, keep: list = None) -> None:
    """Copy a PDF dictionary object to another one given their xref numbers.

    Args:
        doc: PDF document object
        source: source xref number
        target: target xref number, the xref must already exist
        keep: an optional list of 1st level keys in target that should not be
            removed before copying.

    Notes:
        This works similar to the copy() method of dictionaries in Python. The
        source may be a stream object. Copying an object onto itself is a
        no-op.
    """
    if source == target:
        # Emptying the target first would wipe the source as well, leaving
        # nothing to copy back.
        return

    if doc.xref_is_stream(source):
        # read new xref stream, maintaining compression
        stream = doc.xref_stream_raw(source)
        doc.update_stream(
            target,
            stream,
            compress=False,  # keeps source compression
            new=True,  # in case target is no stream
        )

    # empty the target completely, observe exceptions
    if keep is None:
        keep = []
    for key in doc.xref_get_keys(target):
        if key in keep:
            continue
        doc.xref_set_key(target, key, "null")

    # copy over all source dict items
    for key in doc.xref_get_keys(source):
        item = doc.xref_get_key(source, key)
        doc.xref_set_key(target, key, item[1])
 def xref_get_key(self, xref, key):
 """Get PDF dict key value of object at 'xref'."""
 pdf = _as_pdf_document(self)
 xreflen = mupdf.pdf_xref_len(pdf)
 if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
 return xref
 __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
 outline = property(lambda self: self._outline)
-tobytes = write
 is_stream = xref_is_stream
 open = Document
 for xref in annot_xrefs:
 annot = self.load_annot(xref)
 annot._yielded=True
 yield annot
def apply_redactions(
    page: 'Page',
    images: int = 2,
    graphics: int = 1,
    text: int = 0,
) -> bool:
    """Apply the redaction annotations of the page.

    Args:
        page: the PDF page.
        images:
            0 - ignore images
            1 - remove all overlapping images
            2 - blank out overlapping image parts
            3 - remove image unless invisible
        graphics:
            0 - ignore graphics
            1 - remove graphics if contained in rectangle
            2 - remove all overlapping graphics
        text:
            0 - remove text
            1 - ignore text

    Returns:
        False if the page has no redaction annotations, else True.

    Raises:
        ValueError: if the document is closed, encrypted or no PDF, or if
            the underlying redaction call reports a failure.
    """

    def center_rect(annot_rect, new_text, font, fsize):
        """Calculate minimal sub-rectangle for the overlay text.

        Notes:
            Because 'insert_textbox' supports no vertical text centering,
            we calculate an approximate number of lines here and return a
            sub-rect with smaller height, which should still be sufficient.
        Args:
            annot_rect: the annotation rectangle
            new_text: the text to insert.
            font: the fontname. Must be one of the CJK or Base-14 set, else
                the rectangle is returned unchanged.
            fsize: the fontsize
        Returns:
            A rectangle to use instead of the annot rectangle.
        """
        if not new_text or annot_rect.width <= EPSILON:
            return annot_rect
        try:
            text_width = get_text_length(new_text, font, fsize)
        except (ValueError, mupdf.FzErrorBase):  # unsupported font
            if g_exceptions_verbose:
                exception_info()
            return annot_rect
        line_height = fsize * 1.2
        limit = annot_rect.width
        h = math.ceil(text_width / limit) * line_height  # estimate rect height
        if h >= annot_rect.height:
            return annot_rect
        # Work on a copy: the caller's rectangle must not be modified.
        r = Rect(annot_rect)
        r.y0 = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5
        return r

    CheckParent(page)
    doc = page.parent
    if doc.is_encrypted or doc.is_closed:
        raise ValueError("document closed or encrypted")
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    # Save the values of all redaction annotations before they are applied.
    redact_annots = [
        annot._get_redact_values()
        for annot in page.annots(
            types=(mupdf.PDF_ANNOT_REDACT,)  # pylint: disable=no-member
        )
    ]
    if not redact_annots:  # no redactions on this page
        return False

    rc = page._apply_redactions(text, images, graphics)  # call MuPDF
    if not rc:  # should not happen really
        raise ValueError("Error applying redactions.")

    # now write replacement text in old redact rectangles
    shape = page.new_shape()
    for redact in redact_annots:
        annot_rect = redact["rect"]
        fill = redact["fill"]
        if fill:
            shape.draw_rect(annot_rect)  # colorize the rect background
            shape.finish(fill=fill, color=fill)
        if "text" in redact:  # if we also have text
            new_text = redact["text"]
            align = redact.get("align", 0)
            fname = redact["fontname"]
            fsize = redact["fontsize"]
            color = redact["text_color"]
            # try finding vertical centered sub-rect
            trect = center_rect(annot_rect, new_text, fname, fsize)

            rc = -1
            while rc < 0 and fsize >= 4:  # while not enough room
                # (re-) try insertion
                rc = shape.insert_textbox(
                    trect,
                    new_text,
                    fontname=fname,
                    fontsize=fsize,
                    color=color,
                    align=align,
                )
                fsize -= 0.5  # reduce font if unsuccessful
    shape.commit()  # append new contents object
    return True
 def recolor(self, components=1):
 """Convert colorspaces of objects on the page.
 Valid values are 1, 3 and 4.
 """
 val.parent = weakref.proxy(self) # owning page object
 val.parent._annot_refs[id(val)] = val
 annot._erase()
 return val
def delete_image(page: 'Page', xref: int):
    """Delete the image referred to by xref.

    The image is not removed physically: it is replaced by a small
    transparent Pixmap using method Page.replace_image.

    Args:
        page: the page on which the replacement is performed.
        xref: xref of the image to delete.
    """
    # a tiny pixmap is enough - its dimension is irrelevant
    blank = Pixmap(csGRAY, (0, 0, 1, 1), 1)
    blank.clear_with()  # all sample bytes become 0x00 (100% transparent)
    page.replace_image(xref, pixmap=blank)
 def delete_link(self, linkdict):
 """Delete a Link."""
 CheckParent(self)
 if not isinstance( linkdict, dict):
 return  # have no dictionary
 mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots)
 JM_refresh_links( page)
 return finished()
def delete_widget(page: 'Page', widget: Widget) -> Widget:
    """Delete widget from page and return the next one.

    Args:
        page: the page owning the widget.
        widget: the widget to delete. Must carry an '_annot' attribute.

    Returns:
        The value of 'widget.next' as read before the deletion.

    Raises:
        ValueError: if 'widget' has no (or a None) '_annot' attribute.
    """
    CheckParent(page)
    annot = getattr(widget, "_annot", None)
    if annot is None:
        raise ValueError("bad type: widget")
    successor = widget.next  # must be read before the widget is stripped
    page.delete_annot(annot)
    widget._annot.parent = None
    # remove every instance attribute of the now deleted widget
    widget.__dict__.clear()
    return successor
 @property
 def derotation_matrix(self) -> Matrix:
     """Reflects page de-rotation."""
     if g_use_extra:
         # take the shortcut via the 'extra' module
         return Matrix(extra.Page_derotate_matrix(self.this))
     pdfpage = self._pdf_page(required=False)
     if pdfpage.m_internal:
         return Matrix(JM_derotate_page_matrix(pdfpage))
     # no underlying PDF page object: build the matrix from the unit rect
     return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
def draw_bezier(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    p3: point_like,
    p4: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.

    Creates a new Shape for the page, draws the curve, applies the given
    drawing properties via Shape.finish() and commits the shape.

    Args:
        page: the page to draw on.
        p1, p2, p3, p4: start point, two control points and end point.
        overlay: passed to Shape.commit().
        All other arguments are passed to Shape.finish() unchanged.

    Returns:
        The return value of Shape.draw_bezier().
    """
    shape = page.new_shape()
    end_point = shape.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4))
    shape.finish(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.commit(overlay)
    return end_point
def draw_circle(
    page: 'Page',
    center: point_like,
    radius: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    morph: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle given its center and radius.

    Creates a new Shape for the page, draws the circle, applies the given
    drawing properties via Shape.finish() and commits the shape.

    Returns:
        The return value of Shape.draw_circle().
    """
    shape = page.new_shape()
    end_point = shape.draw_circle(Point(center), radius)
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_curve(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    p3: point_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3.

    Creates a new Shape for the page, draws the curve, applies the given
    drawing properties via Shape.finish() and commits the shape.

    Returns:
        The return value of Shape.draw_curve().
    """
    shape = page.new_shape()
    end_point = shape.draw_curve(Point(p1), Point(p2), Point(p3))
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_line(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc=0,
) -> Point:
    """Draw a line from point p1 to point p2.

    Creates a new Shape for the page, draws the line, applies the given
    drawing properties via Shape.finish() (the path is never closed) and
    commits the shape.

    Returns:
        The return value of Shape.draw_line().
    """
    shape = page.new_shape()
    end_point = shape.draw_line(Point(p1), Point(p2))
    finish_args = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_oval(
    page: 'Page',
    rect: typing.Union[rect_like, quad_like],
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    morph: OptSeq = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw an oval given its containing rectangle or quad.

    Creates a new Shape for the page, draws the oval, applies the given
    drawing properties via Shape.finish() and commits the shape.

    Returns:
        The return value of Shape.draw_oval().
    """
    shape = page.new_shape()
    end_point = shape.draw_oval(rect)  # 'rect' is handed over unconverted
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_polyline(
    page: 'Page',
    points: list,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    morph: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    closePath: bool = False,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw multiple connected line segments.

    Creates a new Shape for the page, draws the polyline, applies the given
    drawing properties via Shape.finish() and commits the shape.

    Returns:
        The return value of Shape.draw_polyline().
    """
    shape = page.new_shape()
    end_point = shape.draw_polyline(points)
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_quad(
    page: 'Page',
    quad: quad_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a quadrilateral.

    Creates a new Shape for the page, draws the quad, applies the given
    drawing properties via Shape.finish() and commits the shape.

    Returns:
        The return value of Shape.draw_quad().
    """
    shape = page.new_shape()
    end_point = shape.draw_quad(Quad(quad))
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_rect(
    page: 'Page',
    rect: rect_like,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
    radius=None,
) -> Point:
    '''
    Draw a rectangle. See Shape class method for details.

    Creates a new Shape for the page, draws the rectangle (forwarding
    'radius'), applies the given drawing properties via Shape.finish() and
    commits the shape. Returns the return value of Shape.draw_rect().
    '''
    shape = page.new_shape()
    end_point = shape.draw_rect(Rect(rect), radius=radius)
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_sector(
    page: 'Page',
    center: point_like,
    point: point_like,
    beta: float,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    dashes: OptStr = None,
    fullSector: bool = True,
    morph: OptSeq = None,
    width: float = 1,
    closePath: bool = False,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a circle sector given circle center, one arc end point and the angle of the arc.

    Parameters:
        center -- center of circle
        point -- arc end point
        beta -- angle of arc (degrees)
        fullSector -- connect arc ends with center

    The remaining arguments are drawing properties passed to Shape.finish(),
    except 'overlay', which is passed to Shape.commit().

    Returns:
        The return value of Shape.draw_sector().
    """
    shape = page.new_shape()
    end_point = shape.draw_sector(
        Point(center), Point(point), beta, fullSector=fullSector
    )
    finish_args = dict(
        color=color,
        fill=fill,
        dashes=dashes,
        width=width,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        closePath=closePath,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_squiggle(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a squiggly line from point p1 to point p2.

    Creates a new Shape for the page, draws the squiggle with the given
    'breadth', applies the drawing properties via Shape.finish() (the path
    is never closed) and commits the shape.

    Returns:
        The return value of Shape.draw_squiggle().
    """
    shape = page.new_shape()
    end_point = shape.draw_squiggle(Point(p1), Point(p2), breadth=breadth)
    finish_args = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
def draw_zigzag(
    page: 'Page',
    p1: point_like,
    p2: point_like,
    breadth: float = 2,
    color: OptSeq = (0,),
    dashes: OptStr = None,
    width: float = 1,
    lineCap: int = 0,
    lineJoin: int = 0,
    overlay: bool = True,
    morph: OptSeq = None,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> Point:
    """Draw a zigzag line from point p1 to point p2.

    Creates a new Shape for the page, draws the zigzag with the given
    'breadth', applies the drawing properties via Shape.finish() (the path
    is never closed) and commits the shape.

    Returns:
        The return value of Shape.draw_zigzag().
    """
    shape = page.new_shape()
    end_point = shape.draw_zigzag(Point(p1), Point(p2), breadth=breadth)
    finish_args = dict(
        color=color,
        dashes=dashes,
        width=width,
        closePath=False,
        lineCap=lineCap,
        lineJoin=lineJoin,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    shape.finish(**finish_args)
    shape.commit(overlay)
    return end_point
 def extend_textpage(self, tpage, flags=0, matrix=None):
 page = self.this
 tp = tpage.this
 assert isinstance( tp, mupdf.FzStextPage)
 paths.append(npath)
 val = None
 return paths
def get_image_info(
    page: 'Page',
    hashes: bool = False,
    xrefs: bool = False
) -> list:
    """Extract image information only from a pymupdf.TextPage.

    The result is cached on the page as '_image_info' whenever it was
    computed including hashes.

    Args:
        page: the page to inspect.
        hashes: (bool) include MD5 hash for each image.
        xrefs: (bool) try to find the xref for each image. Sets hashes to true.
            Ignored if the document is no PDF.

    Returns:
        The list of image information items. With 'xrefs', each item also
        gets an "xref" entry (0 if no image with that digest was found).
    """
    doc = page.parent
    if doc.is_pdf:
        if xrefs:
            hashes = True  # xref lookup works via the image digests
    else:
        xrefs = False  # xrefs only exist in PDFs

    info_list = getattr(page, "_image_info", None)
    if info_list and not xrefs:
        return info_list  # cached information is good enough

    if not info_list:
        textpage = page.get_textpage(flags=TEXT_PRESERVE_IMAGES)
        info_list = textpage.extractIMGINFO(hashes=hashes)
        del textpage
        if hashes:
            page._image_info = info_list  # remember for later calls

    if not xrefs:
        return info_list

    # map the digest of every image of the page to its xref
    digests = {}
    for image in page.get_images():
        image_xref = image[0]
        pix = Pixmap(doc, image_xref)
        digests[pix.digest] = image_xref
        del pix

    for info in info_list:
        info["xref"] = digests.get(info["digest"], 0)
    return info_list
def get_image_rects(page: 'Page', name, transform=False) -> list:
    """Return list of image positions on a page.

    Args:
        page: the page to inspect.
        name: (str, list, int) image identification. May be reference name, an
            item of the page's image list or an xref.
        transform: (bool) whether to also return the transformation matrix.

    Returns:
        A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix)
        for all image locations on the page.

    Raises:
        ValueError: if a reference name matches no image or more than one.
    """
    if type(name) in (list, tuple):
        xref = name[0]  # item of the page's image list
    elif type(name) is int:
        xref = name
    else:
        # reference name: must identify exactly one image of the page
        candidates = [img for img in page.get_images() if img[7] == name]
        if not candidates:
            raise ValueError("bad image name")
        if len(candidates) != 1:
            raise ValueError("multiple image names found")
        xref = candidates[0][0]

    pix = Pixmap(page.parent, xref)  # make pixmap of the image to compute MD5
    digest = pix.digest
    del pix

    matching = [
        info for info in page.get_image_info(hashes=True)
        if info["digest"] == digest
    ]
    if transform:
        return [
            (Rect(info["bbox"]), Matrix(info["transform"])) for info in matching
        ]
    return [Rect(info["bbox"]) for info in matching]
def get_label(page):
    """Return the label for this PDF page.

    Args:
        page: page object.

    Returns:
        The label (str) of the page. Errors return an empty string.
    """
    # Jorj McKie, 2021-01-06
    labels = page.parent._get_page_labels()
    if labels:
        labels.sort()  # sorts the returned list in place
        return utils.get_label_pno(page.number, labels)
    return ""  # the document defines no page labels
def get_links(page: 'Page') -> list:
    """Create a list of all links contained in a PDF page.

    Notes:
        see PyMuPDF documentation for details.
    """
    CheckParent(page)
    links = []
    ln = page.first_link
    while ln:  # walk the chain of links
        links.append(utils.getLinkDict(ln, page.parent))
        ln = ln.next

    if links and page.parent.is_pdf:
        # alternative source: page.annot_xrefs()
        linkxrefs = [
            x
            for x in JM_get_annot_xref_list2(page)
            if x[1] == mupdf.PDF_ANNOT_LINK  # pylint: disable=no-member
        ]
        # only enrich the dictionaries if both lists correspond 1:1
        if len(linkxrefs) == len(links):
            for link, item in zip(links, linkxrefs):
                link["xref"] = item[0]
                link["id"] = item[2]
    return links
def get_pixmap(
    page: 'Page',
    *,
    matrix: matrix_like=Identity,
    dpi=None,
    colorspace: Colorspace=None,
    clip: rect_like=None,
    alpha: bool=False,
    annots: bool=True,
) -> 'Pixmap':
    """Create pixmap of page.

    Keyword args:
        matrix: Matrix for transformation (default: Identity).
        dpi: desired dots per inch. If given, matrix is ignored.
        colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB.
        clip: (irect-like) restrict rendering to this area.
        alpha: (bool) whether to include alpha channel
        annots: (bool) whether to also render annotations

    Raises:
        ValueError: if the colorspace has a component count other than
            1, 3 or 4.
    """
    if colorspace is None:
        colorspace = csRGB
    if dpi:
        # a dpi value overrides any given matrix
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)

    if type(colorspace) is str:
        # any string other than gray / cmyk means RGB
        by_name = {"GRAY": csGRAY, "CMYK": csCMYK}
        colorspace = by_name.get(colorspace.upper(), csRGB)
    if colorspace.n not in (1, 3, 4):
        raise ValueError("unsupported colorspace")

    display_list = page.get_displaylist(annots=annots)
    pix = display_list.get_pixmap(
        matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip
    )
    display_list = None  # drop the display list reference
    if dpi:
        pix.set_dpi(dpi, dpi)
    return pix
 def remove_rotation(self):
 """Set page rotation to 0 while maintaining visual appearance."""
 rot = self.rotation  # normalized rotation value
 if rot == 0:
 return  Identity # nothing to do
 rc = tp.extractTextbox(rect)
 if textpage is None:
 del tp
 return rc
def get_text(self, *args, **kwargs):
    """Forward to utils.get_text(), passing this object as first argument."""
    return utils.get_text(self, *args, **kwargs)
def get_text_blocks(self, *args, **kwargs):
    """Forward to utils.get_text_blocks(), passing this object as first argument."""
    return utils.get_text_blocks(self, *args, **kwargs)
def get_text_selection(self, *args, **kwargs):
    """Forward to utils.get_text_selection(), passing this object as first argument."""
    return utils.get_text_selection(self, *args, **kwargs)
def get_text_words(self, *args, **kwargs):
    """Forward to utils.get_text_words(), passing this object as first argument."""
    return utils.get_text_words(self, *args, **kwargs)
def get_textpage_ocr(self, *args, **kwargs):
    """Forward to utils.get_textpage_ocr(), passing this object as first argument."""
    return utils.get_textpage_ocr(self, *args, **kwargs)
 def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
 CheckParent(self)
 if matrix is None:
 matrix = Matrix(1, 1)
 old_rotation = self.rotation
 return xref               # we are done
 # need to create document font info
 doc.get_char_widths(xref, fontdict=fontdict)
 return xref
def insert_htmlbox(
    page,
    rect,
    text,
    *,
    css=None,
    scale_low=0,
    archive=None,
    rotate=0,
    oc=0,
    opacity=1,
    overlay=True,
    _scale_word_width=True,
    _verbose=False,
) -> tuple:
    """Insert text with optional HTML tags and stylings into a rectangle.

    Args:
        rect: (rect-like) rectangle into which the text should be placed.
        text: (str) text with optional HTML tags and stylings. A Story
            object is accepted as well.
        css: (str) CSS styling commands.
        scale_low: (float) force-fit content by scaling it down. Must be in
            range [0, 1]. If 1, no scaling will take place. If 0, arbitrary
            down-scaling is acceptable. A value of 0.1 would mean that content
            may be scaled down by at most 90%.
        archive: Archive object pointing to locations of used fonts or images
        rotate: (int) rotate the text in the box by a multiple of 90 degrees.
        oc: (int) the xref of an OCG / OCMD (Optional Content).
        opacity: (float) set opacity of inserted content.
        overlay: (bool) put text on top of page content.
        _scale_word_width: internal, for testing only.
        _verbose: internal, for testing only.

    Returns:
        A tuple of floats (spare_height, scale).
        spare_height:
            The height of the remaining space in <rect> below the
            text, or -1 if we failed to fit.
        scale:
            The scaling required; `0 < scale <= 1`.
            Will be less than `scale_low` if we failed to fit.

    Raises:
        ValueError: for a rotation that is no multiple of 90, a 'scale_low'
            outside [0, 1], or a 'text' that is neither str nor Story.
    """
    # validate and normalize the rotation angle into [0, 360)
    if rotate % 90 != 0:
        raise ValueError("bad rotation angle")
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360

    if not 0 <= scale_low <= 1:
        raise ValueError("'scale_low' must be in [0, 1]")

    if css is None:
        css = ""

    rect = Rect(rect)
    # the story is laid out unrotated: swap width / height for 90 / 270
    if rotate in (90, 270):
        box_width, box_height = rect.height, rect.width
    else:
        box_width, box_height = rect.width, rect.height
    temp_rect = Rect(0, 0, box_width, box_height)

    # use a small border by default
    mycss = "body {margin:1px;}" + css  # append user CSS

    # either make a story, or accept a given one
    if isinstance(text, str):  # if a string, convert to a Story
        story = Story(html=text, user_css=mycss, archive=archive)
    elif isinstance(text, Story):
        story = text
    else:
        raise ValueError("'text' must be a string or a Story")

    # ----------------------------------------------------------------
    # Find a scaling factor that lets our story fit in. Instead of scaling
    # the text smaller, we instead look at how much bigger the rect needs
    # to be to fit the text, then reverse the scaling to get how much we
    # need to scale down the text.
    # ----------------------------------------------------------------
    if scale_low == 0:
        rect_scale_max = None
    else:
        rect_scale_max = 1 / scale_low
    if _scale_word_width:
        fit_flags = mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW
    else:
        fit_flags = 0

    fit = story.fit_scale(
        temp_rect,
        scale_min=1,
        scale_max=rect_scale_max,
        flags=fit_flags,
        verbose=_verbose,
    )

    if not fit.big_enough:  # there was no fit
        scale = 1 / fit.parameter
        return (-1, scale)

    # fit.filled is a tuple; we convert it in place to a Rect for
    # convenience. (fit.rect is already a Rect.)
    fit.filled = Rect(fit.filled)
    assert (fit.rect.x0, fit.rect.y0) == (0, 0)
    assert (fit.filled.x0, fit.filled.y0) == (0, 0)

    scale = 1 / fit.parameter
    assert scale >= scale_low, f'{scale_low=} {scale=}'

    spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)

    def rect_function(*args):
        return fit.rect, fit.rect, None

    # draw story on temp PDF page
    doc = story.write_with_links(rect_function)

    # Insert opacity if requested.
    # For this, we prepend a command to the /Contents.
    if 0 <= opacity < 1:
        tpage = doc[0]  # load page
        # generate /ExtGstate for the page
        alp0 = tpage._set_opacity(CA=opacity, ca=opacity)
        gs_command = f"/{alp0} gs\n"  # generate graphic state command
        TOOLS._insert_contents(tpage, gs_command.encode(), 0)

    # put result in target page
    page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay)

    # -------------------------------------------------------------------------
    # re-insert links in target rect (show_pdf_page cannot copy annotations)
    # -------------------------------------------------------------------------
    # scaled center point of fit.rect
    mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale

    # center point of target rect
    mp2 = (rect.tl + rect.br) / 2

    # compute link positioning matrix:
    # - move center of scaled-down fit.rect to (0,0)
    # - rotate
    # - move (0,0) to center of target rect
    to_origin = Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y)
    rotation = Matrix(-rotate)
    to_target = Matrix(1, 0, 0, 1, mp2.x, mp2.y)
    mat = to_origin * rotation * to_target

    # copy over links
    for link in doc[0].get_links():
        link["from"] *= mat
        page.insert_link(link)

    return spare_height, scale
def insert_image(
    page,
    rect,
    *,
    alpha=-1,
    filename=None,
    height=0,
    keep_proportion=True,
    mask=None,
    oc=0,
    overlay=True,
    pixmap=None,
    rotate=0,
    stream=None,
    width=0,
    xref=0,
):
    """Insert an image for display in a rectangle.

    Args:
        rect: (rect_like) position of image on the page.
        alpha: (int, optional) set to 0 if image has no transparency.
        filename: (str, Path, file object) image filename.
        height: (int)
        keep_proportion: (bool) keep width / height ratio (default).
        mask: (bytes, optional) image consisting of alpha values to use.
        oc: (int) xref of OCG or OCMD to declare as Optional Content.
        overlay: (bool) put in foreground (default) or background.
        pixmap: (pymupdf.Pixmap) use this as image.
        rotate: (int) rotate by 0, 90, 180 or 270 degrees.
        stream: (bytes) use this as image.
        width: (int)
        xref: (int) use this as image.

    'page' and 'rect' are positional, all other parameters are keywords.

    If 'xref' is given, that image is used. Other input options are ignored.
    Else, exactly one of pixmap, stream or filename must be given.

    'alpha=0' for non-transparent images improves performance significantly.
    Affects stream and filename only.

    Optimum transparent insertions are possible by using filename / stream in
    conjunction with a 'mask' image of alpha values.

    Returns:
        xref (int) of inserted image. Re-use as argument for multiple insertions.

    Raises:
        ValueError: for a non-PDF document or invalid arguments.
        FileNotFoundError: if 'filename' does not exist.
    """
    CheckParent(page)
    doc = page.parent
    if not doc.is_pdf:
        raise ValueError("is no PDF")

    source_count = bool(filename) + bool(stream) + bool(pixmap)
    if xref == 0 and source_count != 1:
        raise ValueError("xref=0 needs exactly one of filename, pixmap, stream")

    # turn path-like or file-like 'filename' values into a plain string
    if filename and type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
        else:
            raise ValueError("bad filename")

    if filename and not os.path.exists(filename):
        raise FileNotFoundError("No such file: '%s'" % filename)
    elif stream and type(stream) not in (bytes, bytearray, io.BytesIO):
        raise ValueError("stream must be bytes-like / BytesIO")
    elif pixmap and type(pixmap) is not Pixmap:
        raise ValueError("pixmap must be a Pixmap")

    if mask:
        if not (stream or filename):
            raise ValueError("mask requires stream or filename")
        if type(mask) not in (bytes, bytearray, io.BytesIO):
            raise ValueError("mask must be bytes-like / BytesIO")

    # normalize the rotation into [0, 360) before validating it
    while rotate < 0:
        rotate += 360
    while rotate >= 360:
        rotate -= 360
    if rotate not in (0, 90, 180, 270):
        raise ValueError("bad rotate value")

    target = Rect(rect)
    if target.is_empty or target.is_infinite:
        raise ValueError("rect must be finite and not empty")
    clip = target * ~page.transformation_matrix

    # Create a unique image reference name: it must differ from the names
    # of the page's images, xobjects and fonts.
    used_names = [img[7] for img in doc.get_page_images(page.number)]
    used_names += [xobj[1] for xobj in doc.get_page_xobjects(page.number)]
    used_names += [font[4] for font in doc.get_page_fonts(page.number)]
    counter = 0
    while f"fzImg{counter}" in used_names:
        counter += 1
    _imgname = f"fzImg{counter}"  # 'pymupdf image'

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state

    xref, digests = page._insert_image(
        filename=filename,
        pixmap=pixmap,
        stream=stream,
        imask=mask,
        clip=clip,
        overlay=overlay,
        oc=oc,
        xref=xref,
        rotate=rotate,
        keep_proportion=keep_proportion,
        width=width,
        height=height,
        alpha=alpha,
        _imgname=_imgname,
        digests=doc.InsertedImages,
    )
    if digests is not None:
        doc.InsertedImages = digests  # store the returned digests on the document

    return xref
def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None:
    """Insert a new link for the current page.

    Args:
        page: the page receiving the link.
        lnk: the link dictionary.
        mark: not used by this function.

    Raises:
        ValueError: if no annotation text could be generated for 'lnk'.
    """
    CheckParent(page)
    annot_text = utils.getLinkText(page, lnk)
    if annot_text == "":
        raise ValueError("link kind not supported")
    page._addAnnot_FromString((annot_text,))
def insert_text(
    page: 'Page',
    point: point_like,
    text: typing.Union[str, list],
    *,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    color: OptSeq = None,
    fill: OptSeq = None,
    border_width: float = 0.05,
    miter_limit: float = 1,
    render_mode: int = 0,
    rotate: int = 0,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
):
    """Insert text at a given point.

    Notes:
        Creates a Shape object, uses its same-named method and commits it
        if that method returned a non-negative value.

    Returns:
        The return value of Shape.insert_text().
    """
    shape = page.new_shape()
    text_args = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        border_width=border_width,
        render_mode=render_mode,
        miter_limit=miter_limit,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    rc = shape.insert_text(point, text, **text_args)
    if rc >= 0:  # only commit a successful insertion
        shape.commit(overlay)
    return rc
def insert_textbox(
    page: 'Page',
    rect: rect_like,
    buffer: typing.Union[str, list],
    *,
    fontname: str = "helv",
    fontfile: OptStr = None,
    set_simple: int = 0,
    encoding: int = 0,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    color: OptSeq = None,
    fill: OptSeq = None,
    expandtabs: int = 1,
    align: int = 0,
    rotate: int = 0,
    render_mode: int = 0,
    miter_limit: float = 1,
    border_width: float = 0.05,
    morph: OptSeq = None,
    overlay: bool = True,
    stroke_opacity: float = 1,
    fill_opacity: float = 1,
    oc: int = 0,
) -> float:
    """Insert text into a given rectangle.

    Notes:
        Creates a Shape object, uses its same-named method and commits it
        if that method returned a non-negative value.
    Parameters:
        rect: (rect-like) area to use for text.
        buffer: text to be inserted
        fontname: a Base-14 font, font name or '/name'
        fontfile: name of a font file
        fontsize: font size
        lineheight: overwrite the font property
        color: RGB color triple
        expandtabs: handles tabulators with string function
        align: left, center, right, justified
        rotate: 0, 90, 180, or 270 degrees
        morph: morph box with a matrix and a fixpoint
        overlay: put text in foreground or background
    Returns:
        unused or deficit rectangle area (float)
    """
    shape = page.new_shape()
    text_args = dict(
        fontsize=fontsize,
        lineheight=lineheight,
        fontname=fontname,
        fontfile=fontfile,
        set_simple=set_simple,
        encoding=encoding,
        color=color,
        fill=fill,
        expandtabs=expandtabs,
        render_mode=render_mode,
        miter_limit=miter_limit,
        border_width=border_width,
        align=align,
        rotate=rotate,
        morph=morph,
        stroke_opacity=stroke_opacity,
        fill_opacity=fill_opacity,
        oc=oc,
    )
    rc = shape.insert_textbox(rect, buffer, **text_args)
    if rc >= 0:  # only commit a successful insertion
        shape.commit(overlay)
    return rc
 @property
 def is_wrapped(self):
     """Check if /Contents is in a balanced graphics state.

     True exactly if the q-balance count equals (0, 0).
     """
     balance = self._count_q_balance()
     return balance == (0, 0)
 @property
 def mediabox_size(self):
     """Point made of the x1 and y1 coordinates of the mediabox."""
     box = self.mediabox
     return Point(box.x1, box.y1)
def new_shape(self):
    """Return a new Shape object created for this object."""
    return Shape(self)
 #@property
 #def parent( self):
 #    assert self._parent
 #    if self._parent:
 #        return self._parent
 CheckParent(self)
 doc = self.parent
 page = doc.reload_page(self)
 # fixme this looks wrong.
 self.this = page
def replace_image(
    page: 'Page',
    xref: int,
    *,
    filename=None,
    pixmap=None,
    stream=None,
):
    """Replace the image referred to by xref.

    Replace the image by changing the object definition stored under xref. This
    will leave the pages appearance instructions intact, so the new image is
    being displayed with the same bbox, rotation etc.
    By providing a small fully transparent image, an effect as if the image had
    been deleted can be achieved.
    A typical use may include replacing large images by a smaller version,
    e.g. with a lower resolution or graylevel instead of colored.

    Args:
        xref: the xref of the image to replace.
        filename, pixmap, stream: exactly one of these must be provided. The
            meaning being the same as in Page.insert_image.

    Raises:
        ValueError: if xref is no image, or if not exactly one image source
            was given.
    """
    doc = page.parent  # the owning document
    if not doc.xref_is_image(xref):
        raise ValueError("xref not an image")
    source_count = bool(filename) + bool(stream) + bool(pixmap)
    if source_count != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")

    # insert new image anywhere in page - we only need its object definition
    new_xref = page.insert_image(
        page.rect, filename=filename, stream=stream, pixmap=pixmap
    )
    doc.xref_copy(new_xref, xref)  # copy over new to old

    # new image insertion has created a new /Contents source,
    # which we will set to spaces now
    obsolete_contents = page.get_contents()[-1]
    doc.update_stream(obsolete_contents, b" ")
    page._image_info = None  # clear cache of extracted image information
 @property
 def rotation(self):
 """Page rotation."""
 CheckParent(self)
 """Run page through a device.
 dw: DeviceWrapper
 """
 CheckParent(self)
 mupdf.fz_run_page(self.this, dw.device, JM_matrix_from_py(m), mupdf.FzCookie())
def search_for(
    page,
    text,
    *,
    clip=None,
    quads=False,
    flags=None,
    textpage=None,
) -> list:
    """Search for a string on a page.

    Args:
        text: string to be searched for
        clip: restrict search to this rectangle
        quads: (bool) return quads instead of rectangles
        flags: bit switches, default: join hyphened words
        textpage: a pre-created pymupdf.TextPage

    Returns:
        a list of rectangles or quads, each containing one occurrence.

    Raises:
        ValueError: if 'textpage' does not belong to this page.
    """
    if flags is None:
        flags = (
            0
            | TEXT_DEHYPHENATE
            | TEXT_PRESERVE_WHITESPACE
            | TEXT_PRESERVE_LIGATURES
            | TEXT_MEDIABOX_CLIP
        )
    if clip is not None:
        clip = Rect(clip)

    CheckParent(page)
    make_textpage = textpage is None
    if make_textpage:
        # 'clip' and 'flags' only take effect for a textpage created here
        tp = page.get_textpage(clip=clip, flags=flags)
    elif getattr(textpage, "parent") != page:
        raise ValueError("not a textpage of this page")
    else:
        tp = textpage

    rlist = tp.search(text, quads=quads)
    if make_textpage:
        del tp  # drop the temporary textpage
    return rlist
 def set_artbox(self, rect):
     """Set the ArtBox.

     Returns whatever the internal '_set_pagebox' call returns.
     """
     return self._set_pagebox("ArtBox", rect)
 def set_trimbox(self, rect):
     """Set the TrimBox.

     Returns whatever the internal '_set_pagebox' call returns.
     """
     return self._set_pagebox("TrimBox", rect)
def show_pdf_page(
    page,
    rect,
    docsrc,
    pno=0,
    keep_proportion=True,
    overlay=True,
    oc=0,
    rotate=0,
    clip=None,
) -> int:
    """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'.

    Args:
        rect: (rect-like) where to place the source image
        docsrc: (document) source PDF
        pno: (int) source page number; negative values count from the end
        keep_proportion: (bool) do not change width-height-ratio
        overlay: (bool) put in foreground
        oc: (xref) make visibility dependent on this OCG / OCMD (which must
            be defined in the target PDF)
        rotate: (int) degrees (multiple of 90)
        clip: (rect-like) part of source page rectangle

    Returns:
        xref of inserted object (for reuse)

    Raises:
        ValueError: if target or source is no PDF, 'rect' or the resulting
            source rectangle is empty or infinite, a negative 'pno' is used
            with a source that has no pages, or source and target are the
            same document.
    """

    def calc_matrix(sr, tr, keep=True, rotate=0):
        """Calculate transformation matrix from source to target rect.

        Notes:
            The product of four matrices in this sequence: (1) translate
            correct source corner to origin, (2) rotate, (3) scale,
            (4) translate to target's top-left corner.

        Args:
            sr: source rect in PDF (!) coordinate system
            tr: target rect in PDF coordinate system
            keep: whether to keep source ratio of width to height
            rotate: rotation angle in degrees

        Returns:
            Transformation matrix.
        """
        # calc center point of source rect
        smp = (sr.tl + sr.br) / 2.0
        # calc center point of target rect
        tmp = (tr.tl + tr.br) / 2.0

        # m moves to (0, 0), then rotates
        m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate)

        sr1 = sr * m  # resulting source rect to calculate scale factors

        fw = tr.width / sr1.width  # scale the width
        fh = tr.height / sr1.height  # scale the height
        if keep:
            fw = fh = min(fw, fh)  # take min if keeping aspect ratio

        m *= Matrix(fw, fh)  # concat scale matrix
        m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y)  # concat move to target center
        return JM_TUPLE(m)

    CheckParent(page)
    doc = page.parent

    if not doc.is_pdf or not docsrc.is_pdf:
        raise ValueError("is no PDF")

    # 'rect' is documented as rect-like: convert it, so that plain sequences
    # work with the attribute accesses and matrix multiplication below.
    rect = Rect(rect)
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    if pno < 0:  # support negative page numbers
        src_count = docsrc.page_count
        if src_count < 1:
            # adding a page count of 0 repeatedly would never terminate
            raise ValueError("source PDF has no pages")
        pno %= src_count  # same result as adding page_count until >= 0
    src_page = docsrc[pno]  # load source page

    tar_rect = rect * ~page.transformation_matrix  # target rect in PDF coordinates

    src_rect = src_page.rect if not clip else src_page.rect & clip  # source rect
    if src_rect.is_empty or src_rect.is_infinite:
        raise ValueError("clip must be finite and not empty")
    src_rect = src_rect * ~src_page.transformation_matrix  # ... in PDF coord

    matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate)

    # list of existing /Form /XObjects
    ilst = [i[1] for i in doc.get_page_xobjects(page.number)]
    ilst += [i[7] for i in doc.get_page_images(page.number)]
    ilst += [i[4] for i in doc.get_page_fonts(page.number)]

    # create a name not in that list
    n = "fzFrm"
    i = 0
    _imgname = n + "0"
    while _imgname in ilst:
        i += 1
        _imgname = n + str(i)

    isrc = docsrc._graft_id  # used as key for graftmaps
    if doc._graft_id == isrc:
        raise ValueError("source document must not equal target")

    # retrieve / make Graftmap for source PDF
    gmap = doc.Graftmaps.get(isrc, None)
    if gmap is None:
        gmap = Graftmap(doc)
        doc.Graftmaps[isrc] = gmap

    # take note of generated xref for automatic reuse
    pno_id = (isrc, pno)  # id of docsrc[pno]
    xref = doc.ShownPages.get(pno_id, 0)

    if overlay:
        page.wrap_contents()  # ensure a balanced graphics state
    xref = page._show_pdf_page(
        src_page,
        overlay=overlay,
        matrix=matrix,
        xref=xref,
        oc=oc,
        clip=src_rect,
        graftmap=gmap,
        _imgname=_imgname,
    )
    doc.ShownPages[pno_id] = xref

    return xref
 @property
 def transformation_matrix(self):
 """Page transformation matrix."""
 CheckParent(self)
 rect = self._other_box("TrimBox")
 if rect is None:
 return self.cropbox
 mb = self.mediabox
 return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
def update_link(page: 'Page', lnk: dict) -> None:
    """Rewrite an existing link of the page from its dictionary 'lnk'.

    The object text is generated by utils.getLinkText() and stored under
    the xref given by lnk["xref"].

    Raises:
        ValueError: if no object text could be generated for the link.
    """
    CheckParent(page)
    link_text = utils.getLinkText(page, lnk)
    if link_text != "":
        page.parent.update_object(lnk["xref"], link_text, page=page)
        return
    raise ValueError("link kind not supported")
 def widgets(self, types=None):
 """ Generator over the widgets of a page.
 Args:
 prepend = b"q\n" * push
 TOOLS._insert_contents(self, prepend, False)
 if pop > 0:  # append required pop commands
 append = b"\nQ" * pop + b"\n"
 TOOLS._insert_contents(self, append, True)
def write_text(
    page: 'Page',
    rect=None,
    writers=None,
    overlay=True,
    color=None,
    opacity=None,
    keep_proportion=True,
    rotate=0,
    oc=0,
) -> None:
    """Put the text of one or more pymupdf.TextWriter objects on the page.

    A single TextWriter with no rotation and no target rectangle writes
    directly to the page. In all other cases the writers write to a page of
    a temporary document, which is then shown on this page via
    show_pdf_page().

    Args:
        rect: target rectangle. If None, the union of the text writers is used.
        writers: one or more pymupdf.TextWriter objects.
        overlay: put in foreground or background.
        color: passed on to each writer's write_text().
        opacity: passed on to each writer's write_text().
        keep_proportion: maintain aspect ratio of rectangle sides.
        rotate: arbitrary rotation angle.
        oc: the xref of an optional content object

    Raises:
        ValueError: if no writer was given.
    """
    assert isinstance(page, Page)
    if not writers:
        raise ValueError("need at least one pymupdf.TextWriter")

    if type(writers) is TextWriter:
        if rotate == 0 and rect is None:
            # simple case: the writer can target the page directly
            writers.write_text(page, opacity=opacity, color=color, overlay=overlay)
            return None
        writers = (writers,)  # treat a single writer like a sequence

    # collect all text on a page of a temporary PDF
    clip = writers[0].text_rect
    textdoc = Document()
    page_size = page.rect
    tpage = textdoc.new_page(width=page_size.width, height=page_size.height)
    for tw in writers:
        clip |= tw.text_rect  # union of all writer rectangles
        tw.write_text(tpage, opacity=opacity, color=color)

    target = clip if rect is None else rect
    page.show_pdf_page(
        target,
        textdoc,
        0,
        overlay=overlay,
        keep_proportion=keep_proportion,
        rotate=rotate,
        clip=clip,
        oc=oc,
    )
    # drop the references to the temporary objects, document first
    textdoc = None
    tpage = None
 @property
 def xref(self):
 """PDF xref number of page."""
 CheckParent(self)
 irect = property(round)
 tl = top_left
 tr = top_right
+class Shape:
+"""Create a new shape."""
+@staticmethod
+def horizontal_angle(C, P):
+"""Return the angle to the horizontal for the connection from C to P.
+This uses the arcus sine function and resolves its inherent ambiguity by
+looking up in which quadrant vector S = P - C is located.
+"""
+S = Point(P - C).unit  # unit vector 'C' -> 'P'
+alfa = math.asin(abs(S.y))  # absolute angle from horizontal
+if S.x < 0:  # make arcsin result unique
+if S.y <= 0:  # bottom-left
+alfa = -(math.pi - alfa)
+else:  # top-left
+alfa = math.pi - alfa
+else:
+if S.y >= 0:  # top-right
+pass
+else:  # bottom-right
+alfa = -alfa
+return alfa
+def __init__(self, page: Page):
+CheckParent(page)
+self.page = page
+self.doc = page.parent
+if not self.doc.is_pdf:
+raise ValueError("is no PDF")
+self.height = page.mediabox_size.y
+self.width = page.mediabox_size.x
+self.x = page.cropbox_position.x
+self.y = page.cropbox_position.y
+self.pctm = page.transformation_matrix  # page transf. matrix
+self.ipctm = ~self.pctm  # inverted transf. matrix
+self.draw_cont = ""
+self.text_cont = ""
+self.totalcont = ""
+self.last_point = None
+self.rect = None
+def updateRect(self, x):
+if self.rect is None:
+if len(x) == 2:
+self.rect = Rect(x, x)
+else:
+self.rect = Rect(x)
+else:
+if len(x) == 2:
+x = Point(x)
+self.rect.x0 = min(self.rect.x0, x.x)
+self.rect.y0 = min(self.rect.y0, x.y)
+self.rect.x1 = max(self.rect.x1, x.x)
+self.rect.y1 = max(self.rect.y1, x.y)
+else:
+x = Rect(x)
+self.rect.x0 = min(self.rect.x0, x.x0)
+self.rect.y0 = min(self.rect.y0, x.y0)
+self.rect.x1 = max(self.rect.x1, x.x1)
+self.rect.y1 = max(self.rect.y1, x.y1)
+def draw_line(self, p1: point_like, p2: point_like) -> Point:
+"""Draw a line between two points."""
+p1 = Point(p1)
+p2 = Point(p2)
+if not (self.last_point == p1):
+self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
+self.last_point = p1
+self.updateRect(p1)
+self.draw_cont += _format_g(JM_TUPLE(p2 * self.ipctm)) + " l\n"
+self.updateRect(p2)
+self.last_point = p2
+return self.last_point
+def draw_polyline(self, points: list) -> Point:
+"""Draw several connected line segments."""
+for i, p in enumerate(points):
+if i == 0:
+if not (self.last_point == Point(p)):
+self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " m\n"
+self.last_point = Point(p)
+else:
+self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " l\n"
+self.updateRect(p)
+self.last_point = Point(points[-1])
+return self.last_point
+def draw_bezier(
+self,
+p1: point_like,
+p2: point_like,
+p3: point_like,
+p4: point_like,
+) -> Point:
+"""Draw a standard cubic Bezier curve."""
+p1 = Point(p1)
+p2 = Point(p2)
+p3 = Point(p3)
+p4 = Point(p4)
+if not (self.last_point == p1):
+self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
+args = JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
+self.draw_cont += _format_g(args) + " c\n"
+self.updateRect(p1)
+self.updateRect(p2)
+self.updateRect(p3)
+self.updateRect(p4)
+self.last_point = p4
+return self.last_point
+def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point:
+"""Draw an ellipse inside a tetrapod."""
+if len(tetra) != 4:
+raise ValueError("invalid arg length")
+if hasattr(tetra[0], "__float__"):
+q = Rect(tetra).quad
+else:
+q = Quad(tetra)
+mt = q.ul + (q.ur - q.ul) * 0.5
+mr = q.ur + (q.lr - q.ur) * 0.5
+mb = q.ll + (q.lr - q.ll) * 0.5
+ml = q.ul + (q.ll - q.ul) * 0.5
+if not (self.last_point == ml):
+self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n"
+self.last_point = ml
+self.draw_curve(ml, q.ll, mb)
+self.draw_curve(mb, q.lr, mr)
+self.draw_curve(mr, q.ur, mt)
+self.draw_curve(mt, q.ul, ml)
+self.updateRect(q.rect)
+self.last_point = ml
+return self.last_point
+def draw_circle(self, center: point_like, radius: float) -> Point:
+"""Draw a circle given its center and radius."""
+if not radius > EPSILON:
+raise ValueError("radius must be positive")
+center = Point(center)
+p1 = center - (radius, 0)
+return self.draw_sector(center, p1, 360, fullSector=False)
+def draw_curve(
+self,
+p1: point_like,
+p2: point_like,
+p3: point_like,
+) -> Point:
+"""Draw a curve between points using one control point."""
+kappa = 0.55228474983
+p1 = Point(p1)
+p2 = Point(p2)
+p3 = Point(p3)
+k1 = p1 + (p2 - p1) * kappa
+k2 = p3 + (p2 - p3) * kappa
+return self.draw_bezier(p1, k1, k2, p3)
+def draw_sector(
+self,
+center: point_like,
+point: point_like,
+beta: float,
+fullSector: bool = True,
+) -> Point:
+"""Draw a circle sector."""
+center = Point(center)
+point = Point(point)
+l3 = lambda a, b: _format_g((a, b)) + " m\n"
+l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n"
+l5 = lambda a, b: _format_g((a, b)) + " l\n"
+betar = math.radians(-beta)
+w360 = math.radians(math.copysign(360, betar)) * (-1)
+w90 = math.radians(math.copysign(90, betar))
+w45 = w90 / 2
+while abs(betar) > 2 * math.pi:
+betar += w360  # bring angle below 360 degrees
+if not (self.last_point == point):
+self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
+self.last_point = point
+Q = Point(0, 0)  # just make sure it exists
+C = center
+P = point
+S = P - C  # vector 'center' -> 'point'
+rad = abs(S)  # circle radius
+if not rad > EPSILON:
+raise ValueError("radius must be positive")
+alfa = self.horizontal_angle(center, point)
+while abs(betar) > abs(w90):  # draw 90 degree arcs
+q1 = C.x + math.cos(alfa + w90) * rad
+q2 = C.y + math.sin(alfa + w90) * rad
+Q = Point(q1, q2)  # the arc's end point
+r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
+r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
+R = Point(r1, r2)  # crossing point of tangents
+kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
+kappa = kappah * abs(P - Q)
+cp1 = P + (R - P) * kappa  # control point 1
+cp2 = Q + (R - Q) * kappa  # control point 2
+self.draw_cont += l4(*JM_TUPLE(
+list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+))
+betar -= w90  # reduce param angle by 90 deg
+alfa += w90  # advance start angle by 90 deg
+P = Q  # advance to arc end point
+# draw (remaining) arc
+if abs(betar) > 1e-3:  # significant degrees left?
+beta2 = betar / 2
+q1 = C.x + math.cos(alfa + betar) * rad
+q2 = C.y + math.sin(alfa + betar) * rad
+Q = Point(q1, q2)  # the arc's end point
+r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
+r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
+R = Point(r1, r2)  # crossing point of tangents
+# kappa height is 4/3 of segment height
+kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
+kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
+cp1 = P + (R - P) * kappa  # control point 1
+cp2 = Q + (R - Q) * kappa  # control point 2
+self.draw_cont += l4(*JM_TUPLE(
+list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
+))
+if fullSector:
+self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
+self.draw_cont += l5(*JM_TUPLE(center * self.ipctm))
+self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm))
+self.last_point = Q
+return self.last_point
+def draw_rect(self, rect: rect_like, *, radius=None) -> Point:
+"""Draw a rectangle.
+Args:
+radius: if not None, the rectangle will have rounded corners.
+This is the radius of the curvature, given as percentage of
+the rectangle width or height. Valid are values 0 < v <= 0.5.
+For a sequence of two values, the corners will have different
+radii. Otherwise, the percentage will be computed from the
+shorter side. A value of (0.5, 0.5) will draw an ellipse.
+"""
+r = Rect(rect)
+if radius is None:  # standard rectangle
+self.draw_cont += _format_g(JM_TUPLE(
+list(r.bl * self.ipctm) + [r.width, r.height]
+)) + " re\n"
+self.updateRect(r)
+self.last_point = r.tl
+return self.last_point
+# rounded corners requested. This requires 1 or 2 values, each
+# with 0 < value <= 0.5
+if hasattr(radius, "__float__"):
+if radius <= 0 or radius > 0.5:
+raise ValueError(f"bad radius value {radius}.")
+d = min(r.width, r.height) * radius
+px = (d, 0)
+py = (0, d)
+elif hasattr(radius, "__len__") and len(radius) == 2:
+rx, ry = radius
+px = (rx * r.width, 0)
+py = (0, ry * r.height)
+if min(rx, ry) <= 0 or max(rx, ry) > 0.5:
+raise ValueError(f"bad radius value {radius}.")
+else:
+raise ValueError(f"bad radius value {radius}.")
+lp = self.draw_line(r.tl + py, r.bl - py)
+lp = self.draw_curve(lp, r.bl, r.bl + px)
+lp = self.draw_line(lp, r.br - px)
+lp = self.draw_curve(lp, r.br, r.br - py)
+lp = self.draw_line(lp, r.tr + py)
+lp = self.draw_curve(lp, r.tr, r.tr - px)
+lp = self.draw_line(lp, r.tl + px)
+self.last_point = self.draw_curve(lp, r.tl, r.tl + py)
+self.updateRect(r)
+return self.last_point
+def draw_quad(self, quad: quad_like) -> Point:
+"""Draw a Quad."""
+q = Quad(quad)
+return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])
+def draw_zigzag(
+self,
+p1: point_like,
+p2: point_like,
+breadth: float = 2,
+) -> Point:
+"""Draw a zig-zagged line from p1 to p2."""
+p1 = Point(p1)
+p2 = Point(p2)
+S = p2 - p1  # vector start - end
+rad = abs(S)  # distance of points
+cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+if cnt < 4:
+raise ValueError("points too close")
+mb = rad / cnt  # revised breadth
+matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
+i_mat = ~matrix  # get original position
+points = []  # stores edges
+for i in range(1, cnt):
+if i % 4 == 1:  # point "above" connection
+p = Point(i, -1) * mb
+elif i % 4 == 3:  # point "below" connection
+p = Point(i, 1) * mb
+else:  # ignore others
+continue
+points.append(p * i_mat)
+self.draw_polyline([p1] + points + [p2])  # add start and end points
+return p2
+def draw_squiggle(
+self,
+p1: point_like,
+p2: point_like,
+breadth=2,
+) -> Point:
+"""Draw a squiggly line from p1 to p2."""
+p1 = Point(p1)
+p2 = Point(p2)
+S = p2 - p1  # vector start - end
+rad = abs(S)  # distance of points
+cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
+if cnt < 4:
+raise ValueError("points too close")
+mb = rad / cnt  # revised breadth
+matrix = Matrix(util_hor_matrix(p1, p2))  # normalize line to x-axis
+i_mat = ~matrix  # get original position
+k = 2.4142135623765633  # y of draw_curve helper point
+points = []  # stores edges
+for i in range(1, cnt):
+if i % 4 == 1:  # point "above" connection
+p = Point(i, -k) * mb
+elif i % 4 == 3:  # point "below" connection
+p = Point(i, k) * mb
+else:  # else on connection line
+p = Point(i, 0) * mb
+points.append(p * i_mat)
+points = [p1] + points + [p2]
+cnt = len(points)
+i = 0
+while i + 2 < cnt:
+self.draw_curve(points[i], points[i + 1], points[i + 2])
+i += 2
+return p2
+# ==============================================================================
+# Shape.insert_text
+# ==============================================================================
+def insert_text(
+self,
+point: point_like,
+buffer: typing.Union[str, list],
+*,
+fontsize: float = 11,
+lineheight: OptFloat = None,
+fontname: str = "helv",
+fontfile: OptStr = None,
+set_simple: bool = 0,
+encoding: int = 0,
+color: OptSeq = None,
+fill: OptSeq = None,
+render_mode: int = 0,
+border_width: float = 0.05,
+miter_limit: float = 1,
+rotate: int = 0,
+morph: OptSeq = None,
+stroke_opacity: float = 1,
+fill_opacity: float = 1,
+oc: int = 0,
+) -> int:
+# ensure 'text' is a list of strings, worth dealing with
+if not bool(buffer):
+return 0
+if type(buffer) not in (list, tuple):
+text = buffer.splitlines()
+else:
+text = buffer
+if not len(text) > 0:
+return 0
+point = Point(point)
+try:
+maxcode = max([ord(c) for c in " ".join(text)])
+except Exception:
+exception_info()
+return 0
+# ensure valid 'fontname'
+fname = fontname
+if fname.startswith("/"):
+fname = fname[1:]
+xref = self.page.insert_font(
+fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+)
+fontinfo = CheckFontInfo(self.doc, xref)
+fontdict = fontinfo[1]
+ordering = fontdict["ordering"]
+simple = fontdict["simple"]
+bfname = fontdict["name"]
+ascender = fontdict["ascender"]
+descender = fontdict["descender"]
+if lineheight:
+lheight = fontsize * lineheight
+elif ascender - descender <= 1:
+lheight = fontsize * 1.2
+else:
+lheight = fontsize * (ascender - descender)
+if maxcode > 255:
+glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+else:
+glyphs = fontdict["glyphs"]
+tab = []
+for t in text:
+if simple and bfname not in ("Symbol", "ZapfDingbats"):
+g = None
+else:
+g = glyphs
+tab.append(getTJstr(t, g, simple, ordering))
+text = tab
+color_str = ColorCode(color, "c")
+fill_str = ColorCode(fill, "f")
+if not fill and render_mode == 0:  # ensure fill color when 0 Tr
+fill = color
+fill_str = ColorCode(color, "f")
+morphing = CheckMorph(morph)
+rot = rotate
+if rot % 90 != 0:
+raise ValueError("bad rotate value")
+while rot < 0:
+rot += 360
+rot = rot % 360  # text rotate = 0, 90, 270, 180
+templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf "
+templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n"
+cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
+cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
+cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+height = self.height
+width = self.width
+# setting up for standard rotation directions
+# case rotate = 0
+if morphing:
+m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
+mat = ~m1 * morph[1] * m1
+cm = _format_g(JM_TUPLE(mat)) + " cm\n"
+else:
+cm = ""
+top = height - point.y - self.y  # start of 1st char
+left = point.x + self.x  # start of 1. char
+space = top  # space available
+#headroom = point.y + self.y  # distance to page border
+if rot == 90:
+left = height - point.y - self.y
+top = -point.x - self.x
+cm += cmp90
+space = width - abs(top)
+#headroom = point.x + self.x
+elif rot == 270:
+left = -height + point.y + self.y
+top = point.x + self.x
+cm += cmm90
+space = abs(top)
+#headroom = width - point.x - self.x
+elif rot == 180:
+left = -point.x - self.x
+top = -height + point.y + self.y
+cm += cm180
+space = abs(point.y + self.y)
+#headroom = height - point.y - self.y
+optcont = self.page._get_optional_content(oc)
+if optcont is not None:
+bdc = "/OC /%s BDC\n" % optcont
+emc = "EMC\n"
+else:
+bdc = emc = ""
+alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+if alpha is None:
+alpha = ""
+else:
+alpha = "/%s gs\n" % alpha
+nres = templ1(bdc, alpha, cm, left, top, fname, fontsize)
+if render_mode > 0:
+nres += "%i Tr " % render_mode
+nres += _format_g(border_width * fontsize) + " w "
+if miter_limit is not None:
+nres += _format_g(miter_limit) + " M "
+if color is not None:
+nres += color_str
+if fill is not None:
+nres += fill_str
+# =========================================================================
+#   start text insertion
+# =========================================================================
+nres += text[0]
+nlines = 1  # set output line counter
+if len(text) > 1:
+nres += templ2(lheight)  # line 1
+else:
+nres += 'TJ'
+for i in range(1, len(text)):
+if space < lheight:
+break  # no space left on page
+if i > 1:
+nres += "\nT* "
+nres += text[i] + 'TJ'
+space -= lheight
+nlines += 1
+nres += "\nET\n%sQ\n" % emc
+# =========================================================================
+#   end of text insertion
+# =========================================================================
+# update the /Contents object
+self.text_cont += nres
+return nlines
+# ==============================================================================
+# Shape.insert_textbox
+# ==============================================================================
+def insert_textbox(
+self,
+rect: rect_like,
+buffer: typing.Union[str, list],
+*,
+fontname: OptStr = "helv",
+fontfile: OptStr = None,
+fontsize: float = 11,
+lineheight: OptFloat = None,
+set_simple: bool = 0,
+encoding: int = 0,
+color: OptSeq = None,
+fill: OptSeq = None,
+expandtabs: int = 1,
+border_width: float = 0.05,
+miter_limit: float = 1,
+align: int = 0,
+render_mode: int = 0,
+rotate: int = 0,
+morph: OptSeq = None,
+stroke_opacity: float = 1,
+fill_opacity: float = 1,
+oc: int = 0,
+) -> float:
+"""Insert text into a given rectangle.
+Args:
+rect -- the textbox to fill
+buffer -- text to be inserted
+fontname -- a Base-14 font, font name or '/name'
+fontfile -- name of a font file
+fontsize -- font size
+lineheight -- overwrite the font property
+color -- RGB stroke color triple
+fill -- RGB fill color triple
+render_mode -- text rendering control
+border_width -- thickness of glyph borders as percentage of fontsize
+expandtabs -- handles tabulators with string function
+align -- left, center, right, justified
+rotate -- 0, 90, 180, or 270 degrees
+morph -- morph box with a matrix and a fixpoint
+Returns:
+unused or deficit rectangle area (float)
+"""
+rect = Rect(rect)
+if rect.is_empty or rect.is_infinite:
+raise ValueError("text box must be finite and not empty")
+color_str = ColorCode(color, "c")
+fill_str = ColorCode(fill, "f")
+if fill is None and render_mode == 0:  # ensure fill color for 0 Tr
+fill = color
+fill_str = ColorCode(color, "f")
+optcont = self.page._get_optional_content(oc)
+if optcont is not None:
+bdc = "/OC /%s BDC\n" % optcont
+emc = "EMC\n"
+else:
+bdc = emc = ""
+# determine opacity / transparency
+alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
+if alpha is None:
+alpha = ""
+else:
+alpha = "/%s gs\n" % alpha
+if rotate % 90 != 0:
+raise ValueError("rotate must be multiple of 90")
+rot = rotate
+while rot < 0:
+rot += 360
+rot = rot % 360
+# is buffer worth of dealing with?
+if not bool(buffer):
+return rect.height if rot in (0, 180) else rect.width
+cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates counter-clockwise
+cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates clockwise
+cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
+height = self.height
+fname = fontname
+if fname.startswith("/"):
+fname = fname[1:]
+xref = self.page.insert_font(
+fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple
+)
+fontinfo = CheckFontInfo(self.doc, xref)
+fontdict = fontinfo[1]
+ordering = fontdict["ordering"]
+simple = fontdict["simple"]
+glyphs = fontdict["glyphs"]
+bfname = fontdict["name"]
+ascender = fontdict["ascender"]
+descender = fontdict["descender"]
+if lineheight:
+lheight_factor = lineheight
+elif ascender - descender <= 1:
+lheight_factor = 1.2
+else:
+lheight_factor = ascender - descender
+lheight = fontsize * lheight_factor
+# create a list from buffer, split into its lines
+if type(buffer) in (list, tuple):
+t0 = "\n".join(buffer)
+else:
+t0 = buffer
+maxcode = max([ord(c) for c in t0])
+# replace invalid char codes for simple fonts
+if simple and maxcode > 255:
+t0 = "".join([c if ord(c) < 256 else "?" for c in t0])
+t0 = t0.splitlines()
+glyphs = self.doc.get_char_widths(xref, maxcode + 1)
+if simple and bfname not in ("Symbol", "ZapfDingbats"):
+tj_glyphs = None
+else:
+tj_glyphs = glyphs
+# ----------------------------------------------------------------------
+# calculate pixel length of a string
+# ----------------------------------------------------------------------
+def pixlen(x):
+"""Calculate pixel length of x."""
+if ordering < 0:
+return sum([glyphs[ord(c)][1] for c in x]) * fontsize
+else:
+return len(x) * fontsize
+# ---------------------------------------------------------------------
+if ordering < 0:
+blen = glyphs[32][1] * fontsize  # pixel size of space character
+else:
+blen = fontsize
+text = ""  # output buffer
+if CheckMorph(morph):
+m1 = Matrix(
+1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
+)
+mat = ~m1 * morph[1] * m1
+cm = _format_g(JM_TUPLE(mat)) + " cm\n"
+else:
+cm = ""
+# ---------------------------------------------------------------------
+# adjust for text orientation / rotation
+# ---------------------------------------------------------------------
+progr = 1  # direction of line progress
+c_pnt = Point(0, fontsize * ascender)  # used for line progress
+if rot == 0:  # normal orientation
+point = rect.tl + c_pnt  # line 1 is 'lheight' below top
+maxwidth = rect.width  # pixels available in one line
+maxheight = rect.height  # available text height
+elif rot == 90:  # rotate counter clockwise
+c_pnt = Point(fontsize * ascender, 0)  # progress in x-direction
+point = rect.bl + c_pnt  # line 1 'lheight' away from left
+maxwidth = rect.height  # pixels available in one line
+maxheight = rect.width  # available text height
+cm += cmp90
+elif rot == 180:  # text upside down
+# progress upwards in y direction
+c_pnt = -Point(0, fontsize * ascender)
+point = rect.br + c_pnt  # line 1 'lheight' above bottom
+maxwidth = rect.width  # pixels available in one line
+progr = -1  # subtract lheight for next line
+maxheight =rect.height  # available text height
+cm += cm180
+else:  # rotate clockwise (270 or -90)
+# progress from right to left
+c_pnt = -Point(fontsize * ascender, 0)
+point = rect.tr + c_pnt  # line 1 'lheight' left of right
+maxwidth = rect.height  # pixels available in one line
+progr = -1  # subtract lheight for next line
+maxheight = rect.width  # available text height
+cm += cmm90
+# =====================================================================
+# line loop
+# =====================================================================
+just_tab = []  # 'justify' indicators per line
+for i, line in enumerate(t0):
+line_t = line.expandtabs(expandtabs).split(" ")  # split into words
+num_words = len(line_t)
+lbuff = ""  # init line buffer
+rest = maxwidth  # available line pixels
+# =================================================================
+# word loop
+# =================================================================
+for j in range(num_words):
+word = line_t[j]
+pl_w = pixlen(word)  # pixel len of word
+if rest >= pl_w:  # does it fit on the line?
+lbuff += word + " "  # yes, append word
+rest -= pl_w + blen  # update available line space
+continue  # next word
+# word doesn't fit - output line (if not empty)
+if lbuff:
+lbuff = lbuff.rstrip() + "\n"  # line full, append line break
+text += lbuff  # append to total text
+just_tab.append(True)  # can align-justify
+lbuff = ""  # re-init line buffer
+rest = maxwidth  # re-init avail. space
+if pl_w <= maxwidth:  # word shorter than 1 line?
+lbuff = word + " "  # start the line with it
+rest = maxwidth - pl_w - blen  # update free space
+continue
+# long word: split across multiple lines - char by char ...
+if len(just_tab) > 0:
+just_tab[-1] = False  # cannot align-justify
+for c in word:
+if pixlen(lbuff) <= maxwidth - pixlen(c):
+lbuff += c
+else:  # line full
+lbuff += "\n"  # close line
+text += lbuff  # append to text
+just_tab.append(False)  # cannot align-justify
+lbuff = c  # start new line with this char
+lbuff += " "  # finish long word
+rest = maxwidth - pixlen(lbuff)  # long word stored
+if lbuff:  # unprocessed line content?
+text += lbuff.rstrip()  # append to text
+just_tab.append(False)  # cannot align-justify
+if i < len(t0) - 1:  # not the last line?
+text += "\n"  # insert line break
+# compute used part of the textbox
+if text.endswith("\n"):
+text = text[:-1]
+lb_count = text.count("\n") + 1  # number of lines written
+# text height = line count * line height plus one descender value
+text_height = lheight * lb_count - descender * fontsize
+more = text_height - maxheight  # difference to height limit
+if more > EPSILON:  # landed too much outside rect
+return (-1) * more  # return deficit, don't output
+more = abs(more)
+if more < EPSILON:
+more = 0  # don't bother with epsilons
+nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm  # initialize output buffer
+templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf "
+# center, right, justify: output each line with its own specifics
+text_t = text.splitlines()  # split text in lines again
+just_tab[-1] = False  # never justify last line
+for i, t in enumerate(text_t):
+spacing = 0
+pl = maxwidth - pixlen(t)  # length of empty line part
+pnt = point + c_pnt * (i * lheight_factor)  # text start of line
+if align == 1:  # center: right shift by half width
+if rot in (0, 180):
+pnt = pnt + Point(pl / 2, 0) * progr
+else:
+pnt = pnt - Point(0, pl / 2) * progr
+elif align == 2:  # right: right shift by full width
+if rot in (0, 180):
+pnt = pnt + Point(pl, 0) * progr
+else:
+pnt = pnt - Point(0, pl) * progr
+elif align == 3:  # justify
+spaces = t.count(" ")  # number of spaces in line
+if spaces > 0 and just_tab[i]:  # if any, and we may justify
+spacing = pl / spaces  # make every space this much larger
+else:
+spacing = 0  # keep normal space length
+top = height - pnt.y - self.y
+left = pnt.x + self.x
+if rot == 90:
+left = height - pnt.y - self.y
+top = -pnt.x - self.x
+elif rot == 270:
+left = -height + pnt.y + self.y
+top = pnt.x + self.x
+elif rot == 180:
+left = -pnt.x - self.x
+top = -height + pnt.y + self.y
+nres += templ(left, top, fname, fontsize)
+if render_mode > 0:
+nres += "%i Tr " % render_mode
+nres += _format_g(border_width * fontsize) + " w "
+if miter_limit is not None:
+nres += _format_g(miter_limit) + " M "
+if align == 3:
+nres += _format_g(spacing) + " Tw "
+if color is not None:
+nres += color_str
+if fill is not None:
+nres += fill_str
+nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering)
+nres += "ET\n%sQ\n" % emc
+self.text_cont += nres
+self.updateRect(rect)
+return more
def finish(
    self,
    width: float = 1,
    color: OptSeq = (0,),
    fill: OptSeq = None,
    lineCap: int = 0,
    lineJoin: int = 0,
    dashes: OptStr = None,
    even_odd: bool = False,
    morph: OptSeq = None,
    closePath: bool = True,
    fill_opacity: float = 1,
    stroke_opacity: float = 1,
    oc: int = 0,
) -> None:
    """Close the pending drawing segment and queue it for the next commit.

    The path operators accumulated in `self.draw_cont` are decorated with
    graphics state settings and a painting operator, wrapped in `q ... Q`,
    appended to `self.totalcont`, and `self.draw_cont` is reset.

    Args:
        width: stroke width. Zero disables stroking.
        color: stroke color sequence. `None` disables stroking.
        fill: fill color sequence. `None` means the path is only stroked.
        lineCap: value emitted with the `J` operator when non-zero.
        lineJoin: value emitted with the `j` operator when non-zero.
        dashes: dash specification emitted with the `d` operator. `None`,
            `""` and `"[] 0"` are treated as "no dashes".
        even_odd: select the even-odd variants (`B*` / `f*`) when filling.
        morph: pair whose first item has `.x` / `.y` and whose second item
            is multiplied as a matrix. Applied only if `CheckMorph()`
            accepts it.
        closePath: append `h` to connect the last point to the first.
        fill_opacity: passed as `ca` to the page's `_set_opacity()`.
        stroke_opacity: passed as `CA` to the page's `_set_opacity()`.
        oc: passed to the page's `_get_optional_content()`.
    """
    if self.draw_cont == "":  # nothing was drawn: no-op
        return

    # A border needs both a width and a color; missing either one turns
    # stroking off completely.
    if width == 0 or color is None:
        width, color = 0, None

    stroke_code = ColorCode(color, "c")  # ensure proper color string
    fill_code = ColorCode(fill, "f")  # ensure proper fill string

    oc_name = self.page._get_optional_content(oc)
    if oc_name is None:
        closing_marker = ""
    else:
        self.draw_cont = "/OC /%s BDC\n" % oc_name + self.draw_cont
        closing_marker = "EMC\n"

    gstate = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
    if gstate is not None:
        self.draw_cont = "/%s gs\n" % gstate + self.draw_cont

    # The line width is appended after the path; cap, join and dash
    # settings are prepended in front of it.
    if width != 0 and width != 1:
        self.draw_cont += _format_g(width) + " w\n"
    if lineCap != 0:
        self.draw_cont = "%i J\n" % lineCap + self.draw_cont
    if lineJoin != 0:
        self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
    if dashes not in (None, "", "[] 0"):
        self.draw_cont = "%s d\n" % dashes + self.draw_cont

    if closePath:
        self.draw_cont += "h\n"
        self.last_point = None

    if color is not None:
        self.draw_cont += stroke_code

    # Choose the painting operator: stroke only, fill only, or both.
    if fill is None:
        paint_op = "S\n"
    else:
        self.draw_cont += fill_code
        paint_op = "B" if color is not None else "f"
        if even_odd:
            paint_op += "*"
        paint_op += "\n"
    self.draw_cont += paint_op + closing_marker

    if CheckMorph(morph):
        pivot, transform = morph[0], morph[1]
        shift = Matrix(
            1, 0, 0, 1, pivot.x + self.x, self.height - pivot.y - self.y
        )
        combined = ~shift * transform * shift
        self.draw_cont = _format_g(JM_TUPLE(combined)) + " cm\n" + self.draw_cont

    self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
    self.draw_cont = ""
    self.last_point = None
    return
def commit(self, overlay: bool = True) -> None:
    """Update the page's /Contents object with Shape data.

    The accumulated drawing and text content is written as a new contents
    stream of the page; afterwards the Shape is reset so it can be re-used.

    Args:
        overlay: controls whether data appear in foreground (default)
            or background.

    Notes:
        The content is encoded into a local buffer and the Shape's own
        state is only reset after the page was updated. If updating the
        page raises, `self.totalcont` and `self.text_cont` are therefore
        still intact strings and the commit can be retried.
    """
    CheckParent(self.page)  # doc may have died meanwhile

    # Encode without touching self.totalcont, which must stay a str for
    # any later finish() / commit() call.
    payload = (self.totalcont + self.text_cont).encode()

    if payload:
        if overlay:
            self.page.wrap_contents()  # ensure a balanced graphics state
        # make /Contents object with dummy stream
        xref = TOOLS._insert_contents(self.page, b" ", overlay)
        # update it with potential compression
        self.doc.update_stream(xref, payload)

    # clean up for potential re-use
    self.last_point = None
    self.rect = None
    self.draw_cont = ""
    self.text_cont = ""
    self.totalcont = ""
 class Story:
 def __init__( self, html='', user_css=None, em=12, archive=None):
 buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8'))
 if archive and not isinstance(archive, Archive):
 for k, v in args.items():
 setattr( position2, k, v)
 function( position2)
 mupdf.fz_story_positions( self.this, function2)
-def place( self, where):
+def place( self, where, flags=0):
+'''
+Wrapper for fz_place_story_flags().
+'''
 where = JM_rect_from_py( where)
 filled = mupdf.FzRect()
-more = mupdf.fz_place_story( self.this, where, filled)
+more = mupdf.fz_place_story_flags( self.this, where, filled, flags)
 return more, JM_py_from_rect( filled)
 def reset( self):
 mupdf.fz_reset_story( self.this)
 Members:
 `big_enough`:
 `True` if the fit succeeded.
 `filled`:
-From the last call to `Story.place()`.
+Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
+will be wider than .rect if any single word (which we never split)
+was too wide for .rect.
 `more`:
 `False` if the fit succeeded.
 `numcalls`:
 Number of calls made to `self.place()`.
 `parameter`:
 The successful parameter value, or the largest failing value.
 `rect`:
-The rect created from `parameter`.
+The pymupdf.Rect created from `parameter`.
 '''
 def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
 self.big_enough = big_enough
 self.filled = filled
 self.more = more
 f' numcalls={self.numcalls}'
 f' parameter={self.parameter}'
 f' rect={self.rect}'
 )
-def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
+def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0):
 '''
 Finds optimal rect that contains the story `self`.
 Returns a `Story.FitResult` instance.
 Maximum parameter to consider; `None` for +infinity.
 :arg delta:
 Maximum error in returned `parameter`.
 :arg verbose:
 If true we output diagnostics.
+:arg flags:
+Passed to mupdf.fz_place_story_flags(). e.g.
+zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`.
 '''
 def log(text):
 assert verbose
 message(f'fit(): {text}')
 big_enough = False
 result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
 if verbose:
 log(f'update(): not calling self.place() because rect is empty.')
 else:
-more, filled = self.place(rect)
+more, filled = self.place(rect, flags)
 state.numcalls += 1
 big_enough = not more
 result = Story.FitResult(
 filled=filled,
 more=more,
 if state.pmax - state.pmin < delta:
 return ret()
 parameter = (state.pmin + state.pmax) / 2
 update(parameter)
-def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
+def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0):
 '''
 Finds smallest value `scale` in range `scale_min..scale_max` where
 `scale * rect` is large enough to contain the story `self`.
-Returns a `Story.FitResult` instance.
+Returns a `Story.FitResult` instance with `.parameter` set to `scale`.
 :arg rect:
 Base rect `(x0, y0, x1, y1)`. Its width and height are multiplied by
 `scale` while the top-left corner `(x0, y0)` stays fixed.
 infinite.
 :arg delta:
 Maximum error in returned scale.
 :arg verbose:
 If true we output diagnostics.
+:arg flags:
+Passed to Story.place().
 '''
 x0, y0, x1, y1 = rect
 width = x1 - x0
 height = y1 - y0
 def fn(scale):
 return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
-return self.fit(fn, scale_min, scale_max, delta, verbose)
+return self.fit(fn, scale_min, scale_max, delta, verbose, flags)
 def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
 '''
 Finds smallest height in range `height_min..height_max` where a rect
 with size `(width, height)` is large enough to contain the story
 cbbox = JM_char_bbox(line, ch)
 if (not JM_rects_overlap(tp_rect, cbbox)
 and not mupdf.fz_is_infinite_rect(tp_rect)
 ):
 continue
+if buflen == 0 and ch.m_internal.c == 0x200d:
+# ZERO WIDTH JOINER cannot start a word
+continue
 word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
 this_char_rtl = JM_is_rtl_char(ch.m_internal.c)
 if word_delimiter or this_char_rtl != last_char_rtl:
 if buflen == 0 and word_delimiter:
 continue    # skip delimiters at line start
 elif idx[i] == idx2[-1] + 1:  # new adjacent Latin word
 idx2.append(idx[i])
 text = " ".join(words)
 return text
def fill_textbox(
    writer: 'TextWriter',
    rect: rect_like,
    text: typing.Union[str, list],
    pos: point_like = None,
    font: typing.Optional[Font] = None,
    fontsize: float = 11,
    lineheight: OptFloat = None,
    align: int = 0,
    warn: bool = None,
    right_to_left: bool = False,
    small_caps: bool = False,
) -> tuple:
    """Fill a rectangle with text.

    Args:
        writer: pymupdf.TextWriter object (= "self")
        rect: rect-like to receive the text.
        text: string or list/tuple of strings.
        pos: point-like start position of first word. Defaults to a point
            just below the top-left corner of 'rect'.
        font: pymupdf.Font object (default pymupdf.Font('helv')). Anything
            that is not a Font instance is replaced by the default.
        fontsize: the fontsize.
        lineheight: overwrite the font property
        align: (int) 0 = left, 1 = center, 2 = right, 3 = justify
        warn: text overflow action. None: ignore silently, True: emit a
            warning message, False: raise ValueError.
        right_to_left: (bool) indicate right-to-left language.
        small_caps: (bool) passed through to the font's length
            computations and to TextWriter.append().

    Returns:
        List of (text, length) items for the lines that did not fit into
        the rectangle and were therefore not written. Empty if everything
        was written.

    Raises:
        ValueError: if 'rect' is empty, if 'pos' is outside 'rect', or if
            the text overflows and 'warn' is False.
    """
    rect = Rect(rect)
    if rect.is_empty:
        raise ValueError("fill rect must not empty.")
    # isinstance (not an exact type check) so that Font subclasses are kept
    if not isinstance(font, Font):
        font = Font("helv")

    def textlen(x):
        """Return length of a string."""
        return font.text_length(
            x, fontsize=fontsize, small_caps=small_caps
        )  # abbreviation

    def char_lengths(x):
        """Return list of single character lengths for a string."""
        return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps)

    def append_this(pos, text):
        """Append 'text' at 'pos' to the writer and return its result."""
        return writer.append(
            pos, text, font=font, fontsize=fontsize, small_caps=small_caps
        )

    tolerance = fontsize * 0.2  # extra distance to left border
    space_len = textlen(" ")
    std_width = rect.width - tolerance
    std_start = rect.x0 + tolerance

    def norm_words(width, words):
        """Cut any word in pieces no longer than 'width'."""
        nwords = []
        word_lengths = []
        for w in words:
            wl_lst = char_lengths(w)
            wl = sum(wl_lst)
            if wl <= width:  # nothing to do - copy over
                nwords.append(w)
                word_lengths.append(wl)
                continue

            # word longer than rect width - split it in parts
            # NOTE: if even a single character is wider than 'width', n
            # reaches 0 and the remainder of the word is not copied over.
            n = len(wl_lst)
            while n > 0:
                wl = sum(wl_lst[:n])
                if wl <= width:
                    nwords.append(w[:n])
                    word_lengths.append(wl)
                    w = w[n:]
                    wl_lst = wl_lst[n:]
                    n = len(wl_lst)
                else:
                    n -= 1
        return nwords, word_lengths

    def output_justify(start, line):
        """Justified output of a line."""
        # ignore leading / trailing / multiple spaces
        words = [w for w in line.split(" ") if w != ""]
        nwords = len(words)
        if nwords == 0:
            return
        if nwords == 1:  # single word cannot be justified
            append_this(start, words[0])
            return
        tl = sum(textlen(w) for w in words)  # total word lengths
        gaps = nwords - 1  # number of word gaps
        gapl = (std_width - tl) / gaps  # width of each gap
        for w in words:
            _, lp = append_this(start, w)  # output one word
            start.x = lp.x + gapl  # next start at word end plus gap
        return

    asc = font.ascender
    dsc = font.descender
    if lineheight:
        lheight = lineheight
    elif asc - dsc <= 1:
        lheight = 1.2
    else:
        lheight = asc - dsc

    LINEHEIGHT = fontsize * lheight  # effective line height
    width = std_width  # available horizontal space

    # starting point of text
    if pos is not None:
        pos = Point(pos)
    else:  # default is just below rect top-left
        pos = rect.tl + (tolerance, fontsize * asc)
    if pos not in rect:
        raise ValueError("Text must start in rectangle.")

    # calculate displacement factor for alignment
    if align == TEXT_ALIGN_CENTER:
        factor = 0.5
    elif align == TEXT_ALIGN_RIGHT:
        factor = 1.0
    else:
        factor = 0

    # split in lines if just a string was given
    if isinstance(text, str):
        textlines = text.splitlines()
    else:
        textlines = []
        for line in text:
            textlines.extend(line.splitlines())

    max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1

    new_lines = []  # the final list of textbox lines
    no_justify = []  # no justify for these line numbers
    # NOTE: 'width' is deliberately left as a function-level variable: the
    # value set for the last input line is used again for alignment in the
    # output loop below.
    for i, line in enumerate(textlines):
        if line in ("", " "):
            new_lines.append((line, space_len))
            width = rect.width - tolerance
            no_justify.append(len(new_lines) - 1)
            continue
        if i == 0:
            width = rect.x1 - pos.x
        else:
            width = rect.width - tolerance

        if right_to_left:  # reverses Arabic / Hebrew text front to back
            line = writer.clean_rtl(line)
        tl = textlen(line)
        if tl <= width:  # line short enough
            new_lines.append((line, tl))
            no_justify.append(len(new_lines) - 1)
            continue

        # we need to split the line in fitting parts
        words = line.split(" ")  # the words in the line

        # cut in parts any words that are longer than rect width
        words, word_lengths = norm_words(width, words)

        n = len(words)
        while True:
            line0 = " ".join(words[:n])
            wl = sum(word_lengths[:n]) + space_len * (n - 1)
            if wl <= width:
                new_lines.append((line0, wl))
                words = words[n:]
                word_lengths = word_lengths[n:]
                n = len(words)
                line0 = None
            else:
                n -= 1

            if len(words) == 0:
                break
            assert n

    # -------------------------------------------------------------------------
    # List of lines created. Each item is (text, tl), where 'tl' is the PDF
    # output length (float) and 'text' is the text. Except for justified text,
    # this is output-ready.
    # -------------------------------------------------------------------------
    nlines = len(new_lines)
    if nlines > max_lines:
        msg = "Only fitting %i of %i lines." % (max_lines, nlines)
        if warn is None:
            pass
        elif warn:
            message("Warning: " + msg)
        else:
            raise ValueError(msg)

    start = Point()
    no_justify.append(nlines - 1)  # no justifying of last line

    # Number of lines actually written; clamped so that a non-positive
    # 'max_lines' writes nothing.
    n_out = max(0, min(max_lines, nlines))
    for i in range(n_out):
        line, tl = new_lines[i]

        if right_to_left:  # Arabic, Hebrew
            line = line[::-1]

        if i == 0:  # may have different start for first line
            start = pos  # same object: updates below also move 'pos'

        if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width:
            output_justify(start, line)
            start.x = std_start
            start.y += LINEHEIGHT
            continue

        if i > 0 or pos.x == std_start:  # left, center, right alignments
            start.x += (width - tl) * factor

        append_this(start, line)
        start.x = std_start
        start.y += LINEHEIGHT

    return new_lines[n_out:]  # return non-written lines
 def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
 """Write the text to a PDF page having the TextWriter's page size.
 Args:
 return max(0, self.y1 - self.y0)
 def contains(self, x):
 """Check if x is in the rectangle."""
 return self.__contains__(x)
def get_area(self, *args) -> float:
    """Calculate area of rectangle.

    The optional first positional argument selects the unit and is one of
    'px' (default), 'in', 'cm', or 'mm'.
    """
    unit = args[0] if args else "px"
    # Per-unit (numerator, denominator) of the linear conversion factor.
    conversions = {
        "px": (1, 1),
        "in": (1.0, 72.0),
        "cm": (2.54, 72.0),
        "mm": (25.4, 72.0),
    }
    numerator, denominator = conversions[unit]
    factor = (numerator / denominator) ** 2  # areas scale with the square
    return factor * self.width * self.height
 def include_point(self, p):
 """Extend rectangle to include point p."""
 rect = self.rect.include_point(p)
 return rect.irect
 red, green, blue: integers in range 0..255.
 '''
 return _wxcolors
def _mupdf_devel(make_links=True):
    '''
    Allows PyMuPDF installation to be used to compile and link programmes
    that use the MuPDF C/C++ API.

    Args:
        make_links:
            If true, then on non-windows we also create softlinks to any
            shared libraries that are supplied with a version suffix; this
            allows them to be used in a link command.

            For example we create links such as:

                site-packages/pymupdf/
                    libmupdf.so -> libmupdf.so.26.7
                    libmupdfcpp.so -> libmupdfcpp.so.26.7

    Returns: (mupdf_include, mupdf_lib).
        mupdf_include:
            Path of MuPDF include directory within PyMuPDF install.
        mupdf_lib
            Path of MuPDF library directory within PyMuPDF install.
    '''
    import platform

    log(f'{mupdf_version=}')
    on_windows = platform.system() == 'Windows'
    package_dir = os.path.normpath(f'{__file__}/..')
    mupdf_include = f'{package_dir}/mupdf-devel/include'
    # Windows: separate .lib files are used at build time.
    # Elsewhere: .so files are used for both buildtime and runtime linking.
    mupdf_lib = f'{package_dir}/mupdf-devel/lib' if on_windows else package_dir

    log(f'Within installed PyMuPDF:')
    log(f'    {mupdf_include=}')
    log(f'    {mupdf_lib=}')
    assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.'
    assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.'

    if on_windows or not make_links:
        return mupdf_include, mupdf_lib

    # Make symbolic links within the installed pymupdf module so
    # that ld can find libmupdf.so etc. This is a bit of a hack, but
    # necessary because wheels cannot contain symbolic links.
    #
    # For example we create `libmupdf.so -> libmupdf.so.24.8`.
    #
    # We are careful to only create symlinks for the expected MuPDF
    # version, in case old .so files from a previous install are still
    # in place.
    #
    log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.')
    version_suffix = '[.]'.join(mupdf_version.split('.')[1:3])
    mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{version_suffix}$'
    log(f'{mupdf_lib_regex=}.')
    for entry in os.listdir(mupdf_lib):
        found = re.match(mupdf_lib_regex, entry)
        if not found:
            continue
        pfrom = f'{mupdf_lib}/{found.group(1)}'
        # os.path.exists() can return false if softlink exists
        # but points to non-existent file, so we also use
        # `os.path.islink()`.
        if os.path.islink(pfrom) or os.path.exists(pfrom):
            log(f'Removing existing link {pfrom=}.')
            os.remove(pfrom)
        log(f'Creating symlink: {pfrom} -> {entry}')
        os.symlink(entry, pfrom)

    return mupdf_include, mupdf_lib
 # We cannot import utils earlier because it imports this .py file itself and
 # uses some pymupdf.* types in function typing.
 #
 from . import utils
 recover_char_quad           = utils.recover_char_quad
 recover_line_quad           = utils.recover_line_quad
 recover_quad                = utils.recover_quad
 recover_span_quad           = utils.recover_span_quad
-Annot.get_text              = utils.get_text
-Annot.get_textbox           = utils.get_textbox
-Document._do_links          = utils.do_links
-Document._do_widgets        = utils.do_widgets
-Document.del_toc_item       = utils.del_toc_item
-Document.get_char_widths    = utils.get_char_widths
-Document.get_oc             = utils.get_oc
-Document.get_ocmd           = utils.get_ocmd
-Document.get_page_labels    = utils.get_page_labels
-Document.get_page_numbers   = utils.get_page_numbers
-Document.get_page_pixmap    = utils.get_page_pixmap
-Document.get_page_text      = utils.get_page_text
-Document.get_toc            = utils.get_toc
-Document.has_annots         = utils.has_annots
-Document.has_links          = utils.has_links
-Document.insert_page        = utils.insert_page
-Document.new_page           = utils.new_page
-Document.scrub              = utils.scrub
-Document.search_page_for    = utils.search_page_for
-Document.set_metadata       = utils.set_metadata
-Document.set_oc             = utils.set_oc
-Document.set_ocmd           = utils.set_ocmd
-Document.set_page_labels    = utils.set_page_labels
-Document.set_toc            = utils.set_toc
-Document.set_toc_item       = utils.set_toc_item
-Document.subset_fonts       = utils.subset_fonts
-Document.tobytes            = Document.write
-Document.xref_copy          = utils.xref_copy
-IRect.get_area              = utils.get_area
-Page.apply_redactions       = utils.apply_redactions
-Page.delete_image           = utils.delete_image
-Page.delete_widget          = utils.delete_widget
-Page.draw_bezier            = utils.draw_bezier
-Page.draw_circle            = utils.draw_circle
-Page.draw_curve             = utils.draw_curve
-Page.draw_line              = utils.draw_line
-Page.draw_oval              = utils.draw_oval
-Page.draw_polyline          = utils.draw_polyline
-Page.draw_quad              = utils.draw_quad
-Page.draw_rect              = utils.draw_rect
-Page.draw_sector            = utils.draw_sector
-Page.draw_squiggle          = utils.draw_squiggle
-Page.draw_zigzag            = utils.draw_zigzag
-Page.get_image_info         = utils.get_image_info
-Page.get_image_rects        = utils.get_image_rects
-Page.get_label              = utils.get_label
-Page.get_links              = utils.get_links
-Page.get_pixmap             = utils.get_pixmap
-Page.get_text               = utils.get_text
-Page.get_text_blocks        = utils.get_text_blocks
-Page.get_text_selection     = utils.get_text_selection
-Page.get_text_words         = utils.get_text_words
-Page.get_textbox            = utils.get_textbox
-Page.get_textpage_ocr       = utils.get_textpage_ocr
-Page.insert_image           = utils.insert_image
-Page.insert_link            = utils.insert_link
-Page.insert_text            = utils.insert_text
-Page.insert_textbox         = utils.insert_textbox
-Page.insert_htmlbox         = utils.insert_htmlbox
-Page.new_shape              = lambda x: utils.Shape(x)
-Page.replace_image          = utils.replace_image
-Page.search_for             = utils.search_for
-Page.show_pdf_page          = utils.show_pdf_page
-Page.update_link            = utils.update_link
-Page.write_text             = utils.write_text
-Shape                       = utils.Shape
 from .table import find_tables
 Page.find_tables = find_tables
-Rect.get_area               = utils.get_area
-TextWriter.fill_textbox     = utils.fill_textbox
 class FitzDeprecation(DeprecationWarning):
 pass
 _alias( Rect, 'include_rect')
 _alias( Rect, 'is_empty')
 _alias( Rect, 'is_infinite')
 _alias( TextWriter, 'fill_textbox')
 _alias( TextWriter, 'write_text')
-_alias( utils.Shape, 'draw_bezier')
+_alias( Shape, 'draw_bezier')
-_alias( utils.Shape, 'draw_circle')
+_alias( Shape, 'draw_circle')
-_alias( utils.Shape, 'draw_curve')
+_alias( Shape, 'draw_curve')
-_alias( utils.Shape, 'draw_line')
+_alias( Shape, 'draw_line')
-_alias( utils.Shape, 'draw_oval')
+_alias( Shape, 'draw_oval')
-_alias( utils.Shape, 'draw_polyline')
+_alias( Shape, 'draw_polyline')
-_alias( utils.Shape, 'draw_quad')
+_alias( Shape, 'draw_quad')
-_alias( utils.Shape, 'draw_rect')
+_alias( Shape, 'draw_rect')
-_alias( utils.Shape, 'draw_sector')
+_alias( Shape, 'draw_sector')
-_alias( utils.Shape, 'draw_squiggle')
+_alias( Shape, 'draw_squiggle')
-_alias( utils.Shape, 'draw_zigzag')
+_alias( Shape, 'draw_zigzag')
-_alias( utils.Shape, 'insert_text')
+_alias( Shape, 'insert_text')
-_alias( utils.Shape, 'insert_textbox')
+_alias( Shape, 'insert_textbox')
 if 0:
 restore_aliases()
 __version__ = VersionBind

Mercurial > hgrepos > Python2 > PyMuPDF

comparison src/__init__.py @ 41:71bcc18e306f

comparison src/init.py @ 41:71bcc18e306f